2020-04-16 02:41:53 +03:00
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from app.filter import Filter
|
2021-12-07 07:39:50 +03:00
|
|
|
from app.models.config import Config
|
Improve public instance session management (#480)
This introduces a new approach to handling user sessions, which should
allow for users to set more reliable config settings on public instances.
Previously, when a user with cookies disabled would update their config,
this would modify the app's default config file, which would in turn
cause new users to inherit these settings when visiting the app for the
first time and cause users to inherit these settings when their current
session cookie expired (which was after 30 days by default I believe).
There was also some half-baked logic for determining on the backend
whether or not a user had cookies disabled, which led to some issues
with out-of-control session file creation by Flask.
Now, when a user visits the site, their initial request is forwarded to
a session/<session id> endpoint, and during that subsequent request
their current session id is matched against the one found in the url. If
the ids match, the user has cookies enabled. If not, their original
request is modified with a 'cookies_disabled' query param that tells
Flask not to bother trying to set up a new session for that user, and
instead just use the app's fallback Fernet key for encryption and the
default config.
Since attempting to create a session for a user with cookies disabled
creates a new session file, there is now also a clean-up routine included
in the new session decorator, which will remove all sessions that don't
include a valid key in the dict. NOTE!!! This means that current user
sessions on public instances will be cleared once this update is merged
in. In the long run that's a good thing though, since this will allow session
mgmt to be a lot more reliable overall for users regardless of their cookie
preference.
Individual user sessions still use a unique Fernet key for encrypting queries,
but users with cookies disabled will use the default app key for encryption
and decryption.
Sessions are also now (semi)permanent and have a lifetime of 1 year.
2021-11-18 05:35:30 +03:00
|
|
|
from app.models.endpoint import Endpoint
|
2023-11-01 23:07:45 +03:00
|
|
|
from app.utils import results
|
2022-12-05 22:14:14 +03:00
|
|
|
from app.utils.session import generate_key
|
2020-04-16 02:41:53 +03:00
|
|
|
from datetime import datetime
|
2022-06-24 19:51:15 +03:00
|
|
|
from dateutil.parser import ParserError, parse
|
2021-05-07 18:45:53 +03:00
|
|
|
from urllib.parse import urlparse
|
|
|
|
|
|
|
|
from test.conftest import demo_config
|
2020-04-16 02:41:53 +03:00
|
|
|
|
|
|
|
|
|
|
|
def get_search_results(data):
    """Run a raw search response through the filter and collect result divs.

    ``data`` is parsed with BeautifulSoup's ``html.parser`` and cleaned by a
    ``Filter`` built from a freshly generated key and the demo config. The
    children of the ``div`` whose id is ``main`` are then scanned, and those
    that look like result wrappers are returned as a list.
    """
    result_filter = Filter(
        user_key=generate_key(),
        config=Config(**demo_config))
    soup = result_filter.clean(BeautifulSoup(data, 'html.parser'))

    container = soup.find('div', {'id': 'main'})
    assert len(container) > 1

    def looks_like_result(node):
        # Result divs should only have 1 inner div
        if len(list(node.children)) != 1:
            return False
        inner = node.findChild()
        return bool(inner) and 'div' in inner.name

    return [node for node in container if looks_like_result(node)]
|
|
|
|
|
|
|
|
|
2020-04-29 03:59:33 +03:00
|
|
|
def test_get_results(client):
    """A GET search for 'test' succeeds and yields 10 to 15 result divs."""
    response = client.get(f'/{Endpoint.search}?q=test')
    assert response._status_code == 200

    # Depending on the search, there can be more
    # than 10 result divs
    result_divs = get_search_results(response.data)
    assert 10 <= len(result_divs) <= 15
|
2020-04-16 02:41:53 +03:00
|
|
|
|
|
|
|
|
2020-04-29 03:59:33 +03:00
|
|
|
def test_post_results(client):
    """A POST to the search endpoint is expected to answer with a 302."""
    form = {'q': 'test'}
    response = client.post(f'/{Endpoint.search}', data=form)
    assert response._status_code == 302
|
2021-06-15 17:14:42 +03:00
|
|
|
|
|
|
|
|
|
|
|
def test_translate_search(client):
    """A 'translate hola' query returns a page containing the expected
    translation markers."""
    response = client.get(f'/{Endpoint.search}?q=translate hola')
    assert response._status_code == 200

    # Pretty weak test, but better than nothing
    body = str(response.data)
    for fragment in ('iframe', '/auto/en/ hola'):
        assert fragment in body
|
2020-04-29 03:59:33 +03:00
|
|
|
|
|
|
|
|
2021-05-07 18:45:53 +03:00
|
|
|
def test_block_results(client):
    """Check that a site listed in the ``block`` config setting is removed
    from search results.

    The test first confirms that an unfiltered 'pinterest' search contains
    at least one link whose host includes ``pinterest.com``, then posts a
    config with ``block`` set to that domain and asserts that no absolute
    link in the follow-up search points at it.
    """
    def link_hosts(response):
        # Network location of every anchor that carries an href
        anchors = BeautifulSoup(response.data, 'html.parser').find_all(
            'a', href=True)
        return [urlparse(anchor['href']).netloc for anchor in anchors]

    rv = client.get(f'/{Endpoint.search}?q=pinterest')
    assert rv._status_code == 200

    # Sanity check: without blocking, pinterest links must be present,
    # otherwise the assertions further down would prove nothing
    assert any('pinterest.com' in host for host in link_hosts(rv))

    # Post a copy so the shared demo_config dict imported from conftest
    # is not mutated for tests that run afterwards
    blocked_config = {**demo_config, 'block': 'pinterest.com'}
    rv = client.post(f'/{Endpoint.config}', data=blocked_config)
    assert rv._status_code == 302

    rv = client.get(f'/{Endpoint.search}?q=pinterest')
    assert rv._status_code == 200

    for host in link_hosts(rv):
        # Relative links have no host and cannot point at the blocked site
        if not host:
            continue
        # The blocked domain must not appear in the host; this also
        # catches subdomains such as www.pinterest.com
        assert 'pinterest.com' not in host
|
2021-05-07 18:45:53 +03:00
|
|
|
|
|
|
|
|
2022-02-14 21:40:11 +03:00
|
|
|
def test_view_my_ip(client):
    """A 'my ip address' query returns a page showing the IP address text."""
    response = client.get(f'/{Endpoint.search}?q=my ip address')
    assert response._status_code == 200

    # Pretty weak test, but better than nothing
    body = str(response.data)
    for fragment in ('Your public IP address', '127.0.0.1'):
        assert fragment in body
|
|
|
|
|
|
|
|
|
2020-04-16 02:41:53 +03:00
|
|
|
def test_recent_results(client):
    """Time-restricted searches only show results dated within the window.

    For each supported time filter, every result whose first ``span`` holds
    a parseable date string must be no older than the filter's day count
    plus a small tolerance. Spans that are empty, of unexpected length, or
    unparseable are skipped.
    """
    max_age_days = {
        'past year': 365,
        'past month': 31,
        'past week': 7,
    }

    for label, limit in max_age_days.items():
        response = client.get(f'/{Endpoint.search}?q=test :{label}')
        candidates = get_search_results(response.data)

        now = datetime.now()
        for candidate in candidates:
            span = candidate.find('span')
            if not span:
                continue

            text = span.decode_contents()
            if not text or not 7 <= len(text) <= 15:
                continue

            try:
                result_date = parse(text)
            except ParserError:
                continue

            # Date can have a little bit of wiggle room
            assert (now - result_date).days <= (limit + 5)
|
2022-06-03 23:03:57 +03:00
|
|
|
|
|
|
|
|
|
|
|
def test_leading_slash_search(client):
    """Searches with a leading slash are treated as queries, not endpoints.

    After filtering the response, every link whose href contains ``start=``
    must begin with the search endpoint.
    """
    query = '/test'
    response = client.get(f'/{Endpoint.search}?q={query}')
    assert response._status_code == 200

    result_filter = Filter(
        user_key=generate_key(),
        config=Config(**demo_config),
        query=query)
    soup = result_filter.clean(BeautifulSoup(response.data, 'html.parser'))

    paging_hrefs = [
        anchor['href']
        for anchor in soup.find_all('a', href=True)
        if 'start=' in anchor['href']
    ]
    for href in paging_hrefs:
        assert href.startswith(f'{Endpoint.search}')
|
2023-11-01 23:07:45 +03:00
|
|
|
|
|
|
|
|
|
|
|
def test_site_alt_prefix_skip():
    """Ensure prefixes are skipped correctly for site alts."""
    # Default site_alts (farside.link)
    default_expected = {
        'https://www.reddit.com': 'https://farside.link/libreddit',
        'https://www.twitter.com': 'https://farside.link/nitter',
        'https://www.youtube.com': 'https://farside.link/invidious',
    }
    for link, expected in default_expected.items():
        assert results.get_site_alt(link=link) == expected

    test_site_alts = {
        'reddit.com': 'reddit.endswithmobile.domain',
        'twitter.com': 'https://twitter.endswithm.domain',
        'youtube.com': 'http://yt.endswithwww.domain',
    }

    # Domains with part of SKIP_PREFIX in them
    custom_expected = {
        'https://www.reddit.com': 'https://reddit.endswithmobile.domain',
        'https://www.twitter.com': 'https://twitter.endswithm.domain',
        'https://www.youtube.com': 'http://yt.endswithwww.domain',
    }
    for link, expected in custom_expected.items():
        actual = results.get_site_alt(link=link, site_alts=test_site_alts)
        assert actual == expected
|