Updated tests, fixed a few bugs

Added opensearch routes test and individual tests for searching via GET
and POST separately.

Fixed incorrect assignment in gen_query.
main
Ben Busby 2020-04-28 18:59:33 -06:00
parent 0c0ebb8917
commit 1cbe394e6f
6 changed files with 50 additions and 26 deletions

View File

@@ -34,11 +34,12 @@ class Filter:
     def clean(self, soup):
         self.remove_ads(soup)
-        self.sync_images(soup)
+        self.update_image_paths(soup)
         self.update_styling(soup)
         self.update_links(soup)

         input_form = soup.find('form')
-        input_form['method'] = 'POST'
+        if input_form is not None:
+            input_form['method'] = 'POST'

         return soup
@@ -55,7 +56,7 @@ class Filter:
         for div in ad_divs:
             div.decompose()

-    def sync_images(self, soup):
+    def update_image_paths(self, soup):
         for img in [_ for _ in soup.find_all('img') if 'src' in _]:
             img_src = img['src']
             if img_src.startswith('//'):
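The new None check matters because BeautifulSoup's find() returns None when a page has no form element, and subscripting None raises a TypeError. A minimal, standalone illustration of the failure mode (not code from the app itself):

    # Standalone illustration of the case the new guard handles.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup('<div id="main">no form here</div>', 'html.parser')
    input_form = soup.find('form')

    assert input_form is None
    # Without the `if input_form is not None:` check, the assignment below
    # would raise TypeError: 'NoneType' object does not support item assignment.
    if input_form is not None:
        input_form['method'] = 'POST'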

View File

@@ -10,7 +10,12 @@ MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
 DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'

 # Valid query params
-VALID_PARAMS = ['tbs', 'tbm', 'start', 'near']
+VALID_PARAMS = {
+    'tbs': '',
+    'tbm': '',
+    'start': '',
+    'near': ''
+}


 def gen_user_agent(normal_ua):
@@ -26,33 +31,34 @@ def gen_user_agent(normal_ua):
     return DESKTOP_UA.format(mozilla, linux, firefox)


-def gen_query(q, args, near_city=None):
+def gen_query(query, args, near_city=None):
     # Use :past(hour/day/week/month/year) if available
     # example search "new restaurants :past month"
-    tbs = ''
-    if ':past' in q:
-        time_range = str.strip(q.split(':past', 1)[-1])
-        tbs = '&tbs=qdr:' + str.lower(time_range[0])
+    if ':past' in query:
+        time_range = str.strip(query.split(':past', 1)[-1])
+        VALID_PARAMS['tbs'] = '&tbs=qdr:' + str.lower(time_range[0])

     # Ensure search query is parsable
-    q = urlparse.quote(q)
+    query = urlparse.quote(query)

     # Pass along type of results (news, images, books, etc)
-    tbm = ''
     if 'tbm' in args:
-        tbm = '&tbm=' + args.get('tbm')
+        VALID_PARAMS['tbm'] = '&tbm=' + args.get('tbm')

     # Get results page start value (10 per page, ie page 2 start val = 20)
-    start = ''
     if 'start' in args:
-        start = '&start=' + args.get('start')
+        VALID_PARAMS['start'] = '&start=' + args.get('start')

     # Search for results near a particular city, if available
-    near = ''
     if near_city is not None:
-        near = '&near=' + urlparse.quote(near_city)
+        VALID_PARAMS['near'] = '&near=' + urlparse.quote(near_city)

-    return q + tbs + tbm + start + near
+    for val in VALID_PARAMS.values():
+        if not val or val is None:
+            continue
+        query += val
+
+    return query


 class Request:
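For reference, a self-contained sketch of how the reworked gen_query assembles the final query string. It mirrors the diff above, but uses a local copy of VALID_PARAMS and urllib.parse directly so it runs on its own; `args` here is a plain dict standing in for Flask's request.args.

    # Self-contained sketch of the reworked gen_query. A local params dict is
    # used so the example is runnable as-is; the app mutates the module-level
    # VALID_PARAMS instead.
    from urllib import parse as urlparse

    def gen_query_sketch(query, args, near_city=None):
        params = {'tbs': '', 'tbm': '', 'start': '', 'near': ''}
        if ':past' in query:
            time_range = query.split(':past', 1)[-1].strip()
            params['tbs'] = '&tbs=qdr:' + time_range[0].lower()
        query = urlparse.quote(query)
        if 'tbm' in args:
            params['tbm'] = '&tbm=' + args.get('tbm')
        if 'start' in args:
            params['start'] = '&start=' + args.get('start')
        if near_city is not None:
            params['near'] = '&near=' + urlparse.quote(near_city)
        for val in params.values():
            if val:
                query += val
        return query

    # 'new restaurants :past month' with start=10 becomes
    # 'new%20restaurants%20%3Apast%20month&tbs=qdr:m&start=10'
    print(gen_query_sketch('new restaurants :past month', {'start': '10'}))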

View File

@@ -40,7 +40,6 @@ def opensearch():
 @app.route('/search', methods=['GET', 'POST'])
 def search():
-    q = None
     if request.method == 'GET':
         q = request.args.get('q')
         try:
@@ -50,7 +49,7 @@ def search():
     else:
         q = request.form['q']

-    if q is None or len(q) <= 0:
+    if q is None or len(q) == 0:
         return render_template('error.html')

     user_agent = request.headers.get('User-Agent')
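With the dead `q = None` initializer removed, the query comes from request.args on GET and request.form on POST. Both entry points can be exercised against a running instance, sketched here with the requests library; the localhost URL and port are assumptions, not part of this commit.

    # Hitting /search both ways against a locally running instance
    # (http://localhost:5000 is the assumed Flask default, not from this diff).
    import requests

    get_resp = requests.get('http://localhost:5000/search', params={'q': 'test'})
    post_resp = requests.post('http://localhost:5000/search', data={'q': 'test'})
    assert get_resp.status_code == 200 and post_resp.status_code == 200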

View File

@@ -2,7 +2,7 @@
 <head>
     <link rel="shortcut icon" href="/static/img/favicon.ico" type="image/x-icon">
     <link rel="icon" href="/static/img/favicon.ico" type="image/x-icon">
-    <link rel="search" href="/static/opensearch.xml" type="application/opensearchdescription+xml" title="Shoogle Search">
+    <link rel="search" href="/opensearch.xml" type="application/opensearchdescription+xml" title="Shoogle Search">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{{ query }} - Shoogle Search</title>
 </head>

View File

@@ -1,11 +1,13 @@
 from bs4 import BeautifulSoup
+from cryptography.fernet import Fernet
 from app.filter import Filter
 from datetime import datetime
 from dateutil.parser import *


 def get_search_results(data):
-    soup = Filter().clean(BeautifulSoup(data, 'html.parser'))
+    secret_key = Fernet.generate_key()
+    soup = Filter(secret_key=secret_key).clean(BeautifulSoup(data, 'html.parser'))
     main_divs = soup.find('div', {'id': 'main'})
     assert len(main_divs) > 1
@@ -21,7 +23,7 @@ def get_search_results(data):
     return result_divs


-def test_search_results(client):
+def test_get_results(client):
     rv = client.get('/search?q=test')
     assert rv._status_code == 200
@@ -31,6 +33,16 @@ def test_search_results(client):
     assert len(get_search_results(rv.data)) <= 15


+def test_post_results(client):
+    rv = client.post('/search', data=dict(q='test'))
+    assert rv._status_code == 200
+
+    # Depending on the search, there can be more
+    # than 10 result divs
+    assert len(get_search_results(rv.data)) >= 10
+    assert len(get_search_results(rv.data)) <= 15
+
+
 def test_recent_results(client):
     times = {
         'past year': 365,
@@ -39,7 +51,7 @@ def test_recent_results(client):
     }

     for time, num_days in times.items():
-        rv = client.get('/search?q=test%20%3A' + time)
+        rv = client.post('/search', data=dict(q='test :' + time))
         result_divs = get_search_results(rv.data)
         current_date = datetime.now()
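These tests, like the existing ones, take a `client` fixture whose definition is not part of this commit. A typical Flask/pytest version looks roughly like the sketch below; the `from app import app` import path is an assumption about the project layout, and the real fixture may differ.

    # Hypothetical conftest.py sketch of the `client` fixture the tests rely on.
    import pytest
    from app import app  # assumed import path for the Flask app object

    @pytest.fixture
    def client():
        app.config['TESTING'] = True
        with app.test_client() as client:
            yield client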

View File

@@ -28,3 +28,9 @@ def test_config(client):
     config = json.loads(rv.data)
     for key in demo_config.keys():
         assert config[key] == demo_config[key]
+
+
+def test_opensearch(client):
+    rv = client.get('/opensearch.xml')
+    assert rv._status_code == 200
+    assert 'Shoogle' in str(rv.data)
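The template change above points the search descriptor at /opensearch.xml, and this test expects that route to serve it. The handler itself is outside this diff; a plausible Flask version is sketched below as an assumption, using the existing static/opensearch.xml file and the opensearch() route name seen in the routes hunk header.

    # Hypothetical sketch of the /opensearch.xml handler the test exercises;
    # the project's actual route implementation may differ.
    from flask import send_file
    from app import app  # assumed import path for the Flask app object

    @app.route('/opensearch.xml', methods=['GET'])
    def opensearch():
        return send_file('static/opensearch.xml',
                         mimetype='application/opensearchdescription+xml')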