Updated tests, fixed a few bugs

Added opensearch routes test and individual tests for searching via GET
and POST separately.

Fixed incorrect assignment in gen_query.
main
Ben Busby 2020-04-28 18:59:33 -06:00
parent 0c0ebb8917
commit 1cbe394e6f
6 changed files with 50 additions and 26 deletions

View File

@@ -34,11 +34,12 @@ class Filter:
def clean(self, soup):
self.remove_ads(soup)
self.sync_images(soup)
self.update_image_paths(soup)
self.update_styling(soup)
self.update_links(soup)
input_form = soup.find('form')
if input_form is not None:
input_form['method'] = 'POST'
return soup
@@ -55,7 +56,7 @@ class Filter:
for div in ad_divs:
div.decompose()
def sync_images(self, soup):
def update_image_paths(self, soup):
for img in [_ for _ in soup.find_all('img') if 'src' in _]:
img_src = img['src']
if img_src.startswith('//'):

View File

@@ -10,7 +10,12 @@ MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
# Valid query params
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near']
VALID_PARAMS = {
'tbs': '',
'tbm': '',
'start': '',
'near': ''
}
def gen_user_agent(normal_ua):
@@ -26,33 +31,34 @@ def gen_user_agent(normal_ua):
return DESKTOP_UA.format(mozilla, linux, firefox)
def gen_query(q, args, near_city=None):
def gen_query(query, args, near_city=None):
# Use :past(hour/day/week/month/year) if available
# example search "new restaurants :past month"
tbs = ''
if ':past' in q:
time_range = str.strip(q.split(':past', 1)[-1])
tbs = '&tbs=qdr:' + str.lower(time_range[0])
if ':past' in query:
time_range = str.strip(query.split(':past', 1)[-1])
VALID_PARAMS['tbs'] = '&tbs=qdr:' + str.lower(time_range[0])
# Ensure search query is parsable
q = urlparse.quote(q)
query = urlparse.quote(query)
# Pass along type of results (news, images, books, etc)
tbm = ''
if 'tbm' in args:
tbm = '&tbm=' + args.get('tbm')
VALID_PARAMS['tbm'] = '&tbm=' + args.get('tbm')
# Get results page start value (10 per page, ie page 2 start val = 20)
start = ''
if 'start' in args:
start = '&start=' + args.get('start')
VALID_PARAMS['start'] = '&start=' + args.get('start')
# Search for results near a particular city, if available
near = ''
if near_city is not None:
near = '&near=' + urlparse.quote(near_city)
VALID_PARAMS['near'] = '&near=' + urlparse.quote(near_city)
return q + tbs + tbm + start + near
for val in VALID_PARAMS.values():
if not val or val is None:
continue
query += val
return query
class Request:

View File

@@ -40,7 +40,6 @@ def opensearch():
@app.route('/search', methods=['GET', 'POST'])
def search():
q = None
if request.method == 'GET':
q = request.args.get('q')
try:
@@ -50,7 +49,7 @@ def search():
else:
q = request.form['q']
if q is None or len(q) <= 0:
if q is None or len(q) == 0:
return render_template('error.html')
user_agent = request.headers.get('User-Agent')

View File

@@ -2,7 +2,7 @@
<head>
<link rel="shortcut icon" href="/static/img/favicon.ico" type="image/x-icon">
<link rel="icon" href="/static/img/favicon.ico" type="image/x-icon">
<link rel="search" href="/static/opensearch.xml" type="application/opensearchdescription+xml" title="Shoogle Search">
<link rel="search" href="/opensearch.xml" type="application/opensearchdescription+xml" title="Shoogle Search">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{ query }} - Shoogle Search</title>
</head>

View File

@@ -1,11 +1,13 @@
from bs4 import BeautifulSoup
from cryptography.fernet import Fernet
from app.filter import Filter
from datetime import datetime
from dateutil.parser import *
def get_search_results(data):
soup = Filter().clean(BeautifulSoup(data, 'html.parser'))
secret_key = Fernet.generate_key()
soup = Filter(secret_key=secret_key).clean(BeautifulSoup(data, 'html.parser'))
main_divs = soup.find('div', {'id': 'main'})
assert len(main_divs) > 1
@@ -21,7 +23,7 @@ def get_search_results(data):
return result_divs
def test_search_results(client):
def test_get_results(client):
rv = client.get('/search?q=test')
assert rv._status_code == 200
@@ -31,15 +33,25 @@ def test_search_results(client):
assert len(get_search_results(rv.data)) <= 15
def test_post_results(client):
rv = client.post('/search', data=dict(q='test'))
assert rv._status_code == 200
# Depending on the search, there can be more
# than 10 result divs
assert len(get_search_results(rv.data)) >= 10
assert len(get_search_results(rv.data)) <= 15
def test_recent_results(client):
times = {
'pastyear': 365,
'pastmonth': 31,
'pastweek': 7
'past year': 365,
'past month': 31,
'past week': 7
}
for time, num_days in times.items():
rv = client.get('/search?q=test%20%3A' + time)
rv = client.post('/search', data=dict(q='test :' + time))
result_divs = get_search_results(rv.data)
current_date = datetime.now()

View File

@@ -28,3 +28,9 @@ def test_config(client):
config = json.loads(rv.data)
for key in demo_config.keys():
assert config[key] == demo_config[key]
def test_opensearch(client):
rv = client.get('/opensearch.xml')
assert rv._status_code == 200
assert 'Shoogle' in str(rv.data)