Updated tests, fixed a few bugs
Added an opensearch routes test and separate tests for searching via GET and POST. Fixed an incorrect assignment in gen_query.main.
parent
0c0ebb8917
commit
1cbe394e6f
|
@ -34,11 +34,12 @@ class Filter:
|
||||||
|
|
||||||
def clean(self, soup):
|
def clean(self, soup):
|
||||||
self.remove_ads(soup)
|
self.remove_ads(soup)
|
||||||
self.sync_images(soup)
|
self.update_image_paths(soup)
|
||||||
self.update_styling(soup)
|
self.update_styling(soup)
|
||||||
self.update_links(soup)
|
self.update_links(soup)
|
||||||
|
|
||||||
input_form = soup.find('form')
|
input_form = soup.find('form')
|
||||||
|
if input_form is not None:
|
||||||
input_form['method'] = 'POST'
|
input_form['method'] = 'POST'
|
||||||
|
|
||||||
return soup
|
return soup
|
||||||
|
@ -55,7 +56,7 @@ class Filter:
|
||||||
for div in ad_divs:
|
for div in ad_divs:
|
||||||
div.decompose()
|
div.decompose()
|
||||||
|
|
||||||
def sync_images(self, soup):
|
def update_image_paths(self, soup):
|
||||||
for img in [_ for _ in soup.find_all('img') if 'src' in _]:
|
for img in [_ for _ in soup.find_all('img') if 'src' in _]:
|
||||||
img_src = img['src']
|
img_src = img['src']
|
||||||
if img_src.startswith('//'):
|
if img_src.startswith('//'):
|
||||||
|
|
|
@ -10,7 +10,12 @@ MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
|
||||||
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
|
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
|
||||||
|
|
||||||
# Valid query params
|
# Valid query params
|
||||||
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near']
|
VALID_PARAMS = {
|
||||||
|
'tbs': '',
|
||||||
|
'tbm': '',
|
||||||
|
'start': '',
|
||||||
|
'near': ''
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def gen_user_agent(normal_ua):
|
def gen_user_agent(normal_ua):
|
||||||
|
@ -26,33 +31,34 @@ def gen_user_agent(normal_ua):
|
||||||
return DESKTOP_UA.format(mozilla, linux, firefox)
|
return DESKTOP_UA.format(mozilla, linux, firefox)
|
||||||
|
|
||||||
|
|
||||||
def gen_query(q, args, near_city=None):
|
def gen_query(query, args, near_city=None):
|
||||||
# Use :past(hour/day/week/month/year) if available
|
# Use :past(hour/day/week/month/year) if available
|
||||||
# example search "new restaurants :past month"
|
# example search "new restaurants :past month"
|
||||||
tbs = ''
|
if ':past' in query:
|
||||||
if ':past' in q:
|
time_range = str.strip(query.split(':past', 1)[-1])
|
||||||
time_range = str.strip(q.split(':past', 1)[-1])
|
VALID_PARAMS['tbs'] = '&tbs=qdr:' + str.lower(time_range[0])
|
||||||
tbs = '&tbs=qdr:' + str.lower(time_range[0])
|
|
||||||
|
|
||||||
# Ensure search query is parsable
|
# Ensure search query is parsable
|
||||||
q = urlparse.quote(q)
|
query = urlparse.quote(query)
|
||||||
|
|
||||||
# Pass along type of results (news, images, books, etc)
|
# Pass along type of results (news, images, books, etc)
|
||||||
tbm = ''
|
|
||||||
if 'tbm' in args:
|
if 'tbm' in args:
|
||||||
tbm = '&tbm=' + args.get('tbm')
|
VALID_PARAMS['tbm'] = '&tbm=' + args.get('tbm')
|
||||||
|
|
||||||
# Get results page start value (10 per page, ie page 2 start val = 20)
|
# Get results page start value (10 per page, ie page 2 start val = 20)
|
||||||
start = ''
|
|
||||||
if 'start' in args:
|
if 'start' in args:
|
||||||
start = '&start=' + args.get('start')
|
VALID_PARAMS['start'] = '&start=' + args.get('start')
|
||||||
|
|
||||||
# Search for results near a particular city, if available
|
# Search for results near a particular city, if available
|
||||||
near = ''
|
|
||||||
if near_city is not None:
|
if near_city is not None:
|
||||||
near = '&near=' + urlparse.quote(near_city)
|
VALID_PARAMS['near'] = '&near=' + urlparse.quote(near_city)
|
||||||
|
|
||||||
return q + tbs + tbm + start + near
|
for val in VALID_PARAMS.values():
|
||||||
|
if not val or val is None:
|
||||||
|
continue
|
||||||
|
query += val
|
||||||
|
|
||||||
|
return query
|
||||||
|
|
||||||
|
|
||||||
class Request:
|
class Request:
|
||||||
|
|
|
@ -40,7 +40,6 @@ def opensearch():
|
||||||
|
|
||||||
@app.route('/search', methods=['GET', 'POST'])
|
@app.route('/search', methods=['GET', 'POST'])
|
||||||
def search():
|
def search():
|
||||||
q = None
|
|
||||||
if request.method == 'GET':
|
if request.method == 'GET':
|
||||||
q = request.args.get('q')
|
q = request.args.get('q')
|
||||||
try:
|
try:
|
||||||
|
@ -50,7 +49,7 @@ def search():
|
||||||
else:
|
else:
|
||||||
q = request.form['q']
|
q = request.form['q']
|
||||||
|
|
||||||
if q is None or len(q) <= 0:
|
if q is None or len(q) == 0:
|
||||||
return render_template('error.html')
|
return render_template('error.html')
|
||||||
|
|
||||||
user_agent = request.headers.get('User-Agent')
|
user_agent = request.headers.get('User-Agent')
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
<head>
|
<head>
|
||||||
<link rel="shortcut icon" href="/static/img/favicon.ico" type="image/x-icon">
|
<link rel="shortcut icon" href="/static/img/favicon.ico" type="image/x-icon">
|
||||||
<link rel="icon" href="/static/img/favicon.ico" type="image/x-icon">
|
<link rel="icon" href="/static/img/favicon.ico" type="image/x-icon">
|
||||||
<link rel="search" href="/static/opensearch.xml" type="application/opensearchdescription+xml" title="Shoogle Search">
|
<link rel="search" href="/opensearch.xml" type="application/opensearchdescription+xml" title="Shoogle Search">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<title>{{ query }} - Shoogle Search</title>
|
<title>{{ query }} - Shoogle Search</title>
|
||||||
</head>
|
</head>
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
from app.filter import Filter
|
from app.filter import Filter
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from dateutil.parser import *
|
from dateutil.parser import *
|
||||||
|
|
||||||
|
|
||||||
def get_search_results(data):
|
def get_search_results(data):
|
||||||
soup = Filter().clean(BeautifulSoup(data, 'html.parser'))
|
secret_key = Fernet.generate_key()
|
||||||
|
soup = Filter(secret_key=secret_key).clean(BeautifulSoup(data, 'html.parser'))
|
||||||
|
|
||||||
main_divs = soup.find('div', {'id': 'main'})
|
main_divs = soup.find('div', {'id': 'main'})
|
||||||
assert len(main_divs) > 1
|
assert len(main_divs) > 1
|
||||||
|
@ -21,7 +23,7 @@ def get_search_results(data):
|
||||||
return result_divs
|
return result_divs
|
||||||
|
|
||||||
|
|
||||||
def test_search_results(client):
|
def test_get_results(client):
|
||||||
rv = client.get('/search?q=test')
|
rv = client.get('/search?q=test')
|
||||||
assert rv._status_code == 200
|
assert rv._status_code == 200
|
||||||
|
|
||||||
|
@ -31,6 +33,16 @@ def test_search_results(client):
|
||||||
assert len(get_search_results(rv.data)) <= 15
|
assert len(get_search_results(rv.data)) <= 15
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_results(client):
|
||||||
|
rv = client.post('/search', data=dict(q='test'))
|
||||||
|
assert rv._status_code == 200
|
||||||
|
|
||||||
|
# Depending on the search, there can be more
|
||||||
|
# than 10 result divs
|
||||||
|
assert len(get_search_results(rv.data)) >= 10
|
||||||
|
assert len(get_search_results(rv.data)) <= 15
|
||||||
|
|
||||||
|
|
||||||
def test_recent_results(client):
|
def test_recent_results(client):
|
||||||
times = {
|
times = {
|
||||||
'past year': 365,
|
'past year': 365,
|
||||||
|
@ -39,7 +51,7 @@ def test_recent_results(client):
|
||||||
}
|
}
|
||||||
|
|
||||||
for time, num_days in times.items():
|
for time, num_days in times.items():
|
||||||
rv = client.get('/search?q=test%20%3A' + time)
|
rv = client.post('/search', data=dict(q='test :' + time))
|
||||||
result_divs = get_search_results(rv.data)
|
result_divs = get_search_results(rv.data)
|
||||||
|
|
||||||
current_date = datetime.now()
|
current_date = datetime.now()
|
||||||
|
|
|
@ -28,3 +28,9 @@ def test_config(client):
|
||||||
config = json.loads(rv.data)
|
config = json.loads(rv.data)
|
||||||
for key in demo_config.keys():
|
for key in demo_config.keys():
|
||||||
assert config[key] == demo_config[key]
|
assert config[key] == demo_config[key]
|
||||||
|
|
||||||
|
|
||||||
|
def test_opensearch(client):
|
||||||
|
rv = client.get('/opensearch.xml')
|
||||||
|
assert rv._status_code == 200
|
||||||
|
assert 'Shoogle' in str(rv.data)
|
||||||
|
|
Loading…
Reference in New Issue