Added testing and ci build, refactored filter class, refactored project structure

main
Ben Busby 2020-04-15 17:41:53 -06:00
parent 2600f494b7
commit b5b6e64177
15 changed files with 269 additions and 128 deletions

1
.gitignore vendored
View File

@ -5,3 +5,4 @@ __pycache__/
*.pem *.pem
*.xml *.xml
config.json config.json
test/static

6
.travis.yml Normal file
View File

@ -0,0 +1,6 @@
language: python
python: 3.6
install:
- pip install -r config/requirements.txt
script:
- ./run test

View File

@ -3,6 +3,6 @@ FROM python:3
WORKDIR /usr/src/app WORKDIR /usr/src/app
COPY . . COPY . .
RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r config/requirements.txt
CMD ["./run.sh"] CMD ["./run"]

View File

@ -37,6 +37,18 @@ Depending on your preferences, you can also deploy the app yourself on your own
- SSL certificates (free through [Let's Encrypt](https://letsencrypt.org/getting-started/)) - SSL certificates (free through [Let's Encrypt](https://letsencrypt.org/getting-started/))
- A bit more experience or willingness to work through issues - A bit more experience or willingness to work through issues
## Setup (Local Only)
If you want to test the app out on your own machine first, you can build it with the following instructions:
```bash
git clone https://github.com/benbusby/shoogle.git
cd shoogle
python3 -m venv venv
source venv/bin/activate
pip install -r config/requirements.txt
./run
```
## Usage ## Usage
Same as most search engines, with the exception of filtering by time range. Same as most search engines, with the exception of filtering by time range.
@ -44,7 +56,7 @@ To filter by a range of time, append ":past <time>" to the end of your search, w
## Extra Steps ## Extra Steps
### Set Shoogle as your primary search engine ### Set Shoogle as your primary search engine
1. From the main shoogle folder, run `python opensearch.py "<your app url>"` 1. From the main shoogle folder, run `python config/opensearch.py "<your app url>"`
2. Rebuild and release your updated app 2. Rebuild and release your updated app
- `heroku container:push web` and then `heroku container:release web` - `heroku container:push web` and then `heroku container:release web`
3. Update browser settings 3. Update browser settings

View File

@ -3,28 +3,36 @@ import re
import urllib.parse as urlparse import urllib.parse as urlparse
from urllib.parse import parse_qs from urllib.parse import parse_qs
AD_CLASS = 'ZINbbc'
SPONS_CLASS = 'D1fz0e'
class Filter:
def __init__(self, mobile=False, config=None):
self.mobile = False
self.dark_mode = False
self.nojs = False
self.near_city = None
def reskin(page, dark_mode=False): if config is None:
config = {}
near_city = config['near'] if 'near' in config else None
dark_mode = config['dark_mode'] if 'dark_mode' in config else False
nojs = config['nojs'] if 'nojs' in config else False
mobile = mobile
def reskin(self, page):
# Aesthetic only re-skinning # Aesthetic only re-skinning
page = page.replace('>G<', '>Sh<') page = page.replace('>G<', '>Sh<')
pattern = re.compile('4285f4|ea4335|fbcc05|34a853|fbbc05', re.IGNORECASE) pattern = re.compile('4285f4|ea4335|fbcc05|34a853|fbbc05', re.IGNORECASE)
page = pattern.sub('685e79', page) page = pattern.sub('685e79', page)
if dark_mode: if self.dark_mode:
page = page.replace('fff', '000').replace('202124', 'ddd').replace('1967D2', '3b85ea') page = page.replace('fff', '000').replace('202124', 'ddd').replace('1967D2', '3b85ea')
return page return page
def gen_query(self, q, args):
def gen_query(q, args, near_city=None):
# Use :past(hour/day/week/month/year) if available # Use :past(hour/day/week/month/year) if available
# example search "new restaurants :past month" # example search "new restaurants :past month"
tbs = '' tbs = ''
# if 'tbs' in request.args:
# tbs = '&tbs=' + request.args.get('tbs')
# q = q.replace(q.split(':past', 1)[-1], '').replace(':past', '')
if ':past' in q: if ':past' in q:
time_range = str.strip(q.split(':past', 1)[-1]) time_range = str.strip(q.split(':past', 1)[-1])
tbs = '&tbs=qdr:' + str.lower(time_range[0]) tbs = '&tbs=qdr:' + str.lower(time_range[0])
@ -44,19 +52,20 @@ def gen_query(q, args, near_city=None):
# Grab city from config, if available # Grab city from config, if available
near = '' near = ''
if near_city: if self.near_city:
near = '&near=' + urlparse.quote(near_city) near = '&near=' + urlparse.quote(self.near_city)
return q + tbs + tbm + start + near return q + tbs + tbm + start + near
def clean(self, soup):
def cook(soup, user_agent, nojs=False, dark_mode=False): # Remove all ads
# Remove all ads (TODO: Ad specific div classes probably change over time, look into a more generic method)
main_divs = soup.find('div', {'id': 'main'}) main_divs = soup.find('div', {'id': 'main'})
if main_divs is not None: if main_divs is not None:
ad_divs = main_divs.findAll('div', {'class': AD_CLASS}, recursive=False) result_divs = main_divs.findAll('div', recursive=False)
sponsored_divs = main_divs.findAll('div', {'class': SPONS_CLASS}, recursive=False)
for div in ad_divs + sponsored_divs: # Only ads/sponsored content use classes in the list of result divs
ad_divs = [ad_div for ad_div in result_divs if 'class' in ad_div.attrs]
for div in ad_divs:
div.decompose() div.decompose()
# Remove unnecessary button(s) # Remove unnecessary button(s)
@ -69,7 +78,7 @@ def cook(soup, user_agent, nojs=False, dark_mode=False):
# Update logo # Update logo
logo = soup.find('a', {'class': 'l'}) logo = soup.find('a', {'class': 'l'})
if logo is not None and ('Android' in user_agent or 'iPhone' in user_agent): if logo is not None and self.mobile:
logo.insert(0, 'Shoogle') logo.insert(0, 'Shoogle')
logo['style'] = 'display: flex;justify-content: center;align-items: center;color: #685e79;font-size: 18px;' logo['style'] = 'display: flex;justify-content: center;align-items: center;color: #685e79;font-size: 18px;'
@ -86,7 +95,7 @@ def cook(soup, user_agent, nojs=False, dark_mode=False):
href = parse_qs(href.query)['q'][0] href = parse_qs(href.query)['q'][0]
# Add no-js option # Add no-js option
if nojs: if self.nojs:
nojs_link = soup.new_tag('a') nojs_link = soup.new_tag('a')
nojs_link['href'] = '/window?location=' + href nojs_link['href'] = '/window?location=' + href
nojs_link['style'] = 'display:block;width:100%;' nojs_link['style'] = 'display:block;width:100%;'
@ -95,7 +104,7 @@ def cook(soup, user_agent, nojs=False, dark_mode=False):
a.append(nojs_link) a.append(nojs_link)
# Set up dark mode if active # Set up dark mode if active
if dark_mode: if self.dark_mode:
soup.find('html')['style'] = 'scrollbar-color: #333 #111;' soup.find('html')['style'] = 'scrollbar-color: #333 #111;'
for input_element in soup.findAll('input'): for input_element in soup.findAll('input'):
input_element['style'] = 'color:#fff;' input_element['style'] = 'color:#fff;'

View File

@ -1,4 +1,5 @@
from app import app, rhyme, filter from app import app, rhyme
from app.filter import Filter
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from flask import request, redirect, render_template from flask import request, redirect, render_template
from io import BytesIO from io import BytesIO
@ -7,8 +8,8 @@ import os
import pycurl import pycurl
import urllib.parse as urlparse import urllib.parse as urlparse
APP_ROOT = os.path.dirname(os.path.abspath(__file__)) app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
STATIC_FOLDER = os.path.join(APP_ROOT, 'static') app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
# Get Mozilla Firefox rhyme (important) and form a new user agent # Get Mozilla Firefox rhyme (important) and form a new user agent
mozilla = rhyme.get_rhyme('Mo') + 'zilla' mozilla = rhyme.get_rhyme('Mo') + 'zilla'
@ -20,7 +21,7 @@ DESKTOP_UA = mozilla + '/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/2010010
# Base search url # Base search url
SEARCH_URL = 'https://www.google.com/search?gbv=1&q=' SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
user_config = json.load(open(STATIC_FOLDER + '/config.json')) user_config = json.load(open(app.config['STATIC_FOLDER'] + '/config.json'))
def get_ua(user_agent): def get_ua(user_agent):
@ -55,23 +56,25 @@ def search():
if q is None or len(q) <= 0: if q is None or len(q) <= 0:
return render_template('error.html') return render_template('error.html')
full_query = filter.gen_query(q, request.args)
user_agent = request.headers.get('User-Agent') user_agent = request.headers.get('User-Agent')
dark_mode = 'dark' in user_config and user_config['dark'] mobile = 'Android' in user_agent or 'iPhone' in user_agent
nojs = 'nojs' in user_config and user_config['nojs']
get_body = filter.reskin(send_request( content_filter = Filter(mobile, user_config)
SEARCH_URL + full_query, get_ua(user_agent)), dark_mode=dark_mode) full_query = content_filter.gen_query(q, request.args)
get_body = send_request(SEARCH_URL + full_query, get_ua(user_agent))
soup = filter.cook(BeautifulSoup(get_body, 'html.parser'), user_agent, nojs=nojs, dark_mode=dark_mode) get_body = content_filter.reskin(get_body)
soup = content_filter.clean(BeautifulSoup(get_body, 'html.parser'))
return render_template('display.html', query=urlparse.unquote(q), response=soup) return render_template('display.html', query=urlparse.unquote(q), response=soup)
@app.route('/config', methods=['POST']) @app.route('/config', methods=['GET', 'POST'])
def config(): def config():
global user_config global user_config
with open(STATIC_FOLDER + '/config.json', 'w') as config_file: if request.method == 'GET':
return json.dumps(user_config)
else:
with open(app.config['STATIC_FOLDER'] + '/config.json', 'w') as config_file:
config_file.write(json.dumps(json.loads(request.data), indent=4)) config_file.write(json.dumps(json.loads(request.data), indent=4))
config_file.close() config_file.close()

View File

@ -1,7 +1,9 @@
import os
import sys import sys
template_path = './app/static/opensearch.template' script_path = os.path.dirname(os.path.realpath(__file__))
opensearch_path = './app/static/opensearch.xml' template_path = script_path + '/../app/static/opensearch.template'
opensearch_path = script_path + '/../app/static/opensearch.xml'
replace_tag = 'SHOOGLE_URL' replace_tag = 'SHOOGLE_URL'
if len(sys.argv) != 2: if len(sys.argv) != 2:

View File

@ -11,6 +11,7 @@ Phyme==0.0.9
pycparser==2.19 pycparser==2.19
pycurl==7.43.0.4 pycurl==7.43.0.4
pyOpenSSL==19.1.0 pyOpenSSL==19.1.0
pytest==5.4.1
six==1.14.0 six==1.14.0
soupsieve==1.9.5 soupsieve==1.9.5
Werkzeug==0.16.0 Werkzeug==0.16.0

33
run Executable file
View File

@ -0,0 +1,33 @@
#!/bin/bash
# Usage:
#   ./run       # Runs the full web app
#   ./run test  # Runs the testing suite

# Resolve the directory this script lives in (quoted: safe for paths with spaces)
SCRIPT_DIR="$(dirname "$(realpath "$0")")"

# Set default port if unavailable
PORT="${PORT:-5000}"

# Set directory to serve static content from ("app" normally, "test" for CI)
SUBDIR="${1:-app}"

export APP_ROOT="$SCRIPT_DIR/$SUBDIR"
export STATIC_FOLDER="$APP_ROOT/static"
mkdir -p "$STATIC_FOLDER"

# Create default config json if it doesn't exist
if [[ ! -f "$STATIC_FOLDER/config.json" ]]; then
    echo "{}" > "$STATIC_FOLDER/config.json"
fi

# Stop any previously launched dev server before starting a new one
pkill flask

# Check for regular vs test run
if [[ "$SUBDIR" == "test" ]]; then
    pytest -sv
else
    flask run --host="0.0.0.0" --port="$PORT"
fi

17
run.sh
View File

@ -1,17 +0,0 @@
#!/bin/bash
SCRIPT=`realpath $0`
SCRIPT_DIR=`dirname $SCRIPT`
if [[ -z "${PORT}" ]]; then
PORT=5000
fi
# Create config json if it doesn't exist
if [[ ! -f $SCRIPT_DIR/app/static/config.json ]]; then
echo "{}" > $SCRIPT_DIR/app/static/config.json
fi
pkill flask
flask run --host="0.0.0.0" --port=$PORT

View File

@ -1 +0,0 @@
from app import app

0
test/__init__.py Normal file
View File

8
test/conftest.py Normal file
View File

@ -0,0 +1,8 @@
from app import app
import pytest
@pytest.fixture
def client():
    """Yield a Flask test client bound to the application under test."""
    yield app.test_client()

54
test/test_results.py Normal file
View File

@ -0,0 +1,54 @@
from bs4 import BeautifulSoup
from app.filter import Filter
import json
from datetime import datetime
from dateutil.parser import *
from test.conftest import client
def get_search_results(data):
    """Parse a raw /search response body into its list of result divs.

    Runs the response through the app's content filter first so ads and
    sponsored divs are stripped, exactly as the live app serves them.
    """
    # Bug fix: the function parameter is `data`; the original referenced the
    # undefined name `rv.data`, which raised NameError for every caller.
    soup = Filter().clean(BeautifulSoup(data, 'html.parser'))

    main_divs = soup.find('div', {'id': 'main'})
    assert len(main_divs) > 1

    result_divs = []
    for div in main_divs:
        # Result divs should only have 1 inner div
        if len(list(div.children)) != 1 or not div.findChild() or 'div' not in div.findChild().name:
            continue
        result_divs.append(div)

    return result_divs
def test_search_results(client):
    """A plain query should succeed and yield a full page of 10 results."""
    response = client.get('/search?q=test')
    assert response._status_code == 200

    results = get_search_results(response.data)
    assert len(results) == 10
def test_recent_results(client):
    """Time-filtered queries (:pastyear etc.) should only return results
    dated within the requested window."""
    windows = {
        'pastyear': 365,
        'pastmonth': 31,
        'pastweek': 7
    }

    for keyword, max_age_days in windows.items():
        response = client.get('/search?q=test%20%3A' + keyword)
        results = get_search_results(response.data)
        now = datetime.now()

        for div in results:
            date_span = div.find('span').decode_contents()
            # Skip spans that are empty or clearly not short date strings
            if not date_span or len(date_span) > 15:
                continue
            try:
                result_date = parse(date_span)
                assert (now - result_date).days < max_age_days
            except ParserError:
                # Relative timestamps ("2 days ago") can't be parsed as dates
                assert ' ago' in date_span

30
test/test_routes.py Normal file
View File

@ -0,0 +1,30 @@
import json
from test.conftest import client
# Sample user settings used to exercise the POST /config -> GET /config
# round-trip in test_config below.
demo_config = {
    'near': 'Seattle',
    'dark_mode': 0,
    'nojs': 0
}
def test_main(client):
    """The index page should be served successfully."""
    response = client.get('/')
    assert response._status_code == 200
def test_search(client):
    """A basic search request should return HTTP 200."""
    response = client.get('/search?q=test')
    assert response._status_code == 200
def test_config(client):
    """Settings POSTed to /config must be readable back via GET /config."""
    post_response = client.post('/config', data=json.dumps(demo_config))
    assert post_response._status_code == 200

    get_response = client.get('/config')
    assert get_response._status_code == 200

    stored = json.loads(get_response.data)
    for key, value in demo_config.items():
        assert stored[key] == value