Added testing and CI build, refactored filter class, refactored project structure

parent 2600f494b7
commit b5b6e64177
.gitignore
@@ -5,3 +5,4 @@ __pycache__/
 *.pem
 *.xml
 config.json
+test/static
.travis.yml
@@ -0,0 +1,6 @@
+language: python
+python: 3.6
+install:
+  - pip install -r config/requirements.txt
+script:
+  - ./run test
Dockerfile
@@ -3,6 +3,6 @@ FROM python:3
 WORKDIR /usr/src/app
 COPY . .
 
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir -r config/requirements.txt
 
-CMD ["./run.sh"]
+CMD ["./run"]
README.md
@@ -37,6 +37,18 @@ Depending on your preferences, you can also deploy the app yourself on your own
 - SSL certificates (free through [Let's Encrypt](https://letsencrypt.org/getting-started/))
 - A bit more experience or willingness to work through issues
 
+## Setup (Local Only)
+If you want to test the app out on your own machine first, you can build it with the following instructions:
+
+```bash
+git clone https://github.com/benbusby/shoogle.git
+cd shoogle
+python3 -m venv venv
+source venv/bin/activate
+pip install -r config/requirements.txt
+./run
+```
+
 ## Usage
 Same as most search engines, with the exception of filtering by time range.
 
@@ -44,7 +56,7 @@ To filter by a range of time, append ":past <time>" to the end of your search, w
 
 ## Extra Steps
 ### Set Shoogle as your primary search engine
-1. From the main shoogle folder, run `python opensearch.py "<your app url>"`
+1. From the main shoogle folder, run `python config/opensearch.py "<your app url>"`
 2. Rebuild and release your updated app
    - `heroku container:push web` and then `heroku container:release web`
 3. Update browser settings
app/filter.py
@@ -3,28 +3,36 @@ import re
 import urllib.parse as urlparse
 from urllib.parse import parse_qs
 
-AD_CLASS = 'ZINbbc'
-SPONS_CLASS = 'D1fz0e'
 
+class Filter:
+    def __init__(self, mobile=False, config=None):
+        if config is None:
+            config = {}
+
+        # Read user preferences from the config dict, with safe defaults
+        self.near_city = config['near'] if 'near' in config else None
+        self.dark_mode = config['dark_mode'] if 'dark_mode' in config else False
+        self.nojs = config['nojs'] if 'nojs' in config else False
+        self.mobile = mobile
 
-def reskin(page, dark_mode=False):
+    def reskin(self, page):
         # Aesthetic only re-skinning
         page = page.replace('>G<', '>Sh<')
         pattern = re.compile('4285f4|ea4335|fbcc05|34a853|fbbc05', re.IGNORECASE)
         page = pattern.sub('685e79', page)
-    if dark_mode:
+        if self.dark_mode:
             page = page.replace('fff', '000').replace('202124', 'ddd').replace('1967D2', '3b85ea')
 
         return page
 
-def gen_query(q, args, near_city=None):
+    def gen_query(self, q, args):
         # Use :past(hour/day/week/month/year) if available
         # example search "new restaurants :past month"
         tbs = ''
-    # if 'tbs' in request.args:
-    #     tbs = '&tbs=' + request.args.get('tbs')
-    #     q = q.replace(q.split(':past', 1)[-1], '').replace(':past', '')
         if ':past' in q:
             time_range = str.strip(q.split(':past', 1)[-1])
             tbs = '&tbs=qdr:' + str.lower(time_range[0])
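
The `:past <time>` handling above reduces to mapping the first letter of the range onto Google's `tbs=qdr:` parameter. A standalone sketch of that mapping (`past_to_tbs` is a hypothetical name, not part of this diff):

```python
def past_to_tbs(q):
    """Mirror of the ':past' parsing in gen_query above."""
    tbs = ''
    if ':past' in q:
        time_range = str.strip(q.split(':past', 1)[-1])
        # hour/day/week/month/year -> h/d/w/m/y
        tbs = '&tbs=qdr:' + str.lower(time_range[0])
    return tbs


print(past_to_tbs('new restaurants :past month'))  # &tbs=qdr:m
print(past_to_tbs('new restaurants :past hour'))   # &tbs=qdr:h
```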
@@ -44,19 +52,20 @@ def gen_query(q, args, near_city=None):
 
         # Grab city from config, if available
         near = ''
-    if near_city:
-        near = '&near=' + urlparse.quote(near_city)
+        if self.near_city:
+            near = '&near=' + urlparse.quote(self.near_city)
 
         return q + tbs + tbm + start + near
 
-def cook(soup, user_agent, nojs=False, dark_mode=False):
-    # Remove all ads
+    def clean(self, soup):
+        # Remove all ads (TODO: Ad specific div classes probably change over time, look into a more generic method)
         main_divs = soup.find('div', {'id': 'main'})
         if main_divs is not None:
-        ad_divs = main_divs.findAll('div', {'class': AD_CLASS}, recursive=False)
-        sponsored_divs = main_divs.findAll('div', {'class': SPONS_CLASS}, recursive=False)
-        for div in ad_divs + sponsored_divs:
+            result_divs = main_divs.findAll('div', recursive=False)
+
+            # Only ads/sponsored content use classes in the list of result divs
+            ad_divs = [ad_div for ad_div in result_divs if 'class' in ad_div.attrs]
+            for div in ad_divs:
                 div.decompose()
 
         # Remove unnecessary button(s)
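
The rewritten ad removal drops the hard-coded `ZINbbc`/`D1fz0e` class names: any direct child of `div#main` that carries a `class` attribute is treated as an ad or sponsored result. A minimal illustration of the heuristic (the sample HTML is made up):

```python
from bs4 import BeautifulSoup

html = '''<div id="main">
  <div><div>organic result</div></div>
  <div class="ZINbbc"><div>sponsored result</div></div>
</div>'''

soup = BeautifulSoup(html, 'html.parser')
main_divs = soup.find('div', {'id': 'main'})
result_divs = main_divs.findAll('div', recursive=False)

# Only the ad/sponsored div carries a class attribute at this level
ad_divs = [div for div in result_divs if 'class' in div.attrs]
for div in ad_divs:
    div.decompose()

print(soup)  # only the unclassed organic result remains
```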
@@ -69,7 +78,7 @@ def cook(soup, user_agent, nojs=False, dark_mode=False):
 
         # Update logo
         logo = soup.find('a', {'class': 'l'})
-    if logo is not None and ('Android' in user_agent or 'iPhone' in user_agent):
+        if logo is not None and self.mobile:
             logo.insert(0, 'Shoogle')
             logo['style'] = 'display: flex;justify-content: center;align-items: center;color: #685e79;font-size: 18px;'
 
@@ -86,7 +95,7 @@ def cook(soup, user_agent, nojs=False, dark_mode=False):
             href = parse_qs(href.query)['q'][0]
 
             # Add no-js option
-        if nojs:
+            if self.nojs:
                 nojs_link = soup.new_tag('a')
                 nojs_link['href'] = '/window?location=' + href
                 nojs_link['style'] = 'display:block;width:100%;'
@@ -95,7 +104,7 @@ def cook(soup, user_agent, nojs=False, dark_mode=False):
                 a.append(nojs_link)
 
         # Set up dark mode if active
-    if dark_mode:
+        if self.dark_mode:
             soup.find('html')['style'] = 'scrollbar-color: #333 #111;'
             for input_element in soup.findAll('input'):
                 input_element['style'] = 'color:#fff;'
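
Taken together, the refactor replaces the old module-level `reskin`/`gen_query`/`cook` calls with a single configured object. A sketch of the intended call sequence, mirroring the `routes.py` change below (the query, config values, and response body here are placeholders):

```python
from bs4 import BeautifulSoup
from app.filter import Filter

# 'near'/'dark_mode'/'nojs' are the keys this diff reads from config
user_config = {'near': 'Denver', 'dark_mode': True, 'nojs': False}

content_filter = Filter(mobile=False, config=user_config)
full_query = content_filter.gen_query('coffee :past week', args={})  # request.args in the app
body = content_filter.reskin('<html>...raw search response...</html>')
soup = content_filter.clean(BeautifulSoup(body, 'html.parser'))
```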
app/routes.py
@@ -1,4 +1,5 @@
-from app import app, rhyme, filter
+from app import app, rhyme
+from app.filter import Filter
 from bs4 import BeautifulSoup
 from flask import request, redirect, render_template
 from io import BytesIO
@@ -7,8 +8,8 @@ import os
 import pycurl
 import urllib.parse as urlparse
 
-APP_ROOT = os.path.dirname(os.path.abspath(__file__))
-STATIC_FOLDER = os.path.join(APP_ROOT, 'static')
+app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
+app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
 
 # Get Mozilla Firefox rhyme (important) and form a new user agent
 mozilla = rhyme.get_rhyme('Mo') + 'zilla'
@@ -20,7 +21,7 @@ DESKTOP_UA = mozilla + '/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/2010010
 # Base search url
 SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
 
-user_config = json.load(open(STATIC_FOLDER + '/config.json'))
+user_config = json.load(open(app.config['STATIC_FOLDER'] + '/config.json'))
 
 
 def get_ua(user_agent):
@@ -55,23 +56,25 @@ def search():
     if q is None or len(q) <= 0:
         return render_template('error.html')
 
-    full_query = filter.gen_query(q, request.args)
     user_agent = request.headers.get('User-Agent')
-    dark_mode = 'dark' in user_config and user_config['dark']
-    nojs = 'nojs' in user_config and user_config['nojs']
+    mobile = 'Android' in user_agent or 'iPhone' in user_agent
 
-    get_body = filter.reskin(send_request(
-        SEARCH_URL + full_query, get_ua(user_agent)), dark_mode=dark_mode)
-    soup = filter.cook(BeautifulSoup(get_body, 'html.parser'), user_agent, nojs=nojs, dark_mode=dark_mode)
+    content_filter = Filter(mobile, user_config)
+    full_query = content_filter.gen_query(q, request.args)
+    get_body = send_request(SEARCH_URL + full_query, get_ua(user_agent))
+    get_body = content_filter.reskin(get_body)
+    soup = content_filter.clean(BeautifulSoup(get_body, 'html.parser'))
 
     return render_template('display.html', query=urlparse.unquote(q), response=soup)
 
 
-@app.route('/config', methods=['POST'])
+@app.route('/config', methods=['GET', 'POST'])
 def config():
     global user_config
-    with open(STATIC_FOLDER + '/config.json', 'w') as config_file:
-        config_file.write(json.dumps(json.loads(request.data), indent=4))
-        config_file.close()
+    if request.method == 'GET':
+        return json.dumps(user_config)
+    else:
+        with open(app.config['STATIC_FOLDER'] + '/config.json', 'w') as config_file:
+            config_file.write(json.dumps(json.loads(request.data), indent=4))
+            config_file.close()
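
With `GET` now accepted on `/config`, saved settings can be read back over HTTP. A quick round trip using Flask's test client (the values are examples; `test_config` below performs the same check):

```python
import json
from app import app  # the Flask app object imported in routes.py

client = app.test_client()

# POST persists the settings to static/config.json...
client.post('/config', data=json.dumps({'near': 'Seattle', 'nojs': 0}))

# ...and the new GET branch serializes user_config back as JSON
print(client.get('/config').data)
```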
config/opensearch.py
@@ -1,7 +1,9 @@
+import os
 import sys
 
-template_path = './app/static/opensearch.template'
-opensearch_path = './app/static/opensearch.xml'
+script_path = os.path.dirname(os.path.realpath(__file__))
+template_path = script_path + '/../app/static/opensearch.template'
+opensearch_path = script_path + '/../app/static/opensearch.xml'
 replace_tag = 'SHOOGLE_URL'
 
 if len(sys.argv) != 2:
config/requirements.txt
@@ -11,6 +11,7 @@ Phyme==0.0.9
 pycparser==2.19
 pycurl==7.43.0.4
 pyOpenSSL==19.1.0
+pytest==5.4.1
 six==1.14.0
 soupsieve==1.9.5
 Werkzeug==0.16.0
run
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Usage:
+#   ./run       # Runs the full web app
+#   ./run test  # Runs the testing suite
+
+SCRIPT=`realpath $0`
+SCRIPT_DIR=`dirname $SCRIPT`
+
+# Set default port if unavailable
+if [[ -z "${PORT}" ]]; then
+  PORT=5000
+fi
+
+# Set directory to serve static content from
+[[ ! -z $1 ]] && SUBDIR="$1" || SUBDIR="app"
+export APP_ROOT=$SCRIPT_DIR/$SUBDIR
+export STATIC_FOLDER=$APP_ROOT/static
+
+mkdir -p $STATIC_FOLDER
+
+# Create default config json if it doesn't exist
+if [[ ! -f $STATIC_FOLDER/config.json ]]; then
+  echo "{}" > $STATIC_FOLDER/config.json
+fi
+
+pkill flask
+
+# Check for regular vs test run
+if [[ $SUBDIR == "test" ]]; then
+  pytest -sv
+else
+  flask run --host="0.0.0.0" --port=$PORT
+fi
run.sh
@@ -1,17 +0,0 @@
-#!/bin/bash
-
-SCRIPT=`realpath $0`
-SCRIPT_DIR=`dirname $SCRIPT`
-
-if [[ -z "${PORT}" ]]; then
-  PORT=5000
-fi
-
-# Create config json if it doesn't exist
-if [[ ! -f $SCRIPT_DIR/app/static/config.json ]]; then
-  echo "{}" > $SCRIPT_DIR/app/static/config.json
-fi
-
-pkill flask
-
-flask run --host="0.0.0.0" --port=$PORT
test/conftest.py
@@ -0,0 +1,8 @@
+from app import app
+import pytest
+
+
+@pytest.fixture
+def client():
+    client = app.test_client()
+    yield client
test/test_results.py
@@ -0,0 +1,54 @@
+from bs4 import BeautifulSoup
+from app.filter import Filter
+import json
+from datetime import datetime
+from dateutil.parser import *
+from test.conftest import client
+
+
+def get_search_results(data):
+    soup = Filter().clean(BeautifulSoup(data, 'html.parser'))
+
+    main_divs = soup.find('div', {'id': 'main'})
+    assert len(main_divs) > 1
+
+    result_divs = []
+    for div in main_divs:
+        # Result divs should only have 1 inner div
+        if len(list(div.children)) != 1 or not div.findChild() or 'div' not in div.findChild().name:
+            continue
+
+        result_divs.append(div)
+
+    return result_divs
+
+
+def test_search_results(client):
+    rv = client.get('/search?q=test')
+    assert rv._status_code == 200
+
+    assert len(get_search_results(rv.data)) == 10
+
+
+def test_recent_results(client):
+    times = {
+        'pastyear': 365,
+        'pastmonth': 31,
+        'pastweek': 7
+    }
+
+    for time, num_days in times.items():
+        rv = client.get('/search?q=test%20%3A' + time)
+        result_divs = get_search_results(rv.data)
+
+        current_date = datetime.now()
+        for div in result_divs:
+            date_span = div.find('span').decode_contents()
+            if not date_span or len(date_span) > 15:
+                continue
+
+            try:
+                date = parse(date_span)
+                assert (current_date - date).days < num_days
+            except ParserError:
+                assert ' ago' in date_span
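
The `try`/`except` at the end exists because result dates come in two shapes: absolute dates that `dateutil` can parse, and relative strings like "2 weeks ago" that raise `ParserError` (exported by `dateutil.parser` in 2.8.1+). A small demonstration:

```python
from datetime import datetime
from dateutil.parser import parse, ParserError

for span in ('Mar 27, 2020', '2 weeks ago'):
    try:
        age = (datetime.now() - parse(span)).days
        print(span, '->', age, 'days old')
    except ParserError:
        # Relative dates fail to parse; the test falls back to the " ago" check
        print(span, '-> relative date')
```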
test/test_routes.py
@@ -0,0 +1,30 @@
+import json
+from test.conftest import client
+
+demo_config = {
+    'near': 'Seattle',
+    'dark_mode': 0,
+    'nojs': 0
+}
+
+
+def test_main(client):
+    rv = client.get('/')
+    assert rv._status_code == 200
+
+
+def test_search(client):
+    rv = client.get('/search?q=test')
+    assert rv._status_code == 200
+
+
+def test_config(client):
+    rv = client.post('/config', data=json.dumps(demo_config))
+    assert rv._status_code == 200
+
+    rv = client.get('/config')
+    assert rv._status_code == 200
+
+    config = json.loads(rv.data)
+    for key in demo_config.keys():
+        assert config[key] == demo_config[key]