Refactored routes, added filter class for returned results, added dockerignore

main
Ben Busby 2020-04-10 14:52:27 -06:00
parent d88b3904ff
commit 850a46aea1
4 changed files with 121 additions and 102 deletions

1
.dockerignore Normal file

@@ -0,0 +1 @@
.git/

111
app/filter.py Normal file

@@ -0,0 +1,111 @@
from bs4 import BeautifulSoup
import re
import urllib.parse as urlparse
from urllib.parse import parse_qs

AD_CLASS = 'ZINbbc'
SPONS_CLASS = 'D1fz0e'


def reskin(page, dark_mode=False):
    # Aesthetic only re-skinning
    page = page.replace('>G<', '>Sh<')
    pattern = re.compile('4285f4|ea4335|fbcc05|34a853|fbbc05', re.IGNORECASE)
    page = pattern.sub('685e79', page)
    if dark_mode:
        page = page.replace('fff', '000').replace('202124', 'ddd').replace('1967D2', '3b85ea')

    return page


def gen_query(q, args, near_city=None):
    # Use :past(hour/day/week/month/year) if available
    # example search "new restaurants :past month"
    tbs = ''
    # if 'tbs' in request.args:
    #     tbs = '&tbs=' + request.args.get('tbs')
    #     q = q.replace(q.split(':past', 1)[-1], '').replace(':past', '')
    if ':past' in q:
        time_range = str.strip(q.split(':past', 1)[-1])
        tbs = '&tbs=qdr:' + str.lower(time_range[0])

    # Ensure search query is parsable
    q = urlparse.quote(q)

    # Pass along type of results (news, images, books, etc)
    tbm = ''
    if 'tbm' in args:
        tbm = '&tbm=' + args.get('tbm')

    # Get results page start value (10 per page, ie page 2 start val = 20)
    start = ''
    if 'start' in args:
        start = '&start=' + args.get('start')

    # Grab city from config, if available
    near = ''
    if near_city:
        near = '&near=' + urlparse.quote(near_city)

    return q + tbs + tbm + start + near


def cook(soup, user_agent, nojs=False, dark_mode=False):
    # Remove all ads (TODO: Ad specific div classes probably change over time, look into a more generic method)
    main_divs = soup.find('div', {'id': 'main'})
    if main_divs is not None:
        ad_divs = main_divs.findAll('div', {'class': AD_CLASS}, recursive=False)
        sponsored_divs = main_divs.findAll('div', {'class': SPONS_CLASS}, recursive=False)
        for div in ad_divs + sponsored_divs:
            div.decompose()

    # Remove unnecessary button(s)
    for button in soup.find_all('button'):
        button.decompose()

    # Remove svg logos
    for svg in soup.find_all('svg'):
        svg.decompose()

    # Update logo
    logo = soup.find('a', {'class': 'l'})
    if logo is not None and ('Android' in user_agent or 'iPhone' in user_agent):
        logo.insert(0, 'Shoogle')
        logo['style'] = 'display: flex;justify-content: center;align-items: center;color: #685e79;font-size: 18px;'

    # Replace hrefs with only the intended destination (no "utm" type tags)
    for a in soup.find_all('a', href=True):
        href = a['href']
        if '/advanced_search' in href:
            a.decompose()
            continue

        if 'url?q=' in href:
            # Strip unneeded arguments
            href = urlparse.urlparse(href)
            href = parse_qs(href.query)['q'][0]

            # Add no-js option
            if nojs:
                nojs_link = soup.new_tag('a')
                nojs_link['href'] = '/window?location=' + href
                nojs_link['style'] = 'display:block;width:100%;'
                nojs_link.string = 'NoJS Link: ' + nojs_link['href']
                a.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
                a.append(nojs_link)

    # Set up dark mode if active
    if dark_mode:
        soup.find('html')['style'] = 'scrollbar-color: #333 #111;'
        for input_element in soup.findAll('input'):
            input_element['style'] = 'color:#fff;'

    # Ensure no extra scripts passed through
    try:
        for script in soup('script'):
            script.decompose()
        soup.find('div', id='sfooter').decompose()
    except Exception:
        pass

    return soup
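
For orientation, here is a small sketch of how gen_query composes the upstream query string. The args dict and the near_city value are hypothetical stand-ins for Flask's request.args and the user config; the sketch assumes the app package introduced in this commit is importable.

from app import filter

# Hypothetical inputs standing in for request.args and the configured city
args = {'tbm': 'nws', 'start': '20'}
full_query = filter.gen_query('coffee shops :past week', args, near_city='Denver')

# gen_query URL-quotes the search terms, then appends, in order:
#   '&tbs=qdr:w'   derived from ':past week' (first letter of the time range)
#   '&tbm=nws'     result type passed through from args
#   '&start=20'    results page offset
#   '&near=Denver' URL-quoted city
print(full_query)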

app/rhyme.py

@@ -8,6 +8,7 @@ random.seed(time.time())
ph = Phyme()

def get_rhyme(word):
    # Get all rhymes and merge to one list (normally separated by syllable count)
    rhymes = ph.get_perfect_rhymes(word)
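
As the comment above notes, Phyme returns perfect rhymes grouped by syllable count; the following is a rough sketch of the merge it describes. The flattening and the random pick are illustrative, not necessarily this commit's exact implementation.

import random
from Phyme import Phyme

ph = Phyme()

def get_rhyme_sketch(word):
    # Flatten the syllable-count-keyed groups into one candidate list
    rhymes = ph.get_perfect_rhymes(word)
    merged = [r for group in rhymes.values() for r in group]
    # Illustrative only: return one candidate at random
    return random.choice(merged)

print(get_rhyme_sketch('search'))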

app/routes.py

@@ -1,14 +1,11 @@
-from app import app
+from app import app, rhyme, filter
 from bs4 import BeautifulSoup
-from flask import request, redirect, Response, render_template
-from io import BytesIO
+from flask import request, redirect, render_template
 import json
 import os
 import pycurl
-import rhyme
-import re
 import urllib.parse as urlparse
-from urllib.parse import parse_qs
+from io import BytesIO
 
 APP_ROOT = os.path.dirname(os.path.abspath(__file__))
 STATIC_FOLDER = os.path.join(APP_ROOT, 'static')
@@ -31,7 +28,6 @@ def get_ua(user_agent):
 def send_request(curl_url, ua):
-    request_header = []
     response_header = []
     b_obj = BytesIO()
@@ -59,105 +55,15 @@ def search():
     if q is None or len(q) <= 0:
         return render_template('error.html')
 
-    # Use :past(hour/day/week/month/year) if available
-    # example search "new restaurants :past month"
-    tbs = ''
-    # if 'tbs' in request.args:
-    #     tbs = '&tbs=' + request.args.get('tbs')
-    #     q = q.replace(q.split(':past', 1)[-1], '').replace(':past', '')
-    if ':past' in q:
-        time_range = str.strip(q.split(':past', 1)[-1])
-        tbs = '&tbs=qdr:' + str.lower(time_range[0])
-
-    # Ensure search query is parsable
-    q = urlparse.quote(q)
-
-    # Pass along type of results (news, images, books, etc)
-    tbm = ''
-    if 'tbm' in request.args:
-        tbm = '&tbm=' + request.args.get('tbm')
-
-    # Get results page start value (10 per page, ie page 2 start val = 20)
-    start = ''
-    if 'start' in request.args:
-        start = '&start=' + request.args.get('start')
-
-    # Grab city from config, if available
-    near = ''
-    if 'near' in user_config:
-        near = '&near=' + urlparse.quote(user_config['near'])
-
+    full_query = filter.gen_query(q, request.args)
     user_agent = request.headers.get('User-Agent')
-    full_query = q + tbs + tbm + start + near
-    get_body = send_request(SEARCH_URL + full_query, get_ua(user_agent))
-
-    # Aesthetic only re-skinning
     dark_mode = 'dark' in user_config and user_config['dark']
-    get_body = get_body.replace('>G<', '>Sh<')
-    pattern = re.compile('4285f4|ea4335|fbcc05|34a853|fbbc05', re.IGNORECASE)
-    get_body = pattern.sub('685e79', get_body)
-    if dark_mode:
-        get_body = get_body.replace('fff', '000').replace('202124', 'ddd').replace('1967D2', '3b85ea')
-
-    soup = BeautifulSoup(get_body, 'html.parser')
-
-    # Remove all ads (TODO: Ad specific div classes probably change over time, look into a more generic method)
-    main_divs = soup.find('div', {'id': 'main'})
-    if main_divs is not None:
-        ad_divs = main_divs.findAll('div', {'class': 'ZINbbc'}, recursive=False)
-        sponsored_divs = main_divs.findAll('div', {'class': 'D1fz0e'}, recursive=False)
-        for div in ad_divs + sponsored_divs:
-            div.decompose()
-
-    # Remove unnecessary button(s)
-    for button in soup.find_all('button'):
-        button.decompose()
-
-    # Remove svg logos
-    for svg in soup.find_all('svg'):
-        svg.decompose()
-
-    # Update logo
-    logo = soup.find('a', {'class': 'l'})
-    if logo is not None and ('Android' in user_agent or 'iPhone' in user_agent):
-        logo.insert(0, 'Shoogle')
-        logo['style'] = 'display: flex;justify-content: center;align-items: center;color: #685e79;font-size: 18px;'
-
-    # Replace hrefs with only the intended destination (no "utm" type tags)
-    for a in soup.find_all('a', href=True):
-        href = a['href']
-        if '/advanced_search' in href:
-            a.decompose()
-            continue
-
-        if 'url?q=' in href:
-            # Strip unneeded arguments
-            href = urlparse.urlparse(href)
-            href = parse_qs(href.query)['q'][0]
-
-            # Add no-js option
-            if 'nojs' in user_config and user_config['nojs']:
-                nojs_link = soup.new_tag('a')
-                nojs_link['href'] = '/window?location=' + href
-                nojs_link['style'] = 'display:block;width:100%;'
-                nojs_link.string = 'NoJS Link: ' + nojs_link['href']
-                a.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
-                a.append(nojs_link)
-
-    # Set up dark mode if active
-    if dark_mode:
-        soup.find('html')['style'] = 'scrollbar-color: #333 #111;'
-        for input_element in soup.findAll('input'):
-            input_element['style'] = 'color:#fff;'
-
-    # Ensure no extra scripts passed through
-    try:
-        for script in soup('script'):
-            script.decompose()
-        soup.find('div', id='sfooter').decompose()
-    except Exception:
-        pass
+    nojs = 'nojs' in user_config and user_config['nojs']
+
+    get_body = filter.reskin(send_request(
+        SEARCH_URL + full_query, get_ua(user_agent)), dark_mode=dark_mode)
+    soup = filter.cook(BeautifulSoup(get_body, 'html.parser'), user_agent, nojs=nojs, dark_mode=dark_mode)
 
     return render_template('display.html', query=urlparse.unquote(q), response=soup)
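
Read together, the added lines reduce search() to a short pipeline: build the upstream query with filter.gen_query, fetch the results page, re-skin the raw HTML with filter.reskin, and hand the parsed soup to filter.cook for ad, script, and link cleanup before rendering. A sketch of the refactored body assembled from the added lines above; the q lookup and user_config come from unchanged parts of routes.py not shown in this hunk, so they are assumed here.

def search():
    q = request.args.get('q')  # assumed: retrieved just above the hunk shown
    if q is None or len(q) <= 0:
        return render_template('error.html')

    full_query = filter.gen_query(q, request.args)
    user_agent = request.headers.get('User-Agent')
    dark_mode = 'dark' in user_config and user_config['dark']
    nojs = 'nojs' in user_config and user_config['nojs']

    # Fetch the results, re-skin the raw HTML, then run the ad/script/link cleanup in filter.cook
    get_body = filter.reskin(send_request(
        SEARCH_URL + full_query, get_ua(user_agent)), dark_mode=dark_mode)
    soup = filter.cook(BeautifulSoup(get_body, 'html.parser'), user_agent, nojs=nojs, dark_mode=dark_mode)

    return render_template('display.html', query=urlparse.unquote(q), response=soup)

The route now only orchestrates request, filter, and template, while all query construction and HTML rewriting live in the new filter module.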