Refactoring of user requests and routing

Curl request and user-agent-related functionality were moved to their own
request class.

Routes was refactored to only include strictly routing related
functionality.

Filter class was cleaned up (it had routing/request-related logic in it,
which didn't make sense)
main
Ben Busby 2020-04-23 20:59:43 -06:00
parent 6f80c35fd2
commit a7005c012e
3 changed files with 100 additions and 75 deletions

View File

@ -14,6 +14,9 @@ class Filter:
self.nojs = config['nojs'] if 'nojs' in config else False self.nojs = config['nojs'] if 'nojs' in config else False
self.mobile = mobile self.mobile = mobile
# Support dict-style access to filter attributes, e.g. filter['near']
def __getitem__(self, name):
return getattr(self, name)
def reskin(self, page): def reskin(self, page):
# Aesthetic only re-skinning # Aesthetic only re-skinning
page = page.replace('>G<', '>Sh<') page = page.replace('>G<', '>Sh<')
@ -24,34 +27,6 @@ class Filter:
return page return page
# NOTE(review): request-building logic living on the Filter class; it reads
# self.near but otherwise only transforms its inputs into a query string.
def gen_query(self, q, args):
# Use :past(hour/day/week/month/year) if available
# example search "new restaurants :past month"
tbs = ''
if ':past' in q:
time_range = str.strip(q.split(':past', 1)[-1])
# first letter of the range maps onto Google's qdr codes (h/d/w/m/y);
# NOTE(review): a bare ":past" leaves time_range empty — IndexError here
tbs = '&tbs=qdr:' + str.lower(time_range[0])
# Ensure search query is parsable
q = urlparse.quote(q)
# Pass along type of results (news, images, books, etc)
tbm = ''
if 'tbm' in args:
tbm = '&tbm=' + args.get('tbm')
# Get results page start value (10 per page, ie page 2 start val = 20)
start = ''
if 'start' in args:
start = '&start=' + args.get('start')
# Grab city from config, if available
near = ''
if self.near:
near = '&near=' + urlparse.quote(self.near)
return q + tbs + tbm + start + near
def clean(self, soup): def clean(self, soup):
# Remove all ads # Remove all ads
main_divs = soup.find('div', {'id': 'main'}) main_divs = soup.find('div', {'id': 'main'})
@ -76,6 +51,10 @@ class Filter:
if logo and self.mobile: if logo and self.mobile:
logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; font-size:18px;' logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; font-size:18px;'
# Fix search bar length on mobile
search_bar = soup.find('header').find('form').find('div')
search_bar['style'] = 'width: 100%;'
# Replace hrefs with only the intended destination (no "utm" type tags) # Replace hrefs with only the intended destination (no "utm" type tags)
for a in soup.find_all('a', href=True): for a in soup.find_all('a', href=True):
href = a['href'] href = a['href']

76
app/request.py Normal file
View File

@ -0,0 +1,76 @@
from app import rhyme
from app.filter import Filter
from io import BytesIO
import pycurl
import urllib.parse as urlparse
# Base search url
SEARCH_URL = 'https://www.google.com/search?gbv=1&q='

# User agent templates: the '{}' slots are filled with rhymed stand-ins for
# the Mozilla/Firefox/Linux product names (see gen_user_agent)
MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
def gen_user_agent(normal_ua):
    """Build a spoofed user agent modeled on the client's real one.

    Mobile clients (Android/iPhone UA strings) get the mobile template,
    everything else the desktop template; the Mozilla/Firefox/Linux product
    names are replaced with rhymed stand-ins from the rhyme module.
    """
    mozilla = rhyme.get_rhyme('Mo') + rhyme.get_rhyme('zilla')
    firefox = rhyme.get_rhyme('Fire') + rhyme.get_rhyme('fox')
    linux = rhyme.get_rhyme('Lin') + 'ux'

    if 'Android' in normal_ua or 'iPhone' in normal_ua:
        return MOBILE_UA.format(mozilla, firefox)
    return DESKTOP_UA.format(mozilla, linux, firefox)
def gen_query(q, args, near_city=None):
    """Build the full search query string for the upstream search engine.

    Args:
        q: Raw user query. May end with a ':past <hour/day/week/month/year>'
           suffix to restrict results by time range.
        args: Mapping of request args (e.g. Flask's request.args); the keys
           'tbm' (result type) and 'start' (pagination offset) are forwarded.
        near_city: Optional city name used to localize results.

    Returns:
        The URL-encoded query followed by any '&tbs', '&tbm', '&start' and
        '&near' parameters, ready to append to the base search URL.
    """
    # Use :past(hour/day/week/month/year) if available
    # example search "new restaurants :past month"
    tbs = ''
    if ':past' in q:
        time_range = q.split(':past', 1)[-1].strip()
        # Guard against a bare ':past' with no range after it, which would
        # otherwise raise IndexError on time_range[0]
        if time_range:
            # First letter of the range maps onto the qdr codes (h/d/w/m/y)
            tbs = '&tbs=qdr:' + time_range[0].lower()

    # Ensure search query is parsable
    q = urlparse.quote(q)

    # Pass along type of results (news, images, books, etc)
    tbm = ''
    if 'tbm' in args:
        tbm = '&tbm=' + args.get('tbm')

    # Get results page start value (10 per page, ie page 2 start val = 20)
    start = ''
    if 'start' in args:
        start = '&start=' + args.get('start')

    # Search for results near a particular city, if available
    near = ''
    if near_city is not None:
        near = '&near=' + urlparse.quote(near_city)

    return q + tbs + tbm + start + near
class Request:
    """Outbound HTTP helper that fetches pages under a spoofed user agent."""

    def __init__(self, normal_ua):
        # Derive the spoofed UA once from the client's real user agent
        self.modified_user_agent = gen_user_agent(normal_ua)

    def __getitem__(self, name):
        # Support dict-style attribute access, e.g. request['modified_user_agent']
        return getattr(self, name)

    def send(self, base_url=SEARCH_URL, query=''):
        """Fetch base_url + query with pycurl and return the decoded body.

        Follows redirects; undecodable bytes in the response are ignored.
        """
        header_lines = []
        body = BytesIO()

        curl = pycurl.Curl()
        curl.setopt(curl.URL, base_url + query)
        curl.setopt(curl.USERAGENT, self.modified_user_agent)
        curl.setopt(curl.WRITEDATA, body)
        curl.setopt(curl.HEADERFUNCTION, header_lines.append)
        curl.setopt(pycurl.FOLLOWLOCATION, 1)
        curl.perform()
        curl.close()

        return body.getvalue().decode('utf-8', 'ignore')

View File

@ -1,58 +1,27 @@
from app import app, rhyme from app import app
from app.filter import Filter from app.filter import Filter
from app.request import Request, gen_query
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from flask import request, redirect, render_template from flask import g, request, redirect, render_template
from io import BytesIO
import json import json
import os import os
import pycurl
import urllib.parse as urlparse import urllib.parse as urlparse
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__))) app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
# Base search url
SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
user_config = json.load(open(app.config['STATIC_FOLDER'] + '/config.json')) user_config = json.load(open(app.config['STATIC_FOLDER'] + '/config.json'))
def get_ua(user_agent): @app.before_request
is_mobile = 'Android' in user_agent or 'iPhone' in user_agent def before_request_func():
g.user_request = Request(request.headers.get('User-Agent'))
mozilla = rhyme.get_rhyme('Mo') + rhyme.get_rhyme('zilla')
firefox = rhyme.get_rhyme('Fire') + rhyme.get_rhyme('fox')
linux = rhyme.get_rhyme('Lin') + 'ux'
if is_mobile:
return MOBILE_UA.format(mozilla, firefox)
else:
return DESKTOP_UA.format(mozilla, linux, firefox)
def send_request(curl_url, ua):
response_header = []
b_obj = BytesIO()
crl = pycurl.Curl()
crl.setopt(crl.URL, curl_url)
crl.setopt(crl.USERAGENT, ua)
crl.setopt(crl.WRITEDATA, b_obj)
crl.setopt(crl.HEADERFUNCTION, response_header.append)
crl.setopt(pycurl.FOLLOWLOCATION, 1)
crl.perform()
crl.close()
return b_obj.getvalue().decode('utf-8', 'ignore')
@app.route('/', methods=['GET']) @app.route('/', methods=['GET'])
def index(): def index():
bg = '#000' if 'dark' in user_config and user_config['dark'] else '#fff' bg = '#000' if 'dark' in user_config and user_config['dark'] else '#fff'
return render_template('index.html', bg=bg, ua=get_ua(request.headers.get('User-Agent'))) return render_template('index.html', bg=bg, ua=g.user_request.modified_user_agent)
@app.route('/search', methods=['GET']) @app.route('/search', methods=['GET'])
@ -65,12 +34,13 @@ def search():
mobile = 'Android' in user_agent or 'iPhone' in user_agent mobile = 'Android' in user_agent or 'iPhone' in user_agent
content_filter = Filter(mobile, user_config) content_filter = Filter(mobile, user_config)
full_query = content_filter.gen_query(q, request.args) full_query = gen_query(q, request.args, content_filter.near)
get_body = send_request(SEARCH_URL + full_query, get_ua(user_agent)) get_body = g.user_request.send(query=full_query)
get_body = content_filter.reskin(get_body)
soup = content_filter.clean(BeautifulSoup(get_body, 'html.parser'))
return render_template('display.html', query=urlparse.unquote(q), response=soup) shoogle_results = content_filter.reskin(get_body)
formatted_results = content_filter.clean(BeautifulSoup(shoogle_results, 'html.parser'))
return render_template('display.html', query=urlparse.unquote(q), response=formatted_results)
@app.route('/config', methods=['GET', 'POST']) @app.route('/config', methods=['GET', 'POST'])
@ -107,19 +77,19 @@ def imgres():
@app.route('/window') @app.route('/window')
def window(): def window():
get_body = send_request(request.args.get('location'), get_ua(request.headers.get('User-Agent'))) get_body = g.user_request.send(base_url=request.args.get('location'))
get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"') get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"')
get_body = get_body.replace('href="/', 'href="' + request.args.get('location') + '"') get_body = get_body.replace('href="/', 'href="' + request.args.get('location') + '"')
soup = BeautifulSoup(get_body, 'html.parser') results = BeautifulSoup(get_body, 'html.parser')
try: try:
for script in soup('script'): for script in results('script'):
script.decompose() script.decompose()
except Exception: except Exception:
pass pass
return render_template('display.html', response=soup) return render_template('display.html', response=results)
if __name__ == '__main__': if __name__ == '__main__':