Added image proxying, refactored filter class

Images were previously fetched directly from Google search results,
which was a potential privacy hazard. All image sources are now rewritten
to pass through shoogle's routing first, which fetches the raw image data
and passes it through to the user.

The Filter class was refactored to split the primary clean method into
smaller, more manageable submethods.
main
Ben Busby 2020-04-27 20:21:36 -06:00
parent b0e6167733
commit 4180aedd87
3 changed files with 106 additions and 67 deletions
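In short, the proxying has two halves: the filter rewrites every result image's src so that it points at shoogle's own /tmp endpoint, and that endpoint fetches the raw bytes server-side and serves them back, so the browser never contacts Google's image hosts directly. Below is a minimal, self-contained sketch of that flow, not the code from this commit: it assumes Flask and BeautifulSoup, uses requests as a stand-in for the repo's pycurl-based Request class, and URL-quotes the proxied address (the diff further down appends it directly).

import io
import urllib.parse as urlparse

import requests  # stand-in for the pycurl-based Request class used in this repo
from bs4 import BeautifulSoup
from flask import Flask, request, send_file

app = Flask(__name__)


def proxy_image_sources(html):
    # Rewrite every <img> src so the browser loads it through /tmp instead of Google
    soup = BeautifulSoup(html, 'html.parser')
    for img in soup.find_all('img'):
        src = img.get('src', '')
        if src.startswith('//'):
            src = 'https:' + src  # protocol-relative URLs need a scheme first
        img['src'] = '/tmp?image_url=' + urlparse.quote(src, safe='')
    return str(soup)


@app.route('/tmp')
def tmp():
    # Fetch the raw image bytes server-side and pass them through to the user
    raw = requests.get(request.args.get('image_url')).content
    return send_file(io.BytesIO(raw), mimetype='image/png')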

app/filter.py

@@ -30,9 +30,11 @@ class Filter:
         return page
     def clean(self, soup):
-        # Remove all ads
-        main_divs = soup.find('div', {'id': 'main'})
-        if main_divs is not None:
+        def remove_ads():
+            main_divs = soup.find('div', {'id': 'main'})
+            if main_divs is None:
+                return
             result_divs = main_divs.findAll('div', recursive=False)
             # Only ads/sponsored content use classes in the list of result divs
@@ -40,78 +42,92 @@ class Filter:
             for div in ad_divs:
                 div.decompose()
-        # Remove unnecessary button(s)
-        for button in soup.find_all('button'):
-            button.decompose()
+        def sync_images():
+            for img in soup.find_all('img'):
+                if img['src'].startswith('//'):
+                    img['src'] = 'https:' + img['src']
-        # Remove svg logos
-        for svg in soup.find_all('svg'):
-            svg.decompose()
+                img['src'] = '/tmp?image_url=' + img['src']
-        # Update logo
-        logo = soup.find('a', {'class': 'l'})
-        if logo and self.mobile:
-            logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; font-size:18px;'
+        def update_styling():
+            # Remove unnecessary button(s)
+            for button in soup.find_all('button'):
+                button.decompose()
-        # Fix search bar length on mobile
-        try:
-            search_bar = soup.find('header').find('form').find('div')
-            search_bar['style'] = 'width: 100%;'
-        except AttributeError:
-            pass
+            # Remove svg logos
+            for svg in soup.find_all('svg'):
+                svg.decompose()
-        # Replace hrefs with only the intended destination (no "utm" type tags)
-        for a in soup.find_all('a', href=True):
-            href = a['href']
-            if '/advanced_search' in href:
-                a.decompose()
-                continue
+            # Update logo
+            logo = soup.find('a', {'class': 'l'})
+            if logo and self.mobile:
+                logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; ' \
+                                'font-size:18px; '
-            if 'url?q=' in href:
-                # Strip unneeded arguments
-                result_link = urlparse.urlparse(href)
-                result_link = parse_qs(result_link.query)['q'][0]
+            # Fix search bar length on mobile
+            try:
+                search_bar = soup.find('header').find('form').find('div')
+                search_bar['style'] = 'width: 100%;'
+            except AttributeError:
+                pass
-                parsed_link = urlparse.urlparse(result_link)
-                link_args = parse_qs(parsed_link.query)
-                safe_args = {}
+            # Set up dark mode if active
+            if self.dark:
+                soup.find('html')['style'] = 'scrollbar-color: #333 #111;'
+                for input_element in soup.findAll('input'):
+                    input_element['style'] = 'color:#fff;'
-                for arg in link_args.keys():
-                    if arg in SKIP_ARGS:
-                        continue
+        def update_links():
+            # Replace hrefs with only the intended destination (no "utm" type tags)
+            for a in soup.find_all('a', href=True):
+                href = a['href']
+                if '/advanced_search' in href:
+                    a.decompose()
+                    continue
-                    safe_args[arg] = link_args[arg]
+                if 'url?q=' in href:
+                    # Strip unneeded arguments
+                    result_link = urlparse.urlparse(href)
+                    result_link = parse_qs(result_link.query)['q'][0]
-                # Remove original link query and replace with filtered args
-                result_link = result_link.replace(parsed_link.query, '')
-                if len(safe_args) > 1:
-                    result_link = result_link + urlparse.urlencode(safe_args)
-                else:
-                    result_link = result_link.replace('?', '')
+                    parsed_link = urlparse.urlparse(result_link)
+                    link_args = parse_qs(parsed_link.query)
+                    safe_args = {}
-                a['href'] = result_link
+                    for arg in link_args.keys():
+                        if arg in SKIP_ARGS:
+                            continue
-                # Add no-js option
-                if self.nojs:
-                    nojs_link = soup.new_tag('a')
-                    nojs_link['href'] = '/window?location=' + result_link
-                    nojs_link['style'] = 'display:block;width:100%;'
-                    nojs_link.string = 'NoJS Link: ' + nojs_link['href']
-                    a.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
-                    a.append(nojs_link)
+                        safe_args[arg] = link_args[arg]
-        # Set up dark mode if active
-        if self.dark:
-            soup.find('html')['style'] = 'scrollbar-color: #333 #111;'
-            for input_element in soup.findAll('input'):
-                input_element['style'] = 'color:#fff;'
+                    # Remove original link query and replace with filtered args
+                    result_link = result_link.replace(parsed_link.query, '')
+                    if len(safe_args) > 1:
+                        result_link = result_link + urlparse.urlencode(safe_args)
+                    else:
+                        result_link = result_link.replace('?', '')
-        # Ensure no extra scripts passed through
-        try:
-            for script in soup('script'):
-                script.decompose()
-            soup.find('div', id='sfooter').decompose()
-        except Exception:
-            pass
+                    a['href'] = result_link
+                    # Add no-js option
+                    if self.nojs:
+                        nojs_link = soup.new_tag('a')
+                        nojs_link['href'] = '/window?location=' + result_link
+                        nojs_link['style'] = 'display:block;width:100%;'
+                        nojs_link.string = 'NoJS Link: ' + nojs_link['href']
+                        a.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
+                        a.append(nojs_link)
+        # Ensure no extra scripts passed through
+        try:
+            for script in soup('script'):
+                script.decompose()
+            soup.find('div', id='sfooter').decompose()
+        except Exception:
+            pass
+        remove_ads()
+        sync_images()
+        update_styling()
+        update_links()
         return soup

app/request.py

@@ -1,5 +1,4 @@
 from app import rhyme
-from app.filter import Filter
 from io import BytesIO
 import pycurl
 import urllib.parse as urlparse
@@ -60,7 +59,7 @@ class Request:
     def __getitem__(self, name):
         return getattr(self, name)
-    def send(self, base_url=SEARCH_URL, query=''):
+    def send(self, base_url=SEARCH_URL, query='', return_bytes=False):
         response_header = []
         b_obj = BytesIO()
@@ -73,4 +72,7 @@ class Request:
         crl.perform()
         crl.close()
-        return b_obj.getvalue().decode('utf-8', 'ignore')
+        if return_bytes:
+            return b_obj.getvalue()
+        else:
+            return b_obj.getvalue().decode('utf-8', 'ignore')
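The new return_bytes flag lets callers pick between decoded text (the default, used for search result pages) and raw bytes (used by the image proxy). A hypothetical call site, assuming a Request constructed the same way routes.py constructs it:

from app.request import Request

req = Request('Mozilla/5.0')  # user agent string, as forwarded by routes.py

page_html = req.send(query='example query')  # str, decoded as UTF-8 (errors ignored)
image_raw = req.send(base_url='https://example.com/image.png', return_bytes=True)  # bytes, undecoded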

app/routes.py

@@ -2,7 +2,8 @@ from app import app
 from app.filter import Filter
 from app.request import Request, gen_query
 from bs4 import BeautifulSoup
-from flask import g, make_response, request, redirect, render_template
+from flask import g, make_response, request, redirect, render_template, send_file
+import io
 import json
 import os
 import urllib.parse as urlparse
@@ -18,6 +19,11 @@ def before_request_func():
     g.user_request = Request(request.headers.get('User-Agent'))
+# @app.after_request
+# def after_request(response):
+#     return response
 @app.route('/', methods=['GET'])
 def index():
     bg = '#000' if 'dark' in user_config and user_config['dark'] else '#fff'
@@ -87,6 +93,21 @@ def imgres():
     return redirect(request.args.get('imgurl'))
+@app.route('/tmp')
+def tmp():
+    file_data = g.user_request.send(base_url=request.args.get('image_url'), return_bytes=True)
+    tmp_mem = io.BytesIO()
+    tmp_mem.write(file_data)
+    tmp_mem.seek(0)
+    return send_file(
+        tmp_mem,
+        as_attachment=True,
+        attachment_filename='tmp.png',
+        mimetype='image/png'
+    )
 @app.route('/window')
 def window():
     get_body = g.user_request.send(base_url=request.args.get('location'))
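A quick way to exercise the new /tmp route is Flask's test client. This is a sketch under a few assumptions: the app package is importable as routes.py suggests, the before-request hook is satisfied by the supplied User-Agent header, and the illustrative image_url is reachable from the server.

from app import app

with app.test_client() as client:
    resp = client.get(
        '/tmp',
        query_string={'image_url': 'https://www.example.com/some-thumbnail.png'},
        headers={'User-Agent': 'Mozilla/5.0'},
    )
    assert resp.status_code == 200
    assert resp.mimetype == 'image/png'  # the route labels every proxied payload as PNG
    print(len(resp.data), 'bytes proxied')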