Added image proxying, refactored filter class
Images were previously fetched directly from Google search results, which was a potential privacy hazard. All image sources are now rewritten to pass through shoogle's routing first, which fetches the raw image data and passes it through to the user. The Filter class was refactored to split its primary clean method into smaller, more manageable submethods.
main
parent
b0e6167733
commit
4180aedd87
142
app/filter.py
142
app/filter.py
|
@ -30,9 +30,11 @@ class Filter:
|
|||
return page
|
||||
|
||||
def clean(self, soup):
|
||||
# Remove all ads
|
||||
main_divs = soup.find('div', {'id': 'main'})
|
||||
if main_divs is not None:
|
||||
def remove_ads():
|
||||
main_divs = soup.find('div', {'id': 'main'})
|
||||
if main_divs is None:
|
||||
return
|
||||
|
||||
result_divs = main_divs.findAll('div', recursive=False)
|
||||
|
||||
# Only ads/sponsored content use classes in the list of result divs
|
||||
|
@ -40,78 +42,92 @@ class Filter:
|
|||
for div in ad_divs:
|
||||
div.decompose()
|
||||
|
||||
# Remove unnecessary button(s)
|
||||
for button in soup.find_all('button'):
|
||||
button.decompose()
|
||||
def sync_images():
|
||||
for img in soup.find_all('img'):
|
||||
if img['src'].startswith('//'):
|
||||
img['src'] = 'https:' + img['src']
|
||||
|
||||
# Remove svg logos
|
||||
for svg in soup.find_all('svg'):
|
||||
svg.decompose()
|
||||
img['src'] = '/tmp?image_url=' + img['src']
|
||||
|
||||
# Update logo
|
||||
logo = soup.find('a', {'class': 'l'})
|
||||
if logo and self.mobile:
|
||||
logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; font-size:18px;'
|
||||
def update_styling():
|
||||
# Remove unnecessary button(s)
|
||||
for button in soup.find_all('button'):
|
||||
button.decompose()
|
||||
|
||||
# Fix search bar length on mobile
|
||||
try:
|
||||
search_bar = soup.find('header').find('form').find('div')
|
||||
search_bar['style'] = 'width: 100%;'
|
||||
except AttributeError:
|
||||
pass
|
||||
# Remove svg logos
|
||||
for svg in soup.find_all('svg'):
|
||||
svg.decompose()
|
||||
|
||||
# Replace hrefs with only the intended destination (no "utm" type tags)
|
||||
for a in soup.find_all('a', href=True):
|
||||
href = a['href']
|
||||
if '/advanced_search' in href:
|
||||
a.decompose()
|
||||
continue
|
||||
# Update logo
|
||||
logo = soup.find('a', {'class': 'l'})
|
||||
if logo and self.mobile:
|
||||
logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; ' \
|
||||
'font-size:18px; '
|
||||
|
||||
if 'url?q=' in href:
|
||||
# Strip unneeded arguments
|
||||
result_link = urlparse.urlparse(href)
|
||||
result_link = parse_qs(result_link.query)['q'][0]
|
||||
# Fix search bar length on mobile
|
||||
try:
|
||||
search_bar = soup.find('header').find('form').find('div')
|
||||
search_bar['style'] = 'width: 100%;'
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
parsed_link = urlparse.urlparse(result_link)
|
||||
link_args = parse_qs(parsed_link.query)
|
||||
safe_args = {}
|
||||
# Set up dark mode if active
|
||||
if self.dark:
|
||||
soup.find('html')['style'] = 'scrollbar-color: #333 #111;'
|
||||
for input_element in soup.findAll('input'):
|
||||
input_element['style'] = 'color:#fff;'
|
||||
|
||||
for arg in link_args.keys():
|
||||
if arg in SKIP_ARGS:
|
||||
continue
|
||||
def update_links():
|
||||
# Replace hrefs with only the intended destination (no "utm" type tags)
|
||||
for a in soup.find_all('a', href=True):
|
||||
href = a['href']
|
||||
if '/advanced_search' in href:
|
||||
a.decompose()
|
||||
continue
|
||||
|
||||
safe_args[arg] = link_args[arg]
|
||||
if 'url?q=' in href:
|
||||
# Strip unneeded arguments
|
||||
result_link = urlparse.urlparse(href)
|
||||
result_link = parse_qs(result_link.query)['q'][0]
|
||||
|
||||
# Remove original link query and replace with filtered args
|
||||
result_link = result_link.replace(parsed_link.query, '')
|
||||
if len(safe_args) > 1:
|
||||
result_link = result_link + urlparse.urlencode(safe_args)
|
||||
else:
|
||||
result_link = result_link.replace('?', '')
|
||||
parsed_link = urlparse.urlparse(result_link)
|
||||
link_args = parse_qs(parsed_link.query)
|
||||
safe_args = {}
|
||||
|
||||
a['href'] = result_link
|
||||
for arg in link_args.keys():
|
||||
if arg in SKIP_ARGS:
|
||||
continue
|
||||
|
||||
# Add no-js option
|
||||
if self.nojs:
|
||||
nojs_link = soup.new_tag('a')
|
||||
nojs_link['href'] = '/window?location=' + result_link
|
||||
nojs_link['style'] = 'display:block;width:100%;'
|
||||
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
|
||||
a.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
|
||||
a.append(nojs_link)
|
||||
safe_args[arg] = link_args[arg]
|
||||
|
||||
# Set up dark mode if active
|
||||
if self.dark:
|
||||
soup.find('html')['style'] = 'scrollbar-color: #333 #111;'
|
||||
for input_element in soup.findAll('input'):
|
||||
input_element['style'] = 'color:#fff;'
|
||||
# Remove original link query and replace with filtered args
|
||||
result_link = result_link.replace(parsed_link.query, '')
|
||||
if len(safe_args) > 1:
|
||||
result_link = result_link + urlparse.urlencode(safe_args)
|
||||
else:
|
||||
result_link = result_link.replace('?', '')
|
||||
|
||||
# Ensure no extra scripts passed through
|
||||
try:
|
||||
for script in soup('script'):
|
||||
script.decompose()
|
||||
soup.find('div', id='sfooter').decompose()
|
||||
except Exception:
|
||||
pass
|
||||
a['href'] = result_link
|
||||
|
||||
# Add no-js option
|
||||
if self.nojs:
|
||||
nojs_link = soup.new_tag('a')
|
||||
nojs_link['href'] = '/window?location=' + result_link
|
||||
nojs_link['style'] = 'display:block;width:100%;'
|
||||
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
|
||||
a.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
|
||||
a.append(nojs_link)
|
||||
|
||||
# Ensure no extra scripts passed through
|
||||
try:
|
||||
for script in soup('script'):
|
||||
script.decompose()
|
||||
soup.find('div', id='sfooter').decompose()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
remove_ads()
|
||||
sync_images()
|
||||
update_styling()
|
||||
update_links()
|
||||
return soup
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
from app import rhyme
|
||||
from app.filter import Filter
|
||||
from io import BytesIO
|
||||
import pycurl
|
||||
import urllib.parse as urlparse
|
||||
|
@ -60,7 +59,7 @@ class Request:
|
|||
def __getitem__(self, name):
|
||||
return getattr(self, name)
|
||||
|
||||
def send(self, base_url=SEARCH_URL, query=''):
|
||||
def send(self, base_url=SEARCH_URL, query='', return_bytes=False):
|
||||
response_header = []
|
||||
|
||||
b_obj = BytesIO()
|
||||
|
@ -73,4 +72,7 @@ class Request:
|
|||
crl.perform()
|
||||
crl.close()
|
||||
|
||||
return b_obj.getvalue().decode('utf-8', 'ignore')
|
||||
if return_bytes:
|
||||
return b_obj.getvalue()
|
||||
else:
|
||||
return b_obj.getvalue().decode('utf-8', 'ignore')
|
||||
|
|
|
@ -2,7 +2,8 @@ from app import app
|
|||
from app.filter import Filter
|
||||
from app.request import Request, gen_query
|
||||
from bs4 import BeautifulSoup
|
||||
from flask import g, make_response, request, redirect, render_template
|
||||
from flask import g, make_response, request, redirect, render_template, send_file
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import urllib.parse as urlparse
|
||||
|
@ -18,6 +19,11 @@ def before_request_func():
|
|||
g.user_request = Request(request.headers.get('User-Agent'))
|
||||
|
||||
|
||||
# @app.after_request
|
||||
# def after_request(response):
|
||||
# return response
|
||||
|
||||
|
||||
@app.route('/', methods=['GET'])
|
||||
def index():
|
||||
bg = '#000' if 'dark' in user_config and user_config['dark'] else '#fff'
|
||||
|
@ -87,6 +93,21 @@ def imgres():
|
|||
return redirect(request.args.get('imgurl'))
|
||||
|
||||
|
||||
@app.route('/tmp')
def tmp():
    """Proxy a remote image so the client never contacts the image host.

    Reads the ``image_url`` query parameter, fetches that URL through the
    per-request ``g.user_request`` object as raw bytes, and returns the
    bytes to the client from an in-memory buffer.

    Returns:
        The fetched bytes as a file response on success, or a 400 response
        when ``image_url`` is missing, empty, or not an http(s) URL.
    """
    image_url = request.args.get('image_url')

    # A missing parameter would otherwise reach send() as base_url=None.
    if not image_url:
        return make_response('Missing image_url parameter', 400)

    # The URL comes straight from the query string, so only forward plain
    # web URLs to the fetcher.
    # NOTE(review): assumes Request.send passes base_url to the transfer
    # library unchanged -- confirm against app/request.py.
    if urlparse.urlparse(image_url).scheme not in ('http', 'https'):
        return make_response('Unsupported image_url scheme', 400)

    file_data = g.user_request.send(base_url=image_url, return_bytes=True)

    # NOTE(review): the response is always labelled image/png, whatever the
    # upstream content type is. Newer Flask releases also rename
    # attachment_filename to download_name -- confirm the pinned version
    # before changing either.
    return send_file(
        io.BytesIO(file_data),
        as_attachment=True,
        attachment_filename='tmp.png',
        mimetype='image/png'
    )
|
||||
|
||||
|
||||
@app.route('/window')
|
||||
def window():
|
||||
get_body = g.user_request.send(base_url=request.args.get('location'))
|
||||
|
|
Loading…
Reference in New Issue