Project refactor (#85)
* Major refactor of requests and session management
  - Switches from pycurl to the requests library
  - Allows for less janky decoding, especially with non-Latin character sets
  - Adds session-level management of user configs
  - Allows each session to set its own config (people are probably going to complain about this, though not sure if it'll be the same number of people who are upset that their friends/family have to share their config)
  - Updates key gen/regen to more aggressively swap out keys after each request
* Added ability to save/load configs by name
  - New PUT method for config allows changing config with specified name
  - New methods in js controller to handle loading/saving of configs
* Result formatting and removal of unused elements
  - Fixed question section formatting from results page (added appropriate padding and made questions styled as italic)
  - Removed user agent display from main config settings
* Minor change to button label
* Fixed issue with "de-pickling" of flask session
  Having a gitignore-everything ("*") file within a flask session folder seems to cause a weird bug where the state of the app becomes unusable from continuously trying to prune files listed in the gitignore (and it can't prune '*').
* Switched to pickling saved configs
* Updated ad/sponsored content filter and conf naming
  Configs are now named with a .conf extension to allow for easier manual cleanup/modification of named config files.
  Sponsored content is now removed by basic string matching of span content.
* Version bump to 0.2.0
* Fixed request.send return style
main
parent
71ba00785f
commit
b6fb4723f9
|
@ -5,6 +5,8 @@ __pycache__/
|
|||
*.pem
|
||||
config.json
|
||||
test/static
|
||||
flask_session/
|
||||
app/static/config
|
||||
|
||||
# pip stuff
|
||||
build/
|
||||
|
|
|
@ -1,12 +1,24 @@
|
|||
from app.utils.misc import generate_user_keys
|
||||
from cryptography.fernet import Fernet
|
||||
from flask import Flask
|
||||
from flask_session import Session
|
||||
import os
|
||||
|
||||
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
|
||||
app.secret_key = Fernet.generate_key()
|
||||
app.config['VERSION_NUMBER'] = '0.1.4'
|
||||
app.user_elements = {}
|
||||
app.config['SECRET_KEY'] = os.urandom(16)
|
||||
app.config['SESSION_TYPE'] = 'filesystem'
|
||||
app.config['VERSION_NUMBER'] = '0.2.0'
|
||||
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
|
||||
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
|
||||
app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json'
|
||||
app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER'] + '/config')
|
||||
app.config['SESSION_FILE_DIR'] = app.config['CONFIG_PATH']
|
||||
app.config['SESSION_COOKIE_SECURE'] = True
|
||||
|
||||
if not os.path.exists(app.config['CONFIG_PATH']):
|
||||
os.makedirs(app.config['CONFIG_PATH'])
|
||||
|
||||
sess = Session()
|
||||
sess.init_app(app)
|
||||
|
||||
from app import routes
|
||||
|
|
133
app/filter.py
133
app/filter.py
|
@ -1,5 +1,6 @@
|
|||
from app.request import VALID_PARAMS
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import ResultSet
|
||||
from cryptography.fernet import Fernet
|
||||
import re
|
||||
import urllib.parse as urlparse
|
||||
|
@ -17,14 +18,9 @@ data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42m
|
|||
def get_first_link(soup):
|
||||
# Replace hrefs with only the intended destination (no "utm" type tags)
|
||||
for a in soup.find_all('a', href=True):
|
||||
href = a['href'].replace('https://www.google.com', '')
|
||||
|
||||
result_link = urlparse.urlparse(href)
|
||||
query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
|
||||
|
||||
# Return the first search result URL
|
||||
if 'url?q=' in href:
|
||||
return filter_link_args(href)
|
||||
if 'url?q=' in a['href']:
|
||||
return filter_link_args(a['href'])
|
||||
|
||||
|
||||
def filter_link_args(query_link):
|
||||
|
@ -51,8 +47,12 @@ def filter_link_args(query_link):
|
|||
return query_link
|
||||
|
||||
|
||||
def has_ad_content(element):
    """Report whether a span's text is one of the known ad/sponsored labels.

    Args:
        element: Text content of a span, as a string.

    Returns:
        True if the text, ignoring case and surrounding whitespace, is
        exactly ``'ad'`` or ``'sponsoredⓘ'``; False otherwise.
    """
    # Normalize here so the match does not silently depend on every caller
    # lower-casing and trimming the span text before calling.
    return element.strip().lower() in ('ad', 'sponsoredⓘ')
|
||||
|
||||
|
||||
class Filter:
|
||||
def __init__(self, mobile=False, config=None, secret_key=''):
|
||||
def __init__(self, user_keys: dict, mobile=False, config=None):
|
||||
if config is None:
|
||||
config = {}
|
||||
|
||||
|
@ -61,11 +61,17 @@ class Filter:
|
|||
self.nojs = config['nojs'] if 'nojs' in config else False
|
||||
self.new_tab = config['new_tab'] if 'new_tab' in config else False
|
||||
self.mobile = mobile
|
||||
self.secret_key = secret_key
|
||||
self.user_keys = user_keys
|
||||
self.main_divs = ResultSet('')
|
||||
self._elements = 0
|
||||
|
||||
def __getitem__(self, name):
|
||||
return getattr(self, name)
|
||||
|
||||
@property
|
||||
def elements(self):
|
||||
return self._elements
|
||||
|
||||
def reskin(self, page):
|
||||
# Aesthetic only re-skinning
|
||||
page = page.replace('>G<', '>Wh<')
|
||||
|
@ -76,11 +82,31 @@ class Filter:
|
|||
|
||||
return page
|
||||
|
||||
def encrypt_path(self, msg, is_element=False):
    """Encrypt a path so it does not appear in plaintext in logs.

    Args:
        msg: The plaintext path/URL to encrypt.
        is_element: When True, encrypt with the user's ``element_key`` and
            count the element; otherwise encrypt with the ``text_key``.

    Returns:
        The Fernet token decoded to ``str``.
    """
    key_name = 'element_key' if is_element else 'text_key'
    token = Fernet(self.user_keys[key_name]).encrypt(msg.encode())
    encrypted = token.decode()

    if is_element:
        # Element paths are counted so that the element key can be
        # regenerated once all elements have been loaded
        self._elements += 1

    return encrypted
|
||||
|
||||
def clean(self, soup):
|
||||
self.remove_ads(soup)
|
||||
self.update_image_paths(soup)
|
||||
self.main_divs = soup.find('div', {'id': 'main'})
|
||||
self.remove_ads()
|
||||
self.fix_question_section()
|
||||
self.update_styling(soup)
|
||||
self.update_links(soup)
|
||||
|
||||
for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]:
|
||||
self.update_element_src(img, 'image/png')
|
||||
|
||||
for audio in [_ for _ in soup.find_all('audio') if 'src' in _.attrs]:
|
||||
self.update_element_src(audio, 'audio/mpeg')
|
||||
|
||||
for link in soup.find_all('a', href=True):
|
||||
self.update_link(link)
|
||||
|
||||
input_form = soup.find('form')
|
||||
if input_form is not None:
|
||||
|
@ -105,35 +131,42 @@ class Filter:
|
|||
|
||||
return soup
|
||||
|
||||
def remove_ads(self, soup):
|
||||
main_divs = soup.find('div', {'id': 'main'})
|
||||
if main_divs is None:
|
||||
def remove_ads(self):
|
||||
if not self.main_divs:
|
||||
return
|
||||
result_divs = main_divs.find_all('div', recursive=False)
|
||||
|
||||
for div in [_ for _ in result_divs]:
|
||||
has_ad = len([_ for _ in div.find_all('span', recursive=True) if 'ad' == _.text.lower()])
|
||||
for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]:
|
||||
has_ad = len([_ for _ in div.find_all('span', recursive=True) if has_ad_content(_.text.lower())])
|
||||
_ = div.decompose() if has_ad else None
|
||||
|
||||
def update_image_paths(self, soup):
|
||||
for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]:
|
||||
img_src = img['src']
|
||||
if img_src.startswith('//'):
|
||||
img_src = 'https:' + img_src
|
||||
elif img_src.startswith(LOGO_URL):
|
||||
# Re-brand with Whoogle logo
|
||||
img['src'] = '/static/img/logo.png'
|
||||
img['style'] = 'height:40px;width:162px'
|
||||
continue
|
||||
elif img_src.startswith(GOOG_IMG):
|
||||
img['src'] = BLANK_B64
|
||||
continue
|
||||
def fix_question_section(self):
|
||||
if not self.main_divs:
|
||||
return
|
||||
|
||||
enc_src = Fernet(self.secret_key).encrypt(img_src.encode())
|
||||
img['src'] = '/tmp?image_url=' + enc_src.decode()
|
||||
question_divs = [_ for _ in self.main_divs.find_all('div', recursive=False) if len(_.find_all('h2')) > 0]
|
||||
for x in question_divs:
|
||||
questions = [_ for _ in x.find_all('div', recursive=True) if _.text.endswith('?')]
|
||||
for question in questions:
|
||||
question['style'] = 'padding: 10px; font-style: italic;'
|
||||
|
||||
def update_element_src(self, element, mimetype):
    """Rewrite an element's ``src`` attribute in place.

    - A protocol-relative source (``//...``) is prefixed with ``https:``
      and then proxied like any other source.
    - A source starting with ``LOGO_URL`` is swapped for the local Whoogle
      logo with a fixed size.
    - A source starting with ``GOOG_IMG`` is swapped for ``BLANK_B64``.
    - Everything else is pointed at ``/element`` with the encrypted source
      URL and the URL-quoted ``mimetype``.

    Args:
        element: Element supporting item access for ``'src'``/``'style'``.
        mimetype: Mimetype forwarded in the ``type`` query parameter.
    """
    src = element['src']

    if src.startswith('//'):
        src = 'https:' + src
    elif src.startswith(LOGO_URL):
        # Re-brand with Whoogle logo
        element['src'] = '/static/img/logo.png'
        element['style'] = 'height:40px;width:162px'
        return
    elif src.startswith(GOOG_IMG):
        element['src'] = BLANK_B64
        return

    # Encrypt first (this also counts the element), then build the proxy URL
    encrypted_src = self.encrypt_path(src, is_element=True)
    quoted_type = urlparse.quote(mimetype)
    element['src'] = '/element?url={}&type={}'.format(encrypted_src, quoted_type)
|
||||
# TODO: Non-mobile image results link to website instead of image
|
||||
# if not self.mobile:
|
||||
# img.append(BeautifulSoup(FULL_RES_IMG.format(img_src), 'html.parser'))
|
||||
# img.append(BeautifulSoup(FULL_RES_IMG.format(element_src), 'html.parser'))
|
||||
|
||||
def update_styling(self, soup):
|
||||
# Remove unnecessary button(s)
|
||||
|
@ -169,44 +202,42 @@ class Filter:
|
|||
for href_element in soup.findAll('a'):
|
||||
href_element['style'] = 'color: white' if href_element['href'].startswith('/search') else ''
|
||||
|
||||
def update_links(self, soup):
|
||||
# Replace hrefs with only the intended destination (no "utm" type tags)
|
||||
for a in soup.find_all('a', href=True):
|
||||
href = a['href'].replace('https://www.google.com', '')
|
||||
def update_link(self, link):
|
||||
# Replace href with only the intended destination (no "utm" type tags)
|
||||
href = link['href'].replace('https://www.google.com', '')
|
||||
if '/advanced_search' in href:
|
||||
a.decompose()
|
||||
continue
|
||||
link.decompose()
|
||||
return
|
||||
elif self.new_tab:
|
||||
a['target'] = '_blank'
|
||||
link['target'] = '_blank'
|
||||
|
||||
result_link = urlparse.urlparse(href)
|
||||
query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
|
||||
|
||||
if query_link.startswith('/'):
|
||||
a['href'] = 'https://google.com' + query_link
|
||||
link['href'] = 'https://google.com' + query_link
|
||||
elif '/search?q=' in href:
|
||||
enc_result = Fernet(self.secret_key).encrypt(query_link.encode())
|
||||
new_search = '/search?q=' + enc_result.decode()
|
||||
new_search = '/search?q=' + self.encrypt_path(query_link)
|
||||
|
||||
query_params = parse_qs(urlparse.urlparse(href).query)
|
||||
for param in VALID_PARAMS:
|
||||
param_val = query_params[param][0] if param in query_params else ''
|
||||
new_search += '&' + param + '=' + param_val
|
||||
a['href'] = new_search
|
||||
link['href'] = new_search
|
||||
elif 'url?q=' in href:
|
||||
# Strip unneeded arguments
|
||||
a['href'] = filter_link_args(query_link)
|
||||
link['href'] = filter_link_args(query_link)
|
||||
|
||||
# Add no-js option
|
||||
if self.nojs:
|
||||
gen_nojs(soup, a['href'], a)
|
||||
gen_nojs(link)
|
||||
else:
|
||||
a['href'] = href
|
||||
link['href'] = href
|
||||
|
||||
|
||||
def gen_nojs(soup, link, sibling):
|
||||
nojs_link = soup.new_tag('a')
|
||||
nojs_link['href'] = '/window?location=' + link
|
||||
def gen_nojs(sibling):
|
||||
nojs_link = BeautifulSoup().new_tag('a')
|
||||
nojs_link['href'] = '/window?location=' + sibling['href']
|
||||
nojs_link['style'] = 'display:block;width:100%;'
|
||||
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
|
||||
sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from io import BytesIO
|
||||
from lxml import etree
|
||||
import pycurl
|
||||
import random
|
||||
import requests
|
||||
from requests import Response
|
||||
import urllib.parse as urlparse
|
||||
|
||||
# Core Google search URLs
|
||||
|
@ -15,7 +15,7 @@ DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
|
|||
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near']
|
||||
|
||||
|
||||
def gen_user_agent(normal_ua, is_mobile):
|
||||
def gen_user_agent(is_mobile):
|
||||
mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla'
|
||||
firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox'
|
||||
linux = random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']) + 'ux'
|
||||
|
@ -66,20 +66,14 @@ class Request:
|
|||
def __init__(self, normal_ua, language='lang_en'):
|
||||
self.language = language
|
||||
self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua
|
||||
self.modified_user_agent = gen_user_agent(normal_ua, self.mobile)
|
||||
self.modified_user_agent = gen_user_agent(self.mobile)
|
||||
|
||||
def __getitem__(self, name):
|
||||
return getattr(self, name)
|
||||
|
||||
def get_decode_value(self):
|
||||
if 'lang_zh' in self.language:
|
||||
return 'gb2312'
|
||||
else:
|
||||
return 'unicode-escape'
|
||||
|
||||
def autocomplete(self, query):
|
||||
ac_query = dict(hl=self.language, q=query)
|
||||
response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query))
|
||||
response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)).text
|
||||
|
||||
if response:
|
||||
dom = etree.fromstring(response)
|
||||
|
@ -87,20 +81,9 @@ class Request:
|
|||
|
||||
return []
|
||||
|
||||
def send(self, base_url=SEARCH_URL, query='', return_bytes=False):
|
||||
response_header = []
|
||||
def send(self, base_url=SEARCH_URL, query='') -> Response:
|
||||
headers = {
|
||||
'User-Agent': self.modified_user_agent
|
||||
}
|
||||
|
||||
b_obj = BytesIO()
|
||||
crl = pycurl.Curl()
|
||||
crl.setopt(crl.URL, base_url + query)
|
||||
crl.setopt(crl.USERAGENT, self.modified_user_agent)
|
||||
crl.setopt(crl.WRITEDATA, b_obj)
|
||||
crl.setopt(crl.HEADERFUNCTION, response_header.append)
|
||||
crl.setopt(pycurl.FOLLOWLOCATION, 1)
|
||||
crl.perform()
|
||||
crl.close()
|
||||
|
||||
if return_bytes:
|
||||
return b_obj.getvalue()
|
||||
else:
|
||||
return b_obj.getvalue().decode(self.get_decode_value(), 'ignore')
|
||||
return requests.get(base_url + query, headers=headers)
|
||||
|
|
143
app/routes.py
143
app/routes.py
|
@ -1,18 +1,21 @@
|
|||
from app import app
|
||||
from app.filter import Filter, get_first_link
|
||||
from app.models.config import Config
|
||||
from app.request import Request, gen_query
|
||||
from app.request import Request
|
||||
from app.utils.misc import generate_user_keys, valid_user_session
|
||||
from app.utils.routing_utils import *
|
||||
import argparse
|
||||
import base64
|
||||
from bs4 import BeautifulSoup
|
||||
from cryptography.fernet import Fernet, InvalidToken
|
||||
from flask import g, jsonify, make_response, request, redirect, render_template, send_file
|
||||
from cryptography.fernet import Fernet
|
||||
from flask import g, jsonify, make_response, request, redirect, render_template, send_file, session
|
||||
from functools import wraps
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
from pycurl import error as pycurl_error
|
||||
import pickle
|
||||
import urllib.parse as urlparse
|
||||
from requests import exceptions
|
||||
import uuid
|
||||
import waitress
|
||||
|
||||
|
||||
|
@ -34,17 +37,22 @@ def auth_required(f):
|
|||
|
||||
@app.before_request
|
||||
def before_request_func():
|
||||
# Always redirect to https if HTTPS_ONLY is set (otherwise default to false)
|
||||
# Generate secret key for user if unavailable
|
||||
if not valid_user_session(session):
|
||||
session['config'] = {'url': request.url_root}
|
||||
session['keys'] = generate_user_keys()
|
||||
session['uuid'] = str(uuid.uuid4())
|
||||
|
||||
if session['uuid'] not in app.user_elements:
|
||||
app.user_elements.update({session['uuid']: 0})
|
||||
|
||||
# Always redirect to https if HTTPS_ONLY is set (otherwise default to False)
|
||||
https_only = os.getenv('HTTPS_ONLY', False)
|
||||
config_path = app.config['CONFIG_PATH']
|
||||
|
||||
if https_only and request.url.startswith('http://'):
|
||||
https_url = request.url.replace('http://', 'https://', 1)
|
||||
code = 308
|
||||
return redirect(https_url, code=code)
|
||||
return redirect(request.url.replace('http://', 'https://', 1), code=308)
|
||||
|
||||
json_config = json.load(open(config_path)) if os.path.exists(config_path) else {'url': request.url_root}
|
||||
g.user_config = Config(**json_config)
|
||||
g.user_config = Config(**session['config'])
|
||||
|
||||
if not g.user_config.url:
|
||||
g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root
|
||||
|
@ -53,6 +61,16 @@ def before_request_func():
|
|||
g.app_location = g.user_config.url
|
||||
|
||||
|
||||
@app.after_request
def after_request_func(response):
    """Regenerate the element key once all elements have been served.

    Runs after every request. When the request targets the ``/element``
    route and the session's outstanding element count is zero or less, a
    fresh element key is stored in the session and the count is reset to 0.

    Args:
        response: The outgoing response.

    Returns:
        The same response, unmodified.
    """
    # Compare the path instead of searching the whole URL: a query string
    # that merely contains "/element" must not count as an element request.
    if request.path == '/element':
        user_id = session['uuid']
        if app.user_elements[user_id] <= 0:
            session['keys']['element_key'] = Fernet.generate_key()
            app.user_elements[user_id] = 0

    return response
|
||||
|
||||
|
||||
@app.errorhandler(404)
|
||||
def unknown_page(e):
|
||||
return redirect(g.app_location)
|
||||
|
@ -62,14 +80,10 @@ def unknown_page(e):
|
|||
@auth_required
|
||||
def index():
|
||||
return render_template('index.html',
|
||||
dark_mode=g.user_config.dark,
|
||||
ua=g.user_request.modified_user_agent,
|
||||
languages=Config.LANGUAGES,
|
||||
countries=Config.COUNTRIES,
|
||||
current_lang=g.user_config.lang,
|
||||
current_ctry=g.user_config.ctry,
|
||||
version_number=app.config['VERSION_NUMBER'],
|
||||
request_type='get' if g.user_config.get_only else 'post')
|
||||
config=g.user_config,
|
||||
version_number=app.config['VERSION_NUMBER'])
|
||||
|
||||
|
||||
@app.route('/opensearch.xml', methods=['GET'])
|
||||
|
@ -103,68 +117,60 @@ def autocomplete():
|
|||
@app.route('/search', methods=['GET', 'POST'])
|
||||
@auth_required
|
||||
def search():
|
||||
request_params = request.args if request.method == 'GET' else request.form
|
||||
q = request_params.get('q')
|
||||
# Clear previous elements and generate a new key each time a new search is performed
|
||||
app.user_elements[session['uuid']] = 0
|
||||
session['keys']['element_key'] = Fernet.generate_key()
|
||||
|
||||
if q is None or len(q) == 0:
|
||||
search_util = RoutingUtils(request, g.user_config, session)
|
||||
query = search_util.new_search_query()
|
||||
|
||||
# Redirect to home if invalid/blank search
|
||||
if not query:
|
||||
return redirect('/')
|
||||
else:
|
||||
# Attempt to decrypt if this is an internal link
|
||||
try:
|
||||
q = Fernet(app.secret_key).decrypt(q.encode()).decode()
|
||||
except InvalidToken:
|
||||
pass
|
||||
|
||||
feeling_lucky = q.startswith('! ')
|
||||
# Generate response and number of external elements from the page
|
||||
response, elements = search_util.generate_response()
|
||||
if search_util.feeling_lucky:
|
||||
return redirect(response, code=303)
|
||||
|
||||
if feeling_lucky: # Well do you, punk?
|
||||
q = q[2:]
|
||||
|
||||
user_agent = request.headers.get('User-Agent')
|
||||
mobile = 'Android' in user_agent or 'iPhone' in user_agent
|
||||
|
||||
content_filter = Filter(mobile, g.user_config, secret_key=app.secret_key)
|
||||
full_query = gen_query(q, request_params, g.user_config, content_filter.near)
|
||||
get_body = g.user_request.send(query=full_query)
|
||||
dirty_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser')
|
||||
|
||||
if feeling_lucky:
|
||||
return redirect(get_first_link(dirty_soup), 303) # Using 303 so the browser performs a GET request for the URL
|
||||
else:
|
||||
formatted_results = content_filter.clean(dirty_soup)
|
||||
|
||||
# Set search type to be used in the header template to allow for repeated searches
|
||||
# in the same category
|
||||
search_type = request_params.get('tbm') if 'tbm' in request_params else ''
|
||||
# Keep count of external elements to fetch before element key can be regenerated
|
||||
app.user_elements[session['uuid']] = elements
|
||||
|
||||
return render_template(
|
||||
'display.html',
|
||||
query=urlparse.unquote(q),
|
||||
search_type=search_type,
|
||||
query=urlparse.unquote(query),
|
||||
search_type=search_util.search_type,
|
||||
dark_mode=g.user_config.dark,
|
||||
response=formatted_results,
|
||||
response=response,
|
||||
search_header=render_template(
|
||||
'header.html',
|
||||
dark_mode=g.user_config.dark,
|
||||
q=urlparse.unquote(q),
|
||||
search_type=search_type,
|
||||
mobile=g.user_request.mobile) if 'isch' not in search_type else '')
|
||||
query=urlparse.unquote(query),
|
||||
search_type=search_util.search_type,
|
||||
mobile=g.user_request.mobile) if 'isch' not in search_util.search_type else '')
|
||||
|
||||
|
||||
@app.route('/config', methods=['GET', 'POST'])
|
||||
@app.route('/config', methods=['GET', 'POST', 'PUT'])
|
||||
@auth_required
|
||||
def config():
|
||||
if request.method == 'GET':
|
||||
return json.dumps(g.user_config.__dict__)
|
||||
elif request.method == 'PUT':
|
||||
if 'name' in request.args:
|
||||
config_pkl = os.path.join(app.config['CONFIG_PATH'], request.args.get('name'))
|
||||
session['config'] = pickle.load(open(config_pkl, 'rb')) if os.path.exists(config_pkl) else session['config']
|
||||
return json.dumps(session['config'])
|
||||
else:
|
||||
return json.dumps({})
|
||||
else:
|
||||
config_data = request.form.to_dict()
|
||||
if 'url' not in config_data or not config_data['url']:
|
||||
config_data['url'] = g.user_config.url
|
||||
|
||||
with open(app.config['CONFIG_PATH'], 'w') as config_file:
|
||||
config_file.write(json.dumps(config_data, indent=4))
|
||||
config_file.close()
|
||||
if 'name' in request.args:
|
||||
pickle.dump(config_data, open(os.path.join(app.config['CONFIG_PATH'], request.args.get('name')), 'wb'))
|
||||
|
||||
session['config'] = config_data
|
||||
return redirect(config_data['url'])
|
||||
|
||||
|
||||
|
@ -187,25 +193,22 @@ def imgres():
|
|||
return redirect(request.args.get('imgurl'))
|
||||
|
||||
|
||||
@app.route('/tmp')
|
||||
@app.route('/element')
|
||||
@auth_required
|
||||
def tmp():
|
||||
cipher_suite = Fernet(app.secret_key)
|
||||
img_url = cipher_suite.decrypt(request.args.get('image_url').encode()).decode()
|
||||
def element():
|
||||
cipher_suite = Fernet(session['keys']['element_key'])
|
||||
src_url = cipher_suite.decrypt(request.args.get('url').encode()).decode()
|
||||
src_type = request.args.get('type')
|
||||
|
||||
try:
|
||||
file_data = g.user_request.send(base_url=img_url, return_bytes=True)
|
||||
file_data = g.user_request.send(base_url=src_url).content
|
||||
app.user_elements[session['uuid']] -= 1
|
||||
tmp_mem = io.BytesIO()
|
||||
tmp_mem.write(file_data)
|
||||
tmp_mem.seek(0)
|
||||
|
||||
return send_file(
|
||||
tmp_mem,
|
||||
as_attachment=True,
|
||||
attachment_filename='tmp.png',
|
||||
mimetype='image/png'
|
||||
)
|
||||
except pycurl_error:
|
||||
return send_file(tmp_mem, mimetype=src_type)
|
||||
except exceptions.RequestException:
|
||||
pass
|
||||
|
||||
empty_gif = base64.b64decode('R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
|
||||
|
@ -215,7 +218,7 @@ def tmp():
|
|||
@app.route('/window')
|
||||
@auth_required
|
||||
def window():
|
||||
get_body = g.user_request.send(base_url=request.args.get('location'))
|
||||
get_body = g.user_request.send(base_url=request.args.get('location')).text
|
||||
get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"')
|
||||
get_body = get_body.replace('href="/', 'href="' + request.args.get('location') + '"')
|
||||
|
||||
|
|
|
@ -71,6 +71,41 @@ const setupConfigLayout = () => {
|
|||
fillConfigValues();
|
||||
};
|
||||
|
||||
/**
 * Prompt for the name of a saved config and ask the server to load it.
 *
 * Sends a PUT to /config with the name (plus ".conf") as the "name" query
 * parameter, then reloads the page. Alerts instead if no name was entered
 * or the server does not answer with status 200.
 *
 * @param {Event} event - The click event; its default action is cancelled.
 */
const loadConfig = event => {
    event.preventDefault();
    let config = prompt("Enter name of config:");
    if (!config) {
        alert("Must specify a name for the config to load");
        return;
    }

    let xhrPUT = new XMLHttpRequest();
    // Encode the user-supplied name so characters such as "&", "#", "?" or
    // spaces cannot truncate or alter the query string.
    xhrPUT.open("PUT", "/config?name=" + encodeURIComponent(config) + ".conf");
    xhrPUT.onload = function() {
        if (xhrPUT.readyState === 4 && xhrPUT.status !== 200) {
            alert("Error loading Whoogle config");
            return;
        }

        location.reload(true);
    };

    xhrPUT.send();
};
|
||||
|
||||
/**
 * Prompt for a name and submit the config form so it is saved under it.
 *
 * Points the "config-form" action at /config with the name (plus ".conf")
 * as the "name" query parameter and submits the form. Alerts instead if no
 * name was entered.
 *
 * @param {Event} event - The click event; its default action is cancelled.
 */
const saveConfig = event => {
    event.preventDefault();
    let config = prompt("Enter name for this config:");
    if (!config) {
        alert("Must specify a name for the config to save");
        return;
    }

    let configForm = document.getElementById("config-form");
    // Encode the user-supplied name so characters such as "&", "#", "?" or
    // spaces cannot truncate or alter the query string.
    configForm.action = '/config?name=' + encodeURIComponent(config) + ".conf";
    configForm.submit();
};
|
||||
|
||||
document.addEventListener("DOMContentLoaded", function() {
|
||||
setTimeout(function() {
|
||||
document.getElementById("main").style.display = "block";
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
style="background-color: {{ '#000' if dark_mode else '#fff' }};
|
||||
color: {{ '#685e79' if dark_mode else '#000' }};
|
||||
border: {{ '1px solid #685e79' if dark_mode else '' }}"
|
||||
spellcheck="false" type="text" value="{{ q }}">
|
||||
spellcheck="false" type="text" value="{{ query }}">
|
||||
<input name="tbm" value="{{ search_type }}" style="display: none">
|
||||
<div class="sc"></div>
|
||||
</div>
|
||||
|
@ -37,7 +37,7 @@
|
|||
<div class="autocomplete" style="width: 100%; flex: 1">
|
||||
<div style="width: 100%; display: flex">
|
||||
<input id="search-bar" autocapitalize="none" autocomplete="off" class="noHIxc" name="q"
|
||||
spellcheck="false" type="text" value="{{ q }}"
|
||||
spellcheck="false" type="text" value="{{ query }}"
|
||||
style="background-color: {{ '#000' if dark_mode else '#fff' }};
|
||||
color: {{ '#685e79' if dark_mode else '#000' }};
|
||||
border: {{ '1px solid #685e79' if dark_mode else '' }}">
|
||||
|
|
|
@ -21,14 +21,14 @@
|
|||
<script type="text/javascript" src="/static/js/controller.js"></script>
|
||||
<link rel="search" href="/opensearch.xml" type="application/opensearchdescription+xml" title="Whoogle Search">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<link rel="stylesheet" href="/static/css/{{ 'search-dark' if dark_mode else 'search' }}.css">
|
||||
<link rel="stylesheet" href="/static/css/{{ 'search-dark' if config.dark else 'search' }}.css">
|
||||
<link rel="stylesheet" href="/static/css/main.css">
|
||||
<title>Whoogle Search</title>
|
||||
</head>
|
||||
<body id="main" style="display: none; background-color: {{ '#000' if dark_mode else '#fff' }}">
|
||||
<body id="main" style="display: none; background-color: {{ '#000' if config.dark else '#fff' }}">
|
||||
<div class="search-container">
|
||||
<img class="logo" src="/static/img/logo.png">
|
||||
<form id="search-form" action="/search" method="{{ request_type }}">
|
||||
<form id="search-form" action="/search" method="{{ 'get' if config.get_only else 'post' }}">
|
||||
<div class="search-fields">
|
||||
<div class="autocomplete">
|
||||
<input type="text" name="q" id="search-bar" autofocus="autofocus">
|
||||
|
@ -40,17 +40,13 @@
|
|||
<button id="config-collapsible" class="collapsible">Configuration</button>
|
||||
<div class="content">
|
||||
<div class="config-fields">
|
||||
<form action="/config" method="post">
|
||||
<div class="config-div">
|
||||
<!-- TODO: Add option to regenerate user agent? -->
|
||||
<span class="ua-span">User Agent: {{ ua }}</span>
|
||||
</div>
|
||||
<form id="config-form" action="/config" method="post">
|
||||
<div class="config-div">
|
||||
<label for="config-ctry">Country: </label>
|
||||
<select name="ctry" id="config-ctry">
|
||||
{% for ctry in countries %}
|
||||
<option value="{{ ctry.value }}"
|
||||
{% if ctry.value in current_ctry %}
|
||||
{% if ctry.value in config.ctry %}
|
||||
selected
|
||||
{% endif %}>
|
||||
{{ ctry.name }}
|
||||
|
@ -63,7 +59,7 @@
|
|||
<select name="lang" id="config-lang">
|
||||
{% for lang in languages %}
|
||||
<option value="{{ lang.value }}"
|
||||
{% if lang.value in current_lang %}
|
||||
{% if lang.value in config.lang %}
|
||||
selected
|
||||
{% endif %}>
|
||||
{{ lang.name }}
|
||||
|
@ -100,7 +96,9 @@
|
|||
<input type="text" name="url" id="config-url" value="">
|
||||
</div>
|
||||
<div class="config-div">
|
||||
<input type="submit" id="config-submit" value="Save">
|
||||
<input type="submit" id="config-load" onclick="loadConfig(event)" value="Load">
|
||||
<input type="submit" id="config-submit" value="Apply">
|
||||
<input type="submit" id="config-submit" onclick="saveConfig(event)" value="Save As...">
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
from cryptography.fernet import Fernet
|
||||
|
||||
SESSION_VALS = ['uuid', 'config', 'keys']
|
||||
|
||||
|
||||
def generate_user_keys():
    """Generate (or regenerate) the pair of keys kept per user.

    Returns:
        A dict with an ``'element_key'`` and a ``'text_key'`` entry, each
        holding a newly generated Fernet key.
    """
    key_names = ('element_key', 'text_key')
    return {name: Fernet.generate_key() for name in key_names}
|
||||
|
||||
|
||||
def valid_user_session(session):
    """Check that a session holds every value listed in ``SESSION_VALS``.

    Args:
        session: Mapping-like session object supporting ``in``.

    Returns:
        True if all required names are present, False as soon as one is
        missing.
    """
    return all(required in session for required in SESSION_VALS)
|
|
@ -0,0 +1,69 @@
|
|||
from app import app
|
||||
from app.filter import Filter, get_first_link
|
||||
from app.request import gen_query
|
||||
from bs4 import BeautifulSoup
|
||||
from cryptography.fernet import Fernet, InvalidToken
|
||||
from flask import g
|
||||
from typing import Any, Tuple
|
||||
|
||||
|
||||
class RoutingUtils:
    """Helper that turns an incoming search request into a response.

    Holds the request parameters (query args for GET, form data otherwise),
    the client's User-Agent header, the user config and the session, and
    exposes its attributes through item access as well.
    """

    def __init__(self, request, config, session):
        """Capture the pieces of the request needed to run a search.

        Args:
            request: Request object providing ``method``, ``args``,
                ``form`` and ``headers``.
            config: User configuration passed on to the filter and query
                builder.
            session: Session mapping; reads ``'uuid'`` and ``'keys'``.
        """
        self.request_params = request.args if request.method == 'GET' else request.form
        # May be None when the client sends no User-Agent header
        self.user_agent = request.headers.get('User-Agent')
        self.feeling_lucky = False
        self.config = config
        self.session = session
        self.query = ''
        self.search_type = self.request_params.get('tbm', '')

    def __getitem__(self, name):
        return getattr(self, name)

    def __setitem__(self, name, value):
        return setattr(self, name, value)

    def __delitem__(self, name):
        return delattr(self, name)

    def __contains__(self, name):
        return hasattr(self, name)

    def _is_mobile(self) -> bool:
        """Return True if the User-Agent mentions Android or iPhone."""
        # A missing header is treated as an empty string so the substring
        # checks cannot raise TypeError on None.
        user_agent = self.user_agent or ''
        return 'Android' in user_agent or 'iPhone' in user_agent

    def new_search_query(self) -> str:
        """Prepare the session for a new search and extract the query.

        Resets the user's element count, regenerates the element key and,
        for a non-empty query, the text key. Sets ``feeling_lucky`` and
        ``query`` on the instance.

        Returns:
            The query text (without a leading ``'! '`` marker), or ``''``
            when the request carries no query.
        """
        # Clear previous elements and generate a new element key
        app.user_elements[self.session['uuid']] = 0
        self.session['keys']['element_key'] = Fernet.generate_key()

        q = self.request_params.get('q')
        if not q:
            return ''

        # Attempt to decrypt if this is an internal link; a query that is
        # not a valid token is used as-is.
        try:
            q = Fernet(self.session['keys']['text_key']).decrypt(q.encode()).decode()
        except InvalidToken:
            pass

        # Reset text key
        self.session['keys']['text_key'] = Fernet.generate_key()

        # Format depending on whether or not the query is a "feeling lucky" query
        self.feeling_lucky = q.startswith('! ')
        self.query = q[2:] if self.feeling_lucky else q
        return self.query

    def generate_response(self) -> Tuple[Any, int]:
        """Send the search and build the response for the current query.

        Returns:
            For a "feeling lucky" query, the first result link and ``1``;
            otherwise the cleaned result soup and the filter's element
            count.
        """
        content_filter = Filter(self.session['keys'], mobile=self._is_mobile(), config=self.config)
        full_query = gen_query(self.query, self.request_params, self.config, content_filter.near)
        get_body = g.user_request.send(query=full_query).text

        # Produce cleanable html soup from response
        html_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser')

        if self.feeling_lucky:
            return get_first_link(html_soup), 1

        formatted_results = content_filter.clean(html_soup)
        return formatted_results, content_filter.elements
|
|
@ -4,15 +4,16 @@ cffi==1.13.2
|
|||
Click==7.0
|
||||
cryptography==2.8
|
||||
Flask==1.1.1
|
||||
Flask-Session==0.3.2
|
||||
itsdangerous==1.1.0
|
||||
Jinja2==2.10.3
|
||||
lxml==4.5.1
|
||||
MarkupSafe==1.1.1
|
||||
pycparser==2.19
|
||||
pycurl==7.43.0.4
|
||||
pyOpenSSL==19.1.0
|
||||
pytest==5.4.1
|
||||
python-dateutil==2.8.1
|
||||
requests==2.23.0
|
||||
six==1.14.0
|
||||
soupsieve==1.9.5
|
||||
Werkzeug==0.16.0
|
||||
|
|
2
setup.py
2
setup.py
|
@ -8,7 +8,7 @@ setuptools.setup(
|
|||
author='Ben Busby',
|
||||
author_email='benbusby@protonmail.com',
|
||||
name='whoogle-search',
|
||||
version='0.1.4',
|
||||
version='0.2.0',
|
||||
include_package_data=True,
|
||||
install_requires=requirements,
|
||||
description='Self-hosted, ad-free, privacy-respecting Google metasearch engine',
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
from app.utils.misc import generate_user_keys, valid_user_session
|
||||
|
||||
|
||||
def test_generate_user_keys():
    """The generated key set has a text key and a different element key."""
    keys = generate_user_keys()
    assert 'text_key' in keys
    assert 'element_key' in keys
    # Compare for inequality; `not in` on the key values is a substring test
    assert keys['text_key'] != keys['element_key']
|
||||
|
||||
|
||||
def test_valid_session(client):
    """An untouched session is invalid; one with uuid, keys and config is valid."""
    with client.session_transaction() as user_session:
        assert not valid_user_session(user_session)

        user_session.update(uuid='test', keys=generate_user_keys(), config={})

        assert valid_user_session(user_session)
|
||||
|
||||
|
||||
def test_request_key_generation(client):
    """The session's text key changes between two consecutive searches."""
    rv = client.get('/search?q=test+1')
    assert rv._status_code == 200

    with client.session_transaction() as session:
        assert valid_user_session(session)
        text_key = session['keys']['text_key']

    rv = client.get('/search?q=test+2')
    assert rv._status_code == 200

    with client.session_transaction() as session:
        assert valid_user_session(session)
        # Compare for inequality; `not in` on the key values is a substring test
        assert text_key != session['keys']['text_key']
|
|
@ -1,13 +1,13 @@
|
|||
from bs4 import BeautifulSoup
|
||||
from cryptography.fernet import Fernet
|
||||
from app.filter import Filter
|
||||
from app.utils.misc import generate_user_keys
|
||||
from datetime import datetime
|
||||
from dateutil.parser import *
|
||||
|
||||
|
||||
def get_search_results(data):
|
||||
secret_key = Fernet.generate_key()
|
||||
soup = Filter(secret_key=secret_key).clean(BeautifulSoup(data, 'html.parser'))
|
||||
secret_key = generate_user_keys()
|
||||
soup = Filter(user_keys=secret_key).clean(BeautifulSoup(data, 'html.parser'))
|
||||
|
||||
main_divs = soup.find('div', {'id': 'main'})
|
||||
assert len(main_divs) > 1
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
from app.models.config import Config
|
||||
import json
|
||||
import random
|
||||
|
||||
demo_config = {
|
||||
'near': random.choice(['Seattle', 'New York', 'San Francisco']),
|
||||
'dark_mode': str(random.getrandbits(1)),
|
||||
'nojs': str(random.getrandbits(1))
|
||||
'nojs': str(random.getrandbits(1)),
|
||||
'lang': random.choice(Config.LANGUAGES)['value'],
|
||||
'ctry': random.choice(Config.COUNTRIES)['value']
|
||||
}
|
||||
|
||||
|
||||
|
@ -17,6 +20,7 @@ def test_search(client):
|
|||
rv = client.get('/search?q=test')
|
||||
assert rv._status_code == 200
|
||||
|
||||
|
||||
def test_feeling_lucky(client):
|
||||
rv = client.get('/search?q=!%20test')
|
||||
assert rv._status_code == 303
|
||||
|
|
Loading…
Reference in New Issue