diff --git a/.gitignore b/.gitignore index 20747c7..f6e039f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ __pycache__/ *.pem config.json test/static +flask_session/ +app/static/config # pip stuff build/ diff --git a/app/__init__.py b/app/__init__.py index 4b78a8d..53d4a59 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,12 +1,24 @@ +from app.utils.misc import generate_user_keys from cryptography.fernet import Fernet from flask import Flask +from flask_session import Session import os app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static') -app.secret_key = Fernet.generate_key() -app.config['VERSION_NUMBER'] = '0.1.4' +app.user_elements = {} +app.config['SECRET_KEY'] = os.urandom(16) +app.config['SESSION_TYPE'] = 'filesystem' +app.config['VERSION_NUMBER'] = '0.2.0' app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__))) app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) -app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json' +app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER'] + '/config') +app.config['SESSION_FILE_DIR'] = app.config['CONFIG_PATH'] +app.config['SESSION_COOKIE_SECURE'] = True + +if not os.path.exists(app.config['CONFIG_PATH']): + os.makedirs(app.config['CONFIG_PATH']) + +sess = Session() +sess.init_app(app) from app import routes diff --git a/app/filter.py b/app/filter.py index 8c25fe4..be9809b 100644 --- a/app/filter.py +++ b/app/filter.py @@ -1,5 +1,6 @@ from app.request import VALID_PARAMS from bs4 import BeautifulSoup +from bs4.element import ResultSet from cryptography.fernet import Fernet import re import urllib.parse as urlparse @@ -17,14 +18,9 @@ data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42m def get_first_link(soup): # Replace hrefs with only the intended destination (no "utm" type tags) for a in soup.find_all('a', href=True): - href = a['href'].replace('https://www.google.com', '') - - result_link = urlparse.urlparse(href) - query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else '' - # Return the first search result URL - if 'url?q=' in href: - return filter_link_args(href) + if 'url?q=' in a['href']: + return filter_link_args(a['href']) def filter_link_args(query_link): @@ -51,8 +47,12 @@ def filter_link_args(query_link): return query_link +def has_ad_content(element): + return element == 'ad' or element == 'sponsoredⓘ' + + class Filter: - def __init__(self, mobile=False, config=None, secret_key=''): + def __init__(self, user_keys: dict, mobile=False, config=None): if config is None: config = {} @@ -61,11 +61,17 @@ class Filter: self.nojs = config['nojs'] if 'nojs' in config else False self.new_tab = config['new_tab'] if 'new_tab' in config else False self.mobile = mobile - self.secret_key = secret_key + self.user_keys = user_keys + self.main_divs = ResultSet('') + self._elements = 0 def __getitem__(self, name): return getattr(self, name) + @property + def elements(self): + return self._elements + def reskin(self, page): # Aesthetic only re-skinning page = page.replace('>G<', '>Wh<') @@ -76,11 +82,31 @@ class Filter: return page + def encrypt_path(self, msg, is_element=False): + # Encrypts path to avoid plaintext results in logs + if is_element: + # Element paths are tracked differently in order for the element key to be regenerated + # once all elements have been loaded + enc_path = Fernet(self.user_keys['element_key']).encrypt(msg.encode()).decode() + self._elements += 1 + return enc_path + + return Fernet(self.user_keys['text_key']).encrypt(msg.encode()).decode() + def clean(self, soup): - self.remove_ads(soup) - self.update_image_paths(soup) + self.main_divs = soup.find('div', {'id': 'main'}) + self.remove_ads() + self.fix_question_section() self.update_styling(soup) - self.update_links(soup) + + for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]: + self.update_element_src(img, 'image/png') + + for audio in [_ for _ in soup.find_all('audio') if 'src' in _.attrs]: + self.update_element_src(audio, 'audio/mpeg') + + for link in soup.find_all('a', href=True): + self.update_link(link) input_form = soup.find('form') if input_form is not None: @@ -105,35 +131,42 @@ class Filter: return soup - def remove_ads(self, soup): - main_divs = soup.find('div', {'id': 'main'}) - if main_divs is None: + def remove_ads(self): + if not self.main_divs: return - result_divs = main_divs.find_all('div', recursive=False) - for div in [_ for _ in result_divs]: - has_ad = len([_ for _ in div.find_all('span', recursive=True) if 'ad' == _.text.lower()]) + for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]: + has_ad = len([_ for _ in div.find_all('span', recursive=True) if has_ad_content(_.text.lower())]) _ = div.decompose() if has_ad else None - def update_image_paths(self, soup): - for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]: - img_src = img['src'] - if img_src.startswith('//'): - img_src = 'https:' + img_src - elif img_src.startswith(LOGO_URL): - # Re-brand with Whoogle logo - img['src'] = '/static/img/logo.png' - img['style'] = 'height:40px;width:162px' - continue - elif img_src.startswith(GOOG_IMG): - img['src'] = BLANK_B64 - continue + def fix_question_section(self): + if not self.main_divs: + return - enc_src = Fernet(self.secret_key).encrypt(img_src.encode()) - img['src'] = '/tmp?image_url=' + enc_src.decode() - # TODO: Non-mobile image results link to website instead of image - # if not self.mobile: - # img.append(BeautifulSoup(FULL_RES_IMG.format(img_src), 'html.parser')) + question_divs = [_ for _ in self.main_divs.find_all('div', recursive=False) if len(_.find_all('h2')) > 0] + for x in question_divs: + questions = [_ for _ in x.find_all('div', recursive=True) if _.text.endswith('?')] + for question in questions: + question['style'] = 'padding: 10px; font-style: italic;' + + def update_element_src(self, element, mimetype): + element_src = element['src'] + if element_src.startswith('//'): + element_src = 'https:' + element_src + elif element_src.startswith(LOGO_URL): + # Re-brand with Whoogle logo + element['src'] = '/static/img/logo.png' + element['style'] = 'height:40px;width:162px' + return + elif element_src.startswith(GOOG_IMG): + element['src'] = BLANK_B64 + return + + element['src'] = '/element?url=' + self.encrypt_path(element_src, is_element=True) + \ + '&type=' + urlparse.quote(mimetype) + # TODO: Non-mobile image results link to website instead of image + # if not self.mobile: + # img.append(BeautifulSoup(FULL_RES_IMG.format(element_src), 'html.parser')) def update_styling(self, soup): # Remove unnecessary button(s) @@ -169,45 +202,43 @@ class Filter: for href_element in soup.findAll('a'): href_element['style'] = 'color: white' if href_element['href'].startswith('/search') else '' - def update_links(self, soup): - # Replace hrefs with only the intended destination (no "utm" type tags) - for a in soup.find_all('a', href=True): - href = a['href'].replace('https://www.google.com', '') - if '/advanced_search' in href: - a.decompose() - continue - elif self.new_tab: - a['target'] = '_blank' + def update_link(self, link): + # Replace href with only the intended destination (no "utm" type tags) + href = link['href'].replace('https://www.google.com', '') + if '/advanced_search' in href: + link.decompose() + return + elif self.new_tab: + link['target'] = '_blank' - result_link = urlparse.urlparse(href) - query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else '' + result_link = urlparse.urlparse(href) + query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else '' - if query_link.startswith('/'): - a['href'] = 'https://google.com' + query_link - elif '/search?q=' in href: - enc_result = Fernet(self.secret_key).encrypt(query_link.encode()) - new_search = '/search?q=' + enc_result.decode() + if query_link.startswith('/'): + link['href'] = 'https://google.com' + query_link + elif '/search?q=' in href: + new_search = '/search?q=' + self.encrypt_path(query_link) - query_params = parse_qs(urlparse.urlparse(href).query) - for param in VALID_PARAMS: - param_val = query_params[param][0] if param in query_params else '' - new_search += '&' + param + '=' + param_val - a['href'] = new_search - elif 'url?q=' in href: - # Strip unneeded arguments - a['href'] = filter_link_args(query_link) + query_params = parse_qs(urlparse.urlparse(href).query) + for param in VALID_PARAMS: + param_val = query_params[param][0] if param in query_params else '' + new_search += '&' + param + '=' + param_val + link['href'] = new_search + elif 'url?q=' in href: + # Strip unneeded arguments + link['href'] = filter_link_args(query_link) - # Add no-js option - if self.nojs: - gen_nojs(soup, a['href'], a) - else: - a['href'] = href + # Add no-js option + if self.nojs: + gen_nojs(link) + else: + link['href'] = href -def gen_nojs(soup, link, sibling): - nojs_link = soup.new_tag('a') - nojs_link['href'] = '/window?location=' + link +def gen_nojs(sibling): + nojs_link = BeautifulSoup().new_tag('a') + nojs_link['href'] = '/window?location=' + sibling['href'] nojs_link['style'] = 'display:block;width:100%;' nojs_link.string = 'NoJS Link: ' + nojs_link['href'] sibling.append(BeautifulSoup('


', 'html.parser')) - sibling.append(nojs_link) \ No newline at end of file + sibling.append(nojs_link) diff --git a/app/request.py b/app/request.py index 7ecd887..38b47b0 100644 --- a/app/request.py +++ b/app/request.py @@ -1,7 +1,7 @@ -from io import BytesIO from lxml import etree -import pycurl import random +import requests +from requests import Response import urllib.parse as urlparse # Core Google search URLs @@ -15,7 +15,7 @@ DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' VALID_PARAMS = ['tbs', 'tbm', 'start', 'near'] -def gen_user_agent(normal_ua, is_mobile): +def gen_user_agent(is_mobile): mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla' firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox' linux = random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']) + 'ux' @@ -66,20 +66,14 @@ class Request: def __init__(self, normal_ua, language='lang_en'): self.language = language self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua - self.modified_user_agent = gen_user_agent(normal_ua, self.mobile) + self.modified_user_agent = gen_user_agent(self.mobile) def __getitem__(self, name): return getattr(self, name) - def get_decode_value(self): - if 'lang_zh' in self.language: - return 'gb2312' - else: - return 'unicode-escape' - def autocomplete(self, query): ac_query = dict(hl=self.language, q=query) - response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)) + response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)).text if response: dom = etree.fromstring(response) @@ -87,20 +81,9 @@ class Request: return [] - def send(self, base_url=SEARCH_URL, query='', return_bytes=False): - response_header = [] + def send(self, base_url=SEARCH_URL, query='') -> Response: + headers = { + 'User-Agent': self.modified_user_agent + } - b_obj = BytesIO() - crl = pycurl.Curl() - crl.setopt(crl.URL, base_url + query) - crl.setopt(crl.USERAGENT, self.modified_user_agent) - crl.setopt(crl.WRITEDATA, b_obj) - crl.setopt(crl.HEADERFUNCTION, response_header.append) - crl.setopt(pycurl.FOLLOWLOCATION, 1) - crl.perform() - crl.close() - - if return_bytes: - return b_obj.getvalue() - else: - return b_obj.getvalue().decode(self.get_decode_value(), 'ignore') + return requests.get(base_url + query, headers=headers) diff --git a/app/routes.py b/app/routes.py index 3f50082..ca3bac4 100644 --- a/app/routes.py +++ b/app/routes.py @@ -1,18 +1,21 @@ from app import app -from app.filter import Filter, get_first_link from app.models.config import Config -from app.request import Request, gen_query +from app.request import Request +from app.utils.misc import generate_user_keys, valid_user_session +from app.utils.routing_utils import * import argparse import base64 from bs4 import BeautifulSoup -from cryptography.fernet import Fernet, InvalidToken -from flask import g, jsonify, make_response, request, redirect, render_template, send_file +from cryptography.fernet import Fernet +from flask import g, jsonify, make_response, request, redirect, render_template, send_file, session from functools import wraps import io import json import os -from pycurl import error as pycurl_error +import pickle import urllib.parse as urlparse +from requests import exceptions +import uuid import waitress @@ -34,17 +37,22 @@ def auth_required(f): @app.before_request def before_request_func(): - # Always redirect to https if HTTPS_ONLY is set (otherwise default to false) + # Generate secret key for user if unavailable + if not valid_user_session(session): + session['config'] = {'url': request.url_root} + session['keys'] = generate_user_keys() + session['uuid'] = str(uuid.uuid4()) + + if session['uuid'] not in app.user_elements: + app.user_elements.update({session['uuid']: 0}) + + # Always redirect to https if HTTPS_ONLY is set (otherwise default to False) https_only = os.getenv('HTTPS_ONLY', False) - config_path = app.config['CONFIG_PATH'] if https_only and request.url.startswith('http://'): - https_url = request.url.replace('http://', 'https://', 1) - code = 308 - return redirect(https_url, code=code) + return redirect(request.url.replace('http://', 'https://', 1), code=308) - json_config = json.load(open(config_path)) if os.path.exists(config_path) else {'url': request.url_root} - g.user_config = Config(**json_config) + g.user_config = Config(**session['config']) if not g.user_config.url: g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root @@ -53,6 +61,16 @@ def before_request_func(): g.app_location = g.user_config.url +@app.after_request +def after_request_func(response): + # Regenerate element key if all elements have been served to user + if app.user_elements[session['uuid']] <= 0 and '/element' in request.url: + session['keys']['element_key'] = Fernet.generate_key() + app.user_elements[session['uuid']] = 0 + + return response + + @app.errorhandler(404) def unknown_page(e): return redirect(g.app_location) @@ -62,14 +80,10 @@ def unknown_page(e): @auth_required def index(): return render_template('index.html', - dark_mode=g.user_config.dark, - ua=g.user_request.modified_user_agent, languages=Config.LANGUAGES, countries=Config.COUNTRIES, - current_lang=g.user_config.lang, - current_ctry=g.user_config.ctry, - version_number=app.config['VERSION_NUMBER'], - request_type='get' if g.user_config.get_only else 'post') + config=g.user_config, + version_number=app.config['VERSION_NUMBER']) @app.route('/opensearch.xml', methods=['GET']) @@ -103,68 +117,60 @@ def autocomplete(): @app.route('/search', methods=['GET', 'POST']) @auth_required def search(): - request_params = request.args if request.method == 'GET' else request.form - q = request_params.get('q') + # Clear previous elements and generate a new key each time a new search is performed + app.user_elements[session['uuid']] = 0 + session['keys']['element_key'] = Fernet.generate_key() - if q is None or len(q) == 0: + search_util = RoutingUtils(request, g.user_config, session) + query = search_util.new_search_query() + + # Redirect to home if invalid/blank search + if not query: return redirect('/') - else: - # Attempt to decrypt if this is an internal link - try: - q = Fernet(app.secret_key).decrypt(q.encode()).decode() - except InvalidToken: - pass - feeling_lucky = q.startswith('! ') + # Generate response and number of external elements from the page + response, elements = search_util.generate_response() + if search_util.feeling_lucky: + return redirect(response, code=303) - if feeling_lucky: # Well do you, punk? - q = q[2:] - - user_agent = request.headers.get('User-Agent') - mobile = 'Android' in user_agent or 'iPhone' in user_agent - - content_filter = Filter(mobile, g.user_config, secret_key=app.secret_key) - full_query = gen_query(q, request_params, g.user_config, content_filter.near) - get_body = g.user_request.send(query=full_query) - dirty_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser') - - if feeling_lucky: - return redirect(get_first_link(dirty_soup), 303) # Using 303 so the browser performs a GET request for the URL - else: - formatted_results = content_filter.clean(dirty_soup) - - # Set search type to be used in the header template to allow for repeated searches - # in the same category - search_type = request_params.get('tbm') if 'tbm' in request_params else '' + # Keep count of external elements to fetch before element key can be regenerated + app.user_elements[session['uuid']] = elements return render_template( 'display.html', - query=urlparse.unquote(q), - search_type=search_type, + query=urlparse.unquote(query), + search_type=search_util.search_type, dark_mode=g.user_config.dark, - response=formatted_results, + response=response, search_header=render_template( 'header.html', dark_mode=g.user_config.dark, - q=urlparse.unquote(q), - search_type=search_type, - mobile=g.user_request.mobile) if 'isch' not in search_type else '') + query=urlparse.unquote(query), + search_type=search_util.search_type, + mobile=g.user_request.mobile) if 'isch' not in search_util.search_type else '') -@app.route('/config', methods=['GET', 'POST']) +@app.route('/config', methods=['GET', 'POST', 'PUT']) @auth_required def config(): if request.method == 'GET': return json.dumps(g.user_config.__dict__) + elif request.method == 'PUT': + if 'name' in request.args: + config_pkl = os.path.join(app.config['CONFIG_PATH'], request.args.get('name')) + session['config'] = pickle.load(open(config_pkl, 'rb')) if os.path.exists(config_pkl) else session['config'] + return json.dumps(session['config']) + else: + return json.dumps({}) else: config_data = request.form.to_dict() if 'url' not in config_data or not config_data['url']: config_data['url'] = g.user_config.url - with open(app.config['CONFIG_PATH'], 'w') as config_file: - config_file.write(json.dumps(config_data, indent=4)) - config_file.close() + if 'name' in request.args: + pickle.dump(config_data, open(os.path.join(app.config['CONFIG_PATH'], request.args.get('name')), 'wb')) + session['config'] = config_data return redirect(config_data['url']) @@ -187,25 +193,22 @@ def imgres(): return redirect(request.args.get('imgurl')) -@app.route('/tmp') +@app.route('/element') @auth_required -def tmp(): - cipher_suite = Fernet(app.secret_key) - img_url = cipher_suite.decrypt(request.args.get('image_url').encode()).decode() +def element(): + cipher_suite = Fernet(session['keys']['element_key']) + src_url = cipher_suite.decrypt(request.args.get('url').encode()).decode() + src_type = request.args.get('type') try: - file_data = g.user_request.send(base_url=img_url, return_bytes=True) + file_data = g.user_request.send(base_url=src_url).content + app.user_elements[session['uuid']] -= 1 tmp_mem = io.BytesIO() tmp_mem.write(file_data) tmp_mem.seek(0) - return send_file( - tmp_mem, - as_attachment=True, - attachment_filename='tmp.png', - mimetype='image/png' - ) - except pycurl_error: + return send_file(tmp_mem, mimetype=src_type) + except exceptions.RequestException: pass empty_gif = base64.b64decode('R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==') @@ -215,7 +218,7 @@ def tmp(): @app.route('/window') @auth_required def window(): - get_body = g.user_request.send(base_url=request.args.get('location')) + get_body = g.user_request.send(base_url=request.args.get('location')).text get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"') get_body = get_body.replace('href="/', 'href="' + request.args.get('location') + '"') diff --git a/app/static/js/controller.js b/app/static/js/controller.js index 4817195..95d917b 100644 --- a/app/static/js/controller.js +++ b/app/static/js/controller.js @@ -71,6 +71,41 @@ const setupConfigLayout = () => { fillConfigValues(); }; +const loadConfig = event => { + event.preventDefault(); + let config = prompt("Enter name of config:"); + if (!config) { + alert("Must specify a name for the config to load"); + return; + } + + let xhrPUT = new XMLHttpRequest(); + xhrPUT.open("PUT", "/config?name=" + config + ".conf"); + xhrPUT.onload = function() { + if (xhrPUT.readyState === 4 && xhrPUT.status !== 200) { + alert("Error loading Whoogle config"); + return; + } + + location.reload(true); + }; + + xhrPUT.send(); +}; + +const saveConfig = event => { + event.preventDefault(); + let config = prompt("Enter name for this config:"); + if (!config) { + alert("Must specify a name for the config to save"); + return; + } + + let configForm = document.getElementById("config-form"); + configForm.action = '/config?name=' + config + ".conf"; + configForm.submit(); +}; + document.addEventListener("DOMContentLoaded", function() { setTimeout(function() { document.getElementById("main").style.display = "block"; diff --git a/app/templates/header.html b/app/templates/header.html index 5356ec2..5573b99 100644 --- a/app/templates/header.html +++ b/app/templates/header.html @@ -15,7 +15,7 @@ style="background-color: {{ '#000' if dark_mode else '#fff' }}; color: {{ '#685e79' if dark_mode else '#000' }}; border: {{ '1px solid #685e79' if dark_mode else '' }}" - spellcheck="false" type="text" value="{{ q }}"> + spellcheck="false" type="text" value="{{ query }}">
@@ -37,7 +37,7 @@
diff --git a/app/templates/index.html b/app/templates/index.html index 9279031..7d32b9f 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -21,14 +21,14 @@ - + Whoogle Search - +
-
+
@@ -40,17 +40,13 @@
- -
- - User Agent: {{ ua }} -
+
{% for lang in languages %}
- +   +   +
diff --git a/app/utils/__init__.py b/app/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/utils/misc.py b/app/utils/misc.py new file mode 100644 index 0000000..a70a82a --- /dev/null +++ b/app/utils/misc.py @@ -0,0 +1,20 @@ +from cryptography.fernet import Fernet + +SESSION_VALS = ['uuid', 'config', 'keys'] + + +def generate_user_keys(): + # Generate/regenerate unique key per user + return { + 'element_key': Fernet.generate_key(), + 'text_key': Fernet.generate_key() + } + + +def valid_user_session(session): + # Generate secret key for user if unavailable + for value in SESSION_VALS: + if value not in session: + return False + + return True diff --git a/app/utils/routing_utils.py b/app/utils/routing_utils.py new file mode 100644 index 0000000..cc3ed1f --- /dev/null +++ b/app/utils/routing_utils.py @@ -0,0 +1,69 @@ +from app import app +from app.filter import Filter, get_first_link +from app.request import gen_query +from bs4 import BeautifulSoup +from cryptography.fernet import Fernet, InvalidToken +from flask import g +from typing import Any, Tuple + + +class RoutingUtils: + def __init__(self, request, config, session): + self.request_params = request.args if request.method == 'GET' else request.form + self.user_agent = request.headers.get('User-Agent') + self.feeling_lucky = False + self.config = config + self.session = session + self.query = '' + self.search_type = self.request_params.get('tbm') if 'tbm' in self.request_params else '' + + def __getitem__(self, name): + return getattr(self, name) + + def __setitem__(self, name, value): + return setattr(self, name, value) + + def __delitem__(self, name): + return delattr(self, name) + + def __contains__(self, name): + return hasattr(self, name) + + def new_search_query(self) -> str: + app.user_elements[self.session['uuid']] = 0 + self.session['keys']['element_key'] = Fernet.generate_key() + + q = self.request_params.get('q') + + if q is None or len(q) == 0: + return '' + else: + # Attempt to decrypt if this is an internal link + try: + q = Fernet(self.session['keys']['text_key']).decrypt(q.encode()).decode() + except InvalidToken: + pass + + # Reset text key + self.session['keys']['text_key'] = Fernet.generate_key() + + # Format depending on whether or not the query is a "feeling lucky" query + self.feeling_lucky = q.startswith('! ') + self.query = q[2:] if self.feeling_lucky else q + return self.query + + def generate_response(self) -> Tuple[Any, int]: + mobile = 'Android' in self.user_agent or 'iPhone' in self.user_agent + + content_filter = Filter(self.session['keys'], mobile=mobile, config=self.config) + full_query = gen_query(self.query, self.request_params, self.config, content_filter.near) + get_body = g.user_request.send(query=full_query).text + + # Produce cleanable html soup from response + html_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser') + + if self.feeling_lucky: + return get_first_link(html_soup), 1 + else: + formatted_results = content_filter.clean(html_soup) + return formatted_results, content_filter.elements diff --git a/requirements.txt b/requirements.txt index 030780c..702d8ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,15 +4,16 @@ cffi==1.13.2 Click==7.0 cryptography==2.8 Flask==1.1.1 +Flask-Session==0.3.2 itsdangerous==1.1.0 Jinja2==2.10.3 lxml==4.5.1 MarkupSafe==1.1.1 pycparser==2.19 -pycurl==7.43.0.4 pyOpenSSL==19.1.0 pytest==5.4.1 python-dateutil==2.8.1 +requests==2.23.0 six==1.14.0 soupsieve==1.9.5 Werkzeug==0.16.0 diff --git a/setup.py b/setup.py index 3428459..08652bc 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setuptools.setup( author='Ben Busby', author_email='benbusby@protonmail.com', name='whoogle-search', - version='0.1.4', + version='0.2.0', include_package_data=True, install_requires=requirements, description='Self-hosted, ad-free, privacy-respecting Google metasearch engine', diff --git a/test/test_misc.py b/test/test_misc.py new file mode 100644 index 0000000..296d03a --- /dev/null +++ b/test/test_misc.py @@ -0,0 +1,36 @@ +from app.utils.misc import generate_user_keys, valid_user_session + + +def test_generate_user_keys(): + keys = generate_user_keys() + assert 'text_key' in keys + assert 'element_key' in keys + assert keys['text_key'] not in keys['element_key'] + + +def test_valid_session(client): + with client.session_transaction() as session: + assert not valid_user_session(session) + + session['uuid'] = 'test' + session['keys'] = generate_user_keys() + session['config'] = {} + + assert valid_user_session(session) + + +def test_request_key_generation(client): + text_key = '' + rv = client.get('/search?q=test+1') + assert rv._status_code == 200 + + with client.session_transaction() as session: + assert valid_user_session(session) + text_key = session['keys']['text_key'] + + rv = client.get('/search?q=test+2') + assert rv._status_code == 200 + + with client.session_transaction() as session: + assert valid_user_session(session) + assert text_key not in session['keys']['text_key'] diff --git a/test/test_results.py b/test/test_results.py index 7f500c8..a943de6 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -1,13 +1,13 @@ from bs4 import BeautifulSoup -from cryptography.fernet import Fernet from app.filter import Filter +from app.utils.misc import generate_user_keys from datetime import datetime from dateutil.parser import * def get_search_results(data): - secret_key = Fernet.generate_key() - soup = Filter(secret_key=secret_key).clean(BeautifulSoup(data, 'html.parser')) + secret_key = generate_user_keys() + soup = Filter(user_keys=secret_key).clean(BeautifulSoup(data, 'html.parser')) main_divs = soup.find('div', {'id': 'main'}) assert len(main_divs) > 1 diff --git a/test/test_routes.py b/test/test_routes.py index 91e17be..56c9909 100644 --- a/test/test_routes.py +++ b/test/test_routes.py @@ -1,10 +1,13 @@ +from app.models.config import Config import json import random demo_config = { 'near': random.choice(['Seattle', 'New York', 'San Francisco']), 'dark_mode': str(random.getrandbits(1)), - 'nojs': str(random.getrandbits(1)) + 'nojs': str(random.getrandbits(1)), + 'lang': random.choice(Config.LANGUAGES)['value'], + 'ctry': random.choice(Config.COUNTRIES)['value'] } @@ -17,6 +20,7 @@ def test_search(client): rv = client.get('/search?q=test') assert rv._status_code == 200 + def test_feeling_lucky(client): rv = client.get('/search?q=!%20test') assert rv._status_code == 303