Session refactoring and improved filter (#86)
* Project refactor (#85)
* Major refactor of requests and session management
  - Switches from pycurl to requests library
  - Allows for less janky decoding, especially with non-latin character sets
  - Adds session level management of user configs
  - Allows for each session to set its own config -- users with blocked cookies fall back to the "default" profile (same usage as before)
  - Updates key gen/regen to more aggressively swap out keys after each request
* Added ability to save/load configs by name
  - New PUT method for config allows changing config with specified name
  - New methods in js controller to handle loading/saving of configs
* Result formatting and removal of unused elements
  - Fixed question section formatting from results page (added appropriate padding and made questions styled as italic)
  - Removed user agent display from main config settings
* Minor change to save config button label (now "Save As...")
* Fixed issue with "de-pickling" of flask session
  Having a gitignore-everything ("*") file within a flask session folder seems to cause a weird bug where the state of the app becomes unusable from continuously trying to prune files listed in the gitignore (and it can't prune '*').
* Switched to pickling saved configs
* Updated ad/sponsored content filter and conf naming
  Configs are now named with a .conf extension to allow for easier manual cleanup/modification of named config files.
  Sponsored content now removed by basic string matching of span content.
* Version bump to 0.2.0
* Fixed request.send return style
* Moved custom conf files to their own directory
* Refactored whoogle session mgmt
  Now allows a fallback "default" session to be used if a user's browser is blocking cookies.
* Reworked pytest client fixture to support new session mgmt
* Added better multilingual support, updated filter
  Results page now includes method for switching to "All Languages" from whichever language is specified as the primary in the config (see #74).
Also removes the non-Whoogle links from the page footer, leaving only the page navigation controls.
Added support for the date range filter on the results page, though I'd still recommend using the ":past <unit>" query instead.
* Removed no-cache enforcement, minor styling/formatting improvements
* Improving ad filtering for non-English languages
* Added footer to results page
main
parent
d859e46a6c
commit
b2133edaa3
|
@ -3,8 +3,12 @@ venv/
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.pyc
|
*.pyc
|
||||||
*.pem
|
*.pem
|
||||||
|
*.conf
|
||||||
config.json
|
config.json
|
||||||
test/static
|
test/static
|
||||||
|
flask_session/
|
||||||
|
app/static/config
|
||||||
|
app/static/custom_config
|
||||||
|
|
||||||
# pip stuff
|
# pip stuff
|
||||||
build/
|
build/
|
||||||
|
|
|
@ -1,12 +1,27 @@
|
||||||
from cryptography.fernet import Fernet
|
from app.utils.misc import generate_user_keys
|
||||||
from flask import Flask
|
from flask import Flask
|
||||||
|
from flask_session import Session
|
||||||
import os
|
import os
|
||||||
|
|
||||||
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
|
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
|
||||||
app.secret_key = Fernet.generate_key()
|
app.user_elements = {}
|
||||||
app.config['VERSION_NUMBER'] = '0.1.4'
|
app.default_key_set = generate_user_keys()
|
||||||
|
app.no_cookie_ips = []
|
||||||
|
app.config['SECRET_KEY'] = os.urandom(32)
|
||||||
|
app.config['SESSION_TYPE'] = 'filesystem'
|
||||||
|
app.config['VERSION_NUMBER'] = '0.2.0'
|
||||||
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
|
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
|
||||||
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
|
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
|
||||||
app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json'
|
app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', os.path.join(app.config['STATIC_FOLDER'], 'config'))
|
||||||
|
app.config['DEFAULT_CONFIG'] = os.path.join(app.config['CONFIG_PATH'], 'config.json')
|
||||||
|
app.config['SESSION_FILE_DIR'] = os.path.join(app.config['CONFIG_PATH'], 'session')
|
||||||
|
|
||||||
|
if not os.path.exists(app.config['CONFIG_PATH']):
|
||||||
|
os.makedirs(app.config['CONFIG_PATH'])
|
||||||
|
|
||||||
|
if not os.path.exists(app.config['SESSION_FILE_DIR']):
|
||||||
|
os.makedirs(app.config['SESSION_FILE_DIR'])
|
||||||
|
|
||||||
|
Session(app)
|
||||||
|
|
||||||
from app import routes
|
from app import routes
|
||||||
|
|
179
app/filter.py
179
app/filter.py
|
@ -1,5 +1,7 @@
|
||||||
from app.request import VALID_PARAMS
|
from app.request import VALID_PARAMS
|
||||||
|
from app.utils.misc import BLACKLIST
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from bs4.element import ResultSet
|
||||||
from cryptography.fernet import Fernet
|
from cryptography.fernet import Fernet
|
||||||
import re
|
import re
|
||||||
import urllib.parse as urlparse
|
import urllib.parse as urlparse
|
||||||
|
@ -17,14 +19,9 @@ data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42m
|
||||||
def get_first_link(soup):
|
def get_first_link(soup):
|
||||||
# Replace hrefs with only the intended destination (no "utm" type tags)
|
# Replace hrefs with only the intended destination (no "utm" type tags)
|
||||||
for a in soup.find_all('a', href=True):
|
for a in soup.find_all('a', href=True):
|
||||||
href = a['href'].replace('https://www.google.com', '')
|
|
||||||
|
|
||||||
result_link = urlparse.urlparse(href)
|
|
||||||
query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
|
|
||||||
|
|
||||||
# Return the first search result URL
|
# Return the first search result URL
|
||||||
if 'url?q=' in href:
|
if 'url?q=' in a['href']:
|
||||||
return filter_link_args(href)
|
return filter_link_args(a['href'])
|
||||||
|
|
||||||
|
|
||||||
def filter_link_args(query_link):
|
def filter_link_args(query_link):
|
||||||
|
@ -51,8 +48,12 @@ def filter_link_args(query_link):
|
||||||
return query_link
|
return query_link
|
||||||
|
|
||||||
|
|
||||||
|
def has_ad_content(element: str):
|
||||||
|
return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element
|
||||||
|
|
||||||
|
|
||||||
class Filter:
|
class Filter:
|
||||||
def __init__(self, mobile=False, config=None, secret_key=''):
|
def __init__(self, user_keys: dict, mobile=False, config=None):
|
||||||
if config is None:
|
if config is None:
|
||||||
config = {}
|
config = {}
|
||||||
|
|
||||||
|
@ -61,11 +62,17 @@ class Filter:
|
||||||
self.nojs = config['nojs'] if 'nojs' in config else False
|
self.nojs = config['nojs'] if 'nojs' in config else False
|
||||||
self.new_tab = config['new_tab'] if 'new_tab' in config else False
|
self.new_tab = config['new_tab'] if 'new_tab' in config else False
|
||||||
self.mobile = mobile
|
self.mobile = mobile
|
||||||
self.secret_key = secret_key
|
self.user_keys = user_keys
|
||||||
|
self.main_divs = ResultSet('')
|
||||||
|
self._elements = 0
|
||||||
|
|
||||||
def __getitem__(self, name):
|
def __getitem__(self, name):
|
||||||
return getattr(self, name)
|
return getattr(self, name)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def elements(self):
|
||||||
|
return self._elements
|
||||||
|
|
||||||
def reskin(self, page):
|
def reskin(self, page):
|
||||||
# Aesthetic only re-skinning
|
# Aesthetic only re-skinning
|
||||||
page = page.replace('>G<', '>Wh<')
|
page = page.replace('>G<', '>Wh<')
|
||||||
|
@ -76,11 +83,31 @@ class Filter:
|
||||||
|
|
||||||
return page
|
return page
|
||||||
|
|
||||||
|
def encrypt_path(self, msg, is_element=False):
|
||||||
|
# Encrypts path to avoid plaintext results in logs
|
||||||
|
if is_element:
|
||||||
|
# Element paths are tracked differently in order for the element key to be regenerated
|
||||||
|
# once all elements have been loaded
|
||||||
|
enc_path = Fernet(self.user_keys['element_key']).encrypt(msg.encode()).decode()
|
||||||
|
self._elements += 1
|
||||||
|
return enc_path
|
||||||
|
|
||||||
|
return Fernet(self.user_keys['text_key']).encrypt(msg.encode()).decode()
|
||||||
|
|
||||||
def clean(self, soup):
|
def clean(self, soup):
|
||||||
self.remove_ads(soup)
|
self.main_divs = soup.find('div', {'id': 'main'})
|
||||||
self.update_image_paths(soup)
|
self.remove_ads()
|
||||||
|
self.fix_question_section()
|
||||||
self.update_styling(soup)
|
self.update_styling(soup)
|
||||||
self.update_links(soup)
|
|
||||||
|
for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]:
|
||||||
|
self.update_element_src(img, 'image/png')
|
||||||
|
|
||||||
|
for audio in [_ for _ in soup.find_all('audio') if 'src' in _.attrs]:
|
||||||
|
self.update_element_src(audio, 'audio/mpeg')
|
||||||
|
|
||||||
|
for link in soup.find_all('a', href=True):
|
||||||
|
self.update_link(link)
|
||||||
|
|
||||||
input_form = soup.find('form')
|
input_form = soup.find('form')
|
||||||
if input_form is not None:
|
if input_form is not None:
|
||||||
|
@ -90,14 +117,11 @@ class Filter:
|
||||||
for script in soup('script'):
|
for script in soup('script'):
|
||||||
script.decompose()
|
script.decompose()
|
||||||
|
|
||||||
# Remove google's language/time config
|
# Update default footer and header
|
||||||
st_card = soup.find('div', id='st-card')
|
footer = soup.find('footer')
|
||||||
if st_card:
|
|
||||||
st_card.decompose()
|
|
||||||
|
|
||||||
footer = soup.find('div', id='sfooter')
|
|
||||||
if footer:
|
if footer:
|
||||||
footer.decompose()
|
# Remove divs that have multiple links beyond just page navigation
|
||||||
|
[_.decompose() for _ in footer.find_all('div', recursive=False) if len(_.find_all('a', href=True)) > 2]
|
||||||
|
|
||||||
header = soup.find('header')
|
header = soup.find('header')
|
||||||
if header:
|
if header:
|
||||||
|
@ -105,35 +129,42 @@ class Filter:
|
||||||
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def remove_ads(self, soup):
|
def remove_ads(self):
|
||||||
main_divs = soup.find('div', {'id': 'main'})
|
if not self.main_divs:
|
||||||
if main_divs is None:
|
|
||||||
return
|
return
|
||||||
result_divs = main_divs.find_all('div', recursive=False)
|
|
||||||
|
|
||||||
for div in [_ for _ in result_divs]:
|
for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]:
|
||||||
has_ad = len([_ for _ in div.find_all('span', recursive=True) if 'ad' == _.text.lower()])
|
has_ad = len([_ for _ in div.find_all('span', recursive=True) if has_ad_content(_.text)])
|
||||||
_ = div.decompose() if has_ad else None
|
_ = div.decompose() if has_ad else None
|
||||||
|
|
||||||
def update_image_paths(self, soup):
|
def fix_question_section(self):
|
||||||
for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]:
|
if not self.main_divs:
|
||||||
img_src = img['src']
|
return
|
||||||
if img_src.startswith('//'):
|
|
||||||
img_src = 'https:' + img_src
|
|
||||||
elif img_src.startswith(LOGO_URL):
|
|
||||||
# Re-brand with Whoogle logo
|
|
||||||
img['src'] = '/static/img/logo.png'
|
|
||||||
img['style'] = 'height:40px;width:162px'
|
|
||||||
continue
|
|
||||||
elif img_src.startswith(GOOG_IMG):
|
|
||||||
img['src'] = BLANK_B64
|
|
||||||
continue
|
|
||||||
|
|
||||||
enc_src = Fernet(self.secret_key).encrypt(img_src.encode())
|
question_divs = [_ for _ in self.main_divs.find_all('div', recursive=False) if len(_.find_all('h2')) > 0]
|
||||||
img['src'] = '/tmp?image_url=' + enc_src.decode()
|
for question_div in question_divs:
|
||||||
# TODO: Non-mobile image results link to website instead of image
|
questions = [_ for _ in question_div.find_all('div', recursive=True) if _.text.endswith('?')]
|
||||||
# if not self.mobile:
|
for question in questions:
|
||||||
# img.append(BeautifulSoup(FULL_RES_IMG.format(img_src), 'html.parser'))
|
question['style'] = 'padding: 10px; font-style: italic;'
|
||||||
|
|
||||||
|
def update_element_src(self, element, mime):
|
||||||
|
element_src = element['src']
|
||||||
|
if element_src.startswith('//'):
|
||||||
|
element_src = 'https:' + element_src
|
||||||
|
elif element_src.startswith(LOGO_URL):
|
||||||
|
# Re-brand with Whoogle logo
|
||||||
|
element['src'] = '/static/img/logo.png'
|
||||||
|
element['style'] = 'height:40px;width:162px'
|
||||||
|
return
|
||||||
|
elif element_src.startswith(GOOG_IMG):
|
||||||
|
element['src'] = BLANK_B64
|
||||||
|
return
|
||||||
|
|
||||||
|
element['src'] = '/element?url=' + self.encrypt_path(element_src, is_element=True) + \
|
||||||
|
'&type=' + urlparse.quote(mime)
|
||||||
|
# TODO: Non-mobile image results link to website instead of image
|
||||||
|
# if not self.mobile:
|
||||||
|
# img.append(BeautifulSoup(FULL_RES_IMG.format(element_src), 'html.parser'))
|
||||||
|
|
||||||
def update_styling(self, soup):
|
def update_styling(self, soup):
|
||||||
# Remove unnecessary button(s)
|
# Remove unnecessary button(s)
|
||||||
|
@ -169,44 +200,42 @@ class Filter:
|
||||||
for href_element in soup.findAll('a'):
|
for href_element in soup.findAll('a'):
|
||||||
href_element['style'] = 'color: white' if href_element['href'].startswith('/search') else ''
|
href_element['style'] = 'color: white' if href_element['href'].startswith('/search') else ''
|
||||||
|
|
||||||
def update_links(self, soup):
|
def update_link(self, link):
|
||||||
# Replace hrefs with only the intended destination (no "utm" type tags)
|
# Replace href with only the intended destination (no "utm" type tags)
|
||||||
for a in soup.find_all('a', href=True):
|
href = link['href'].replace('https://www.google.com', '')
|
||||||
href = a['href'].replace('https://www.google.com', '')
|
if '/advanced_search' in href:
|
||||||
if '/advanced_search' in href:
|
link.decompose()
|
||||||
a.decompose()
|
return
|
||||||
continue
|
elif self.new_tab:
|
||||||
elif self.new_tab:
|
link['target'] = '_blank'
|
||||||
a['target'] = '_blank'
|
|
||||||
|
|
||||||
result_link = urlparse.urlparse(href)
|
result_link = urlparse.urlparse(href)
|
||||||
query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
|
query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
|
||||||
|
|
||||||
if query_link.startswith('/'):
|
if query_link.startswith('/'):
|
||||||
a['href'] = 'https://google.com' + query_link
|
link['href'] = 'https://google.com' + query_link
|
||||||
elif '/search?q=' in href:
|
elif '/search?q=' in href:
|
||||||
enc_result = Fernet(self.secret_key).encrypt(query_link.encode())
|
new_search = '/search?q=' + self.encrypt_path(query_link)
|
||||||
new_search = '/search?q=' + enc_result.decode()
|
|
||||||
|
|
||||||
query_params = parse_qs(urlparse.urlparse(href).query)
|
query_params = parse_qs(urlparse.urlparse(href).query)
|
||||||
for param in VALID_PARAMS:
|
for param in VALID_PARAMS:
|
||||||
param_val = query_params[param][0] if param in query_params else ''
|
param_val = query_params[param][0] if param in query_params else ''
|
||||||
new_search += '&' + param + '=' + param_val
|
new_search += '&' + param + '=' + param_val
|
||||||
a['href'] = new_search
|
link['href'] = new_search
|
||||||
elif 'url?q=' in href:
|
elif 'url?q=' in href:
|
||||||
# Strip unneeded arguments
|
# Strip unneeded arguments
|
||||||
a['href'] = filter_link_args(query_link)
|
link['href'] = filter_link_args(query_link)
|
||||||
|
|
||||||
# Add no-js option
|
# Add no-js option
|
||||||
if self.nojs:
|
if self.nojs:
|
||||||
gen_nojs(soup, a['href'], a)
|
gen_nojs(link)
|
||||||
else:
|
else:
|
||||||
a['href'] = href
|
link['href'] = href
|
||||||
|
|
||||||
|
|
||||||
def gen_nojs(soup, link, sibling):
|
def gen_nojs(sibling):
|
||||||
nojs_link = soup.new_tag('a')
|
nojs_link = BeautifulSoup().new_tag('a')
|
||||||
nojs_link['href'] = '/window?location=' + link
|
nojs_link['href'] = '/window?location=' + sibling['href']
|
||||||
nojs_link['style'] = 'display:block;width:100%;'
|
nojs_link['style'] = 'display:block;width:100%;'
|
||||||
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
|
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
|
||||||
sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
|
sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from io import BytesIO
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
import pycurl
|
|
||||||
import random
|
import random
|
||||||
|
import requests
|
||||||
|
from requests import Response
|
||||||
import urllib.parse as urlparse
|
import urllib.parse as urlparse
|
||||||
|
|
||||||
# Core Google search URLs
|
# Core Google search URLs
|
||||||
|
@ -12,27 +12,38 @@ MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
|
||||||
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
|
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
|
||||||
|
|
||||||
# Valid query params
|
# Valid query params
|
||||||
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near']
|
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source']
|
||||||
|
|
||||||
|
|
||||||
def gen_user_agent(normal_ua, is_mobile):
|
def gen_user_agent(is_mobile):
|
||||||
mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla'
|
mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla'
|
||||||
firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox'
|
firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox'
|
||||||
linux = random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']) + 'ux'
|
linux = random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']) + 'ux'
|
||||||
|
|
||||||
if is_mobile:
|
if is_mobile:
|
||||||
return MOBILE_UA.format(mozilla, firefox)
|
return MOBILE_UA.format(mozilla, firefox)
|
||||||
else:
|
|
||||||
return DESKTOP_UA.format(mozilla, linux, firefox)
|
return DESKTOP_UA.format(mozilla, linux, firefox)
|
||||||
|
|
||||||
|
|
||||||
def gen_query(query, args, config, near_city=None):
|
def gen_query(query, args, config, near_city=None):
|
||||||
param_dict = {key: '' for key in VALID_PARAMS}
|
param_dict = {key: '' for key in VALID_PARAMS}
|
||||||
|
|
||||||
# Use :past(hour/day/week/month/year) if available
|
# Use :past(hour/day/week/month/year) if available
|
||||||
# example search "new restaurants :past month"
|
# example search "new restaurants :past month"
|
||||||
if ':past' in query:
|
sub_lang = ''
|
||||||
|
if ':past' in query and 'tbs' not in args:
|
||||||
time_range = str.strip(query.split(':past', 1)[-1])
|
time_range = str.strip(query.split(':past', 1)[-1])
|
||||||
param_dict['tbs'] = '&tbs=qdr:' + str.lower(time_range[0])
|
param_dict['tbs'] = '&tbs=' + ('qdr:' + str.lower(time_range[0]))
|
||||||
|
elif 'tbs' in args:
|
||||||
|
result_tbs = args.get('tbs')
|
||||||
|
param_dict['tbs'] = '&tbs=' + result_tbs
|
||||||
|
|
||||||
|
# Occasionally the 'tbs' param provided by google also contains a field for 'lr', but formatted
|
||||||
|
# strangely. This is a (admittedly not very elegant) solution for this.
|
||||||
|
# Ex/ &tbs=qdr:h,lr:lang_1pl --> the lr param needs to be extracted and have the "1" digit removed in this case
|
||||||
|
sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _]
|
||||||
|
sub_lang = sub_lang[0][sub_lang[0].find('lr:') + 3:len(sub_lang[0])] if len(sub_lang) > 0 else ''
|
||||||
|
|
||||||
# Ensure search query is parsable
|
# Ensure search query is parsable
|
||||||
query = urlparse.quote(query)
|
query = urlparse.quote(query)
|
||||||
|
@ -49,13 +60,20 @@ def gen_query(query, args, config, near_city=None):
|
||||||
if near_city:
|
if near_city:
|
||||||
param_dict['near'] = '&near=' + urlparse.quote(near_city)
|
param_dict['near'] = '&near=' + urlparse.quote(near_city)
|
||||||
|
|
||||||
# Set language for results (lr) and interface (hl)
|
# Set language for results (lr) if source isn't set, otherwise use the result
|
||||||
param_dict['lr'] = '&lr=' + config.lang + '&hl=' + config.lang.replace('lang_', '')
|
# language param provided by google (but with the strange digit(s) removed)
|
||||||
|
if 'source' in args:
|
||||||
|
param_dict['source'] = '&source=' + args.get('source')
|
||||||
|
param_dict['lr'] = ('&lr=' + ''.join([_ for _ in sub_lang if not _.isdigit()])) if sub_lang else ''
|
||||||
|
else:
|
||||||
|
param_dict['lr'] = '&lr=' + config.lang
|
||||||
|
|
||||||
param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else ''
|
param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else ''
|
||||||
|
param_dict['hl'] = '&hl=' + config.lang.replace('lang_', '')
|
||||||
param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')
|
param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')
|
||||||
|
|
||||||
for val in param_dict.values():
|
for val in param_dict.values():
|
||||||
if not val or val is None:
|
if not val:
|
||||||
continue
|
continue
|
||||||
query += val
|
query += val
|
||||||
|
|
||||||
|
@ -66,20 +84,14 @@ class Request:
|
||||||
def __init__(self, normal_ua, language='lang_en'):
|
def __init__(self, normal_ua, language='lang_en'):
|
||||||
self.language = language
|
self.language = language
|
||||||
self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua
|
self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua
|
||||||
self.modified_user_agent = gen_user_agent(normal_ua, self.mobile)
|
self.modified_user_agent = gen_user_agent(self.mobile)
|
||||||
|
|
||||||
def __getitem__(self, name):
|
def __getitem__(self, name):
|
||||||
return getattr(self, name)
|
return getattr(self, name)
|
||||||
|
|
||||||
def get_decode_value(self):
|
|
||||||
if 'lang_zh' in self.language:
|
|
||||||
return 'gb2312'
|
|
||||||
else:
|
|
||||||
return 'unicode-escape'
|
|
||||||
|
|
||||||
def autocomplete(self, query):
|
def autocomplete(self, query):
|
||||||
ac_query = dict(hl=self.language, q=query)
|
ac_query = dict(hl=self.language, q=query)
|
||||||
response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query))
|
response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)).text
|
||||||
|
|
||||||
if response:
|
if response:
|
||||||
dom = etree.fromstring(response)
|
dom = etree.fromstring(response)
|
||||||
|
@ -87,20 +99,9 @@ class Request:
|
||||||
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def send(self, base_url=SEARCH_URL, query='', return_bytes=False):
|
def send(self, base_url=SEARCH_URL, query='') -> Response:
|
||||||
response_header = []
|
headers = {
|
||||||
|
'User-Agent': self.modified_user_agent
|
||||||
|
}
|
||||||
|
|
||||||
b_obj = BytesIO()
|
return requests.get(base_url + query, headers=headers)
|
||||||
crl = pycurl.Curl()
|
|
||||||
crl.setopt(crl.URL, base_url + query)
|
|
||||||
crl.setopt(crl.USERAGENT, self.modified_user_agent)
|
|
||||||
crl.setopt(crl.WRITEDATA, b_obj)
|
|
||||||
crl.setopt(crl.HEADERFUNCTION, response_header.append)
|
|
||||||
crl.setopt(pycurl.FOLLOWLOCATION, 1)
|
|
||||||
crl.perform()
|
|
||||||
crl.close()
|
|
||||||
|
|
||||||
if return_bytes:
|
|
||||||
return b_obj.getvalue()
|
|
||||||
else:
|
|
||||||
return b_obj.getvalue().decode(self.get_decode_value(), 'ignore')
|
|
||||||
|
|
181
app/routes.py
181
app/routes.py
|
@ -1,19 +1,22 @@
|
||||||
from app import app
|
|
||||||
from app.filter import Filter, get_first_link
|
|
||||||
from app.models.config import Config
|
|
||||||
from app.request import Request, gen_query
|
|
||||||
import argparse
|
import argparse
|
||||||
import base64
|
import base64
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from cryptography.fernet import Fernet, InvalidToken
|
|
||||||
from flask import g, jsonify, make_response, request, redirect, render_template, send_file
|
|
||||||
from functools import wraps
|
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from pycurl import error as pycurl_error
|
import pickle
|
||||||
import urllib.parse as urlparse
|
import urllib.parse as urlparse
|
||||||
|
import uuid
|
||||||
|
from functools import wraps
|
||||||
|
|
||||||
import waitress
|
import waitress
|
||||||
|
from flask import jsonify, make_response, request, redirect, render_template, send_file, session
|
||||||
|
from requests import exceptions
|
||||||
|
|
||||||
|
from app import app
|
||||||
|
from app.models.config import Config
|
||||||
|
from app.request import Request
|
||||||
|
from app.utils.misc import valid_user_session
|
||||||
|
from app.utils.routing_utils import *
|
||||||
|
|
||||||
|
|
||||||
def auth_required(f):
|
def auth_required(f):
|
||||||
|
@ -34,17 +37,30 @@ def auth_required(f):
|
||||||
|
|
||||||
@app.before_request
|
@app.before_request
|
||||||
def before_request_func():
|
def before_request_func():
|
||||||
# Always redirect to https if HTTPS_ONLY is set (otherwise default to false)
|
g.request_params = request.args if request.method == 'GET' else request.form
|
||||||
|
g.cookies_disabled = False
|
||||||
|
|
||||||
|
# Generate session values for user if unavailable
|
||||||
|
if not valid_user_session(session):
|
||||||
|
session['config'] = json.load(open(app.config['DEFAULT_CONFIG'])) \
|
||||||
|
if os.path.exists(app.config['DEFAULT_CONFIG']) else {'url': request.url_root}
|
||||||
|
session['uuid'] = str(uuid.uuid4())
|
||||||
|
session['fernet_keys'] = generate_user_keys(True)
|
||||||
|
|
||||||
|
# Flag cookies as possibly disabled in order to prevent against
|
||||||
|
# unnecessary session directory expansion
|
||||||
|
g.cookies_disabled = True
|
||||||
|
|
||||||
|
if session['uuid'] not in app.user_elements:
|
||||||
|
app.user_elements.update({session['uuid']: 0})
|
||||||
|
|
||||||
|
# Always redirect to https if HTTPS_ONLY is set (otherwise default to False)
|
||||||
https_only = os.getenv('HTTPS_ONLY', False)
|
https_only = os.getenv('HTTPS_ONLY', False)
|
||||||
config_path = app.config['CONFIG_PATH']
|
|
||||||
|
|
||||||
if https_only and request.url.startswith('http://'):
|
if https_only and request.url.startswith('http://'):
|
||||||
https_url = request.url.replace('http://', 'https://', 1)
|
return redirect(request.url.replace('http://', 'https://', 1), code=308)
|
||||||
code = 308
|
|
||||||
return redirect(https_url, code=code)
|
|
||||||
|
|
||||||
json_config = json.load(open(config_path)) if os.path.exists(config_path) else {'url': request.url_root}
|
g.user_config = Config(**session['config'])
|
||||||
g.user_config = Config(**json_config)
|
|
||||||
|
|
||||||
if not g.user_config.url:
|
if not g.user_config.url:
|
||||||
g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root
|
g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root
|
||||||
|
@ -53,6 +69,27 @@ def before_request_func():
|
||||||
g.app_location = g.user_config.url
|
g.app_location = g.user_config.url
|
||||||
|
|
||||||
|
|
||||||
|
@app.after_request
|
||||||
|
def after_request_func(response):
|
||||||
|
if app.user_elements[session['uuid']] <= 0 and '/element' in request.url:
|
||||||
|
# Regenerate element key if all elements have been served to user
|
||||||
|
session['fernet_keys']['element_key'] = '' if not g.cookies_disabled else app.default_key_set['element_key']
|
||||||
|
app.user_elements[session['uuid']] = 0
|
||||||
|
|
||||||
|
# Check if address consistently has cookies blocked, in which case start removing session
|
||||||
|
# files after creation.
|
||||||
|
# Note: This is primarily done to prevent overpopulation of session directories, since browsers that
|
||||||
|
# block cookies will still trigger Flask's session creation routine with every request.
|
||||||
|
if g.cookies_disabled and request.remote_addr not in app.no_cookie_ips:
|
||||||
|
app.no_cookie_ips.append(request.remote_addr)
|
||||||
|
elif g.cookies_disabled and request.remote_addr in app.no_cookie_ips:
|
||||||
|
session_list = list(session.keys())
|
||||||
|
for key in session_list:
|
||||||
|
session.pop(key)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
@app.errorhandler(404)
|
@app.errorhandler(404)
|
||||||
def unknown_page(e):
|
def unknown_page(e):
|
||||||
return redirect(g.app_location)
|
return redirect(g.app_location)
|
||||||
|
@ -61,15 +98,14 @@ def unknown_page(e):
|
||||||
@app.route('/', methods=['GET'])
|
@app.route('/', methods=['GET'])
|
||||||
@auth_required
|
@auth_required
|
||||||
def index():
|
def index():
|
||||||
|
# Reset keys
|
||||||
|
session['fernet_keys'] = generate_user_keys(g.cookies_disabled)
|
||||||
|
|
||||||
return render_template('index.html',
|
return render_template('index.html',
|
||||||
dark_mode=g.user_config.dark,
|
|
||||||
ua=g.user_request.modified_user_agent,
|
|
||||||
languages=Config.LANGUAGES,
|
languages=Config.LANGUAGES,
|
||||||
countries=Config.COUNTRIES,
|
countries=Config.COUNTRIES,
|
||||||
current_lang=g.user_config.lang,
|
config=g.user_config,
|
||||||
current_ctry=g.user_config.ctry,
|
version_number=app.config['VERSION_NUMBER'])
|
||||||
version_number=app.config['VERSION_NUMBER'],
|
|
||||||
request_type='get' if g.user_config.get_only else 'post')
|
|
||||||
|
|
||||||
|
|
||||||
@app.route('/opensearch.xml', methods=['GET'])
|
@app.route('/opensearch.xml', methods=['GET'])
|
||||||
|
@ -89,8 +125,7 @@ def opensearch():
|
||||||
|
|
||||||
@app.route('/autocomplete', methods=['GET', 'POST'])
|
@app.route('/autocomplete', methods=['GET', 'POST'])
|
||||||
def autocomplete():
|
def autocomplete():
|
||||||
request_params = request.args if request.method == 'GET' else request.form
|
q = g.request_params.get('q')
|
||||||
q = request_params.get('q')
|
|
||||||
|
|
||||||
if not q and not request.data:
|
if not q and not request.data:
|
||||||
return jsonify({'?': []})
|
return jsonify({'?': []})
|
||||||
|
@ -103,68 +138,65 @@ def autocomplete():
|
||||||
@app.route('/search', methods=['GET', 'POST'])
|
@app.route('/search', methods=['GET', 'POST'])
|
||||||
@auth_required
|
@auth_required
|
||||||
def search():
|
def search():
|
||||||
request_params = request.args if request.method == 'GET' else request.form
|
# Reset element counter
|
||||||
q = request_params.get('q')
|
app.user_elements[session['uuid']] = 0
|
||||||
|
|
||||||
if q is None or len(q) == 0:
|
search_util = RoutingUtils(request, g.user_config, session, cookies_disabled=g.cookies_disabled)
|
||||||
|
query = search_util.new_search_query()
|
||||||
|
|
||||||
|
# Redirect to home if invalid/blank search
|
||||||
|
if not query:
|
||||||
return redirect('/')
|
return redirect('/')
|
||||||
else:
|
|
||||||
# Attempt to decrypt if this is an internal link
|
|
||||||
try:
|
|
||||||
q = Fernet(app.secret_key).decrypt(q.encode()).decode()
|
|
||||||
except InvalidToken:
|
|
||||||
pass
|
|
||||||
|
|
||||||
feeling_lucky = q.startswith('! ')
|
# Generate response and number of external elements from the page
|
||||||
|
response, elements = search_util.generate_response()
|
||||||
|
if search_util.feeling_lucky:
|
||||||
|
return redirect(response, code=303)
|
||||||
|
|
||||||
if feeling_lucky: # Well do you, punk?
|
# Keep count of external elements to fetch before element key can be regenerated
|
||||||
q = q[2:]
|
app.user_elements[session['uuid']] = elements
|
||||||
|
|
||||||
user_agent = request.headers.get('User-Agent')
|
|
||||||
mobile = 'Android' in user_agent or 'iPhone' in user_agent
|
|
||||||
|
|
||||||
content_filter = Filter(mobile, g.user_config, secret_key=app.secret_key)
|
|
||||||
full_query = gen_query(q, request_params, g.user_config, content_filter.near)
|
|
||||||
get_body = g.user_request.send(query=full_query)
|
|
||||||
dirty_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser')
|
|
||||||
|
|
||||||
if feeling_lucky:
|
|
||||||
return redirect(get_first_link(dirty_soup), 303) # Using 303 so the browser performs a GET request for the URL
|
|
||||||
else:
|
|
||||||
formatted_results = content_filter.clean(dirty_soup)
|
|
||||||
|
|
||||||
# Set search type to be used in the header template to allow for repeated searches
|
|
||||||
# in the same category
|
|
||||||
search_type = request_params.get('tbm') if 'tbm' in request_params else ''
|
|
||||||
|
|
||||||
return render_template(
|
return render_template(
|
||||||
'display.html',
|
'display.html',
|
||||||
query=urlparse.unquote(q),
|
query=urlparse.unquote(query),
|
||||||
search_type=search_type,
|
search_type=search_util.search_type,
|
||||||
dark_mode=g.user_config.dark,
|
dark_mode=g.user_config.dark,
|
||||||
response=formatted_results,
|
response=response,
|
||||||
|
version_number=app.config['VERSION_NUMBER'],
|
||||||
search_header=render_template(
|
search_header=render_template(
|
||||||
'header.html',
|
'header.html',
|
||||||
dark_mode=g.user_config.dark,
|
dark_mode=g.user_config.dark,
|
||||||
q=urlparse.unquote(q),
|
query=urlparse.unquote(query),
|
||||||
search_type=search_type,
|
search_type=search_util.search_type,
|
||||||
mobile=g.user_request.mobile) if 'isch' not in search_type else '')
|
mobile=g.user_request.mobile) if 'isch' not in search_util.search_type else '')
|
||||||
|
|
||||||
|
|
||||||
@app.route('/config', methods=['GET', 'POST'])
|
@app.route('/config', methods=['GET', 'POST', 'PUT'])
|
||||||
@auth_required
|
@auth_required
|
||||||
def config():
|
def config():
|
||||||
if request.method == 'GET':
|
if request.method == 'GET':
|
||||||
return json.dumps(g.user_config.__dict__)
|
return json.dumps(g.user_config.__dict__)
|
||||||
|
elif request.method == 'PUT':
|
||||||
|
if 'name' in request.args:
|
||||||
|
config_pkl = os.path.join(app.config['CONFIG_PATH'], request.args.get('name'))
|
||||||
|
session['config'] = pickle.load(open(config_pkl, 'rb')) if os.path.exists(config_pkl) else session['config']
|
||||||
|
return json.dumps(session['config'])
|
||||||
|
else:
|
||||||
|
return json.dumps({})
|
||||||
else:
|
else:
|
||||||
config_data = request.form.to_dict()
|
config_data = request.form.to_dict()
|
||||||
if 'url' not in config_data or not config_data['url']:
|
if 'url' not in config_data or not config_data['url']:
|
||||||
config_data['url'] = g.user_config.url
|
config_data['url'] = g.user_config.url
|
||||||
|
|
||||||
with open(app.config['CONFIG_PATH'], 'w') as config_file:
|
# Save config by name to allow a user to easily load later
|
||||||
config_file.write(json.dumps(config_data, indent=4))
|
if 'name' in request.args:
|
||||||
config_file.close()
|
pickle.dump(config_data, open(os.path.join(app.config['CONFIG_PATH'], request.args.get('name')), 'wb'))
|
||||||
|
|
||||||
|
# Overwrite default config if user has cookies disabled
|
||||||
|
if g.cookies_disabled:
|
||||||
|
open(app.config['DEFAULT_CONFIG'], 'w').write(json.dumps(config_data, indent=4))
|
||||||
|
|
||||||
|
session['config'] = config_data
|
||||||
return redirect(config_data['url'])
|
return redirect(config_data['url'])
|
||||||
|
|
||||||
|
|
||||||
|
@ -187,25 +219,22 @@ def imgres():
|
||||||
return redirect(request.args.get('imgurl'))
|
return redirect(request.args.get('imgurl'))
|
||||||
|
|
||||||
|
|
||||||
@app.route('/tmp')
|
@app.route('/element')
|
||||||
@auth_required
|
@auth_required
|
||||||
def tmp():
|
def element():
|
||||||
cipher_suite = Fernet(app.secret_key)
|
cipher_suite = Fernet(session['fernet_keys']['element_key'])
|
||||||
img_url = cipher_suite.decrypt(request.args.get('image_url').encode()).decode()
|
src_url = cipher_suite.decrypt(request.args.get('url').encode()).decode()
|
||||||
|
src_type = request.args.get('type')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
file_data = g.user_request.send(base_url=img_url, return_bytes=True)
|
file_data = g.user_request.send(base_url=src_url).content
|
||||||
|
app.user_elements[session['uuid']] -= 1
|
||||||
tmp_mem = io.BytesIO()
|
tmp_mem = io.BytesIO()
|
||||||
tmp_mem.write(file_data)
|
tmp_mem.write(file_data)
|
||||||
tmp_mem.seek(0)
|
tmp_mem.seek(0)
|
||||||
|
|
||||||
return send_file(
|
return send_file(tmp_mem, mimetype=src_type)
|
||||||
tmp_mem,
|
except exceptions.RequestException:
|
||||||
as_attachment=True,
|
|
||||||
attachment_filename='tmp.png',
|
|
||||||
mimetype='image/png'
|
|
||||||
)
|
|
||||||
except pycurl_error:
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
empty_gif = base64.b64decode('R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
|
empty_gif = base64.b64decode('R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
|
||||||
|
@ -215,7 +244,7 @@ def tmp():
|
||||||
@app.route('/window')
|
@app.route('/window')
|
||||||
@auth_required
|
@auth_required
|
||||||
def window():
|
def window():
|
||||||
get_body = g.user_request.send(base_url=request.args.get('location'))
|
get_body = g.user_request.send(base_url=request.args.get('location')).text
|
||||||
get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"')
|
get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"')
|
||||||
get_body = get_body.replace('href="/', 'href="' + request.args.get('location') + '"')
|
get_body = get_body.replace('href="/', 'href="' + request.args.get('location') + '"')
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@ const handleUserInput = searchBar => {
|
||||||
xhrRequest.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
|
xhrRequest.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
|
||||||
xhrRequest.onload = function() {
|
xhrRequest.onload = function() {
|
||||||
if (xhrRequest.readyState === 4 && xhrRequest.status !== 200) {
|
if (xhrRequest.readyState === 4 && xhrRequest.status !== 200) {
|
||||||
alert("Error fetching autocomplete results");
|
// Do nothing if failed to fetch autocomplete results
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -71,6 +71,41 @@ const setupConfigLayout = () => {
|
||||||
fillConfigValues();
|
fillConfigValues();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Prompt for a saved config name and ask the server (PUT /config) to load it,
 * then reload the page so the loaded values are displayed.
 *
 * @param {Event} event - Click event from the "Load" button; its default
 *     form submission is suppressed.
 */
const loadConfig = event => {
    event.preventDefault();
    let config = prompt("Enter name of config:");
    if (!config) {
        alert("Must specify a name for the config to load");
        return;
    }

    let xhrPUT = new XMLHttpRequest();
    // Encode the user-supplied name so characters such as "&", "#", "+" or
    // spaces cannot truncate or alter the "name" query parameter
    xhrPUT.open("PUT", "/config?name=" + encodeURIComponent(config + ".conf"));
    xhrPUT.onload = function() {
        if (xhrPUT.readyState === 4 && xhrPUT.status !== 200) {
            alert("Error loading Whoogle config");
            return;
        }

        location.reload(true);
    };

    xhrPUT.send();
};
|
||||||
|
|
||||||
|
/**
 * Prompt for a config name and submit the config form to /config with that
 * name attached as the "name" query parameter.
 *
 * @param {Event} event - Click event from the "Save As..." button; its
 *     default form submission is suppressed so the action can be rewritten.
 */
const saveConfig = event => {
    event.preventDefault();
    let config = prompt("Enter name for this config:");
    if (!config) {
        alert("Must specify a name for the config to save");
        return;
    }

    let configForm = document.getElementById("config-form");
    // Encode the user-supplied name so characters such as "&", "#", "+" or
    // spaces cannot truncate or alter the "name" query parameter
    configForm.action = '/config?name=' + encodeURIComponent(config + ".conf");
    configForm.submit();
};
|
||||||
|
|
||||||
document.addEventListener("DOMContentLoaded", function() {
|
document.addEventListener("DOMContentLoaded", function() {
|
||||||
setTimeout(function() {
|
setTimeout(function() {
|
||||||
document.getElementById("main").style.display = "block";
|
document.getElementById("main").style.display = "block";
|
||||||
|
|
|
@ -11,7 +11,13 @@
|
||||||
<title>{{ query }} - Whoogle Search</title>
|
<title>{{ query }} - Whoogle Search</title>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
{{ search_header|safe }}
|
{{ search_header|safe }}
|
||||||
{{ response|safe }}
|
{{ response|safe }}
|
||||||
</body>
|
</body>
|
||||||
|
<footer>
|
||||||
|
<p style="color: {{ '#fff' if dark_mode else '#000' }};">
|
||||||
|
Whoogle Search v{{ version_number }} ||
|
||||||
|
<a style="color: #685e79" href="https://github.com/benbusby/whoogle-search">View on GitHub</a>
|
||||||
|
</p>
|
||||||
|
</footer>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
style="background-color: {{ '#000' if dark_mode else '#fff' }};
|
style="background-color: {{ '#000' if dark_mode else '#fff' }};
|
||||||
color: {{ '#685e79' if dark_mode else '#000' }};
|
color: {{ '#685e79' if dark_mode else '#000' }};
|
||||||
border: {{ '1px solid #685e79' if dark_mode else '' }}"
|
border: {{ '1px solid #685e79' if dark_mode else '' }}"
|
||||||
spellcheck="false" type="text" value="{{ q }}">
|
spellcheck="false" type="text" value="{{ query }}">
|
||||||
<input name="tbm" value="{{ search_type }}" style="display: none">
|
<input name="tbm" value="{{ search_type }}" style="display: none">
|
||||||
<div class="sc"></div>
|
<div class="sc"></div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -37,7 +37,7 @@
|
||||||
<div class="autocomplete" style="width: 100%; flex: 1">
|
<div class="autocomplete" style="width: 100%; flex: 1">
|
||||||
<div style="width: 100%; display: flex">
|
<div style="width: 100%; display: flex">
|
||||||
<input id="search-bar" autocapitalize="none" autocomplete="off" class="noHIxc" name="q"
|
<input id="search-bar" autocapitalize="none" autocomplete="off" class="noHIxc" name="q"
|
||||||
spellcheck="false" type="text" value="{{ q }}"
|
spellcheck="false" type="text" value="{{ query }}"
|
||||||
style="background-color: {{ '#000' if dark_mode else '#fff' }};
|
style="background-color: {{ '#000' if dark_mode else '#fff' }};
|
||||||
color: {{ '#685e79' if dark_mode else '#000' }};
|
color: {{ '#685e79' if dark_mode else '#000' }};
|
||||||
border: {{ '1px solid #685e79' if dark_mode else '' }}">
|
border: {{ '1px solid #685e79' if dark_mode else '' }}">
|
||||||
|
|
|
@ -21,14 +21,14 @@
|
||||||
<script type="text/javascript" src="/static/js/controller.js"></script>
|
<script type="text/javascript" src="/static/js/controller.js"></script>
|
||||||
<link rel="search" href="/opensearch.xml" type="application/opensearchdescription+xml" title="Whoogle Search">
|
<link rel="search" href="/opensearch.xml" type="application/opensearchdescription+xml" title="Whoogle Search">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<link rel="stylesheet" href="/static/css/{{ 'search-dark' if dark_mode else 'search' }}.css">
|
<link rel="stylesheet" href="/static/css/{{ 'search-dark' if config.dark else 'search' }}.css">
|
||||||
<link rel="stylesheet" href="/static/css/main.css">
|
<link rel="stylesheet" href="/static/css/main.css">
|
||||||
<title>Whoogle Search</title>
|
<title>Whoogle Search</title>
|
||||||
</head>
|
</head>
|
||||||
<body id="main" style="display: none; background-color: {{ '#000' if dark_mode else '#fff' }}">
|
<body id="main" style="display: none; background-color: {{ '#000' if config.dark else '#fff' }}">
|
||||||
<div class="search-container">
|
<div class="search-container">
|
||||||
<img class="logo" src="/static/img/logo.png">
|
<img class="logo" src="/static/img/logo.png">
|
||||||
<form id="search-form" action="/search" method="{{ request_type }}">
|
<form id="search-form" action="/search" method="{{ 'get' if config.get_only else 'post' }}">
|
||||||
<div class="search-fields">
|
<div class="search-fields">
|
||||||
<div class="autocomplete">
|
<div class="autocomplete">
|
||||||
<input type="text" name="q" id="search-bar" autofocus="autofocus">
|
<input type="text" name="q" id="search-bar" autofocus="autofocus">
|
||||||
|
@ -40,17 +40,13 @@
|
||||||
<button id="config-collapsible" class="collapsible">Configuration</button>
|
<button id="config-collapsible" class="collapsible">Configuration</button>
|
||||||
<div class="content">
|
<div class="content">
|
||||||
<div class="config-fields">
|
<div class="config-fields">
|
||||||
<form action="/config" method="post">
|
<form id="config-form" action="/config" method="post">
|
||||||
<div class="config-div">
|
|
||||||
<!-- TODO: Add option to regenerate user agent? -->
|
|
||||||
<span class="ua-span">User Agent: {{ ua }}</span>
|
|
||||||
</div>
|
|
||||||
<div class="config-div">
|
<div class="config-div">
|
||||||
<label for="config-ctry">Country: </label>
|
<label for="config-ctry">Country: </label>
|
||||||
<select name="ctry" id="config-ctry">
|
<select name="ctry" id="config-ctry">
|
||||||
{% for ctry in countries %}
|
{% for ctry in countries %}
|
||||||
<option value="{{ ctry.value }}"
|
<option value="{{ ctry.value }}"
|
||||||
{% if ctry.value in current_ctry %}
|
{% if ctry.value in config.ctry %}
|
||||||
selected
|
selected
|
||||||
{% endif %}>
|
{% endif %}>
|
||||||
{{ ctry.name }}
|
{{ ctry.name }}
|
||||||
|
@ -63,7 +59,7 @@
|
||||||
<select name="lang" id="config-lang">
|
<select name="lang" id="config-lang">
|
||||||
{% for lang in languages %}
|
{% for lang in languages %}
|
||||||
<option value="{{ lang.value }}"
|
<option value="{{ lang.value }}"
|
||||||
{% if lang.value in current_lang %}
|
{% if lang.value in config.lang %}
|
||||||
selected
|
selected
|
||||||
{% endif %}>
|
{% endif %}>
|
||||||
{{ lang.name }}
|
{{ lang.name }}
|
||||||
|
@ -100,7 +96,9 @@
|
||||||
<input type="text" name="url" id="config-url" value="">
|
<input type="text" name="url" id="config-url" value="">
|
||||||
</div>
|
</div>
|
||||||
<div class="config-div">
|
<div class="config-div">
|
||||||
<input type="submit" id="config-submit" value="Save">
|
<input type="submit" id="config-load" onclick="loadConfig(event)" value="Load">
|
||||||
|
<input type="submit" id="config-submit" value="Apply">
|
||||||
|
<input type="submit" id="config-submit" onclick="saveConfig(event)" value="Save As...">
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
|
from flask import current_app as app
|
||||||
|
|
||||||
|
REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys']
|
||||||
|
BLACKLIST = [
|
||||||
|
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고',
|
||||||
|
'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam',
|
||||||
|
'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés'
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def generate_user_keys(cookies_disabled=False) -> dict:
    """Return a set of Fernet keys for a user.

    Args:
        cookies_disabled: When true, the app-wide ``app.default_key_set`` is
            returned instead of newly generated keys.

    Returns:
        ``app.default_key_set`` when cookies are disabled; otherwise a new
        dict holding a fresh 'element_key' and 'text_key'.
    """
    if not cookies_disabled:
        # Generate/regenerate unique keys per user
        element_key = Fernet.generate_key()
        text_key = Fernet.generate_key()
        return {'element_key': element_key, 'text_key': text_key}

    return app.default_key_set
|
||||||
|
|
||||||
|
|
||||||
|
def valid_user_session(session):
    """Report whether a session contains every required value.

    Args:
        session: Mapping-like session object supporting the ``in`` operator.

    Returns:
        True when each name in REQUIRED_SESSION_VALUES is present in
        ``session``, False as soon as one is missing.
    """
    return all(required in session for required in REQUIRED_SESSION_VALUES)
|
|
@ -0,0 +1,72 @@
|
||||||
|
from app.filter import Filter, get_first_link
|
||||||
|
from app.utils.misc import generate_user_keys
|
||||||
|
from app.request import gen_query
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from cryptography.fernet import Fernet, InvalidToken
|
||||||
|
from flask import g
|
||||||
|
from typing import Any, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
class RoutingUtils:
    """Per-request helper that extracts the search query and builds the response.

    Attributes can also be read, written, deleted and tested with item syntax
    (``utils['query']``, ``'query' in utils``), which maps directly onto
    attribute access.
    """

    def __init__(self, request, config, session, cookies_disabled=False):
        """Capture the request parameters and per-user state for one search.

        Args:
            request: Request object exposing ``method``, ``args``, ``form``
                and ``headers``.
            config: User config passed through to the filter and query builder.
            session: Session mapping holding the user's 'fernet_keys'.
            cookies_disabled: Forwarded to ``generate_user_keys`` when keys
                are regenerated.
        """
        # GET requests carry parameters in the URL, everything else in the form
        self.request_params = request.args if request.method == 'GET' else request.form
        # May be None when the client sends no User-Agent header
        self.user_agent = request.headers.get('User-Agent')
        self.feeling_lucky = False
        self.config = config
        self.session = session
        self.query = ''
        self.cookies_disabled = cookies_disabled
        self.search_type = self.request_params.get('tbm', '')

    def __getitem__(self, name):
        return getattr(self, name)

    def __setitem__(self, name, value):
        return setattr(self, name, value)

    def __delitem__(self, name):
        return delattr(self, name)

    def __contains__(self, name):
        return hasattr(self, name)

    def _is_mobile(self) -> bool:
        """Return True when the User-Agent mentions Android or iPhone."""
        # A missing header is treated as an empty string instead of raising
        # TypeError on the membership test
        user_agent = self.user_agent or ''
        return 'Android' in user_agent or 'iPhone' in user_agent

    def new_search_query(self) -> str:
        """Extract the search query from the request and rotate the user's keys.

        Returns:
            The query text with any leading "! " (feeling lucky) marker
            removed, or an empty string when no 'q' parameter was supplied.
        """
        # Generate a new element key each time a new search is performed
        self.session['fernet_keys']['element_key'] = generate_user_keys(
            cookies_disabled=self.cookies_disabled)['element_key']

        q = self.request_params.get('q')

        if q is None or len(q) == 0:
            return ''

        # Attempt to decrypt if this is an internal link; a plain-text query
        # is not a valid token and is used as-is
        try:
            q = Fernet(self.session['fernet_keys']['text_key']).decrypt(q.encode()).decode()
        except InvalidToken:
            pass

        # Reset text key
        self.session['fernet_keys']['text_key'] = generate_user_keys(
            cookies_disabled=self.cookies_disabled)['text_key']

        # Format depending on whether or not the query is a "feeling lucky" query
        self.feeling_lucky = q.startswith('! ')
        self.query = q[2:] if self.feeling_lucky else q
        return self.query

    def generate_response(self) -> Tuple[Any, int]:
        """Send the current query and return the filtered result.

        Returns:
            ``(first_link, 1)`` for a "feeling lucky" query, otherwise
            ``(formatted_results, content_filter.elements)``.
        """
        mobile = self._is_mobile()

        content_filter = Filter(self.session['fernet_keys'], mobile=mobile, config=self.config)
        full_query = gen_query(self.query, self.request_params, self.config, content_filter.near)
        get_body = g.user_request.send(query=full_query).text

        # Produce cleanable html soup from response
        html_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser')

        if self.feeling_lucky:
            return get_first_link(html_soup), 1

        formatted_results = content_filter.clean(html_soup)
        return formatted_results, content_filter.elements
|
|
@ -4,15 +4,16 @@ cffi==1.13.2
|
||||||
Click==7.0
|
Click==7.0
|
||||||
cryptography==2.8
|
cryptography==2.8
|
||||||
Flask==1.1.1
|
Flask==1.1.1
|
||||||
|
Flask-Session==0.3.2
|
||||||
itsdangerous==1.1.0
|
itsdangerous==1.1.0
|
||||||
Jinja2==2.10.3
|
Jinja2==2.10.3
|
||||||
lxml==4.5.1
|
lxml==4.5.1
|
||||||
MarkupSafe==1.1.1
|
MarkupSafe==1.1.1
|
||||||
pycparser==2.19
|
pycparser==2.19
|
||||||
pycurl==7.43.0.4
|
|
||||||
pyOpenSSL==19.1.0
|
pyOpenSSL==19.1.0
|
||||||
pytest==5.4.1
|
pytest==5.4.1
|
||||||
python-dateutil==2.8.1
|
python-dateutil==2.8.1
|
||||||
|
requests==2.23.0
|
||||||
six==1.14.0
|
six==1.14.0
|
||||||
soupsieve==1.9.5
|
soupsieve==1.9.5
|
||||||
Werkzeug==0.16.0
|
Werkzeug==0.16.0
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -8,7 +8,7 @@ setuptools.setup(
|
||||||
author='Ben Busby',
|
author='Ben Busby',
|
||||||
author_email='benbusby@protonmail.com',
|
author_email='benbusby@protonmail.com',
|
||||||
name='whoogle-search',
|
name='whoogle-search',
|
||||||
version='0.1.4',
|
version='0.2.0',
|
||||||
include_package_data=True,
|
include_package_data=True,
|
||||||
install_requires=requirements,
|
install_requires=requirements,
|
||||||
description='Self-hosted, ad-free, privacy-respecting Google metasearch engine',
|
description='Self-hosted, ad-free, privacy-respecting Google metasearch engine',
|
||||||
|
|
|
@ -1,8 +1,13 @@
|
||||||
from app import app
|
from app import app
|
||||||
|
from app.utils.misc import generate_user_keys
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def client():
|
def client():
|
||||||
client = app.test_client()
|
with app.test_client() as client:
|
||||||
yield client
|
with client.session_transaction() as session:
|
||||||
|
session['uuid'] = 'test'
|
||||||
|
session['fernet_keys'] = generate_user_keys()
|
||||||
|
session['config'] = {}
|
||||||
|
yield client
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
from app.utils.misc import generate_user_keys, valid_user_session
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_user_keys():
    """A generated key set holds two distinct keys: 'text_key' and 'element_key'."""
    keys = generate_user_keys()
    assert 'text_key' in keys
    assert 'element_key' in keys
    # Compare for inequality; "not in" would only be a substring check
    assert keys['text_key'] != keys['element_key']
|
||||||
|
|
||||||
|
|
||||||
|
def test_valid_session(client):
    """A session lacking 'uuid' is rejected; the client fixture's session passes."""
    incomplete_session = {'fernet_keys': '', 'config': {}}
    assert not valid_user_session(incomplete_session)

    with client.session_transaction() as active_session:
        assert valid_user_session(active_session)
|
||||||
|
|
||||||
|
|
||||||
|
def test_request_key_generation(client):
    """The session's text key is replaced after a subsequent search request."""
    rv = client.get('/')
    cookie = rv.headers['Set-Cookie']

    rv = client.get('/search?q=test+1', headers={'Cookie': cookie})
    assert rv.status_code == 200

    with client.session_transaction() as session:
        assert valid_user_session(session)
        text_key = session['fernet_keys']['text_key']

    rv = client.get('/search?q=test+2', headers={'Cookie': cookie})
    assert rv.status_code == 200

    with client.session_transaction() as session:
        assert valid_user_session(session)
        # Compare for inequality; "not in" would only be a substring check
        assert text_key != session['fernet_keys']['text_key']
|
|
@ -1,13 +1,13 @@
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from cryptography.fernet import Fernet
|
|
||||||
from app.filter import Filter
|
from app.filter import Filter
|
||||||
|
from app.utils.misc import generate_user_keys
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from dateutil.parser import *
|
from dateutil.parser import *
|
||||||
|
|
||||||
|
|
||||||
def get_search_results(data):
|
def get_search_results(data):
|
||||||
secret_key = Fernet.generate_key()
|
secret_key = generate_user_keys()
|
||||||
soup = Filter(secret_key=secret_key).clean(BeautifulSoup(data, 'html.parser'))
|
soup = Filter(user_keys=secret_key).clean(BeautifulSoup(data, 'html.parser'))
|
||||||
|
|
||||||
main_divs = soup.find('div', {'id': 'main'})
|
main_divs = soup.find('div', {'id': 'main'})
|
||||||
assert len(main_divs) > 1
|
assert len(main_divs) > 1
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
|
from app.models.config import Config
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
|
|
||||||
demo_config = {
|
demo_config = {
|
||||||
'near': random.choice(['Seattle', 'New York', 'San Francisco']),
|
'near': random.choice(['Seattle', 'New York', 'San Francisco']),
|
||||||
'dark_mode': str(random.getrandbits(1)),
|
'dark_mode': str(random.getrandbits(1)),
|
||||||
'nojs': str(random.getrandbits(1))
|
'nojs': str(random.getrandbits(1)),
|
||||||
|
'lang': random.choice(Config.LANGUAGES)['value'],
|
||||||
|
'ctry': random.choice(Config.COUNTRIES)['value']
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -17,6 +20,7 @@ def test_search(client):
|
||||||
rv = client.get('/search?q=test')
|
rv = client.get('/search?q=test')
|
||||||
assert rv._status_code == 200
|
assert rv._status_code == 200
|
||||||
|
|
||||||
|
|
||||||
def test_feeling_lucky(client):
|
def test_feeling_lucky(client):
|
||||||
rv = client.get('/search?q=!%20test')
|
rv = client.get('/search?q=!%20test')
|
||||||
assert rv._status_code == 303
|
assert rv._status_code == 303
|
||||||
|
|
Loading…
Reference in New Issue