From 614dceeb7065685b9a39241cb811f86f15fbe92d Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Fri, 4 Jun 2021 11:09:30 -0400 Subject: [PATCH] Add fallback interface/search lang + cleanup Since the interface language defaults to IP geolocation by Google, the default language is now set to English. Still not sure if this is the best solution, but it should at least temporarily clear up some confusion for users with instances deployed in countries outside of their own. Also performed some minor cleanup: - Updated name of strip_blocked_sites to clean_query - Added clean_query to list of Jinja template functions - Ensured site block list doesn't contain duplicate filters --- app/__init__.py | 4 ++++ app/filter.py | 28 ++++++++++++++-------------- app/request.py | 30 +++++++++++++++++++++--------- app/routes.py | 3 +-- app/templates/display.html | 2 +- app/templates/header.html | 4 ++-- app/templates/index.html | 2 +- 7 files changed, 44 insertions(+), 29 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index 0fef0a8..7f23607 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,3 +1,4 @@ +from app.filter import clean_query from app.request import send_tor_signal from app.utils.session import generate_user_key from app.utils.bangs import gen_bangs_json @@ -60,6 +61,9 @@ app.config['CSP'] = 'default-src \'none\';' \ 'connect-src \'self\';' \ 'form-action \'self\';' +# Templating functions +app.jinja_env.globals.update(clean_query=clean_query) + if not os.path.exists(app.config['CONFIG_PATH']): os.makedirs(app.config['CONFIG_PATH']) diff --git a/app/filter.py b/app/filter.py index e0ad87c..c56ab8d 100644 --- a/app/filter.py +++ b/app/filter.py @@ -9,19 +9,6 @@ import urllib.parse as urlparse from urllib.parse import parse_qs -def strip_blocked_sites(query: str) -> str: - """Strips the blocked site list from the query, if one is being - used. - - Args: - query: The query string - - Returns: - str: The query string without any "-site:..." 
filters - """ - return query[:query.find('-site:')] if '-site:' in query else query - - def extract_q(q_str: str, href: str) -> str: """Extracts the 'q' element from a result link. This is typically either the link to a result's website, or a string. @@ -37,6 +24,19 @@ def extract_q(q_str: str, href: str) -> str: return parse_qs(q_str)['q'][0] if ('&q=' in href or '?q=' in href) else '' +def clean_query(query: str) -> str: + """Strips the blocked site list from the query, if one is being + used. + + Args: + query: The query string + + Returns: + str: The query string without any "-site:..." filters + """ + return query[:query.find('-site:')] if '-site:' in query else query + + class Filter: def __init__(self, user_key: str, mobile=False, config=None) -> None: if config is None: @@ -268,7 +268,7 @@ class Filter: else: if href.startswith(MAPS_URL): # Maps links don't work if a site filter is applied - link['href'] = MAPS_URL + "?q=" + strip_blocked_sites(q) + link['href'] = MAPS_URL + "?q=" + clean_query(q) else: link['href'] = href diff --git a/app/request.py b/app/request.py index ab3853b..4458c54 100644 --- a/app/request.py +++ b/app/request.py @@ -20,6 +20,9 @@ DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' # Valid query params VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr'] +# Fallback language if none have been configured +DEFAULT_LANG = 'lang_en' + class TorError(Exception): """Exception raised for errors in Tor requests. 
@@ -108,23 +111,28 @@ def gen_query(query, args, config, near_city=None) -> str: [_ for _ in lang if not _.isdigit()] )) if lang else '' else: - param_dict['lr'] = ( - '&lr=' + config.lang_search - ) if config.lang_search else '' + param_dict['lr'] = '&lr=' + ( + config.lang_search if config.lang_search else DEFAULT_LANG + ) # 'nfpr' defines the exclusion of results from an auto-corrected query if 'nfpr' in args: param_dict['nfpr'] = '&nfpr=' + args.get('nfpr') param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else '' - param_dict['hl'] = ( - '&hl=' + config.lang_interface.replace('lang_', '') - ) if config.lang_interface else '' + param_dict['hl'] = '&hl=' + ( + config.lang_interface.replace('lang_', '') + if config.lang_interface else DEFAULT_LANG.replace('lang_', '') + ) param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off') # Block all sites specified in the user config - for blocked in config.block.split(','): - query += (' -site:' + blocked) if blocked else '' + unquoted_query = urlparse.unquote(query) + for blocked_site in config.block.replace(' ', '').split(','): + if not blocked_site: + continue + block = (' -site:' + blocked_site) + query += block if block not in unquoted_query else '' for val in param_dict.values(): if not val: @@ -149,7 +157,9 @@ class Request: # enable Tor for future requests send_tor_signal(Signal.HEARTBEAT) - self.language = config.lang_search + self.language = ( + config.lang_search if config.lang_search else DEFAULT_LANG + ) self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua self.modified_user_agent = gen_user_agent(self.mobile) if not self.mobile: @@ -210,6 +220,8 @@ class Request: query: The optional query string for the request attempt: The number of attempts made for the request (used for cycling through Tor identities, if enabled) + force_mobile: Optional flag to enable a mobile user agent + (used for fetching full size images in search results) Returns: Response: The Response object returned by 
the requests call diff --git a/app/routes.py b/app/routes.py index d325fbd..4409901 100644 --- a/app/routes.py +++ b/app/routes.py @@ -13,7 +13,6 @@ from flask import jsonify, make_response, request, redirect, render_template, \ from requests import exceptions from app import app -from app.filter import strip_blocked_sites from app.models.config import Config from app.request import Request, TorError from app.utils.bangs import resolve_bang @@ -248,7 +247,7 @@ def search(): 'header.html', config=g.user_config, logo=render_template('logo.html', dark=g.user_config.dark), - query=strip_blocked_sites(urlparse.unquote(query)), + query=urlparse.unquote(query), search_type=search_util.search_type, mobile=g.user_request.mobile) if 'isch' not in search_util.search_type else '')), resp_code diff --git a/app/templates/display.html b/app/templates/display.html index 8c30f6e..cdbb983 100644 --- a/app/templates/display.html +++ b/app/templates/display.html @@ -11,7 +11,7 @@ - {{ query }} - Whoogle Search + {{ clean_query(query) }} - Whoogle Search {{ search_header|safe }} diff --git a/app/templates/header.html b/app/templates/header.html index 0f17064..11a0565 100644 --- a/app/templates/header.html +++ b/app/templates/header.html @@ -22,7 +22,7 @@ style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important; color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }};" type="text" - value="{{ query }}"> + value="{{ clean_query(query) }}"> @@ -54,7 +54,7 @@ name="q" spellcheck="false" type="text" - value="{{ query }}" + value="{{ clean_query(query) }}" style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important; color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }}; border-bottom: {{ '2px solid var(--whoogle-dark-element-bg)' if config.dark else '0px' }};"> diff --git a/app/templates/index.html 
b/app/templates/index.html index 5cba56c..49f125a 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -158,7 +158,7 @@ spellcheck="false" autocorrect="off" value=""> - {{ config.style }} + {{ config.style.replace('\t', '') }}