From cbe32a081eff36640b0c0e3b5e68b62e59f06fcd Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sat, 29 May 2021 12:21:20 -0400 Subject: [PATCH] Hotfix: extract only 'q' element from query string Occasionally the search results will contain links with arguments such as 'dq', which was being erroneously used in attempts to extract the 'q' element from query strings. This enforces that only links with '?q=' or '&q=' (elements with a standalone 'q' arg) will have the element extracted. I also refactored the naming of this element once extracted to be just 'q'. Although this seems counterintuitive, it makes a little more sense since this element is the one we're extracting. It's a vague url arg name, but it is what it is. Bump version to 0.5.2 for hotfix release --- app/__init__.py | 2 +- app/filter.py | 31 ++++++++++++++++++++++--------- setup.py | 2 +- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index fe05ede..0fef0a8 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -22,7 +22,7 @@ app.default_key = generate_user_key() app.no_cookie_ips = [] app.config['SECRET_KEY'] = os.urandom(32) app.config['SESSION_TYPE'] = 'filesystem' -app.config['VERSION_NUMBER'] = '0.5.1' +app.config['VERSION_NUMBER'] = '0.5.2' app.config['APP_ROOT'] = os.getenv( 'APP_ROOT', os.path.dirname(os.path.abspath(__file__))) diff --git a/app/filter.py b/app/filter.py index 3804822..e0ad87c 100644 --- a/app/filter.py +++ b/app/filter.py @@ -22,6 +22,21 @@ def strip_blocked_sites(query: str) -> str: return query[:query.find('-site:')] if '-site:' in query else query +def extract_q(q_str: str, href: str) -> str: + """Extracts the standalone 'q' element from a result link's query string. + This is typically a result's website link, an internal Google path, or a search query. + + Args: + q_str: The query string portion of the result link, as parsed by parse_qs + href: The full url; 'q' is only extracted when this contains '?q=' or + '&q=', so similarly named args such as 'dq' are not picked up. 
+ + Returns: + str: The first 'q' value, or an empty string if href has no standalone 'q' + """ + return parse_qs(q_str)['q'][0] if ('&q=' in href or '?q=' in href) else '' + + class Filter: def __init__(self, user_key: str, mobile=False, config=None) -> None: if config is None: @@ -223,20 +238,18 @@ class Filter: link['target'] = '_blank' result_link = urlparse.urlparse(href) - query = parse_qs( - result_link.query - )['q'][0] if 'q=' in href else '' + q = extract_q(result_link.query, href) - if query.startswith('/'): + if q.startswith('/'): # Internal google links (i.e. mail, maps, etc) should still # be forwarded to Google - link['href'] = 'https://google.com' + query + link['href'] = 'https://google.com' + q elif '/search?q=' in href: # "li:1" implies the query should be interpreted verbatim, # which is accomplished by wrapping the query in double quotes if 'li:1' in href: - query = '"' + query + '"' - new_search = 'search?q=' + self.encrypt_path(query) + q = '"' + q + '"' + new_search = 'search?q=' + self.encrypt_path(q) query_params = parse_qs(urlparse.urlparse(href).query) for param in VALID_PARAMS: @@ -247,7 +260,7 @@ class Filter: link['href'] = new_search elif 'url?q=' in href: # Strip unneeded arguments - link['href'] = filter_link_args(query) + link['href'] = filter_link_args(q) # Add no-js option if self.nojs: @@ -255,7 +268,7 @@ class Filter: else: if href.startswith(MAPS_URL): # Maps links don't work if a site filter is applied - link['href'] = MAPS_URL + "?q=" + strip_blocked_sites(query) + link['href'] = MAPS_URL + "?q=" + strip_blocked_sites(q) else: link['href'] = href diff --git a/setup.py b/setup.py index 997ea16..1d1308b 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setuptools.setup( author='Ben Busby', author_email='benbusby@protonmail.com', name='whoogle-search', - version='0.5.1', + version='0.5.2', include_package_data=True, install_requires=requirements, description='Self-hosted, ad-free, privacy-respecting metasearch engine',