Hotfix: extract only 'q' element from query string

Occasionally the search results will contain links with arguments such
as 'dq', which were being erroneously matched in attempts to extract the
'q' element from query strings. This enforces that only links containing
'?q=' or '&q=' (i.e. links with a standalone 'q' arg) will have the
element extracted.

I also refactored the naming of this element once extracted to be just
'q'. Although this seems counterintuitive, it makes a little more sense
since this element is the one we're extracting. It's a vague url arg
name, but it is what it is.

Bump version to 0.5.2 for hotfix release
main
Ben Busby 2021-05-29 12:21:20 -04:00
parent e1e6e84649
commit cbe32a081e
No known key found for this signature in database
GPG Key ID: 3B08611DF6E62ED2
3 changed files with 24 additions and 11 deletions

View File

@ -22,7 +22,7 @@ app.default_key = generate_user_key()
app.no_cookie_ips = []
app.config['SECRET_KEY'] = os.urandom(32)
app.config['SESSION_TYPE'] = 'filesystem'
app.config['VERSION_NUMBER'] = '0.5.1'
app.config['VERSION_NUMBER'] = '0.5.2'
app.config['APP_ROOT'] = os.getenv(
'APP_ROOT',
os.path.dirname(os.path.abspath(__file__)))

View File

@ -22,6 +22,21 @@ def strip_blocked_sites(query: str) -> str:
return query[:query.find('-site:')] if '-site:' in query else query
def extract_q(q_str: str, href: str) -> str:
    """Extracts the 'q' element from a result link. This is typically
    either the link to a result's website, or a string.

    Args:
        q_str: The query string of the result link to parse
        href: The full url to check for standalone 'q' elements first,
              rather than parsing the whole query string and then checking.

    Returns:
        str: The 'q' element of the link, or an empty string if the link
             has no standalone 'q' element (or its value is blank)
    """
    # Only a standalone 'q' arg counts; a bare 'q=' check would also match
    # unrelated args that merely end in 'q' (e.g. 'dq=').
    if '?q=' not in href and '&q=' not in href:
        return ''

    # keep_blank_values ensures a blank 'q' ('?q=&sa=U') is still returned
    # as '' instead of being dropped; .get() covers the case where the
    # marker appears in href but not in the parsed query string itself
    # (e.g. inside a fragment), which would otherwise raise a KeyError.
    values = parse_qs(q_str, keep_blank_values=True).get('q')
    return values[0] if values else ''
class Filter:
def __init__(self, user_key: str, mobile=False, config=None) -> None:
if config is None:
@ -223,20 +238,18 @@ class Filter:
link['target'] = '_blank'
result_link = urlparse.urlparse(href)
query = parse_qs(
result_link.query
)['q'][0] if 'q=' in href else ''
q = extract_q(result_link.query, href)
if query.startswith('/'):
if q.startswith('/'):
# Internal google links (i.e. mail, maps, etc) should still
# be forwarded to Google
link['href'] = 'https://google.com' + query
link['href'] = 'https://google.com' + q
elif '/search?q=' in href:
# "li:1" implies the query should be interpreted verbatim,
# which is accomplished by wrapping the query in double quotes
if 'li:1' in href:
query = '"' + query + '"'
new_search = 'search?q=' + self.encrypt_path(query)
q = '"' + q + '"'
new_search = 'search?q=' + self.encrypt_path(q)
query_params = parse_qs(urlparse.urlparse(href).query)
for param in VALID_PARAMS:
@ -247,7 +260,7 @@ class Filter:
link['href'] = new_search
elif 'url?q=' in href:
# Strip unneeded arguments
link['href'] = filter_link_args(query)
link['href'] = filter_link_args(q)
# Add no-js option
if self.nojs:
@ -255,7 +268,7 @@ class Filter:
else:
if href.startswith(MAPS_URL):
# Maps links don't work if a site filter is applied
link['href'] = MAPS_URL + "?q=" + strip_blocked_sites(query)
link['href'] = MAPS_URL + "?q=" + strip_blocked_sites(q)
else:
link['href'] = href

View File

@ -8,7 +8,7 @@ setuptools.setup(
author='Ben Busby',
author_email='benbusby@protonmail.com',
name='whoogle-search',
version='0.5.1',
version='0.5.2',
include_package_data=True,
install_requires=requirements,
description='Self-hosted, ad-free, privacy-respecting metasearch engine',