Hotfix: extract only 'q' element from query string
Occasionally the search results contain links with arguments such as 'dq', which were erroneously matched when attempting to extract the 'q' element from query strings. This change enforces that only links containing '?q=' or '&q=' (i.e. a standalone 'q' argument) will have the element extracted. I also renamed the extracted element to just 'q'. Although this seems counterintuitive, it makes a little more sense since this is the element we're extracting. It's a vague URL argument name, but it is what it is. Bump version to 0.5.2 for the hotfix release.
main
parent
e1e6e84649
commit
cbe32a081e
|
@ -22,7 +22,7 @@ app.default_key = generate_user_key()
|
|||
app.no_cookie_ips = []
|
||||
app.config['SECRET_KEY'] = os.urandom(32)
|
||||
app.config['SESSION_TYPE'] = 'filesystem'
|
||||
app.config['VERSION_NUMBER'] = '0.5.1'
|
||||
app.config['VERSION_NUMBER'] = '0.5.2'
|
||||
app.config['APP_ROOT'] = os.getenv(
|
||||
'APP_ROOT',
|
||||
os.path.dirname(os.path.abspath(__file__)))
|
||||
|
|
|
@ -22,6 +22,21 @@ def strip_blocked_sites(query: str) -> str:
|
|||
return query[:query.find('-site:')] if '-site:' in query else query
|
||||
|
||||
|
||||
def extract_q(q_str: str, href: str) -> str:
    """Extract the standalone 'q' element from a result link.

    The 'q' element is typically either the link to a result's website,
    or a plain string.

    Args:
        q_str: The query string portion of the result link to parse
        href: The full url, checked first for a standalone 'q' argument
              ('?q=' or '&q=') so that similarly named arguments such as
              'dq' are never mistaken for it.

    Returns:
        str: The first 'q' value of the link, or an empty string if the
             link has no standalone, non-blank 'q' argument
    """
    # Only a standalone 'q' argument counts; a bare 'q=' substring check
    # would also match arguments like 'dq='.
    if '?q=' not in href and '&q=' not in href:
        return ''

    # parse_qs drops blank values by default, and the marker may appear
    # outside the query string (e.g. in the fragment), so 'q' can still be
    # missing here -- fall back to an empty string instead of raising.
    return parse_qs(q_str).get('q', [''])[0]
|
||||
|
||||
|
||||
class Filter:
|
||||
def __init__(self, user_key: str, mobile=False, config=None) -> None:
|
||||
if config is None:
|
||||
|
@ -223,20 +238,18 @@ class Filter:
|
|||
link['target'] = '_blank'
|
||||
|
||||
result_link = urlparse.urlparse(href)
|
||||
query = parse_qs(
|
||||
result_link.query
|
||||
)['q'][0] if 'q=' in href else ''
|
||||
q = extract_q(result_link.query, href)
|
||||
|
||||
if query.startswith('/'):
|
||||
if q.startswith('/'):
|
||||
# Internal google links (i.e. mail, maps, etc) should still
|
||||
# be forwarded to Google
|
||||
link['href'] = 'https://google.com' + query
|
||||
link['href'] = 'https://google.com' + q
|
||||
elif '/search?q=' in href:
|
||||
# "li:1" implies the query should be interpreted verbatim,
|
||||
# which is accomplished by wrapping the query in double quotes
|
||||
if 'li:1' in href:
|
||||
query = '"' + query + '"'
|
||||
new_search = 'search?q=' + self.encrypt_path(query)
|
||||
q = '"' + q + '"'
|
||||
new_search = 'search?q=' + self.encrypt_path(q)
|
||||
|
||||
query_params = parse_qs(urlparse.urlparse(href).query)
|
||||
for param in VALID_PARAMS:
|
||||
|
@ -247,7 +260,7 @@ class Filter:
|
|||
link['href'] = new_search
|
||||
elif 'url?q=' in href:
|
||||
# Strip unneeded arguments
|
||||
link['href'] = filter_link_args(query)
|
||||
link['href'] = filter_link_args(q)
|
||||
|
||||
# Add no-js option
|
||||
if self.nojs:
|
||||
|
@ -255,7 +268,7 @@ class Filter:
|
|||
else:
|
||||
if href.startswith(MAPS_URL):
|
||||
# Maps links don't work if a site filter is applied
|
||||
link['href'] = MAPS_URL + "?q=" + strip_blocked_sites(query)
|
||||
link['href'] = MAPS_URL + "?q=" + strip_blocked_sites(q)
|
||||
else:
|
||||
link['href'] = href
|
||||
|
||||
|
|
2
setup.py
2
setup.py
|
@ -8,7 +8,7 @@ setuptools.setup(
|
|||
author='Ben Busby',
|
||||
author_email='benbusby@protonmail.com',
|
||||
name='whoogle-search',
|
||||
version='0.5.1',
|
||||
version='0.5.2',
|
||||
include_package_data=True,
|
||||
install_requires=requirements,
|
||||
description='Self-hosted, ad-free, privacy-respecting metasearch engine',
|
||||
|
|
Loading…
Reference in New Issue