Hotfix: extract only 'q' element from query string
Occasionally the search results will contain links with arguments such as 'dq', which were being erroneously matched in attempts to extract the 'q' element from query strings. This enforces that only links with '?q=' or '&q=' (elements with a standalone 'q' arg) will have the element extracted. I also refactored the naming of this element once extracted to be just 'q'. Although this seems counterintuitive, it makes a little more sense since this element is the one we're extracting. It's a vague url arg name, but it is what it is. Bump version to 0.5.2 for hotfix release.
main
parent
e1e6e84649
commit
cbe32a081e
|
@ -22,7 +22,7 @@ app.default_key = generate_user_key()
|
||||||
app.no_cookie_ips = []
|
app.no_cookie_ips = []
|
||||||
app.config['SECRET_KEY'] = os.urandom(32)
|
app.config['SECRET_KEY'] = os.urandom(32)
|
||||||
app.config['SESSION_TYPE'] = 'filesystem'
|
app.config['SESSION_TYPE'] = 'filesystem'
|
||||||
app.config['VERSION_NUMBER'] = '0.5.1'
|
app.config['VERSION_NUMBER'] = '0.5.2'
|
||||||
app.config['APP_ROOT'] = os.getenv(
|
app.config['APP_ROOT'] = os.getenv(
|
||||||
'APP_ROOT',
|
'APP_ROOT',
|
||||||
os.path.dirname(os.path.abspath(__file__)))
|
os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
|
@ -22,6 +22,21 @@ def strip_blocked_sites(query: str) -> str:
|
||||||
return query[:query.find('-site:')] if '-site:' in query else query
|
return query[:query.find('-site:')] if '-site:' in query else query
|
||||||
|
|
||||||
|
|
||||||
|
def extract_q(q_str: str, href: str) -> str:
    """Extracts the 'q' element from a result link. This is typically
    either the link to a result's website, or a string.

    Args:
        q_str: The query string portion of the result link to parse
        href: The full url to check for standalone 'q' elements first,
              rather than parsing the whole query string and then checking.

    Returns:
        str: The 'q' element of the link, or an empty string if the link
             has no standalone, non-blank 'q' argument

    """
    # Only a standalone 'q' argument counts; args such as 'dq' merely end
    # in "q=" and must not be mistaken for it.
    if '?q=' not in href and '&q=' not in href:
        return ''

    # parse_qs omits keys with blank values, and the '?q='/'&q=' marker may
    # sit outside the query string (e.g. in the fragment), so 'q' can still
    # be absent here. Fall back to '' instead of raising KeyError.
    return parse_qs(q_str).get('q', [''])[0]
|
||||||
|
|
||||||
|
|
||||||
class Filter:
|
class Filter:
|
||||||
def __init__(self, user_key: str, mobile=False, config=None) -> None:
|
def __init__(self, user_key: str, mobile=False, config=None) -> None:
|
||||||
if config is None:
|
if config is None:
|
||||||
|
@ -223,20 +238,18 @@ class Filter:
|
||||||
link['target'] = '_blank'
|
link['target'] = '_blank'
|
||||||
|
|
||||||
result_link = urlparse.urlparse(href)
|
result_link = urlparse.urlparse(href)
|
||||||
query = parse_qs(
|
q = extract_q(result_link.query, href)
|
||||||
result_link.query
|
|
||||||
)['q'][0] if 'q=' in href else ''
|
|
||||||
|
|
||||||
if query.startswith('/'):
|
if q.startswith('/'):
|
||||||
# Internal google links (i.e. mail, maps, etc) should still
|
# Internal google links (i.e. mail, maps, etc) should still
|
||||||
# be forwarded to Google
|
# be forwarded to Google
|
||||||
link['href'] = 'https://google.com' + query
|
link['href'] = 'https://google.com' + q
|
||||||
elif '/search?q=' in href:
|
elif '/search?q=' in href:
|
||||||
# "li:1" implies the query should be interpreted verbatim,
|
# "li:1" implies the query should be interpreted verbatim,
|
||||||
# which is accomplished by wrapping the query in double quotes
|
# which is accomplished by wrapping the query in double quotes
|
||||||
if 'li:1' in href:
|
if 'li:1' in href:
|
||||||
query = '"' + query + '"'
|
q = '"' + q + '"'
|
||||||
new_search = 'search?q=' + self.encrypt_path(query)
|
new_search = 'search?q=' + self.encrypt_path(q)
|
||||||
|
|
||||||
query_params = parse_qs(urlparse.urlparse(href).query)
|
query_params = parse_qs(urlparse.urlparse(href).query)
|
||||||
for param in VALID_PARAMS:
|
for param in VALID_PARAMS:
|
||||||
|
@ -247,7 +260,7 @@ class Filter:
|
||||||
link['href'] = new_search
|
link['href'] = new_search
|
||||||
elif 'url?q=' in href:
|
elif 'url?q=' in href:
|
||||||
# Strip unneeded arguments
|
# Strip unneeded arguments
|
||||||
link['href'] = filter_link_args(query)
|
link['href'] = filter_link_args(q)
|
||||||
|
|
||||||
# Add no-js option
|
# Add no-js option
|
||||||
if self.nojs:
|
if self.nojs:
|
||||||
|
@ -255,7 +268,7 @@ class Filter:
|
||||||
else:
|
else:
|
||||||
if href.startswith(MAPS_URL):
|
if href.startswith(MAPS_URL):
|
||||||
# Maps links don't work if a site filter is applied
|
# Maps links don't work if a site filter is applied
|
||||||
link['href'] = MAPS_URL + "?q=" + strip_blocked_sites(query)
|
link['href'] = MAPS_URL + "?q=" + strip_blocked_sites(q)
|
||||||
else:
|
else:
|
||||||
link['href'] = href
|
link['href'] = href
|
||||||
|
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -8,7 +8,7 @@ setuptools.setup(
|
||||||
author='Ben Busby',
|
author='Ben Busby',
|
||||||
author_email='benbusby@protonmail.com',
|
author_email='benbusby@protonmail.com',
|
||||||
name='whoogle-search',
|
name='whoogle-search',
|
||||||
version='0.5.1',
|
version='0.5.2',
|
||||||
include_package_data=True,
|
include_package_data=True,
|
||||||
install_requires=requirements,
|
install_requires=requirements,
|
||||||
description='Self-hosted, ad-free, privacy-respecting metasearch engine',
|
description='Self-hosted, ad-free, privacy-respecting metasearch engine',
|
||||||
|
|
Loading…
Reference in New Issue