Remove google prefs link for mismatched language queries

Queries performed in a language other than the configured one
contain a result div that prompts the user to configure their language
preferences using Google's preferences page.

Since we want all language configuration to occur on Whoogle only, we
can safely remove this result div.

Fixes #444
Fixes #386
main
Ben Busby 2022-08-01 13:46:06 -06:00
parent 839683b4e1
commit 73dd5b80b5
No known key found for this signature in database
GPG Key ID: B9B7231E01D924A1
1 changed file with 27 additions and 6 deletions

View File

@ -443,18 +443,39 @@ class Filter:
None (the tag is updated directly) None (the tag is updated directly)
""" """
link_netloc = urlparse.urlparse(link['href']).netloc parsed_link = urlparse.urlparse(link['href'])
link_netloc = ''
if '/url?q=' in link['href']:
link_netloc = extract_q(parsed_link.query, link['href'])
else:
link_netloc = parsed_link.netloc
# Remove any elements that direct to unsupported Google pages # Remove any elements that direct to unsupported Google pages
if any(url in link_netloc for url in unsupported_g_pages): if any(url in link_netloc for url in unsupported_g_pages):
# FIXME: The "Shopping" tab requires further filtering (see #136) # FIXME: The "Shopping" tab requires further filtering (see #136)
# Temporarily removing all links to that tab for now. # Temporarily removing all links to that tab for now.
parent = link.parent parent = link.parent
while parent: if 'google.com/preferences?hl=' in link_netloc:
p_cls = parent.attrs.get('class') or [] # Handle case where a search is performed in a different
if parent.name == 'footer' or f'{GClasses.footer}' in p_cls: # language than what is configured. This usually returns a
link.decompose() # div with the same classes as normal search results, but with
parent = parent.parent # a link to configure language preferences through Google.
# Since we want all language config done through Whoogle, we
# can safely decompose this element.
while parent:
p_cls = parent.attrs.get('class') or []
if f'{GClasses.result_class_a}' in p_cls:
parent.decompose()
break
parent = parent.parent
else:
# Remove cases where google links appear in the footer
while parent:
p_cls = parent.attrs.get('class') or []
if parent.name == 'footer' or f'{GClasses.footer}' in p_cls:
link.decompose()
parent = parent.parent
return return
# Replace href with only the intended destination (no "utm" type tags) # Replace href with only the intended destination (no "utm" type tags)