Ensure searches with a leading slash are treated as queries

A user reported a bug where a search with a leading slash (in this case,
"/e/OS apps") was interpreted as a Google-specific link when clicking
through to the next page of results.

This was due to the behavior that Google's search results exhibit, where
internal links for pages like support.google.com are delivered with
params like "?q=/support" rather than a direct link. This fixes that
scenario by checking the "q" param value against the user's original
query to ensure they don't match before assuming that the result is
intended as a redirect.

Fixes 
main
Ben Busby 2022-06-03 14:03:57 -06:00
parent 57d9ae9351
commit ef98d85dc5
No known key found for this signature in database
GPG Key ID: B9B7231E01D924A1
3 changed files with 25 additions and 2 deletions

View File

@ -89,11 +89,13 @@ class Filter:
config: Config, config: Config,
root_url='', root_url='',
page_url='', page_url='',
query='',
mobile=False) -> None: mobile=False) -> None:
self.config = config self.config = config
self.mobile = mobile self.mobile = mobile
self.user_key = user_key self.user_key = user_key
self.page_url = page_url self.page_url = page_url
self.query = query
self.main_divs = ResultSet('') self.main_divs = ResultSet('')
self._elements = 0 self._elements = 0
self._av = set() self._av = set()
@ -429,7 +431,7 @@ class Filter:
result_link = urlparse.urlparse(href) result_link = urlparse.urlparse(href)
q = extract_q(result_link.query, href) q = extract_q(result_link.query, href)
if q.startswith('/'): if q.startswith('/') and q not in self.query:
# Internal google links (i.e. mail, maps, etc) should still # Internal google links (i.e. mail, maps, etc) should still
# be forwarded to Google # be forwarded to Google
link['href'] = 'https://google.com' + q link['href'] = 'https://google.com' + q

View File

@ -118,7 +118,8 @@ class Search:
content_filter = Filter(self.session_key, content_filter = Filter(self.session_key,
root_url=self.request.url_root, root_url=self.request.url_root,
mobile=mobile, mobile=mobile,
config=self.config) config=self.config,
query=self.query)
full_query = gen_query(self.query, full_query = gen_query(self.query,
self.request_params, self.request_params,
self.config) self.config)

View File

@ -122,3 +122,23 @@ def test_recent_results(client):
assert (current_date - date).days <= (num_days + 5) assert (current_date - date).days <= (num_days + 5)
except ParserError: except ParserError:
pass pass
def test_leading_slash_search(client):
    """Check that a query starting with '/' is handled as a search term.

    Runs a search for '/test', passes the response through a ``Filter``
    constructed with that same query, and verifies that every resulting
    link containing ``start=`` (presumably the pagination links) still
    begins with the search endpoint.
    """
    query = '/test'
    response = client.get(f'/{Endpoint.search}?q={query}')
    assert response._status_code == 200

    content_filter = Filter(
        user_key=generate_user_key(),
        config=Config(**demo_config),
        query=query
    )
    filtered = content_filter.clean(
        BeautifulSoup(response.data, 'html.parser'))

    # Links without a "start=" parameter are not relevant to this check
    paged_hrefs = [
        anchor['href']
        for anchor in filtered.find_all('a', href=True)
        if 'start=' in anchor['href']
    ]
    for href in paged_hrefs:
        assert href.startswith(f'{Endpoint.search}')