diff --git a/app/utils/results.py b/app/utils/results.py
index c78f866..dbd60cc 100644
--- a/app/utils/results.py
+++ b/app/utils/results.py
@@ -144,12 +144,16 @@ def get_first_link(soup: BeautifulSoup) -> str:
         str: A str link to the first result
 
     """
-    # Replace hrefs with only the intended destination (no "utm" type tags)
     for a in soup.find_all('a', href=True):
-        # Return the first search result URL
-        if 'url?q=' in a['href']:
-            return filter_link_args(a['href'])
+        # Skip links nested inside <details> so we don't grab those. Checking
+        # ancestry leaves the soup unmodified, so nothing has to be restored
+        if a.find_parent('details'):
+            continue
+
+        # Return the first absolute (http/https) result URL
+        if a['href'].startswith(('http://', 'https://')):
+            return a['href']
     return ''
 
 
 def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str:
diff --git a/app/utils/search.py b/app/utils/search.py
index 6e2d62d..ed3b0f6 100644
--- a/app/utils/search.py
+++ b/app/utils/search.py
@@ -102,9 +102,16 @@ class Search:
             except InvalidToken:
                 pass
 
-        # Strip leading '! ' for "feeling lucky" queries
-        self.feeling_lucky = q.startswith('! ')
-        self.query = q[2:] if self.feeling_lucky else q
+        # Strip '!' for "feeling lucky" queries. The bang only counts when it
+        # stands alone, delimited by whitespace or the start/end of the query
+        if match := re.search(r"(^|\s)!($|\s)", q):
+            self.feeling_lucky = True
+            start, end = match.span()
+            self.query = " ".join([seg for seg in [q[:start], q[end:]] if seg])
+        else:
+            self.feeling_lucky = False
+            self.query = q
+
         # Check for possible widgets
         self.widget = "ip" if re.search("([^a-z0-9]|^)my *[^a-z0-9] *(ip|internet protocol)" +
                         "($|( *[^a-z0-9] *(((addres|address|adres|" +
@@ -161,22 +168,25 @@ class Search:
         if g.user_request.tor_valid:
             html_soup.insert(0, bsoup(TOR_BANNER, 'html.parser'))
 
+        formatted_results = content_filter.clean(html_soup)
         if self.feeling_lucky:
-            return get_first_link(html_soup)
-        else:
-            formatted_results = content_filter.clean(html_soup)
+            if lucky_link := get_first_link(formatted_results):
+                return lucky_link
 
-            # Append user config to all search links, if available
-            param_str = ''.join('&{}={}'.format(k, v)
-                                for k, v in
-                                self.request_params.to_dict(flat=True).items()
-                                if self.config.is_safe_key(k))
-            for link in formatted_results.find_all('a', href=True):
-                link['rel'] = "nofollow noopener noreferrer"
-                if 'search?' not in link['href'] or link['href'].index(
-                        'search?') > 1:
-                    continue
-                link['href'] += param_str
+            # Fall through to regular search if unable to find link
+            self.feeling_lucky = False
 
-            return str(formatted_results)
+        # Append user config to all search links, if available
+        param_str = ''.join('&{}={}'.format(k, v)
+                            for k, v in
+                            self.request_params.to_dict(flat=True).items()
+                            if self.config.is_safe_key(k))
+        for link in formatted_results.find_all('a', href=True):
+            link['rel'] = "nofollow noopener noreferrer"
+            if 'search?' not in link['href'] or link['href'].index(
+                    'search?') > 1:
+                continue
+            link['href'] += param_str
+
+        return str(formatted_results)
 
diff --git a/test/test_routes.py b/test/test_routes.py
index 6409f2d..1f64827 100644
--- a/test/test_routes.py
+++ b/test/test_routes.py
@@ -17,8 +17,15 @@ def test_search(client):
 
 
 def test_feeling_lucky(client):
-    rv = client.get(f'/{Endpoint.search}?q=!%20test')
+    # Bang at beginning of query
+    rv = client.get(f'/{Endpoint.search}?q=!%20wikipedia')
     assert rv._status_code == 303
+    assert rv.headers.get('Location').startswith('https://www.wikipedia.org')
+
+    # Move bang to end of query
+    rv = client.get(f'/{Endpoint.search}?q=github%20!')
+    assert rv._status_code == 303
+    assert rv.headers.get('Location').startswith('https://github.com')
 
 
 def test_ddg_bang(client):