From 2c9cf3ecc6835699bcdb1877e7f44549034477d8 Mon Sep 17 00:00:00 2001
From: DUO Labs
Date: Tue, 26 Oct 2021 16:59:23 -0400
Subject: [PATCH] Bold search query in results (#487)

This modifies the search result page by bold-ing all appearances of any
word in the original query. If portions of the query are in quotes (i.e.
"ice cream"), only exact matches of the sequence of words will be made
bold.

Co-authored-by: Ben Busby
---
 app/routes.py        |  3 ++-
 app/utils/results.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/app/routes.py b/app/routes.py
index 2139683..56256ed 100644
--- a/app/routes.py
+++ b/app/routes.py
@@ -14,6 +14,7 @@ from app.request import Request, TorError
 from app.utils.bangs import resolve_bang
 from app.utils.misc import read_config_bool
 from app.utils.results import add_ip_card
+from app.utils.results import bold_search_terms
 from app.utils.search import *
 from app.utils.session import generate_user_key, valid_user_session
 from bs4 import BeautifulSoup as bsoup
@@ -250,7 +251,7 @@ def search():
 
     # Return 503 if temporarily blocked by captcha
     resp_code = 503 if has_captcha(str(response)) else 200
-
+    response = bold_search_terms(response, query)
     # Feature to display IP address
     if search_util.check_kw_ip():
         html_soup = bsoup(response, "html.parser")
diff --git a/app/utils/results.py b/app/utils/results.py
index 8141074..befc86d 100644
--- a/app/utils/results.py
+++ b/app/utils/results.py
@@ -2,6 +2,8 @@ from bs4 import BeautifulSoup
 import os
 import urllib.parse as urlparse
 from urllib.parse import parse_qs
+import re
+from bs4 import NavigableString
 
 SKIP_ARGS = ['ref_src', 'utm']
 
@@ -34,6 +36,70 @@ SITE_ALTS = {
 }
 
 
+def bold_search_terms(response: str, query: str) -> BeautifulSoup:
+    """Wraps all search terms in bold tags (<b>). If any terms are wrapped
+    in quotes, only that exact phrase will be made bold.
+
+    Matching is case-insensitive and keeps the casing found in the page.
+    Text inside script, style, title and textarea elements, and special
+    strings such as comments, are left untouched.
+
+    Args:
+        response: The initial response body for the query
+        query: The original search query
+
+    Returns:
+        BeautifulSoup: modified soup object with bold items
+    """
+    soup = BeautifulSoup(response, 'html.parser')
+
+    # Split all words out of query, grouping the ones wrapped in quotes.
+    # Punctuation (quotes included) is dropped; a term that ends up empty
+    # is skipped because an empty pattern would match at every position.
+    terms = set()
+    for word in re.split(r'\s+(?=[^"]*(?:"[^"]*"[^"]*)*$)', query):
+        word = re.sub(r'[^\w ]+', '', word).strip()
+        if word:
+            terms.add(word.lower())
+    if not terms:
+        return soup
+
+    # A single alternation, longest term first, rewrites each text node
+    # once and keeps a short term from splitting a longer one
+    ordered = sorted(terms, key=lambda term: (-len(term), term))
+    pattern = re.compile('|'.join(map(re.escape, ordered)), re.I)
+    skipped_parents = ('script', 'style', 'title', 'textarea')
+
+    for nav_str in soup.find_all(string=pattern):
+        # The exact type check leaves out comments, CDATA and the other
+        # NavigableString subclasses, which are not visible page text
+        if type(nav_str) is not NavigableString:
+            continue
+        if nav_str.parent is None or nav_str.parent.name in skipped_parents:
+            continue
+
+        # Rebuild the node as a run of text and <b> siblings. Real tags are
+        # needed: markup placed inside a string is escaped on output.
+        text = str(nav_str)
+        last = nav_str
+        cursor = 0
+        for match in pattern.finditer(text):
+            if match.start() > cursor:
+                plain = NavigableString(text[cursor:match.start()])
+                last.insert_after(plain)
+                last = plain
+            bold = soup.new_tag('b')
+            bold.string = match.group(0)
+            last.insert_after(bold)
+            last = bold
+            cursor = match.end()
+        if cursor < len(text):
+            last.insert_after(NavigableString(text[cursor:]))
+        nav_str.extract()
+
+    return soup
+
+
 def has_ad_content(element: str) -> bool:
     """Inspects an HTML element for ad related content
 