diff --git a/app/routes.py b/app/routes.py
index 2139683..56256ed 100644
--- a/app/routes.py
+++ b/app/routes.py
@@ -14,6 +14,7 @@ from app.request import Request, TorError
 from app.utils.bangs import resolve_bang
 from app.utils.misc import read_config_bool
 from app.utils.results import add_ip_card
+from app.utils.results import bold_search_terms
 from app.utils.search import *
 from app.utils.session import generate_user_key, valid_user_session
 from bs4 import BeautifulSoup as bsoup
@@ -250,7 +251,7 @@ def search():
 
     # Return 503 if temporarily blocked by captcha
     resp_code = 503 if has_captcha(str(response)) else 200
-
+    response = bold_search_terms(response, query)
     # Feature to display IP address
     if search_util.check_kw_ip():
         html_soup = bsoup(response, "html.parser")
diff --git a/app/utils/results.py b/app/utils/results.py
index 8141074..befc86d 100644
--- a/app/utils/results.py
+++ b/app/utils/results.py
@@ -2,6 +2,8 @@ from bs4 import BeautifulSoup
 import os
 import urllib.parse as urlparse
 from urllib.parse import parse_qs
+import re
+from bs4 import NavigableString
 
 SKIP_ARGS = ['ref_src', 'utm']
 
@@ -34,6 +36,69 @@ SITE_ALTS = {
 }
 
 
+def bold_search_terms(response: str, query: str) -> BeautifulSoup:
+    """Wraps all search terms in bold tags (<b>). If any terms are wrapped
+    in quotes, only that exact phrase will be made bold.
+
+    Matching is case-insensitive and the matched text keeps its own case.
+    Strings inside script, style, title and textarea elements, and special
+    strings such as comments, are left untouched.
+
+    Args:
+        response: The initial response body for the query
+        query: The original search query
+
+    Returns:
+        BeautifulSoup: modified soup object with bold items
+    """
+    soup = BeautifulSoup(response, 'html.parser')
+
+    # Split all words out of query, grouping the ones wrapped in quotes
+    terms = set()
+    for term in re.split(r'\s+(?=[^"]*(?:"[^"]*"[^"]*)*$)', query):
+        # Drop quotes and punctuation; \w keeps non-ASCII letters
+        term = re.sub(r'[^\w ]+', '', term).strip()
+        # An empty term would match at every position of every string
+        if term:
+            terms.add(term.lower())
+    if not terms:
+        return soup
+
+    # One alternation, longest term first, so every string is rewritten
+    # once and a quoted phrase wins over a shorter term inside it
+    ordered = sorted(terms, key=lambda t: (-len(t), t))
+    pattern = re.compile('(' + '|'.join(map(re.escape, ordered)) + ')', re.I)
+    skip_parents = {'script', 'style', 'title', 'textarea'}
+
+    for nav_str in soup.find_all(string=pattern):
+        # Exact type check on purpose: Comment, Doctype and similar
+        # NavigableString subclasses are not visible text
+        if type(nav_str) is not NavigableString:
+            continue
+        if nav_str.parent is None or nav_str.parent.name in skip_parents:
+            continue
+
+        # split() with one capture group alternates plain text (even
+        # index) and matched term (odd index). Matches become real <b>
+        # tags: markup placed in a plain string is escaped on output
+        last = None
+        for idx, piece in enumerate(pattern.split(nav_str)):
+            if not piece:
+                continue
+            if idx % 2:
+                node = soup.new_tag('b')
+                node.string = piece
+            else:
+                node = NavigableString(piece)
+            if last is None:
+                nav_str.replace_with(node)
+            else:
+                last.insert_after(node)
+            last = node
+
+    return soup
+
+
 def has_ad_content(element: str) -> bool:
     """Inspects an HTML element for ad related content
 