From 6decab5a5102c8a39ff05398f253819c36817379 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Tue, 26 Oct 2021 16:15:24 -0600 Subject: [PATCH] Improve regex for bolding search terms Co-authored by @DUOLabs333 --- app/utils/results.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/app/utils/results.py b/app/utils/results.py index befc86d..8f79326 100644 --- a/app/utils/results.py +++ b/app/utils/results.py @@ -1,9 +1,8 @@ -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, NavigableString import os import urllib.parse as urlparse from urllib.parse import parse_qs import re -from bs4 import NavigableString SKIP_ARGS = ['ref_src', 'utm'] @@ -52,16 +51,14 @@ def bold_search_terms(response: str, query: str) -> BeautifulSoup: def replace_any_case(element: NavigableString, target_word: str) -> None: # Replace all instances of the word, but maintaining the same case in # the replacement + if len(element) == len(target_word): + return + element.replace_with( - element.replace( - target_word.lower(), f'{target_word.lower()}' - ).replace( - target_word.capitalize(), f'{target_word.capitalize()}' - ).replace( - target_word.title(), f'{target_word.title()}' - ).replace( - target_word.upper(), f'{target_word.upper()}' - ) + re.sub(r'\b((?![{}<>-])' + target_word + r'(?![{}<>-]))\b', + r'\1', + element, + flags=re.I) ) # Split all words out of query, grouping the ones wrapped in quotes