Bold search query in results (#487)

This modifies the search result page by bolding all appearances
of any word in the original query. If portions of the query are in
quotes (e.g. "ice cream"), only exact matches of that sequence of
words will be made bold.

Co-authored-by: Ben Busby <noreply+git@benbusby.com>
main
DUO Labs 2021-10-26 16:59:23 -04:00 committed by GitHub
parent 90441b2668
commit 2c9cf3ecc6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 1 deletions

View File

@ -14,6 +14,7 @@ from app.request import Request, TorError
from app.utils.bangs import resolve_bang
from app.utils.misc import read_config_bool
from app.utils.results import add_ip_card
from app.utils.results import bold_search_terms
from app.utils.search import *
from app.utils.session import generate_user_key, valid_user_session
from bs4 import BeautifulSoup as bsoup
@ -250,7 +251,7 @@ def search():
# Return 503 if temporarily blocked by captcha
resp_code = 503 if has_captcha(str(response)) else 200
response = bold_search_terms(response, query)
# Feature to display IP address
if search_util.check_kw_ip():
html_soup = bsoup(response, "html.parser")

View File

@ -2,6 +2,8 @@ from bs4 import BeautifulSoup
import os
import urllib.parse as urlparse
from urllib.parse import parse_qs
import re
from bs4 import NavigableString
SKIP_ARGS = ['ref_src', 'utm']
@ -34,6 +36,45 @@ SITE_ALTS = {
}
def bold_search_terms(response: str, query: str) -> BeautifulSoup:
    """Wrap every occurrence of the search terms in bold (<b>) tags.

    The query is split on whitespace, except that a double-quoted portion
    (e.g. "ice cream") is kept together and only bolded as an exact phrase.
    Matching is case-insensitive and the bolded text keeps whatever casing
    it has in the page.

    Args:
        response: The initial response body for the query
        query: The original search query

    Returns:
        BeautifulSoup: modified soup object with bold items
    """
    # Imported here so the block does not depend on a new file-level import.
    from bs4.element import PreformattedString

    soup = BeautifulSoup(response, 'html.parser')

    terms = _extract_search_terms(query)
    if not terms:
        # Nothing searchable is left (empty or punctuation-only query). An
        # empty pattern would match every text node, so leave the page as is.
        return soup

    # A single combined pattern applied in a single pass: no text is ever
    # matched twice, so <b> wrappers cannot end up nested or re-matched.
    pattern = re.compile('|'.join(re.escape(term) for term in terms), re.I)

    # find_all returns a materialized list, so editing the tree while
    # looping over the results is safe.
    for text_node in soup.find_all(text=pattern):
        # Comments, CDATA, doctypes, etc. are not page content.
        if isinstance(text_node, PreformattedString):
            continue

        # Inserting markup into a script or stylesheet would corrupt it.
        parent = text_node.parent
        if parent is not None and parent.name in ('script', 'style'):
            continue

        _bold_matches(soup, text_node, pattern)

    return soup


def _extract_search_terms(query: str) -> list:
    """Split a query into the distinct, non-empty terms that should be bold.

    Args:
        query: The original search query

    Returns:
        list: sanitized terms (str), longest first, without duplicates
    """
    terms = []
    seen = set()

    # Split on whitespace that is followed by an even number of double
    # quotes, i.e. whitespace outside of a quoted phrase.
    for chunk in re.split(r'\s+(?=[^"]*(?:"[^"]*"[^"]*)*$)', query):
        # Drop quotes, operators and other punctuation, keeping letters and
        # digits (of any script, not just ASCII) and the spaces in a phrase.
        term = re.sub(r'[^\w ]+|_+', '', chunk).strip()

        key = term.lower()
        if term and key not in seen:
            seen.add(key)
            terms.append(term)

    # Longest first, so that in the combined alternation a phrase is tried
    # before a shorter term it begins with.
    terms.sort(key=len, reverse=True)
    return terms


def _bold_matches(soup: BeautifulSoup, text_node: NavigableString,
                  pattern) -> None:
    """Replace a text node with copies of its text where matches are in <b>.

    Args:
        soup: The soup that owns text_node, used to create the new tags
        text_node: The text node to rewrite in place
        pattern: Compiled regex matching the text that should be bold
    """
    text = str(text_node)
    pieces = []
    cursor = 0

    for match in pattern.finditer(text):
        if match.start() > cursor:
            pieces.append(NavigableString(text[cursor:match.start()]))

        # Real <b> elements rather than '<b>' text: markup placed inside a
        # text node is entity-escaped when the soup is serialized.
        bold = soup.new_tag('b')
        bold.string = match.group(0)
        pieces.append(bold)
        cursor = match.end()

    if not pieces:
        return

    if cursor < len(text):
        pieces.append(NavigableString(text[cursor:]))

    # Insert the pieces in order ahead of the original node, then drop it.
    for piece in pieces:
        text_node.insert_before(piece)
    text_node.extract()
def has_ad_content(element: str) -> bool:
"""Inspects an HTML element for ad related content