Always bold `CN`/`JA`/`KO` search terms (#928)
Add a function to check if target_word contains CJK characters. If a search term contains Chinese, Japanese, or Korean characters, the term is bolded in search results regardless of whitespace. CJK characters: Chinese, Japanese (hiragana, katakana, kanji), and Korean (hangul syllables, hangul jamo). Co-authored-by: Ben Busby <contact@benbusby.com> main
parent
ccf9f06f2f
commit
e5a5aad997
|
@ -44,6 +44,27 @@ SITE_ALTS = {
|
|||
}
|
||||
|
||||
|
||||
def contains_cjko(s: str) -> bool:
    """Check whether a string contains Chinese, Japanese, or Korean text.

    The check is a single regex character class built from Unicode code
    point ranges; one matching character anywhere in the string is enough.

    Args:
        s (str): string to be checked

    Returns:
        bool: True if s contains at least one character from the ranges
            listed below, False otherwise (including for an empty string)
    """
    # Adjacent string literals are concatenated into the body of one
    # regex character class, e.g. "[<range><range>...]".
    unicode_ranges = (
        '\u4e00-\u9fff'  # CJK unified ideographs (Chinese, Japanese kanji)
        '\u3400-\u4dbf'  # CJK unified ideographs extension A
        '\u3040-\u309f'  # Japanese hiragana
        '\u30a0-\u30ff'  # Japanese katakana
        '\uff66-\uff9f'  # Japanese half-width katakana
        '\uac00-\ud7af'  # Korean hangul syllables
        '\u1100-\u11ff'  # Korean hangul jamo
        '\u3130-\u318f'  # Korean hangul compatibility jamo (keyboard jamo)
    )
    return bool(re.search(f'[{unicode_ranges}]', s))
|
||||
|
||||
|
||||
def bold_search_terms(response: str, query: str) -> BeautifulSoup:
|
||||
"""Wraps all search terms in bold tags (<b>). If any terms are wrapped
|
||||
in quotes, only that exact phrase will be made bold.
|
||||
|
@ -66,12 +87,18 @@ def bold_search_terms(response: str, query: str) -> BeautifulSoup:
|
|||
# Ensure target word is escaped for regex
|
||||
target_word = re.escape(target_word)
|
||||
|
||||
# Check if the word contains Chinese, Japanese, or Korean characters
|
||||
if contains_cjko(target_word):
|
||||
reg_pattern = fr'((?![{{}}<>-]){target_word}(?![{{}}<>-]))'
|
||||
else:
|
||||
reg_pattern = fr'\b((?![{{}}<>-]){target_word}(?![{{}}<>-]))\b'
|
||||
|
||||
if re.match('.*[@_!#$%^&*()<>?/\|}{~:].*', target_word) or (
|
||||
element.parent and element.parent.name == 'style'):
|
||||
return
|
||||
|
||||
element.replace_with(BeautifulSoup(
|
||||
re.sub(fr'\b((?![{{}}<>-]){target_word}(?![{{}}<>-]))\b',
|
||||
re.sub(reg_pattern,
|
||||
r'<b>\1</b>',
|
||||
element,
|
||||
flags=re.I), 'html.parser')
|
||||
|
|
Loading…
Reference in New Issue