Escape html text in result body (#912)

Moved the cleaner functions to app/utils/escaper.py

Removed unused import 're'

Moved the cleaner functionality into "search.py" and "routes.py"

Made sure escaped characters stay escaped during processing

Replaced "<" and ">" with "andlt;" and "andgt;", respectively. This way,
when the 'response' object gets loaded into bsoup (which happens several times
throughout the process between search.py and routes.py), bsoup will not
unescape them.
main
Ahmad Alkadri 2022-12-29 23:19:28 +01:00 committed by GitHub
parent 08aa1ab8f1
commit 3dda8b25ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 4 deletions

View File

@ -284,7 +284,6 @@ def autocomplete():
g.user_request.autocomplete(q) if not g.user_config.tor else [] g.user_request.autocomplete(q) if not g.user_config.tor else []
]) ])
@app.route(f'/{Endpoint.search}', methods=['GET', 'POST']) @app.route(f'/{Endpoint.search}', methods=['GET', 'POST'])
@session_required @session_required
@auth_required @auth_required
@ -323,6 +322,7 @@ def search():
soup = bsoup(response, "html.parser"); soup = bsoup(response, "html.parser");
for x in soup.find_all(attrs={"id": "st-card"}): for x in soup.find_all(attrs={"id": "st-card"}):
x.replace_with("") x.replace_with("")
response = str(soup) response = str(soup)
# Return 503 if temporarily blocked by captcha # Return 503 if temporarily blocked by captcha
@ -336,6 +336,7 @@ def search():
config=g.user_config, config=g.user_config,
query=urlparse.unquote(query), query=urlparse.unquote(query),
params=g.user_config.to_params(keys=['preferences'])), 503 params=g.user_config.to_params(keys=['preferences'])), 503
response = bold_search_terms(response, query) response = bold_search_terms(response, query)
# Feature to display IP address # Feature to display IP address
@ -358,6 +359,7 @@ def search():
preferences = g.user_config.preferences preferences = g.user_config.preferences
home_url = f"home?preferences={preferences}" if preferences else "home" home_url = f"home?preferences={preferences}" if preferences else "home"
cleanresponse = str(response).replace("andlt;","<").replace("andgt;",">")
return render_template( return render_template(
'display.html', 'display.html',
@ -378,7 +380,7 @@ def search():
is_translation=any( is_translation=any(
_ in query.lower() for _ in [translation['translate'], 'translate'] _ in query.lower() for _ in [translation['translate'], 'translate']
) and not search_util.search_type, # Standard search queries only ) and not search_util.search_type, # Standard search queries only
response=response, response=cleanresponse,
version_number=app.config['VERSION_NUMBER'], version_number=app.config['VERSION_NUMBER'],
search_header=render_template( search_header=render_template(
'header.html', 'header.html',

View File

@ -1,7 +1,6 @@
import os import os
import re import re
from typing import Any from typing import Any
from app.filter import Filter from app.filter import Filter
from app.request import gen_query from app.request import gen_query
from app.utils.misc import get_proxy_host_url from app.utils.misc import get_proxy_host_url
@ -142,7 +141,8 @@ class Search:
force_mobile=view_image) force_mobile=view_image)
# Produce cleanable html soup from response # Produce cleanable html soup from response
html_soup = bsoup(get_body.text, 'html.parser') get_body_safed = get_body.text.replace("<","andlt;").replace(">","andgt;")
html_soup = bsoup(get_body_safed, 'html.parser')
# Replace current soup if view_image is active # Replace current soup if view_image is active
if view_image: if view_image: