From 3dda8b25ef27c9e00b33eb9bef52c1eb0407cf69 Mon Sep 17 00:00:00 2001 From: Ahmad Alkadri Date: Thu, 29 Dec 2022 23:19:28 +0100 Subject: [PATCH] Escape html text in result body (#912) Moved the cleaner functions to app/utils/escaper.py Removed unused import 're' Moved the cleaner functionalities to the "search.py" and "routes.py" Making sure escaped chars stay escaped during process Replaced "<" and ">" with "andlt;" and "andgt;", respectively. This way, when the 'response' object gets loaded into bsoup (which happens several times throughout the process between search.py and routes.py), bsoup will not unescape them. --- app/routes.py | 6 ++++-- app/utils/search.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/app/routes.py b/app/routes.py index 36e6278..526331d 100644 --- a/app/routes.py +++ b/app/routes.py @@ -284,7 +284,6 @@ def autocomplete(): g.user_request.autocomplete(q) if not g.user_config.tor else [] ]) - @app.route(f'/{Endpoint.search}', methods=['GET', 'POST']) @session_required @auth_required @@ -323,6 +322,7 @@ def search(): soup = bsoup(response, "html.parser"); for x in soup.find_all(attrs={"id": "st-card"}): x.replace_with("") + response = str(soup) # Return 503 if temporarily blocked by captcha @@ -336,6 +336,7 @@ def search(): config=g.user_config, query=urlparse.unquote(query), params=g.user_config.to_params(keys=['preferences'])), 503 + response = bold_search_terms(response, query) # Feature to display IP address @@ -358,6 +359,7 @@ def search(): preferences = g.user_config.preferences home_url = f"home?preferences={preferences}" if preferences else "home" + cleanresponse = str(response).replace("andlt;","<").replace("andgt;",">") return render_template( 'display.html', @@ -378,7 +380,7 @@ def search(): is_translation=any( _ in query.lower() for _ in [translation['translate'], 'translate'] ) and not search_util.search_type, # Standard search queries only - response=response, + response=cleanresponse, 
version_number=app.config['VERSION_NUMBER'], search_header=render_template( 'header.html', diff --git a/app/utils/search.py b/app/utils/search.py index d39e539..ecdadba 100644 --- a/app/utils/search.py +++ b/app/utils/search.py @@ -1,7 +1,6 @@ import os import re from typing import Any - from app.filter import Filter from app.request import gen_query from app.utils.misc import get_proxy_host_url @@ -142,7 +141,8 @@ class Search: force_mobile=view_image) # Produce cleanable html soup from response - html_soup = bsoup(get_body.text, 'html.parser') + get_body_safed = get_body.text.replace("<","andlt;").replace(">","andgt;") + html_soup = bsoup(get_body_safed, 'html.parser') # Replace current soup if view_image is active if view_image: