Suppress invalid warning from bs4, add 404 handler

The latest version of the bs4 library was emitting a spurious parsing
warning (MarkupResemblesLocatorWarning). This commit suppresses that
warning so it no longer appears in the console.

A 404 handler was added so that 404 errors are reported in the error
template instead of only being logged to the console, since many users
assumed that 404 errors from the result page were problems with Whoogle
itself.

Fixes #967
main
Ben Busby 2023-03-07 11:28:55 -07:00
parent f7c4381ba6
commit 8c426ab180
No known key found for this signature in database
GPG Key ID: B9B7231E01D924A1
2 changed files with 10 additions and 1 deletions

View File

@ -557,6 +557,11 @@ def window():
)
@app.errorhandler(404)
def page_not_found(e):
    """Handle 404 errors by rendering the error template.

    Args:
        e: The error passed in by Flask for the 404 response.

    Returns:
        A ``(body, status)`` tuple: the rendered ``error.html`` template
        with the stringified error as ``error_message``, and the status
        code 404.
    """
    # Pass the error's text to the template so it is shown on the page.
    message = str(e)
    rendered_page = render_template('error.html', error_message=message)
    return rendered_page, 404
def run_app() -> None:
parser = argparse.ArgumentParser(
description='Whoogle Search console runner')

View File

@ -1,6 +1,6 @@
from app.models.config import Config
from app.models.endpoint import Endpoint
from bs4 import BeautifulSoup, NavigableString
from bs4 import BeautifulSoup, NavigableString, MarkupResemblesLocatorWarning
import copy
from flask import current_app
import html
@ -8,6 +8,10 @@ import os
import urllib.parse as urlparse
from urllib.parse import parse_qs
import re
import warnings
# Suppress incorrect warnings from bs4 related to parsing HTML content.
# This runs at module level and ignores every warning whose category is
# MarkupResemblesLocatorWarning.
warnings.filterwarnings('ignore', category=MarkupResemblesLocatorWarning)
# NOTE(review): presumably query-argument names to drop when cleaning
# result URLs -- confirm against where SKIP_ARGS is used.
SKIP_ARGS = ['ref_src', 'utm']
# NOTE(review): presumably host prefixes to strip from result links
# (scheme-relative and bare forms) -- confirm against where SKIP_PREFIX is used.
SKIP_PREFIX = ['//www.', '//mobile.', '//m.', 'www.', 'mobile.', 'm.']