Add support for custom bangs (#1132)

Add support for user-defined bangs, stored as JSON files in `app/static/bangs`.

Bang files are parsed in alphabetical order, except that the DDG bangs file is always parsed first so that custom bangs override the DDG ones.
main
David Shen 2024-04-19 14:26:42 -04:00 committed by GitHub
parent 7a1ebfe975
commit fd20135af0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 112 additions and 23 deletions

4
.gitignore vendored
View File

@ -1,4 +1,5 @@
venv/ venv/
.venv/
.idea/ .idea/
__pycache__/ __pycache__/
*.pyc *.pyc
@ -10,7 +11,8 @@ test/static
flask_session/ flask_session/
app/static/config app/static/config
app/static/custom_config app/static/custom_config
app/static/bangs app/static/bangs/*
!app/static/bangs/00-whoogle.json
# pip stuff # pip stuff
/build/ /build/

View File

@ -35,6 +35,7 @@ Contents
6. [Extra Steps](#extra-steps) 6. [Extra Steps](#extra-steps)
1. [Set Primary Search Engine](#set-whoogle-as-your-primary-search-engine) 1. [Set Primary Search Engine](#set-whoogle-as-your-primary-search-engine)
2. [Custom Redirecting](#custom-redirecting) 2. [Custom Redirecting](#custom-redirecting)
2. [Custom Bangs](#custom-bangs)
3. [Prevent Downtime (Heroku Only)](#prevent-downtime-heroku-only) 3. [Prevent Downtime (Heroku Only)](#prevent-downtime-heroku-only)
4. [Manual HTTPS Enforcement](#https-enforcement) 4. [Manual HTTPS Enforcement](#https-enforcement)
5. [Using with Firefox Containers](#using-with-firefox-containers) 5. [Using with Firefox Containers](#using-with-firefox-containers)
@ -61,6 +62,7 @@ Contents
- Randomly generated User Agent - Randomly generated User Agent
- Easy to install/deploy - Easy to install/deploy
- DDG-style bang (i.e. `!<tag> <query>`) searches - DDG-style bang (i.e. `!<tag> <query>`) searches
- User-defined [custom bangs](#custom-bangs)
- Optional location-based searching (i.e. results near \<city\>) - Optional location-based searching (i.e. results near \<city\>)
- Optional NoJS mode to view search results in a separate window with JavaScript blocked - Optional NoJS mode to view search results in a separate window with JavaScript blocked
@ -539,6 +541,14 @@ WHOOGLE_REDIRECTS="badA.com:goodA.com,badB.com:goodB.com"
NOTE: Do not include "http(s)://" when defining your redirect. NOTE: Do not include "http(s)://" when defining your redirect.
### Custom Bangs
You can create your own custom bangs. By default, bang files are stored in
`app/static/bangs`. See [`00-whoogle.json`](https://github.com/benbusby/whoogle-search/blob/main/app/static/bangs/00-whoogle.json)
for an example. Bang files are parsed in alphabetical order, with later files
overriding bangs set in earlier files. The one exception is the DDG bangs file
(downloaded to `app/static/bangs/bangs.json`), which is always parsed first.
Thus, any custom bangs will always override the DDG ones.
### Prevent Downtime (Heroku only) ### Prevent Downtime (Heroku only)
Part of the deal with Heroku's free tier is that you're allocated 550 hours/month (meaning it can't stay active 24/7), and the app is temporarily shut down after 30 minutes of inactivity. Once it becomes inactive, any Whoogle searches will still work, but it'll take an extra 10-15 seconds for the app to come back online before displaying the result, which can be frustrating if you're in a hurry. Part of the deal with Heroku's free tier is that you're allocated 550 hours/month (meaning it can't stay active 24/7), and the app is temporarily shut down after 30 minutes of inactivity. Once it becomes inactive, any Whoogle searches will still work, but it'll take an extra 10-15 seconds for the app to come back online before displaying the result, which can be frustrating if you're in a hurry.

View File

@ -1,7 +1,7 @@
from app.filter import clean_query from app.filter import clean_query
from app.request import send_tor_signal from app.request import send_tor_signal
from app.utils.session import generate_key from app.utils.session import generate_key
from app.utils.bangs import gen_bangs_json from app.utils.bangs import gen_bangs_json, load_all_bangs
from app.utils.misc import gen_file_hash, read_config_bool from app.utils.misc import gen_file_hash, read_config_bool
from base64 import b64encode from base64 import b64encode
from bs4 import MarkupResemblesLocatorWarning from bs4 import MarkupResemblesLocatorWarning
@ -139,7 +139,9 @@ app.config['CSP'] = 'default-src \'none\';' \
'connect-src \'self\';' 'connect-src \'self\';'
# Generate DDG bang filter # Generate DDG bang filter
generating_bangs = False
if not os.path.exists(app.config['BANG_FILE']): if not os.path.exists(app.config['BANG_FILE']):
generating_bangs = True
json.dump({}, open(app.config['BANG_FILE'], 'w')) json.dump({}, open(app.config['BANG_FILE'], 'w'))
bangs_thread = threading.Thread( bangs_thread = threading.Thread(
target=gen_bangs_json, target=gen_bangs_json,
@ -181,6 +183,11 @@ warnings.simplefilter('ignore', MarkupResemblesLocatorWarning)
from app import routes # noqa from app import routes # noqa
# The gen_bangs_json function takes care of loading bangs, so skip it here if
# it's already being loaded
if not generating_bangs:
load_all_bangs(app.config['BANG_FILE'])
# Disable logging from imported modules # Disable logging from imported modules
logging.config.dictConfig({ logging.config.dictConfig({
'version': 1, 'version': 1,

View File

@ -8,6 +8,8 @@ import re
import urllib.parse as urlparse import urllib.parse as urlparse
import uuid import uuid
import validators import validators
import sys
import traceback
from datetime import datetime, timedelta from datetime import datetime, timedelta
from functools import wraps from functools import wraps
@ -16,7 +18,7 @@ from app import app
from app.models.config import Config from app.models.config import Config
from app.models.endpoint import Endpoint from app.models.endpoint import Endpoint
from app.request import Request, TorError from app.request import Request, TorError
from app.utils.bangs import resolve_bang from app.utils.bangs import suggest_bang, resolve_bang
from app.utils.misc import empty_gif, placeholder_img, get_proxy_host_url, \ from app.utils.misc import empty_gif, placeholder_img, get_proxy_host_url, \
fetch_favicon fetch_favicon
from app.filter import Filter from app.filter import Filter
@ -36,9 +38,6 @@ from cryptography.fernet import Fernet, InvalidToken
from cryptography.exceptions import InvalidSignature from cryptography.exceptions import InvalidSignature
from werkzeug.datastructures import MultiDict from werkzeug.datastructures import MultiDict
# Load DDG bang json files only on init
bang_json = json.load(open(app.config['BANG_FILE'])) or {}
ac_var = 'WHOOGLE_AUTOCOMPLETE' ac_var = 'WHOOGLE_AUTOCOMPLETE'
autocomplete_enabled = os.getenv(ac_var, '1') autocomplete_enabled = os.getenv(ac_var, '1')
@ -130,7 +129,6 @@ def session_required(f):
@app.before_request @app.before_request
def before_request_func(): def before_request_func():
global bang_json
session.permanent = True session.permanent = True
# Check for latest version if needed # Check for latest version if needed
@ -172,15 +170,6 @@ def before_request_func():
g.app_location = g.user_config.url g.app_location = g.user_config.url
# Attempt to reload bangs json if not generated yet
if not bang_json and os.path.getsize(app.config['BANG_FILE']) > 4:
try:
bang_json = json.load(open(app.config['BANG_FILE']))
except json.decoder.JSONDecodeError:
# Ignore decoding error, can occur if file is still
# being written
pass
@app.after_request @app.after_request
def after_request_func(resp): def after_request_func(resp):
@ -282,8 +271,7 @@ def autocomplete():
# Search bangs if the query begins with "!", but not "! " (feeling lucky) # Search bangs if the query begins with "!", but not "! " (feeling lucky)
if q.startswith('!') and len(q) > 1 and not q.startswith('! '): if q.startswith('!') and len(q) > 1 and not q.startswith('! '):
return jsonify([q, [bang_json[_]['suggestion'] for _ in bang_json if return jsonify([q, suggest_bang(q)])
_.startswith(q)]])
if not q and not request.data: if not q and not request.data:
return jsonify({'?': []}) return jsonify({'?': []})
@ -314,7 +302,7 @@ def search():
search_util = Search(request, g.user_config, g.session_key) search_util = Search(request, g.user_config, g.session_key)
query = search_util.new_search_query() query = search_util.new_search_query()
bang = resolve_bang(query, bang_json) bang = resolve_bang(query)
if bang: if bang:
return redirect(bang) return redirect(bang)

View File

@ -0,0 +1,14 @@
{
"!i": {
"url": "search?q={}&tbm=isch",
"suggestion": "!i (Whoogle Images)"
},
"!v": {
"url": "search?q={}&tbm=vid",
"suggestion": "!v (Whoogle Videos)"
},
"!n": {
"url": "search?q={}&tbm=nws",
"suggestion": "!n (Whoogle News)"
}
}

View File

@ -1,10 +1,58 @@
import json import json
import requests import requests
import urllib.parse as urlparse import urllib.parse as urlparse
import os
import glob
bangs_dict = {}
DDG_BANGS = 'https://duckduckgo.com/bang.js' DDG_BANGS = 'https://duckduckgo.com/bang.js'
def load_all_bangs(ddg_bangs_file: str, ddg_bangs: dict = None):
    """Loads all the bang files in alphabetical order

    The DDG bangs are always applied first, followed by every other
    ``*.json`` file found in the same directory as the DDG bangs file, in
    sorted path order. Later files override bangs defined by earlier ones.
    The merged result is stored, sorted by bang, in the module-level
    ``bangs_dict``.

    Nothing is loaded if bangs were already loaded and no fresh DDG bangs
    were passed in, or if the DDG bangs file is still (nearly) empty.

    Args:
        ddg_bangs_file: The str path to the new DDG bangs json file
        ddg_bangs: The dict of ddg bangs. If this is empty or None, it will
                   load the bangs from the file

    Returns:
        None

    Raises:
        json.decoder.JSONDecodeError: If any bang file other than the DDG
            bangs file contains invalid json

    """
    global bangs_dict
    ddg_bangs_file = os.path.normpath(ddg_bangs_file)

    if (bangs_dict and not ddg_bangs) or os.path.getsize(ddg_bangs_file) <= 4:
        return

    bangs_dir = os.path.dirname(ddg_bangs_file)

    # Normalize the paths so the DDG bangs file can be reliably told apart
    # from the custom bang files
    found_files = (
        os.path.normpath(f)
        for f in glob.glob(os.path.join(bangs_dir, '*.json'))
    )
    bang_files = sorted(f for f in found_files if f != ddg_bangs_file)

    bangs = {}
    if ddg_bangs:
        # Fresh DDG bangs were handed over directly, no need to re-read them
        bangs |= ddg_bangs
    else:
        # Move the ddg bangs file to the beginning
        bang_files.insert(0, ddg_bangs_file)

    for bang_file in bang_files:
        try:
            with open(bang_file) as f:
                bangs |= json.load(f)
        except json.decoder.JSONDecodeError:
            # Ignore decoding error only for the ddg bangs file, since this
            # can occur if file is still being written. Compare by path
            # rather than by position: when ddg_bangs is passed in, the
            # first file in the list is a custom one.
            if bang_file != ddg_bangs_file:
                raise

    bangs_dict = dict(sorted(bangs.items()))
def gen_bangs_json(bangs_file: str) -> None: def gen_bangs_json(bangs_file: str) -> None:
"""Generates a json file from the DDG bangs list """Generates a json file from the DDG bangs list
@ -37,22 +85,35 @@ def gen_bangs_json(bangs_file: str) -> None:
json.dump(bangs_data, open(bangs_file, 'w')) json.dump(bangs_data, open(bangs_file, 'w'))
print('* Finished creating ddg bangs json') print('* Finished creating ddg bangs json')
load_all_bangs(bangs_file, bangs_data)
def suggest_bang(query: str) -> list[str]:
    """Suggests bangs for a user's query

    Every loaded bang whose operator starts with the query contributes its
    suggestion text, in the iteration order of the loaded bangs.

    Args:
        query: The search query

    Returns:
        list[str]: A list of bang suggestions

    """
    # The module-level bangs_dict is only read here, so no `global`
    # declaration is required. Bang files can be user-defined, so fall back
    # to the bang itself if an entry was written without a suggestion.
    return [
        bang_info.get('suggestion', bang)
        for bang, bang_info in bangs_dict.items()
        if bang.startswith(query)
    ]
def resolve_bang(query: str) -> str:
"""Transform's a user's query to a bang search, if an operator is found """Transform's a user's query to a bang search, if an operator is found
Args: Args:
query: The search query query: The search query
bangs_dict: The dict of available bang operators, with corresponding
format string search URLs
(i.e. "!w": "https://en.wikipedia.org...?search={}")
Returns: Returns:
str: A formatted redirect for a bang search, or an empty str if there str: A formatted redirect for a bang search, or an empty str if there
wasn't a match or didn't contain a bang operator wasn't a match or didn't contain a bang operator
""" """
global bangs_dict
#if ! not in query simply return (speed up processing) #if ! not in query simply return (speed up processing)
if '!' not in query: if '!' not in query:

View File

@ -48,6 +48,13 @@ def test_ddg_bang(client):
assert rv.headers.get('Location').startswith('https://github.com') assert rv.headers.get('Location').startswith('https://github.com')
def test_custom_bang(client):
    """A query that starts with a custom bang redirects to its search URL."""
    # Bang at beginning of query
    response = client.get(f'/{Endpoint.search}?q=!i%20whoogle')
    assert response._status_code == 302

    # The redirect target asserted here is a relative search URL
    location = response.headers.get('Location')
    assert location.startswith('search?q=')
def test_config(client): def test_config(client):
rv = client.post(f'/{Endpoint.config}', data=demo_config) rv = client.post(f'/{Endpoint.config}', data=demo_config)
assert rv._status_code == 302 assert rv._status_code == 302