Add support for custom bangs (#1132)
Add the possibility for user-defined bangs, stored in app/static/bangs. These are parsed in alphabetical order, with the DDG bangs parsed first.
main
parent
7a1ebfe975
commit
fd20135af0
|
@ -1,4 +1,5 @@
|
||||||
venv/
|
venv/
|
||||||
|
.venv/
|
||||||
.idea/
|
.idea/
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.pyc
|
*.pyc
|
||||||
|
@ -10,7 +11,8 @@ test/static
|
||||||
flask_session/
|
flask_session/
|
||||||
app/static/config
|
app/static/config
|
||||||
app/static/custom_config
|
app/static/custom_config
|
||||||
app/static/bangs
|
app/static/bangs/*
|
||||||
|
!app/static/bangs/00-whoogle.json
|
||||||
|
|
||||||
# pip stuff
|
# pip stuff
|
||||||
/build/
|
/build/
|
||||||
|
|
10
README.md
10
README.md
|
@ -35,6 +35,7 @@ Contents
|
||||||
6. [Extra Steps](#extra-steps)
|
6. [Extra Steps](#extra-steps)
|
||||||
1. [Set Primary Search Engine](#set-whoogle-as-your-primary-search-engine)
|
1. [Set Primary Search Engine](#set-whoogle-as-your-primary-search-engine)
|
||||||
2. [Custom Redirecting](#custom-redirecting)
|
2. [Custom Redirecting](#custom-redirecting)
|
||||||
|
2. [Custom Bangs](#custom-bangs)
|
||||||
3. [Prevent Downtime (Heroku Only)](#prevent-downtime-heroku-only)
|
3. [Prevent Downtime (Heroku Only)](#prevent-downtime-heroku-only)
|
||||||
4. [Manual HTTPS Enforcement](#https-enforcement)
|
4. [Manual HTTPS Enforcement](#https-enforcement)
|
||||||
5. [Using with Firefox Containers](#using-with-firefox-containers)
|
5. [Using with Firefox Containers](#using-with-firefox-containers)
|
||||||
|
@ -61,6 +62,7 @@ Contents
|
||||||
- Randomly generated User Agent
|
- Randomly generated User Agent
|
||||||
- Easy to install/deploy
|
- Easy to install/deploy
|
||||||
- DDG-style bang (i.e. `!<tag> <query>`) searches
|
- DDG-style bang (i.e. `!<tag> <query>`) searches
|
||||||
|
- User-defined [custom bangs](#custom-bangs)
|
||||||
- Optional location-based searching (i.e. results near \<city\>)
|
- Optional location-based searching (i.e. results near \<city\>)
|
||||||
- Optional NoJS mode to view search results in a separate window with JavaScript blocked
|
- Optional NoJS mode to view search results in a separate window with JavaScript blocked
|
||||||
|
|
||||||
|
@ -539,6 +541,14 @@ WHOOGLE_REDIRECTS="badA.com:goodA.com,badB.com:goodB.com"
|
||||||
|
|
||||||
NOTE: Do not include "http(s)://" when defining your redirect.
|
NOTE: Do not include "http(s)://" when defining your redirect.
|
||||||
|
|
||||||
|
### Custom Bangs
|
||||||
|
You can create your own custom bangs. By default, bangs are stored in
|
||||||
|
`app/static/bangs`. See [`00-whoogle.json`](https://github.com/benbusby/whoogle-search/blob/main/app/static/bangs/00-whoogle.json)
|
||||||
|
for an example. These are parsed in alphabetical order with later files
|
||||||
|
overriding bangs set in earlier files, with the exception that DDG bangs
|
||||||
|
(downloaded to `app/static/bangs/bangs.json`) are always parsed first. Thus,
|
||||||
|
any custom bangs will always override the DDG ones.
|
||||||
|
|
||||||
### Prevent Downtime (Heroku only)
|
### Prevent Downtime (Heroku only)
|
||||||
Part of the deal with Heroku's free tier is that you're allocated 550 hours/month (meaning it can't stay active 24/7), and the app is temporarily shut down after 30 minutes of inactivity. Once it becomes inactive, any Whoogle searches will still work, but it'll take an extra 10-15 seconds for the app to come back online before displaying the result, which can be frustrating if you're in a hurry.
|
Part of the deal with Heroku's free tier is that you're allocated 550 hours/month (meaning it can't stay active 24/7), and the app is temporarily shut down after 30 minutes of inactivity. Once it becomes inactive, any Whoogle searches will still work, but it'll take an extra 10-15 seconds for the app to come back online before displaying the result, which can be frustrating if you're in a hurry.
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from app.filter import clean_query
|
from app.filter import clean_query
|
||||||
from app.request import send_tor_signal
|
from app.request import send_tor_signal
|
||||||
from app.utils.session import generate_key
|
from app.utils.session import generate_key
|
||||||
from app.utils.bangs import gen_bangs_json
|
from app.utils.bangs import gen_bangs_json, load_all_bangs
|
||||||
from app.utils.misc import gen_file_hash, read_config_bool
|
from app.utils.misc import gen_file_hash, read_config_bool
|
||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
from bs4 import MarkupResemblesLocatorWarning
|
from bs4 import MarkupResemblesLocatorWarning
|
||||||
|
@ -139,7 +139,9 @@ app.config['CSP'] = 'default-src \'none\';' \
|
||||||
'connect-src \'self\';'
|
'connect-src \'self\';'
|
||||||
|
|
||||||
# Generate DDG bang filter
|
# Generate DDG bang filter
|
||||||
|
generating_bangs = False
|
||||||
if not os.path.exists(app.config['BANG_FILE']):
|
if not os.path.exists(app.config['BANG_FILE']):
|
||||||
|
generating_bangs = True
|
||||||
json.dump({}, open(app.config['BANG_FILE'], 'w'))
|
json.dump({}, open(app.config['BANG_FILE'], 'w'))
|
||||||
bangs_thread = threading.Thread(
|
bangs_thread = threading.Thread(
|
||||||
target=gen_bangs_json,
|
target=gen_bangs_json,
|
||||||
|
@ -181,6 +183,11 @@ warnings.simplefilter('ignore', MarkupResemblesLocatorWarning)
|
||||||
|
|
||||||
from app import routes # noqa
|
from app import routes # noqa
|
||||||
|
|
||||||
|
# The gen_bangs_json function takes care of loading bangs, so skip it here if
|
||||||
|
# it's already being loaded
|
||||||
|
if not generating_bangs:
|
||||||
|
load_all_bangs(app.config['BANG_FILE'])
|
||||||
|
|
||||||
# Disable logging from imported modules
|
# Disable logging from imported modules
|
||||||
logging.config.dictConfig({
|
logging.config.dictConfig({
|
||||||
'version': 1,
|
'version': 1,
|
||||||
|
|
|
@ -8,6 +8,8 @@ import re
|
||||||
import urllib.parse as urlparse
|
import urllib.parse as urlparse
|
||||||
import uuid
|
import uuid
|
||||||
import validators
|
import validators
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
|
||||||
|
@ -16,7 +18,7 @@ from app import app
|
||||||
from app.models.config import Config
|
from app.models.config import Config
|
||||||
from app.models.endpoint import Endpoint
|
from app.models.endpoint import Endpoint
|
||||||
from app.request import Request, TorError
|
from app.request import Request, TorError
|
||||||
from app.utils.bangs import resolve_bang
|
from app.utils.bangs import suggest_bang, resolve_bang
|
||||||
from app.utils.misc import empty_gif, placeholder_img, get_proxy_host_url, \
|
from app.utils.misc import empty_gif, placeholder_img, get_proxy_host_url, \
|
||||||
fetch_favicon
|
fetch_favicon
|
||||||
from app.filter import Filter
|
from app.filter import Filter
|
||||||
|
@ -36,9 +38,6 @@ from cryptography.fernet import Fernet, InvalidToken
|
||||||
from cryptography.exceptions import InvalidSignature
|
from cryptography.exceptions import InvalidSignature
|
||||||
from werkzeug.datastructures import MultiDict
|
from werkzeug.datastructures import MultiDict
|
||||||
|
|
||||||
# Load DDG bang json files only on init
|
|
||||||
bang_json = json.load(open(app.config['BANG_FILE'])) or {}
|
|
||||||
|
|
||||||
ac_var = 'WHOOGLE_AUTOCOMPLETE'
|
ac_var = 'WHOOGLE_AUTOCOMPLETE'
|
||||||
autocomplete_enabled = os.getenv(ac_var, '1')
|
autocomplete_enabled = os.getenv(ac_var, '1')
|
||||||
|
|
||||||
|
@ -130,7 +129,6 @@ def session_required(f):
|
||||||
|
|
||||||
@app.before_request
|
@app.before_request
|
||||||
def before_request_func():
|
def before_request_func():
|
||||||
global bang_json
|
|
||||||
session.permanent = True
|
session.permanent = True
|
||||||
|
|
||||||
# Check for latest version if needed
|
# Check for latest version if needed
|
||||||
|
@ -172,15 +170,6 @@ def before_request_func():
|
||||||
|
|
||||||
g.app_location = g.user_config.url
|
g.app_location = g.user_config.url
|
||||||
|
|
||||||
# Attempt to reload bangs json if not generated yet
|
|
||||||
if not bang_json and os.path.getsize(app.config['BANG_FILE']) > 4:
|
|
||||||
try:
|
|
||||||
bang_json = json.load(open(app.config['BANG_FILE']))
|
|
||||||
except json.decoder.JSONDecodeError:
|
|
||||||
# Ignore decoding error, can occur if file is still
|
|
||||||
# being written
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
@app.after_request
|
@app.after_request
|
||||||
def after_request_func(resp):
|
def after_request_func(resp):
|
||||||
|
@ -282,8 +271,7 @@ def autocomplete():
|
||||||
|
|
||||||
# Search bangs if the query begins with "!", but not "! " (feeling lucky)
|
# Search bangs if the query begins with "!", but not "! " (feeling lucky)
|
||||||
if q.startswith('!') and len(q) > 1 and not q.startswith('! '):
|
if q.startswith('!') and len(q) > 1 and not q.startswith('! '):
|
||||||
return jsonify([q, [bang_json[_]['suggestion'] for _ in bang_json if
|
return jsonify([q, suggest_bang(q)])
|
||||||
_.startswith(q)]])
|
|
||||||
|
|
||||||
if not q and not request.data:
|
if not q and not request.data:
|
||||||
return jsonify({'?': []})
|
return jsonify({'?': []})
|
||||||
|
@ -314,7 +302,7 @@ def search():
|
||||||
search_util = Search(request, g.user_config, g.session_key)
|
search_util = Search(request, g.user_config, g.session_key)
|
||||||
query = search_util.new_search_query()
|
query = search_util.new_search_query()
|
||||||
|
|
||||||
bang = resolve_bang(query, bang_json)
|
bang = resolve_bang(query)
|
||||||
if bang:
|
if bang:
|
||||||
return redirect(bang)
|
return redirect(bang)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
{
|
||||||
|
"!i": {
|
||||||
|
"url": "search?q={}&tbm=isch",
|
||||||
|
"suggestion": "!i (Whoogle Images)"
|
||||||
|
},
|
||||||
|
"!v": {
|
||||||
|
"url": "search?q={}&tbm=vid",
|
||||||
|
"suggestion": "!v (Whoogle Videos)"
|
||||||
|
},
|
||||||
|
"!n": {
|
||||||
|
"url": "search?q={}&tbm=nws",
|
||||||
|
"suggestion": "!n (Whoogle News)"
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,10 +1,58 @@
|
||||||
import json
|
import json
|
||||||
import requests
|
import requests
|
||||||
import urllib.parse as urlparse
|
import urllib.parse as urlparse
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
|
|
||||||
|
bangs_dict = {}
|
||||||
DDG_BANGS = 'https://duckduckgo.com/bang.js'
|
DDG_BANGS = 'https://duckduckgo.com/bang.js'
|
||||||
|
|
||||||
|
|
||||||
|
def load_all_bangs(
        ddg_bangs_file: str,
        ddg_bangs: 'dict | None' = None) -> None:
    """Loads all the bang files in alphabetical order

    The DDG bangs are always applied first, so a bang defined in any other
    json file of the same directory overrides the DDG bang of the same name.
    The merged result is stored, sorted by key, in the module-level
    ``bangs_dict``.

    Args:
        ddg_bangs_file: The str path to the DDG bangs json file. Every other
                        ``*.json`` file in its directory is treated as a
                        custom bangs file
        ddg_bangs: The dict of ddg bangs. If this is empty (or None), the
                   DDG bangs are loaded from ``ddg_bangs_file`` instead

    Returns:
        None

    Raises:
        OSError: If ``ddg_bangs_file`` (or a custom bangs file) can't be read
        json.decoder.JSONDecodeError: If a custom bangs file isn't valid json

    """
    global bangs_dict
    ddg_bangs_file = os.path.normpath(ddg_bangs_file)

    # Nothing to do if the bangs were already loaded and no fresh DDG data was
    # handed in, or if the DDG file holds (next to) nothing yet
    if (bangs_dict and not ddg_bangs) or os.path.getsize(ddg_bangs_file) <= 4:
        return

    bangs = {}
    bangs_dir = os.path.dirname(ddg_bangs_file)

    # Collect the custom bang files in alphabetical order. Paths are
    # normalized so the DDG file can be reliably filtered out by comparison.
    bang_files = sorted(
        path for path in map(
            os.path.normpath, glob.glob(os.path.join(bangs_dir, '*.json')))
        if path != ddg_bangs_file)

    # The DDG bangs always go first, either from memory or from the file
    if ddg_bangs:
        bangs |= ddg_bangs
    else:
        bang_files.insert(0, ddg_bangs_file)

    for bang_file in bang_files:
        try:
            with open(bang_file, encoding='utf-8') as f:
                bangs |= json.load(f)
        except json.decoder.JSONDecodeError:
            # Ignore decoding error only for the ddg bangs file, since this can
            # occur if file is still being written. Compare by path rather
            # than by position: when ddg_bangs is passed in, the first file in
            # the list is a custom file and must not be silently skipped.
            if bang_file != ddg_bangs_file:
                raise

    bangs_dict = dict(sorted(bangs.items()))
|
||||||
|
|
||||||
|
|
||||||
def gen_bangs_json(bangs_file: str) -> None:
|
def gen_bangs_json(bangs_file: str) -> None:
|
||||||
"""Generates a json file from the DDG bangs list
|
"""Generates a json file from the DDG bangs list
|
||||||
|
|
||||||
|
@ -37,22 +85,35 @@ def gen_bangs_json(bangs_file: str) -> None:
|
||||||
|
|
||||||
json.dump(bangs_data, open(bangs_file, 'w'))
|
json.dump(bangs_data, open(bangs_file, 'w'))
|
||||||
print('* Finished creating ddg bangs json')
|
print('* Finished creating ddg bangs json')
|
||||||
|
load_all_bangs(bangs_file, bangs_data)
|
||||||
|
|
||||||
|
|
||||||
def resolve_bang(query: str, bangs_dict: dict) -> str:
|
def suggest_bang(query: str) -> list[str]:
    """Collects the suggestion text of every bang matching a query prefix

    Args:
        query: The (partial) bang the user has typed so far, e.g. "!g"

    Returns:
        list[str]: The "suggestion" value of each entry in the loaded bangs
                   whose key starts with the query, in the order the bangs
                   are stored

    """
    matches = []
    for bang, details in bangs_dict.items():
        if not bang.startswith(query):
            continue
        matches.append(details['suggestion'])
    return matches
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_bang(query: str) -> str:
|
||||||
"""Transform's a user's query to a bang search, if an operator is found
|
"""Transform's a user's query to a bang search, if an operator is found
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: The search query
|
query: The search query
|
||||||
bangs_dict: The dict of available bang operators, with corresponding
|
|
||||||
format string search URLs
|
|
||||||
(i.e. "!w": "https://en.wikipedia.org...?search={}")
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: A formatted redirect for a bang search, or an empty str if there
|
str: A formatted redirect for a bang search, or an empty str if there
|
||||||
wasn't a match or didn't contain a bang operator
|
wasn't a match or didn't contain a bang operator
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
global bangs_dict
|
||||||
|
|
||||||
#if ! not in query simply return (speed up processing)
|
#if ! not in query simply return (speed up processing)
|
||||||
if '!' not in query:
|
if '!' not in query:
|
||||||
|
|
|
@ -48,6 +48,13 @@ def test_ddg_bang(client):
|
||||||
assert rv.headers.get('Location').startswith('https://github.com')
|
assert rv.headers.get('Location').startswith('https://github.com')
|
||||||
|
|
||||||
|
|
||||||
|
def test_custom_bang(client):
    """Checks that a query led by the "!i" bang is answered with a 302
    redirect whose Location starts with the relative "search?q=" path
    """
    # Bang at beginning of query
    response = client.get(f'/{Endpoint.search}?q=!i%20whoogle')
    redirect_target = response.headers.get('Location')

    assert response._status_code == 302
    assert redirect_target.startswith('search?q=')
|
||||||
|
|
||||||
|
|
||||||
def test_config(client):
|
def test_config(client):
|
||||||
rv = client.post(f'/{Endpoint.config}', data=demo_config)
|
rv = client.post(f'/{Endpoint.config}', data=demo_config)
|
||||||
assert rv._status_code == 302
|
assert rv._status_code == 302
|
||||||
|
|
Loading…
Reference in New Issue