Add lingva translation support in search (#360)
* Add support for Lingva translations in results
  Searches that contain the word "translate" and are normal search queries (i.e. not news/images/video/etc.) now create an iframe to a Lingva URL to translate the user's search using their configured search language. The Lingva URL can be configured using the WHOOGLE_ALT_TL env var, or will fall back to the official Lingva instance URL (lingva.ml). For more info, visit https://github.com/TheDavidDelta/lingva-translate
* Add basic test for lingva results
* Allow user specified lingva instances through csp frame-src
* Fix pep8 issue
main
parent
82ccace647
commit
bcb1d8ecc9
|
@ -55,6 +55,8 @@ ARG instagram_alt='bibliogram.art/u'
|
||||||
ENV WHOOGLE_ALT_IG=$instagram_alt
|
ENV WHOOGLE_ALT_IG=$instagram_alt
|
||||||
ARG reddit_alt='libredd.it'
|
ARG reddit_alt='libredd.it'
|
||||||
ENV WHOOGLE_ALT_RD=$reddit_alt
|
ENV WHOOGLE_ALT_RD=$reddit_alt
|
||||||
|
ARG translate_alt='lingva.ml'
|
||||||
|
ENV WHOOGLE_ALT_TL=$translate_alt
|
||||||
|
|
||||||
WORKDIR /whoogle
|
WORKDIR /whoogle
|
||||||
|
|
||||||
|
|
|
@ -193,6 +193,7 @@ Description=Whoogle
|
||||||
#Environment=WHOOGLE_ALT_YT=invidious.snopyta.org
|
#Environment=WHOOGLE_ALT_YT=invidious.snopyta.org
|
||||||
#Environment=WHOOGLE_ALT_IG=bibliogram.art/u
|
#Environment=WHOOGLE_ALT_IG=bibliogram.art/u
|
||||||
#Environment=WHOOGLE_ALT_RD=libredd.it
|
#Environment=WHOOGLE_ALT_RD=libredd.it
|
||||||
|
#Environment=WHOOGLE_ALT_TL=lingva.ml
|
||||||
# Load values from dotenv only
|
# Load values from dotenv only
|
||||||
#Environment=WHOOGLE_DOTENV=1
|
#Environment=WHOOGLE_DOTENV=1
|
||||||
Type=simple
|
Type=simple
|
||||||
|
@ -311,6 +312,7 @@ There are a few optional environment variables available for customizing a Whoog
|
||||||
| WHOOGLE_ALT_YT | The youtube.com alternative to use when site alternatives are enabled in the config. |
|
| WHOOGLE_ALT_YT | The youtube.com alternative to use when site alternatives are enabled in the config. |
|
||||||
| WHOOGLE_ALT_IG | The instagram.com alternative to use when site alternatives are enabled in the config. |
|
| WHOOGLE_ALT_IG | The instagram.com alternative to use when site alternatives are enabled in the config. |
|
||||||
| WHOOGLE_ALT_RD | The reddit.com alternative to use when site alternatives are enabled in the config. |
|
| WHOOGLE_ALT_RD | The reddit.com alternative to use when site alternatives are enabled in the config. |
|
||||||
|
| WHOOGLE_ALT_TL | The Google Translate alternative to use. This is used for all "translate ____" searches. |
|
||||||
|
|
||||||
### Config Environment Variables
|
### Config Environment Variables
|
||||||
These environment variables allow setting default config values, but can be overwritten manually by using the home page config menu. These allow a shortcut for destroying/rebuilding an instance to the same config state every time.
|
These environment variables allow setting default config values, but can be overwritten manually by using the home page config menu. These allow a shortcut for destroying/rebuilding an instance to the same config state every time.
|
||||||
|
|
5
app.json
5
app.json
|
@ -65,6 +65,11 @@
|
||||||
"value": "libredd.it",
|
"value": "libredd.it",
|
||||||
"required": false
|
"required": false
|
||||||
},
|
},
|
||||||
|
"WHOOGLE_ALT_TL": {
|
||||||
|
"description": "The Google Translate alternative to use for all searches following the 'translate ___' structure.",
|
||||||
|
"value": "lingva.ml",
|
||||||
|
"required": false
|
||||||
|
},
|
||||||
"WHOOGLE_CONFIG_COUNTRY": {
|
"WHOOGLE_CONFIG_COUNTRY": {
|
||||||
"description": "[CONFIG] The country to use for restricting search results (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/countries.json)",
|
"description": "[CONFIG] The country to use for restricting search results (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/countries.json)",
|
||||||
"value": "",
|
"value": "",
|
||||||
|
|
|
@ -52,7 +52,17 @@ app.config['BANG_PATH'] = os.getenv(
|
||||||
app.config['BANG_FILE'] = os.path.join(
|
app.config['BANG_FILE'] = os.path.join(
|
||||||
app.config['BANG_PATH'],
|
app.config['BANG_PATH'],
|
||||||
'bangs.json')
|
'bangs.json')
|
||||||
|
|
||||||
|
# The alternative to Google Translate is treated a bit differently than other
|
||||||
|
# social media site alternatives, in that it is used for any translation
|
||||||
|
# related searches.
|
||||||
|
translate_url = os.getenv('WHOOGLE_ALT_TL', 'https://lingva.ml')
|
||||||
|
if not translate_url.startswith('http'):
|
||||||
|
translate_url = 'https://' + translate_url
|
||||||
|
app.config['TRANSLATE_URL'] = translate_url
|
||||||
|
|
||||||
app.config['CSP'] = 'default-src \'none\';' \
|
app.config['CSP'] = 'default-src \'none\';' \
|
||||||
|
'frame-src ' + translate_url + ';' \
|
||||||
'manifest-src \'self\';' \
|
'manifest-src \'self\';' \
|
||||||
'img-src \'self\' data:;' \
|
'img-src \'self\' data:;' \
|
||||||
'style-src \'self\' \'unsafe-inline\';' \
|
'style-src \'self\' \'unsafe-inline\';' \
|
||||||
|
|
|
@ -230,6 +230,12 @@ def search():
|
||||||
if search_util.feeling_lucky:
|
if search_util.feeling_lucky:
|
||||||
return redirect(response, code=303)
|
return redirect(response, code=303)
|
||||||
|
|
||||||
|
# If the user is attempting to translate a string, determine the correct
|
||||||
|
# string for formatting the lingva.ml url
|
||||||
|
localization_lang = g.user_config.get_localization_lang()
|
||||||
|
translation = app.config['TRANSLATIONS'][localization_lang]
|
||||||
|
translate_to = localization_lang.replace('lang_', '')
|
||||||
|
|
||||||
# Return 503 if temporarily blocked by captcha
|
# Return 503 if temporarily blocked by captcha
|
||||||
resp_code = 503 if has_captcha(str(response)) else 200
|
resp_code = 503 if has_captcha(str(response)) else 200
|
||||||
|
|
||||||
|
@ -238,9 +244,17 @@ def search():
|
||||||
query=urlparse.unquote(query),
|
query=urlparse.unquote(query),
|
||||||
search_type=search_util.search_type,
|
search_type=search_util.search_type,
|
||||||
config=g.user_config,
|
config=g.user_config,
|
||||||
translation=app.config['TRANSLATIONS'][
|
lingva_url=app.config['TRANSLATE_URL'],
|
||||||
g.user_config.get_localization_lang()
|
translation=translation,
|
||||||
],
|
translate_to=translate_to,
|
||||||
|
translate_str=query.replace(
|
||||||
|
'translate', ''
|
||||||
|
).replace(
|
||||||
|
translation['translate'], ''
|
||||||
|
),
|
||||||
|
is_translation=any(
|
||||||
|
_ in query.lower() for _ in [translation['translate'], 'translate']
|
||||||
|
) and not search_util.search_type, # Standard search queries only
|
||||||
response=response,
|
response=response,
|
||||||
version_number=app.config['VERSION_NUMBER'],
|
version_number=app.config['VERSION_NUMBER'],
|
||||||
search_header=(render_template(
|
search_header=(render_template(
|
||||||
|
|
|
@ -25,3 +25,9 @@ details summary {
|
||||||
padding: 10px;
|
padding: 10px;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#lingva-iframe {
|
||||||
|
width: 100%;
|
||||||
|
height: 650px;
|
||||||
|
border: 0;
|
||||||
|
}
|
||||||
|
|
|
@ -1,4 +1,33 @@
|
||||||
{
|
{
|
||||||
|
"lang_en": {
|
||||||
|
"search": "Search",
|
||||||
|
"config": "Configuration",
|
||||||
|
"config-country": "Filter Results by Country",
|
||||||
|
"config-country-help": "Note: If enabled, a website will only appear in the search results if it is *hosted* in the selected country.",
|
||||||
|
"config-lang": "Interface Language",
|
||||||
|
"config-lang-search": "Search Language",
|
||||||
|
"config-near": "Near",
|
||||||
|
"config-near-help": "City Name",
|
||||||
|
"config-block": "Block",
|
||||||
|
"config-block-help": "Comma-separated site list",
|
||||||
|
"config-nojs": "Show NoJS Links",
|
||||||
|
"config-dark": "Dark Mode",
|
||||||
|
"config-safe": "Safe Search",
|
||||||
|
"config-alts": "Replace Social Media Links",
|
||||||
|
"config-alts-help": "Replaces Twitter/YouTube/Instagram/etc links with privacy respecting alternatives.",
|
||||||
|
"config-new-tab": "Open Links in New Tab",
|
||||||
|
"config-images": "Full Size Image Search",
|
||||||
|
"config-images-help": "(Experimental) Adds the 'View Image' option to desktop image searches. This will cause image result thumbnails to be lower resolution.",
|
||||||
|
"config-tor": "Use Tor",
|
||||||
|
"config-get-only": "GET Requests Only",
|
||||||
|
"config-url": "Root URL",
|
||||||
|
"config-css": "Custom CSS",
|
||||||
|
"load": "Load",
|
||||||
|
"apply": "Apply",
|
||||||
|
"save-as": "Save As...",
|
||||||
|
"github-link": "View on GitHub",
|
||||||
|
"translate": "translate"
|
||||||
|
},
|
||||||
"lang_nl": {
|
"lang_nl": {
|
||||||
"search": "Zoeken",
|
"search": "Zoeken",
|
||||||
"config": "Instellingen",
|
"config": "Instellingen",
|
||||||
|
@ -25,7 +54,8 @@
|
||||||
"load": "Laden",
|
"load": "Laden",
|
||||||
"apply": "Opslaan",
|
"apply": "Opslaan",
|
||||||
"save-as": "Opslaan Als...",
|
"save-as": "Opslaan Als...",
|
||||||
"github-link": "Bekijk op GitHub"
|
"github-link": "Bekijk op GitHub",
|
||||||
|
"translate": "vertalen"
|
||||||
},
|
},
|
||||||
"lang_de": {
|
"lang_de": {
|
||||||
"search": "Suchen",
|
"search": "Suchen",
|
||||||
|
@ -53,35 +83,8 @@
|
||||||
"load": "Laden",
|
"load": "Laden",
|
||||||
"apply": "Übernehmen",
|
"apply": "Übernehmen",
|
||||||
"save-as": "Speichern unter...",
|
"save-as": "Speichern unter...",
|
||||||
"github-link": "Auf GitHub öffnen"
|
"github-link": "Auf GitHub öffnen",
|
||||||
},
|
"translate": "Übersetzen"
|
||||||
"lang_en": {
|
|
||||||
"search": "Search",
|
|
||||||
"config": "Configuration",
|
|
||||||
"config-country": "Filter Results by Country",
|
|
||||||
"config-country-help": "Note: If enabled, a website will only appear in the search results if it is *hosted* in the selected country.",
|
|
||||||
"config-lang": "Interface Language",
|
|
||||||
"config-lang-search": "Search Language",
|
|
||||||
"config-near": "Near",
|
|
||||||
"config-near-help": "City Name",
|
|
||||||
"config-block": "Block",
|
|
||||||
"config-block-help": "Comma-separated site list",
|
|
||||||
"config-nojs": "Show NoJS Links",
|
|
||||||
"config-dark": "Dark Mode",
|
|
||||||
"config-safe": "Safe Search",
|
|
||||||
"config-alts": "Replace Social Media Links",
|
|
||||||
"config-alts-help": "Replaces Twitter/YouTube/Instagram/etc links with privacy respecting alternatives.",
|
|
||||||
"config-new-tab": "Open Links in New Tab",
|
|
||||||
"config-images": "Full Size Image Search",
|
|
||||||
"config-images-help": "(Experimental) Adds the 'View Image' option to desktop image searches. This will cause image result thumbnails to be lower resolution.",
|
|
||||||
"config-tor": "Use Tor",
|
|
||||||
"config-get-only": "GET Requests Only",
|
|
||||||
"config-url": "Root URL",
|
|
||||||
"config-css": "Custom CSS",
|
|
||||||
"load": "Load",
|
|
||||||
"apply": "Apply",
|
|
||||||
"save-as": "Save As...",
|
|
||||||
"github-link": "View on GitHub"
|
|
||||||
},
|
},
|
||||||
"lang_es": {
|
"lang_es": {
|
||||||
"search": "Buscar",
|
"search": "Buscar",
|
||||||
|
@ -109,7 +112,8 @@
|
||||||
"load": "Cargar",
|
"load": "Cargar",
|
||||||
"apply": "Aplicar",
|
"apply": "Aplicar",
|
||||||
"save-as": "Guardar como...",
|
"save-as": "Guardar como...",
|
||||||
"github-link": "Ver en GitHub"
|
"github-link": "Ver en GitHub",
|
||||||
|
"translate": "traducir"
|
||||||
},
|
},
|
||||||
"lang_it": {
|
"lang_it": {
|
||||||
"search": "Cerca",
|
"search": "Cerca",
|
||||||
|
@ -137,7 +141,8 @@
|
||||||
"load": "Carica",
|
"load": "Carica",
|
||||||
"apply": "Applica",
|
"apply": "Applica",
|
||||||
"save-as": "Salva Come...",
|
"save-as": "Salva Come...",
|
||||||
"github-link": "Guarda su GitHub"
|
"github-link": "Guarda su GitHub",
|
||||||
|
"translate": "tradurre"
|
||||||
},
|
},
|
||||||
"lang_pt": {
|
"lang_pt": {
|
||||||
"search": "Buscar",
|
"search": "Buscar",
|
||||||
|
@ -165,7 +170,8 @@
|
||||||
"load": "Carregar",
|
"load": "Carregar",
|
||||||
"apply": "Aplicar",
|
"apply": "Aplicar",
|
||||||
"save-as": "Salvar Como...",
|
"save-as": "Salvar Como...",
|
||||||
"github-link": "Ver no GitHub"
|
"github-link": "Ver no GitHub",
|
||||||
|
"translate": "traduzir"
|
||||||
},
|
},
|
||||||
"lang_zh-CN": {
|
"lang_zh-CN": {
|
||||||
"search": "搜索",
|
"search": "搜索",
|
||||||
|
@ -193,7 +199,8 @@
|
||||||
"load": "载入",
|
"load": "载入",
|
||||||
"apply": "应用",
|
"apply": "应用",
|
||||||
"save-as": "另存为...",
|
"save-as": "另存为...",
|
||||||
"github-link": "在 GitHub 上查看"
|
"github-link": "在 GitHub 上查看",
|
||||||
|
"translate": "翻译"
|
||||||
},
|
},
|
||||||
"lang_si": {
|
"lang_si": {
|
||||||
"search": "සොයන්න",
|
"search": "සොයන්න",
|
||||||
|
@ -221,6 +228,7 @@
|
||||||
"load": "පූරනය කරන්න",
|
"load": "පූරනය කරන්න",
|
||||||
"apply": "යොදන්න",
|
"apply": "යොදන්න",
|
||||||
"save-as": "...ලෙස සුරකින්න",
|
"save-as": "...ලෙස සුරකින්න",
|
||||||
"github-link": "ගිට්හබ් හි බලන්න"
|
"github-link": "ගිට්හබ් හි බලන්න",
|
||||||
|
"translate": "පරිවර්තනය කරන්න"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,6 +15,12 @@
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
{{ search_header|safe }}
|
{{ search_header|safe }}
|
||||||
|
{% if is_translation %}
|
||||||
|
<iframe
|
||||||
|
id="lingva-iframe"
|
||||||
|
src="{{ lingva_url }}/auto/{{ translate_to }}/{{ translate_str }}">
|
||||||
|
</iframe>
|
||||||
|
{% endif %}
|
||||||
{{ response|safe }}
|
{{ response|safe }}
|
||||||
</body>
|
</body>
|
||||||
<footer>
|
<footer>
|
||||||
|
|
|
@ -37,6 +37,7 @@ services:
|
||||||
#- WHOOGLE_ALT_YT=invidious.snopyta.org
|
#- WHOOGLE_ALT_YT=invidious.snopyta.org
|
||||||
#- WHOOGLE_ALT_IG=bibliogram.art/u
|
#- WHOOGLE_ALT_IG=bibliogram.art/u
|
||||||
#- WHOOGLE_ALT_RD=libredd.it
|
#- WHOOGLE_ALT_RD=libredd.it
|
||||||
|
#- WHOOGLE_ALT_TL=lingva.ml
|
||||||
#env_file: # Alternatively, load variables from whoogle.env
|
#env_file: # Alternatively, load variables from whoogle.env
|
||||||
#- whoogle.env
|
#- whoogle.env
|
||||||
ports:
|
ports:
|
||||||
|
|
|
@ -35,8 +35,9 @@ def test_get_results(client):
|
||||||
|
|
||||||
# Depending on the search, there can be more
|
# Depending on the search, there can be more
|
||||||
# than 10 result divs
|
# than 10 result divs
|
||||||
assert len(get_search_results(rv.data)) >= 10
|
results = get_search_results(rv.data)
|
||||||
assert len(get_search_results(rv.data)) <= 15
|
assert len(results) >= 10
|
||||||
|
assert len(results) <= 15
|
||||||
|
|
||||||
|
|
||||||
def test_post_results(client):
|
def test_post_results(client):
|
||||||
|
@ -45,8 +46,19 @@ def test_post_results(client):
|
||||||
|
|
||||||
# Depending on the search, there can be more
|
# Depending on the search, there can be more
|
||||||
# than 10 result divs
|
# than 10 result divs
|
||||||
assert len(get_search_results(rv.data)) >= 10
|
results = get_search_results(rv.data)
|
||||||
assert len(get_search_results(rv.data)) <= 15
|
assert len(results) >= 10
|
||||||
|
assert len(results) <= 15
|
||||||
|
|
||||||
|
|
||||||
|
def test_translate_search(client):
|
||||||
|
rv = client.post('/search', data=dict(q='translate hola'))
|
||||||
|
assert rv._status_code == 200
|
||||||
|
|
||||||
|
# Pretty weak test, but better than nothing
|
||||||
|
str_data = str(rv.data)
|
||||||
|
assert 'iframe' in str_data
|
||||||
|
assert 'lingva.ml/auto/en/ hola' in str_data
|
||||||
|
|
||||||
|
|
||||||
def test_block_results(client):
|
def test_block_results(client):
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#WHOOGLE_ALT_YT=invidious.snopyta.org
|
#WHOOGLE_ALT_YT=invidious.snopyta.org
|
||||||
#WHOOGLE_ALT_IG=bibliogram.art/u
|
#WHOOGLE_ALT_IG=bibliogram.art/u
|
||||||
#WHOOGLE_ALT_RD=libredd.it
|
#WHOOGLE_ALT_RD=libredd.it
|
||||||
|
#WHOOGLE_ALT_TL=lingva.ml
|
||||||
#WHOOGLE_USER=""
|
#WHOOGLE_USER=""
|
||||||
#WHOOGLE_PASS=""
|
#WHOOGLE_PASS=""
|
||||||
#WHOOGLE_PROXY_USER=""
|
#WHOOGLE_PROXY_USER=""
|
||||||
|
|
Loading…
Reference in New Issue