Add Lingva translation support in search (#360)

* Add support for Lingva translations in results

Searches that contain the word "translate" and are normal search queries
(i.e. not news/images/video/etc.) now create an iframe to a Lingva URL to
translate the user's search using their configured search language.

The Lingva URL can be configured using the WHOOGLE_ALT_TL env var; if it is
not set, the official Lingva instance URL (lingva.ml) is used instead.

For more info, visit https://github.com/TheDavidDelta/lingva-translate

* Add basic test for Lingva results

* Allow user-specified Lingva instances through CSP frame-src

* Fix PEP 8 issue
main
Ben Busby 2021-06-15 10:14:42 -04:00 committed by GitHub
parent 82ccace647
commit bcb1d8ecc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 109 additions and 42 deletions

View File

@ -55,6 +55,8 @@ ARG instagram_alt='bibliogram.art/u'
ENV WHOOGLE_ALT_IG=$instagram_alt ENV WHOOGLE_ALT_IG=$instagram_alt
ARG reddit_alt='libredd.it' ARG reddit_alt='libredd.it'
ENV WHOOGLE_ALT_RD=$reddit_alt ENV WHOOGLE_ALT_RD=$reddit_alt
ARG translate_alt='lingva.ml'
ENV WHOOGLE_ALT_TL=$translate_alt
WORKDIR /whoogle WORKDIR /whoogle

View File

@ -193,6 +193,7 @@ Description=Whoogle
#Environment=WHOOGLE_ALT_YT=invidious.snopyta.org #Environment=WHOOGLE_ALT_YT=invidious.snopyta.org
#Environment=WHOOGLE_ALT_IG=bibliogram.art/u #Environment=WHOOGLE_ALT_IG=bibliogram.art/u
#Environment=WHOOGLE_ALT_RD=libredd.it #Environment=WHOOGLE_ALT_RD=libredd.it
#Environment=WHOOGLE_ALT_TL=lingva.ml
# Load values from dotenv only # Load values from dotenv only
#Environment=WHOOGLE_DOTENV=1 #Environment=WHOOGLE_DOTENV=1
Type=simple Type=simple
@ -311,6 +312,7 @@ There are a few optional environment variables available for customizing a Whoog
| WHOOGLE_ALT_YT | The youtube.com alternative to use when site alternatives are enabled in the config. | | WHOOGLE_ALT_YT | The youtube.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_IG | The instagram.com alternative to use when site alternatives are enabled in the config. | | WHOOGLE_ALT_IG | The instagram.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_RD | The reddit.com alternative to use when site alternatives are enabled in the config. | | WHOOGLE_ALT_RD | The reddit.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_TL | The Google Translate alternative to use. This is used for all "translate ____" searches. |
### Config Environment Variables ### Config Environment Variables
These environment variables allow setting default config values, but can be overwritten manually by using the home page config menu. These allow a shortcut for destroying/rebuilding an instance to the same config state every time. These environment variables allow setting default config values, but can be overwritten manually by using the home page config menu. These allow a shortcut for destroying/rebuilding an instance to the same config state every time.

View File

@ -65,6 +65,11 @@
"value": "libredd.it", "value": "libredd.it",
"required": false "required": false
}, },
"WHOOGLE_ALT_TL": {
"description": "The Google Translate alternative to use for all searches following the 'translate ___' structure.",
"value": "lingva.ml",
"required": false
},
"WHOOGLE_CONFIG_COUNTRY": { "WHOOGLE_CONFIG_COUNTRY": {
"description": "[CONFIG] The country to use for restricting search results (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/countries.json)", "description": "[CONFIG] The country to use for restricting search results (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/countries.json)",
"value": "", "value": "",

View File

@ -52,7 +52,17 @@ app.config['BANG_PATH'] = os.getenv(
app.config['BANG_FILE'] = os.path.join( app.config['BANG_FILE'] = os.path.join(
app.config['BANG_PATH'], app.config['BANG_PATH'],
'bangs.json') 'bangs.json')
# The alternative to Google Translate is treated a bit differently than other
# social media site alternatives, in that it is used for any translation
# related searches.
#
# WHOOGLE_ALT_TL may be a bare host (e.g. "lingva.ml") or a full URL. An unset
# or empty value falls back to the official Lingva instance, so the result is
# never just a scheme with no host.
translate_url = os.getenv('WHOOGLE_ALT_TL') or 'https://lingva.ml'
# Only an explicit http(s) scheme counts as "already a URL". A plain
# startswith('http') check would also match bare hosts such as
# "httpd.example.com" and leave them without a scheme, which would turn the
# iframe src built from this value into a relative path.
if not translate_url.startswith(('http://', 'https://')):
    translate_url = 'https://' + translate_url
app.config['TRANSLATE_URL'] = translate_url
app.config['CSP'] = 'default-src \'none\';' \ app.config['CSP'] = 'default-src \'none\';' \
'frame-src ' + translate_url + ';' \
'manifest-src \'self\';' \ 'manifest-src \'self\';' \
'img-src \'self\' data:;' \ 'img-src \'self\' data:;' \
'style-src \'self\' \'unsafe-inline\';' \ 'style-src \'self\' \'unsafe-inline\';' \

View File

@ -230,6 +230,12 @@ def search():
if search_util.feeling_lucky: if search_util.feeling_lucky:
return redirect(response, code=303) return redirect(response, code=303)
# If the user is attempting to translate a string, determine the correct
# string for formatting the lingva.ml url
localization_lang = g.user_config.get_localization_lang()
translation = app.config['TRANSLATIONS'][localization_lang]
translate_to = localization_lang.replace('lang_', '')
# Return 503 if temporarily blocked by captcha # Return 503 if temporarily blocked by captcha
resp_code = 503 if has_captcha(str(response)) else 200 resp_code = 503 if has_captcha(str(response)) else 200
@ -238,9 +244,17 @@ def search():
query=urlparse.unquote(query), query=urlparse.unquote(query),
search_type=search_util.search_type, search_type=search_util.search_type,
config=g.user_config, config=g.user_config,
translation=app.config['TRANSLATIONS'][ lingva_url=app.config['TRANSLATE_URL'],
g.user_config.get_localization_lang() translation=translation,
], translate_to=translate_to,
translate_str=query.replace(
'translate', ''
).replace(
translation['translate'], ''
),
is_translation=any(
_ in query.lower() for _ in [translation['translate'], 'translate']
) and not search_util.search_type, # Standard search queries only
response=response, response=response,
version_number=app.config['VERSION_NUMBER'], version_number=app.config['VERSION_NUMBER'],
search_header=(render_template( search_header=(render_template(

View File

@ -25,3 +25,9 @@ details summary {
padding: 10px; padding: 10px;
font-weight: bold; font-weight: bold;
} }
/* Embedded Lingva translation frame: full width, fixed height, no border. */
#lingva-iframe {
width: 100%;
height: 650px;
border: 0;
}

View File

@ -1,4 +1,33 @@
{ {
"lang_en": {
"search": "Search",
"config": "Configuration",
"config-country": "Filter Results by Country",
"config-country-help": "Note: If enabled, a website will only appear in the search results if it is *hosted* in the selected country.",
"config-lang": "Interface Language",
"config-lang-search": "Search Language",
"config-near": "Near",
"config-near-help": "City Name",
"config-block": "Block",
"config-block-help": "Comma-separated site list",
"config-nojs": "Show NoJS Links",
"config-dark": "Dark Mode",
"config-safe": "Safe Search",
"config-alts": "Replace Social Media Links",
"config-alts-help": "Replaces Twitter/YouTube/Instagram/etc links with privacy respecting alternatives.",
"config-new-tab": "Open Links in New Tab",
"config-images": "Full Size Image Search",
"config-images-help": "(Experimental) Adds the 'View Image' option to desktop image searches. This will cause image result thumbnails to be lower resolution.",
"config-tor": "Use Tor",
"config-get-only": "GET Requests Only",
"config-url": "Root URL",
"config-css": "Custom CSS",
"load": "Load",
"apply": "Apply",
"save-as": "Save As...",
"github-link": "View on GitHub",
"translate": "translate"
},
"lang_nl": { "lang_nl": {
"search": "Zoeken", "search": "Zoeken",
"config": "Instellingen", "config": "Instellingen",
@ -25,7 +54,8 @@
"load": "Laden", "load": "Laden",
"apply": "Opslaan", "apply": "Opslaan",
"save-as": "Opslaan Als...", "save-as": "Opslaan Als...",
"github-link": "Bekijk op GitHub" "github-link": "Bekijk op GitHub",
"translate": "vertalen"
}, },
"lang_de": { "lang_de": {
"search": "Suchen", "search": "Suchen",
@ -53,35 +83,8 @@
"load": "Laden", "load": "Laden",
"apply": "Übernehmen", "apply": "Übernehmen",
"save-as": "Speichern unter...", "save-as": "Speichern unter...",
"github-link": "Auf GitHub öffnen" "github-link": "Auf GitHub öffnen",
}, "translate": "übersetzen"
"lang_en": {
"search": "Search",
"config": "Configuration",
"config-country": "Filter Results by Country",
"config-country-help": "Note: If enabled, a website will only appear in the search results if it is *hosted* in the selected country.",
"config-lang": "Interface Language",
"config-lang-search": "Search Language",
"config-near": "Near",
"config-near-help": "City Name",
"config-block": "Block",
"config-block-help": "Comma-separated site list",
"config-nojs": "Show NoJS Links",
"config-dark": "Dark Mode",
"config-safe": "Safe Search",
"config-alts": "Replace Social Media Links",
"config-alts-help": "Replaces Twitter/YouTube/Instagram/etc links with privacy respecting alternatives.",
"config-new-tab": "Open Links in New Tab",
"config-images": "Full Size Image Search",
"config-images-help": "(Experimental) Adds the 'View Image' option to desktop image searches. This will cause image result thumbnails to be lower resolution.",
"config-tor": "Use Tor",
"config-get-only": "GET Requests Only",
"config-url": "Root URL",
"config-css": "Custom CSS",
"load": "Load",
"apply": "Apply",
"save-as": "Save As...",
"github-link": "View on GitHub"
}, },
"lang_es": { "lang_es": {
"search": "Buscar", "search": "Buscar",
@ -109,7 +112,8 @@
"load": "Cargar", "load": "Cargar",
"apply": "Aplicar", "apply": "Aplicar",
"save-as": "Guardar como...", "save-as": "Guardar como...",
"github-link": "Ver en GitHub" "github-link": "Ver en GitHub",
"translate": "traducir"
}, },
"lang_it": { "lang_it": {
"search": "Cerca", "search": "Cerca",
@ -137,7 +141,8 @@
"load": "Carica", "load": "Carica",
"apply": "Applica", "apply": "Applica",
"save-as": "Salva Come...", "save-as": "Salva Come...",
"github-link": "Guarda su GitHub" "github-link": "Guarda su GitHub",
"translate": "tradurre"
}, },
"lang_pt": { "lang_pt": {
"search": "Buscar", "search": "Buscar",
@ -165,7 +170,8 @@
"load": "Carregar", "load": "Carregar",
"apply": "Aplicar", "apply": "Aplicar",
"save-as": "Salvar Como...", "save-as": "Salvar Como...",
"github-link": "Ver no GitHub" "github-link": "Ver no GitHub",
"translate": "traduzir"
}, },
"lang_zh-CN": { "lang_zh-CN": {
"search": "搜索", "search": "搜索",
@ -193,7 +199,8 @@
"load": "载入", "load": "载入",
"apply": "应用", "apply": "应用",
"save-as": "另存为...", "save-as": "另存为...",
"github-link": "在 GitHub 上查看" "github-link": "在 GitHub 上查看",
"translate": "翻译"
}, },
"lang_si": { "lang_si": {
"search": "සොයන්න", "search": "සොයන්න",
@ -221,6 +228,7 @@
"load": "පූරනය කරන්න", "load": "පූරනය කරන්න",
"apply": "යොදන්න", "apply": "යොදන්න",
"save-as": "...ලෙස සුරකින්න", "save-as": "...ලෙස සුරකින්න",
"github-link": "ගිට්හබ් හි බලන්න" "github-link": "ගිට්හබ් හි බලන්න",
"translate": "පරිවර්තනය කරන්න"
} }
} }

View File

@ -15,6 +15,12 @@
</head> </head>
<body> <body>
{{ search_header|safe }} {{ search_header|safe }}
{% if is_translation %}
<iframe
id="lingva-iframe"
src="{{ lingva_url }}/auto/{{ translate_to }}/{{ translate_str }}">
</iframe>
{% endif %}
{{ response|safe }} {{ response|safe }}
</body> </body>
<footer> <footer>

View File

@ -37,6 +37,7 @@ services:
#- WHOOGLE_ALT_YT=invidious.snopyta.org #- WHOOGLE_ALT_YT=invidious.snopyta.org
#- WHOOGLE_ALT_IG=bibliogram.art/u #- WHOOGLE_ALT_IG=bibliogram.art/u
#- WHOOGLE_ALT_RD=libredd.it #- WHOOGLE_ALT_RD=libredd.it
#- WHOOGLE_ALT_TL=lingva.ml
#env_file: # Alternatively, load variables from whoogle.env #env_file: # Alternatively, load variables from whoogle.env
#- whoogle.env #- whoogle.env
ports: ports:

View File

@ -35,8 +35,9 @@ def test_get_results(client):
# Depending on the search, there can be more # Depending on the search, there can be more
# than 10 result divs # than 10 result divs
assert len(get_search_results(rv.data)) >= 10 results = get_search_results(rv.data)
assert len(get_search_results(rv.data)) <= 15 assert len(results) >= 10
assert len(results) <= 15
def test_post_results(client): def test_post_results(client):
@ -45,8 +46,19 @@ def test_post_results(client):
# Depending on the search, there can be more # Depending on the search, there can be more
# than 10 result divs # than 10 result divs
assert len(get_search_results(rv.data)) >= 10 results = get_search_results(rv.data)
assert len(get_search_results(rv.data)) <= 15 assert len(results) >= 10
assert len(results) <= 15
def test_translate_search(client):
    """Check that a "translate ..." query embeds the Lingva iframe."""
    response = client.post('/search', data={'q': 'translate hola'})
    assert response._status_code == 200

    # Pretty weak test, but better than nothing
    body = str(response.data)
    for expected in ('iframe', 'lingva.ml/auto/en/ hola'):
        assert expected in body
def test_block_results(client): def test_block_results(client):

View File

@ -8,6 +8,7 @@
#WHOOGLE_ALT_YT=invidious.snopyta.org #WHOOGLE_ALT_YT=invidious.snopyta.org
#WHOOGLE_ALT_IG=bibliogram.art/u #WHOOGLE_ALT_IG=bibliogram.art/u
#WHOOGLE_ALT_RD=libredd.it #WHOOGLE_ALT_RD=libredd.it
#WHOOGLE_ALT_TL=lingva.ml
#WHOOGLE_USER="" #WHOOGLE_USER=""
#WHOOGLE_PASS="" #WHOOGLE_PASS=""
#WHOOGLE_PROXY_USER="" #WHOOGLE_PROXY_USER=""