From f688b88bd8003f0307559d5c1b91ecbd44a56196 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Wed, 6 Jul 2022 09:49:43 -0600 Subject: [PATCH] Preserve wikipedia language setting for wikiless redirects Wikipedia -> Wikiless redirects always result in an English-language result, even if the Wikipedia result would've been in a non-English language. This is due to Wikipedia using language-specific subdomains (e.g. de.wikipedia.org, en.wikipedia.org, etc.) whereas Wikiless uses a "lang" URL param. This has been fixed by inspecting the subdomain of the Wikipedia link and passing that value to Wikiless as the lang param if it's determined to be a language-specific value (currently just looking for a 2-char subdomain). See #805 --- app/utils/results.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/app/utils/results.py b/app/utils/results.py index 67835eb..d817573 100644 --- a/app/utils/results.py +++ b/app/utils/results.py @@ -134,7 +134,16 @@ def get_site_alt(link: str) -> str: if not hostname or site_key not in hostname or not SITE_ALTS[site_key]: continue - link = link.replace(hostname, SITE_ALTS[site_key]) + # Wikipedia -> Wikiless replacements require the subdomain (if it's + # a 2-char language code) to be passed as a URL param to Wikiless + # in order to preserve the language setting. + url_params = '' + if 'wikipedia' in hostname: + subdomain = hostname.split('.')[0] + if len(subdomain) == 2: + url_params = f'?lang={subdomain}' + + link = link.replace(hostname, SITE_ALTS[site_key]) + url_params for prefix in SKIP_PREFIX: link = link.replace(prefix, '//') break