Only extract domain+ext when using site alts

Links to parent sites that use a 'www' (or similar) subdomain were not
being redirected properly. The hostname check now validates only against
the primary domain (domain + TLD), except for Wikipedia, where the
subdomain is kept because it carries the 2-character language code that
is used for interface translation.

Fixes #901
main
Ben Busby 2022-12-08 10:54:21 -07:00
parent cd7fce2822
commit 3dc6d14377
No known key found for this signature in database
GPG Key ID: B9B7231E01D924A1
1 changed file with 9 additions and 5 deletions

View File

@ -134,7 +134,12 @@ def get_site_alt(link: str) -> str:
# Need to replace full hostname with alternative to encapsulate # Need to replace full hostname with alternative to encapsulate
# subdomains as well # subdomains as well
parsed_link = urlparse.urlparse(link) parsed_link = urlparse.urlparse(link)
hostname = parsed_link.hostname
# Extract subdomain separately from the domain+tld. The subdomain
# is used for wikiless translations.
split_host = parsed_link.netloc.split('.')
subdomain = split_host[0] if len(split_host) > 2 else ''
hostname = '.'.join(split_host[-2:])
# The full scheme + hostname is used when comparing against the list of # The full scheme + hostname is used when comparing against the list of
# available alternative services, due to how Medium links are constructed. # available alternative services, due to how Medium links are constructed.
@ -151,9 +156,8 @@ def get_site_alt(link: str) -> str:
# a 2-char language code) to be passed as a URL param to Wikiless # a 2-char language code) to be passed as a URL param to Wikiless
# in order to preserve the language setting. # in order to preserve the language setting.
params = '' params = ''
if 'wikipedia' in hostname: if 'wikipedia' in hostname and len(subdomain) == 2:
subdomain = hostname.split('.')[0] hostname = f'{subdomain}.{hostname}'
if len(subdomain) == 2:
params = f'?lang={subdomain}' params = f'?lang={subdomain}'
parsed_alt = urlparse.urlparse(SITE_ALTS[site_key]) parsed_alt = urlparse.urlparse(SITE_ALTS[site_key])