Exclude subdomain in Medium->Scribe redirects

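Medium redirects needed further cleanup to account for links that contain
a subdomain (e.g. 'user.medium.com'). Keeping that subdomain would not make
sense in a Farside redirect link: the result should start with
'farside.link/scribe', not 'user.farside.link/scribe'.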

Fixes #947
main
Ben Busby 2023-02-04 16:36:16 -07:00
parent fab65d720d
commit 991fe6d910
No known key found for this signature in database
GPG Key ID: B9B7231E01D924A1
2 changed files with 6 additions and 5 deletions

View File

@@ -459,7 +459,7 @@ class Filter:
if any(url in link_netloc for url in unsupported_g_pages): if any(url in link_netloc for url in unsupported_g_pages):
# FIXME: The "Shopping" tab requires further filtering (see #136) # FIXME: The "Shopping" tab requires further filtering (see #136)
# Temporarily removing all links to that tab for now. # Temporarily removing all links to that tab for now.
# Replaces the /url google unsupported link to the direct url # Replaces the /url google unsupported link to the direct url
link['href'] = link_netloc link['href'] = link_netloc
parent = link.parent parent = link.parent
@@ -588,10 +588,9 @@ class Filter:
# replaced (i.e. 'philomedium.com' should stay as it is). # replaced (i.e. 'philomedium.com' should stay as it is).
if 'medium.com' in link_str: if 'medium.com' in link_str:
if link_str.startswith('medium.com') or '.medium.com' in link_str: if link_str.startswith('medium.com') or '.medium.com' in link_str:
new_desc.string = link_str.replace( link_str = 'farside.link/scribe' + link_str[
'medium.com', 'farside.link/scribe') link_str.find('medium.com') + len('medium.com'):]
else: new_desc.string = link_str
new_desc.string = link_str
else: else:
new_desc.string = link_str.replace(site, alt) new_desc.string = link_str.replace(site, alt)

View File

@@ -186,6 +186,8 @@ def get_site_alt(link: str) -> str:
if 'wikipedia' in hostname and len(subdomain) == 2: if 'wikipedia' in hostname and len(subdomain) == 2:
hostname = f'{subdomain}.{hostname}' hostname = f'{subdomain}.{hostname}'
params = f'?lang={subdomain}' params = f'?lang={subdomain}'
elif 'medium' in hostname and len(subdomain) > 0:
hostname = f'{subdomain}.{hostname}'
parsed_alt = urlparse.urlparse(SITE_ALTS[site_key]) parsed_alt = urlparse.urlparse(SITE_ALTS[site_key])
link = link.replace(hostname, SITE_ALTS[site_key]) + params link = link.replace(hostname, SITE_ALTS[site_key]) + params