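The get_tld function from Python’s tld package works very well, but it is slow when you are looking up a large batch of domain names. Here’s a faster version that handles the most common suffixes (.com, .net, .org, .co.uk) with a regular expression and falls back on get_tld for everything else: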
from tld import get_tld
import re
# Fast path: the first "name.suffix" (suffix in com/net/org/co.uk) that is
# followed by "/" or the end of the string, searched anywhere in the URL.
# Compiled once at import time instead of on every call.
_TLD_PROG = re.compile(r'(?P<tld>[^./]+\.(com|net|org|co\.uk))($|/)')

# A URL whose host is a dotted-quad IPv4 address. The pattern is anchored to
# the host position so that a dotted quad appearing later in the path or the
# query string does not count as an IP host.
_IP_PROG = re.compile(
    r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*://)?'  # optional scheme
    r'(?:[^/?#@]*@)?'                    # optional userinfo
    r'\d{1,3}(?:\.\d{1,3}){3}'           # dotted quad
    r'(?::\d+)?'                         # optional port
    r'(?:[/?#]|$)'                       # end of host
)


def quick_tld(url):
    """Return the domain of *url*, using a regex fast path before ``get_tld``.

    The common suffixes ``.com``, ``.net``, ``.org`` and ``.co.uk`` are
    resolved with a regular expression; every other URL is handed to
    ``get_tld``.

    Args:
        url: The URL (or bare host name) to inspect.

    Returns:
        On the fast path, the label immediately before the suffix joined with
        the suffix (for example ``"example.com"`` or ``"bbc.co.uk"``).
        ``None`` when *url* is not a string, when its host is an IPv4
        address, or when ``get_tld`` fails. Otherwise whatever
        ``get_tld(url)`` returns.

    This function never raises; failures are reported as ``None``.
    """
    try:
        tld_match = _TLD_PROG.search(url)
    except TypeError:
        # Not a string (e.g. None): there is nothing to look up.
        return None

    if tld_match:
        return tld_match.group('tld')

    # IP hosts have no TLD, so skip the slow lookup entirely.
    if _IP_PROG.search(url):
        return None

    # NOTE(review): the fast path returns the registered domain
    # ("example.com"); newer releases of the tld package appear to return
    # only the suffix from get_tld and expose get_fld for the registered
    # domain -- confirm against the installed version.
    try:
        return get_tld(url)
    except Exception:
        # Deliberate best-effort: an unparseable URL or unknown suffix yields
        # None rather than aborting a batch run.
        return None