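The get_tld function from Python’s tld package works very well but is slow if you’re looking up a batch of domain names. Here’s a faster version that handles the most common suffixes (.com, .net, .org, .co.uk) with a regular expression and falls back on get_tld for everything else: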
import logging
import re

_LOG = logging.getLogger(__name__)

# Compiled once at import time so batch lookups do not rebuild (or re-fetch
# from the ``re`` cache) the patterns on every call.
_HOST_PROG = re.compile(
    r'^(?:[A-Za-z][A-Za-z0-9+.-]*://)?'  # optional scheme
    r'(?:[^/?#@]*@)?'                    # optional userinfo
    r'(?P<host>[^/?#:@]*)'               # host, up to port/path/query/fragment
)
# Last two labels of the host (three for co.uk) when it ends in a common suffix.
_TLD_PROG = re.compile(r'(?P<tld>[^.]+\.(?:com|net|org|co\.uk))$')
# Dotted-quad IPv4 literal occupying the whole host.
_IP_PROG = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$')


def quick_tld(url):
    """Return the domain of *url*, using a fast regex path when possible.

    Hosts ending in ``.com``, ``.net``, ``.org`` or ``.co.uk`` are resolved
    with a regular expression. Hosts that are IPv4 literals yield ``None``
    without further work. Everything else is delegated to ``tld.get_tld``.

    Only the host part of *url* is inspected, so a path segment such as
    ``/files/archive.com`` is never mistaken for the domain.

    NOTE(review): the fast path returns the registered domain (for example
    ``"example.com"``). Newer releases of ``tld`` appear to return only the
    suffix from ``get_tld`` and offer ``get_fld`` for the registered domain;
    confirm which behaviour the installed version has.

    Args:
        url: URL or bare host name as a string.

    Returns:
        The domain string, or ``None`` if *url* is not a string, its host is
        an IPv4 address, or ``get_tld`` could not resolve it.
    """
    try:
        host = _HOST_PROG.match(url).group('host')
    except TypeError:
        # Not a string (e.g. None or bytes): best-effort contract is None.
        return None

    tld_match = _TLD_PROG.search(host)
    if tld_match:
        return tld_match.group('tld')

    if _IP_PROG.match(host):
        # Bare IP addresses have no TLD; skip the slow lookup.
        return None

    # Imported here so the fast path does not depend on the third-party
    # package; a missing dependency still fails loudly on the slow path.
    from tld import get_tld

    try:
        return get_tld(url)
    except Exception:
        # Deliberately best-effort: one malformed URL must not abort a batch.
        _LOG.debug("get_tld failed for %r", url, exc_info=True)
        return None