Top-level domains can have 1 to 24 characters.

This commit is contained in:
Arun Prakash Jana 2016-11-08 23:43:32 +05:30
parent 809e6d155a
commit d521460bdc
No known key found for this signature in database
GPG Key ID: A75979F35C080412

13
buku.py
View File

@ -1332,8 +1332,10 @@ def is_bad_url(url):
if not netloc:
return True
# netloc cannot start with a '.'
if netloc.startswith('.'):
logger.debug('netloc: %s' % netloc)
# netloc cannot start or end with a '.'
if netloc.startswith('.') or netloc.endswith('.'):
return True
# netloc should have at least one '.'
@ -1341,12 +1343,7 @@ def is_bad_url(url):
if index < 0:
return True
# '.' can be followed by 3 chars at most
revindex = len(netloc) - 1 - index
if revindex > 0 and revindex < 4:
return False
return True
return False
def is_ignored_mime(url):