Match unusual tags against title and tags

This commit is contained in:
Arun Prakash Jana 2018-08-06 22:56:31 +05:30
parent e00b10d272
commit 59606427b0
No known key found for this signature in database
GPG Key ID: A75979F35C080412

22
buku.py
View File

@ -2955,12 +2955,9 @@ def is_unusual_tag(tagstr):
return True return True
nwords = len(tagstr.split()) nwords = len(tagstr.split())
ncommas = tagstr.count(',') ncommas = tagstr.count(',') + 1
if nwords > 3 and ncommas == 0: if nwords / ncommas > 3:
return True
if ncommas and (nwords / (ncommas + 1)) > 3:
return True return True
return False return False
@ -3013,13 +3010,14 @@ def parse_decoded_page(page):
try: try:
if keywords: if keywords:
keys = keywords.get('content').strip().replace('\n', ' ') keys = keywords.get('content').strip().replace('\n', ' ')
if (is_unusual_tag(keys)): keys = re.sub('\s{2,}', ' ', keys)
keys = re.sub('\s{2,}', ' ', keys) if is_unusual_tag(keys):
logdbg('keywords to description: %s', keys) if keys != title and keys != desc:
if desc: logdbg('keywords to description: %s', keys)
desc = desc + '\n## ' + keys if desc:
else: desc = desc + '\n## ' + keys
desc = '* ' + keys else:
desc = '* ' + keys
keys = None keys = None
except Exception as e: except Exception as e: