Match unusual tags against title and tags
This commit is contained in:
parent
e00b10d272
commit
59606427b0
22
buku.py
22
buku.py
@ -2955,12 +2955,9 @@ def is_unusual_tag(tagstr):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
nwords = len(tagstr.split())
|
nwords = len(tagstr.split())
|
||||||
ncommas = tagstr.count(',')
|
ncommas = tagstr.count(',') + 1
|
||||||
|
|
||||||
if nwords > 3 and ncommas == 0:
|
if nwords / ncommas > 3:
|
||||||
return True
|
|
||||||
|
|
||||||
if ncommas and (nwords / (ncommas + 1)) > 3:
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
@ -3013,13 +3010,14 @@ def parse_decoded_page(page):
|
|||||||
try:
|
try:
|
||||||
if keywords:
|
if keywords:
|
||||||
keys = keywords.get('content').strip().replace('\n', ' ')
|
keys = keywords.get('content').strip().replace('\n', ' ')
|
||||||
if (is_unusual_tag(keys)):
|
keys = re.sub('\s{2,}', ' ', keys)
|
||||||
keys = re.sub('\s{2,}', ' ', keys)
|
if is_unusual_tag(keys):
|
||||||
logdbg('keywords to description: %s', keys)
|
if keys != title and keys != desc:
|
||||||
if desc:
|
logdbg('keywords to description: %s', keys)
|
||||||
desc = desc + '\n## ' + keys
|
if desc:
|
||||||
else:
|
desc = desc + '\n## ' + keys
|
||||||
desc = '* ' + keys
|
else:
|
||||||
|
desc = '* ' + keys
|
||||||
|
|
||||||
keys = None
|
keys = None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user