From 59606427b0878dda3ac8fc26c57d64a9134691cd Mon Sep 17 00:00:00 2001 From: Arun Prakash Jana Date: Mon, 6 Aug 2018 22:56:31 +0530 Subject: [PATCH] Match unusual tags against title and tags --- buku.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/buku.py b/buku.py index c344776..54d6a94 100755 --- a/buku.py +++ b/buku.py @@ -2955,12 +2955,9 @@ def is_unusual_tag(tagstr): return True nwords = len(tagstr.split()) - ncommas = tagstr.count(',') + ncommas = tagstr.count(',') + 1 - if nwords > 3 and ncommas == 0: - return True - - if ncommas and (nwords / (ncommas + 1)) > 3: + if nwords / ncommas > 3: return True return False @@ -3013,13 +3010,14 @@ def parse_decoded_page(page): try: if keywords: keys = keywords.get('content').strip().replace('\n', ' ') - if (is_unusual_tag(keys)): - keys = re.sub('\s{2,}', ' ', keys) - logdbg('keywords to description: %s', keys) - if desc: - desc = desc + '\n## ' + keys - else: - desc = '* ' + keys + keys = re.sub('\s{2,}', ' ', keys) + if is_unusual_tag(keys): + if keys != title and keys != desc: + logdbg('keywords to description: %s', keys) + if desc: + desc = desc + '\n## ' + keys + else: + desc = '* ' + keys keys = None except Exception as e: