From f8c48d04154829b932cfae03b0ddb602352bc925 Mon Sep 17 00:00:00 2001 From: Arun Prakash Jana Date: Sun, 29 May 2016 22:43:37 +0530 Subject: [PATCH] Simplify tag parsing logic. No in-tag commas. --- README.md | 2 +- buku | 38 ++++++++++++++++---------------------- buku.1 | 2 +- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index be135da..a8f68d6 100644 --- a/README.md +++ b/README.md @@ -187,7 +187,7 @@ Shell completion scripts for Bash, Fish and Zsh can be found in respective subdi - If the URL contains characters like `;`, `&` or brackets they may be interpreted specially by the shell. To avoid it, add the URL within single or double (`'`/`"`) quotes. - URLs are unique in DB. The same URL cannot be added twice. You can update tags and re-fetch title data. - **Tags**: - - Comma (`,`) is the tag delimiter in DB. Tags are filtered (for unique tags) and sorted. Hence, any tag cannot have comma(s) in it. In-tag commas are replaced by spaces. + - Comma (`,`) is the tag delimiter in DB. A tag cannot contain commas. Tags are filtered (for unique tags) and sorted. - **Update** operation: - If --title, --tag or --comment is passed without argument, clear the corresponding field from DB. - If --url is passed (and --title is omitted), update the title from web using the URL. diff --git a/buku b/buku index 6757312..386d6a8 100755 --- a/buku +++ b/buku @@ -877,42 +877,36 @@ def network_handler(url): def parse_tags(keywords=[]): """Format and get tag string from tokens""" - # TODO: Simplify this logic tags = DELIMITER origTags = [] uniqueTags = [] # Cleanse and get the tags - for tag in keywords: - if tag == '': + tagstr = ' '.join(keywords) + marker = tagstr.find(',') + + while marker >= 0: + token = tagstr[0:marker] + tagstr = tagstr[marker+1:] + marker = tagstr.find(',') + token = token.strip() + if token == '': continue - if tag[0] == DELIMITER: # delimiter precedes token (e.g. 
token1 ,token2) - if tags[-1] != DELIMITER: - tags += DELIMITER + tags += token + ',' - if tag[-1] == DELIMITER: # if delimiter is present, maintain it (e.g. token1, token2) - tag = tag.strip(DELIMITER).replace(DELIMITER, ' ') + DELIMITER - else: # a token in a multi-word tag (e.g. token1 token2) - tag = tag.strip(DELIMITER).replace(DELIMITER, ' ') + tagstr = tagstr.strip() + if tagstr != '': + tags += tagstr + ',' - if tag == DELIMITER: # isolated delimiter (e.g. token1 , token2) - if tags[-1] != DELIMITER: - tags += tag - continue - - if tags[-1] == DELIMITER: - tags += tag - else: - tags += ' ' + tag + if debug: + print(keywords) + print('tags: [%s]' % tags) if tags == DELIMITER: return tags - if tags[-1] != DELIMITER: - tags += DELIMITER - origTags.extend(tags.strip(DELIMITER).split(DELIMITER)) for tag in origTags: if tag not in uniqueTags: diff --git a/buku.1 b/buku.1 index 8bb79ff..8ce26af 100644 --- a/buku.1 +++ b/buku.1 @@ -31,7 +31,7 @@ If the URL contains characters like ';', '&' or brackets they may be interpreted URLs are unique in DB. The same URL cannot be added twice. You can update tags and re-fetch title data. .PP \fBTags\fR: - - Comma (',') is the tag delimiter in DB. Tags are filtered (for unique tags) and sorted. Hence, any tag cannot have comma(s) in it. In-tag commas are replaced by spaces. + - Comma (',') is the tag delimiter in DB. A tag cannot contain commas. Tags are filtered (for unique tags) and sorted. .PP \fBUpdate\fR operation: - If --title, --tag or --comment is passed without argument, clear the corresponding field from DB.