From f8c48d04154829b932cfae03b0ddb602352bc925 Mon Sep 17 00:00:00 2001
From: Arun Prakash Jana <engineerarun@gmail.com>
Date: Sun, 29 May 2016 22:43:37 +0530
Subject: [PATCH] Simplify tag parsing logic. No in-tag commas.

---
 README.md |  2 +-
 buku      | 38 ++++++++++++++++----------------------
 buku.1    |  2 +-
 3 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index be135da..a8f68d6 100644
--- a/README.md
+++ b/README.md
@@ -187,7 +187,7 @@ Shell completion scripts for Bash, Fish and Zsh can be found in respective subdi
 - If the URL contains characters like `;`, `&` or brackets they may be interpreted specially by the shell. To avoid it, add the URL within single or double (`'`/`"`) quotes.
 - URLs are unique in DB. The same URL cannot be added twice. You can update tags and re-fetch title data.
 - **Tags**:
-  - Comma (`,`) is the tag delimiter in DB. Tags are filtered (for unique tags) and sorted. Hence, any tag cannot have comma(s) in it. In-tag commas are replaced by spaces.
+  - Comma (`,`) is the tag delimiter in the DB, so a tag cannot contain commas. Tags are filtered (for unique tags) and sorted.
 - **Update** operation:
   - If --title, --tag or --comment is passed without argument, clear the corresponding field from DB.
   - If --url is passed (and --title is omitted), update the title from web using the URL.
diff --git a/buku b/buku
index 6757312..386d6a8 100755
--- a/buku
+++ b/buku
@@ -877,42 +877,36 @@ def network_handler(url):
 
 def parse_tags(keywords=[]):
     """Format and get tag string from tokens"""
-    # TODO: Simplify this logic
 
     tags = DELIMITER
     origTags = []
     uniqueTags = []
 
     # Cleanse and get the tags
-    for tag in keywords:
-        if tag == '':
+    tagstr = ' '.join(keywords)
+    marker = tagstr.find(',')
+
+    while marker >= 0:
+        token = tagstr[0:marker]
+        tagstr = tagstr[marker+1:]
+        marker = tagstr.find(',')
+        token = token.strip()
+        if token == '':
             continue
 
-        if tag[0] == DELIMITER:     # delimiter precedes token (e.g. token1 ,token2)
-            if tags[-1] != DELIMITER:
-                tags += DELIMITER
+        tags += token + ','
 
-        if tag[-1] == DELIMITER:    # if delimiter is present, maintain it (e.g. token1, token2)
-            tag = tag.strip(DELIMITER).replace(DELIMITER, ' ') + DELIMITER
-        else:                       # a token in a multi-word tag (e.g. token1 token2)
-            tag = tag.strip(DELIMITER).replace(DELIMITER, ' ')
+    tagstr = tagstr.strip()
+    if tagstr != '':
+        tags += tagstr + ','
 
-        if tag == DELIMITER:        # isolated delimiter (e.g. token1 , token2)
-            if tags[-1] != DELIMITER:
-                tags += tag
-            continue
-
-        if tags[-1] == DELIMITER:
-            tags += tag
-        else:
-            tags += ' ' + tag
+    if debug:
+        print(keywords)
+        print('tags: [%s]' % tags)
 
     if tags == DELIMITER:
         return tags
 
-    if tags[-1] != DELIMITER:
-        tags += DELIMITER
-
     origTags.extend(tags.strip(DELIMITER).split(DELIMITER))
     for tag in origTags:
         if tag not in uniqueTags:
diff --git a/buku.1 b/buku.1
index 8bb79ff..8ce26af 100644
--- a/buku.1
+++ b/buku.1
@@ -31,7 +31,7 @@ If the URL contains characters like ';', '&' or brackets they may be interpreted
 URLs are unique in DB. The same URL cannot be added twice. You can update tags and re-fetch title data.
 .PP
 \fBTags\fR:
-  - Comma (',') is the tag delimiter in DB. Tags are filtered (for unique tags) and sorted. Hence, any tag cannot have comma(s) in it. In-tag commas are replaced by spaces.
+  - Comma (',') is the tag delimiter in the DB, so a tag cannot contain commas. Tags are filtered (for unique tags) and sorted.
 .PP
 \fBUpdate\fR operation:
   - If --title, --tag or --comment is passed without argument, clear the corresponding field from DB.