diff --git a/buku b/buku old mode 100755 new mode 100644 index a9aaf88..1a83b98 --- a/buku +++ b/buku @@ -81,6 +81,7 @@ USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Fi MYHEADERS = None # Default dictionary of headers MYPROXY = None # Default proxy TEXT_BROWSERS = ['elinks', 'links', 'links2', 'lynx', 'w3m', 'www-browser'] +IGNORE_FF_BOOKMARK_FOLDERS = frozenset(["placesRoot", "bookmarksMenuFolder"]) # Set up logging LOGGER = logging.getLogger() @@ -2428,7 +2429,7 @@ class BukuDb: items = import_firefox_json(data, add_bookmark_folder_as_tag, newtag) except ValueError as e: - LOGERR("JSON Decode Error: {}".format(e)) + LOGERR("ff_json: JSON Decode Error: {}".format(e)) return False except Exception as e: LOGERR(e) @@ -2981,7 +2982,7 @@ def import_firefox_json(json, add_bookmark_folder_as_tag=False, unique_tag=None) ] return d[0]['value'] except Exception: - LOGDBG("No description found for entry: {} {}".format(entry['uri'], entry['title'])) + LOGDBG("ff_json: No description found for entry: {} {}".format(entry['uri'], entry['title'])) return "" def extract_tags(entry): @@ -2989,32 +2990,36 @@ def import_firefox_json(json, add_bookmark_folder_as_tag=False, unique_tag=None) try: tags = entry['tags'].split(',') except Exception: - LOGDBG("No tags found for entry: {} {}".format(entry['uri'], entry['title'])) + LOGDBG("ff_json: No tags found for entry: {} {}".format(entry['uri'], entry['title'])) return tags def iterate_children(parent_folder, entry_list): for bm_entry in entry_list: + entry_title = bm_entry['title'] if 'title' in bm_entry else "" + try: typeCode = bm_entry['typeCode'] except Exception: - LOGDBG("item without typeCode found, ignoring: {}".format(bm_entry['title'])) + LOGDBG("ff_json: item without typeCode found, ignoring: {}".format(entry_title)) continue + LOGDBG("ff_json: processing typeCode '{}', title '{}'".format(typeCode, entry_title)) if TypeCode.uri.value == typeCode: try: if is_smart(bm_entry): - LOGDBG("SmartBookmark found, ignoring: {}".format(bm_entry['title'])) + LOGDBG("ff_json: SmartBookmark found, ignoring: {}".format(entry_title)) continue if is_nongeneric_url(bm_entry['uri']): - LOGDBG("Non-Generic URL found, ignoring: {}".format(bm_entry['title'])) + LOGDBG("ff_json: Non-Generic URL found, ignoring: {}".format(entry_title)) continue desc = extract_desc(bm_entry) bookmark_tags = extract_tags(bm_entry) - if add_bookmark_folder_as_tag: + # if parent_folder is not "None" + if add_bookmark_folder_as_tag and parent_folder: bookmark_tags.append(parent_folder) if unique_tag: @@ -3023,34 +3028,38 @@ def import_firefox_json(json, add_bookmark_folder_as_tag=False, unique_tag=None) formatted_tags = [DELIM + tag for tag in bookmark_tags] tags = parse_tags(formatted_tags) - LOGDBG("Entry found: {}, {}, {}, {} " - .format(bm_entry['uri'], bm_entry['title'], tags, desc)) - yield (bm_entry['uri'], bm_entry['title'], tags, desc, 0, True, False) + LOGDBG("ff_json: Entry found: {}, {}, {}, {} " .format(bm_entry['uri'], entry_title, tags, desc)) + yield (bm_entry['uri'], entry_title, tags, desc, 0, True, False) + except Exception as e: - LOGERR(e) + LOGERR("ff_json: Error parsing entry '{}' Exception '{}'".format(entry_title, e)) elif TypeCode.folder.value == typeCode: - try: - # from python 3.3 on: - # yield from iterate_children(bm_entry['title'], bm_entry['children']) - for entry in iterate_children( - parent_folder+"/"+bm_entry['title'], bm_entry['children']): - yield entry - except Exception as e: + # ignore special bookmark folders + if 'root' in bm_entry and bm_entry['root'] in IGNORE_FF_BOOKMARK_FOLDERS: + LOGDBG("ff_json: ignoring root folder: {}" .format(entry_title)) + entry_title = None + + if "children" in bm_entry: + yield from iterate_children(entry_title, bm_entry['children']) + else: # if any of the properties does not exist, bail out silently - LOGERR(e) + LOGDBG("ff_json: No 'children' found in bookmark folder - skipping: {}".format(entry_title)) elif TypeCode.separator.value == typeCode: - LOGDBG("Unknown typeCode found : {}".format(typeCode)) + # ignore separator + pass + else: + LOGDBG("ff_json: Unknown typeCode found : {}".format(typeCode)) - try: - entry_list = json['children'] - except Exception: - LOGERR("No children in Root entry found") + if "children" in json: + main_entry_list = json['children'] + else: + LOGDBG("ff_json: No children in Root entry found") return [] - yield from iterate_children("", entry_list) + yield from iterate_children(None, main_entry_list) def import_html(html_soup, add_parent_folder_as_tag, newtag): diff --git a/tests/test_import_firefox_json.py b/tests/test_import_firefox_json.py index b068330..7eb8bbf 100644 --- a/tests/test_import_firefox_json.py +++ b/tests/test_import_firefox_json.py @@ -20,24 +20,30 @@ def test_load_full_entry(): # Arrange data = json.loads(""" { - "title" : "title", + "title" : "main", + "typeCode": 2, "children": [ { - "dateAdded": 1269200039653000, - "guid": "xxxydfalkj", - "id": 113, - "index": 0, - "lastModified": 1305978154986000, - "title": "title", - "type": "text/x-moz-place", - "typeCode": 1, - "tags" : "x,y", - "uri": "http://uri.com/abc?234&536", - "annos" : [{ - "name": "bookmarkProperties/description", - "value": "desc" - }] - }] + "title" : "title", + "typeCode": 2, + "children": [ + { + "dateAdded": 1269200039653000, + "guid": "xxxydfalkj", + "id": 113, + "index": 0, + "lastModified": 1305978154986000, + "title": "entry title", + "type": "text/x-moz-place", + "typeCode": 1, + "tags" : "x,y", + "uri": "http://uri.com/abc?234&536", + "annos" : [{ + "name": "bookmarkProperties/description", + "value": "desc" + }] + }] + }] }""") # Act @@ -50,7 +56,7 @@ def test_load_full_entry(): assert len(result) == 1 assert result[0][0] == 'http://uri.com/abc?234&536' - assert result[0][1] == 'title' + assert result[0][1] == 'entry title' assert result[0][2] == ',x,y,' assert result[0][3] == 'desc' @@ -60,15 +66,20 @@ def test_load_no_typecode(): # Arrange data = json.loads(""" { - "title" : "title", + "title" : "main", + "typeCode": 2, "children": [ { - "title" : "title1", - "uri" : "http://uri1", - "annos" : [{ - "name": "bookmarkProperties/description", - "value": "desc" - }] + "title" : "title", + "children": [ + { + "title" : "title1", + "uri" : "http://uri1", + "annos" : [{ + "name": "bookmarkProperties/description", + "value": "desc" + }] + }] }] }""") @@ -110,6 +121,25 @@ def test_load_invalid_typecode(): assert len(result) == 0 +def test_load_folder_with_no_children(): + """test method.""" + + # Arrange + data = json.loads(""" + { + "title" : "title", + "typeCode" : 2 + } """) + + # Act + items = import_firefox_json(data) + + # Assert + result = [] + for item in items: + result.append(item) + + assert len(result) == 0 def test_load_one_child(): """test method.""" @@ -117,20 +147,26 @@ def test_load_one_child(): # Arrange data = json.loads(""" { - "title" : "title", + "title" : "main", "typeCode" : 2, "children": [ { - "title" : "title1", - "typeCode" : 1, - "uri" : "http://uri1", - "annos" : [{ - "name": "bookmarkProperties/description", - "value": "desc" - }] - } - ] - } """) + "title" : "title", + "typeCode" : 2, + "children": [ + { + "title" : "title1", + "typeCode" : 1, + "uri" : "http://uri1", + "annos" : [{ + "name": "bookmarkProperties/description", + "value": "desc" + }] + } + ]} + ] + } + """) # Act items = import_firefox_json(data) @@ -152,13 +188,18 @@ def test_load_one_container_child(): # Arrange data = json.loads(""" { - "title" : "title", - "typeCode" : 2, + "title" : "main", + "typeCode": 2, "children": [ { - "title":"bookmark folder", - "typeCode":2 - } ] + "title" : "title", + "typeCode" : 2, + "children": [ + { + "title":"bookmark folder", + "typeCode":2 + }] + }] }""") # Act @@ -176,15 +217,21 @@ def test_load_many_children(): # Arrange data = json.loads(""" - { + { + "title" : "main", + "typeCode" : 2, + "children": [ + { "title":"Weitere Lesezeichen", "typeCode":2, "children": [ {"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"}, {"title":"title2","typeCode":1,"uri":"http://uri2.com/xyz"}, {"title":"title3","typeCode":1,"uri":"http://uri3.com"} - ] - } """) + ]} + ] + } + """) # Act items = import_firefox_json(data) @@ -196,46 +243,108 @@ def test_load_many_children(): assert len(result) == 3 -def test_load_hierarchical_container(): +def test_load_container_no_title(): """test method.""" # Arrange data = json.loads(""" { - "title" : "title", + "title" : "main", "typeCode" : 2, "children": [ - { - "title" : "title", - "typeCode" : 2, - "children": [ - {"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"}, - {"title":"title2","typeCode":1,"uri":"http://uri2.com/xyz"}, - {"title":"title3","typeCode":1,"uri":"http://uri3.com"} - ] - }, - {"title":"title4","typeCode":1,"uri":"http://uri4.com/#more-74"}, - {"title":"title5","typeCode":1,"uri":"http://uri5.com/xyz"}, - {"title":"title6","typeCode":1,"uri":"http://uri6.com"} + { + "typeCode" : 2, + "children": [ + {"title":"title1","typeCode":1,"uri":"http://uri.com"} + ]} ] } - """) + """) # Act - items = import_firefox_json(data) + items = import_firefox_json(data, add_bookmark_folder_as_tag=True) # Assert result = [] for item in items: result.append(item) - assert len(result) == 6 + assert len(result) == 1 + assert result[0][0] == 'http://uri.com' + assert result[0][2] == ',,' + +def test_load_hierarchical_container_without_ignore(): + """test method.""" + + # Arrange + data = json.loads(""" + { + "title" : "main", + "typeCode" : 2, + "children": [ + { + "title" : "title", + "typeCode" : 2, + "children": [ + {"title":"title1","typeCode":1,"uri":"http://uri.com"} + ] + }] + } + """) + + # Act + items = import_firefox_json(data, add_bookmark_folder_as_tag=True) + + # Assert + result = [] + for item in items: + result.append(item) + + assert len(result) == 1 + assert result[0][0] == 'http://uri.com' + assert result[0][2] == ',title,' + +def test_load_hierarchical_container_with_ignore(): + """test method.""" + + # Arrange + data = json.loads(""" + { + "title" : "main", + "typeCode" : 2, + "children": [ + { + "title" : "title", + "typeCode" : 2, + "root": "bookmarksMenuFolder", + "children": [ + { + "title" : "title2", + "typeCode" : 2, + "children": [ + {"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"} + ] + }, + {"title":"title4","typeCode":1,"uri":"http://uri4.com/#more-74"} + ] + }] + } + """) + + # Act + items = import_firefox_json(data, add_bookmark_folder_as_tag=True) + + # Assert + result = [] + for item in items: + result.append(item) + + assert len(result) == 2 assert result[0][0] == 'http://uri1.com/#more-74' - assert result[1][0] == 'http://uri2.com/xyz' - assert result[2][0] == 'http://uri3.com' - assert result[3][0] == 'http://uri4.com/#more-74' - assert result[4][0] == 'http://uri5.com/xyz' - assert result[5][0] == 'http://uri6.com' + assert result[1][0] == 'http://uri4.com/#more-74' + + assert result[0][2] == ',title2,' + assert result[1][2] == ',' def test_load_separator(): """test method.""" @@ -243,14 +352,19 @@ def test_load_separator(): # Arrange data = json.loads(""" { - "title" : "title", + "title" : "main", "typeCode" : 2, "children": [ { - "title": "", - "type": "text/x-moz-place-separator", - "typeCode": 3 - } ] + "title" : "title", + "typeCode" : 2, + "children": [ + { + "title": "", + "type": "text/x-moz-place-separator", + "typeCode": 3 + }] + }] }""") # Act @@ -268,18 +382,24 @@ def test_load_multiple_tags(): # Arrange data = json.loads(""" { - "title" : "title", + "title" : "main", + "typeCode": 2, "children": [ { - "title" : "title1", - "uri" : "http://uri1", - "tags" : "tag1, tag2", - "typeCode": 1, - "annos" : [{ - "name": "bookmarkProperties/description", - "value": "desc" - }] - }] + "title" : "title", + "typeCode": 2, + "children": [ + { + "title" : "title1", + "uri" : "http://uri1", + "tags" : "tag1, tag2", + "typeCode": 1, + "annos" : [{ + "name": "bookmarkProperties/description", + "value": "desc" + }] + }] + }] }""") # Act