Merge pull request #344 from ckolumbus/issue/340

FIX(buku) #340 ff json import folder w/o children
This commit is contained in:
Arun Prakash Jana 2019-01-04 21:12:36 +05:30 committed by GitHub
commit 7a553fb9db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 234 additions and 105 deletions

59
buku Executable file → Normal file
View File

@ -81,6 +81,7 @@ USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Fi
MYHEADERS = None # Default dictionary of headers MYHEADERS = None # Default dictionary of headers
MYPROXY = None # Default proxy MYPROXY = None # Default proxy
TEXT_BROWSERS = ['elinks', 'links', 'links2', 'lynx', 'w3m', 'www-browser'] TEXT_BROWSERS = ['elinks', 'links', 'links2', 'lynx', 'w3m', 'www-browser']
IGNORE_FF_BOOKMARK_FOLDERS = frozenset(["placesRoot", "bookmarksMenuFolder"])
# Set up logging # Set up logging
LOGGER = logging.getLogger() LOGGER = logging.getLogger()
@ -2428,7 +2429,7 @@ class BukuDb:
items = import_firefox_json(data, add_bookmark_folder_as_tag, newtag) items = import_firefox_json(data, add_bookmark_folder_as_tag, newtag)
except ValueError as e: except ValueError as e:
LOGERR("JSON Decode Error: {}".format(e)) LOGERR("ff_json: JSON Decode Error: {}".format(e))
return False return False
except Exception as e: except Exception as e:
LOGERR(e) LOGERR(e)
@ -2981,7 +2982,7 @@ def import_firefox_json(json, add_bookmark_folder_as_tag=False, unique_tag=None)
] ]
return d[0]['value'] return d[0]['value']
except Exception: except Exception:
LOGDBG("No description found for entry: {} {}".format(entry['uri'], entry['title'])) LOGDBG("ff_json: No description found for entry: {} {}".format(entry['uri'], entry['title']))
return "" return ""
def extract_tags(entry): def extract_tags(entry):
@ -2989,32 +2990,36 @@ def import_firefox_json(json, add_bookmark_folder_as_tag=False, unique_tag=None)
try: try:
tags = entry['tags'].split(',') tags = entry['tags'].split(',')
except Exception: except Exception:
LOGDBG("No tags found for entry: {} {}".format(entry['uri'], entry['title'])) LOGDBG("ff_json: No tags found for entry: {} {}".format(entry['uri'], entry['title']))
return tags return tags
def iterate_children(parent_folder, entry_list): def iterate_children(parent_folder, entry_list):
for bm_entry in entry_list: for bm_entry in entry_list:
entry_title = bm_entry['title'] if 'title' in bm_entry else "<no title>"
try: try:
typeCode = bm_entry['typeCode'] typeCode = bm_entry['typeCode']
except Exception: except Exception:
LOGDBG("item without typeCode found, ignoring: {}".format(bm_entry['title'])) LOGDBG("ff_json: item without typeCode found, ignoring: {}".format(entry_title))
continue continue
LOGDBG("ff_json: processing typeCode '{}', title '{}'".format(typeCode, entry_title))
if TypeCode.uri.value == typeCode: if TypeCode.uri.value == typeCode:
try: try:
if is_smart(bm_entry): if is_smart(bm_entry):
LOGDBG("SmartBookmark found, ignoring: {}".format(bm_entry['title'])) LOGDBG("ff_json: SmartBookmark found, ignoring: {}".format(entry_title))
continue continue
if is_nongeneric_url(bm_entry['uri']): if is_nongeneric_url(bm_entry['uri']):
LOGDBG("Non-Generic URL found, ignoring: {}".format(bm_entry['title'])) LOGDBG("ff_json: Non-Generic URL found, ignoring: {}".format(entry_title))
continue continue
desc = extract_desc(bm_entry) desc = extract_desc(bm_entry)
bookmark_tags = extract_tags(bm_entry) bookmark_tags = extract_tags(bm_entry)
if add_bookmark_folder_as_tag: # if parent_folder is not "None"
if add_bookmark_folder_as_tag and parent_folder:
bookmark_tags.append(parent_folder) bookmark_tags.append(parent_folder)
if unique_tag: if unique_tag:
@ -3023,34 +3028,38 @@ def import_firefox_json(json, add_bookmark_folder_as_tag=False, unique_tag=None)
formatted_tags = [DELIM + tag for tag in bookmark_tags] formatted_tags = [DELIM + tag for tag in bookmark_tags]
tags = parse_tags(formatted_tags) tags = parse_tags(formatted_tags)
LOGDBG("Entry found: {}, {}, {}, {} " LOGDBG("ff_json: Entry found: {}, {}, {}, {} " .format(bm_entry['uri'], entry_title, tags, desc))
.format(bm_entry['uri'], bm_entry['title'], tags, desc)) yield (bm_entry['uri'], entry_title, tags, desc, 0, True, False)
yield (bm_entry['uri'], bm_entry['title'], tags, desc, 0, True, False)
except Exception as e: except Exception as e:
LOGERR(e) LOGERR("ff_json: Error parsing entry '{}' Exception '{}'".format(entry_title, e))
elif TypeCode.folder.value == typeCode: elif TypeCode.folder.value == typeCode:
try:
# from python 3.3 on:
# yield from iterate_children(bm_entry['title'], bm_entry['children'])
for entry in iterate_children( # ignore special bookmark folders
parent_folder+"/"+bm_entry['title'], bm_entry['children']): if 'root' in bm_entry and bm_entry['root'] in IGNORE_FF_BOOKMARK_FOLDERS:
yield entry LOGDBG("ff_json: ignoring root folder: {}" .format(entry_title))
except Exception as e: entry_title = None
if "children" in bm_entry:
yield from iterate_children(entry_title, bm_entry['children'])
else:
# if any of the properties does not exist, bail out silently # if any of the properties does not exist, bail out silently
LOGERR(e) LOGDBG("ff_json: No 'children' found in bookmark folder - skipping: {}".format(entry_title))
elif TypeCode.separator.value == typeCode: elif TypeCode.separator.value == typeCode:
LOGDBG("Unknown typeCode found : {}".format(typeCode)) # ignore separator
pass
else:
LOGDBG("ff_json: Unknown typeCode found : {}".format(typeCode))
try: if "children" in json:
entry_list = json['children'] main_entry_list = json['children']
except Exception: else:
LOGERR("No children in Root entry found") LOGDBG("ff_json: No children in Root entry found")
return [] return []
yield from iterate_children("", entry_list) yield from iterate_children(None, main_entry_list)
def import_html(html_soup, add_parent_folder_as_tag, newtag): def import_html(html_soup, add_parent_folder_as_tag, newtag):

View File

@ -19,8 +19,13 @@ def test_load_full_entry():
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{
"title" : "main",
"typeCode": 2,
"children": [
{ {
"title" : "title", "title" : "title",
"typeCode": 2,
"children": [ "children": [
{ {
"dateAdded": 1269200039653000, "dateAdded": 1269200039653000,
@ -28,7 +33,7 @@ def test_load_full_entry():
"id": 113, "id": 113,
"index": 0, "index": 0,
"lastModified": 1305978154986000, "lastModified": 1305978154986000,
"title": "title", "title": "entry title",
"type": "text/x-moz-place", "type": "text/x-moz-place",
"typeCode": 1, "typeCode": 1,
"tags" : "x,y", "tags" : "x,y",
@ -38,6 +43,7 @@ def test_load_full_entry():
"value": "desc" "value": "desc"
}] }]
}] }]
}]
}""") }""")
# Act # Act
@ -50,7 +56,7 @@ def test_load_full_entry():
assert len(result) == 1 assert len(result) == 1
assert result[0][0] == 'http://uri.com/abc?234&536' assert result[0][0] == 'http://uri.com/abc?234&536'
assert result[0][1] == 'title' assert result[0][1] == 'entry title'
assert result[0][2] == ',x,y,' assert result[0][2] == ',x,y,'
assert result[0][3] == 'desc' assert result[0][3] == 'desc'
@ -59,6 +65,10 @@ def test_load_no_typecode():
"""test method.""" """test method."""
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{
"title" : "main",
"typeCode": 2,
"children": [
{ {
"title" : "title", "title" : "title",
"children": [ "children": [
@ -70,6 +80,7 @@ def test_load_no_typecode():
"value": "desc" "value": "desc"
}] }]
}] }]
}]
}""") }""")
# Act # Act
@ -110,12 +121,35 @@ def test_load_invalid_typecode():
assert len(result) == 0 assert len(result) == 0
def test_load_folder_with_no_children():
    """A root object that has no 'children' key yields no bookmarks."""
    # The root carries only a title and a typeCode; there is nothing to walk.
    raw_json = """
{
"title" : "title",
"typeCode" : 2
} """
    parsed = json.loads(raw_json)

    # Drain the importer so that every yielded entry gets counted.
    imported = list(import_firefox_json(parsed))

    assert len(imported) == 0
def test_load_one_child(): def test_load_one_child():
"""test method.""" """test method."""
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{
"title" : "main",
"typeCode" : 2,
"children": [
{ {
"title" : "title", "title" : "title",
"typeCode" : 2, "typeCode" : 2,
@ -129,8 +163,10 @@ def test_load_one_child():
"value": "desc" "value": "desc"
}] }]
} }
]}
] ]
} """) }
""")
# Act # Act
items = import_firefox_json(data) items = import_firefox_json(data)
@ -151,6 +187,10 @@ def test_load_one_container_child():
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{
"title" : "main",
"typeCode": 2,
"children": [
{ {
"title" : "title", "title" : "title",
"typeCode" : 2, "typeCode" : 2,
@ -159,6 +199,7 @@ def test_load_one_container_child():
"title":"bookmark folder", "title":"bookmark folder",
"typeCode":2 "typeCode":2
}] }]
}]
}""") }""")
# Act # Act
@ -176,6 +217,10 @@ def test_load_many_children():
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{
"title" : "main",
"typeCode" : 2,
"children": [
{ {
"title":"Weitere Lesezeichen", "title":"Weitere Lesezeichen",
"typeCode":2, "typeCode":2,
@ -183,40 +228,7 @@ def test_load_many_children():
{"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"}, {"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"},
{"title":"title2","typeCode":1,"uri":"http://uri2.com/xyz"}, {"title":"title2","typeCode":1,"uri":"http://uri2.com/xyz"},
{"title":"title3","typeCode":1,"uri":"http://uri3.com"} {"title":"title3","typeCode":1,"uri":"http://uri3.com"}
] ]}
} """)
# Act
items = import_firefox_json(data)
# Assert
result = []
for item in items:
result.append(item)
assert len(result) == 3
def test_load_hierarchical_container():
"""test method."""
# Arrange
data = json.loads("""
{
"title" : "title",
"typeCode" : 2,
"children": [
{
"title" : "title",
"typeCode" : 2,
"children": [
{"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"},
{"title":"title2","typeCode":1,"uri":"http://uri2.com/xyz"},
{"title":"title3","typeCode":1,"uri":"http://uri3.com"}
]
},
{"title":"title4","typeCode":1,"uri":"http://uri4.com/#more-74"},
{"title":"title5","typeCode":1,"uri":"http://uri5.com/xyz"},
{"title":"title6","typeCode":1,"uri":"http://uri6.com"}
] ]
} }
""") """)
@ -229,19 +241,120 @@ def test_load_hierarchical_container():
for item in items: for item in items:
result.append(item) result.append(item)
assert len(result) == 6 assert len(result) == 3
def test_load_container_no_title():
    """Bookmarks inside a folder without a 'title' are tagged '<no title>'."""
    # The inner folder deliberately omits "title"; only its child has one.
    raw_json = """
{
"title" : "main",
"typeCode" : 2,
"children": [
{
"typeCode" : 2,
"children": [
{"title":"title1","typeCode":1,"uri":"http://uri.com"}
]}
]
}
"""
    parsed = json.loads(raw_json)

    # Ask the importer to turn the enclosing folder name into a tag.
    imported = list(import_firefox_json(parsed, add_bookmark_folder_as_tag=True))

    assert len(imported) == 1
    entry = imported[0]
    # Index 0 is the URI and index 2 is the delimiter-wrapped tag string.
    assert entry[0] == 'http://uri.com'
    assert entry[2] == ',<no title>,'
def test_load_hierarchical_container_without_ignore():
    """A folder with no 'root' marker contributes its title as a tag."""
    # One nested folder named "title" holding a single bookmark.
    raw_json = """
{
"title" : "main",
"typeCode" : 2,
"children": [
{
"title" : "title",
"typeCode" : 2,
"children": [
{"title":"title1","typeCode":1,"uri":"http://uri.com"}
]
}]
}
"""
    parsed = json.loads(raw_json)

    # Ask the importer to turn the enclosing folder name into a tag.
    imported = list(import_firefox_json(parsed, add_bookmark_folder_as_tag=True))

    assert len(imported) == 1
    entry = imported[0]
    # Index 0 is the URI and index 2 is the delimiter-wrapped tag string.
    assert entry[0] == 'http://uri.com'
    assert entry[2] == ',title,'
def test_load_hierarchical_container_with_ignore():
"""test method."""
# Arrange
data = json.loads("""
{
"title" : "main",
"typeCode" : 2,
"children": [
{
"title" : "title",
"typeCode" : 2,
"root": "bookmarksMenuFolder",
"children": [
{
"title" : "title2",
"typeCode" : 2,
"children": [
{"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"}
]
},
{"title":"title4","typeCode":1,"uri":"http://uri4.com/#more-74"}
]
}]
}
""")
# Act
items = import_firefox_json(data, add_bookmark_folder_as_tag=True)
# Assert
result = []
for item in items:
result.append(item)
assert len(result) == 2
assert result[0][0] == 'http://uri1.com/#more-74' assert result[0][0] == 'http://uri1.com/#more-74'
assert result[1][0] == 'http://uri2.com/xyz' assert result[1][0] == 'http://uri4.com/#more-74'
assert result[2][0] == 'http://uri3.com'
assert result[3][0] == 'http://uri4.com/#more-74' assert result[0][2] == ',title2,'
assert result[4][0] == 'http://uri5.com/xyz' assert result[1][2] == ','
assert result[5][0] == 'http://uri6.com'
def test_load_separator(): def test_load_separator():
"""test method.""" """test method."""
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{
"title" : "main",
"typeCode" : 2,
"children": [
{ {
"title" : "title", "title" : "title",
"typeCode" : 2, "typeCode" : 2,
@ -251,6 +364,7 @@ def test_load_separator():
"type": "text/x-moz-place-separator", "type": "text/x-moz-place-separator",
"typeCode": 3 "typeCode": 3
}] }]
}]
}""") }""")
# Act # Act
@ -267,8 +381,13 @@ def test_load_multiple_tags():
"""test method.""" """test method."""
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{
"title" : "main",
"typeCode": 2,
"children": [
{ {
"title" : "title", "title" : "title",
"typeCode": 2,
"children": [ "children": [
{ {
"title" : "title1", "title" : "title1",
@ -280,6 +399,7 @@ def test_load_multiple_tags():
"value": "desc" "value": "desc"
}] }]
}] }]
}]
}""") }""")
# Act # Act