Merge pull request #344 from ckolumbus/issue/340

FIX(buku) #340 ff json import folder w/o children
This commit is contained in:
Arun Prakash Jana 2019-01-04 21:12:36 +05:30 committed by GitHub
commit 7a553fb9db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 234 additions and 105 deletions

59
buku Executable file → Normal file
View File

@ -81,6 +81,7 @@ USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Fi
MYHEADERS = None # Default dictionary of headers MYHEADERS = None # Default dictionary of headers
MYPROXY = None # Default proxy MYPROXY = None # Default proxy
TEXT_BROWSERS = ['elinks', 'links', 'links2', 'lynx', 'w3m', 'www-browser'] TEXT_BROWSERS = ['elinks', 'links', 'links2', 'lynx', 'w3m', 'www-browser']
IGNORE_FF_BOOKMARK_FOLDERS = frozenset(["placesRoot", "bookmarksMenuFolder"])
# Set up logging # Set up logging
LOGGER = logging.getLogger() LOGGER = logging.getLogger()
@ -2428,7 +2429,7 @@ class BukuDb:
items = import_firefox_json(data, add_bookmark_folder_as_tag, newtag) items = import_firefox_json(data, add_bookmark_folder_as_tag, newtag)
except ValueError as e: except ValueError as e:
LOGERR("JSON Decode Error: {}".format(e)) LOGERR("ff_json: JSON Decode Error: {}".format(e))
return False return False
except Exception as e: except Exception as e:
LOGERR(e) LOGERR(e)
@ -2981,7 +2982,7 @@ def import_firefox_json(json, add_bookmark_folder_as_tag=False, unique_tag=None)
] ]
return d[0]['value'] return d[0]['value']
except Exception: except Exception:
LOGDBG("No description found for entry: {} {}".format(entry['uri'], entry['title'])) LOGDBG("ff_json: No description found for entry: {} {}".format(entry['uri'], entry['title']))
return "" return ""
def extract_tags(entry): def extract_tags(entry):
@ -2989,32 +2990,36 @@ def import_firefox_json(json, add_bookmark_folder_as_tag=False, unique_tag=None)
try: try:
tags = entry['tags'].split(',') tags = entry['tags'].split(',')
except Exception: except Exception:
LOGDBG("No tags found for entry: {} {}".format(entry['uri'], entry['title'])) LOGDBG("ff_json: No tags found for entry: {} {}".format(entry['uri'], entry['title']))
return tags return tags
def iterate_children(parent_folder, entry_list): def iterate_children(parent_folder, entry_list):
for bm_entry in entry_list: for bm_entry in entry_list:
entry_title = bm_entry['title'] if 'title' in bm_entry else "<no title>"
try: try:
typeCode = bm_entry['typeCode'] typeCode = bm_entry['typeCode']
except Exception: except Exception:
LOGDBG("item without typeCode found, ignoring: {}".format(bm_entry['title'])) LOGDBG("ff_json: item without typeCode found, ignoring: {}".format(entry_title))
continue continue
LOGDBG("ff_json: processing typeCode '{}', title '{}'".format(typeCode, entry_title))
if TypeCode.uri.value == typeCode: if TypeCode.uri.value == typeCode:
try: try:
if is_smart(bm_entry): if is_smart(bm_entry):
LOGDBG("SmartBookmark found, ignoring: {}".format(bm_entry['title'])) LOGDBG("ff_json: SmartBookmark found, ignoring: {}".format(entry_title))
continue continue
if is_nongeneric_url(bm_entry['uri']): if is_nongeneric_url(bm_entry['uri']):
LOGDBG("Non-Generic URL found, ignoring: {}".format(bm_entry['title'])) LOGDBG("ff_json: Non-Generic URL found, ignoring: {}".format(entry_title))
continue continue
desc = extract_desc(bm_entry) desc = extract_desc(bm_entry)
bookmark_tags = extract_tags(bm_entry) bookmark_tags = extract_tags(bm_entry)
if add_bookmark_folder_as_tag: # if parent_folder is not "None"
if add_bookmark_folder_as_tag and parent_folder:
bookmark_tags.append(parent_folder) bookmark_tags.append(parent_folder)
if unique_tag: if unique_tag:
@ -3023,34 +3028,38 @@ def import_firefox_json(json, add_bookmark_folder_as_tag=False, unique_tag=None)
formatted_tags = [DELIM + tag for tag in bookmark_tags] formatted_tags = [DELIM + tag for tag in bookmark_tags]
tags = parse_tags(formatted_tags) tags = parse_tags(formatted_tags)
LOGDBG("Entry found: {}, {}, {}, {} " LOGDBG("ff_json: Entry found: {}, {}, {}, {} " .format(bm_entry['uri'], entry_title, tags, desc))
.format(bm_entry['uri'], bm_entry['title'], tags, desc)) yield (bm_entry['uri'], entry_title, tags, desc, 0, True, False)
yield (bm_entry['uri'], bm_entry['title'], tags, desc, 0, True, False)
except Exception as e: except Exception as e:
LOGERR(e) LOGERR("ff_json: Error parsing entry '{}' Exception '{}'".format(entry_title, e))
elif TypeCode.folder.value == typeCode: elif TypeCode.folder.value == typeCode:
try:
# from python 3.3 on:
# yield from iterate_children(bm_entry['title'], bm_entry['children'])
for entry in iterate_children( # ignore special bookmark folders
parent_folder+"/"+bm_entry['title'], bm_entry['children']): if 'root' in bm_entry and bm_entry['root'] in IGNORE_FF_BOOKMARK_FOLDERS:
yield entry LOGDBG("ff_json: ignoring root folder: {}" .format(entry_title))
except Exception as e: entry_title = None
if "children" in bm_entry:
yield from iterate_children(entry_title, bm_entry['children'])
else:
# if any of the properties does not exist, bail out silently # if any of the properties does not exist, bail out silently
LOGERR(e) LOGDBG("ff_json: No 'children' found in bookmark folder - skipping: {}".format(entry_title))
elif TypeCode.separator.value == typeCode: elif TypeCode.separator.value == typeCode:
LOGDBG("Unknown typeCode found : {}".format(typeCode)) # ignore separator
pass
else:
LOGDBG("ff_json: Unknown typeCode found : {}".format(typeCode))
try: if "children" in json:
entry_list = json['children'] main_entry_list = json['children']
except Exception: else:
LOGERR("No children in Root entry found") LOGDBG("ff_json: No children in Root entry found")
return [] return []
yield from iterate_children("", entry_list) yield from iterate_children(None, main_entry_list)
def import_html(html_soup, add_parent_folder_as_tag, newtag): def import_html(html_soup, add_parent_folder_as_tag, newtag):

View File

@ -20,24 +20,30 @@ def test_load_full_entry():
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{ {
"title" : "title", "title" : "main",
"typeCode": 2,
"children": [ "children": [
{ {
"dateAdded": 1269200039653000, "title" : "title",
"guid": "xxxydfalkj", "typeCode": 2,
"id": 113, "children": [
"index": 0, {
"lastModified": 1305978154986000, "dateAdded": 1269200039653000,
"title": "title", "guid": "xxxydfalkj",
"type": "text/x-moz-place", "id": 113,
"typeCode": 1, "index": 0,
"tags" : "x,y", "lastModified": 1305978154986000,
"uri": "http://uri.com/abc?234&536", "title": "entry title",
"annos" : [{ "type": "text/x-moz-place",
"name": "bookmarkProperties/description", "typeCode": 1,
"value": "desc" "tags" : "x,y",
}] "uri": "http://uri.com/abc?234&536",
}] "annos" : [{
"name": "bookmarkProperties/description",
"value": "desc"
}]
}]
}]
}""") }""")
# Act # Act
@ -50,7 +56,7 @@ def test_load_full_entry():
assert len(result) == 1 assert len(result) == 1
assert result[0][0] == 'http://uri.com/abc?234&536' assert result[0][0] == 'http://uri.com/abc?234&536'
assert result[0][1] == 'title' assert result[0][1] == 'entry title'
assert result[0][2] == ',x,y,' assert result[0][2] == ',x,y,'
assert result[0][3] == 'desc' assert result[0][3] == 'desc'
@ -60,15 +66,20 @@ def test_load_no_typecode():
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{ {
"title" : "title", "title" : "main",
"typeCode": 2,
"children": [ "children": [
{ {
"title" : "title1", "title" : "title",
"uri" : "http://uri1", "children": [
"annos" : [{ {
"name": "bookmarkProperties/description", "title" : "title1",
"value": "desc" "uri" : "http://uri1",
}] "annos" : [{
"name": "bookmarkProperties/description",
"value": "desc"
}]
}]
}] }]
}""") }""")
@ -110,6 +121,25 @@ def test_load_invalid_typecode():
assert len(result) == 0 assert len(result) == 0
def test_load_folder_with_no_children():
    """A root folder entry that has no 'children' key must yield no items."""
    # Arrange: a lone folder (typeCode 2) with no 'children' property.
    payload = json.loads("""
{
"title" : "title",
"typeCode" : 2
} """)

    # Act: drain the importer's iterable so every entry is actually processed.
    imported = list(import_firefox_json(payload))

    # Assert: nothing was importable.
    assert len(imported) == 0
def test_load_one_child(): def test_load_one_child():
"""test method.""" """test method."""
@ -117,20 +147,26 @@ def test_load_one_child():
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{ {
"title" : "title", "title" : "main",
"typeCode" : 2, "typeCode" : 2,
"children": [ "children": [
{ {
"title" : "title1", "title" : "title",
"typeCode" : 1, "typeCode" : 2,
"uri" : "http://uri1", "children": [
"annos" : [{ {
"name": "bookmarkProperties/description", "title" : "title1",
"value": "desc" "typeCode" : 1,
}] "uri" : "http://uri1",
} "annos" : [{
] "name": "bookmarkProperties/description",
} """) "value": "desc"
}]
}
]}
]
}
""")
# Act # Act
items = import_firefox_json(data) items = import_firefox_json(data)
@ -152,13 +188,18 @@ def test_load_one_container_child():
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{ {
"title" : "title", "title" : "main",
"typeCode" : 2, "typeCode": 2,
"children": [ "children": [
{ {
"title":"bookmark folder", "title" : "title",
"typeCode":2 "typeCode" : 2,
} ] "children": [
{
"title":"bookmark folder",
"typeCode":2
}]
}]
}""") }""")
# Act # Act
@ -176,15 +217,21 @@ def test_load_many_children():
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{ {
"title" : "main",
"typeCode" : 2,
"children": [
{
"title":"Weitere Lesezeichen", "title":"Weitere Lesezeichen",
"typeCode":2, "typeCode":2,
"children": [ "children": [
{"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"}, {"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"},
{"title":"title2","typeCode":1,"uri":"http://uri2.com/xyz"}, {"title":"title2","typeCode":1,"uri":"http://uri2.com/xyz"},
{"title":"title3","typeCode":1,"uri":"http://uri3.com"} {"title":"title3","typeCode":1,"uri":"http://uri3.com"}
] ]}
} """) ]
}
""")
# Act # Act
items = import_firefox_json(data) items = import_firefox_json(data)
@ -196,46 +243,108 @@ def test_load_many_children():
assert len(result) == 3 assert len(result) == 3
def test_load_hierarchical_container(): def test_load_container_no_title():
"""test method.""" """test method."""
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{ {
"title" : "title", "title" : "main",
"typeCode" : 2, "typeCode" : 2,
"children": [ "children": [
{ {
"title" : "title", "typeCode" : 2,
"typeCode" : 2, "children": [
"children": [ {"title":"title1","typeCode":1,"uri":"http://uri.com"}
{"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"}, ]}
{"title":"title2","typeCode":1,"uri":"http://uri2.com/xyz"},
{"title":"title3","typeCode":1,"uri":"http://uri3.com"}
]
},
{"title":"title4","typeCode":1,"uri":"http://uri4.com/#more-74"},
{"title":"title5","typeCode":1,"uri":"http://uri5.com/xyz"},
{"title":"title6","typeCode":1,"uri":"http://uri6.com"}
] ]
} }
""") """)
# Act # Act
items = import_firefox_json(data) items = import_firefox_json(data, add_bookmark_folder_as_tag=True)
# Assert # Assert
result = [] result = []
for item in items: for item in items:
result.append(item) result.append(item)
assert len(result) == 6 assert len(result) == 1
assert result[0][0] == 'http://uri.com'
assert result[0][2] == ',<no title>,'
def test_load_hierarchical_container_without_ignore():
    """A folder without a 'root' marker contributes its title as the bookmark's tag."""
    # Arrange: main folder -> folder "title" -> a single bookmark.
    payload = json.loads("""
{
"title" : "main",
"typeCode" : 2,
"children": [
{
"title" : "title",
"typeCode" : 2,
"children": [
{"title":"title1","typeCode":1,"uri":"http://uri.com"}
]
}]
}
""")

    # Act: import with folder-as-tag enabled and collect every yielded entry.
    imported = list(import_firefox_json(payload, add_bookmark_folder_as_tag=True))

    # Assert: exactly one entry, carrying the URL and the folder title as tag.
    assert len(imported) == 1
    only_entry = imported[0]
    assert only_entry[0] == 'http://uri.com'
    assert only_entry[2] == ',title,'
def test_load_hierarchical_container_with_ignore():
"""test method."""
# Arrange
data = json.loads("""
{
"title" : "main",
"typeCode" : 2,
"children": [
{
"title" : "title",
"typeCode" : 2,
"root": "bookmarksMenuFolder",
"children": [
{
"title" : "title2",
"typeCode" : 2,
"children": [
{"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"}
]
},
{"title":"title4","typeCode":1,"uri":"http://uri4.com/#more-74"}
]
}]
}
""")
# Act
items = import_firefox_json(data, add_bookmark_folder_as_tag=True)
# Assert
result = []
for item in items:
result.append(item)
assert len(result) == 2
assert result[0][0] == 'http://uri1.com/#more-74' assert result[0][0] == 'http://uri1.com/#more-74'
assert result[1][0] == 'http://uri2.com/xyz' assert result[1][0] == 'http://uri4.com/#more-74'
assert result[2][0] == 'http://uri3.com'
assert result[3][0] == 'http://uri4.com/#more-74' assert result[0][2] == ',title2,'
assert result[4][0] == 'http://uri5.com/xyz' assert result[1][2] == ','
assert result[5][0] == 'http://uri6.com'
def test_load_separator(): def test_load_separator():
"""test method.""" """test method."""
@ -243,14 +352,19 @@ def test_load_separator():
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{ {
"title" : "title", "title" : "main",
"typeCode" : 2, "typeCode" : 2,
"children": [ "children": [
{ {
"title": "", "title" : "title",
"type": "text/x-moz-place-separator", "typeCode" : 2,
"typeCode": 3 "children": [
} ] {
"title": "",
"type": "text/x-moz-place-separator",
"typeCode": 3
}]
}]
}""") }""")
# Act # Act
@ -268,18 +382,24 @@ def test_load_multiple_tags():
# Arrange # Arrange
data = json.loads(""" data = json.loads("""
{ {
"title" : "title", "title" : "main",
"typeCode": 2,
"children": [ "children": [
{ {
"title" : "title1", "title" : "title",
"uri" : "http://uri1", "typeCode": 2,
"tags" : "tag1, tag2", "children": [
"typeCode": 1, {
"annos" : [{ "title" : "title1",
"name": "bookmarkProperties/description", "uri" : "http://uri1",
"value": "desc" "tags" : "tag1, tag2",
}] "typeCode": 1,
}] "annos" : [{
"name": "bookmarkProperties/description",
"value": "desc"
}]
}]
}]
}""") }""")
# Act # Act