Merge pull request #338 from ckolumbus/feature/import-firefox-json
Feature/import firefox json
This commit is contained in:
commit
940d3e1642
141
buku
141
buku
@ -22,6 +22,7 @@ from bs4 import BeautifulSoup
|
||||
import certifi
|
||||
import cgi
|
||||
import collections
|
||||
from enum import Enum
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@ -2376,6 +2377,24 @@ class BukuDb:
|
||||
items = import_md(filepath=filepath, newtag=newtag)
|
||||
elif filepath.endswith('org'):
|
||||
items = import_org(filepath=filepath, newtag=newtag)
|
||||
elif filepath.endswith('json'):
|
||||
if not tacit:
|
||||
resp = input('Add Bookmark folder name as tag? (y/n): ')
|
||||
else:
|
||||
resp = 'y'
|
||||
add_bookmark_folder_as_tag = (resp == 'y')
|
||||
try:
|
||||
with open(filepath, 'r', encoding='utf-8') as datafile:
|
||||
data = json.load(datafile)
|
||||
|
||||
items = import_firefox_json(data, add_bookmark_folder_as_tag, newtag)
|
||||
|
||||
except ValueError as e:
|
||||
logerr("JSON Decode Error: {}".format(e))
|
||||
return False
|
||||
except Exception as e:
|
||||
logerr(e)
|
||||
return False
|
||||
|
||||
else:
|
||||
try:
|
||||
@ -2861,6 +2880,128 @@ def import_org(filepath, newtag):
|
||||
if newtag else None, None, 0, True
|
||||
)
|
||||
|
||||
def import_firefox_json(json, add_bookmark_folder_as_tag=False, unique_tag=None):
    """Walk a parsed Firefox JSON bookmark export and yield bookmark records.

    'SmartBookmark' entries, separators and non-generic URLs are skipped.
    Entries with a missing or unknown ``typeCode`` are logged and skipped.

    Needed/used fields out of the JSON schema of the bookmarks:

    title              : the name/title of the entry
    tags               : ',' separated tags for the bookmark entry
    typeCode           : 1 - uri, 2 - subfolder, 3 - separator
    annos/{name,value} : following annotation entries are used
        name : Places/SmartBookmark           : identifies smart folder, ignored
        name : bookmarkProperties/description : detailed bookmark entry description
    children           : for subfolders, recurse into the child entries

    Parameters
    ----------
    json : dict
        Already parsed root object of the Firefox bookmark export (e.g. the
        result of ``json.load``). Note that inside this function the name
        shadows the ``json`` module; it is kept for interface compatibility.
    add_bookmark_folder_as_tag : bool
        True if bookmark parent folder should be added as tags else False.
    unique_tag : str
        Extra tag appended to every imported bookmark
        (timestamp tag in YYYYMonDD format).

    Yields
    ------
    tuple
        ``(uri, title, tags, desc, 0, True)`` for every importable bookmark.
    """

    class TypeCode(Enum):
        """ Format
            typeCode
                1 : uri        (type=text/x-moz-place)
                2 : subfolder  (type=text/x-moz-container)
                3 : separator  (type=text/x-moz-separator)
        """
        uri = 1
        folder = 2
        separator = 3

    def is_smart(entry):
        """Return True if the entry carries a 'Places/SmartBookmark' annotation."""
        try:
            return any(anno['name'] == "Places/SmartBookmark" for anno in entry['annos'])
        except (KeyError, TypeError):
            # no (or malformed) annotations: not a smart bookmark
            return False

    def extract_desc(entry):
        """Return the description annotation of the entry, or '' if absent."""
        try:
            for anno in entry['annos']:
                if anno['name'] == "bookmarkProperties/description":
                    return anno['value']
        except (KeyError, TypeError):
            pass

        # .get() keeps the log call itself from raising on incomplete entries
        logdbg("no description found for entry: {} {}".format(entry.get('uri'), entry.get('title')))
        return ""

    def extract_tags(entry):
        """Return the entry's tags as a list (empty when there are none)."""
        try:
            return entry['tags'].split(',')
        except (KeyError, AttributeError, TypeError):
            logdbg("no tags found for entry: {} {}".format(entry.get('uri'), entry.get('title')))
            return []

    def build_record(parent_folder, entry):
        """Convert one uri entry into a bookmark tuple, or None if it is ignored."""
        if is_smart(entry):
            logdbg("SmartBookmark found,m ignoring: {}".format(entry['title']))
            return None

        if is_nongeneric_url(entry['uri']):
            logdbg("Non-Generic URL found,m ignoring: {}".format(entry['title']))
            return None

        desc = extract_desc(entry)
        bookmark_tags = extract_tags(entry)

        if add_bookmark_folder_as_tag:
            bookmark_tags.append(parent_folder)

        if unique_tag:
            bookmark_tags.append(unique_tag)

        tags = parse_tags([DELIM + tag for tag in bookmark_tags])

        logdbg("Entry found: {}, {}, {}, {} ".format(entry['uri'], entry['title'], tags, desc))
        return (entry['uri'], entry['title'], tags, desc, 0, True)

    def iterate_children(parent_folder, entry_list):
        """Recursively yield bookmark tuples for all entries below a folder."""
        for bm_entry in entry_list:
            try:
                type_code = bm_entry['typeCode']
            except (KeyError, TypeError):
                title = bm_entry.get('title') if isinstance(bm_entry, dict) else None
                logdbg("item without typeCode found, ignoring: {}".format(title))
                continue

            if type_code == TypeCode.uri.value:
                # best effort: a broken entry is reported and must not abort the import
                try:
                    record = build_record(parent_folder, bm_entry)
                except Exception as e:
                    logerr(e)
                    continue

                if record is not None:
                    yield record

            elif type_code == TypeCode.folder.value:
                try:
                    yield from iterate_children(parent_folder + "/" + bm_entry['title'],
                                                bm_entry['children'])
                except Exception as e:
                    # folder without title/children (or malformed children): report and go on
                    logerr(e)

            elif type_code == TypeCode.separator.value:
                # separators carry no bookmark data
                continue

            else:
                logdbg("Unknown typeCode found : {}".format(type_code))

    try:
        entry_list = json['children']
    except (KeyError, TypeError):
        logerr("No children in Root entry found")
        return

    yield from iterate_children("", entry_list)
|
||||
|
||||
|
||||
def import_html(html_soup, add_parent_folder_as_tag, newtag):
|
||||
"""Parse bookmark html.
|
||||
|
@ -557,11 +557,10 @@ def test_sigint_handler(capsys):
|
||||
[
|
||||
'http://www.kadrof.ru/cat_exchange.shtml',
|
||||
(
|
||||
'Все биржи фриланса и удаленной работы - больше 110 сайтов | Kadrof.ru',
|
||||
'Все биржи фриланса и удаленной работы - больше 110 сайтов для фрилансеров | Kadrof.ru',
|
||||
'Здесь собраны самые популярные биржи удаленной работы и фриланса для новичков и опытных специалистов. '
|
||||
'Более 110 ресурсов по видам:',
|
||||
'биржи удаленной работы,биржи фриланс',
|
||||
0, 0
|
||||
'биржи удаленной работы,биржи фриланс', 0, 0
|
||||
)
|
||||
],
|
||||
]
|
||||
|
294
tests/test_import_firefox_json.py
Normal file
294
tests/test_import_firefox_json.py
Normal file
@ -0,0 +1,294 @@
|
||||
import json
|
||||
from buku import import_firefox_json
|
||||
|
||||
|
||||
def test_load_from_empty():
    """An empty root object must not produce any bookmark."""
    # Arrange
    root = json.loads("{}")

    # Act
    found = list(import_firefox_json(root))

    # Assert
    assert len(found) == 0
|
||||
|
||||
def test_load_full_entry():
    """A fully populated uri entry is converted into one bookmark record."""

    # Arrange
    root = json.loads("""
        {
            "title" : "title",
            "children": [
                {
                    "dateAdded": 1269200039653000,
                    "guid": "xxxydfalkj",
                    "id": 113,
                    "index": 0,
                    "lastModified": 1305978154986000,
                    "title": "title",
                    "type": "text/x-moz-place",
                    "typeCode": 1,
                    "tags" : "x,y",
                    "uri": "http://uri.com/abc?234&536",
                    "annos" : [{
                        "name": "bookmarkProperties/description",
                        "value": "desc"
                    }]
                }]
        }""")

    # Act
    entries = list(import_firefox_json(root))

    # Assert
    assert len(entries) == 1
    uri, title, tags, desc = entries[0][:4]
    assert uri == 'http://uri.com/abc?234&536'
    assert title == 'title'
    assert tags == ',x,y,'
    assert desc == 'desc'
|
||||
|
||||
|
||||
def test_load_no_typecode():
    """An entry lacking 'typeCode' is ignored."""
    # Arrange
    root = json.loads("""
        {
            "title" : "title",
            "children": [
                {
                    "title" : "title1",
                    "uri" : "http://uri1",
                    "annos" : [{
                        "name": "bookmarkProperties/description",
                        "value": "desc"
                    }]
                }]
        }""")

    # Act
    entries = list(import_firefox_json(root))

    # Assert
    assert len(entries) == 0
|
||||
|
||||
|
||||
def test_load_invalid_typecode():
    """An entry with an unsupported 'typeCode' is ignored."""
    # Arrange
    root = json.loads("""
        {
            "title" : "title",
            "children": [
                {
                    "title" : "title1",
                    "typeCode" : 99,
                    "uri" : "http://uri1",
                    "annos" : [{
                        "name": "bookmarkProperties/description",
                        "value": "desc"
                    }]
                }]
        }""")

    # Act
    entries = list(import_firefox_json(root))

    # Assert
    assert len(entries) == 0
|
||||
|
||||
|
||||
def test_load_one_child():
    """A single untagged uri child yields one record with empty tags."""

    # Arrange
    root = json.loads("""
        {
            "title" : "title",
            "typeCode" : 2,
            "children": [
                {
                    "title" : "title1",
                    "typeCode" : 1,
                    "uri" : "http://uri1",
                    "annos" : [{
                        "name": "bookmarkProperties/description",
                        "value": "desc"
                    }]
                }
            ]
        } """)

    # Act
    entries = list(import_firefox_json(root))

    # Assert
    assert len(entries) == 1
    uri, title, tags, desc = entries[0][:4]
    assert uri == 'http://uri1'
    assert title == 'title1'
    assert tags == ','
    assert desc == 'desc'
|
||||
|
||||
def test_load_one_container_child():
    """A folder child without any entries yields nothing."""

    # Arrange
    root = json.loads("""
        {
            "title" : "title",
            "typeCode" : 2,
            "children": [
                {
                    "title":"bookmark folder",
                    "typeCode":2
                } ]
        }""")

    # Act
    entries = list(import_firefox_json(root))

    # Assert
    assert len(entries) == 0
|
||||
|
||||
def test_load_many_children():
    """Every uri child of the root folder is reported."""

    # Arrange
    root = json.loads("""
        {
            "title":"Weitere Lesezeichen",
            "typeCode":2,
            "children": [
                {"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"},
                {"title":"title2","typeCode":1,"uri":"http://uri2.com/xyz"},
                {"title":"title3","typeCode":1,"uri":"http://uri3.com"}
            ]
        } """)

    # Act
    entries = list(import_firefox_json(root))

    # Assert
    assert len(entries) == 3
|
||||
|
||||
def test_load_hierarchical_container():
    """Nested folders are traversed depth-first, in document order."""

    # Arrange
    root = json.loads("""
        {
            "title" : "title",
            "typeCode" : 2,
            "children": [
                {
                    "title" : "title",
                    "typeCode" : 2,
                    "children": [
                        {"title":"title1","typeCode":1,"uri":"http://uri1.com/#more-74"},
                        {"title":"title2","typeCode":1,"uri":"http://uri2.com/xyz"},
                        {"title":"title3","typeCode":1,"uri":"http://uri3.com"}
                    ]
                },
                {"title":"title4","typeCode":1,"uri":"http://uri4.com/#more-74"},
                {"title":"title5","typeCode":1,"uri":"http://uri5.com/xyz"},
                {"title":"title6","typeCode":1,"uri":"http://uri6.com"}
            ]
        }
        """)

    # Act
    entries = list(import_firefox_json(root))

    # Assert
    assert len(entries) == 6
    assert [entry[0] for entry in entries] == [
        'http://uri1.com/#more-74',
        'http://uri2.com/xyz',
        'http://uri3.com',
        'http://uri4.com/#more-74',
        'http://uri5.com/xyz',
        'http://uri6.com',
    ]
|
||||
|
||||
def test_load_separator():
    """Separator entries never become bookmarks."""

    # Arrange
    root = json.loads("""
        {
            "title" : "title",
            "typeCode" : 2,
            "children": [
                {
                    "title": "",
                    "type": "text/x-moz-place-separator",
                    "typeCode": 3
                } ]
        }""")

    # Act
    entries = list(import_firefox_json(root))

    # Assert
    assert len(entries) == 0
|
||||
|
||||
def test_load_multiple_tags():
    """Comma separated tags are normalised into buku's delimited tag string."""
    # Arrange
    root = json.loads("""
        {
            "title" : "title",
            "children": [
                {
                    "title" : "title1",
                    "uri" : "http://uri1",
                    "tags" : "tag1, tag2",
                    "typeCode": 1,
                    "annos" : [{
                        "name": "bookmarkProperties/description",
                        "value": "desc"
                    }]
                }]
        }""")

    # Act
    entries = list(import_firefox_json(root))

    # Assert
    assert len(entries) == 1
    assert entries[0][2] == ",tag1,tag2,"
|
Loading…
Reference in New Issue
Block a user