Move import helper APIs to correct section

parent d46f272a30
commit fa16f7fda8

buku.py (204 changed lines)
@@ -369,108 +369,6 @@ class BukuCrypt:
            sys.exit(1)


def import_md(filepath, newtag):
    """Parse bookmark markdown file.

    Parameters
    ----------
    filepath : str
        Path to markdown file.
    newtag : str
        New tag for bookmarks in markdown file.

    Returns
    -------
    tuple
        Parsed result.
    """
    with open(filepath, mode='r', encoding='utf-8') as infp:
        for line in infp:
            # Supported markdown format: [title](url)
            # Find position of title end, url start delimiter combo
            index = line.find('](')
            if index != -1:
                # Find title start delimiter
                title_start_delim = line[:index].find('[')
                # Reverse find the url end delimiter
                url_end_delim = line[index + 2:].rfind(')')

                if title_start_delim != -1 and url_end_delim > 0:
                    # Parse title
                    title = line[title_start_delim + 1:index]
                    # Parse url
                    url = line[index + 2:index + 2 + url_end_delim]
                    if (is_nongeneric_url(url)):
                        continue

                    yield (
                        url, title, delim_wrap(newtag)
                        if newtag else None, None, 0, True
                    )


def import_html(html_soup, add_parent_folder_as_tag, newtag):
    """Parse bookmark html.

    Parameters
    ----------
    html_soup : BeautifulSoup object
        BeautifulSoup representation of bookmark html.
    add_parent_folder_as_tag : bool
        True if bookmark parent folders should be added as tags else False.
    newtag : str
        A new unique tag to add to imported bookmarks.

    Returns
    -------
    tuple
        Parsed result.
    """

    # compatibility
    soup = html_soup

    for tag in soup.findAll('a'):
        # Extract comment from <dd> tag
        try:
            if (is_nongeneric_url(tag['href'])):
                continue
        except KeyError:
            continue

        desc = None
        comment_tag = tag.findNextSibling('dd')

        if comment_tag:
            desc = comment_tag.find(text=True, recursive=False)

        # add parent folder as tag
        if add_parent_folder_as_tag:
            # could be its folder or not
            possible_folder = tag.find_previous('h3')
            # get list of tags within that folder
            tag_list = tag.parent.parent.find_parent('dl')

            if ((possible_folder) and possible_folder.parent in list(tag_list.parents)):
                # then it's the folder of this bookmark
                if tag.has_attr('tags'):
                    tag['tags'] += (DELIM + possible_folder.text)
                else:
                    tag['tags'] = possible_folder.text

        # add unique tag if opted
        if newtag:
            if tag.has_attr('tags'):
                tag['tags'] += (DELIM + newtag)
            else:
                tag['tags'] = newtag

        yield (
            tag['href'], tag.string, parse_tags([tag['tags']])
            if tag.has_attr('tags') else None, desc, 0, True
        )


class BukuDb:
    """Abstracts all database operations.

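For reference, import_md() above is a generator: it scans each line of the input file for the supported [title](url) pattern and yields (url, title, tags, desc, 0, True) tuples, with the tags field set to the delimiter-wrapped newtag when one is given, else None. A minimal consumption sketch (not part of this commit), assuming buku.py is importable as a module and a hypothetical bookmarks.md input file exists:

# Illustrative only: iterate the tuples parsed from a markdown file.
# 'bookmarks.md' is a hypothetical file containing lines such as
# "[buku](https://github.com/jarun/buku)".
from buku import import_md

for url, title, tags, desc, *_ in import_md(filepath='bookmarks.md', newtag='toread'):
    # tags is the delimiter-wrapped newtag (or None); the trailing fields
    # from the yield are passed through unchanged
    print(url, title, tags, desc)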
@@ -2595,6 +2493,108 @@ def walk(root):
            walk(element)


def import_md(filepath, newtag):
    """Parse bookmark markdown file.

    Parameters
    ----------
    filepath : str
        Path to markdown file.
    newtag : str
        New tag for bookmarks in markdown file.

    Returns
    -------
    tuple
        Parsed result.
    """
    with open(filepath, mode='r', encoding='utf-8') as infp:
        for line in infp:
            # Supported markdown format: [title](url)
            # Find position of title end, url start delimiter combo
            index = line.find('](')
            if index != -1:
                # Find title start delimiter
                title_start_delim = line[:index].find('[')
                # Reverse find the url end delimiter
                url_end_delim = line[index + 2:].rfind(')')

                if title_start_delim != -1 and url_end_delim > 0:
                    # Parse title
                    title = line[title_start_delim + 1:index]
                    # Parse url
                    url = line[index + 2:index + 2 + url_end_delim]
                    if (is_nongeneric_url(url)):
                        continue

                    yield (
                        url, title, delim_wrap(newtag)
                        if newtag else None, None, 0, True
                    )


def import_html(html_soup, add_parent_folder_as_tag, newtag):
    """Parse bookmark html.

    Parameters
    ----------
    html_soup : BeautifulSoup object
        BeautifulSoup representation of bookmark html.
    add_parent_folder_as_tag : bool
        True if bookmark parent folders should be added as tags else False.
    newtag : str
        A new unique tag to add to imported bookmarks.

    Returns
    -------
    tuple
        Parsed result.
    """

    # compatibility
    soup = html_soup

    for tag in soup.findAll('a'):
        # Extract comment from <dd> tag
        try:
            if (is_nongeneric_url(tag['href'])):
                continue
        except KeyError:
            continue

        desc = None
        comment_tag = tag.findNextSibling('dd')

        if comment_tag:
            desc = comment_tag.find(text=True, recursive=False)

        # add parent folder as tag
        if add_parent_folder_as_tag:
            # could be its folder or not
            possible_folder = tag.find_previous('h3')
            # get list of tags within that folder
            tag_list = tag.parent.parent.find_parent('dl')

            if ((possible_folder) and possible_folder.parent in list(tag_list.parents)):
                # then it's the folder of this bookmark
                if tag.has_attr('tags'):
                    tag['tags'] += (DELIM + possible_folder.text)
                else:
                    tag['tags'] = possible_folder.text

        # add unique tag if opted
        if newtag:
            if tag.has_attr('tags'):
                tag['tags'] += (DELIM + newtag)
            else:
                tag['tags'] = newtag

        yield (
            tag['href'], tag.string, parse_tags([tag['tags']])
            if tag.has_attr('tags') else None, desc, 0, True
        )


def is_bad_url(url):
    """Check if URL is malformed.

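Likewise, import_html() walks every <a> element of a parsed bookmark HTML tree, reads an optional description from the sibling <dd> element, and can fold the nearest preceding <h3> folder title and the user-supplied newtag into the element's tags attribute before yielding. A minimal consumption sketch (not part of this commit), assuming the beautifulsoup4 package is installed and a hypothetical bookmarks.html browser export exists:

# Illustrative only: parse a hypothetical bookmark HTML export and iterate the results.
from bs4 import BeautifulSoup
from buku import import_html

with open('bookmarks.html', mode='r', encoding='utf-8') as infp:
    soup = BeautifulSoup(infp, 'html.parser')

for url, title, tags, desc, *_ in import_html(soup, add_parent_folder_as_tag=True, newtag='imported'):
    print(url, title, tags, desc)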