Move import helper APIs to correct section

This commit is contained in:
Arun Prakash Jana 2017-10-17 23:31:56 +05:30
parent d46f272a30
commit fa16f7fda8
No known key found for this signature in database
GPG Key ID: A75979F35C080412

204
buku.py
View File

@ -369,108 +369,6 @@ class BukuCrypt:
sys.exit(1)
def import_md(filepath, newtag):
    """Lazily parse bookmarks out of a markdown file.

    Only links written as ``[title](url)`` are recognised, and at most one
    link is taken from each line of the file.

    Parameters
    ----------
    filepath : str
        Path to markdown file.
    newtag : str
        New tag for bookmarks in markdown file.

    Yields
    ------
    tuple
        ``(url, title, tags, None, 0, True)`` for every accepted link, where
        ``tags`` is ``delim_wrap(newtag)`` when `newtag` is truthy and
        ``None`` otherwise.
    """

    with open(filepath, mode='r', encoding='utf-8') as infp:
        for line in infp:
            # '](' closes the title and opens the url
            sep = line.find('](')
            if sep == -1:
                continue

            # First '[' ahead of the separator opens the title
            title_open = line.find('[', 0, sep)

            # Last ')' after the separator closes the url; its offset within
            # the remainder doubles as the url length
            remainder = line[sep + 2:]
            url_len = remainder.rfind(')')

            # Need an opening '[' and a non-empty url
            if title_open == -1 or url_len <= 0:
                continue

            title = line[title_open + 1:sep]
            url = remainder[:url_len]
            if is_nongeneric_url(url):
                continue

            tags = delim_wrap(newtag) if newtag else None
            yield (url, title, tags, None, 0, True)
def import_html(html_soup, add_parent_folder_as_tag, newtag):
    """Parse bookmark html.

    Every ``<a>`` element of the document is visited. Anchors without an
    ``href`` attribute, or whose ``href`` is reported as non-generic by
    ``is_nongeneric_url()``, are skipped.

    Note that the ``tags`` attribute of the visited ``<a>`` elements is
    updated in place on `html_soup` while the folder name and `newtag` are
    appended.

    Parameters
    ----------
    html_soup : BeautifulSoup object
        BeautifulSoup representation of bookmark html.
    add_parent_folder_as_tag : bool
        True if bookmark parent folders should be added as tags else False.
    newtag : str
        A new unique tag to add to imported bookmarks.

    Yields
    ------
    tuple
        ``(href, title, tags, desc, 0, True)`` for every accepted anchor.
        ``tags`` is ``parse_tags([...])`` of the anchor's ``tags`` attribute,
        or ``None`` when the anchor has no such attribute. ``desc`` is the
        first direct text node of the following ``<dd>`` sibling, or
        ``None`` when there is none.
    """

    for tag in html_soup.findAll('a'):
        # Skip anchors that have no href or point to a non-generic url
        try:
            if is_nongeneric_url(tag['href']):
                continue
        except KeyError:
            continue

        # Extract comment from <dd> tag
        desc = None
        comment_tag = tag.findNextSibling('dd')
        if comment_tag:
            desc = comment_tag.find(text=True, recursive=False)

        # add parent folder as tag
        if add_parent_folder_as_tag:
            # could be its folder or not
            possible_folder = tag.find_previous('h3')

            # Look for a <dl> above the anchor's grandparent. An anchor placed
            # directly under the document root has no grandparent, and a flat
            # bookmark list has no such <dl>; both cases mean "no folder" and
            # must not raise AttributeError.
            parent = tag.parent
            grandparent = parent.parent if parent is not None else None
            tag_list = None
            if grandparent is not None:
                tag_list = grandparent.find_parent('dl')

            if (possible_folder and tag_list is not None
                    and possible_folder.parent in list(tag_list.parents)):
                # then it's the folder of this bookmark
                if tag.has_attr('tags'):
                    tag['tags'] += (DELIM + possible_folder.text)
                else:
                    tag['tags'] = possible_folder.text

        # add unique tag if opted
        if newtag:
            if tag.has_attr('tags'):
                tag['tags'] += (DELIM + newtag)
            else:
                tag['tags'] = newtag

        tags = parse_tags([tag['tags']]) if tag.has_attr('tags') else None
        yield (tag['href'], tag.string, tags, desc, 0, True)
class BukuDb:
"""Abstracts all database operations.
@ -2595,6 +2493,108 @@ def walk(root):
walk(element)
def import_md(filepath, newtag):
    """Lazily parse bookmarks out of a markdown file.

    Only links written as ``[title](url)`` are recognised, and at most one
    link is taken from each line of the file.

    Parameters
    ----------
    filepath : str
        Path to markdown file.
    newtag : str
        New tag for bookmarks in markdown file.

    Yields
    ------
    tuple
        ``(url, title, tags, None, 0, True)`` for every accepted link, where
        ``tags`` is ``delim_wrap(newtag)`` when `newtag` is truthy and
        ``None`` otherwise.
    """

    with open(filepath, mode='r', encoding='utf-8') as infp:
        for line in infp:
            # '](' closes the title and opens the url
            sep = line.find('](')
            if sep == -1:
                continue

            # First '[' ahead of the separator opens the title
            title_open = line.find('[', 0, sep)

            # Last ')' after the separator closes the url; its offset within
            # the remainder doubles as the url length
            remainder = line[sep + 2:]
            url_len = remainder.rfind(')')

            # Need an opening '[' and a non-empty url
            if title_open == -1 or url_len <= 0:
                continue

            title = line[title_open + 1:sep]
            url = remainder[:url_len]
            if is_nongeneric_url(url):
                continue

            tags = delim_wrap(newtag) if newtag else None
            yield (url, title, tags, None, 0, True)
def import_html(html_soup, add_parent_folder_as_tag, newtag):
    """Parse bookmark html.

    Every ``<a>`` element of the document is visited. Anchors without an
    ``href`` attribute, or whose ``href`` is reported as non-generic by
    ``is_nongeneric_url()``, are skipped.

    Note that the ``tags`` attribute of the visited ``<a>`` elements is
    updated in place on `html_soup` while the folder name and `newtag` are
    appended.

    Parameters
    ----------
    html_soup : BeautifulSoup object
        BeautifulSoup representation of bookmark html.
    add_parent_folder_as_tag : bool
        True if bookmark parent folders should be added as tags else False.
    newtag : str
        A new unique tag to add to imported bookmarks.

    Yields
    ------
    tuple
        ``(href, title, tags, desc, 0, True)`` for every accepted anchor.
        ``tags`` is ``parse_tags([...])`` of the anchor's ``tags`` attribute,
        or ``None`` when the anchor has no such attribute. ``desc`` is the
        first direct text node of the following ``<dd>`` sibling, or
        ``None`` when there is none.
    """

    for tag in html_soup.findAll('a'):
        # Skip anchors that have no href or point to a non-generic url
        try:
            if is_nongeneric_url(tag['href']):
                continue
        except KeyError:
            continue

        # Extract comment from <dd> tag
        desc = None
        comment_tag = tag.findNextSibling('dd')
        if comment_tag:
            desc = comment_tag.find(text=True, recursive=False)

        # add parent folder as tag
        if add_parent_folder_as_tag:
            # could be its folder or not
            possible_folder = tag.find_previous('h3')

            # Look for a <dl> above the anchor's grandparent. An anchor placed
            # directly under the document root has no grandparent, and a flat
            # bookmark list has no such <dl>; both cases mean "no folder" and
            # must not raise AttributeError.
            parent = tag.parent
            grandparent = parent.parent if parent is not None else None
            tag_list = None
            if grandparent is not None:
                tag_list = grandparent.find_parent('dl')

            if (possible_folder and tag_list is not None
                    and possible_folder.parent in list(tag_list.parents)):
                # then it's the folder of this bookmark
                if tag.has_attr('tags'):
                    tag['tags'] += (DELIM + possible_folder.text)
                else:
                    tag['tags'] = possible_folder.text

        # add unique tag if opted
        if newtag:
            if tag.has_attr('tags'):
                tag['tags'] += (DELIM + newtag)
            else:
                tag['tags'] = newtag

        tags = parse_tags([tag['tags']]) if tag.has_attr('tags') else None
        yield (tag['href'], tag.string, tags, desc, 0, True)
def is_bad_url(url):
"""Check if URL is malformed.