buku/buku.py

2922 lines
92 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
#
# Bookmark management utility
#
2017-01-01 08:25:10 -06:00
# Copyright © 2015-2017 Arun Prakash Jana <engineerarun@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
2016-10-22 08:21:46 -05:00
# along with Buku. If not, see <http://www.gnu.org/licenses/>.
2016-04-24 14:19:32 -05:00
import argparse
import html.parser as HTMLParser
import json
import logging
2016-12-12 10:35:34 -06:00
import os
import re
try:
import readline
readline
except ImportError:
pass
2016-12-12 10:35:34 -06:00
import requests
import signal
import sqlite3
import sys
2016-11-27 22:00:42 -06:00
import threading
2016-12-12 10:35:34 -06:00
import urllib3
from urllib3.util import parse_url, make_headers
import webbrowser
2017-01-10 21:55:45 -06:00
__version__ = '2.8'
__author__ = 'Arun Prakash Jana <engineerarun@gmail.com>'
__license__ = 'GPLv3'

# ---- Module-wide state ----

# Flipped to True once SIGINT has been received
interrupted = False
# Separator placed between tags when they are stored in the DB
DELIM = ','
SKIP_MIMES = {'.pdf', '.txt'}
# Colour output is enabled unless switched off
colorize = True

# ---- Default coloured templates used to print records ----

ID_str = '\x1b[1m\x1b[93m%d. \x1b[0m\x1b[92m%s\x1b[0m \x1b[1m[%s]\x1b[0m\n'
ID_DB_str = '\x1b[1m\x1b[93m%d. \x1b[0m\x1b[92m%s\x1b[0m'
MUTE_str = '%s \x1b[1m(L)\x1b[0m\n'
TITLE_str = '%s \x1b[91m>\x1b[0m %s\n'
DESC_str = '%s \x1b[91m+\x1b[0m %s\n'
TAG_str = '%s \x1b[91m#\x1b[0m %s\n'

# ---- Network defaults ----

# Present ourselves as Firefox on Ubuntu
USER_AGENT = ('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) '
              'Gecko/20100101 '
              'Firefox/50.0')
# Default dictionary of headers (not generated yet)
myheaders = None
# Default proxy (none)
myproxy = None

# ---- Logging ----

logging.basicConfig(format='[%(levelname)s] %(message)s')
logger = logging.getLogger()
# Short aliases used throughout the module
logdbg, logerr = logger.debug, logger.error
2016-12-27 08:10:29 -06:00
class BukuHTMLParser(HTMLParser.HTMLParser):
'''Class to parse and fetch the title
from a HTML page, if available
'''
2016-04-05 23:39:56 -05:00
2016-04-05 06:25:40 -05:00
def __init__(self):
HTMLParser.HTMLParser.__init__(self)
self.in_title_tag = False
2016-05-24 12:51:38 -05:00
self.data = ''
self.prev_tag = None
self.parsed_title = None
2016-04-05 06:25:40 -05:00
def handle_starttag(self, tag, attrs):
self.in_title_tag = False
2016-05-24 12:51:38 -05:00
if tag == 'title':
self.in_title_tag = True
self.prev_tag = tag
2016-04-05 06:25:40 -05:00
def handle_endtag(self, tag):
2016-05-24 12:51:38 -05:00
if tag == 'title':
self.in_title_tag = False
2016-05-24 12:51:38 -05:00
if self.data != '':
self.parsed_title = self.data
self.reset() # We have received title data, exit parsing
2016-04-05 06:25:40 -05:00
def handle_data(self, data):
if self.prev_tag == 'title' and self.in_title_tag:
self.data = '%s%s' % (self.data, data)
2016-04-05 06:25:40 -05:00
def error(self, message):
pass
2016-09-09 10:07:01 -05:00
class BukuCrypt:
    '''Class to handle encryption and decryption of
    the database file. Functionally a separate entity.

    Involves late imports in the static functions but it
    saves ~100ms each time. Given that encrypt/decrypt are
    not done automatically and any one should be called at
    a time, this doesn't seem to be an outrageous approach.

    Layout of an encrypted file, as written by encrypt_file():
    8-byte little-endian size of the plain DB, SALT_SIZE bytes of salt,
    16-byte IV, 32-byte SHA256 digest of the plain DB, AES-CBC ciphertext.
    '''

    # Crypto constants
    BLOCKSIZE = 0x10000  # 64 KB blocks
    SALT_SIZE = 0x20
    CHUNKSIZE = 0x80000  # Read/write 512 KB chunks

    @staticmethod
    def get_filehash(filepath):
        '''Get the SHA256 hash of a file

        :param filepath: path to the file
        :return: hash digest of the file
        '''

        from hashlib import sha256

        hasher = sha256()
        with open(filepath, 'rb') as fp:
            # Read in BLOCKSIZE pieces until read() returns b'' (EOF)
            for buf in iter(lambda: fp.read(BukuCrypt.BLOCKSIZE), b''):
                hasher.update(buf)

        return hasher.digest()

    @staticmethod
    def _discard(filepath):
        '''Best-effort removal of a partially written output file

        :param filepath: path to the file to remove
        '''

        try:
            os.remove(filepath)
        except OSError:
            pass

    @staticmethod
    def encrypt_file(iterations, dbfile=None):
        '''Encrypt the bookmarks database file

        Prompts for the password twice. On success the plain DB file is
        removed and the process exits with status 0; on any failure an
        error is logged and the process exits with status 1.

        :param iterations: number of iterations for key generation
        :param dbfile: custom database file path (including filename)
        '''

        try:
            from cryptography.hazmat.backends import default_backend
            from cryptography.hazmat.primitives.ciphers import (Cipher, modes,
                                                                algorithms)
            from getpass import getpass
            from hashlib import sha256
            import struct
        except ImportError:
            logerr('cryptography lib(s) missing')
            sys.exit(1)

        if iterations < 1:
            logerr('Iterations must be >= 1')
            sys.exit(1)

        if not dbfile:
            dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db')
        encfile = '%s.enc' % dbfile

        db_exists = os.path.exists(dbfile)
        enc_exists = os.path.exists(encfile)

        if db_exists and not enc_exists:
            pass
        elif not db_exists:
            logerr('%s missing. Already encrypted?', dbfile)
            sys.exit(1)
        else:
            # db_exists and enc_exists
            logerr('Both encrypted and flat DB files exist!')
            sys.exit(1)

        password = getpass()
        passconfirm = getpass()
        if not password or not passconfirm:
            logerr('Empty password')
            sys.exit(1)
        if password != passconfirm:
            logerr('Passwords do not match')
            sys.exit(1)

        try:
            # Get SHA256 hash of DB file
            dbhash = BukuCrypt.get_filehash(dbfile)
        except Exception as e:
            logerr(e)
            sys.exit(1)

        # Generate random 256-bit salt and key
        # NOTE: the key derivation is part of the on-disk format and must
        # stay in step with decrypt_file()
        salt = os.urandom(BukuCrypt.SALT_SIZE)
        key = ('%s%s' % (password,
                         salt.decode('utf-8', 'replace'))).encode('utf-8')
        for _ in range(iterations):
            key = sha256(key).digest()

        iv = os.urandom(16)
        encryptor = Cipher(
            algorithms.AES(key),
            modes.CBC(iv),
            backend=default_backend()
        ).encryptor()
        filesize = os.path.getsize(dbfile)

        try:
            with open(dbfile, 'rb') as infp, open(encfile, 'wb') as outfp:
                outfp.write(struct.pack('<Q', filesize))
                outfp.write(salt)
                outfp.write(iv)

                # Embed DB file hash in encrypted file
                outfp.write(dbhash)

                while True:
                    chunk = infp.read(BukuCrypt.CHUNKSIZE)
                    if not chunk:
                        break
                    if len(chunk) % 16 != 0:
                        # Pad the (last) chunk with spaces up to the AES
                        # block size; chunk is bytes, so the padding must
                        # be bytes too. decrypt_file() truncates it away.
                        chunk += b' ' * (16 - len(chunk) % 16)

                    outfp.write(encryptor.update(chunk))

                # The cipher context can be finalized only once, so this
                # must happen after the last chunk, not inside the loop
                outfp.write(encryptor.finalize())

            os.remove(dbfile)
            print('File encrypted')
            sys.exit(0)
        except Exception as e:
            # Do not leave a partial .enc file next to the intact DB
            BukuCrypt._discard(encfile)
            logerr(e)
            sys.exit(1)

    @staticmethod
    def decrypt_file(iterations, dbfile=None):
        '''Decrypt the bookmarks database file

        Prompts for the password. The decrypted file is verified against
        the SHA256 digest embedded in the encrypted file; on a match the
        encrypted file is removed, otherwise the decrypted file is removed
        and the process exits with status 1.

        :param iterations: number of iterations for key generation
        :param dbfile: custom database file path (including filename)
                     : The '.enc' suffix must be omitted.
        '''

        try:
            from cryptography.hazmat.backends import default_backend
            from cryptography.hazmat.primitives.ciphers import (Cipher, modes,
                                                                algorithms)
            from getpass import getpass
            from hashlib import sha256
            import struct
        except ImportError:
            logerr('cryptography lib(s) missing')
            sys.exit(1)

        if iterations < 1:
            logerr('Decryption failed')
            sys.exit(1)

        if not dbfile:
            dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db')
        else:
            dbfile = os.path.abspath(dbfile)

        encfile = '%s.enc' % dbfile

        enc_exists = os.path.exists(encfile)
        db_exists = os.path.exists(dbfile)

        if enc_exists and not db_exists:
            pass
        elif not enc_exists:
            logerr('%s missing', encfile)
            sys.exit(1)
        else:
            # db_exists and enc_exists
            logerr('Both encrypted and flat DB files exist!')
            sys.exit(1)

        password = getpass()
        if not password:
            logerr('Decryption failed')
            sys.exit(1)

        try:
            with open(encfile, 'rb') as infp:
                size = struct.unpack('<Q', infp.read(struct.calcsize('<Q')))[0]

                # Read 256-bit salt and generate key
                salt = infp.read(BukuCrypt.SALT_SIZE)
                key = ('%s%s' % (password,
                                 salt.decode('utf-8', 'replace'))).encode('utf-8')
                for _ in range(iterations):
                    key = sha256(key).digest()

                iv = infp.read(16)
                decryptor = Cipher(
                    algorithms.AES(key),
                    modes.CBC(iv),
                    backend=default_backend(),
                ).decryptor()

                # Get original DB file's SHA256 hash from encrypted file
                enchash = infp.read(32)

                with open(dbfile, 'wb') as outfp:
                    while True:
                        chunk = infp.read(BukuCrypt.CHUNKSIZE)
                        if not chunk:
                            break

                        outfp.write(decryptor.update(chunk))

                    # Finalize once, after all chunks have been processed
                    outfp.write(decryptor.finalize())
                    # Drop the padding added at encryption time
                    outfp.truncate(size)

            # Match hash of generated file with that of original DB file
            dbhash = BukuCrypt.get_filehash(dbfile)
            if dbhash != enchash:
                os.remove(dbfile)
                logerr('Decryption failed')
                sys.exit(1)
            else:
                os.remove(encfile)
                print('File decrypted')
        except struct.error:
            logerr('Tainted file')
            sys.exit(1)
        except Exception as e:
            # The plain DB did not exist before this call; remove whatever
            # was written so the encrypted file stays the single source
            BukuCrypt._discard(dbfile)
            logerr(e)
            sys.exit(1)
2016-09-09 10:07:01 -05:00
class BukuDb:
2017-01-01 08:25:10 -06:00
'''Abstracts all database operations'''
2016-04-05 06:25:40 -05:00
def __init__(self, json=False, field_filter=0, chatty=False, dbfile=None,
             colorize=True):
    '''Open the database and remember the output preferences

    :param json: print results in json format
    :param field_filter: bookmark print format specifier
    :param chatty: set the verbosity of the APIs
    :param dbfile: custom database file path (including filename)
    :param colorize: use colour in output
    '''

    # initdb() hands back a (connection, cursor) pair
    connection, cursor = BukuDb.initdb(dbfile)
    self.conn = connection
    self.cur = cursor

    # Output preferences
    self.json, self.field_filter = json, field_filter
    self.chatty, self.colorize = chatty, colorize
2016-04-10 07:28:49 -05:00
@staticmethod
def get_default_dbdir():
    '''Determine the directory path where dbfile will be stored:
    if $XDG_DATA_HOME is defined and not empty, use $XDG_DATA_HOME/buku
    else if $HOME is defined, use $HOME/.local/share/buku
    else use the current directory

    :return: path to the directory holding the database file
    '''

    data_home = os.environ.get('XDG_DATA_HOME')
    # An empty XDG_DATA_HOME is treated like an unset one; otherwise the
    # result would silently be the relative path 'buku'
    if not data_home:
        home = os.environ.get('HOME')
        if home is None:
            return os.path.abspath('.')

        data_home = os.path.join(home, '.local', 'share')

    return os.path.join(data_home, 'buku')
@staticmethod
def initdb(dbfile=None):
    '''Initialize the database connection. Create DB
    file and/or bookmarks table if they don't exist.
    Alert on encryption options on first execution.

    Exits the process with status 1 if the DB directory cannot be
    created, if only the encrypted DB exists, if both encrypted and
    flat DB files exist, or if the connection/table setup fails.

    :param dbfile: custom database file path (including filename)
    :return: (connection, cursor) tuple
    '''

    if not dbfile:
        dbpath = BukuDb.get_default_dbdir()
        dbfile = os.path.join(dbpath, 'bookmarks.db')
    else:
        dbfile = os.path.abspath(dbfile)
        dbpath = os.path.dirname(dbfile)

    encfile = dbfile + '.enc'

    try:
        if not os.path.exists(dbpath):
            os.makedirs(dbpath)
    except Exception as e:
        logerr(e)
        # os has no exit(); sys.exit() is the intended call
        sys.exit(1)

    db_exists = os.path.exists(dbfile)
    enc_exists = os.path.exists(encfile)

    if db_exists and not enc_exists:
        pass
    elif enc_exists and not db_exists:
        logerr('Unlock database first')
        sys.exit(1)
    elif db_exists and enc_exists:
        logerr('Both encrypted and flat DB files exist!')
        sys.exit(1)
    else:
        # not db_exists and not enc_exists
        print('DB file is being created at %s.\nYou should encrypt it.'
              % dbfile)

    try:
        # Create a connection
        conn = sqlite3.connect(dbfile, check_same_thread=False)
        conn.create_function('REGEXP', 2, regexp)
        cur = conn.cursor()

        # Create table if it doesn't exist
        cur.execute('CREATE TABLE if not exists bookmarks '
                    '(id integer PRIMARY KEY, URL text NOT NULL UNIQUE, '
                    'metadata text default \'\', tags text default \',\', '
                    'desc text default \'\', flags integer default 0)')
        conn.commit()
    except Exception as e:
        logerr('initdb(): %s', e)
        sys.exit(1)

    # Schema migrations for databases created by older versions.
    # Each ALTER fails when the column is already present, which is the
    # normal case and is deliberately ignored.

    # Add description column in existing DB (from version 2.1)
    try:
        query = 'ALTER TABLE bookmarks ADD COLUMN desc text default \'\''
        cur.execute(query)
        conn.commit()
    except sqlite3.Error:
        pass

    # Add flags column in existing DB
    # Introduced in v2.7 to handle immutable title
    # Designed to be extended in future using bitwise masks
    # Masks:
    #     0b00000001: set title immutable
    try:
        query = 'ALTER TABLE bookmarks ADD COLUMN flags integer default 0'
        cur.execute(query)
        conn.commit()
    except sqlite3.Error:
        pass

    return (conn, cur)
2016-12-27 08:10:29 -06:00
def get_rec_by_id(self, index):
    '''Fetch the bookmark stored at a DB index.

    :param index: DB index of the bookmark
    :return: the full bookmark row as a tuple, or None if there is
             no record at index
    '''

    lookup = 'SELECT * FROM bookmarks WHERE id = ? LIMIT 1'
    self.cur.execute(lookup, (index,))
    rows = self.cur.fetchall()
    return rows[0] if rows else None
2016-12-27 08:10:29 -06:00
def get_rec_id(self, url):
    '''Look up the DB index of a URL.

    :param url: URL to search
    :return: DB index of the bookmark holding url, or -1 if the URL
             is not in the DB
    '''

    lookup = 'SELECT id FROM bookmarks WHERE URL = ? LIMIT 1'
    self.cur.execute(lookup, (url,))
    # At most one row comes back; hand out its id column
    for found in self.cur.fetchall():
        return found[0]

    return -1
2016-12-27 08:10:29 -06:00
def add_rec(self, url, title_in=None, tags_in=None, desc=None, immutable=0,
            delay_commit=False):
    '''Add a new bookmark

    :param url: URL to bookmark
    :param title_in: string title to add manually; when None the title
                     is fetched from the web
    :param tags_in: string of comma-separated tags to add manually;
                    a missing leading and/or trailing comma is added
    :param desc: string description
    :param immutable: disable title fetch from web, if 1
    :param delay_commit: do not commit to DB, caller responsibility
    :return: True on success, False on failure
    '''

    # Return error for empty URL
    if not url:
        logerr('Invalid URL')
        return False

    # Ensure that the URL does not exist in DB already
    rec_id = self.get_rec_id(url)
    if rec_id != -1:
        logerr('URL [%s] already exists at index %d', url, rec_id)
        return False

    # Process title
    if title_in is not None:
        meta = title_in
    else:
        meta, mime, bad = network_handler(url)
        if bad:
            print('Malformed URL\n')
        elif mime:
            logdbg('Mime HEAD requested')
        elif meta == '':
            print('No title\n')
        else:
            logdbg('Title: [%s]', meta)

    # Fix up tags, if broken. The two ends are checked independently so
    # that input lacking both delimiters (e.g. 'a,b') ends up fully
    # wrapped (',a,b,') instead of only getting the leading one.
    if not tags_in:
        tags_in = DELIM
    else:
        if tags_in[0] != DELIM:
            tags_in = '%s%s' % (DELIM, tags_in)
        if tags_in[-1] != DELIM:
            tags_in = '%s%s' % (tags_in, DELIM)

    # Process description
    if desc is None:
        desc = ''

    try:
        # Bit 0 of flags marks the title as immutable
        flagset = 1 if immutable == 1 else 0

        query = ('INSERT INTO bookmarks(URL, metadata, tags, desc, flags) '
                 'VALUES (?, ?, ?, ?, ?)')
        self.cur.execute(query, (url, meta, tags_in, desc, flagset))
        if not delay_commit:
            self.conn.commit()
        if self.chatty:
            self.print_rec(self.cur.lastrowid)
        return True
    except Exception as e:
        logerr('add_rec(): %s', e)
        return False
2016-11-06 09:30:45 -06:00
def append_tag_at_index(self, index, tags_in):
    '''Add tags to the bookmark at index, keeping its current tags.

    When index is 0 the user is asked for confirmation and the tags
    are appended to every bookmark.

    :param index: int position of record, 0 for all
    :param tags_in: string of comma-separated tags to add manually;
                    its first character is dropped before it is joined
                    to the stored tags
    :return: False if the user declines the prompt, True otherwise
    '''

    if index != 0:
        self.cur.execute('SELECT id, tags FROM bookmarks WHERE id = ? '
                         'LIMIT 1', (index,))
    elif read_in('Append the tags to ALL bookmarks? (y/n): ') == 'y':
        self.cur.execute('SELECT id, tags FROM bookmarks ORDER BY id ASC')
    else:
        return False

    rows = self.cur.fetchall()
    if not rows:
        return True

    update = 'UPDATE bookmarks SET tags = ? WHERE id = ?'
    # Stored tags already end with the delimiter, so skip the leading one
    suffix = tags_in[1:]
    for rec_id, current in rows:
        merged = parse_tags(['%s%s' % (current, suffix)])
        self.cur.execute(update, (merged, rec_id,))
        if self.chatty:
            self.print_rec(rec_id)

    self.conn.commit()
    return True
2016-11-06 09:30:45 -06:00
def delete_tag_at_index(self, index, tags_in):
    '''Delete tags for bookmark at index

    When index is 0 the user is asked for confirmation and the tags
    are removed from every bookmark that carries them.

    :param index: int position of record, 0 for all
    :param tags_in: string of comma-separated tags to delete manually
    :return: False if the user declines the prompt, True otherwise
    '''

    tags_to_delete = tags_in.strip(DELIM).split(DELIM)

    if index == 0:
        resp = read_in('Delete the tag(s) from ALL bookmarks? (y/n): ')
        if resp != 'y':
            return False

        count = 0
        # The tag is bound as a parameter instead of being formatted
        # into the statement, so tags containing quotes cannot break
        # (or inject into) the SQL
        query = ("UPDATE bookmarks SET tags = replace(tags, ?, ?) "
                 "WHERE tags LIKE '%' || ? || '%'")
        for tag in tags_to_delete:
            wrapped = DELIM + tag + DELIM
            self.cur.execute(query, (wrapped, DELIM, wrapped))
            count += self.cur.rowcount

        if count:
            self.conn.commit()
            if self.chatty:
                print('%d records updated' % count)

        return True

    # Process a single index
    query = 'SELECT id, tags FROM bookmarks WHERE id = ? LIMIT 1'
    self.cur.execute(query, (index,))
    resultset = self.cur.fetchall()
    if resultset:
        query = 'UPDATE bookmarks SET tags = ? WHERE id = ?'
        for row in resultset:
            tags = row[1]

            for tag in tags_to_delete:
                # Collapse ',tag,' to ',' so neighbours stay delimited
                tags = tags.replace('%s%s%s' % (DELIM, tag, DELIM), DELIM)

            self.cur.execute(query, (parse_tags([tags]), row[0],))
            if self.chatty:
                self.print_rec(row[0])

            self.conn.commit()

    return True
2016-12-27 08:10:29 -06:00
def update_rec(self, index, url=None, title_in=None, tags_in=None,
               desc=None, immutable=-1, threads=4):
    '''Update an existing record at index
    Update all records if index is 0 and url is not specified.
    URL is an exception because URLs are unique in DB.

    :param index: int position to update, 0 for all
    :param url: bookmark address
    :param title_in: string title to add manually
    :param tags_in: string of comma-separated tags to add manually;
                    a missing leading and/or trailing comma is added
                    prefix with '+,' to append to current tags
                    prefix with '-,' to delete from current tags
    :param desc: string description
    :param immutable: 1 to disable title fetch from web, 0 to enable it;
                      any other value leaves the flag untouched
    :param threads: number of threads to use to refresh full DB
    :return: True on success, False on failure
    '''

    arguments = []
    query = 'UPDATE bookmarks SET'
    to_update = False
    tag_modified = False
    ret = False

    # Update URL if passed as argument
    if url is not None and url != '':
        if index == 0:
            logerr('All URLs cannot be same')
            return False
        query = '%s URL = ?,' % query
        arguments += (url,)
        to_update = True

    # Update tags if passed as argument
    if tags_in is not None:
        if tags_in == '+,' or tags_in == '-,':
            logerr('Please specify a tag')
            return False

        if tags_in.startswith('+,'):
            ret = self.append_tag_at_index(index, tags_in[1:])
            tag_modified = True
        elif tags_in.startswith('-,'):
            ret = self.delete_tag_at_index(index, tags_in[1:])
            tag_modified = True
        else:
            # Fix up tags, if broken. Both ends are checked
            # independently so 'a,b' becomes ',a,b,' and not ',a,b'.
            if tags_in == '':
                tags_in = DELIM
            else:
                if tags_in[0] != DELIM:
                    tags_in = '%s%s' % (DELIM, tags_in)
                if tags_in[-1] != DELIM:
                    tags_in = '%s%s' % (tags_in, DELIM)

            query = '%s tags = ?,' % query
            arguments += (tags_in,)
            to_update = True

    # Update description if passed as an argument
    if desc is not None:
        query = '%s desc = ?,' % query
        arguments += (desc,)
        to_update = True

    # Update immutable flag if passed as argument.
    # Only 0 and 1 are meaningful; anything else would add a bound
    # value without a matching placeholder, so it is ignored.
    if immutable in (0, 1):
        flagset = 1
        if immutable == 1:
            query = '%s flags = flags | ?,' % query
        else:
            query = '%s flags = flags & ?,' % query
            flagset = ~flagset

        arguments += (flagset,)
        to_update = True

    # Update title
    #
    # 1. if -t has no arguments, delete existing title
    # 2. if -t has arguments, update existing title
    # 3. if -t option is omitted at cmdline:
    #    if URL is passed, update the title from web using the URL
    # 4. if no other argument (url, tag, comment, immutable) passed,
    #    update title from web using DB URL (if title is mutable)
    title_to_insert = None
    if title_in is not None:
        title_to_insert = title_in
    elif url is not None and url != '':
        title_to_insert, mime, bad = network_handler(url)
        if bad:
            print('Malformed URL\n')
        elif mime:
            logdbg('Mime HEAD requested')
        elif title_to_insert == '':
            print('No title\n')
        else:
            logdbg('Title: [%s]', title_to_insert)
    elif not to_update and not tag_modified:
        ret = self.refreshdb(index, threads)
        if ret and index and self.chatty:
            self.print_rec(index)
        return ret

    if title_to_insert is not None:
        query = '%s metadata = ?,' % query
        arguments += (title_to_insert,)
        to_update = True

    if not to_update:  # Nothing to update
        return ret

    # query ends with a trailing comma at this point; strip it
    if index == 0:  # Update all records
        resp = read_in('Update ALL bookmarks? (y/n): ')
        if resp != 'y':
            return False

        query = query[:-1]
    else:
        query = '%s WHERE id = ?' % query[:-1]
        arguments += (index,)

    logdbg('query: "%s", args: %s', query, arguments)

    try:
        self.cur.execute(query, arguments)
        self.conn.commit()
        # Read rowcount once: print_rec() reuses the same cursor
        updated = self.cur.rowcount
        if updated and self.chatty:
            self.print_rec(index)

        if updated == 0:
            logerr('No matching index %d', index)
            return False
    except sqlite3.IntegrityError:
        logerr('URL already exists')
        return False

    return True
def refreshdb(self, index, threads):
    '''Refresh one record, or ALL records, in the database. Fetch the
    title for each bookmark from the web and update the records.
    Doesn't update the record if title is empty or if the record's
    title is flagged immutable.

    This API doesn't change DB index, URL or tags of a bookmark.
    This API is verbose.

    :param index: index of record to update, or 0 for all records
    :param threads: maximum number of worker threads to use
    :return: False if no record qualifies for a refresh, True otherwise
    '''

    if index == 0:
        self.cur.execute('SELECT id, url FROM bookmarks WHERE '
                         'flags & 1 != 1 ORDER BY id ASC')
    else:
        self.cur.execute('SELECT id, url FROM bookmarks WHERE id = ? AND '
                         'flags & 1 != 1 LIMIT 1', (index,))

    resultset = self.cur.fetchall()
    recs = len(resultset)
    if not recs:
        logerr('No matching index or title immutable or empty DB')
        return False

    # Set up strings to be printed
    if self.colorize:
        bad_url_str = '\x1b[1mIndex %d: Malformed URL\x1b[0m\n'
        mime_str = '\x1b[1mIndex %d: Mime HEAD requested\x1b[0m\n'
        blank_title_str = '\x1b[1mIndex %d: No title\x1b[0m\n'
        success_str = 'Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n'
    else:
        bad_url_str = 'Index %d: Malformed URL\n'
        mime_str = 'Index %d: Mime HEAD requested\n'
        blank_title_str = 'Index %d: No title\n'
        success_str = 'Title: [%s]\nIndex %d: updated\n'

    query = 'UPDATE bookmarks SET metadata = ? WHERE id = ?'
    done = {'value': 0}  # count threads completed
    processed = {'value': 0}  # count number of records processed

    # An additional call to generate default headers
    # gen_headers() is called within network_handler()
    # However, this initial call to setup headers
    # ensures there is no race condition among the
    # initial threads to setup headers
    if not myheaders:
        gen_headers()

    cond = threading.Condition()

    def refresh(count, cond):
        '''Inner function to fetch titles and update records

        param count: dummy input to adhere to convention
        param cond: threading condition object
        '''

        count = 0

        try:
            while True:
                # Pick the next record under the lock
                with cond:
                    if not resultset:
                        break
                    row = resultset.pop()

                # Network access happens without holding the lock
                title, mime, bad = network_handler(row[1])
                count += 1

                # DB access and printing are serialized; the context
                # manager releases the lock on every exit path
                with cond:
                    if bad:
                        print(bad_url_str % row[0])
                        continue
                    elif mime:
                        print(mime_str % row[0])
                        continue
                    elif title == '':
                        print(blank_title_str % row[0])
                        continue

                    self.cur.execute(query, (title, row[0],))
                    # Save after fetching 32 titles per thread
                    if count & 0b11111 == 0:
                        self.conn.commit()

                    if self.chatty:
                        print(success_str % (title, row[0]))

                if interrupted:
                    break
        finally:
            # Always report completion, even if the worker failed,
            # so the waiting caller cannot block forever
            logdbg('Thread %d: processed %d', threading.get_ident(), count)
            with cond:
                done['value'] += 1
                processed['value'] += count
                cond.notify()

    if recs < threads:
        threads = recs

    # Hold the lock while starting workers: they block on their first
    # acquire until wait() below releases it
    with cond:
        for i in range(threads):
            thread = threading.Thread(target=refresh, args=(i, cond))
            thread.start()

        while done['value'] < threads:
            cond.wait()
            logdbg('%d threads completed', done['value'])

        # Guard: records found == total records processed
        if recs != processed['value']:
            logerr('Records: %d, processed: %d !!!', recs, processed['value'])

    self.conn.commit()
    return True
2016-10-27 15:21:09 -05:00
def searchdb(self, keywords, all_keywords=False, deep=False, regex=False):
    '''Search the database for entries with tags, URL, title
    or description matching keywords and list those.

    With all_keywords set, the single keyword 'blank' lists records
    with an empty title or no tags, and the single keyword 'immutable'
    lists records whose title is flagged immutable.

    :param keywords: keywords to search
    :param all_keywords: match all keywords (True) or any keyword (False)
    :param deep: search for matching substrings instead of whole words
    :param regex: treat each keyword as a regular expression;
                  takes precedence over all_keywords and deep
    :return: list of matching rows; None if keywords is empty or the
             query fails
    '''

    if not keywords:
        return None

    qry = 'SELECT id, url, metadata, tags, desc FROM bookmarks WHERE'
    # Deep query string
    q1 = ("(tags LIKE ('%' || ? || '%') OR URL LIKE ('%' || ? || '%') OR "
          "metadata LIKE ('%' || ? || '%') OR desc LIKE ('%' || ? || '%'))")
    # Non-deep query string
    q2 = ('(tags REGEXP ? OR URL REGEXP ? OR metadata REGEXP ? OR desc '
          'REGEXP ?)')
    qargs = []

    special = keywords[0] if all_keywords and len(keywords) == 1 else None

    if regex:
        for token in keywords:
            qargs += (token, token, token, token,)
        qry = '%s %s' % (qry, ' OR '.join([q2] * len(keywords)))
    elif special == 'blank':
        qry = "SELECT * FROM bookmarks WHERE metadata = '' OR tags = ?"
        qargs += (DELIM,)
    elif special == 'immutable':
        qry = 'SELECT * FROM bookmarks WHERE flags & 1 == 1'
    else:
        clauses = []
        for token in keywords:
            if deep:
                clauses.append(q1)
            else:
                # Whole-word match. The keyword is plain text here, so
                # it is escaped: characters such as '+' or '(' must
                # not be interpreted as regex syntax.
                # NOTE(review): assumes the REGEXP function registered
                # on the connection uses Python re syntax - confirm.
                token = '\\b' + re.escape(token.rstrip('/')) + '\\b'
                clauses.append(q2)

            qargs += (token, token, token, token,)

        joiner = ' AND ' if all_keywords else ' OR '
        qry = '%s %s' % (qry, joiner.join(clauses))

    qry = '%s ORDER BY id ASC' % qry
    logdbg('query: "%s", args: %s', qry, qargs)

    try:
        self.cur.execute(qry, qargs)
    except sqlite3.OperationalError as e:
        logerr(e)
        return None

    return self.cur.fetchall()
2016-10-27 15:21:09 -05:00
def search_by_tag(self, tag):
    '''List the bookmarks that carry a given tag.

    :param tag: a tag to search as string; delimiters at either
                end are ignored
    :return: list of matching rows ordered by DB index
    '''

    # Stored tags are delimiter-wrapped, so match the wrapped form
    needle = DELIM + tag.strip(DELIM) + DELIM
    query = ("SELECT id, url, metadata, tags, desc FROM bookmarks "
             "WHERE tags LIKE '%' || ? || '%' ORDER BY id ASC")
    logdbg('query: "%s", args: %s', query, needle)

    self.cur.execute(query, (needle,))
    return self.cur.fetchall()
def compactdb(self, index, delay_commit=False):
    '''When an entry at index is deleted, move the
    last entry in DB to index, if index is lesser.

    The record is moved by re-numbering it, so every column
    (including flags) is preserved.

    :param index: DB index of deleted entry
    :param delay_commit: do not commit to DB, caller's responsibility
    '''

    self.cur.execute('SELECT MAX(id) from bookmarks')
    last = self.cur.fetchone()[0]

    # Nothing to move if the table is empty (MAX is NULL) or the
    # freed index lies at or beyond the last record
    if last is None or last <= index:
        return

    self.cur.execute('UPDATE bookmarks SET id = ? WHERE id = ?',
                     (index, last))
    if not delay_commit:
        self.conn.commit()
    if self.chatty:
        print('Index %d moved to %d' % (last, index))
2016-12-27 08:10:29 -06:00
def delete_rec(self, index, low=0, high=0, is_range=False,
               delay_commit=False):
    '''Delete one record, a range of records, or everything.

    Index 0 (or, for a range, a lower bound of 0) hands over to
    cleardb(). After a deletion the freed indices are refilled
    through compactdb().

    :param index: DB index of the entry to delete (ignored for ranges)
    :param low: actual lower index of range
    :param high: actual higher index of range
    :param is_range: a range is passed using low and high arguments
    :param delay_commit: do not commit to DB, caller's responsibility
    :return: True on success, False on failure
    '''

    # 0 stands for "all records", both as single index and range start
    if (low if is_range else index) == 0:
        return self.cleardb()

    if is_range:
        try:
            self.cur.execute('DELETE from bookmarks where id BETWEEN ? AND ?',
                             (low, high))
            removed = self.cur.rowcount
            print('Index %d-%d: %d deleted' % (low, high, removed))
            if not removed:
                return False

            # Compact DB by ascending order of index to ensure
            # the existing higher indices move only once
            # Delayed commit is forced
            for slot in range(low, high + 1):
                self.compactdb(slot, delay_commit=True)

            if not delay_commit:
                self.conn.commit()
        except IndexError:
            logerr('No matching index')
            return False

        return True

    # Single entry
    try:
        self.cur.execute('DELETE FROM bookmarks WHERE id = ?', (index,))
        if self.cur.rowcount != 1:
            logerr('No matching index %d', index)
            return False

        print('Index %d deleted' % index)
        self.compactdb(index, delay_commit=True)
        if not delay_commit:
            self.conn.commit()
    except IndexError:
        logerr('No matching index %d', index)
        return False

    return True
def delete_resultset(self, results):
    '''Delete a set of search results after user confirmation.

    Records are removed from the last result to the first. Indices are
    expected to be unique and in ascending order.
    Each removal is done with a delayed commit.

    :param results: set of results to delete
    :return: True on success, False on failure
    '''

    answer = read_in('Delete the search results? (y/n): ')
    if answer != 'y':
        return False

    # Walk the result set backwards
    for pos in range(len(results) - 1, -1, -1):
        self.delete_rec(results[pos][0], delay_commit=True)

        # Commit at every 200th removal
        if pos % 200 == 0:
            self.conn.commit()

    return True
def cleardb(self):
    '''Drop the bookmarks table, if it exists, after user confirmation

    :return: True if the table was dropped, False if the user declined
    '''

    confirmed = read_in('Remove ALL bookmarks? (y/n): ') == 'y'
    if confirmed:
        self.cur.execute('DROP TABLE if exists bookmarks')
        self.conn.commit()
        print('All bookmarks deleted')
        return True

    print('No bookmarks deleted')
    return False
2016-08-21 18:09:07 -05:00
2016-12-27 08:10:29 -06:00
def print_rec(self, index):
    '''Print the bookmark at index, or every bookmark if index is 0

    The output format depends on self.json and self.field_filter.
    Note: URL is printed on top because title may be blank

    :param index: index to print, 0 prints all
    '''

    single = index != 0

    if single:  # Fetch record at index
        try:
            query = 'SELECT * FROM bookmarks WHERE id = ? LIMIT 1'
            self.cur.execute(query, (index,))
            rows = self.cur.fetchall()
            if not rows:
                logerr('No matching index %d', index)
                return
        except IndexError:
            logerr('No matching index %d', index)
            return
    else:  # Fetch all entries
        self.cur.execute('SELECT * FROM bookmarks')
        rows = self.cur.fetchall()

    if self.json:
        print(format_json(rows, single, self.field_filter))
        return

    selected = self.field_filter
    for row in rows:
        if selected == 0:
            print_record(row)
        elif selected == 1:
            print('%s\t%s' % (row[0], row[1]))
        elif selected == 2:
            # Tags are stored wrapped in delimiters; drop both ends
            print('%s\t%s\t%s' % (row[0], row[1], row[3][1:-1]))
        elif selected == 3:
            print('%s\t%s' % (row[0], row[2]))
def get_all_tags(self):
    '''Get list of tags in DB

    :return: tuple (list of unique tags sorted alphabetically,
             a dictionary of {tag:usage_count})
    '''

    dic = {}
    qry = 'SELECT DISTINCT tags, COUNT(tags) FROM bookmarks GROUP BY tags'
    for tagstr, usage in self.cur.execute(qry):
        for tag in tagstr.strip(DELIM).split(DELIM):
            dic[tag] = dic.get(tag, 0) + usage

    # Records without tags contribute an empty tag. Leave it out of
    # the list wherever it shows up; the query does not promise any
    # row order. The dictionary keeps its count, as before.
    unique_tags = sorted(tag for tag in dic if tag != '')

    return unique_tags, dic
def replace_tag(self, orig, new=None):
    '''Replace a tag with new tags in every record that has it.
    The original tag is removed if no replacement is given.

    :param orig: original tag as string
    :param new: replacement tags as list
    :return: True on success, False on failure
    '''

    # Tags are matched together with their surrounding delimiters
    orig = '%s%s%s' % (DELIM, orig, DELIM)
    newtags = DELIM if new is None else parse_tags(new)

    if orig == newtags:
        print('Tags are same.')
        return False

    if newtags == DELIM:
        return self.delete_tag_at_index(0, orig)

    select = 'SELECT id, tags FROM bookmarks WHERE tags LIKE ?'
    self.cur.execute(select, ('%' + orig + '%',))
    matches = self.cur.fetchall()
    if not matches:
        return True

    update = 'UPDATE bookmarks SET tags = ? WHERE id = ?'
    for row in matches:
        replaced = parse_tags([row[1].replace(orig, newtags)])
        self.cur.execute(update, (replaced, row[0],))
        print('Index %d updated' % row[0])

    self.conn.commit()
    return True
def browse_by_index(self, index):
    '''Open the URL stored at index in the browser.
    Index 0 picks a random record.

    :param index: DB index
    :return: True on success, False on failure
    '''

    if index == 0:
        self.cur.execute('SELECT id from bookmarks ORDER BY RANDOM() LIMIT 1')
        picked = self.cur.fetchone()

        # Return if no entries in DB
        if picked is None:
            print('No bookmarks added yet ...')
            return False

        index = picked[0]
        logdbg('Opening random index %d', index)

    lookup = 'SELECT URL FROM bookmarks WHERE id = ? LIMIT 1'
    try:
        record = self.cur.execute(lookup, (index,)).fetchone()
        if record is not None:
            open_in_browser(record[0])
            return True
        logerr('No matching index %d', index)
    except IndexError:
        logerr('No matching index %d', index)

    return False
def exportdb(self, filepath, taglist=None):
    '''Export bookmarks to a Firefox bookmarks
    formatted html or a markdown file, if
    destination file name ends with '.md'.

    Asks for confirmation before overwriting an existing file.

    :param filepath: path to file to export to
    :param taglist: list of specific tags to export
    :return: True on success, False on failure
    '''

    import time

    count = 0
    timestamp = int(time.time())
    arguments = []
    query = 'SELECT * FROM bookmarks'
    is_tag_valid = False

    if taglist is not None:
        tagstr = parse_tags(taglist)
        if not tagstr or tagstr == DELIM:
            logerr('Invalid tag')
            return False

        tags = tagstr.split(DELIM)
        query = '%s WHERE' % query
        for tag in tags:
            if tag != '':
                is_tag_valid = True
                query += " tags LIKE '%' || ? || '%' OR"
                tag = '%s%s%s' % (DELIM, tag, DELIM)
                arguments += (tag,)

        if is_tag_valid:
            # Drop the trailing ' OR'
            query = query[:-3]
        else:
            # Drop the trailing ' WHERE'
            query = query[:-6]

    logdbg('(%s), %s', query, arguments)
    self.cur.execute(query, arguments)
    resultset = self.cur.fetchall()

    if not resultset:
        print('No bookmarks exported')
        return False

    if os.path.exists(filepath):
        resp = read_in('%s exists. Overwrite? (y/n): ' % filepath)
        if resp != 'y':
            return False

    try:
        outfp = open(filepath, mode='w', encoding='utf-8')
    except Exception as e:
        logerr(e)
        return False

    # The context manager closes the file even if a write fails
    with outfp:
        if filepath.endswith('.md'):
            outfp.write('List of buku bookmarks:\n\n')
            for row in resultset:
                if row[2] == '':
                    out = '- [Untitled](%s)\n' % (row[1])
                else:
                    out = '- [%s](%s)\n' % (row[2], row[1])
                outfp.write(out)
                count += 1
        else:
            outfp.write('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
<TITLE>Bookmarks</TITLE>
<H1>Bookmarks</H1>
<DL><p>
<DT><H3 ADD_DATE="%s" LAST_MODIFIED="%s" PERSONAL_TOOLBAR_FOLDER="true">\
Buku bookmarks</H3>
<DL><p>
''' % (timestamp, timestamp))

            for row in resultset:
                out = '%s<DT><A HREF="%s" ADD_DATE="%s" LAST_MODIFIED="%s"' \
                      % (' ', row[1], timestamp, timestamp)
                # Tags are written only when the record has any
                if row[3] != DELIM:
                    out = '%s TAGS="%s"' % (out, row[3][1:-1])
                out = '%s>%s</A>\n' % (out, row[2])
                if row[4] != '':
                    out = '%s <DD>%s\n' % (out, row[4])

                outfp.write(out)
                count += 1

            outfp.write(' </DL><p>\n</DL><p>')

    print('%s exported' % count)
    return True
def importdb(self, filepath):
    '''Import bookmarks from a html or a markdown
    file (with extension '.md'). Supports Firefox,
    Google Chrome and IE exported html

    :param filepath: path to file to import
    :return: True on success, False on failure
    '''

    if filepath.endswith('.md'):
        try:
            infp = open(filepath, mode='r', encoding='utf-8')
        except Exception as e:
            logerr(e)
            return False

        with infp:
            for line in infp:
                # Supported markdown format: [title](url)
                # Find position of title end, url start delimiter combo
                index = line.find('](')
                if index == -1:
                    continue

                # Find title start delimiter
                title_start_delim = line[:index].find('[')
                # Reverse find the url end delimiter
                url_end_delim = line[index + 2:].rfind(')')

                if title_start_delim != -1 and url_end_delim > 0:
                    # Parse title
                    title = line[title_start_delim + 1:index]
                    # Parse url
                    url = line[index + 2:index + 2 + url_end_delim]
                    self.add_rec(url, title, None, None, 0, True)

        self.conn.commit()
    else:
        try:
            import bs4
            with open(filepath, mode='r', encoding='utf-8') as infp:
                soup = bs4.BeautifulSoup(infp, 'html.parser')
        except ImportError:
            logerr('Beautiful Soup not found')
            return False
        except Exception as e:
            logerr(e)
            return False

        for tag in soup.findAll('a'):
            # Extract comment from <dd> tag
            desc = None
            comment_tag = tag.findNextSibling('dd')
            if comment_tag:
                # Keep the first line only; the whole text is kept
                # when it holds no newline
                desc = comment_tag.text.split('\n', 1)[0]

            if tag.has_attr('tags'):
                tags = '%s%s%s' % (DELIM, tag['tags'], DELIM)
            else:
                tags = None

            self.add_rec(tag['href'], tag.string, tags, desc, 0, True)

        self.conn.commit()

    return True
def mergedb(self, path):
    '''Add every bookmark found in another Buku database file

    :param path: path to DB file to merge
    :return: True on success, False on failure
    '''

    try:
        # Connect to input DB
        if sys.version_info >= (3, 4, 4):
            # Python 3.4.4 and above: open the input DB read-only
            src_conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True)
        else:
            src_conn = sqlite3.connect(path)

        src_cur = src_conn.cursor()
        src_cur.execute('SELECT * FROM bookmarks')
    except Exception as e:
        logerr(e)
        return False

    rows = src_cur.fetchall()
    for row in rows:
        # Column 0 (the index) is not carried over
        self.add_rec(row[1], row[2], row[3], row[4], row[5], True)
    if rows:
        self.conn.commit()

    try:
        src_cur.close()
        src_conn.close()
    except Exception:
        # Nothing more to do with the input DB
        pass

    return True
def tnyfy_url(self, index=0, url=None, shorten=True):
    '''Shorten or expand a URL using the tny.im URL shortener

    :param index: shorten the URL at DB index (int)
    :param url: pass a URL (string)
    :param shorten: True (default) to shorten, False to expand (boolean)
    :return: response text on success, None on failure
    '''

    if not index and not url:
        logerr('Either a valid DB index or URL required')
        return None

    if index:
        # A DB index takes precedence over a URL passed by the caller
        self.cur.execute('SELECT url FROM bookmarks WHERE id = ? LIMIT 1',
                         (index,))
        matches = self.cur.fetchall()
        if not matches:
            return None

        url = matches[0][0]

    urlbase = 'https://tny.im/yourls-api.php?action='
    if shorten:
        from urllib.parse import quote_plus as qp

        action, target = 'shorturl&format=simple&url=', qp(url)
    else:
        action, target = 'expand&format=simple&shorturl=', url
    _u = '%s%s%s' % (urlbase, action, target)

    request_headers = {
        'content-type': 'application/json',
        'User-Agent': USER_AGENT
    }
    proxies = {
        'https': os.environ.get('https_proxy'),
    }

    try:
        r = requests.post(_u, headers=request_headers, proxies=proxies)
    except Exception as e:
        logerr(e)
        return None

    if r.status_code == 200:
        return r.text

    logerr('[%s] %s', r.status_code, r.reason)
    return None
def fixtags(self):
    '''Undocumented API to fix tags set
    in earlier versions. Each stored tag string is passed
    through parse_tags() and rewritten if the result differs.
    Functionalities:

    1. Remove duplicate tags
    2. Sort tags
    3. Use lower case to store tags
    '''

    self.cur.execute('SELECT id, tags FROM bookmarks ORDER BY id ASC')
    rows = self.cur.fetchall()

    update = 'UPDATE bookmarks SET tags = ? WHERE id = ?'
    dirty = False
    for rec_id, oldtags in rows:
        # Records without tags need no fixing
        if oldtags == ',':
            continue

        fixed = parse_tags([oldtags])
        if fixed != oldtags:
            self.cur.execute(update, (fixed, rec_id,))
            dirty = True

    # Commit only if at least one record was rewritten
    if dirty:
        self.conn.commit()
def close_quit(self, exitval=0):
    '''Close the DB cursor and connection, if any, and exit

    :param exitval: program exit value
    '''

    if self.conn is None:
        sys.exit(exitval)

    try:
        self.cur.close()
        self.conn.close()
    except Exception:
        # ignore errors here, we're closing down
        pass

    sys.exit(exitval)
2016-09-09 10:07:01 -05:00
2016-12-31 10:50:18 -06:00
class ExtendedArgumentParser(argparse.ArgumentParser):
    '''Argument parser that adds program info and prompt help output'''

    @staticmethod
    def print_program_info(file=sys.stdout):
        '''Write symbol legend, version, copyright, license and webpage

        :param file: stream to write to
        '''

        # On Windows the default stream is swapped for stderr
        if sys.platform == 'win32' and file == sys.stdout:
            file = sys.stderr

        info = '''
SYMBOLS:
> title
+ comment
# tags
Version %s
Copyright © 2015-2017 %s
License: %s
Webpage: https://github.com/jarun/Buku
''' % (__version__, __author__, __license__)
        file.write(info)

    @staticmethod
    def print_prompt_help(file=sys.stdout):
        '''Write the list of keys understood at the interactive prompt

        :param file: stream to write to
        '''

        keys_help = '''
keys:
1-N browse search result indices and/or ranges
a open all results in browser
s keyword [...] search for records with ANY keyword
S keyword [...] search for records with ALL keywords
d match substrings ('pen' matches 'opened')
r expression run a regex search
t [...] search bookmarks by a tag or show tag list
(tag list index fetches bookmarks by tag)
? show this help
q, ^D, double Enter exit buku
'''
        file.write(keys_help)

    def print_help(self, file=sys.stdout):
        '''Write the argparse help followed by the program info

        :param file: stream to write to
        '''

        super().print_help(file)
        self.print_program_info(file)
# ----------------
# Helper functions
# ----------------
2016-05-22 16:03:47 -05:00
def is_bad_url(url):
    '''Check if URL is malformed, judging by its netloc only
    This API is not bulletproof but works in most cases.

    :param url: URL to scan
    :return: True or False
    '''

    # Get the netloc token
    netloc = parse_url(url).netloc
    if not netloc:
        # Retry with '//' prepended and get netloc
        netloc = parse_url('//' + url).netloc
        if not netloc:
            return True

    logdbg('netloc: %s', netloc)

    # netloc cannot start or end with a '.'
    # and should have at least one '.'
    return (netloc.startswith('.') or netloc.endswith('.') or
            '.' not in netloc)
def is_ignored_mime(url):
    '''Check if URL ends with one of the extensions in SKIP_MIMES
    (compared in lower case).
    Only a 'HEAD' request is made for these URLs

    :param url: URL to scan
    :return: True or False
    '''

    return any(url.lower().endswith(mime) for mime in SKIP_MIMES)
def get_page_title(resp):
    '''Invoke HTML parser and extract title from HTTP response

    :param resp: HTTP(S) GET response
    :return: title fetched from parsed page
    '''

    parser = BukuHTMLParser()

    try:
        parser.feed(resp.data.decode(errors='replace'))
    except Exception as e:
        # Suppress Exception due to intentional self.reset() in BHTMLParser
        if logger.isEnabledFor(logging.DEBUG) \
                and str(e) != 'we should not get here!':
            logerr('get_page_title(): %s', e)

    # Returned after the try block rather than from a finally clause,
    # so that non-Exception interrupts are not silently discarded
    return parser.parsed_title
2016-04-05 06:25:40 -05:00
2016-09-09 10:07:01 -05:00
def gen_headers():
    '''Set the global request headers (myheaders) and the global
    proxy (myproxy, read from the https_proxy environment variable)
    '''

    global myheaders, myproxy

    myheaders = {
        'Accept-Encoding': 'gzip,deflate',
        'User-Agent': USER_AGENT,
        'Accept': '*/*',
        'Cookie': '',
        'DNT': '1'
    }

    myproxy = os.environ.get('https_proxy')
    if not myproxy:
        return

    try:
        proxy_url = parse_url(myproxy)
    except Exception as e:
        logerr(e)
        return

    # Strip username and password (if present) and update headers
    credentials = proxy_url.auth
    if credentials:
        myproxy = myproxy.replace(credentials + '@', '')
        myheaders.update(make_headers(basic_auth=credentials))

    logdbg('proxy: [%s]', myproxy)
def get_PoolManager():
    '''Create a pool manager, going through the proxy if one is set

    :return: ProxyManager if https_proxy is defined, else PoolManager.
    '''

    if not myproxy:
        return urllib3.PoolManager(num_pools=1, headers=myheaders)

    return urllib3.ProxyManager(myproxy, num_pools=1, headers=myheaders)
def network_handler(url):
    '''Handle server connection and redirections

    A 'HEAD' request is made for URLs with an ignored mime,
    a 'GET' request (followed by title extraction) otherwise.

    :param url: URL to fetch
    :return: (title, recognized mime, bad url) tuple
    '''

    http_handler = None
    page_title = None
    resp = None
    method = 'GET'

    if is_bad_url(url):
        return ('', 0, 1)

    if is_ignored_mime(url):
        method = 'HEAD'

    if not myheaders:
        gen_headers()

    try:
        http_handler = get_PoolManager()

        while True:
            resp = http_handler.request(method, url, timeout=40)

            if resp.status == 200:
                if method == 'GET':
                    page_title = get_page_title(resp)
            elif resp.status == 403 and url.endswith('/'):
                # HTTP response Forbidden
                # Handle URLs in the form of https://www.domain.com/
                # which fail when trying to fetch resource '/'
                # retry without trailing '/'
                logdbg('Received status 403: retrying...')
                # Remove trailing /
                url = url[:-1]
                resp.release_conn()
                continue
            else:
                logerr('[%s] %s', resp.status, resp.reason)

            if resp:
                resp.release_conn()

            break
    except Exception as e:
        logerr('network_handler(): %s', e)
    finally:
        # Cleanup only: returning from a finally clause would discard
        # any non-Exception interrupt in flight
        if http_handler:
            http_handler.clear()

    if method == 'HEAD':
        return ('', 1, 0)

    if page_title is None:
        return ('', 0, 0)

    return (page_title.strip().replace('\n', ''), 0, 0)
2016-09-09 10:07:01 -05:00
def parse_tags(keywords=()):
    '''Format and get tag string from tokens

    The keywords are joined with spaces and split again at the
    delimiter, so a tag may span several keywords. Tags are stripped,
    lower-cased, de-duplicated and sorted.

    :param keywords: list of tags
    :return: comma-delimited string of tags
    :return: just delimiter, if no keywords
    :return: None, if keyword is None
    '''

    if keywords is None:
        return None

    if not keywords:
        return DELIM

    # Cleanse and get the tags
    tokens = [token.strip() for token in ' '.join(keywords).split(DELIM)]
    tokens = [token for token in tokens if token != '']

    logdbg('keywords: %s', keywords)
    logdbg('parsed tags: [%s]',
           DELIM + ''.join(token + DELIM for token in tokens))

    if not tokens:
        return DELIM

    # Unique tags in lower case, sorted
    sorted_tags = sorted({token.lower() for token in tokens})

    # Wrap with delimiter
    return '%s%s%s' % (DELIM, DELIM.join(sorted_tags), DELIM)
2016-05-18 22:24:46 -05:00
2016-09-09 10:07:01 -05:00
def taglist_subprompt(obj, msg, noninteractive=False):
    '''Additional prompt to show unique tag list

    :param obj: a valid instance of BukuDb class
    :param msg: sub-prompt message
    :param noninteractive: do not seek user input
    :return: new command string, None if noninteractive
    '''

    unique_tags, dic = obj.get_all_tags()
    new_results = True

    while True:
        if new_results:
            if not unique_tags:
                count = 0
                print('0 tags')
            else:
                count = 1
                for tag in unique_tags:
                    print('%6d. %s (%d)' % (count, tag, dic[tag]))
                    count += 1
                print()

        if noninteractive:
            return

        try:
            nav = read_in(msg)
            if not nav:
                nav = read_in(msg)
                if not nav:
                    # Quit on double enter
                    return 'q'
            nav = nav.strip()
        except EOFError:
            return 'q'

        if not nav:
            # Input made of whitespace only: nothing to index into
            print('Invalid input')
            new_results = False
        elif is_int(nav) and int(nav) > 0 and int(nav) < count:
            return 't ' + unique_tags[int(nav) - 1]
        elif is_int(nav):
            print('No matching index %s' % nav)
            new_results = False
        elif is_int(nav[0]):
            print('Invalid input')
            new_results = False
        elif nav == 't':
            new_results = True
            continue
        elif (nav == 'q' or nav == 'd' or nav == '?' or
              nav.startswith('s ') or nav.startswith('S ') or
              nav.startswith('r ') or nav.startswith('t ')):
            # Commands handled by the caller
            return nav
        else:
            print('Invalid input')
            new_results = False
2016-11-20 12:20:23 -06:00
def prompt(obj, results, noninteractive=False, deep=False, subprompt=False):
    '''Show each matching result from a search and prompt

    :param obj: a valid instance of BukuDb class
    :param results: result set from a DB query
    :param noninteractive: do not seek user input
    :param deep: use deep search
    :param subprompt: jump directly to sub prompt
    '''

    if not isinstance(obj, BukuDb):
        logerr('Not a BukuDb instance')
        return

    new_results = True

    if colorize:
        msg = '\x1b[7mbuku (? for help)\x1b[0m '
    else:
        msg = 'buku (? for help): '

    while True:
        if not subprompt:
            if new_results:
                if results:
                    for pos, row in enumerate(results, 1):
                        print_record(row, pos)
                else:
                    print('0 results')

            if noninteractive:
                return

            try:
                nav = read_in(msg)
                if not nav:
                    nav = read_in(msg)
                    if not nav:
                        # Quit on double enter
                        break
                nav = nav.strip()
            except EOFError:
                return
        else:
            # Go straight to the tag list, once
            nav = 't'
            subprompt = False

        # list tags with 't'
        if nav == 't':
            nav = taglist_subprompt(obj, msg, noninteractive)
            if noninteractive:
                return

        # search ANY match with new keywords
        if nav.startswith('s '):
            results = obj.searchdb(nav[2:].split(), False, deep)
            new_results = True
            continue

        # search ALL match with new keywords
        if nav.startswith('S '):
            results = obj.searchdb(nav[2:].split(), True, deep)
            new_results = True
            continue

        # regular expressions search with new keywords
        if nav.startswith('r '):
            results = obj.searchdb(nav[2:].split(), True, regex=True)
            new_results = True
            continue

        # tag search with new keywords
        if nav.startswith('t '):
            results = obj.search_by_tag(nav[2:])
            new_results = True
            continue

        # quit with 'q'
        if nav == 'q':
            return

        # toggle deep search with 'd'
        if nav == 'd':
            deep = not deep
            if deep:
                print('deep search on')
            else:
                print('deep search off')

            new_results = False
            continue

        # Show help with '?'
        if nav == '?':
            ExtendedArgumentParser.print_prompt_help(sys.stdout)
            new_results = False
            continue

        new_results = False

        # Nothing to browse if there are no results
        if not results:
            print('Not in a search context')
            continue

        # Taken from the result set itself: the records may not have
        # been listed yet when the prompt started in the sub prompt
        total = len(results)

        # open all results and re-prompt with 'a'
        if nav == 'a':
            for index in range(total):
                open_in_browser(results[index][1])
            continue

        # iterate over white-space separated indices
        for token in nav.split():
            if is_int(token):
                index = int(token) - 1
                if index < 0 or index >= total:
                    print('No matching index %s' % token)
                    continue
                open_in_browser(results[index][1])
            elif '-' in token and is_int(token.split('-')[0]) \
                    and is_int(token.split('-')[1]):
                lower = int(token.split('-')[0])
                upper = int(token.split('-')[1])
                if lower > upper:
                    lower, upper = upper, lower
                for index in range(lower - 1, upper):
                    if 0 <= index < total:
                        open_in_browser(results[index][1])
                    else:
                        print('No matching index %d' % (index + 1))
            else:
                # Give up on the rest of the line
                print('Invalid input')
                break
2016-05-18 10:46:08 -05:00
2016-09-09 10:07:01 -05:00
2016-10-11 13:49:05 -05:00
def print_record(row, idx=0):
    '''Print a single DB record
    Handles both search result and individual record

    :param row: the record to print
    :param idx: search result index. If 0, print with DB index
    '''

    # Start with index and URL
    if idx == 0:
        text = ID_DB_str % (row[0], row[1])
        # Indicate if record is immutable
        if row[5] & 1:
            text = MUTE_str % (text)
        else:
            text = '%s\n' % (text)
    else:
        text = ID_str % (idx, row[1], row[0])

    # Append title
    title = row[2]
    if title != '':
        text = TITLE_str % (text, title)

    # Append description
    description = row[4]
    if description != '':
        text = DESC_str % (text, description)

    # Append tags IF not default (delimiter)
    tags = row[3]
    if tags != DELIM:
        text = TAG_str % (text, tags[1:-1])

    print(text)
2016-05-17 15:11:31 -05:00
2016-09-09 10:07:01 -05:00
def format_json(resultset, single_record=False, field_filter=0):
    '''Return results in Json format

    :param resultset: records to format
    :param single_record: indicates only one record
    :param field_filter: determines fields to show
    :return: record(s) in Json format
    '''

    def as_dict(row):
        '''Pick the fields of one record as per field_filter'''
        if field_filter == 1:
            return {'uri': row[1]}
        if field_filter == 2:
            return {'uri': row[1], 'tags': row[3][1:-1]}
        if field_filter == 3:
            return {'title': row[2]}
        return {'index': row[0], 'uri': row[1], 'title': row[2],
                'description': row[4], 'tags': row[3][1:-1]}

    if single_record:
        # A single object instead of a list
        marks = {}
        for row in resultset:
            marks.update(as_dict(row))
    else:
        marks = [as_dict(row) for row in resultset]

    return json.dumps(marks, sort_keys=True, indent=4)
2016-09-09 10:07:01 -05:00
def is_int(string):
    '''Check if the input can be converted to an integer

    :param string: input string
    :return: True on success, False on exception
    '''

    try:
        int(string)
    except Exception:
        return False
    else:
        return True
2016-09-09 10:07:01 -05:00
def open_in_browser(url):
    '''Duplicate stdin, stdout (to suppress showing errors
    on the terminal) and open URL in default browser

    :param url: URL to open
    '''

    if not parse_url(url).scheme:
        # Prefix with 'http://' if no scheme
        # Otherwise, opening in browser fails anyway
        # We expect http to https redirection
        # will happen for https-only websites
        logerr('scheme missing in URI, trying http')
        url = '%s%s' % ('http://', url)

    # Keep copies of stderr and stdout, then point both at the null device
    _stderr = os.dup(2)
    os.close(2)
    _stdout = os.dup(1)
    os.close(1)
    fd = os.open(os.devnull, os.O_RDWR)
    os.dup2(fd, 2)
    os.dup2(fd, 1)
    try:
        webbrowser.open(url)
    except Exception as e:
        logerr('open_in_browser(): %s', e)
    finally:
        os.close(fd)
        os.dup2(_stderr, 2)
        os.dup2(_stdout, 1)
        # Release the saved copies; otherwise every call would
        # leave two more descriptors open
        os.close(_stderr)
        os.close(_stdout)
2016-09-09 10:07:01 -05:00
def check_upstream_release():
    '''Fetch the newest tag of the upstream repository from the
    GitHub API and report whether this is the latest release
    '''

    tags_api = 'https://api.github.com/repos/jarun/buku/tags?per_page=1'
    proxies = {
        'https': os.environ.get('https_proxy'),
    }

    try:
        r = requests.get(tags_api, proxies=proxies)
    except Exception as e:
        logerr(e)
        return

    if r.status_code != 200:
        logerr('[%s] %s', r.status_code, r.reason)
        return

    latest = r.json()[0]['name']
    if latest == 'v' + __version__:
        print('This is the latest release')
    else:
        print('Latest upstream release is %s' % latest)
2017-01-10 21:55:45 -06:00
def regexp(expr, item):
    '''Perform a case-insensitive regular expression search

    :param expr: regular expression
    :param item: string to search in
    :return: True if expr matches anywhere in item, else False
    '''

    match = re.search(expr, item, re.IGNORECASE)
    if match is None:
        return False
    return True
2017-02-07 12:03:51 -06:00
def read_in(msg):
    '''Read a line of user input with the default SIGINT handler active

    An interrupt during the read is reported and turned into a None
    result. Any other exception (for example EOFError) propagates to
    the caller.

    :param msg: prompt message
    :return: the line read, None if interrupted
    '''

    disable_sigint_handler()
    message = None
    try:
        message = input(msg)
    except KeyboardInterrupt:
        print('Interrupted.')
    finally:
        # Put the custom handler back even when input() raises
        enable_sigint_handler()

    return message
def sigint_handler(signum, frame):
    '''Custom SIGINT handler

    Sets the global interrupted flag, reports the interrupt on stderr
    and exits with status 1.

    :param signum: signal number (unused)
    :param frame: current stack frame (unused)
    '''
    global interrupted
    interrupted = True
    print('\nInterrupted.', file=sys.stderr)
    # Do a hard exit from here
    os._exit(1)
# Install the custom SIGINT handler and keep the handler it replaces
DEFAULT_HANDLER = signal.signal(signal.SIGINT, sigint_handler)
2017-02-07 12:03:51 -06:00
def disable_sigint_handler():
    '''Set the SIGINT handler to DEFAULT_HANDLER'''
    signal.signal(signal.SIGINT, DEFAULT_HANDLER)
2017-02-07 12:03:51 -06:00
def enable_sigint_handler():
    '''Set the SIGINT handler to sigint_handler'''
    signal.signal(signal.SIGINT, sigint_handler)
2016-09-09 10:07:01 -05:00
# ---------------------
# Editor mode functions
# ---------------------
2017-02-07 12:03:51 -06:00
2017-02-04 08:45:33 -06:00
def get_system_editor():
    '''Return the value of $EDITOR, or 'none' if it is not set'''

    editor = os.environ.get('EDITOR')
    if editor is None:
        return 'none'
    return editor
2017-02-04 08:45:33 -06:00
def to_temp_file_content(url, title_in, tags_in, desc):
    '''Generate temporary file content string

    :param url: URL to open
    :param title_in: string title to add manually
    :param tags_in: string of comma-separated tags to add manually
    :param desc: string description
    :return: lines as newline separated string
    '''

    strings = []

    # URL
    strings.append('# Lines beginning with "#" will be stripped.\n'
                   '# Add URL in next line (single line).')
    if url is not None:
        strings.append(url)

    # TITLE
    strings.append('# Add TITLE in next line (single line). '
                   'Leave blank to web fetch, "-" for no title.')
    # No title given: leave the line blank. Empty title: write "-".
    if title_in is None:
        title_in = ''
    elif title_in == '':
        title_in = '-'
    strings.append(title_in)

    # TAGS
    strings.append('# Add comma-separated TAGS in next line (single line).')
    # No tags given: leave the line blank
    strings.append(tags_in.strip(DELIM) if tags_in is not None else '')

    # DESC
    strings.append('# Add COMMENTS in next line(s).')
    if desc is not None and desc != '':
        strings.append(desc)
    else:
        strings.append('\n')

    return '\n'.join(strings)
2017-02-04 08:45:33 -06:00
def parse_temp_file_content(content):
    '''Parse and return temporary file content

    Lines beginning with '#' are dropped. The remaining lines are
    read in order: URL, title, tags, then comments.

    :param content: string of content
    :return: None if the first remaining line is blank, else tuple
        url: URL to open
        title: string title to add manually
        tags: string of comma-separated tags to add manually
        comments: string description
    '''

    lines = [c for c in content.split('\n') if len(c) == 0 or c[0] != '#']

    if not lines or lines[0].strip() == '':
        print('Edit aborted')
        return None

    url = lines[0]

    title = None
    if len(lines) > 1:
        title = lines[1]

    # Blank title line: no title given. "-": empty title.
    if title == '':
        title = None
    elif title == '-':
        title = ''

    tags = ','
    if len(lines) > 2:
        tags = parse_tags([lines[2]])

    comments = lines[3:]
    # need to remove all empty line that are at the end
    # and not those in the middle of the text
    while comments and comments[-1].strip() == '':
        comments.pop()

    return url, title, tags, '\n'.join(comments)
2017-02-04 08:45:33 -06:00
def edit_rec(editor, url, title_in, tags_in, desc):
    '''Edit a bookmark record

    The fields are written to a temporary file, the editor is run
    on it and the file is read back and parsed.

    :param editor: editor to open
    :param url: URL to open
    :param title_in: string title to add manually
    :param tags_in: string of comma-separated tags to add manually
    :param desc: string description
    :return: parsed content, None on failure
    '''

    import tempfile
    import subprocess

    temp_file_content = to_temp_file_content(url, title_in, tags_in, desc)

    fd, tmpfile = tempfile.mkstemp(prefix='buku-edit-')
    os.close(fd)

    try:
        with open(tmpfile, 'w+', encoding='utf-8') as fp:
            fp.write(temp_file_content)
            fp.flush()
            logdbg('Edited content written to %s', tmpfile)

        cmd = editor.split(' ')
        cmd.append(tmpfile)
        subprocess.call(cmd)

        with open(tmpfile, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        # With the temporary file still in place, the missing
        # file has to be the editor
        if os.path.exists(tmpfile):
            logerr('Cannot open editor')
        else:
            logerr('Cannot open tempfile')
        return None
    finally:
        # Do not leave the temporary file behind, whatever happened
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    return parse_temp_file_content(content)
2016-04-26 12:23:48 -05:00
# Handle piped input
def piped_input(argv, pipeargs=None):
    '''Collect arguments piped in through standard input

    When stdin is not a terminal, the given argument vector followed by
    every whitespace-separated token read from stdin is appended to
    pipeargs. Nothing is read when stdin is a terminal.

    :param argv: argument vector to place ahead of the piped tokens
    :param pipeargs: list extended in place; a new list is created if None
    :return: the list holding the collected arguments
    '''

    # The default must not be dereferenced directly: None has no extend()
    if pipeargs is None:
        pipeargs = []

    if not sys.stdin.isatty():
        pipeargs.extend(argv)
        for line in sys.stdin.readlines():
            pipeargs.extend(line.split())

    return pipeargs
2016-12-31 10:50:18 -06:00
# main starts here
def main():
    '''Parse the command line and run the requested bookmark operations

    Arguments piped through stdin are merged into the argument vector
    first. After parsing, the selected actions run in a fixed order:
    lock/unlock, editor mode, add, search, update, delete, print, tag
    replace, export, import, merge, open, shorten, expand, upstream version
    check and tag fix. The function always ends by closing the database
    and exiting through BukuDb.close_quit().
    '''

    global colorize, ID_str, ID_DB_str, MUTE_str, TITLE_str, DESC_str, TAG_str

    title_in = None
    tags_in = None
    desc_in = None
    pipeargs = []

    try:
        piped_input(sys.argv, pipeargs)
    except KeyboardInterrupt:
        pass

    # If piped input, set argument vector
    if pipeargs:
        sys.argv = pipeargs

    # Setup custom argument parser
    argparser = ExtendedArgumentParser(
        description='''Powerful command-line bookmark manager. Your mini web!
POSITIONAL ARGUMENTS:
KEYWORD search keywords''',
        formatter_class=argparse.RawTextHelpFormatter,
        usage='''buku [OPTIONS] [KEYWORD [KEYWORD ...]]''',
        add_help=False
    )
    HIDE = argparse.SUPPRESS

    argparser.add_argument('keywords', nargs='*', metavar='KEYWORD', help=HIDE)

    # ---------------------
    # GENERAL OPTIONS GROUP
    # ---------------------

    general_grp = argparser.add_argument_group(
        title='GENERAL OPTIONS',
        description=''' -a, --add URL [tag, ...]
bookmark URL with comma-separated tags
-u, --update [...] update fields of an existing bookmark
accepts indices and ranges
refresh the title, if no edit options
if no arguments:
- update results when used with search
- otherwise refresh all titles
-w, --write [editor|index]
open editor to edit a fresh bookmark
to update by index, EDITOR must be set
-d, --delete [...] remove bookmarks from DB
accepts indices or a single range
if no arguments:
- delete results when used with search
- otherwise delete all bookmarks
-h, --help show this information and exit
-v, --version show the program version and exit''')
    addarg = general_grp.add_argument
    addarg('-a', '--add', nargs='+', help=HIDE)
    addarg('-u', '--update', nargs='*', help=HIDE)
    addarg('-w', '--write', nargs='?', const=get_system_editor(), help=HIDE)
    addarg('-d', '--delete', nargs='*', help=HIDE)
    addarg('-h', '--help', action='store_true', help=HIDE)
    addarg('-v', '--version', action='version', version=__version__, help=HIDE)

    # ------------------
    # EDIT OPTIONS GROUP
    # ------------------

    edit_grp = argparser.add_argument_group(
        title='EDIT OPTIONS',
        description=''' --url keyword bookmark link
--tag [+|-] [...] comma-separated tags
clear bookmark tagset, if no arguments
'+' appends to, '-' removes from tagset
-t, --title [...] bookmark title; if no arguments:
-a: do not set title, -u: clear title
-c, --comment [...] description of the bookmark
clears description, if no arguments
--immutable N disable title fetch from web on update
N=0: mutable (default), N=1: immutable''')
    addarg = edit_grp.add_argument
    addarg('--url', nargs=1, help=HIDE)
    addarg('--tag', nargs='*', help=HIDE)
    addarg('-t', '--title', nargs='*', help=HIDE)
    addarg('-c', '--comment', nargs='*', help=HIDE)
    addarg('--immutable', type=int, default=-1, choices={0, 1}, help=HIDE)

    # --------------------
    # SEARCH OPTIONS GROUP
    # --------------------

    search_grp = argparser.add_argument_group(
        title='SEARCH OPTIONS',
        description=''' -s, --sany find records with ANY search keyword
this is the default search option
-S, --sall find records with ALL search keywords
special keywords -
"blank": entries with empty title/tag
"immutable": entries with locked title
--deep match substrings ('pen' matches 'opens')
--sreg run a regex search
--stag search bookmarks by a tag
list all tags, if no search keywords''')
    addarg = search_grp.add_argument
    addarg('-s', '--sany', action='store_true', help=HIDE)
    addarg('-S', '--sall', action='store_true', help=HIDE)
    addarg('--sreg', action='store_true', help=HIDE)
    addarg('--deep', action='store_true', help=HIDE)
    addarg('--stag', action='store_true', help=HIDE)

    # ------------------------
    # ENCRYPTION OPTIONS GROUP
    # ------------------------

    crypto_grp = argparser.add_argument_group(
        title='ENCRYPTION OPTIONS',
        description=''' -l, --lock [N] encrypt DB file with N (> 0, default 8)
hash iterations to generate key
-k, --unlock [N] decrypt DB file with N (> 0, default 8)
hash iterations to generate key''')
    addarg = crypto_grp.add_argument
    addarg('-k', '--unlock', nargs='?', type=int, const=8, help=HIDE)
    addarg('-l', '--lock', nargs='?', type=int, const=8, help=HIDE)

    # ----------------
    # POWER TOYS GROUP
    # ----------------

    power_grp = argparser.add_argument_group(
        title='POWER TOYS',
        description=''' -e, --export file export bookmarks in Firefox format html
export markdown, if file ends with '.md'
format: [title](url), 1 entry per line
use --tag to export only specific tags
-i, --import file import Firefox or Chrome bookmarks html
import markdown, if file ends with '.md'
-m, --merge file add bookmarks from another buku DB file
-p, --print [...] show record details by indices, ranges
print all bookmarks, if no arguments
-f, --format N limit fields in -p or Json search output
N=1: URL, N=2: URL and tag, N=3: title
-r, --replace oldtag [newtag ...]
replace oldtag with newtag everywhere
delete oldtag, if newtag not specified
-j, --json Json formatted output for -p and search
--nc disable color output
--np do not show the prompt, run and exit
-o, --open [...] browse bookmarks by indices and ranges
open a random bookmark, if no arguments
--oa browse all search results immediately
--shorten index|URL fetch shortened url from tny.im service
--expand index|URL expand a tny.im shortened url
--tacit reduce verbosity
--threads N max network connections in full refresh
default N=4, min N=1, max N=10
-V check latest upstream version available
-z, --debug show debug information and verbose logs''')
    addarg = power_grp.add_argument
    addarg('-e', '--export', nargs=1, help=HIDE)
    addarg('-i', '--import', nargs=1, dest='importfile', help=HIDE)
    addarg('-m', '--merge', nargs=1, help=HIDE)
    addarg('-p', '--print', nargs='*', help=HIDE)
    addarg('-f', '--format', type=int, default=0, choices={1, 2, 3}, help=HIDE)
    addarg('-r', '--replace', nargs='+', help=HIDE)
    addarg('-j', '--json', action='store_true', help=HIDE)
    addarg('--nc', action='store_true', help=HIDE)
    addarg('--np', action='store_true', help=HIDE)
    addarg('-o', '--open', nargs='*', help=HIDE)
    addarg('--oa', action='store_true', help=HIDE)
    addarg('--shorten', nargs=1, help=HIDE)
    addarg('--expand', nargs=1, help=HIDE)
    addarg('--tacit', action='store_true', help=HIDE)
    addarg('--threads', type=int, default=4, choices=range(1, 11), help=HIDE)
    addarg('-V', dest='upstream', action='store_true', help=HIDE)
    addarg('-z', '--debug', action='store_true', help=HIDE)
    # Undocumented API
    addarg('--fixtags', action='store_true', help=HIDE)

    # Show help and exit if no arguments
    if len(sys.argv) == 1:
        argparser.print_help(sys.stdout)
        sys.exit(1)

    # Parse the arguments
    args = argparser.parse_args()

    # Show help and exit if help requested
    if args.help:
        argparser.print_help(sys.stdout)
        sys.exit(0)

    # Set up debugging
    if args.debug:
        logger.setLevel(logging.DEBUG)
        logdbg('Version %s', __version__)
    else:
        logging.disable(logging.WARNING)
        urllib3.disable_warnings()

    # Handle color output preference
    if args.nc:
        colorize = False
        ID_str = '%d. %s [%s]\n'
        ID_DB_str = '%d. %s'
        MUTE_str = '%s (L)\n'
        TITLE_str = '%s > %s\n'
        DESC_str = '%s + %s\n'
        TAG_str = '%s # %s\n'

    # Handle encrypt/decrypt options at top priority
    if args.lock is not None:
        BukuCrypt.encrypt_file(args.lock)

    if args.unlock is not None:
        BukuCrypt.decrypt_file(args.unlock)

    # Set up title
    if args.title is not None:
        if args.title:
            title_in = ' '.join(args.title)
        else:
            title_in = ''

    # Set up tags
    if args.tag is not None:
        if args.tag:
            tags_in = args.tag
        else:
            tags_in = [DELIM, ]

    # Set up comment
    if args.comment is not None:
        if args.comment:
            desc_in = ' '.join(args.comment)
        else:
            desc_in = ''

    # Initialize the database and get handles, set verbose by default
    bdb = BukuDb(args.json, args.format, not args.tacit,
                 colorize=not args.nc)

    # Editor mode
    if args.write is not None:
        if args.write == 'none':
            logerr('EDITOR is not set')
            bdb.close_quit(1)
        elif args.write == '0':
            logerr('Cannot edit index 0')
            bdb.close_quit(1)

        if is_int(args.write):
            # An integer argument is a bookmark index, so the editor itself
            # has to come from the environment
            editor = get_system_editor()
            if editor == 'none':
                logerr('EDITOR must be set to use index with -w')
                # Error path: exit with status 1 like the other failures
                bdb.close_quit(1)

            idx = int(args.write)
            rec = bdb.get_rec_by_id(idx)
            if not rec:
                logerr('No matching index %d', idx)
                bdb.close_quit(1)

            result = edit_rec(editor, rec[1], rec[2], rec[3], rec[4])
            if result is not None:
                url, title, tags, desc = result
                bdb.update_rec(idx, url, title, tags, desc)
        elif args.add is None:
            # Edit and add a new bookmark
            # Parse tags into a comma-separated string
            if tags_in:
                if tags_in[0] == '+':
                    tags = '+%s' % parse_tags(tags_in[1:])
                elif tags_in[0] == '-':
                    tags = '-%s' % parse_tags(tags_in[1:])
                else:
                    tags = parse_tags(tags_in)
            else:
                tags = DELIM

            result = edit_rec(args.write, '', title_in, tags, desc_in)
            if result is not None:
                url, title_in, tags, desc_in = result
                bdb.add_rec(url, title_in, tags, desc_in, args.immutable)

    # Add record
    if args.add is not None:
        if args.url is not None and args.update is None:
            logerr('Bookmark a single URL at a time')
            bdb.close_quit(1)

        # Parse tags into a comma-separated string
        tags = DELIM
        keywords = args.add
        if tags_in is not None:
            if tags_in[0] == '+':
                if len(tags_in) > 1:
                    # The case: buku -a url tag1, tag2 --tag + tag3, tag4
                    tags_in = tags_in[1:]
                    # In case of add, args.add may have URL followed by tags
                    # Add delimiter as url+tags may not end with one
                    keywords = args.add + [DELIM] + tags_in
            else:
                keywords = args.add + [DELIM] + tags_in

        if len(keywords) > 1:
            tags = parse_tags(keywords[1:])

        url = args.add[0]

        # Open the editor on the new record when -w names an editor
        # (an integer -w argument is an index and was handled above)
        if args.write and not is_int(args.write):
            result = edit_rec(args.write, url, title_in, tags, desc_in)
            if result is not None:
                url, title_in, tags, desc_in = result

        bdb.add_rec(url, title_in, tags, desc_in, args.immutable)

    # Search record
    search_results = None
    search_opted = True
    update_search_results = False

    if args.sany:
        # Search URLs, titles, tags for any keyword
        search_results = bdb.searchdb(args.keywords, False, args.deep)
    elif args.sall:
        # Search URLs, titles, tags with all keywords
        search_results = bdb.searchdb(args.keywords, True, args.deep)
    elif args.sreg:
        # Run a regular expression search
        search_results = bdb.searchdb(args.keywords, regex=True)
    elif args.stag:
        # Search bookmarks by tag
        if args.keywords:
            search_results = bdb.search_by_tag(' '.join(args.keywords))
        else:
            # Use sub prompt to list all tags
            prompt(bdb, None, args.np, subprompt=True)
    elif args.keywords:
        search_results = bdb.searchdb(args.keywords, False, args.deep)
    else:
        search_opted = False

    if search_results:
        oneshot = args.np
        to_delete = False

        # Open all results in browser right away if args.oa
        # is specified. This has priority over delete/update.
        # URLs are opened first and updated/deleted later.
        if args.oa:
            for row in search_results:
                open_in_browser(row[1])

        # In case of search and delete/update,
        # prompt should be non-interactive
        # delete gets priority over update
        if args.delete is not None and not args.delete:
            oneshot = True
            to_delete = True
        elif args.update is not None and not args.update:
            oneshot = True
            update_search_results = True

        if not args.json:
            prompt(bdb, search_results, oneshot, args.deep)
        else:
            # Printing in Json format is non-interactive
            print(format_json(search_results, field_filter=args.format))

        # Delete search results if opted
        if to_delete:
            bdb.delete_resultset(search_results)

    # Update record
    if args.update is not None:
        if args.url is not None:
            url_in = args.url[0]
        else:
            url_in = ''

        # Parse tags into a comma-separated string
        if tags_in:
            if tags_in[0] == '+':
                tags = '+%s' % parse_tags(tags_in[1:])
            elif tags_in[0] == '-':
                tags = '-%s' % parse_tags(tags_in[1:])
            else:
                tags = parse_tags(tags_in)
        else:
            tags = None

        # No arguments to --update, update all
        if not args.update:
            # Update all records only if search was not opted
            if not search_opted:
                bdb.update_rec(0, url_in, title_in, tags, desc_in,
                               args.immutable, args.threads)
            elif update_search_results and search_results is not None:
                if not args.tacit:
                    print('Updated results:\n')

                pos = len(search_results) - 1
                while pos >= 0:
                    idx = search_results[pos][0]
                    bdb.update_rec(idx, url_in, title_in, tags, desc_in,
                                   args.immutable, args.threads)

                    # Commit at every 200th update
                    if pos % 200 == 0:
                        bdb.conn.commit()

                    pos -= 1
        else:
            for idx in args.update:
                if is_int(idx):
                    bdb.update_rec(int(idx), url_in, title_in, tags,
                                   desc_in, args.immutable, args.threads)
                elif '-' in idx and is_int(idx.split('-')[0]) \
                        and is_int(idx.split('-')[1]):
                    lower = int(idx.split('-')[0])
                    upper = int(idx.split('-')[1])
                    if lower > upper:
                        lower, upper = upper, lower

                    # Update only once if range starts from 0 (all)
                    if lower == 0:
                        bdb.update_rec(0, url_in, title_in, tags, desc_in,
                                       args.immutable, args.threads)
                    else:
                        for _id in range(lower, upper + 1):
                            bdb.update_rec(_id, url_in, title_in, tags,
                                           desc_in, args.immutable,
                                           args.threads)
                            if interrupted:
                                break

                if interrupted:
                    break

    # Delete record
    if args.delete is not None:
        if not args.delete:
            # Attempt delete-all only if search was not opted
            if not search_opted:
                bdb.cleardb()
        elif len(args.delete) == 1 and '-' in args.delete[0]:
            vals = str(args.delete[0]).split('-')
            if len(vals) == 2 and is_int(vals[0]) and is_int(vals[1]):
                if int(vals[0]) == int(vals[1]):
                    bdb.delete_rec(int(vals[0]))
                elif int(vals[0]) < int(vals[1]):
                    bdb.delete_rec(0, int(vals[0]), int(vals[1]), True)
                else:
                    bdb.delete_rec(0, int(vals[1]), int(vals[0]), True)
            else:
                logerr('Invalid index or range')
                bdb.close_quit(1)
        else:
            try:
                # Select the unique indices by integer value, so that
                # spellings such as '1' and '01' count as one record.
                # Index delete order - highest to lowest
                ids = sorted({int(idx) for idx in args.delete}, reverse=True)
                for idx in ids:
                    bdb.delete_rec(idx)
            except ValueError:
                logerr('Invalid index or range')

    # Print record
    if args.print is not None:
        if not args.print:
            bdb.print_rec(0)
        else:
            for idx in args.print:
                if is_int(idx):
                    bdb.print_rec(int(idx))
                elif '-' in idx and is_int(idx.split('-')[0]) \
                        and is_int(idx.split('-')[1]):
                    lower = int(idx.split('-')[0])
                    upper = int(idx.split('-')[1])
                    if lower > upper:
                        lower, upper = upper, lower
                    for _id in range(lower, upper + 1):
                        bdb.print_rec(_id)
                else:
                    logerr('Invalid index or range to print')
                    bdb.close_quit(1)

    # Replace a tag in DB
    if args.replace is not None:
        if len(args.replace) == 1:
            bdb.delete_tag_at_index(0, args.replace[0])
        else:
            bdb.replace_tag(args.replace[0], args.replace[1:])

    # Export bookmarks
    if args.export is not None:
        if args.tag is None:
            bdb.exportdb(args.export[0])
        elif not args.tag:
            logerr('Missing tag')
        else:
            bdb.exportdb(args.export[0], args.tag)

    # Import bookmarks
    if args.importfile is not None:
        bdb.importdb(args.importfile[0])

    # Merge a database file and exit
    if args.merge is not None:
        bdb.mergedb(args.merge[0])

    # Open URL in browser
    if args.open is not None:
        if not args.open:
            bdb.browse_by_index(0)
        else:
            for idx in args.open:
                if is_int(idx):
                    bdb.browse_by_index(int(idx))
                elif '-' in idx and is_int(idx.split('-')[0]) \
                        and is_int(idx.split('-')[1]):
                    lower = int(idx.split('-')[0])
                    upper = int(idx.split('-')[1])
                    if lower > upper:
                        lower, upper = upper, lower
                    for _id in range(lower, upper + 1):
                        bdb.browse_by_index(_id)
                else:
                    logerr('Invalid index or range to open')
                    bdb.close_quit(1)

    # Shorten URL
    if args.shorten:
        if is_int(args.shorten[0]):
            shorturl = bdb.tnyfy_url(index=int(args.shorten[0]))
        else:
            shorturl = bdb.tnyfy_url(url=args.shorten[0])

        if shorturl:
            print(shorturl)

    # Expand URL
    if args.expand:
        if is_int(args.expand[0]):
            url = bdb.tnyfy_url(index=int(args.expand[0]), shorten=False)
        else:
            url = bdb.tnyfy_url(url=args.expand[0], shorten=False)

        if url:
            print(url)

    # Report upstream version
    if args.upstream:
        check_upstream_release()

    # Fix tags
    if args.fixtags:
        bdb.fixtags()

    # Close DB connection and quit
    bdb.close_quit(0)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module
if __name__ == '__main__':
    main()