#!/usr/bin/env python3 # # Bookmark management utility # # Copyright © 2015-2018 Arun Prakash Jana # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Buku. If not, see . import argparse import collections import html.parser as HTMLParser import json import logging import os import re try: import readline readline except ImportError: pass import shutil import signal import sqlite3 import subprocess from subprocess import Popen, PIPE, DEVNULL import sys import threading import time import urllib3 from urllib3.exceptions import LocationParseError from urllib3.util import parse_url, make_headers import webbrowser __version__ = '3.8' __author__ = 'Arun Prakash Jana ' __license__ = 'GPLv3' # Global variables interrupted = False # Received SIGINT DELIM = ',' # Delimiter used to store tags in DB SKIP_MIMES = {'.pdf', '.txt'} promptmsg = 'buku (? for help): ' # Prompt message string # Default format specifiers to print records ID_str = '%d. %s [%s]\n' ID_DB_str = '%d. %s' MUTE_str = '%s (L)\n' URL_str = ' > %s\n' DESC_str = ' + %s\n' TAG_str = ' # %s\n' # colormap for color output from "googler" project COLORMAP = {k: '\x1b[%sm' % v for k, v in { 'a': '30', 'b': '31', 'c': '32', 'd': '33', 'e': '34', 'f': '35', 'g': '36', 'h': '37', 'i': '90', 'j': '91', 'k': '92', 'l': '93', 'm': '94', 'n': '95', 'o': '96', 'p': '97', 'A': '30;1', 'B': '31;1', 'C': '32;1', 'D': '33;1', 'E': '34;1', 'F': '35;1', 'G': '36;1', 'H': '37;1', 'I': '90;1', 'J': '91;1', 'K': '92;1', 'L': '93;1', 'M': '94;1', 'N': '95;1', 'O': '96;1', 'P': '97;1', 'x': '0', 'X': '1', 'y': '7', 'Y': '7;1', 'z': '2', }.items()} USER_AGENT = 'Buku/{} (textmode; Linux x86_64; 1024x768)'.format(__version__) myheaders = None # Default dictionary of headers myproxy = None # Default proxy text_browsers = ['elinks', 'links', 'links2', 'lynx', 'w3m', 'www-browser'] # Set up logging logger = logging.getLogger() logdbg = logger.debug logerr = logger.error class BukuHTMLParser(HTMLParser.HTMLParser): """Class to parse and fetch the title from a HTML page, if available. .. note:: The methods in this class are custom implementations of the HTMLParser object. See docs https://docs.python.org/3/library/html.parser.html. Attributes ---------- in_title_tag : bool True if HTML tag is a tag. Initial value is False. data : str Initial value is empty string. prev_tag : None or str Initial value is None. parsed_title : None or str The parsed title from a title tag. Initial value is None. """ def __init__(self): HTMLParser.HTMLParser.__init__(self) self.in_title_tag = False self.data = '' self.prev_tag = None self.parsed_title = None def handle_starttag(self, tag, attrs): self.in_title_tag = False if tag == 'title': self.in_title_tag = True self.prev_tag = tag def handle_endtag(self, tag): if tag == 'title': self.in_title_tag = False if self.data != '': self.parsed_title = self.data self.reset() # We have received title data, exit parsing def handle_data(self, data): if self.prev_tag == 'title' and self.in_title_tag: self.data += data def error(self, message): pass class BukuCrypt: """Class to handle encryption and decryption of the database file. Functionally a separate entity. Involves late imports in the static functions but it saves ~100ms each time. Given that encrypt/decrypt are not done automatically and any one should be called at a time, this doesn't seem to be an outrageous approach. """ # Crypto constants BLOCKSIZE = 0x10000 # 64 KB blocks SALT_SIZE = 0x20 CHUNKSIZE = 0x80000 # Read/write 512 KB chunks @staticmethod def get_filehash(filepath): """Get the SHA256 hash of a file. Parameters ---------- filepath : str Path to the file. Returns ------- hash : bytes Hash digest of file. """ from hashlib import sha256 with open(filepath, 'rb') as fp: hasher = sha256() buf = fp.read(BukuCrypt.BLOCKSIZE) while len(buf) > 0: hasher.update(buf) buf = fp.read(BukuCrypt.BLOCKSIZE) return hasher.digest() @staticmethod def encrypt_file(iterations, dbfile=None): """Encrypt the bookmarks database file. Parameters ---------- iterations : int Number of iterations for key generation. dbfile : str, optional Custom database file path (including filename). """ try: from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.ciphers import (Cipher, modes, algorithms) from getpass import getpass from hashlib import sha256 import struct except ImportError: logerr('cryptography lib(s) missing') sys.exit(1) if iterations < 1: logerr('Iterations must be >= 1') sys.exit(1) if not dbfile: dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db') encfile = dbfile + '.enc' db_exists = os.path.exists(dbfile) enc_exists = os.path.exists(encfile) if db_exists and not enc_exists: pass elif not db_exists: logerr('%s missing. Already encrypted?', dbfile) sys.exit(1) else: # db_exists and enc_exists logerr('Both encrypted and flat DB files exist!') sys.exit(1) password = getpass() passconfirm = getpass() if not password or not passconfirm: logerr('Empty password') sys.exit(1) if password != passconfirm: logerr('Passwords do not match') sys.exit(1) try: # Get SHA256 hash of DB file dbhash = BukuCrypt.get_filehash(dbfile) except Exception as e: logerr(e) sys.exit(1) # Generate random 256-bit salt and key salt = os.urandom(BukuCrypt.SALT_SIZE) key = ('%s%s' % (password, salt.decode('utf-8', 'replace'))).encode('utf-8') for _ in range(iterations): key = sha256(key).digest() iv = os.urandom(16) encryptor = Cipher( algorithms.AES(key), modes.CBC(iv), backend=default_backend() ).encryptor() filesize = os.path.getsize(dbfile) try: with open(dbfile, 'rb') as infp, open(encfile, 'wb') as outfp: outfp.write(struct.pack('<Q', filesize)) outfp.write(salt) outfp.write(iv) # Embed DB file hash in encrypted file outfp.write(dbhash) while True: chunk = infp.read(BukuCrypt.CHUNKSIZE) if len(chunk) == 0: break elif len(chunk) % 16 != 0: chunk = '%s%s' % (chunk, ' ' * (16 - len(chunk) % 16)) outfp.write(encryptor.update(chunk) + encryptor.finalize()) os.remove(dbfile) print('File encrypted') sys.exit(0) except Exception as e: logerr(e) sys.exit(1) @staticmethod def decrypt_file(iterations, dbfile=None): """Decrypt the bookmarks database file. Parameters ---------- iterations : int Number of iterations for key generation. dbfile : str, optional Custom database file path (including filename). The '.enc' suffix must be omitted. """ try: from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.ciphers import (Cipher, modes, algorithms) from getpass import getpass from hashlib import sha256 import struct except ImportError: logerr('cryptography lib(s) missing') sys.exit(1) if iterations < 1: logerr('Decryption failed') sys.exit(1) if not dbfile: dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db') else: dbfile = os.path.abspath(dbfile) dbpath, filename = os.path.split(dbfile) encfile = dbfile + '.enc' enc_exists = os.path.exists(encfile) db_exists = os.path.exists(dbfile) if enc_exists and not db_exists: pass elif not enc_exists: logerr('%s missing', encfile) sys.exit(1) else: # db_exists and enc_exists logerr('Both encrypted and flat DB files exist!') sys.exit(1) password = getpass() if not password: logerr('Decryption failed') sys.exit(1) try: with open(encfile, 'rb') as infp: size = struct.unpack('<Q', infp.read(struct.calcsize('Q')))[0] # Read 256-bit salt and generate key salt = infp.read(32) key = ('%s%s' % (password, salt.decode('utf-8', 'replace'))).encode('utf-8') for _ in range(iterations): key = sha256(key).digest() iv = infp.read(16) decryptor = Cipher( algorithms.AES(key), modes.CBC(iv), backend=default_backend(), ).decryptor() # Get original DB file's SHA256 hash from encrypted file enchash = infp.read(32) with open(dbfile, 'wb') as outfp: while True: chunk = infp.read(BukuCrypt.CHUNKSIZE) if len(chunk) == 0: break outfp.write(decryptor.update(chunk) + decryptor.finalize()) outfp.truncate(size) # Match hash of generated file with that of original DB file dbhash = BukuCrypt.get_filehash(dbfile) if dbhash != enchash: os.remove(dbfile) logerr('Decryption failed') sys.exit(1) else: os.remove(encfile) print('File decrypted') except struct.error: logerr('Tainted file') sys.exit(1) except Exception as e: logerr(e) sys.exit(1) class BukuDb: """Abstracts all database operations. Attributes ---------- conn : sqlite database connection. cur : sqlite database cursor. json : bool True if results should be printed in json format else False. field_filter : int Indicates format for displaying bookmarks. Default is 0. chatty : bool Sets the verbosity of the APIs. Default is False. """ def __init__(self, json=False, field_filter=0, chatty=False, dbfile=None, colorize=True): """Database initialization API. Parameters ---------- json : bool, optional True if results should be printed in json format else False. field_filter : int, optional Indicates format for displaying bookmarks. Default is 0. chatty : bool, optional Sets the verbosity of the APIs. Default is False. colorize : bool, optional Indicates whether color should be used in output. Default is True. """ self.json = json self.field_filter = field_filter self.chatty = chatty self.colorize = colorize self.conn, self.cur = BukuDb.initdb(dbfile, self.chatty) @staticmethod def get_default_dbdir(): """Determine the directory path where dbfile will be stored. If the platform is Windows, use %APPDATA% else if $XDG_DATA_HOME is defined, use it else if $HOME exists, use it else use the current directory. Returns ------- str Path to database file. """ data_home = os.environ.get('XDG_DATA_HOME') if data_home is None: if os.environ.get('HOME') is None: if sys.platform == 'win32': data_home = os.environ.get('APPDATA') if data_home is None: return os.path.abspath('.') else: return os.path.abspath('.') else: data_home = os.path.join(os.environ.get('HOME'), '.local', 'share') return os.path.join(data_home, 'buku') @staticmethod def initdb(dbfile=None, chatty=False): """Initialize the database connection. Create DB file and/or bookmarks table if they don't exist. Alert on encryption options on first execution. Parameters ---------- dbfile : str, optional Custom database file path (including filename). chatty : bool If True, shows informative message on DB creation. Returns ------- tuple (connection, cursor). """ if not dbfile: dbpath = BukuDb.get_default_dbdir() filename = 'bookmarks.db' dbfile = os.path.join(dbpath, filename) else: dbfile = os.path.abspath(dbfile) dbpath, filename = os.path.split(dbfile) try: if not os.path.exists(dbpath): os.makedirs(dbpath) except Exception as e: logerr(e) os._exit(1) db_exists = os.path.exists(dbfile) enc_exists = os.path.exists(dbfile + '.enc') if db_exists and not enc_exists: pass elif enc_exists and not db_exists: logerr('Unlock database first') sys.exit(1) elif db_exists and enc_exists: logerr('Both encrypted and flat DB files exist!') sys.exit(1) elif chatty: # not db_exists and not enc_exists print('DB file is being created at %s.\nYou should encrypt it.' % dbfile) try: # Create a connection conn = sqlite3.connect(dbfile, check_same_thread=False) conn.create_function('REGEXP', 2, regexp) cur = conn.cursor() # Create table if it doesn't exist # flags: designed to be extended in future using bitwise masks # Masks: # 0b00000001: set title immutable cur.execute('CREATE TABLE if not exists bookmarks (' 'id integer PRIMARY KEY, ' 'URL text NOT NULL UNIQUE, ' 'metadata text default \'\', ' 'tags text default \',\', ' 'desc text default \'\', ' 'flags integer default 0)') conn.commit() except Exception as e: logerr('initdb(): %s', e) sys.exit(1) return (conn, cur) def get_rec_all(self): """Get all the bookmarks in the database. Returns ------- list A list of tuples representing bookmark records. """ self.cur.execute('SELECT * FROM bookmarks') return self.cur.fetchall() def get_rec_by_id(self, index): """Get a bookmark from database by its ID. Parameters ---------- index : int DB index of bookmark record. Returns ------- tuple or None Bookmark data, or None if index is not found. """ self.cur.execute('SELECT * FROM bookmarks WHERE id = ? LIMIT 1', (index,)) resultset = self.cur.fetchall() return resultset[0] if resultset else None def get_rec_id(self, url): """Check if URL already exists in DB. Parameters ---------- url : str A URL to search for in the DB. Returns ------- int DB index, or -1 if URL not found in DB. """ self.cur.execute('SELECT id FROM bookmarks WHERE URL = ? LIMIT 1', (url,)) resultset = self.cur.fetchall() return resultset[0][0] if resultset else -1 def get_max_id(self): """Fetch the ID of the last record. Returns ------- int ID of the record if any record exists, else -1. """ self.cur.execute('SELECT MAX(id) from bookmarks') resultset = self.cur.fetchall() return -1 if resultset[0][0] is None else resultset[0][0] def add_rec(self, url, title_in=None, tags_in=None, desc=None, immutable=0, delay_commit=False): """Add a new bookmark. Parameters ---------- url : str URL to bookmark. title_in :str, optional Title to add manually. Default is None. tags_in : str, optional Comma-separated tags to add manually. Must start and end with comma. Default is None. desc : str, optional Description of the bookmark. Default is None. immutable : int, optional Indicates whether to disable title fetch from web. Default is 0. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- int DB index of new bookmark on success, -1 on failure. """ # Return error for empty URL if not url or url == '': logerr('Invalid URL') return -1 # Ensure that the URL does not exist in DB already id = self.get_rec_id(url) if id != -1: logerr('URL [%s] already exists at index %d', url, id) return -1 # Process title if title_in is not None: meta = title_in else: meta, mime, bad = network_handler(url) if bad: print('Malformed URL\n') elif mime: logdbg('HTTP HEAD requested') elif meta == '': print('No title\n') else: logdbg('Title: [%s]', meta) # Fix up tags, if broken if tags_in is None or tags_in == '': tags_in = DELIM elif tags_in[0] != DELIM: tags_in = DELIM + tags_in elif tags_in[-1] != DELIM: tags_in = tags_in + DELIM # Process description if desc is None: desc = '' try: flagset = 0 if immutable == 1: flagset |= immutable qry = 'INSERT INTO bookmarks(URL, metadata, tags, desc, flags) VALUES (?, ?, ?, ?, ?)' self.cur.execute(qry, (url, meta, tags_in, desc, flagset)) if not delay_commit: self.conn.commit() if self.chatty: self.print_rec(self.cur.lastrowid) return self.cur.lastrowid except Exception as e: logerr('add_rec(): %s', e) return -1 def append_tag_at_index(self, index, tags_in, delay_commit=False): """Append tags to bookmark tagset at index. Parameters ---------- index : int DB index of the record. 0 indicates all records. tags_in : str Comma-separated tags to add manually. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- bool True on success, False on failure. """ if index == 0: resp = read_in('Append the tags to ALL bookmarks? (y/n): ') if resp != 'y': return False self.cur.execute('SELECT id, tags FROM bookmarks ORDER BY id ASC') else: self.cur.execute('SELECT id, tags FROM bookmarks WHERE id = ? LIMIT 1', (index,)) resultset = self.cur.fetchall() if resultset: query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in resultset: tags = row[1] + tags_in[1:] tags = parse_tags([tags]) self.cur.execute(query, (tags, row[0],)) if self.chatty and not delay_commit: self.print_rec(row[0]) else: return False if not delay_commit: self.conn.commit() return True def delete_tag_at_index(self, index, tags_in, delay_commit=False): """Delete tags from bookmark tagset at index. Parameters ---------- index : int DB index of bookmark record. 0 indicates all records. tags_in : str Comma-separated tags to delete manually. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- bool True on success, False on failure. """ tags_to_delete = tags_in.strip(DELIM).split(DELIM) if index == 0: resp = read_in('Delete the tag(s) from ALL bookmarks? (y/n): ') if resp != 'y': return False count = 0 match = "'%' || ? || '%'" for tag in tags_to_delete: tag = delim_wrap(tag) q = ("UPDATE bookmarks SET tags = replace(tags, '%s', '%s') WHERE tags LIKE %s" % (tag, DELIM, match)) self.cur.execute(q, (tag,)) count += self.cur.rowcount if count and not delay_commit: self.conn.commit() if self.chatty: print('%d record(s) updated' % count) return True # Process a single index # Use SELECT and UPDATE to handle multiple tags at once query = 'SELECT id, tags FROM bookmarks WHERE id = ? LIMIT 1' self.cur.execute(query, (index,)) resultset = self.cur.fetchall() if resultset: query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in resultset: tags = row[1] for tag in tags_to_delete: tags = tags.replace(delim_wrap(tag), DELIM) self.cur.execute(query, (parse_tags([tags]), row[0],)) if self.chatty and not delay_commit: self.print_rec(row[0]) if not delay_commit: self.conn.commit() else: return False return True def update_rec(self, index, url=None, title_in=None, tags_in=None, desc=None, immutable=-1, threads=4): """Update an existing record at index. Update all records if index is 0 and url is not specified. URL is an exception because URLs are unique in DB. Parameters ---------- index : int DB index of record. 0 indicates all records. url : str, optional Bookmark address. title_in : str, optional Title to add manually. tags_in : str, optional Comma-separated tags to add manually. Must start and end with comma. Prefix with '+,' to append to current tags. Prefix with '-,' to delete from current tags. desc : str, optional Description of bookmark. immutable : int, optional Diable title fetch from web if 1. Default is -1. threads : int, optional Number of threads to use to refresh full DB. Default is 4. Returns ------- bool True on success, False on Failure. """ arguments = [] query = 'UPDATE bookmarks SET' to_update = False tag_modified = False ret = False # Update URL if passed as argument if url is not None and url != '': if index == 0: logerr('All URLs cannot be same') return False query += ' URL = ?,' arguments += (url,) to_update = True # Update tags if passed as argument if tags_in is not None: if tags_in == '+,' or tags_in == '-,': logerr('Please specify a tag') return False if tags_in.startswith('+,'): chatty = self.chatty self.chatty = False ret = self.append_tag_at_index(index, tags_in[1:]) self.chatty = chatty tag_modified = True elif tags_in.startswith('-,'): chatty = self.chatty self.chatty = False ret = self.delete_tag_at_index(index, tags_in[1:]) self.chatty = chatty tag_modified = True else: # Fix up tags, if broken if tags_in is None or tags_in == '': tags_in = DELIM elif tags_in[0] != DELIM: tags_in = DELIM + tags_in elif tags_in[-1] != DELIM: tags_in = tags_in + DELIM query += ' tags = ?,' arguments += (tags_in,) to_update = True # Update description if passed as an argument if desc is not None: query += ' desc = ?,' arguments += (desc,) to_update = True # Update immutable flag if passed as argument if immutable != -1: flagset = 1 if immutable == 1: query += ' flags = flags | ?,' elif immutable == 0: query += ' flags = flags & ?,' flagset = ~flagset arguments += (flagset,) to_update = True # Update title # # 1. if --title has no arguments, delete existing title # 2. if --title has arguments, update existing title # 3. if --title option is omitted at cmdline: # if URL is passed, update the title from web using the URL # 4. if no other argument (url, tag, comment, immutable) passed, # update title from web using DB URL (if title is mutable) title_to_insert = None if title_in is not None: title_to_insert = title_in elif url is not None and url != '': title_to_insert, mime, bad = network_handler(url) if bad: print('Malformed URL\n') elif mime: logdbg('HTTP HEAD requested') elif title_to_insert == '': print('No title\n') else: logdbg('Title: [%s]', title_to_insert) elif not to_update and not tag_modified: ret = self.refreshdb(index, threads) if ret and index and self.chatty: self.print_rec(index) return ret if title_to_insert is not None: query += ' metadata = ?,' arguments += (title_to_insert,) to_update = True if not to_update: # Nothing to update # Show bookmark if tags were appended to deleted if tag_modified and self.chatty: self.print_rec(index) return ret if index == 0: # Update all records resp = read_in('Update ALL bookmarks? (y/n): ') if resp != 'y': return False query = query[:-1] else: query = query[:-1] + ' WHERE id = ?' arguments += (index,) logdbg('query: "%s", args: %s', query, arguments) try: self.cur.execute(query, arguments) self.conn.commit() if self.cur.rowcount and self.chatty: self.print_rec(index) if self.cur.rowcount == 0: logerr('No matching index %d', index) return False except sqlite3.IntegrityError: logerr('URL already exists') return False return True def refreshdb(self, index, threads): """Refresh ALL records in the database. Fetch title for eachbookmark from the web and update the records. Doesn't update the record if title is empty. Notes ----- This API doesn't change DB index, URL or tags of a bookmark. This API is verbose. Parameters ---------- index : int DB index of record to update. 0 indicates all records. threads: int Number of threads to use to refresh full DB. Default is 4. """ if index == 0: self.cur.execute('SELECT id, url, flags FROM bookmarks ORDER BY id ASC') else: self.cur.execute('SELECT id, url, flags FROM bookmarks WHERE id = ? LIMIT 1', (index,)) resultset = self.cur.fetchall() recs = len(resultset) if not recs: logerr('No matching index or title immutable or empty DB') return False # Set up strings to be printed if self.colorize: bad_url_str = '\x1b[1mIndex %d: Malformed URL\x1b[0m\n' mime_str = '\x1b[1mIndex %d: HTTP HEAD requested\x1b[0m\n' blank_URL_str = '\x1b[1mIndex %d: No title\x1b[0m\n' success_str = 'Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n' else: bad_url_str = 'Index %d: Malformed URL\n' mime_str = 'Index %d: HTTP HEAD requested\n' blank_URL_str = 'Index %d: No title\n' success_str = 'Title: [%s]\nIndex %d: updated\n' query = 'UPDATE bookmarks SET metadata = ? WHERE id = ?' done = {'value': 0} # count threads completed processed = {'value': 0} # count number of records processed # An additional call to generate default headers # gen_headers() is called within network_handler() # However, this initial call to setup headers # ensures there is no race condition among the # initial threads to setup headers if not myheaders: gen_headers() cond = threading.Condition() cond.acquire() def refresh(count, cond): """Inner function to fetch titles and update records. Parameters ---------- count : int Dummy input to adhere to convention. cond : threading condition object. """ count = 0 while True: cond.acquire() if resultset: row = resultset.pop() else: cond.release() break cond.release() title, mime, bad = network_handler(row[1], row[2] & 1) count += 1 cond.acquire() if bad: print(bad_url_str % row[0]) cond.release() continue elif mime: if self.chatty: print(mime_str % row[0]) cond.release() continue elif title == '': print(blank_URL_str % row[0]) cond.release() continue self.cur.execute(query, (title, row[0],)) # Save after fetching 32 titles per thread if count & 0b11111 == 0: self.conn.commit() if self.chatty: print(success_str % (title, row[0])) cond.release() if interrupted: break logdbg('Thread %d: processed %d', threading.get_ident(), count) with cond: done['value'] += 1 processed['value'] += count cond.notify() if recs < threads: threads = recs for i in range(threads): thread = threading.Thread(target=refresh, args=(i, cond)) thread.start() while done['value'] < threads: cond.wait() logdbg('%d threads completed', done['value']) # Guard: records found == total records processed if recs != processed['value']: logerr('Records: %d, processed: %d !!!', recs, processed['value']) cond.release() self.conn.commit() return True def edit_update_rec(self, index, immutable=-1): """Edit in editor and update a record. Parameters ---------- index : int DB index of the record. Last record, if index is -1. immutable : int, optional Diable title fetch from web if 1. Default is -1. Returns ------- bool True if updated, else False. """ editor = get_system_editor() if editor == 'none': logerr('EDITOR must be set to use index with -w') return False if (index == -1): # Edit the last records index = self.get_max_id() if index == -1: logerr('Empty database') return False rec = self.get_rec_by_id(index) if not rec: logerr('No matching index %d', index) return False result = edit_rec(editor, rec[1], rec[2], rec[3], rec[4]) if result is not None: url, title, tags, desc = result return self.update_rec(index, url, title, tags, desc, immutable) if immutable != -1: return self.update_rec(index, immutable) return False def searchdb(self, keywords, all_keywords=False, deep=False, regex=False): """Search DB for entries where tags, URL, or title fields match keywords. Parameters ---------- keywords : list of str Keywords to search. all_keywords : bool, optional True to return records matching ALL keywords. False (default value) to return records matching ANY keyword. deep : bool, optional True to search for matching substrings. Default is False. regex : bool, optional Match a regular expression if True. Default is False. Returns ------- list or None List of search results, or None if no matches. """ if not keywords: return None # Deep query string q1 = ("(tags LIKE ('%' || ? || '%') OR " "URL LIKE ('%' || ? || '%') OR " "metadata LIKE ('%' || ? || '%') OR " "desc LIKE ('%' || ? || '%')) ") # Non-deep query string q2 = ('(tags REGEXP ? OR ' 'URL REGEXP ? OR ' 'metadata REGEXP ? OR ' 'desc REGEXP ?) ') qargs = [] case_statement = lambda x: 'CASE WHEN ' + x + ' THEN 1 ELSE 0 END' if regex: q0 = 'SELECT id, url, metadata, tags, desc FROM (SELECT *, ' for token in keywords: q0 += case_statement(q2) + ' + ' qargs += (token, token, token, token,) q0 = q0[:-3] + ' AS score FROM bookmarks WHERE score > 0 ORDER BY score DESC)' elif all_keywords: if len(keywords) == 1 and keywords[0] == 'blank': q0 = "SELECT * FROM bookmarks WHERE metadata = '' OR tags = ? " qargs += (DELIM,) elif len(keywords) == 1 and keywords[0] == 'immutable': q0 = 'SELECT * FROM bookmarks WHERE flags & 1 == 1 ' else: q0 = 'SELECT id, url, metadata, tags, desc FROM bookmarks WHERE ' for token in keywords: if deep: q0 += q1 + 'AND ' else: token = '\\b' + re.escape(token.rstrip('/')) + '\\b' q0 += q2 + 'AND ' qargs += (token, token, token, token,) q0 = q0[:-4] q0 += 'ORDER BY id ASC' elif not all_keywords: q0 = 'SELECT id, url, metadata, tags, desc FROM (SELECT *, ' for token in keywords: if deep: q0 += case_statement(q1) + ' + ' else: token = '\\b' + re.escape(token.rstrip('/')) + '\\b' q0 += case_statement(q2) + ' + ' qargs += (token, token, token, token,) q0 = q0[:-3] + ' AS score FROM bookmarks WHERE score > 0 ORDER BY score DESC)' else: logerr('Invalid search option') return None logdbg('query: "%s", args: %s', q0, qargs) try: self.cur.execute(q0, qargs) except sqlite3.OperationalError as e: logerr(e) return None return self.cur.fetchall() def search_by_tag(self, tags): """Search bookmarks for entries with given tags. Parameters ---------- tags : str String of tags to search for. Retrieves entries matching ANY tag if tags are delimited with ','. Retrieves entries matching ALL tags if tags are delimited with '+'. Returns ------- list or None List of search results, or None if no matches. """ logdbg(tags) tags, search_operator, excluded_tags = prep_tag_search(tags) if search_operator is None: logerr("Cannot use both '+' and ',' in same search") return None logdbg('tags: %s', tags) logdbg('search_operator: %s', search_operator) logdbg('excluded_tags: %s', excluded_tags) if search_operator == 'AND': query = "SELECT id, url, metadata, tags, desc FROM bookmarks WHERE tags LIKE '%' || ? || '%' " for tag in tags[1:]: query += "{} tags LIKE '%' || ? || '%' ".format(search_operator) if excluded_tags: tags.append(excluded_tags) query = query.replace('WHERE tags', 'WHERE (tags') query += ') AND tags NOT REGEXP ? ' query += 'ORDER BY id ASC' else: query = 'SELECT id, url, metadata, tags, desc FROM (SELECT *, ' case_statement = "CASE WHEN tags LIKE '%' || ? || '%' THEN 1 ELSE 0 END" query += case_statement for tag in tags[1:]: query += ' + ' + case_statement query += ' AS score FROM bookmarks WHERE score > 0' if excluded_tags: tags.append(excluded_tags) query += ' AND tags NOT REGEXP ? ' query += ' ORDER BY score DESC)' logdbg('query: "%s", args: %s', query, tags) self.cur.execute(query, tuple(tags, )) return self.cur.fetchall() def search_keywords_and_filter_by_tags(self, keywords, all_keywords, deep, regex, stag): """Search bookmarks for entries with keywords and specified criteria while filtering out entries with matching tags. Parameters ---------- keywords : list of str Keywords to search. all_keywords : bool, optional True to return records matching ALL keywords. False to return records matching ANY keyword. deep : bool, optional True to search for matching substrings. regex : bool, optional Match a regular expression if True. tags : str String of tags to search for. Retrieves entries matching ANY tag if tags are delimited with ','. Retrieves entries matching ALL tags if tags are delimited with '+'. Returns ------- list or None List of search results, or None if no matches. """ keyword_results = self.searchdb(keywords, all_keywords, deep, regex) stag_results = self.search_by_tag(''.join(stag)) return list(set(keyword_results) & set(stag_results)) def exclude_results_from_search(self, search_results, without, deep): """Excludes records that match keyword search using without parameters Parameters ---------- search_results : list List of search results without : list of str Keywords to search. deep : bool, optional True to search for matching substrings. Returns ------- list or None List of search results, or None if no matches. """ return list(set(search_results) - set(self.searchdb(without, False, deep))) def compactdb(self, index, delay_commit=False): """When an entry at index is deleted, move the last entry in DB to index, if index is lesser. Parameters ---------- index : int DB index of deleted entry. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. """ # Return if the last index left in DB was just deleted max_id = self.get_max_id() if max_id == -1: return query1 = 'SELECT id, URL, metadata, tags, desc FROM bookmarks WHERE id = ? LIMIT 1' query2 = 'DELETE FROM bookmarks WHERE id = ?' query3 = 'INSERT INTO bookmarks(id, URL, metadata, tags, desc) VALUES (?, ?, ?, ?, ?)' if max_id > index: self.cur.execute(query1, (max_id,)) results = self.cur.fetchall() for row in results: self.cur.execute(query2, (row[0],)) self.cur.execute(query3, (index, row[1], row[2], row[3], row[4],)) if not delay_commit: self.conn.commit() if self.chatty: print('Index %d moved to %d' % (row[0], index)) def delete_rec(self, index, low=0, high=0, is_range=False, delay_commit=False): """Delete a single record or remove the table if index is None. Parameters ---------- index : int DB index of deleted entry. low : int, optional Actual lower index of range. high : int, optional Actual higher index of range. is_range : bool, optional A range is passed using low and high arguments. An index is ignored if is_range is True (use dummy index). Default is False. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- bool True on success, False on failure. """ if is_range: # Delete a range of indices if low < 0 or high < 0: logerr('Negative range boundary') return False if low > high: low, high = high, low # If range starts from 0, delete all records if low == 0: return self.cleardb() try: if self.chatty: if self.print_rec(0, low, high, True) is True: resp = input('Delete these bookmarks? (y/n): ') if resp != 'y': return False query = 'DELETE from bookmarks where id BETWEEN ? AND ?' self.cur.execute(query, (low, high)) print('Index %d-%d: %d deleted' % (low, high, self.cur.rowcount)) if not self.cur.rowcount: return False # Compact DB by ascending order of index to ensure # the existing higher indices move only once # Delayed commit is forced for index in range(low, high + 1): self.compactdb(index, delay_commit=True) if not delay_commit: self.conn.commit() except IndexError: logerr('No matching index') return False elif index == 0: # Remove the table return self.cleardb() else: # Remove a single entry try: if self.chatty: if self.print_rec(index) is True: resp = input('Delete this bookmark? (y/n): ') if resp != 'y': return False query = 'DELETE FROM bookmarks WHERE id = ?' self.cur.execute(query, (index,)) if self.cur.rowcount == 1: print('Index %d deleted' % index) self.compactdb(index, delay_commit=True) if not delay_commit: self.conn.commit() else: logerr('No matching index %d', index) return False except IndexError: logerr('No matching index %d', index) return False return True def delete_resultset(self, results): """Delete search results in descending order of DB index. Indices are expected to be unique and in ascending order. Notes ----- This API forces a delayed commit. Parameters ---------- results : list of tuples List of results to delete from DB. Returns ------- bool True on success, False on failure. """ resp = read_in('Delete the search results? (y/n): ') if resp != 'y': return False # delete records in reverse order pos = len(results) - 1 while pos >= 0: idx = results[pos][0] self.delete_rec(idx, delay_commit=True) # Commit at every 200th removal if pos % 200 == 0: self.conn.commit() pos -= 1 return True def delete_rec_all(self, delay_commit=False): """Removes all records in the Bookmarks table. Parameters ---------- delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- bool True on success, False on failure. """ try: self.cur.execute('DELETE FROM bookmarks') if not delay_commit: self.conn.commit() return True except Exception as e: logerr('delete_rec_all(): %s', e) return False def cleardb(self): """Drops the bookmark table if it exists. Returns ------- bool True on success, False on failure. """ resp = read_in('Remove ALL bookmarks? (y/n): ') if resp != 'y': print('No bookmarks deleted') return False self.cur.execute('DROP TABLE if exists bookmarks') self.conn.commit() print('All bookmarks deleted') return True def print_rec(self, index=0, low=0, high=0, is_range=False): """Print bookmark details at index or all bookmarks if index is 0. A negative index behaves like tail, if title is blank show "Untitled". Parameters ----------- index : int, optional DB index of record to print. 0 prints all records. low : int, optional Actual lower index of range. high : int, optional Actual higher index of range. is_range : bool, optional A range is passed using low and high arguments. An index is ignored if is_range is True (use dummy index). Default is False. Returns ------- bool True on success, False on failure. """ if (index < 0): # Show the last n records _id = self.get_max_id() if _id == -1: logerr('Empty database') return False low = (1 if _id <= -index else _id + index + 1) high = _id is_range = True if is_range: if low < 0 or high < 0: logerr('Negative range boundary') return False if low > high: low, high = high, low try: # If range starts from 0 print all records if low == 0: query = 'SELECT * from bookmarks' resultset = self.cur.execute(query) else: query = 'SELECT * from bookmarks where id BETWEEN ? AND ?' resultset = self.cur.execute(query, (low, high)) except IndexError: logerr('Index out of range') return False elif index != 0: # Show record at index try: query = 'SELECT * FROM bookmarks WHERE id = ? LIMIT 1' self.cur.execute(query, (index,)) results = self.cur.fetchall() if not results: logerr('No matching index %d', index) return False except IndexError: logerr('No matching index %d', index) return False if not self.json: print_rec_with_filter(results, self.field_filter) else: print(format_json(results, True, self.field_filter)) return True else: # Show all entries self.cur.execute('SELECT * FROM bookmarks') resultset = self.cur.fetchall() if not resultset: logerr('0 records') return True if not self.json: print_rec_with_filter(resultset, self.field_filter) else: print(format_json(resultset, field_filter=self.field_filter)) return True def get_tag_all(self): """Get list of tags in DB. Returns ------- tuple (list of unique tags sorted alphabetically, dictionary of {tag: usage_count}). """ tags = [] unique_tags = [] dic = {} qry = 'SELECT DISTINCT tags, COUNT(tags) FROM bookmarks GROUP BY tags' for row in self.cur.execute(qry): tagset = row[0].strip(DELIM).split(DELIM) for tag in tagset: if tag not in tags: dic[tag] = row[1] tags += (tag,) else: dic[tag] += row[1] if not tags: return tags, dic if tags[0] == '': unique_tags = sorted(tags[1:]) else: unique_tags = sorted(tags) return unique_tags, dic def suggest_similar_tag(self, tagstr): """Show list of tags those go together in DB. Parameters ---------- tagstr : str Original tag string. Returns ------- str DELIM separated string of tags. """ tags = tagstr.split(',') if not len(tags): return tagstr qry = 'SELECT DISTINCT tags FROM bookmarks WHERE tags LIKE ?' tagset = set() for tag in tags: if tag == '': continue self.cur.execute(qry, ('%' + delim_wrap(tag) + '%',)) results = self.cur.fetchall() for row in results: # update tagset with unique tags in row tagset |= set(row[0].strip(DELIM).split(DELIM)) # remove user supplied tags from tagset tagset.difference_update(tags) if not len(tagset): return tagstr unique_tags = sorted(tagset) print('similar tags:\n') for count, tag in enumerate(unique_tags): print('%d. %s' % (count + 1, unique_tags[count])) selected_tags = input('\nselect: ').split() print() if not selected_tags: return tagstr tags = [tagstr] for index in selected_tags: try: tags.append(delim_wrap(unique_tags[int(index) - 1])) except Exception as e: logerr(e) continue return parse_tags(tags) def replace_tag(self, orig, new=None): """Replace original tag by new tags in all records. Remove original tag if new tag is empty. Parameters ---------- orig : str Original tag. new : list Replacement tags. Returns ------- bool True on success, False on failure. """ newtags = DELIM orig = delim_wrap(orig) if new is not None: newtags = parse_tags(new) if orig == newtags: print('Tags are same.') return False # Remove original tag from DB if new tagset reduces to delimiter if newtags == DELIM: return self.delete_tag_at_index(0, orig) # Update bookmarks with original tag query = 'SELECT id, tags FROM bookmarks WHERE tags LIKE ?' self.cur.execute(query, ('%' + orig + '%',)) results = self.cur.fetchall() if results: query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in results: tags = row[1].replace(orig, newtags) tags = parse_tags([tags]) self.cur.execute(query, (tags, row[0],)) print('Index %d updated' % row[0]) self.conn.commit() return True def set_tag(self, cmdstr, taglist): """Append, overwrite, remove tags using the symbols >>, > and << respectively. Parameters ---------- cmdstr : str Command pattern. taglist : list List of tags. Returns ------- int Number of indices updated on success, -1 on failure. """ if not cmdstr or not taglist: return -1 flag = 0 # 0: invalid, 1: append, 2: overwrite, 3: remove index = cmdstr.find('>>') if index == -1: index = cmdstr.find('>') if index != -1: flag = 2 else: index = cmdstr.find('<<') if index != -1: flag = 3 else: flag = 1 if not flag: return -1 tags = DELIM id_list = cmdstr[:index].split() try: for id in id_list: if is_int(id) and int(id) > 0: tags += taglist[int(id) - 1] + DELIM elif '-' in id: vals = [int(x) for x in id.split('-')] if vals[0] > vals[-1]: vals[0], vals[-1] = vals[-1], vals[0] for _id in range(vals[0], vals[-1] + 1): tags += taglist[_id - 1] + DELIM else: return -1 except ValueError: return -1 if flag != 2: index += 1 update_count = 0 query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' try: db_id_list = cmdstr[index + 1:].split() for id in db_id_list: if is_int(id) and int(id) > 0: if flag == 1: if self.append_tag_at_index(id, tags, True): update_count += 1 elif flag == 2: tags = parse_tags([tags]) self.cur.execute(query, (tags, id,)) update_count += self.cur.rowcount else: self.delete_tag_at_index(id, tags, True) update_count += 1 elif '-' in id: vals = [int(x) for x in id.split('-')] if vals[0] > vals[-1]: vals[0], vals[-1] = vals[-1], vals[0] for _id in range(vals[0], vals[-1] + 1): if flag == 1: if self.append_tag_at_index(_id, tags, True): update_count += 1 elif flag == 2: tags = parse_tags([tags]) self.cur.execute(query, (tags, _id,)) update_count += self.cur.rowcount else: if self.delete_tag_at_index(_id, tags, True): update_count += 1 else: return -1 except ValueError: return -1 except sqlite3.IntegrityError: return -1 try: self.conn.commit() except Exception as e: logerr(e) return -1 return update_count def browse_by_index(self, index=0, low=0, high=0, is_range=False): """Open URL at index or range of indices in browser. Parameters ---------- index : int Index to browse. 0 opens a random bookmark. low : int Actual lower index of range. high : int Higher index of range. is_range : bool A range is passed using low and high arguments. If True, index is ignored. Default is False. Returns ------- bool True on success, False on failure. """ if is_range: if low < 0 or high < 0: logerr('Negative range boundary') return False if low > high: low, high = high, low try: # If range starts from 0 throw an error if low <= 0: raise IndexError else: qry = 'SELECT URL from bookmarks where id BETWEEN ? AND ?' for row in self.cur.execute(qry, (low, high)): browse(row[0]) return True except IndexError: logerr('Index out of range') return False if index < 0: logerr('Invalid index %d', index) return False if index == 0: qry = 'SELECT id from bookmarks ORDER BY RANDOM() LIMIT 1' self.cur.execute(qry) result = self.cur.fetchone() # Return if no entries in DB if result is None: print('No bookmarks added yet ...') return False index = result[0] logdbg('Opening random index %d', index) qry = 'SELECT URL FROM bookmarks WHERE id = ? LIMIT 1' try: for row in self.cur.execute(qry, (index,)): browse(row[0]) return True logerr('No matching index %d', index) except IndexError: logerr('No matching index %d', index) return False def exportdb(self, filepath, taglist=None): """Export DB bookmarks to file. If destination file name ends with '.db', bookmarks are exported to a Buku database file. If destination file name ends with '.md', bookmarks are exported to a markdown file. If destination file name ends with '.org' bookmarks are exported to a org file. Otherwise, bookmarks are exported to a Firefox bookmarks.html formatted file. Parameters ---------- filepath : str Path to export destination file. taglist : list, optional Specific tags to export. Returns ------- bool True on success, False on failure. """ count = 0 timestamp = str(int(time.time())) arguments = [] query = 'SELECT * FROM bookmarks' is_tag_valid = False if taglist is not None: tagstr = parse_tags(taglist) if not tagstr or tagstr == DELIM: logerr('Invalid tag') return False tags = tagstr.split(DELIM) query += ' WHERE' for tag in tags: if tag != '': is_tag_valid = True query += " tags LIKE '%' || ? || '%' OR" tag = delim_wrap(tag) arguments += (tag,) if is_tag_valid: query = query[:-3] else: query = query[:-6] logdbg('(%s), %s', query, arguments) self.cur.execute(query, arguments) resultset = self.cur.fetchall() if not resultset: print('No records found') return False if os.path.exists(filepath): resp = read_in(filepath + ' exists. Overwrite? (y/n): ') if resp != 'y': return False if filepath.endswith('.db'): os.remove(filepath) if filepath.endswith('.db'): outdb = BukuDb(dbfile=filepath) qry = 'INSERT INTO bookmarks(URL, metadata, tags, desc, flags) VALUES (?, ?, ?, ?, ?)' for row in resultset: outdb.cur.execute(qry, (row[1], row[2], row[3], row[4], row[5])) outdb.conn.commit() outdb.close() return True try: outfp = open(filepath, mode='w', encoding='utf-8') except Exception as e: logerr(e) return False if filepath.endswith('.md'): for row in resultset: if row[2] == '': out = '- [Untitled](' + row[1] + ')\n' else: out = '- [' + row[2] + '](' + row[1] + ')\n' outfp.write(out) count += 1 elif filepath.endswith('.org'): for row in resultset: if row[2] == '': out = '* [[{}][Untitled]]\n'.format(row[1]) else: out = '* [[{}][{}]]\n'.format(row[1], row[2]) outfp.write(out) count += 1 else: outfp.write('<!DOCTYPE NETSCAPE-Bookmark-file-1>\n\n' '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">\n' '<TITLE>Bookmarks\n' '

Bookmarks

\n\n' '

Buku bookmarks \n' ' \n' % (timestamp, timestamp)) for row in resultset: out = (' \n' if row[4] != '': out += ' ' + row[4] + '\n' outfp.write(out) count += 1 outfp.write(' \n

Bookmarks

Buku bookmarks