#!/usr/bin/env python3 # # Bookmark management utility # # Copyright © 2015-2017 Arun Prakash Jana # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Buku. If not, see . import argparse import collections import html.parser as HTMLParser import json import logging import os import re try: import readline readline except ImportError: pass import requests import signal import sqlite3 import sys import threading import time import urllib3 from urllib3.util import parse_url, make_headers import webbrowser __version__ = '3.3.1' __author__ = 'Arun Prakash Jana ' __license__ = 'GPLv3' # Global variables interrupted = False # Received SIGINT DELIM = ',' # Delimiter used to store tags in DB SKIP_MIMES = {'.pdf', '.txt'} promptmsg = 'buku (? for help): ' # Prompt message string # Default format specifiers to print records ID_str = '%d. %s [%s]\n' ID_DB_str = '%d. %s' MUTE_str = '%s (L)\n' URL_str = ' > %s\n' DESC_str = ' + %s\n' TAG_str = ' # %s\n' # colormap for color output from "googler" project COLORMAP = {k: '\x1b[%sm' % v for k, v in { 'a': '30', 'b': '31', 'c': '32', 'd': '33', 'e': '34', 'f': '35', 'g': '36', 'h': '37', 'i': '90', 'j': '91', 'k': '92', 'l': '93', 'm': '94', 'n': '95', 'o': '96', 'p': '97', 'A': '30;1', 'B': '31;1', 'C': '32;1', 'D': '33;1', 'E': '34;1', 'F': '35;1', 'G': '36;1', 'H': '37;1', 'I': '90;1', 'J': '91;1', 'K': '92;1', 'L': '93;1', 'M': '94;1', 'N': '95;1', 'O': '96;1', 'P': '97;1', 'x': '0', 'X': '1', 'y': '7', 'Y': '7;1', 'z': '2', }.items()} # Disguise as Firefox on Ubuntu USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0' myheaders = None # Default dictionary of headers myproxy = None # Default proxy # Set up logging logger = logging.getLogger() logdbg = logger.debug logerr = logger.error class BukuHTMLParser(HTMLParser.HTMLParser): """Class to parse and fetch the title from a HTML page, if available. .. note:: The methods in this class are custom implementations of the HTMLParser object. See docs https://docs.python.org/3/library/html.parser.html. Attributes ---------- in_title_tag : bool True if HTML tag is a tag. Initial value is False. data : str Initial value is empty string. prev_tag : None or str Initial value is None. parsed_title : None or str The parsed title from a title tag. Initial value is None. """ def __init__(self): HTMLParser.HTMLParser.__init__(self) self.in_title_tag = False self.data = '' self.prev_tag = None self.parsed_title = None def handle_starttag(self, tag, attrs): self.in_title_tag = False if tag == 'title': self.in_title_tag = True self.prev_tag = tag def handle_endtag(self, tag): if tag == 'title': self.in_title_tag = False if self.data != '': self.parsed_title = self.data self.reset() # We have received title data, exit parsing def handle_data(self, data): if self.prev_tag == 'title' and self.in_title_tag: self.data += data def error(self, message): pass class BukuCrypt: """Class to handle encryption and decryption of the database file. Functionally a separate entity. 
Involves late imports in the static functions but it saves ~100ms each time. Given that encrypt/decrypt are not done automatically and any one should be called at a time, this doesn't seem to be an outrageous approach. """ # Crypto constants BLOCKSIZE = 0x10000 # 64 KB blocks SALT_SIZE = 0x20 CHUNKSIZE = 0x80000 # Read/write 512 KB chunks @staticmethod def get_filehash(filepath): """Get the SHA256 hash of a file. Parameters ---------- filepath : str Path to the file. Returns ------- hash : bytes Hash digest of file. """ from hashlib import sha256 with open(filepath, 'rb') as fp: hasher = sha256() buf = fp.read(BukuCrypt.BLOCKSIZE) while len(buf) > 0: hasher.update(buf) buf = fp.read(BukuCrypt.BLOCKSIZE) return hasher.digest() @staticmethod def encrypt_file(iterations, dbfile=None): """Encrypt the bookmarks database file. Parameters ---------- iterations : int Number of iterations for key generation. dbfile : str, optional Custom database file path (including filename). """ try: from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.ciphers import (Cipher, modes, algorithms) from getpass import getpass from hashlib import sha256 import struct except ImportError: logerr('cryptography lib(s) missing') sys.exit(1) if iterations < 1: logerr('Iterations must be >= 1') sys.exit(1) if not dbfile: dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db') encfile = dbfile + '.enc' db_exists = os.path.exists(dbfile) enc_exists = os.path.exists(encfile) if db_exists and not enc_exists: pass elif not db_exists: logerr('%s missing. Already encrypted?', dbfile) sys.exit(1) else: # db_exists and enc_exists logerr('Both encrypted and flat DB files exist!') sys.exit(1) password = getpass() passconfirm = getpass() if not password or not passconfirm: logerr('Empty password') sys.exit(1) if password != passconfirm: logerr('Passwords do not match') sys.exit(1) try: # Get SHA256 hash of DB file dbhash = BukuCrypt.get_filehash(dbfile) except Exception as e: logerr(e) sys.exit(1) # Generate random 256-bit salt and key salt = os.urandom(BukuCrypt.SALT_SIZE) key = ('%s%s' % (password, salt.decode('utf-8', 'replace'))).encode('utf-8') for _ in range(iterations): key = sha256(key).digest() iv = os.urandom(16) encryptor = Cipher( algorithms.AES(key), modes.CBC(iv), backend=default_backend() ).encryptor() filesize = os.path.getsize(dbfile) try: with open(dbfile, 'rb') as infp, open(encfile, 'wb') as outfp: outfp.write(struct.pack('<Q', filesize)) outfp.write(salt) outfp.write(iv) # Embed DB file hash in encrypted file outfp.write(dbhash) while True: chunk = infp.read(BukuCrypt.CHUNKSIZE) if len(chunk) == 0: break elif len(chunk) % 16 != 0: chunk = '%s%s' % (chunk, ' ' * (16 - len(chunk) % 16)) outfp.write(encryptor.update(chunk) + encryptor.finalize()) os.remove(dbfile) print('File encrypted') sys.exit(0) except Exception as e: logerr(e) sys.exit(1) @staticmethod def decrypt_file(iterations, dbfile=None): """Decrypt the bookmarks database file. Parameters ---------- iterations : int Number of iterations for key generation. dbfile : str, optional Custom database file path (including filename). The '.enc' suffix must be omitted. 
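
        Examples
        --------
        A minimal sketch of the expected round trip; the path is
        hypothetical and the same passphrase must be typed at the
        getpass prompts (both calls terminate via sys.exit):

        >>> BukuCrypt.encrypt_file(8, dbfile='/tmp/bookmarks.db')
        >>> BukuCrypt.decrypt_file(8, dbfile='/tmp/bookmarks.db')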
""" try: from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.ciphers import (Cipher, modes, algorithms) from getpass import getpass from hashlib import sha256 import struct except ImportError: logerr('cryptography lib(s) missing') sys.exit(1) if iterations < 1: logerr('Decryption failed') sys.exit(1) if not dbfile: dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db') else: dbfile = os.path.abspath(dbfile) dbpath, filename = os.path.split(dbfile) encfile = dbfile + '.enc' enc_exists = os.path.exists(encfile) db_exists = os.path.exists(dbfile) if enc_exists and not db_exists: pass elif not enc_exists: logerr('%s missing', encfile) sys.exit(1) else: # db_exists and enc_exists logerr('Both encrypted and flat DB files exist!') sys.exit(1) password = getpass() if not password: logerr('Decryption failed') sys.exit(1) try: with open(encfile, 'rb') as infp: size = struct.unpack('<Q', infp.read(struct.calcsize('Q')))[0] # Read 256-bit salt and generate key salt = infp.read(32) key = ('%s%s' % (password, salt.decode('utf-8', 'replace'))).encode('utf-8') for _ in range(iterations): key = sha256(key).digest() iv = infp.read(16) decryptor = Cipher( algorithms.AES(key), modes.CBC(iv), backend=default_backend(), ).decryptor() # Get original DB file's SHA256 hash from encrypted file enchash = infp.read(32) with open(dbfile, 'wb') as outfp: while True: chunk = infp.read(BukuCrypt.CHUNKSIZE) if len(chunk) == 0: break outfp.write(decryptor.update(chunk) + decryptor.finalize()) outfp.truncate(size) # Match hash of generated file with that of original DB file dbhash = BukuCrypt.get_filehash(dbfile) if dbhash != enchash: os.remove(dbfile) logerr('Decryption failed') sys.exit(1) else: os.remove(encfile) print('File decrypted') except struct.error: logerr('Tainted file') sys.exit(1) except Exception as e: logerr(e) sys.exit(1) def import_md(filepath, newtag): """Parse bookmark markdown file. Parameters ---------- filepath : str Path to markdown file. newtag : str New tag for bookmarks in markdown file. Returns ------- tuple Parsed result. """ with open(filepath, mode='r', encoding='utf-8') as infp: for line in infp: # Supported markdown format: [title](url) # Find position of title end, url start delimiter combo index = line.find('](') if index != -1: # Find title start delimiter title_start_delim = line[:index].find('[') # Reverse find the url end delimiter url_end_delim = line[index + 2:].rfind(')') if title_start_delim != -1 and url_end_delim > 0: # Parse title title = line[title_start_delim + 1:index] # Parse url url = line[index + 2:index + 2 + url_end_delim] if (is_nongeneric_url(url)): continue yield ( url, title, delim_wrap(newtag) if newtag else None, None, 0, True ) def import_html(html_soup, add_parent_folder_as_tag, newtag): """Parse bookmark html. Parameters ---------- html_soup : BeautifulSoup object BeautifulSoup representation of bookmark html. add_parent_folder_as_tag : bool True if bookmark parent folders should be added as tags else False. newtag : str A new unique tag to add to imported bookmarks. Returns ------- tuple Parsed result. 
""" # compatibility soup = html_soup for tag in soup.findAll('a'): # Extract comment from <dd> tag try: if (is_nongeneric_url(tag['href'])): continue except KeyError: continue desc = None comment_tag = tag.findNextSibling('dd') if comment_tag: desc = comment_tag.find(text=True, recursive=False) # add parent folder as tag if add_parent_folder_as_tag: # could be its folder or not possible_folder = tag.find_previous('h3') # get list of tags within that folder tag_list = tag.parent.parent.find_parent('dl') if ((possible_folder) and possible_folder.parent in list(tag_list.parents)): # then it's the folder of this bookmark if tag.has_attr('tags'): tag['tags'] += (DELIM + possible_folder.text) else: tag['tags'] = possible_folder.text # add unique tag if opted if newtag: if tag.has_attr('tags'): tag['tags'] += (DELIM + newtag) else: tag['tags'] = newtag yield ( tag['href'], tag.string, parse_tags([tag['tags']]) if tag.has_attr('tags') else None, desc, 0, True ) class BukuDb: """Abstracts all database operations. Attributes ---------- conn : sqlite database connection. cur : sqlite database cursor. json : bool True if results should be printed in json format else False. field_filter : int Indicates format for displaying bookmarks. Default is 0. chatty : bool Sets the verbosity of the APIs. Default is False. """ def __init__(self, json=False, field_filter=0, chatty=False, dbfile=None, colorize=True): """Database initialization API. Parameters ---------- json : bool, optional True if results should be printed in json format else False. field_filter : int, optional Indicates format for displaying bookmarks. Default is 0. chatty : bool, optional Sets the verbosity of the APIs. Default is False. colorize : bool, optional Indicates whether color should be used in output. Default is True. """ self.json = json self.field_filter = field_filter self.chatty = chatty self.colorize = colorize self.conn, self.cur = BukuDb.initdb(dbfile, self.chatty) @staticmethod def get_default_dbdir(): """Determine the directory path where dbfile will be stored. If the platform is Windows, use %APPDATA% else if $XDG_DATA_HOME is defined, use it else if $HOME exists, use it else use the current directory. Returns ------- str Path to database file. """ data_home = os.environ.get('XDG_DATA_HOME') if data_home is None: if os.environ.get('HOME') is None: if sys.platform == 'win32': data_home = os.environ.get('APPDATA') if data_home is None: return os.path.abspath('.') else: return os.path.abspath('.') else: data_home = os.path.join(os.environ.get('HOME'), '.local', 'share') return os.path.join(data_home, 'buku') @staticmethod def initdb(dbfile=None, chatty=False): """Initialize the database connection. Create DB file and/or bookmarks table if they don't exist. Alert on encryption options on first execution. Parameters ---------- dbfile : str, optional Custom database file path (including filename). chatty : bool If True, shows informative message on DB creation. Returns ------- tuple (connection, cursor). 
""" if not dbfile: dbpath = BukuDb.get_default_dbdir() filename = 'bookmarks.db' dbfile = os.path.join(dbpath, filename) else: dbfile = os.path.abspath(dbfile) dbpath, filename = os.path.split(dbfile) try: if not os.path.exists(dbpath): os.makedirs(dbpath) except Exception as e: logerr(e) os._exit(1) db_exists = os.path.exists(dbfile) enc_exists = os.path.exists(dbfile + '.enc') if db_exists and not enc_exists: pass elif enc_exists and not db_exists: logerr('Unlock database first') sys.exit(1) elif db_exists and enc_exists: logerr('Both encrypted and flat DB files exist!') sys.exit(1) elif chatty: # not db_exists and not enc_exists print('DB file is being created at %s.\nYou should encrypt it.' % dbfile) try: # Create a connection conn = sqlite3.connect(dbfile, check_same_thread=False) conn.create_function('REGEXP', 2, regexp) cur = conn.cursor() # Create table if it doesn't exist # flags: designed to be extended in future using bitwise masks # Masks: # 0b00000001: set title immutable cur.execute('CREATE TABLE if not exists bookmarks (' 'id integer PRIMARY KEY, ' 'URL text NOT NULL UNIQUE, ' 'metadata text default \'\', ' 'tags text default \',\', ' 'desc text default \'\', ' 'flags integer default 0)') conn.commit() except Exception as e: logerr('initdb(): %s', e) sys.exit(1) return (conn, cur) def get_rec_all(self): """Get all the bookmarks in the database. Returns ------- list A list of tuples representing bookmark records. """ self.cur.execute('SELECT * FROM bookmarks') return self.cur.fetchall() def get_rec_by_id(self, index): """Get a bookmark from database by its ID. Parameters ---------- index : int DB index of bookmark record. Returns ------- tuple or None Bookmark data, or None if index is not found. """ self.cur.execute('SELECT * FROM bookmarks WHERE id = ? LIMIT 1', (index,)) resultset = self.cur.fetchall() return resultset[0] if resultset else None def get_rec_id(self, url): """Check if URL already exists in DB. Parameters ---------- url : str A URL to search for in the DB. Returns ------- int DB index, or -1 if URL not found in DB. """ self.cur.execute('SELECT id FROM bookmarks WHERE URL = ? LIMIT 1', (url,)) resultset = self.cur.fetchall() return resultset[0][0] if resultset else -1 def get_max_id(self): """Fetch the ID of the last record. Returns ------- int ID of the record if any record exists, else -1. """ self.cur.execute('SELECT MAX(id) from bookmarks') resultset = self.cur.fetchall() return -1 if resultset[0][0] is None else resultset[0][0] def add_rec(self, url, title_in=None, tags_in=None, desc=None, immutable=0, delay_commit=False): """Add a new bookmark. Parameters ---------- url : str URL to bookmark. title_in :str, optional Title to add manually. Default is None. tags_in : str, optional Comma-separated tags to add manually. Must start and end with comma. Default is None. desc : str, optional Description of the bookmark. Default is None. immutable : int, optional Indicates whether to disable title fetch from web. Default is 0. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- int DB index of new bookmark on success, -1 on failure. 
""" # Return error for empty URL if not url or url == '': logerr('Invalid URL') return -1 # Ensure that the URL does not exist in DB already id = self.get_rec_id(url) if id != -1: logerr('URL [%s] already exists at index %d', url, id) return -1 # Process title if title_in is not None: meta = title_in else: meta, mime, bad = network_handler(url) if bad: print('Malformed URL\n') elif mime: logdbg('HTTP HEAD requested') elif meta == '': print('No title\n') else: logdbg('Title: [%s]', meta) # Fix up tags, if broken if tags_in is None or tags_in == '': tags_in = DELIM elif tags_in[0] != DELIM: tags_in = DELIM + tags_in elif tags_in[-1] != DELIM: tags_in = tags_in + DELIM # Process description if desc is None: desc = '' try: flagset = 0 if immutable == 1: flagset |= immutable qry = 'INSERT INTO bookmarks(URL, metadata, tags, desc, flags) VALUES (?, ?, ?, ?, ?)' self.cur.execute(qry, (url, meta, tags_in, desc, flagset)) if not delay_commit: self.conn.commit() if self.chatty: self.print_rec(self.cur.lastrowid) return self.cur.lastrowid except Exception as e: logerr('add_rec(): %s', e) return -1 def append_tag_at_index(self, index, tags_in, delay_commit=False): """Append tags to bookmark tagset at index. Parameters ---------- index : int DB index of the record. 0 indicates all records. tags_in : str Comma-separated tags to add manually. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- bool True on success, False on failure. """ if index == 0: resp = read_in('Append the tags to ALL bookmarks? (y/n): ') if resp != 'y': return False self.cur.execute('SELECT id, tags FROM bookmarks ORDER BY id ASC') else: self.cur.execute('SELECT id, tags FROM bookmarks WHERE id = ? LIMIT 1', (index,)) resultset = self.cur.fetchall() if resultset: query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in resultset: tags = row[1] + tags_in[1:] tags = parse_tags([tags]) self.cur.execute(query, (tags, row[0],)) if self.chatty and not delay_commit: self.print_rec(row[0]) else: return False if not delay_commit: self.conn.commit() return True def delete_tag_at_index(self, index, tags_in, delay_commit=False): """Delete tags from bookmark tagset at index. Parameters ---------- index : int DB index of bookmark record. 0 indicates all records. tags_in : str Comma-separated tags to delete manually. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- bool True on success, False on failure. """ tags_to_delete = tags_in.strip(DELIM).split(DELIM) if index == 0: resp = read_in('Delete the tag(s) from ALL bookmarks? (y/n): ') if resp != 'y': return False count = 0 match = "'%' || ? || '%'" for tag in tags_to_delete: tag = delim_wrap(tag) q = ("UPDATE bookmarks SET tags = replace(tags, '%s', '%s') WHERE tags LIKE %s" % (tag, DELIM, match)) self.cur.execute(q, (tag,)) count += self.cur.rowcount if count and not delay_commit: self.conn.commit() if self.chatty: print('%d record(s) updated' % count) return True # Process a single index # Use SELECT and UPDATE to handle multiple tags at once query = 'SELECT id, tags FROM bookmarks WHERE id = ? LIMIT 1' self.cur.execute(query, (index,)) resultset = self.cur.fetchall() if resultset: query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' 
for row in resultset: tags = row[1] for tag in tags_to_delete: tags = tags.replace(delim_wrap(tag), DELIM) self.cur.execute(query, (parse_tags([tags]), row[0],)) if self.chatty and not delay_commit: self.print_rec(row[0]) if not delay_commit: self.conn.commit() else: return False return True def update_rec(self, index, url=None, title_in=None, tags_in=None, desc=None, immutable=-1, threads=4): """Update an existing record at index. Update all records if index is 0 and url is not specified. URL is an exception because URLs are unique in DB. Parameters ---------- index : int DB index of record. 0 indicates all records. url : str, optional Bookmark address. title_in : str, optional Title to add manually. tags_in : str, optional Comma-separated tags to add manually. Must start and end with comma. Prefix with '+,' to append to current tags. Prefix with '-,' to delete from current tags. desc : str, optional Description of bookmark. immutable : int, optional Diable title fetch from web if 1. Default is -1. threads : int, optional Number of threads to use to refresh full DB. Default is 4. Returns ------- bool True on success, False on Failure. """ arguments = [] query = 'UPDATE bookmarks SET' to_update = False tag_modified = False ret = False # Update URL if passed as argument if url is not None and url != '': if index == 0: logerr('All URLs cannot be same') return False query += ' URL = ?,' arguments += (url,) to_update = True # Update tags if passed as argument if tags_in is not None: if tags_in == '+,' or tags_in == '-,': logerr('Please specify a tag') return False if tags_in.startswith('+,'): chatty = self.chatty self.chatty = False ret = self.append_tag_at_index(index, tags_in[1:]) self.chatty = chatty tag_modified = True elif tags_in.startswith('-,'): chatty = self.chatty self.chatty = False ret = self.delete_tag_at_index(index, tags_in[1:]) self.chatty = chatty tag_modified = True else: # Fix up tags, if broken if tags_in is None or tags_in == '': tags_in = DELIM elif tags_in[0] != DELIM: tags_in = DELIM + tags_in elif tags_in[-1] != DELIM: tags_in = tags_in + DELIM query += ' tags = ?,' arguments += (tags_in,) to_update = True # Update description if passed as an argument if desc is not None: query += ' desc = ?,' arguments += (desc,) to_update = True # Update immutable flag if passed as argument if immutable != -1: flagset = 1 if immutable == 1: query += ' flags = flags | ?,' elif immutable == 0: query += ' flags = flags & ?,' flagset = ~flagset arguments += (flagset,) to_update = True # Update title # # 1. if --title has no arguments, delete existing title # 2. if --title has arguments, update existing title # 3. if --title option is omitted at cmdline: # if URL is passed, update the title from web using the URL # 4. 
if no other argument (url, tag, comment, immutable) passed, # update title from web using DB URL (if title is mutable) title_to_insert = None if title_in is not None: title_to_insert = title_in elif url is not None and url != '': title_to_insert, mime, bad = network_handler(url) if bad: print('Malformed URL\n') elif mime: logdbg('HTTP HEAD requested') elif title_to_insert == '': print('No title\n') else: logdbg('Title: [%s]', title_to_insert) elif not to_update and not tag_modified: ret = self.refreshdb(index, threads) if ret and index and self.chatty: self.print_rec(index) return ret if title_to_insert is not None: query += ' metadata = ?,' arguments += (title_to_insert,) to_update = True if not to_update: # Nothing to update # Show bookmark if tags were appended to deleted if tag_modified and self.chatty: self.print_rec(index) return ret if index == 0: # Update all records resp = read_in('Update ALL bookmarks? (y/n): ') if resp != 'y': return False query = query[:-1] else: query = query[:-1] + ' WHERE id = ?' arguments += (index,) logdbg('query: "%s", args: %s', query, arguments) try: self.cur.execute(query, arguments) self.conn.commit() if self.cur.rowcount and self.chatty: self.print_rec(index) if self.cur.rowcount == 0: logerr('No matching index %d', index) return False except sqlite3.IntegrityError: logerr('URL already exists') return False return True def refreshdb(self, index, threads): """Refresh ALL records in the database. Fetch title for eachbookmark from the web and update the records. Doesn't update the record if title is empty. Notes ----- This API doesn't change DB index, URL or tags of a bookmark. This API is verbose. Parameters ---------- index : int DB index of record to update. 0 indicates all records. threads: int Number of threads to use to refresh full DB. Default is 4. """ if index == 0: self.cur.execute('SELECT id, url, flags FROM bookmarks ORDER BY id ASC') else: self.cur.execute('SELECT id, url, flags FROM bookmarks WHERE id = ? LIMIT 1', (index,)) resultset = self.cur.fetchall() recs = len(resultset) if not recs: logerr('No matching index or title immutable or empty DB') return False # Set up strings to be printed if self.colorize: bad_url_str = '\x1b[1mIndex %d: Malformed URL\x1b[0m\n' mime_str = '\x1b[1mIndex %d: HTTP HEAD requested\x1b[0m\n' blank_URL_str = '\x1b[1mIndex %d: No title\x1b[0m\n' success_str = 'Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n' else: bad_url_str = 'Index %d: Malformed URL\n' mime_str = 'Index %d: HTTP HEAD requested\n' blank_URL_str = 'Index %d: No title\n' success_str = 'Title: [%s]\nIndex %d: updated\n' query = 'UPDATE bookmarks SET metadata = ? WHERE id = ?' done = {'value': 0} # count threads completed processed = {'value': 0} # count number of records processed # An additional call to generate default headers # gen_headers() is called within network_handler() # However, this initial call to setup headers # ensures there is no race condition among the # initial threads to setup headers if not myheaders: gen_headers() cond = threading.Condition() cond.acquire() def refresh(count, cond): """Inner function to fetch titles and update records. Parameters ---------- count : int Dummy input to adhere to convention. cond : threading condition object. 
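
            Notes
            -----
            Titles are committed in batches: each thread commits once
            per 32 processed records (count & 0b11111 == 0), which
            limits write overhead while bounding the work lost on an
            interrupt.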
""" count = 0 while True: cond.acquire() if resultset: row = resultset.pop() else: cond.release() break cond.release() title, mime, bad = network_handler(row[1], row[2] & 1) count += 1 cond.acquire() if bad: print(bad_url_str % row[0]) cond.release() continue elif mime: if self.chatty: print(mime_str % row[0]) cond.release() continue elif title == '': print(blank_URL_str % row[0]) cond.release() continue self.cur.execute(query, (title, row[0],)) # Save after fetching 32 titles per thread if count & 0b11111 == 0: self.conn.commit() if self.chatty: print(success_str % (title, row[0])) cond.release() if interrupted: break logdbg('Thread %d: processed %d', threading.get_ident(), count) with cond: done['value'] += 1 processed['value'] += count cond.notify() if recs < threads: threads = recs for i in range(threads): thread = threading.Thread(target=refresh, args=(i, cond)) thread.start() while done['value'] < threads: cond.wait() logdbg('%d threads completed', done['value']) # Guard: records found == total records processed if recs != processed['value']: logerr('Records: %d, processed: %d !!!', recs, processed['value']) cond.release() self.conn.commit() return True def edit_update_rec(self, index, immutable=-1): """Edit in editor and update a record. Parameters ---------- index : int DB index of the record. immutable : int, optional Diable title fetch from web if 1. Default is -1. Returns ------- bool True if updated, else False. """ editor = get_system_editor() if editor == 'none': logerr('EDITOR must be set to use index with -w') return False rec = self.get_rec_by_id(index) if not rec: logerr('No matching index %d', index) return False result = edit_rec(editor, rec[1], rec[2], rec[3], rec[4]) if result is not None: url, title, tags, desc = result return self.update_rec(index, url, title, tags, desc, immutable) if immutable != -1: return self.update_rec(index, immutable) return False def searchdb(self, keywords, all_keywords=False, deep=False, regex=False): """Search DB for entries where tags, URL, or title fields match keywords. Parameters ---------- keywords : list of str Keywords to search. all_keywords : bool, optional True to return records matching ALL keywords. False (default value) to return records matching ANY keyword. deep : bool, optional True to search for matching substrings. Default is False. regex : bool, optional Match a regular expression if True. Default is False. Returns ------- list or None List of search results, or None if no matches. """ if not keywords: return None q0 = 'SELECT id, url, metadata, tags, desc FROM bookmarks WHERE ' # Deep query string q1 = ("(tags LIKE ('%' || ? || '%') OR " "URL LIKE ('%' || ? || '%') OR " "metadata LIKE ('%' || ? || '%') OR " "desc LIKE ('%' || ? || '%')) ") # Non-deep query string q2 = ('(tags REGEXP ? OR ' 'URL REGEXP ? OR ' 'metadata REGEXP ? OR ' 'desc REGEXP ?) ') qargs = [] if regex: for token in keywords: q0 += q2 + 'OR ' qargs += (token, token, token, token,) q0 = q0[:-3] elif all_keywords: if len(keywords) == 1 and keywords[0] == 'blank': q0 = "SELECT * FROM bookmarks WHERE metadata = '' OR tags = ? 
" qargs += (DELIM,) elif len(keywords) == 1 and keywords[0] == 'immutable': q0 = 'SELECT * FROM bookmarks WHERE flags & 1 == 1 ' else: for token in keywords: if deep: q0 += q1 + 'AND ' else: token = '\\b' + token.rstrip('/') + '\\b' q0 += q2 + 'AND ' qargs += (token, token, token, token,) q0 = q0[:-4] elif not all_keywords: for token in keywords: if deep: q0 += q1 + 'OR ' else: token = '\\b' + token.rstrip('/') + '\\b' q0 += q2 + 'OR ' qargs += (token, token, token, token,) q0 = q0[:-3] else: logerr('Invalid search option') return None q0 += 'ORDER BY id ASC' logdbg('query: "%s", args: %s', q0, qargs) try: self.cur.execute(q0, qargs) except sqlite3.OperationalError as e: logerr(e) return None return self.cur.fetchall() def search_by_tag(self, tags): """Search bookmarks for entries with given tags. Parameters ---------- tags : str String of tags to search for. Retrieves entries matching ANY tag if tags are delimited with ','. Retrieves entries matching ALL tags if tags are delimited with '+'. Returns ------- list or None List of search results, or None if no matches. """ # do not allow combination of search logics if ' + ' in tags and ',' in tags: logerr("Cannot use both '+' and ',' in same search") return tags, search_operator, excluded_tags = prep_tag_search(tags) query = "SELECT id, url, metadata, tags, desc FROM bookmarks WHERE tags LIKE '%' || ? || '%' " for tag in tags[1:]: query += "{} tags LIKE '%' || ? || '%' ".format(search_operator) if excluded_tags: tags.append(excluded_tags) query = query.replace('WHERE tags', 'WHERE (tags') query += ') AND tags NOT REGEXP ? ' query += 'ORDER BY id ASC' logdbg('query: "%s", args: %s', query, tags) self.cur.execute(query, tuple(tags, )) return self.cur.fetchall() def compactdb(self, index, delay_commit=False): """When an entry at index is deleted, move the last entry in DB to index, if index is lesser. Parameters ---------- index : int DB index of deleted entry. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. """ # Return if the last index left in DB was just deleted max_id = self.get_max_id() if max_id == -1: return query1 = 'SELECT id, URL, metadata, tags, desc FROM bookmarks WHERE id = ? LIMIT 1' query2 = 'DELETE FROM bookmarks WHERE id = ?' query3 = 'INSERT INTO bookmarks(id, URL, metadata, tags, desc) VALUES (?, ?, ?, ?, ?)' if max_id > index: self.cur.execute(query1, (max_id,)) results = self.cur.fetchall() for row in results: self.cur.execute(query2, (row[0],)) self.cur.execute(query3, (index, row[1], row[2], row[3], row[4],)) if not delay_commit: self.conn.commit() if self.chatty: print('Index %d moved to %d' % (row[0], index)) def delete_rec(self, index, low=0, high=0, is_range=False, delay_commit=False): """Delete a single record or remove the table if index is None. Parameters ---------- index : int DB index of deleted entry. low : int, optional Actual lower index of range. high : int, optional Actual higher index of range. is_range : bool, optional A range is passed using low and high arguments. An index is ignored if is_range is True (use dummy index). Default is False. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- bool True on success, False on failure. 
""" if is_range: # Delete a range of indices if low < 0 or high < 0: logerr('Negative range boundary') return False if low > high: low, high = high, low # If range starts from 0, delete all records if low == 0: return self.cleardb() try: query = 'DELETE from bookmarks where id BETWEEN ? AND ?' self.cur.execute(query, (low, high)) print('Index %d-%d: %d deleted' % (low, high, self.cur.rowcount)) if not self.cur.rowcount: return False # Compact DB by ascending order of index to ensure # the existing higher indices move only once # Delayed commit is forced for index in range(low, high + 1): self.compactdb(index, delay_commit=True) if not delay_commit: self.conn.commit() except IndexError: logerr('No matching index') return False elif index == 0: # Remove the table return self.cleardb() else: # Remove a single entry try: query = 'DELETE FROM bookmarks WHERE id = ?' self.cur.execute(query, (index,)) if self.cur.rowcount == 1: print('Index %d deleted' % index) self.compactdb(index, delay_commit=True) if not delay_commit: self.conn.commit() else: logerr('No matching index %d', index) return False except IndexError: logerr('No matching index %d', index) return False return True def delete_resultset(self, results): """Delete search results in descending order of DB index. Indices are expected to be unique and in ascending order. Notes ----- This API forces a delayed commit. Parameters ---------- results : list of tuples List of results to delete from DB. Returns ------- bool True on success, False on failure. """ resp = read_in('Delete the search results? (y/n): ') if resp != 'y': return False # delete records in reverse order pos = len(results) - 1 while pos >= 0: idx = results[pos][0] self.delete_rec(idx, delay_commit=True) # Commit at every 200th removal if pos % 200 == 0: self.conn.commit() pos -= 1 return True def delete_rec_all(self, delay_commit=False): """Removes all records in the Bookmarks table. Parameters ---------- delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- bool True on success, False on failure. """ try: self.cur.execute('DELETE FROM bookmarks') if not delay_commit: self.conn.commit() return True except Exception as e: logerr('delete_rec_all(): %s', e) return False def cleardb(self): """Drops the bookmark table if it exists. Returns ------- bool True on success, False on failure. """ resp = read_in('Remove ALL bookmarks? (y/n): ') if resp != 'y': print('No bookmarks deleted') return False self.cur.execute('DROP TABLE if exists bookmarks') self.conn.commit() print('All bookmarks deleted') return True def print_rec(self, index=0, low=0, high=0, is_range=False): """Print bookmark details at index or all bookmarks if index is 0. A negative index behaves like tail, if title is blank show "Untitled". Parameters ----------- index : int, optional DB index of record to print. 0 prints all records. low : int, optional Actual lower index of range. high : int, optional Actual higher index of range. is_range : bool, optional A range is passed using low and high arguments. An index is ignored if is_range is True (use dummy index). Default is False. 
""" if (index < 0): # Show the last n records _id = self.get_max_id() if _id == -1: logerr('Empty database') return False low = (1 if _id <= -index else _id + index + 1) high = _id is_range = True if is_range: if low < 0 or high < 0: logerr('Negative range boundary') return False if low > high: low, high = high, low try: # If range starts from 0 print all records if low == 0: query = 'SELECT * from bookmarks' resultset = self.cur.execute(query) else: query = 'SELECT * from bookmarks where id BETWEEN ? AND ?' resultset = self.cur.execute(query, (low, high)) except IndexError: logerr('Index out of range') return elif index != 0: # Show record at index try: query = 'SELECT * FROM bookmarks WHERE id = ? LIMIT 1' self.cur.execute(query, (index,)) results = self.cur.fetchall() if not results: logerr('No matching index %d', index) return except IndexError: logerr('No matching index %d', index) return if not self.json: for row in results: if self.field_filter == 0: print_single_rec(row) elif self.field_filter == 1: print('%s\t%s' % (row[0], row[1])) elif self.field_filter == 2: print('%s\t%s\t%s' % (row[0], row[1], row[3][1:-1])) elif self.field_filter == 3: print('%s\t%s' % (row[0], row[2])) elif self.field_filter == 4: print('%s\t%s\t%s\t%s' % (row[0], row[1], row[2], row[3][1:-1])) else: print(format_json(results, True, self.field_filter)) return else: # Show all entries self.cur.execute('SELECT * FROM bookmarks') resultset = self.cur.fetchall() if not resultset: logerr('0 records') return if not self.json: if self.field_filter == 0: for row in resultset: print_single_rec(row) elif self.field_filter == 1: for row in resultset: print('%s\t%s' % (row[0], row[1])) elif self.field_filter == 2: for row in resultset: print('%s\t%s\t%s' % (row[0], row[1], row[3][1:-1])) elif self.field_filter == 3: for row in resultset: print('%s\t%s' % (row[0], row[2])) elif self.field_filter == 4: for row in resultset: print('%s\t%s\t%s\t%s' % (row[0], row[1], row[2], row[3][1:-1])) else: print(format_json(resultset, field_filter=self.field_filter)) def get_tag_all(self): """Get list of tags in DB. Returns ------- tuple (list of unique tags sorted alphabetically, dictionary of {tag: usage_count}). """ tags = [] unique_tags = [] dic = {} qry = 'SELECT DISTINCT tags, COUNT(tags) FROM bookmarks GROUP BY tags' for row in self.cur.execute(qry): tagset = row[0].strip(DELIM).split(DELIM) for tag in tagset: if tag not in tags: dic[tag] = row[1] tags += (tag,) else: dic[tag] += row[1] if not tags: return tags, dic if tags[0] == '': unique_tags = sorted(tags[1:]) else: unique_tags = sorted(tags) return unique_tags, dic def suggest_similar_tag(self, tagstr): """Show list of tags those go together in DB. Parameters ---------- tagstr : str Original tag string. Returns ------- str DELIM separated string of tags. """ tags = tagstr.split(',') if not len(tags): return tagstr qry = 'SELECT DISTINCT tags FROM bookmarks WHERE tags LIKE ?' tagset = [] unique_tags = [] for tag in tags: if tag == '': continue self.cur.execute(qry, ('%' + delim_wrap(tag) + '%',)) results = self.cur.fetchall() if results: for row in results: tagset += row[0].strip(DELIM).split(DELIM) if len(tagset): for tag in tagset: if tag not in tags and tag not in unique_tags: unique_tags += (tag, ) if not len(unique_tags): return tagstr unique_tags = sorted(unique_tags) print('similar tags:\n') count = 0 for tag in unique_tags: print('%d. 
%s' % (count + 1, unique_tags[count])) count += 1 resp = input('\nselect: ') print() if not resp: return tagstr tagset = resp.split() tags = [tagstr] for index in tagset: try: tags.append(delim_wrap(unique_tags[int(index) - 1])) except: continue return parse_tags(tags) def replace_tag(self, orig, new=None): """Replace original tag by new tags in all records. Remove original tag if new tag is empty. Parameters ---------- orig : str Original tag. new : list Replacement tags. Returns ------- bool True on success, False on failure. """ newtags = DELIM orig = delim_wrap(orig) if new is not None: newtags = parse_tags(new) if orig == newtags: print('Tags are same.') return False # Remove original tag from DB if new tagset reduces to delimiter if newtags == DELIM: return self.delete_tag_at_index(0, orig) # Update bookmarks with original tag query = 'SELECT id, tags FROM bookmarks WHERE tags LIKE ?' self.cur.execute(query, ('%' + orig + '%',)) results = self.cur.fetchall() if results: query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in results: tags = row[1].replace(orig, newtags) tags = parse_tags([tags]) self.cur.execute(query, (tags, row[0],)) print('Index %d updated' % row[0]) self.conn.commit() return True def set_tag(self, cmdstr, taglist): """Append, overwrite, remove tags using the symbols >>, > and << respectively. Parameters ---------- cmdstr : str Command pattern. taglist : list List of tags. Returns ------- int Number of indices updated on success, -1 on failure. """ if not cmdstr or not taglist: return -1 flag = 0 # 0: invalid, 1: append, 2: overwrite, 3: remove index = cmdstr.find('>>') if index == -1: index = cmdstr.find('>') if index != -1: flag = 2 else: index = cmdstr.find('<<') if index != -1: flag = 3 else: flag = 1 if not flag: return -1 tags = DELIM id_list = cmdstr[:index].split() try: for id in id_list: if is_int(id) and int(id) > 0: tags += taglist[int(id) - 1] + DELIM elif '-' in id: vals = [int(x) for x in id.split('-')] if vals[0] > vals[-1]: vals[0], vals[-1] = vals[-1], vals[0] for _id in range(vals[0], vals[-1] + 1): tags += taglist[_id - 1] + DELIM else: return -1 except ValueError: return -1 if flag != 2: index += 1 update_count = 0 query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' try: db_id_list = cmdstr[index + 1:].split() for id in db_id_list: if is_int(id) and int(id) > 0: if flag == 1: if self.append_tag_at_index(id, tags, True): update_count += 1 elif flag == 2: tags = parse_tags([tags]) self.cur.execute(query, (tags, id,)) update_count += self.cur.rowcount else: self.delete_tag_at_index(id, tags, True) update_count += 1 elif '-' in id: vals = [int(x) for x in id.split('-')] if vals[0] > vals[-1]: vals[0], vals[-1] = vals[-1], vals[0] for _id in range(vals[0], vals[-1] + 1): if flag == 1: if self.append_tag_at_index(_id, tags, True): update_count += 1 elif flag == 2: tags = parse_tags([tags]) self.cur.execute(query, (tags, _id,)) update_count += self.cur.rowcount else: if self.delete_tag_at_index(_id, tags, True): update_count += 1 else: return -1 except ValueError: return -1 except sqlite3.IntegrityError: return -1 try: self.conn.commit() except: return -1 return update_count def browse_by_index(self, index=0, low=0, high=0, is_range=False): """Open URL at index or range of indies in browser. Parameters ---------- index : int Index to browse. 0 opens a random bookmark. low : int Actual lower index of range. high : int Higher index of range. is_range : bool A range is passed using low and high arguments. If True, index is ignored. 
            Default is False.

        Returns
        -------
        bool
            True on success, False on failure.
        """

        if is_range:
            if low < 0 or high < 0:
                logerr('Negative range boundary')
                return False

            if low > high:
                low, high = high, low

            try:
                # If range starts from 0 throw an error
                if low <= 0:
                    raise IndexError
                else:
                    qry = 'SELECT URL from bookmarks where id BETWEEN ? AND ?'
                    for row in self.cur.execute(qry, (low, high)):
                        browse(row[0])
                    return True
            except IndexError:
                logerr('Index out of range')
                return False

        if index < 0:
            logerr('Invalid index %d', index)
            return False

        if index == 0:
            qry = 'SELECT id from bookmarks ORDER BY RANDOM() LIMIT 1'
            self.cur.execute(qry)
            result = self.cur.fetchone()

            # Return if no entries in DB
            if result is None:
                print('No bookmarks added yet ...')
                return False

            index = result[0]
            logdbg('Opening random index %d', index)

        qry = 'SELECT URL FROM bookmarks WHERE id = ? LIMIT 1'
        try:
            for row in self.cur.execute(qry, (index,)):
                browse(row[0])
                return True

            logerr('No matching index %d', index)
        except IndexError:
            logerr('No matching index %d', index)

        return False

    def exportdb(self, filepath, taglist=None):
        """Export DB bookmarks to file.

        If destination file name ends with '.db', bookmarks are
        exported to a Buku database file.
        If destination file name ends with '.md', bookmarks are
        exported to a markdown file.
        Otherwise, bookmarks are exported to a Firefox bookmarks.html
        formatted file.

        Parameters
        ----------
        filepath : str
            Path to export destination file.
        taglist : list, optional
            Specific tags to export.

        Returns
        -------
        bool
            True on success, False on failure.
        """

        count = 0
        timestamp = str(int(time.time()))
        arguments = []
        query = 'SELECT * FROM bookmarks'
        is_tag_valid = False

        if taglist is not None:
            tagstr = parse_tags(taglist)

            if not tagstr or tagstr == DELIM:
                logerr('Invalid tag')
                return False

            tags = tagstr.split(DELIM)
            query += ' WHERE'
            for tag in tags:
                if tag != '':
                    is_tag_valid = True
                    query += " tags LIKE '%' || ? || '%' OR"
                    tag = delim_wrap(tag)
                    arguments += (tag,)

            if is_tag_valid:
                query = query[:-3]
            else:
                query = query[:-6]

        logdbg('(%s), %s', query, arguments)
        self.cur.execute(query, arguments)
        resultset = self.cur.fetchall()
        if not resultset:
            print('No records found')
            return False

        if os.path.exists(filepath):
            resp = read_in(filepath + ' exists. Overwrite? (y/n): ')
            if resp != 'y':
                return False

            if filepath.endswith('.db'):
                os.remove(filepath)

        if filepath.endswith('.db'):
            outdb = BukuDb(dbfile=filepath)
            qry = 'INSERT INTO bookmarks(URL, metadata, tags, desc, flags) VALUES (?, ?, ?, ?, ?)'
            for row in resultset:
                outdb.cur.execute(qry, (row[1], row[2], row[3], row[4], row[5]))
            outdb.conn.commit()
            outdb.close()
            return True

        try:
            outfp = open(filepath, mode='w', encoding='utf-8')
        except Exception as e:
            logerr(e)
            return False

        if filepath.endswith('.md'):
            for row in resultset:
                if row[2] == '':
                    out = '- [Untitled](' + row[1] + ')\n'
                else:
                    out = '- [' + row[2] + '](' + row[1] + ')\n'

                outfp.write(out)
                count += 1
        else:
            # Netscape bookmark file header
            outfp.write('<!DOCTYPE NETSCAPE-Bookmark-file-1>\n\n'
                        '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">\n'
                        '<TITLE>Bookmarks</TITLE>\n'
                        '<H1>Bookmarks</H1>\n\n'
                        '<DL><p>\n'
                        '    <DT><H3 ADD_DATE="%s" LAST_MODIFIED="%s" '
                        'PERSONAL_TOOLBAR_FOLDER="true">Buku bookmarks</H3>\n'
                        '    <DL><p>\n'
                        % (timestamp, timestamp))

            for row in resultset:
                out = ('        <DT><A HREF="%s" ADD_DATE="%s" LAST_MODIFIED="%s"'
                       % (row[1], timestamp, timestamp))
                if row[3] != DELIM:
                    out += ' TAGS="' + row[3][1:-1] + '"'
                out += '>' + row[2] + '</A>\n'
                if row[4] != '':
                    out += '        <DD>' + row[4] + '\n'

                outfp.write(out)
                count += 1

            outfp.write('    </DL><p>\n</DL><p>')

        outfp.close()
        print('%s exported' % count)
        return True

    def traverse_bm_folder(self, sublist, unique_tag, folder_name, add_parent_folder_as_tag):
        """Traverse bookmark folders recursively and find bookmarks.

        Parameters
        ----------
        sublist : list
            List of child entries in bookmark folder.
        unique_tag : str
            Timestamp tag in YYYYMonDD format.
        folder_name : str
            Name of the parent folder.
        add_parent_folder_as_tag : bool
            True if bookmark parent folders should be added as tags else False.

        Returns
        -------
        tuple
            Bookmark record data.
        """

        for item in sublist:
            if item['type'] == 'folder':
                for i in self.traverse_bm_folder(item['children'],
                                                 unique_tag,
                                                 item['name'],
                                                 add_parent_folder_as_tag):
                    yield (i)
            elif item['type'] == 'url':
                try:
                    if is_nongeneric_url(item['url']):
                        continue
                except KeyError:
                    continue

                tags = ''
                if add_parent_folder_as_tag:
                    tags += folder_name
                if unique_tag:
                    tags += DELIM + unique_tag
                yield (item['url'], item['name'], parse_tags([tags]), None, 0, True)

    def load_chrome_database(self, path, unique_tag, add_parent_folder_as_tag):
        """Open Chrome Bookmarks json file and import data.

        Parameters
        ----------
        path : str
            Path to Google Chrome bookmarks file.
        unique_tag : str
            Timestamp tag in YYYYMonDD format.
        add_parent_folder_as_tag : bool
            True if bookmark parent folders should be added as tags else False.
        """

        with open(path, 'r') as datafile:
            data = json.load(datafile)

        roots = data['roots']
        for entry in roots:
            for item in self.traverse_bm_folder(roots[entry]['children'],
                                                unique_tag,
                                                roots[entry]['name'],
                                                add_parent_folder_as_tag):
                self.add_rec(*item)

    def load_firefox_database(self, path, unique_tag, add_parent_folder_as_tag):
        """Connect to Firefox sqlite db and import bookmarks into BukuDb.

        Parameters
        ----------
        path : str
            Path to Firefox bookmarks sqlite database.
        unique_tag : str
            Timestamp tag in YYYYMonDD format.
        add_parent_folder_as_tag : bool
            True if bookmark parent folders should be added as tags else False.
        """

        # Connect to input DB
        if sys.version_info >= (3, 4, 4):
            # Python 3.4.4 and above
            conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True)
        else:
            conn = sqlite3.connect(path)

        cur = conn.cursor()
        res = cur.execute('SELECT DISTINCT fk, parent, title FROM moz_bookmarks WHERE type=1')
        # get id's and remove duplicates
        for row in res.fetchall():
            # get the url
            res = cur.execute('SELECT url FROM moz_places where id={}'.format(row[0]))
            url = res.fetchone()[0]
            if is_nongeneric_url(url):
                continue

            # get tags
            res = cur.execute('SELECT parent FROM moz_bookmarks WHERE fk={} AND title IS NULL'.format(row[0]))
            bm_tag_ids = [tid for item in res.fetchall() for tid in item]

            bookmark_tags = []
            for bm_tag_id in bm_tag_ids:
                res = cur.execute('SELECT title FROM moz_bookmarks WHERE id={}'.format(bm_tag_id))
                bookmark_tags.append(res.fetchone()[0])

            if add_parent_folder_as_tag:
                # add folder name
                res = cur.execute('SELECT title FROM moz_bookmarks WHERE id={}'.format(row[1]))
                bookmark_tags.append(res.fetchone()[0])

            if unique_tag:
                # add timestamp tag
                bookmark_tags.append(unique_tag)

            formatted_tags = [DELIM + tag for tag in bookmark_tags]
            tags = parse_tags(formatted_tags)

            # get the title
            if row[2]:
                title = row[2]
            else:
                title = ''

            self.add_rec(url, title, tags, None, 0, True)
        try:
            cur.close()
            conn.close()
        except Exception as e:
            logerr(e)

    def auto_import_from_browser(self):
        """Import bookmarks from a browser default database file.

        Supports Firefox and Google Chrome.

        Returns
        -------
        bool
            True on success, False on failure.
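
        Notes
        -----
        Only default browser profiles are probed: Chrome's 'Default'
        profile and the Firefox profile folder ending in '.default'.
        Custom profile locations are not auto-detected.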
""" FF_BM_DB_PATH = None if sys.platform.startswith('linux'): GC_BM_DB_PATH = '~/.config/google-chrome/Default/Bookmarks' DEFAULT_FF_FOLDER = os.path.expanduser('~/.mozilla/firefox') profile = get_firefox_profile_name(DEFAULT_FF_FOLDER) if profile: FF_BM_DB_PATH = '~/.mozilla/firefox/{}.default/places.sqlite'.format(profile) elif sys.platform == 'darwin': GC_BM_DB_PATH = '~/Library/Application Support/Google/Chrome/Default/Bookmarks' DEFAULT_FF_FOLDER = os.path.expanduser('~/Library/Application Support/Firefox') profile = get_firefox_profile_name(DEFAULT_FF_FOLDER) if profile: FF_BM_DB_PATH = '~/Library/Application Support/Firefox/{}.default/places.sqlite'.format(profile) elif sys.platform == 'win32': username = os.getlogin() GC_BM_DB_PATH = 'C:/Users/{}/AppData/Local/Google/Chrome/User Data/Default/Bookmarks'.format(username) DEFAULT_FF_FOLDER = 'C:/Users/{}/AppData/Roaming/Mozilla/Firefox/Profiles'.format(username) profile = get_firefox_profile_name(DEFAULT_FF_FOLDER) if profile: FF_BM_DB_PATH = os.path.join(DEFAULT_FF_FOLDER, '{}.default/places.sqlite'.format(profile)) else: logerr('Buku does not support {} yet'.format(sys.platform)) self.close_quit(1) if self.chatty: newtag = gen_auto_tag() resp = input('Add parent folder names as tags? (y/n): ') else: newtag = None resp = 'y' add_parent_folder_as_tag = (resp == 'y') resp = 'y' try: if self.chatty: resp = input('Import bookmarks from google chrome? (y/n): ') if resp == 'y': bookmarks_database = os.path.expanduser(GC_BM_DB_PATH) if not os.path.exists(bookmarks_database): raise FileNotFoundError self.load_chrome_database(bookmarks_database, newtag, add_parent_folder_as_tag) except Exception: print('Could not import bookmarks from google-chrome') try: if self.chatty: resp = input('Import bookmarks from firefox? (y/n): ') if resp == 'y': bookmarks_database = os.path.expanduser(FF_BM_DB_PATH) if not os.path.exists(bookmarks_database): raise FileNotFoundError self.load_firefox_database(bookmarks_database, newtag, add_parent_folder_as_tag) except Exception: print('Could not import bookmarks from firefox') self.conn.commit() if newtag: print('\nAuto-generated tag: %s' % newtag) def importdb(self, filepath, tacit=False): """Import bookmarks from a html or a markdown file. Supports Firefox, Google Chrome, and IE exported html bookmarks. Supports markdown files with extension '.md'. Supports importing bookmarks from another Buku database file. Parameters ---------- filepath : str Path to file to import. tacit : bool, optional If True, no questions asked and folder names are automatically imported as tags from bookmarks html. If True, automatic timestamp tag is NOT added. Default is False. Returns ------- bool True on success, False on failure. """ if filepath.endswith('.db'): return self.mergedb(filepath) if not tacit: newtag = gen_auto_tag() else: newtag = None if filepath.endswith('.md'): for item in import_md(filepath=filepath, newtag=newtag): self.add_rec(*item) self.conn.commit() else: try: import bs4 with open(filepath, mode='r', encoding='utf-8') as infp: soup = bs4.BeautifulSoup(infp, 'html.parser') except ImportError: logerr('Beautiful Soup not found') return False except Exception as e: logerr(e) return False if not tacit: resp = input('Add parent folder names as tags? 
(y/n): ') else: resp = 'y' add_parent_folder_as_tag = (resp == 'y') for item in import_html(soup, add_parent_folder_as_tag, newtag): self.add_rec(*item) self.conn.commit() infp.close() if newtag: print('\nAuto-generated tag: %s' % newtag) return True def mergedb(self, path): """Merge bookmarks from another Buku database file. Parameters ---------- path : str Path to DB file to merge. Returns ------- bool True on success, False on failure. """ try: # Connect to input DB if sys.version_info >= (3, 4, 4): # Python 3.4.4 and above indb_conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True) else: indb_conn = sqlite3.connect(path) indb_cur = indb_conn.cursor() indb_cur.execute('SELECT * FROM bookmarks') except Exception as e: logerr(e) return False resultset = indb_cur.fetchall() if resultset: for row in resultset: self.add_rec(row[1], row[2], row[3], row[4], row[5], True) self.conn.commit() try: indb_cur.close() indb_conn.close() except Exception: pass return True def tnyfy_url(self, index=0, url=None, shorten=True): """Shorten a URL using Google URL shortener. Parameters ---------- index : int, optional (if URL is provided) DB index of the bookmark with the URL to shorten. Default is 0. url : str, optional (if index is provided) URL to shorten. shorten : bool, optional True to shorten, False to expand. Default is False. Returns ------- str Shortened url on success, None on failure. """ if not index and not url: logerr('Either a valid DB index or URL required') return None if index: self.cur.execute('SELECT url FROM bookmarks WHERE id = ? LIMIT 1', (index,)) results = self.cur.fetchall() if not results: return None url = results[0][0] proxies = { 'https': os.environ.get('https_proxy'), } from urllib.parse import quote_plus as qp urlbase = 'https://tny.im/yourls-api.php?action=' if shorten: _u = urlbase + 'shorturl&format=simple&url=' + qp(url) else: _u = urlbase + 'expand&format=simple&shorturl=' + qp(url) try: r = requests.post(_u, headers={ 'content-type': 'application/json', 'User-Agent': USER_AGENT }, proxies=proxies) except Exception as e: logerr(e) return None if r.status_code != 200: logerr('[%s] %s', r.status_code, r.reason) return None return r.text def fixtags(self): """Undocumented API to fix tags set in earlier versions. Functionalities: 1. Remove duplicate tags 2. Sort tags 3. Use lower case to store tags """ to_commit = False self.cur.execute('SELECT id, tags FROM bookmarks ORDER BY id ASC') resultset = self.cur.fetchall() query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in resultset: oldtags = row[1] if oldtags == DELIM: continue tags = parse_tags([oldtags]) if tags == oldtags: continue self.cur.execute(query, (tags, row[0],)) to_commit = True if to_commit: self.conn.commit() def close(self): """Close a DB connection.""" if self.conn is not None: try: self.cur.close() self.conn.close() except Exception: # ignore errors here, we're closing down pass def close_quit(self, exitval=0): """Close a DB connection and exit. Parameters ---------- exitval : int, optional Program exit value. """ if self.conn is not None: try: self.cur.close() self.conn.close() except Exception: # ignore errors here, we're closing down pass sys.exit(exitval) class ExtendedArgumentParser(argparse.ArgumentParser): """Extend classic argument parser.""" @staticmethod def program_info(file=sys.stdout): """Print program info. Parameters ---------- file : file, optional File to write program info to. Default is sys.stdout. 
""" if sys.platform == 'win32' and file == sys.stdout: file = sys.stderr file.write(''' SYMBOLS: > url + comment # tags Version %s Copyright © 2015-2017 %s License: %s Webpage: https://github.com/jarun/Buku ''' % (__version__, __author__, __license__)) @staticmethod def prompt_help(file=sys.stdout): """Print prompt help. Parameters ---------- file : file, optional File to write program info to. Default is sys.stdout. """ file.write(''' PROMPT KEYS: 1-N browse search result indices and/or ranges a open all results in browser s keyword [...] search for records with ANY keyword S keyword [...] search for records with ALL keywords d match substrings ('pen' matches 'opened') r expression run a regex search t [...] search bookmarks by tags or show taglist list index after a tag listing shows records with the tag o id|range [...] browse bookmarks by indices and/or ranges p id|range [...] print bookmarks by indices and/or ranges g [taglist id|range ...] [>>|>|<<] record id|range [...] append, set, remove (all or specific) tags w [editor|id] edit and add or update a bookmark ? show this help q, ^D, double Enter exit buku ''') @staticmethod def is_colorstr(arg): """Check if a string is a valid color string. Parameters ---------- arg : str Color string to validate. Returns ------- str Same color string that was passed as an argument. Raises ------ ArgumentTypeError If the arg is not a valid color string. """ try: assert len(arg) == 5 for c in arg: assert c in COLORMAP except AssertionError: raise argparse.ArgumentTypeError('%s is not a valid color string' % arg) return arg # Help def print_help(self, file=sys.stdout): """Print help prompt. Parameters ---------- file : file, optional File to write program info to. Default is sys.stdout. """ super(ExtendedArgumentParser, self).print_help(file) self.program_info(file) # ---------------- # Helper functions # ---------------- def get_firefox_profile_name(path): """List folder and detect default Firefox profile name. Returns ------- profile : str Firefox profile name. """ try: names = os.listdir(path) profile = [name[:-8] for name in names if name.endswith('.default')][0] except FileNotFoundError: profile = None return profile def walk(root): """Recursively iterate over json. Parameters ---------- root : json element Base node of the json data. """ for element in root['children']: if element['type'] == 'url': url = element['url'] title = element['name'] yield (url, title, None, None, 0, True) else: walk(element) def is_bad_url(url): """Check if URL is malformed. .. note:: This API is not bulletproof but works in most cases. Parameters ---------- url : str URL to scan. Returns ------- bool True if URL is malformed, False otherwise. """ # Get the netloc token netloc = parse_url(url).netloc if not netloc: # Try of prepend '//' and get netloc netloc = parse_url('//' + url).netloc if not netloc: return True logdbg('netloc: %s', netloc) # netloc cannot start or end with a '.' if netloc.startswith('.') or netloc.endswith('.'): return True # netloc should have at least one '.' if netloc.rfind('.') < 0: return True return False def is_nongeneric_url(url): """Returns True for URLs which are non-http and non-generic. Parameters ---------- url : str URL to scan. Returns ------- bool True if URL is a non-generic URL, False otherwise. """ ignored_prefix = ['place:', 'file://', 'apt:'] for prefix in ignored_prefix: if url.startswith(prefix): return True return False def is_ignored_mime(url): """Check if URL links to ignored MIME. .. 
def is_bad_url(url):
    """Check if URL is malformed.

    .. note:: This API is not bulletproof but works in most cases.

    Parameters
    ----------
    url : str
        URL to scan.

    Returns
    -------
    bool
        True if URL is malformed, False otherwise.
    """
    # Get the netloc token
    netloc = parse_url(url).netloc
    if not netloc:
        # Try to prepend '//' and get netloc
        netloc = parse_url('//' + url).netloc
        if not netloc:
            return True

    logdbg('netloc: %s', netloc)

    # netloc cannot start or end with a '.'
    if netloc.startswith('.') or netloc.endswith('.'):
        return True

    # netloc should have at least one '.'
    if netloc.rfind('.') < 0:
        return True

    return False


def is_nongeneric_url(url):
    """Returns True for URLs which are non-http and non-generic.

    Parameters
    ----------
    url : str
        URL to scan.

    Returns
    -------
    bool
        True if URL is a non-generic URL, False otherwise.
    """
    ignored_prefix = ['place:', 'file://', 'apt:']

    for prefix in ignored_prefix:
        if url.startswith(prefix):
            return True

    return False


def is_ignored_mime(url):
    """Check if URL links to ignored MIME.

    .. note:: Only a 'HEAD' request is made for these URLs.

    Parameters
    ----------
    url : str
        URL to scan.

    Returns
    -------
    bool
        True if URL links to ignored MIME, False otherwise.
    """
    for mime in SKIP_MIMES:
        if url.lower().endswith(mime):
            logdbg('matched MIME: %s', mime)
            return True

    return False


def get_page_title(resp):
    """Invoke HTML parser and extract title from HTTP response.

    Parameters
    ----------
    resp : HTTP response
        Response from GET request.

    Returns
    -------
    str
        Title fetched from parsed page.
    """
    parser = BukuHTMLParser()
    try:
        parser.feed(resp.data.decode(errors='replace'))
    except Exception as e:
        # Suppress Exception due to intentional self.reset() in BHTMLParser
        if (logger.isEnabledFor(logging.DEBUG) and
                str(e) != 'we should not get here!'):
            logerr('get_page_title(): %s', e)
    finally:
        return parser.parsed_title


def gen_headers():
    """Generate headers for network connection."""
    global myheaders, myproxy

    myheaders = {
        'Accept-Encoding': 'gzip,deflate',
        'User-Agent': USER_AGENT,
        'Accept': '*/*',
        'Cookie': '',
        'DNT': '1'
    }

    myproxy = os.environ.get('https_proxy')
    if myproxy:
        try:
            url = parse_url(myproxy)
        except Exception as e:
            logerr(e)
            return

        # Strip username and password (if present) and update headers
        if url.auth:
            myproxy = myproxy.replace(url.auth + '@', '')
            auth_headers = make_headers(basic_auth=url.auth)
            myheaders.update(auth_headers)

        logdbg('proxy: [%s]', myproxy)


def get_PoolManager():
    """Creates a pool manager with proxy support, if applicable.

    Returns
    -------
    ProxyManager or PoolManager
        ProxyManager if https_proxy is defined, PoolManager otherwise.
    """
    if myproxy:
        return urllib3.ProxyManager(myproxy, num_pools=1, headers=myheaders)

    return urllib3.PoolManager(num_pools=1, headers=myheaders)


def network_handler(url, http_head=False):
    """Handle server connection and redirections.

    Parameters
    ----------
    url : str
        URL to fetch.
    http_head : bool
        If True, send only HTTP HEAD request. Default is False.

    Returns
    -------
    tuple
        (title, recognized mime, bad url).
    """
    page_title = None
    # Initialize to None so the finally block below cannot hit
    # an unbound name if get_PoolManager() itself raises
    http_handler = None

    if is_bad_url(url):
        return ('', 0, 1)

    if is_ignored_mime(url) or http_head:
        method = 'HEAD'
    else:
        method = 'GET'

    if not myheaders:
        gen_headers()

    try:
        http_handler = get_PoolManager()

        while True:
            resp = http_handler.request(method, url, timeout=40)

            if resp.status == 200:
                if method == 'GET':
                    page_title = get_page_title(resp)
            elif resp.status == 403 and url.endswith('/'):
                # HTTP response Forbidden
                # Handle URLs in the form of https://www.domain.com/
                # which fail when trying to fetch resource '/'
                # retry without trailing '/'
                logdbg('Received status 403: retrying...')
                # Remove trailing /
                url = url[:-1]
                resp.release_conn()
                continue
            else:
                logerr('[%s] %s', resp.status, resp.reason)

            if resp:
                resp.release_conn()

            break
    except Exception as e:
        logerr('network_handler(): %s', e)
    finally:
        # Note: returning from finally intentionally suppresses any
        # in-flight exception (it has already been logged above)
        if http_handler:
            http_handler.clear()
        if method == 'HEAD':
            return ('', 1, 0)
        if page_title is None:
            return ('', 0, 0)
        return (page_title.strip().replace('\n', ''), 0, 0)


def parse_tags(keywords=[]):
    """Format and get tag string from tokens.

    Parameters
    ----------
    keywords : list, optional
        List of tags to parse. Default is empty list.

    Returns
    -------
    str
        Comma-delimited string of tags.
    DELIM : str
        If no keywords, returns the delimiter.
    None
        If keywords is None.
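
    Examples
    --------
    A sketch of the expected behavior: duplicate and case-variant tags
    collapse, and the result is sorted and wrapped in DELIM:

    >>> parse_tags(['fruit,', 'Fruit,', 'veg'])
    ',fruit,veg,'
    >>> parse_tags([])
    ','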
""" if keywords is None: return None if not keywords: return DELIM tags = DELIM # Cleanse and get the tags tagstr = ' '.join(keywords) marker = tagstr.find(DELIM) while marker >= 0: token = tagstr[0:marker] tagstr = tagstr[marker + 1:] marker = tagstr.find(DELIM) token = token.strip() if token == '': continue tags += token + DELIM tagstr = tagstr.strip() if tagstr != '': tags += tagstr + DELIM logdbg('keywords: %s', keywords) logdbg('parsed tags: [%s]', tags) if tags == DELIM: return tags orig_tags = tags.strip(DELIM).split(DELIM) # Add unique tags in lower case unique_tags = [] for tag in orig_tags: tag = tag.lower() if tag not in unique_tags: unique_tags += (tag, ) # Sort the tags sorted_tags = sorted(unique_tags) # Wrap with delimiter return delim_wrap(DELIM.join(sorted_tags)) def prep_tag_search(tags): """Prepare list of tags to search and determine search operator. Parameters ---------- tags : str String list of tags to search. Returns ------- tuple (list of formatted tags to search, a string indicating query search operator (either OR or AND), a regex string of tags or None if ' - ' delimiter not in tags). """ excluded_tags = None if ' - ' in tags: tags, excluded_tags = tags.split(' - ', 1) excluded_taglist = [delim_wrap(t.strip()) for t in excluded_tags.split(',')] # join with pipe to construct regex string excluded_tags = '|'.join(excluded_taglist) search_operator = 'OR' tag_delim = ',' if ' + ' in tags: search_operator = 'AND' tag_delim = ' + ' tags = [delim_wrap(t.strip()) for t in tags.split(tag_delim)] return tags, search_operator, excluded_tags def gen_auto_tag(): """Generate a tag in Year-Month-Date format. Returns ------- str New tag as YYYYMonDD. """ import calendar as cal t = time.localtime() return ('%d%s%02d' % (t.tm_year, cal.month_abbr[t.tm_mon], t.tm_mday)) def edit_at_prompt(obj, nav, suggest=False): """Edit and add or update a bookmark. Parameters ---------- obj : BukuDb instance A valid instance of BukuDb class. nav : str Navigation command argument passed at prompt by user. suggest : bool, optional If True, suggest similar tags on new bookmark addition. """ if nav == 'w': editor = get_system_editor() if not is_editor_valid(editor): return elif is_int(nav[2:]): obj.edit_update_rec(int(nav[2:])) return else: editor = nav[2:] result = edit_rec(editor, '', None, DELIM, None) if result is not None: url, title, tags, desc = result if suggest: tags = obj.suggest_similar_tag(tags) obj.add_rec(url, title, tags, desc) def taglist_subprompt(obj, noninteractive=False): """Additional prompt to show unique tag list. Parameters ---------- obj : BukuDb instance A valid instance of BukuDb class. noninteractive : bool, optional If True, does not seek user input. Default is False. Returns ------- str New command string. """ unique_tags, dic = obj.get_tag_all() new_results = True while True: if new_results: if not unique_tags: count = 0 print('0 tags') else: count = 1 for tag in unique_tags: print('%6d. %s (%d)' % (count, tag, dic[tag])) count += 1 print() if noninteractive: return try: nav = read_in(promptmsg) if not nav: nav = read_in(promptmsg) if not nav: # Quit on double enter return 'q' nav = nav.strip() except EOFError: return 'q' if is_int(nav) and int(nav) > 0 and int(nav) < count: return 't ' + unique_tags[int(nav) - 1] elif is_int(nav): print('No matching index %s' % nav) new_results = False elif nav == 't': new_results = True elif (nav == 'q' or nav == 'd' or nav == '?' 
or nav.startswith('s ') or nav.startswith('S ') or nav.startswith('r ') or nav.startswith('t ') or nav.startswith('o ') or nav.startswith('p ') or nav.startswith('g ')) or nav == 'w' or nav.startswith('w '): return nav else: print('Invalid input') new_results = False def prompt(obj, results, noninteractive=False, deep=False, subprompt=False, suggest=False): """Show each matching result from a search and prompt. Parameters ---------- obj : BukuDb instance A valid instance of BukuDb class. results : list Search result set from a DB query. noninteractive : bool, optional If True, does not seek user input. Default is False. deep : bool, optional Use deep search. Default is False. subprompt : bool, optional If True, jump directly to subprompt. suggest : bool, optional If True, suggest similar tags on edit and add bookmark. """ if not type(obj) is BukuDb: logerr('Not a BukuDb instance') return new_results = True while True: if not subprompt: if new_results: if results: count = 0 for row in results: count += 1 print_single_rec(row, count) else: print('0 results') if noninteractive: return try: nav = read_in(promptmsg) if not nav: nav = read_in(promptmsg) if not nav: # Quit on double enter break nav = nav.strip() except EOFError: return else: nav = 't' subprompt = False # list tags with 't' if nav == 't': nav = taglist_subprompt(obj, noninteractive) if noninteractive: return # search ANY match with new keywords if nav.startswith('s '): results = obj.searchdb(nav[2:].split(), False, deep) new_results = True continue # search ALL match with new keywords if nav.startswith('S '): results = obj.searchdb(nav[2:].split(), True, deep) new_results = True continue # regular expressions search with new keywords if nav.startswith('r '): results = obj.searchdb(nav[2:].split(), True, regex=True) new_results = True continue # tag search with new keywords if nav.startswith('t '): results = obj.search_by_tag(nav[2:]) new_results = True continue # quit with 'q' if nav == 'q': return # No new results fetched beyond this point new_results = False # toggle deep search with 'd' if nav == 'd': deep = not deep if deep: print('deep search on') else: print('deep search off') continue # Show help with '?' 
if nav == '?': ExtendedArgumentParser.prompt_help(sys.stdout) continue # Edit and add or update if nav == 'w' or nav.startswith('w '): edit_at_prompt(obj, nav, suggest) continue # Append or overwrite tags if nav.startswith('g '): unique_tags, dic = obj.get_tag_all() _count = obj.set_tag(nav[2:], unique_tags) if _count == -1: print('Invalid input') else: print('%d updated' % _count) continue # Print bookmarks by DB index if nav.startswith('p '): id_list = nav[2:].split() try: for id in id_list: if is_int(id): obj.print_rec(int(id)) elif '-' in id: vals = [int(x) for x in id.split('-')] obj.print_rec(0, vals[0], vals[-1], True) else: print('Invalid input') except ValueError: print('Invalid input') continue # Browse bookmarks by DB index if nav.startswith('o '): id_list = nav[2:].split() try: for id in id_list: if is_int(id): obj.browse_by_index(int(id)) elif '-' in id: vals = [int(x) for x in id.split('-')] obj.browse_by_index(0, vals[0], vals[-1], True) else: print('Invalid input') except ValueError: print('Invalid input') continue # Nothing to browse if there are no results if not results: print('Not in a search context') continue # open all results and re-prompt with 'a' if nav == 'a': for index in range(0, count): browse(results[index][1]) continue # iterate over white-space separated indices for nav in nav.split(): if is_int(nav): index = int(nav) - 1 if index < 0 or index >= count: print('No matching index %s' % nav) continue browse(results[index][1]) elif '-' in nav: try: vals = [int(x) for x in nav.split('-')] if vals[0] > vals[-1]: vals[0], vals[-1] = vals[-1], vals[0] for _id in range(vals[0]-1, vals[-1]): if 0 <= _id < count: browse(results[_id][1]) else: print('No matching index %d' % (_id + 1)) except ValueError: print('Invalid input') break else: print('Invalid input') break def print_single_rec(row, idx=0): # NOQA """Print a single DB record. Handles both search results and individual record. Parameters ---------- row : tuple Tuple representing bookmark record data. idx : int, optional Search result index. If 0, print with DB index. Default is 0. """ str_list = [] # Start with index and title if idx != 0: id_title_res = ID_str % (idx, row[2] if row[2] else 'Untitled', row[0]) else: id_title_res = ID_DB_str % (row[0], row[2] if row[2] else 'Untitled') # Indicate if record is immutable if row[5] & 1: id_title_res = MUTE_str % (id_title_res) else: id_title_res += '\n' str_list.append(id_title_res) str_list.append(URL_str % (row[1])) if row[4]: str_list.append(DESC_str % (row[4])) if row[3] != DELIM: str_list.append(TAG_str % (row[3][1:-1])) print(''.join(str_list)) def format_json(resultset, single_record=False, field_filter=0): """Return results in json format. Parameters ---------- resultset : list Search results from DB query. single_record : bool, optional If True, indicates only one record. Default is False. Returns ------- json Record(s) in json format. 
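
    Examples
    --------
    A minimal sketch with a hand-rolled result row of the form
    (index, URL, title, DELIM-wrapped tags, description):

    >>> print(format_json([(1, 'https://example.com', 'Example', ',news,', '')],
    ...                   field_filter=1))
    [
        {
            "uri": "https://example.com"
        }
    ]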
""" if single_record: marks = {} for row in resultset: if field_filter == 1: marks['uri'] = row[1] elif field_filter == 2: marks['uri'] = row[1] marks['tags'] = row[3][1:-1] elif field_filter == 3: marks['title'] = row[2] elif field_filter == 4: marks['uri'] = row[1] marks['tags'] = row[3][1:-1] marks['title'] = row[2] else: marks['index'] = row[0] marks['uri'] = row[1] marks['title'] = row[2] marks['description'] = row[4] marks['tags'] = row[3][1:-1] else: marks = [] for row in resultset: if field_filter == 1: record = {'uri': row[1]} elif field_filter == 2: record = {'uri': row[1], 'tags': row[3][1:-1]} elif field_filter == 3: record = {'title': row[2]} elif field_filter == 4: record = {'uri': row[1], 'title': row[2], 'tags': row[3][1:-1]} else: record = {'index': row[0], 'uri': row[1], 'title': row[2], 'description': row[4], 'tags': row[3][1:-1]} marks.append(record) return json.dumps(marks, sort_keys=True, indent=4) def is_int(string): """Check if a string is a digit. string : str Input string to check. Returns ------- bool True on success, False on exception. """ try: int(string) return True except Exception: return False def browse(url): """Duplicate stdin, stdout and open URL in default browser. .. note:: Duplicates stdin and stdout in order to suppress showing errors on the terminal. Parameters ---------- url : str URL to open in browser. Attributes ---------- suppress_browser_output : bool True if a text based browser is detected. Must be initialized (as applicable) to use the API. """ if not parse_url(url).scheme: # Prefix with 'http://' if no scheme # Otherwise, opening in browser fails anyway # We expect http to https redirection # will happen for https-only websites logerr('scheme missing in URI, trying http') url = 'http://' + url if browse.suppress_browser_output: _stderr = os.dup(2) os.close(2) _stdout = os.dup(1) os.close(1) fd = os.open(os.devnull, os.O_RDWR) os.dup2(fd, 2) os.dup2(fd, 1) try: if sys.platform != 'win32': webbrowser.open(url, new=2) else: # On Windows, the webbrowser module does not fork. # Use threads instead. def browserthread(): webbrowser.open(url, new=2) t = threading.Thread(target=browserthread) t.start() except Exception as e: logerr('browse(): %s', e) finally: if browse.suppress_browser_output: os.close(fd) os.dup2(_stderr, 2) os.dup2(_stdout, 1) def check_upstream_release(): """Check and report the latest upstream release version.""" proxies = { 'https': os.environ.get('https_proxy'), } try: r = requests.get( 'https://api.github.com/repos/jarun/buku/releases?per_page=1', proxies=proxies ) except Exception as e: logerr(e) return if r.status_code != 200: logerr('[%s] %s', r.status_code, r.reason) else: latest = r.json()[0]['tag_name'] if latest == 'v' + __version__: print('This is the latest release') else: print('Latest upstream release is %s' % latest) def regexp(expr, item): """Perform a regular expression search. Parameters ---------- expr : regex Regular expression to search for. item : str Item on which to perform regex search. Returns ------- bool True if result of search is not None, returns None otherwise. """ return re.search(expr, item, re.IGNORECASE) is not None def delim_wrap(token): """Returns token string wrapped in delimiters. Parameters ---------- token : str String item to wrap with DELIM. Returns ------- str Token string wrapped by DELIM. """ return DELIM + token + DELIM def read_in(msg): """A wrapper to handle input() with interrupts disabled. Parameters ---------- msg : str String to pass to to input(). 
""" disable_sigint_handler() message = None try: message = input(msg) except KeyboardInterrupt: print('Interrupted.') enable_sigint_handler() return message def sigint_handler(signum, frame): """Custom SIGINT handler. .. note:: Neither signum nor frame are used in this custom handler. However, they are required parameters for signal handlers. Parameters ---------- signum : int Signal number. frame : frame object or None. """ global interrupted interrupted = True print('\nInterrupted.', file=sys.stderr) # Do a hard exit from here os._exit(1) DEFAULT_HANDLER = signal.signal(signal.SIGINT, sigint_handler) def disable_sigint_handler(): """Disable signint handler.""" signal.signal(signal.SIGINT, DEFAULT_HANDLER) def enable_sigint_handler(): """Enable sigint handler.""" signal.signal(signal.SIGINT, sigint_handler) # --------------------- # Editor mode functions # --------------------- def get_system_editor(): """Returns default system editor is $EDITOR is set.""" return os.environ.get('EDITOR', 'none') def is_editor_valid(editor): """Check if the editor string is valid. Parameters ---------- editor : str Editor string. Returns ------- bool True if string is valid, else False. """ if editor == 'none': logerr('EDITOR is not set') return False if editor == '0': logerr('Cannot edit index 0') return False return True def to_temp_file_content(url, title_in, tags_in, desc): """Generate temporary file content string. Parameters ---------- url : str URL to open. title_in : str Title to add manually. tags_in : str Comma-separated tags to add manually. desc : str String description. Returns ------- str Lines as newline separated string. """ strings = [('# Lines beginning with "#" will be stripped.\n' '# Add URL in next line (single line).'), ] # URL if url is not None: strings += (url,) # TITLE strings += (('# Add TITLE in next line (single line). Leave blank to web fetch, "-" for no title.'),) if title_in is None: title_in = '' elif title_in == '': title_in = '-' strings += (title_in,) # TAGS strings += ('# Add comma-separated TAGS in next line (single line).',) strings += (tags_in.strip(DELIM),) if not None else '' # DESC strings += ('# Add COMMENTS in next line(s).',) if desc is not None and desc != '': strings += (desc,) else: strings += ('\n',) return '\n'.join(strings) def parse_temp_file_content(content): """Parse and return temporary file content. Parameters ---------- content : str String of content. Returns ------- tuple (url, title, tags, comments) url: URL to open title: string title to add manually tags: string of comma-separated tags to add manually comments: string description """ content = content.split('\n') content = [c for c in content if not c or c[0] != '#'] if not content or content[0].strip() == '': print('Edit aborted') return None url = content[0] title = None if len(content) > 1: title = content[1] if title == '': title = None elif title == '-': title = '' tags = DELIM if len(content) > 2: tags = parse_tags([content[2]]) comments = [] if len(content) > 3: comments = [c for c in content[3:]] # need to remove all empty line that are at the end # and not those in the middle of the text for i in range(len(comments) - 1, -1, -1): if comments[i].strip() != '': break if i == -1: comments = [] else: comments = comments[0:i+1] comments = '\n'.join(comments) return url, title, tags, comments def edit_rec(editor, url, title_in, tags_in, desc): """Edit a bookmark record. Parameters ---------- editor : str Editor to open. URL : str URL to open. title_in : str Title to add manually. 
tags_in : str Comma-separated tags to add manually. desc : str Bookmark description. Returns ------- tuple Parsed results from parse_temp_file_content(). """ import tempfile import subprocess temp_file_content = to_temp_file_content(url, title_in, tags_in, desc) fd, tmpfile = tempfile.mkstemp(prefix='buku-edit-') os.close(fd) try: with open(tmpfile, 'w+', encoding='utf-8') as fp: fp.write(temp_file_content) fp.flush() logdbg('Edited content written to %s', tmpfile) cmd = editor.split(' ') cmd += (tmpfile,) subprocess.call(cmd) with open(tmpfile, 'r', encoding='utf-8') as f: content = f.read() os.remove(tmpfile) except FileNotFoundError: if os.path.exists(tmpfile): os.remove(tmpfile) logerr('Cannot open editor') else: logerr('Cannot open tempfile') return None parsed_content = parse_temp_file_content(content) return parsed_content def setup_logger(logger): """Setup logger with color. Parameters ---------- logger : logger object Logger to colorize. """ def decorate_emit(fn): def new(*args): levelno = args[0].levelno if levelno == logging.DEBUG: color = '\x1b[35m' elif levelno == logging.ERROR: color = '\x1b[31m' elif levelno == logging.WARNING: color = '\x1b[33m' elif levelno == logging.INFO: color = '\x1b[32m' elif levelno == logging.CRITICAL: color = '\x1b[31m' else: color = '\x1b[0m' args[0].msg = '{}[{}]\x1b[0m {}'.format(color, args[0].levelname, args[0].msg) return fn(*args) return new sh = logging.StreamHandler() sh.emit = decorate_emit(sh.emit) logger.addHandler(sh) def piped_input(argv, pipeargs=None): """Handle piped input. Parameters ---------- pipeargs : str """ if not sys.stdin.isatty(): pipeargs += argv print('waiting for input') for s in sys.stdin: pipeargs += s.split() def setcolors(args): """Get colors from user and separate into 'result' list for use in arg.colors. Parameters ---------- args : str Color string. """ Colors = collections.namedtuple('Colors', ' ID_srch, ID_str, URL_str, DESC_str, TAG_str') colors = Colors(*[COLORMAP[c] for c in args]) id_col = colors.ID_srch id_str_col = colors.ID_str url_col = colors.URL_str desc_col = colors.DESC_str tag_col = colors.TAG_str result = [id_col, id_str_col, url_col, desc_col, tag_col] return result # main starts here def main(): """Main.""" global ID_str, ID_DB_str, MUTE_str, URL_str, DESC_str, TAG_str, promptmsg title_in = None tags_in = None desc_in = None pipeargs = [] colorstr_env = os.getenv('BUKU_COLORS') try: piped_input(sys.argv, pipeargs) except KeyboardInterrupt: pass # If piped input, set argument vector if pipeargs: sys.argv = pipeargs # Setup custom argument parser argparser = ExtendedArgumentParser( description='''Powerful command-line bookmark manager. POSITIONAL ARGUMENTS: KEYWORD search keywords''', formatter_class=argparse.RawTextHelpFormatter, usage='''buku [OPTIONS] [KEYWORD [KEYWORD ...]]''', add_help=False ) HIDE = argparse.SUPPRESS argparser.add_argument('keywords', nargs='*', metavar='KEYWORD', help=HIDE) # --------------------- # GENERAL OPTIONS GROUP # --------------------- general_grp = argparser.add_argument_group( title='GENERAL OPTIONS', description=''' -a, --add URL [tag, ...] bookmark URL with comma-separated tags -u, --update [...] update fields of an existing bookmark accepts indices and ranges refresh the title, if no edit options if no arguments: - update results when used with search - otherwise refresh all titles -w, --write [editor|index] open editor to edit a fresh bookmark to update by index, EDITOR must be set -d, --delete [...] 
remove bookmarks from DB accepts indices or a single range if no arguments: - delete results when used with search - otherwise delete all bookmarks -h, --help show this information and exit -v, --version show the program version and exit''') addarg = general_grp.add_argument addarg('-a', '--add', nargs='+', help=HIDE) addarg('-u', '--update', nargs='*', help=HIDE) addarg('-w', '--write', nargs='?', const=get_system_editor(), help=HIDE) addarg('-d', '--delete', nargs='*', help=HIDE) addarg('-h', '--help', action='store_true', help=HIDE) addarg('-v', '--version', action='version', version=__version__, help=HIDE) # ------------------ # EDIT OPTIONS GROUP # ------------------ edit_grp = argparser.add_argument_group( title='EDIT OPTIONS', description=''' --url keyword bookmark link --tag [+|-] [...] comma-separated tags clear bookmark tagset, if no arguments '+' appends to, '-' removes from tagset --title [...] bookmark title; if no arguments: -a: do not set title, -u: clear title -c, --comment [...] notes or description of the bookmark clears description, if no arguments --immutable N disable title fetch from web on update N=0: mutable (default), N=1: immutable''') addarg = edit_grp.add_argument addarg('--url', nargs=1, help=HIDE) addarg('--tag', nargs='*', help=HIDE) addarg('--title', nargs='*', help=HIDE) addarg('-c', '--comment', nargs='*', help=HIDE) addarg('--immutable', type=int, default=-1, choices={0, 1}, help=HIDE) # -------------------- # SEARCH OPTIONS GROUP # -------------------- search_grp = argparser.add_argument_group( title='SEARCH OPTIONS', description=''' -s, --sany find records with ANY matching keyword this is the default search option -S, --sall find records matching ALL the keywords special keywords - "blank": entries with empty title/tag "immutable": entries with locked title --deep match substrings ('pen' matches 'opens') -r, --sreg run a regex search -t, --stag [tag [,|+] ...] [- tag, ...] search bookmarks by tags use ',' to find entries matching ANY tag use '+' to find entries matching ALL tags excludes entries with tags after ' - ' list all tags, if no search keywords''') addarg = search_grp.add_argument addarg('-s', '--sany', action='store_true', help=HIDE) addarg('-S', '--sall', action='store_true', help=HIDE) addarg('-r', '--sreg', action='store_true', help=HIDE) addarg('--deep', action='store_true', help=HIDE) addarg('-t', '--stag', action='store_true', help=HIDE) # ------------------------ # ENCRYPTION OPTIONS GROUP # ------------------------ crypto_grp = argparser.add_argument_group( title='ENCRYPTION OPTIONS', description=''' -l, --lock [N] encrypt DB in N (default 8) # iterations -k, --unlock [N] decrypt DB in N (default 8) # iterations''') addarg = crypto_grp.add_argument addarg('-k', '--unlock', nargs='?', type=int, const=8, help=HIDE) addarg('-l', '--lock', nargs='?', type=int, const=8, help=HIDE) # ---------------- # POWER TOYS GROUP # ---------------- power_grp = argparser.add_argument_group( title='POWER TOYS', description=''' --ai auto-import from Firefox and Chrome -e, --export file export bookmarks in Firefox format html export markdown, if file ends with '.md' format: [title](url), 1 entry per line export buku DB, if file ends with '.db' use --tag to export only specific tags -i, --import file import Firefox or Chrome bookmarks html import markdown, if file ends with '.md' import buku DB, if file ends with '.db' -p, --print [...] 
show record details by indices, ranges print all bookmarks, if no arguments -n shows the last n results (like tail) -f, --format N limit fields in -p or Json search output N=1: URL, N=2: URL and tag, N=3: title, N=4: URL, title and tag -j, --json Json formatted output for -p and search --colors COLORS set output colors in five-letter string --nc disable color output --np do not show the prompt, run and exit -o, --open [...] browse bookmarks by indices and ranges open a random bookmark, if no arguments --oa browse all search results immediately --replace old new replace old tag with new tag everywhere delete old tag, if new tag not specified --shorten index|URL fetch shortened url from tny.im service --expand index|URL expand a tny.im shortened url --suggest show similar tags when adding bookmarks --tacit reduce verbosity --threads N max network connections in full refresh default N=4, min N=1, max N=10 -V check latest upstream version available -z, --debug show debug information and verbose logs''') addarg = power_grp.add_argument addarg('--ai', action='store_true', help=HIDE) addarg('-e', '--export', nargs=1, help=HIDE) addarg('-i', '--import', nargs=1, dest='importfile', help=HIDE) addarg('-p', '--print', nargs='*', help=HIDE) addarg('-f', '--format', type=int, default=0, choices={1, 2, 3, 4}, help=HIDE) addarg('-j', '--json', action='store_true', help=HIDE) addarg('--colors', dest='colorstr', type=argparser.is_colorstr, default=colorstr_env if colorstr_env else 'oKlxm', metavar='COLORS', help=HIDE) addarg('--nc', action='store_true', help=HIDE) addarg('--np', action='store_true', help=HIDE) addarg('-o', '--open', nargs='*', help=HIDE) addarg('--oa', action='store_true', help=HIDE) addarg('--replace', nargs='+', help=HIDE) addarg('--shorten', nargs=1, help=HIDE) addarg('--expand', nargs=1, help=HIDE) addarg('--suggest', action='store_true', help=HIDE) addarg('--tacit', action='store_true', help=HIDE) addarg('--threads', type=int, default=4, choices=range(1, 11), help=HIDE) addarg('-V', dest='upstream', action='store_true', help=HIDE) addarg('-z', '--debug', action='store_true', help=HIDE) # Undocumented APIs addarg('--fixtags', action='store_true', help=HIDE) addarg('--db', nargs=1, help=HIDE) # Show help and exit if no arguments if len(sys.argv) == 1: argparser.print_help(sys.stdout) sys.exit(1) # Parse the arguments args = argparser.parse_args() # Show help and exit if help requested if args.help: argparser.print_help(sys.stdout) sys.exit(0) # Handle color output preference if args.nc: logging.basicConfig(format='[%(levelname)s] %(message)s') else: # Set colors ID = setcolors(args.colorstr)[0] + '%d. ' + COLORMAP['x'] ID_DB_dim = COLORMAP['z'] + '[%s]\n' + COLORMAP['x'] ID_str = ID + setcolors(args.colorstr)[1] + '%s ' + COLORMAP['x'] + ID_DB_dim ID_DB_str = ID + setcolors(args.colorstr)[1] + '%s' + COLORMAP['x'] MUTE_str = '%s \x1b[2m(L)\x1b[0m\n' URL_str = COLORMAP['j'] + ' > ' + setcolors(args.colorstr)[2] + '%s\n' + COLORMAP['x'] DESC_str = COLORMAP['j'] + ' + ' + setcolors(args.colorstr)[3] + '%s\n' + COLORMAP['x'] TAG_str = COLORMAP['j'] + ' # ' + setcolors(args.colorstr)[4] + '%s\n' + COLORMAP['x'] # Enable color in logs setup_logger(logger) # Enable prompt with reverse video promptmsg = '\x1b[7mbuku (? 
for help)\x1b[0m ' # Set up debugging if args.debug: logger.setLevel(logging.DEBUG) logdbg('Version %s', __version__) else: logging.disable(logging.WARNING) urllib3.disable_warnings() # Handle encrypt/decrypt options at top priority if args.lock is not None: BukuCrypt.encrypt_file(args.lock) elif args.unlock is not None: BukuCrypt.decrypt_file(args.unlock) # Set up title if args.title is not None: if args.title: title_in = ' '.join(args.title) else: title_in = '' # Set up tags if args.tag is not None: if args.tag: tags_in = args.tag else: tags_in = [DELIM, ] # Set up comment if args.comment is not None: if args.comment: desc_in = ' '.join(args.comment) else: desc_in = '' # Initialize the database and get handles, set verbose by default bdb = BukuDb(args.json, args.format, not args.tacit, dbfile=args.db[0] if args.db is not None else None, colorize=not args.nc) # Editor mode if args.write is not None: if not is_editor_valid(args.write): bdb.close_quit(1) if is_int(args.write): if not bdb.edit_update_rec(int(args.write), args.immutable): bdb.close_quit(1) elif args.add is None: # Edit and add a new bookmark # Parse tags into a comma-separated string if tags_in: if tags_in[0] == '+': tags = '+' + parse_tags(tags_in[1:]) elif tags_in[0] == '-': tags = '-' + parse_tags(tags_in[1:]) else: tags = parse_tags(tags_in) else: tags = DELIM result = edit_rec(args.write, '', title_in, tags, desc_in) if result is not None: url, title_in, tags, desc_in = result if args.suggest: tags = bdb.suggest_similar_tag(tags) bdb.add_rec(url, title_in, tags, desc_in, args.immutable) # Add record if args.add is not None: if args.url is not None and args.update is None: logerr('Bookmark a single URL at a time') bdb.close_quit(1) # Parse tags into a comma-separated string tags = DELIM keywords = args.add if tags_in is not None: if tags_in[0] == '+': if len(tags_in) > 1: # The case: buku -a url tag1, tag2 --tag + tag3, tag4 tags_in = tags_in[1:] # In case of add, args.add may have URL followed by tags # Add delimiter as url+tags may not end with one keywords = args.add + [DELIM] + tags_in else: keywords = args.add + [DELIM] + tags_in if len(keywords) > 1: tags = parse_tags(keywords[1:]) url = args.add[0] if args.write and not is_int(args.write): result = edit_rec(args.write, url, title_in, tags, desc_in) if result is not None: url, title_in, tags, desc_in = result if args.suggest: tags = bdb.suggest_similar_tag(tags) bdb.add_rec(url, title_in, tags, desc_in, args.immutable) # Enable browser output in case of a text based browser if os.getenv('BROWSER') in ['elinks', 'links', 'lynx', 'w3m', 'links2']: browse.suppress_browser_output = False else: browse.suppress_browser_output = True # Search record search_results = None search_opted = True update_search_results = False if args.sany: # Search URLs, titles, tags for any keyword search_results = bdb.searchdb(args.keywords, False, args.deep) elif args.sall: # Search URLs, titles, tags with all keywords search_results = bdb.searchdb(args.keywords, True, args.deep) elif args.sreg: # Run a regular expression search search_results = bdb.searchdb(args.keywords, regex=True) elif args.stag: # Search bookmarks by tag if args.keywords: search_results = bdb.search_by_tag(' '.join(args.keywords)) else: # Use sub prompt to list all tags prompt(bdb, None, args.np, subprompt=True, suggest=args.suggest) elif args.keywords: search_results = bdb.searchdb(args.keywords, False, args.deep) else: search_opted = False # Add cmdline search options to readline history if search_opted and 
args.keywords: try: readline.add_history(' '.join(args.keywords)) except Exception: pass if search_results: oneshot = args.np to_delete = False # Open all results in browser right away if args.oa # is specified. The has priority over delete/update. # URLs are opened first and updated/deleted later. if args.oa: for row in search_results: browse(row[1]) # In case of search and delete/update, # prompt should be non-interactive # delete gets priority over update if args.delete is not None and not args.delete: oneshot = True to_delete = True elif args.update is not None and not args.update: oneshot = True update_search_results = True if not args.json: prompt(bdb, search_results, oneshot, args.deep) else: # Printing in Json format is non-interactive print(format_json(search_results, field_filter=args.format)) # Delete search results if opted if to_delete: bdb.delete_resultset(search_results) # Update record if args.update is not None: if args.url is not None: url_in = args.url[0] else: url_in = '' # Parse tags into a comma-separated string if tags_in: if tags_in[0] == '+': tags = '+' + parse_tags(tags_in[1:]) elif tags_in[0] == '-': tags = '-' + parse_tags(tags_in[1:]) else: tags = parse_tags(tags_in) else: tags = None # No arguments to --update, update all if not args.update: # Update all records only if search was not opted if not search_opted: bdb.update_rec(0, url_in, title_in, tags, desc_in, args.immutable, args.threads) elif update_search_results and search_results is not None: if not args.tacit: print('Updated results:\n') pos = len(search_results) - 1 while pos >= 0: idx = search_results[pos][0] bdb.update_rec(idx, url_in, title_in, tags, desc_in, args.immutable, args.threads) # Commit at every 200th removal if pos % 200 == 0: bdb.conn.commit() pos -= 1 else: for idx in args.update: if is_int(idx): bdb.update_rec(int(idx), url_in, title_in, tags, desc_in, args.immutable, args.threads) elif '-' in idx: try: vals = [int(x) for x in idx.split('-')] if vals[0] > vals[1]: vals[0], vals[1] = vals[1], vals[0] # Update only once if range starts from 0 (all) if vals[0] == 0: bdb.update_rec(0, url_in, title_in, tags, desc_in, args.immutable, args.threads) else: for _id in range(vals[0], vals[1] + 1): bdb.update_rec(_id, url_in, title_in, tags, desc_in, args.immutable, args.threads) if interrupted: break except ValueError: logerr('Invalid index or range to update') bdb.close_quit(1) if interrupted: break # Delete record if args.delete is not None: if not args.delete: # Attempt delete-all only if search was not opted if not search_opted: bdb.cleardb() elif len(args.delete) == 1 and '-' in args.delete[0]: try: vals = [int(x) for x in args.delete[0].split('-')] if len(vals) == 2: bdb.delete_rec(0, vals[0], vals[1], True) except ValueError: logerr('Invalid index or range to delete') bdb.close_quit(1) else: ids = [] # Select the unique indices for idx in args.delete: if idx not in ids: ids += (idx,) try: # Index delete order - highest to lowest ids.sort(key=lambda x: int(x), reverse=True) for idx in ids: bdb.delete_rec(int(idx)) except ValueError: logerr('Invalid index or range or combination') bdb.close_quit(1) # Print record if args.print is not None: if not args.print: bdb.print_rec(0) else: try: for idx in args.print: if is_int(idx): bdb.print_rec(int(idx)) elif '-' in idx: vals = [int(x) for x in idx.split('-')] bdb.print_rec(0, vals[0], vals[-1], True) except ValueError: logerr('Invalid index or range to print') bdb.close_quit(1) # Replace a tag in DB if args.replace is not None: if 
len(args.replace) == 1: bdb.delete_tag_at_index(0, args.replace[0]) else: bdb.replace_tag(args.replace[0], args.replace[1:]) # Export bookmarks if args.export is not None: if args.tag is None: bdb.exportdb(args.export[0]) elif not args.tag: logerr('Missing tag') else: bdb.exportdb(args.export[0], args.tag) # Import bookmarks if args.importfile is not None: bdb.importdb(args.importfile[0], args.tacit) # Import bookmarks from browser if args.ai: bdb.auto_import_from_browser() # Open URL in browser if args.open is not None: if not args.open: bdb.browse_by_index(0) else: try: for idx in args.open: if is_int(idx): bdb.browse_by_index(int(idx)) elif '-' in idx: vals = [int(x) for x in idx.split('-')] bdb.browse_by_index(0, vals[0], vals[-1], True) except ValueError: logerr('Invalid index or range to open') bdb.close_quit(1) # Shorten URL if args.shorten: if is_int(args.shorten[0]): shorturl = bdb.tnyfy_url(index=int(args.shorten[0])) else: shorturl = bdb.tnyfy_url(url=args.shorten[0]) if shorturl: print(shorturl) # Expand URL if args.expand: if is_int(args.expand[0]): url = bdb.tnyfy_url(index=int(args.expand[0]), shorten=False) else: url = bdb.tnyfy_url(url=args.expand[0], shorten=False) if url: print(url) # Report upstream version if args.upstream: check_upstream_release() # Fix tags if args.fixtags: bdb.fixtags() # Close DB connection and quit bdb.close_quit(0) if __name__ == '__main__': main()
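
# Illustrative command lines mirroring the option groups above
# (a sketch, not an exhaustive reference; run buku --help for details):
#
#   buku -a https://example.com news, daily    # add a bookmark with two tags
#   buku -s kernel debugging                   # records matching ANY keyword
#   buku -S kernel debugging                   # records matching ALL keywords
#   buku -u 15 --title 'New title'             # update title of index 15
#   buku -p 1-5                                # print records at indices 1..5
#   buku -l                                    # encrypt DB (default 8 iterations)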