#!/usr/bin/env python3 # # Bookmark management utility # # Copyright © 2015-2017 Arun Prakash Jana # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Buku. If not, see . import argparse import collections import html.parser as HTMLParser import json import logging import os import re try: import readline readline except ImportError: pass import requests import signal import sqlite3 import sys import threading import time import urllib3 from urllib3.util import parse_url, make_headers import webbrowser __version__ = '3.3.1' __author__ = 'Arun Prakash Jana ' __license__ = 'GPLv3' # Global variables interrupted = False # Received SIGINT DELIM = ',' # Delimiter used to store tags in DB SKIP_MIMES = {'.pdf', '.txt'} promptmsg = 'buku (? for help): ' # Prompt message string # Default format specifiers to print records ID_str = '%d. %s [%s]\n' ID_DB_str = '%d. %s' MUTE_str = '%s (L)\n' URL_str = ' > %s\n' DESC_str = ' + %s\n' TAG_str = ' # %s\n' # colormap for color output from "googler" project COLORMAP = {k: '\x1b[%sm' % v for k, v in { 'a': '30', 'b': '31', 'c': '32', 'd': '33', 'e': '34', 'f': '35', 'g': '36', 'h': '37', 'i': '90', 'j': '91', 'k': '92', 'l': '93', 'm': '94', 'n': '95', 'o': '96', 'p': '97', 'A': '30;1', 'B': '31;1', 'C': '32;1', 'D': '33;1', 'E': '34;1', 'F': '35;1', 'G': '36;1', 'H': '37;1', 'I': '90;1', 'J': '91;1', 'K': '92;1', 'L': '93;1', 'M': '94;1', 'N': '95;1', 'O': '96;1', 'P': '97;1', 'x': '0', 'X': '1', 'y': '7', 'Y': '7;1', 'z': '2', }.items()} # Disguise as Firefox on Ubuntu USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0' myheaders = None # Default dictionary of headers myproxy = None # Default proxy # Set up logging logger = logging.getLogger() logdbg = logger.debug logerr = logger.error class BukuHTMLParser(HTMLParser.HTMLParser): """Class to parse and fetch the title from a HTML page, if available. .. note:: The methods in this class are custom implementations of the HTMLParser object. See docs https://docs.python.org/3/library/html.parser.html. Attributes ---------- in_title_tag : bool True if HTML tag is a tag. Initial value is False. data : str Initial value is empty string. prev_tag : None or str Initial value is None. parsed_title : None or str The parsed title from a title tag. Initial value is None. """ def __init__(self): HTMLParser.HTMLParser.__init__(self) self.in_title_tag = False self.data = '' self.prev_tag = None self.parsed_title = None def handle_starttag(self, tag, attrs): self.in_title_tag = False if tag == 'title': self.in_title_tag = True self.prev_tag = tag def handle_endtag(self, tag): if tag == 'title': self.in_title_tag = False if self.data != '': self.parsed_title = self.data self.reset() # We have received title data, exit parsing def handle_data(self, data): if self.prev_tag == 'title' and self.in_title_tag: self.data += data def error(self, message): pass class BukuCrypt: """Class to handle encryption and decryption of the database file. Functionally a separate entity. 
Involves late imports in the static functions but it saves ~100ms each time. Given that encrypt/decrypt are not done automatically and any one should be called at a time, this doesn't seem to be an outrageous approach. """ # Crypto constants BLOCKSIZE = 0x10000 # 64 KB blocks SALT_SIZE = 0x20 CHUNKSIZE = 0x80000 # Read/write 512 KB chunks @staticmethod def get_filehash(filepath): """Get the SHA256 hash of a file. Parameters ---------- filepath : str Path to the file. Returns ------- hash : bytes Hash digest of file. """ from hashlib import sha256 with open(filepath, 'rb') as fp: hasher = sha256() buf = fp.read(BukuCrypt.BLOCKSIZE) while len(buf) > 0: hasher.update(buf) buf = fp.read(BukuCrypt.BLOCKSIZE) return hasher.digest() @staticmethod def encrypt_file(iterations, dbfile=None): """Encrypt the bookmarks database file. Parameters ---------- iterations : int Number of iterations for key generation. dbfile : str, optional Custom database file path (including filename). """ try: from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.ciphers import (Cipher, modes, algorithms) from getpass import getpass from hashlib import sha256 import struct except ImportError: logerr('cryptography lib(s) missing') sys.exit(1) if iterations < 1: logerr('Iterations must be >= 1') sys.exit(1) if not dbfile: dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db') encfile = dbfile + '.enc' db_exists = os.path.exists(dbfile) enc_exists = os.path.exists(encfile) if db_exists and not enc_exists: pass elif not db_exists: logerr('%s missing. Already encrypted?', dbfile) sys.exit(1) else: # db_exists and enc_exists logerr('Both encrypted and flat DB files exist!') sys.exit(1) password = getpass() passconfirm = getpass() if not password or not passconfirm: logerr('Empty password') sys.exit(1) if password != passconfirm: logerr('Passwords do not match') sys.exit(1) try: # Get SHA256 hash of DB file dbhash = BukuCrypt.get_filehash(dbfile) except Exception as e: logerr(e) sys.exit(1) # Generate random 256-bit salt and key salt = os.urandom(BukuCrypt.SALT_SIZE) key = ('%s%s' % (password, salt.decode('utf-8', 'replace'))).encode('utf-8') for _ in range(iterations): key = sha256(key).digest() iv = os.urandom(16) encryptor = Cipher( algorithms.AES(key), modes.CBC(iv), backend=default_backend() ).encryptor() filesize = os.path.getsize(dbfile) try: with open(dbfile, 'rb') as infp, open(encfile, 'wb') as outfp: outfp.write(struct.pack('<Q', filesize)) outfp.write(salt) outfp.write(iv) # Embed DB file hash in encrypted file outfp.write(dbhash) while True: chunk = infp.read(BukuCrypt.CHUNKSIZE) if len(chunk) == 0: break elif len(chunk) % 16 != 0: chunk = '%s%s' % (chunk, ' ' * (16 - len(chunk) % 16)) outfp.write(encryptor.update(chunk) + encryptor.finalize()) os.remove(dbfile) print('File encrypted') sys.exit(0) except Exception as e: logerr(e) sys.exit(1) @staticmethod def decrypt_file(iterations, dbfile=None): """Decrypt the bookmarks database file. Parameters ---------- iterations : int Number of iterations for key generation. dbfile : str, optional Custom database file path (including filename). The '.enc' suffix must be omitted. 
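
        Examples
        --------
        A minimal sketch of the expected round trip; the path is
        hypothetical and the same passphrase must be typed at the
        getpass prompts (both calls terminate via sys.exit):

        >>> BukuCrypt.encrypt_file(8, dbfile='/tmp/bookmarks.db')
        >>> BukuCrypt.decrypt_file(8, dbfile='/tmp/bookmarks.db')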
""" try: from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.ciphers import (Cipher, modes, algorithms) from getpass import getpass from hashlib import sha256 import struct except ImportError: logerr('cryptography lib(s) missing') sys.exit(1) if iterations < 1: logerr('Decryption failed') sys.exit(1) if not dbfile: dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db') else: dbfile = os.path.abspath(dbfile) dbpath, filename = os.path.split(dbfile) encfile = dbfile + '.enc' enc_exists = os.path.exists(encfile) db_exists = os.path.exists(dbfile) if enc_exists and not db_exists: pass elif not enc_exists: logerr('%s missing', encfile) sys.exit(1) else: # db_exists and enc_exists logerr('Both encrypted and flat DB files exist!') sys.exit(1) password = getpass() if not password: logerr('Decryption failed') sys.exit(1) try: with open(encfile, 'rb') as infp: size = struct.unpack('<Q', infp.read(struct.calcsize('Q')))[0] # Read 256-bit salt and generate key salt = infp.read(32) key = ('%s%s' % (password, salt.decode('utf-8', 'replace'))).encode('utf-8') for _ in range(iterations): key = sha256(key).digest() iv = infp.read(16) decryptor = Cipher( algorithms.AES(key), modes.CBC(iv), backend=default_backend(), ).decryptor() # Get original DB file's SHA256 hash from encrypted file enchash = infp.read(32) with open(dbfile, 'wb') as outfp: while True: chunk = infp.read(BukuCrypt.CHUNKSIZE) if len(chunk) == 0: break outfp.write(decryptor.update(chunk) + decryptor.finalize()) outfp.truncate(size) # Match hash of generated file with that of original DB file dbhash = BukuCrypt.get_filehash(dbfile) if dbhash != enchash: os.remove(dbfile) logerr('Decryption failed') sys.exit(1) else: os.remove(encfile) print('File decrypted') except struct.error: logerr('Tainted file') sys.exit(1) except Exception as e: logerr(e) sys.exit(1) def import_md(filepath, newtag): """Parse bookmark markdown file. Parameters ---------- filepath : str Path to markdown file. newtag : str New tag for bookmarks in markdown file. Returns ------- tuple Parsed result. """ with open(filepath, mode='r', encoding='utf-8') as infp: for line in infp: # Supported markdown format: [title](url) # Find position of title end, url start delimiter combo index = line.find('](') if index != -1: # Find title start delimiter title_start_delim = line[:index].find('[') # Reverse find the url end delimiter url_end_delim = line[index + 2:].rfind(')') if title_start_delim != -1 and url_end_delim > 0: # Parse title title = line[title_start_delim + 1:index] # Parse url url = line[index + 2:index + 2 + url_end_delim] if (is_nongeneric_url(url)): continue yield ( url, title, delim_wrap(newtag) if newtag else None, None, 0, True ) def import_html(html_soup, add_parent_folder_as_tag, newtag): """Parse bookmark html. Parameters ---------- html_soup : BeautifulSoup object BeautifulSoup representation of bookmark html. add_parent_folder_as_tag : bool True if bookmark parent folders should be added as tags else False. newtag : str A new unique tag to add to imported bookmarks. Returns ------- tuple Parsed result. 
""" # compatibility soup = html_soup for tag in soup.findAll('a'): # Extract comment from <dd> tag try: if (is_nongeneric_url(tag['href'])): continue except KeyError: continue desc = None comment_tag = tag.findNextSibling('dd') if comment_tag: desc = comment_tag.find(text=True, recursive=False) # add parent folder as tag if add_parent_folder_as_tag: # could be its folder or not possible_folder = tag.find_previous('h3') # get list of tags within that folder tag_list = tag.parent.parent.find_parent('dl') if ((possible_folder) and possible_folder.parent in list(tag_list.parents)): # then it's the folder of this bookmark if tag.has_attr('tags'): tag['tags'] += (DELIM + possible_folder.text) else: tag['tags'] = possible_folder.text # add unique tag if opted if newtag: if tag.has_attr('tags'): tag['tags'] += (DELIM + newtag) else: tag['tags'] = newtag yield ( tag['href'], tag.string, parse_tags([tag['tags']]) if tag.has_attr('tags') else None, desc, 0, True ) class BukuDb: """Abstracts all database operations. Attributes ---------- conn : sqlite database connection. cur : sqlite database cursor. json : bool True if results should be printed in json format else False. field_filter : int Indicates format for displaying bookmarks. Default is 0. chatty : bool Sets the verbosity of the APIs. Default is False. """ def __init__(self, json=False, field_filter=0, chatty=False, dbfile=None, colorize=True): """Database initialization API. Parameters ---------- json : bool, optional True if results should be printed in json format else False. field_filter : int, optional Indicates format for displaying bookmarks. Default is 0. chatty : bool, optional Sets the verbosity of the APIs. Default is False. colorize : bool, optional Indicates whether color should be used in output. Default is True. """ self.json = json self.field_filter = field_filter self.chatty = chatty self.colorize = colorize self.conn, self.cur = BukuDb.initdb(dbfile, self.chatty) @staticmethod def get_default_dbdir(): """Determine the directory path where dbfile will be stored. If the platform is Windows, use %APPDATA% else if $XDG_DATA_HOME is defined, use it else if $HOME exists, use it else use the current directory. Returns ------- str Path to database file. """ data_home = os.environ.get('XDG_DATA_HOME') if data_home is None: if os.environ.get('HOME') is None: if sys.platform == 'win32': data_home = os.environ.get('APPDATA') if data_home is None: return os.path.abspath('.') else: return os.path.abspath('.') else: data_home = os.path.join(os.environ.get('HOME'), '.local', 'share') return os.path.join(data_home, 'buku') @staticmethod def initdb(dbfile=None, chatty=False): """Initialize the database connection. Create DB file and/or bookmarks table if they don't exist. Alert on encryption options on first execution. Parameters ---------- dbfile : str, optional Custom database file path (including filename). chatty : bool If True, shows informative message on DB creation. Returns ------- tuple (connection, cursor). 
""" if not dbfile: dbpath = BukuDb.get_default_dbdir() filename = 'bookmarks.db' dbfile = os.path.join(dbpath, filename) else: dbfile = os.path.abspath(dbfile) dbpath, filename = os.path.split(dbfile) try: if not os.path.exists(dbpath): os.makedirs(dbpath) except Exception as e: logerr(e) os._exit(1) db_exists = os.path.exists(dbfile) enc_exists = os.path.exists(dbfile + '.enc') if db_exists and not enc_exists: pass elif enc_exists and not db_exists: logerr('Unlock database first') sys.exit(1) elif db_exists and enc_exists: logerr('Both encrypted and flat DB files exist!') sys.exit(1) elif chatty: # not db_exists and not enc_exists print('DB file is being created at %s.\nYou should encrypt it.' % dbfile) try: # Create a connection conn = sqlite3.connect(dbfile, check_same_thread=False) conn.create_function('REGEXP', 2, regexp) cur = conn.cursor() # Create table if it doesn't exist # flags: designed to be extended in future using bitwise masks # Masks: # 0b00000001: set title immutable cur.execute('CREATE TABLE if not exists bookmarks (' 'id integer PRIMARY KEY, ' 'URL text NOT NULL UNIQUE, ' 'metadata text default \'\', ' 'tags text default \',\', ' 'desc text default \'\', ' 'flags integer default 0)') conn.commit() except Exception as e: logerr('initdb(): %s', e) sys.exit(1) return (conn, cur) def get_rec_all(self): """Get all the bookmarks in the database. Returns ------- list A list of tuples representing bookmark records. """ self.cur.execute('SELECT * FROM bookmarks') return self.cur.fetchall() def get_rec_by_id(self, index): """Get a bookmark from database by its ID. Parameters ---------- index : int DB index of bookmark record. Returns ------- tuple or None Bookmark data, or None if index is not found. """ self.cur.execute('SELECT * FROM bookmarks WHERE id = ? LIMIT 1', (index,)) resultset = self.cur.fetchall() return resultset[0] if resultset else None def get_rec_id(self, url): """Check if URL already exists in DB. Parameters ---------- url : str A URL to search for in the DB. Returns ------- int DB index, or -1 if URL not found in DB. """ self.cur.execute('SELECT id FROM bookmarks WHERE URL = ? LIMIT 1', (url,)) resultset = self.cur.fetchall() return resultset[0][0] if resultset else -1 def get_max_id(self): """Fetch the ID of the last record. Returns ------- int ID of the record if any record exists, else -1. """ self.cur.execute('SELECT MAX(id) from bookmarks') resultset = self.cur.fetchall() return -1 if resultset[0][0] is None else resultset[0][0] def add_rec(self, url, title_in=None, tags_in=None, desc=None, immutable=0, delay_commit=False): """Add a new bookmark. Parameters ---------- url : str URL to bookmark. title_in :str, optional Title to add manually. Default is None. tags_in : str, optional Comma-separated tags to add manually. Must start and end with comma. Default is None. desc : str, optional Description of the bookmark. Default is None. immutable : int, optional Indicates whether to disable title fetch from web. Default is 0. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- int DB index of new bookmark on success, -1 on failure. 
""" # Return error for empty URL if not url or url == '': logerr('Invalid URL') return -1 # Ensure that the URL does not exist in DB already id = self.get_rec_id(url) if id != -1: logerr('URL [%s] already exists at index %d', url, id) return -1 # Process title if title_in is not None: meta = title_in else: meta, mime, bad = network_handler(url) if bad: print('Malformed URL\n') elif mime: logdbg('HTTP HEAD requested') elif meta == '': print('No title\n') else: logdbg('Title: [%s]', meta) # Fix up tags, if broken if tags_in is None or tags_in == '': tags_in = DELIM elif tags_in[0] != DELIM: tags_in = DELIM + tags_in elif tags_in[-1] != DELIM: tags_in = tags_in + DELIM # Process description if desc is None: desc = '' try: flagset = 0 if immutable == 1: flagset |= immutable qry = 'INSERT INTO bookmarks(URL, metadata, tags, desc, flags) VALUES (?, ?, ?, ?, ?)' self.cur.execute(qry, (url, meta, tags_in, desc, flagset)) if not delay_commit: self.conn.commit() if self.chatty: self.print_rec(self.cur.lastrowid) return self.cur.lastrowid except Exception as e: logerr('add_rec(): %s', e) return -1 def append_tag_at_index(self, index, tags_in, delay_commit=False): """Append tags to bookmark tagset at index. Parameters ---------- index : int DB index of the record. 0 indicates all records. tags_in : str Comma-separated tags to add manually. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- bool True on success, False on failure. """ if index == 0: resp = read_in('Append the tags to ALL bookmarks? (y/n): ') if resp != 'y': return False self.cur.execute('SELECT id, tags FROM bookmarks ORDER BY id ASC') else: self.cur.execute('SELECT id, tags FROM bookmarks WHERE id = ? LIMIT 1', (index,)) resultset = self.cur.fetchall() if resultset: query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in resultset: tags = row[1] + tags_in[1:] tags = parse_tags([tags]) self.cur.execute(query, (tags, row[0],)) if self.chatty and not delay_commit: self.print_rec(row[0]) else: return False if not delay_commit: self.conn.commit() return True def delete_tag_at_index(self, index, tags_in, delay_commit=False): """Delete tags from bookmark tagset at index. Parameters ---------- index : int DB index of bookmark record. 0 indicates all records. tags_in : str Comma-separated tags to delete manually. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- bool True on success, False on failure. """ tags_to_delete = tags_in.strip(DELIM).split(DELIM) if index == 0: resp = read_in('Delete the tag(s) from ALL bookmarks? (y/n): ') if resp != 'y': return False count = 0 match = "'%' || ? || '%'" for tag in tags_to_delete: tag = delim_wrap(tag) q = ("UPDATE bookmarks SET tags = replace(tags, '%s', '%s') WHERE tags LIKE %s" % (tag, DELIM, match)) self.cur.execute(q, (tag,)) count += self.cur.rowcount if count and not delay_commit: self.conn.commit() if self.chatty: print('%d record(s) updated' % count) return True # Process a single index # Use SELECT and UPDATE to handle multiple tags at once query = 'SELECT id, tags FROM bookmarks WHERE id = ? LIMIT 1' self.cur.execute(query, (index,)) resultset = self.cur.fetchall() if resultset: query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' 
for row in resultset: tags = row[1] for tag in tags_to_delete: tags = tags.replace(delim_wrap(tag), DELIM) self.cur.execute(query, (parse_tags([tags]), row[0],)) if self.chatty and not delay_commit: self.print_rec(row[0]) if not delay_commit: self.conn.commit() else: return False return True def update_rec(self, index, url=None, title_in=None, tags_in=None, desc=None, immutable=-1, threads=4): """Update an existing record at index. Update all records if index is 0 and url is not specified. URL is an exception because URLs are unique in DB. Parameters ---------- index : int DB index of record. 0 indicates all records. url : str, optional Bookmark address. title_in : str, optional Title to add manually. tags_in : str, optional Comma-separated tags to add manually. Must start and end with comma. Prefix with '+,' to append to current tags. Prefix with '-,' to delete from current tags. desc : str, optional Description of bookmark. immutable : int, optional Diable title fetch from web if 1. Default is -1. threads : int, optional Number of threads to use to refresh full DB. Default is 4. Returns ------- bool True on success, False on Failure. """ arguments = [] query = 'UPDATE bookmarks SET' to_update = False tag_modified = False ret = False # Update URL if passed as argument if url is not None and url != '': if index == 0: logerr('All URLs cannot be same') return False query += ' URL = ?,' arguments += (url,) to_update = True # Update tags if passed as argument if tags_in is not None: if tags_in == '+,' or tags_in == '-,': logerr('Please specify a tag') return False if tags_in.startswith('+,'): chatty = self.chatty self.chatty = False ret = self.append_tag_at_index(index, tags_in[1:]) self.chatty = chatty tag_modified = True elif tags_in.startswith('-,'): chatty = self.chatty self.chatty = False ret = self.delete_tag_at_index(index, tags_in[1:]) self.chatty = chatty tag_modified = True else: # Fix up tags, if broken if tags_in is None or tags_in == '': tags_in = DELIM elif tags_in[0] != DELIM: tags_in = DELIM + tags_in elif tags_in[-1] != DELIM: tags_in = tags_in + DELIM query += ' tags = ?,' arguments += (tags_in,) to_update = True # Update description if passed as an argument if desc is not None: query += ' desc = ?,' arguments += (desc,) to_update = True # Update immutable flag if passed as argument if immutable != -1: flagset = 1 if immutable == 1: query += ' flags = flags | ?,' elif immutable == 0: query += ' flags = flags & ?,' flagset = ~flagset arguments += (flagset,) to_update = True # Update title # # 1. if --title has no arguments, delete existing title # 2. if --title has arguments, update existing title # 3. if --title option is omitted at cmdline: # if URL is passed, update the title from web using the URL # 4. 
if no other argument (url, tag, comment, immutable) passed, # update title from web using DB URL (if title is mutable) title_to_insert = None if title_in is not None: title_to_insert = title_in elif url is not None and url != '': title_to_insert, mime, bad = network_handler(url) if bad: print('Malformed URL\n') elif mime: logdbg('HTTP HEAD requested') elif title_to_insert == '': print('No title\n') else: logdbg('Title: [%s]', title_to_insert) elif not to_update and not tag_modified: ret = self.refreshdb(index, threads) if ret and index and self.chatty: self.print_rec(index) return ret if title_to_insert is not None: query += ' metadata = ?,' arguments += (title_to_insert,) to_update = True if not to_update: # Nothing to update # Show bookmark if tags were appended to deleted if tag_modified and self.chatty: self.print_rec(index) return ret if index == 0: # Update all records resp = read_in('Update ALL bookmarks? (y/n): ') if resp != 'y': return False query = query[:-1] else: query = query[:-1] + ' WHERE id = ?' arguments += (index,) logdbg('query: "%s", args: %s', query, arguments) try: self.cur.execute(query, arguments) self.conn.commit() if self.cur.rowcount and self.chatty: self.print_rec(index) if self.cur.rowcount == 0: logerr('No matching index %d', index) return False except sqlite3.IntegrityError: logerr('URL already exists') return False return True def refreshdb(self, index, threads): """Refresh ALL records in the database. Fetch title for eachbookmark from the web and update the records. Doesn't update the record if title is empty. Notes ----- This API doesn't change DB index, URL or tags of a bookmark. This API is verbose. Parameters ---------- index : int DB index of record to update. 0 indicates all records. threads: int Number of threads to use to refresh full DB. Default is 4. """ if index == 0: self.cur.execute('SELECT id, url, flags FROM bookmarks ORDER BY id ASC') else: self.cur.execute('SELECT id, url, flags FROM bookmarks WHERE id = ? LIMIT 1', (index,)) resultset = self.cur.fetchall() recs = len(resultset) if not recs: logerr('No matching index or title immutable or empty DB') return False # Set up strings to be printed if self.colorize: bad_url_str = '\x1b[1mIndex %d: Malformed URL\x1b[0m\n' mime_str = '\x1b[1mIndex %d: HTTP HEAD requested\x1b[0m\n' blank_URL_str = '\x1b[1mIndex %d: No title\x1b[0m\n' success_str = 'Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n' else: bad_url_str = 'Index %d: Malformed URL\n' mime_str = 'Index %d: HTTP HEAD requested\n' blank_URL_str = 'Index %d: No title\n' success_str = 'Title: [%s]\nIndex %d: updated\n' query = 'UPDATE bookmarks SET metadata = ? WHERE id = ?' done = {'value': 0} # count threads completed processed = {'value': 0} # count number of records processed # An additional call to generate default headers # gen_headers() is called within network_handler() # However, this initial call to setup headers # ensures there is no race condition among the # initial threads to setup headers if not myheaders: gen_headers() cond = threading.Condition() cond.acquire() def refresh(count, cond): """Inner function to fetch titles and update records. Parameters ---------- count : int Dummy input to adhere to convention. cond : threading condition object. 
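
            Notes
            -----
            Titles are committed in batches: each thread commits once
            per 32 processed records (count & 0b11111 == 0), which
            limits write overhead while bounding the work lost on an
            interrupt.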
""" count = 0 while True: cond.acquire() if resultset: row = resultset.pop() else: cond.release() break cond.release() title, mime, bad = network_handler(row[1], row[2] & 1) count += 1 cond.acquire() if bad: print(bad_url_str % row[0]) cond.release() continue elif mime: if self.chatty: print(mime_str % row[0]) cond.release() continue elif title == '': print(blank_URL_str % row[0]) cond.release() continue self.cur.execute(query, (title, row[0],)) # Save after fetching 32 titles per thread if count & 0b11111 == 0: self.conn.commit() if self.chatty: print(success_str % (title, row[0])) cond.release() if interrupted: break logdbg('Thread %d: processed %d', threading.get_ident(), count) with cond: done['value'] += 1 processed['value'] += count cond.notify() if recs < threads: threads = recs for i in range(threads): thread = threading.Thread(target=refresh, args=(i, cond)) thread.start() while done['value'] < threads: cond.wait() logdbg('%d threads completed', done['value']) # Guard: records found == total records processed if recs != processed['value']: logerr('Records: %d, processed: %d !!!', recs, processed['value']) cond.release() self.conn.commit() return True def edit_update_rec(self, index, immutable=-1): """Edit in editor and update a record. Parameters ---------- index : int DB index of the record. immutable : int, optional Diable title fetch from web if 1. Default is -1. Returns ------- bool True if updated, else False. """ editor = get_system_editor() if editor == 'none': logerr('EDITOR must be set to use index with -w') return False rec = self.get_rec_by_id(index) if not rec: logerr('No matching index %d', index) return False result = edit_rec(editor, rec[1], rec[2], rec[3], rec[4]) if result is not None: url, title, tags, desc = result return self.update_rec(index, url, title, tags, desc, immutable) if immutable != -1: return self.update_rec(index, immutable) return False def searchdb(self, keywords, all_keywords=False, deep=False, regex=False): """Search DB for entries where tags, URL, or title fields match keywords. Parameters ---------- keywords : list of str Keywords to search. all_keywords : bool, optional True to return records matching ALL keywords. False (default value) to return records matching ANY keyword. deep : bool, optional True to search for matching substrings. Default is False. regex : bool, optional Match a regular expression if True. Default is False. Returns ------- list or None List of search results, or None if no matches. """ if not keywords: return None q0 = 'SELECT id, url, metadata, tags, desc FROM bookmarks WHERE ' # Deep query string q1 = ("(tags LIKE ('%' || ? || '%') OR " "URL LIKE ('%' || ? || '%') OR " "metadata LIKE ('%' || ? || '%') OR " "desc LIKE ('%' || ? || '%')) ") # Non-deep query string q2 = ('(tags REGEXP ? OR ' 'URL REGEXP ? OR ' 'metadata REGEXP ? OR ' 'desc REGEXP ?) ') qargs = [] if regex: for token in keywords: q0 += q2 + 'OR ' qargs += (token, token, token, token,) q0 = q0[:-3] elif all_keywords: if len(keywords) == 1 and keywords[0] == 'blank': q0 = "SELECT * FROM bookmarks WHERE metadata = '' OR tags = ? 
" qargs += (DELIM,) elif len(keywords) == 1 and keywords[0] == 'immutable': q0 = 'SELECT * FROM bookmarks WHERE flags & 1 == 1 ' else: for token in keywords: if deep: q0 += q1 + 'AND ' else: token = '\\b' + token.rstrip('/') + '\\b' q0 += q2 + 'AND ' qargs += (token, token, token, token,) q0 = q0[:-4] elif not all_keywords: for token in keywords: if deep: q0 += q1 + 'OR ' else: token = '\\b' + token.rstrip('/') + '\\b' q0 += q2 + 'OR ' qargs += (token, token, token, token,) q0 = q0[:-3] else: logerr('Invalid search option') return None q0 += 'ORDER BY id ASC' logdbg('query: "%s", args: %s', q0, qargs) try: self.cur.execute(q0, qargs) except sqlite3.OperationalError as e: logerr(e) return None return self.cur.fetchall() def search_by_tag(self, tags): """Search bookmarks for entries with given tags. Parameters ---------- tags : str String of tags to search for. Retrieves entries matching ANY tag if tags are delimited with ','. Retrieves entries matching ALL tags if tags are delimited with '+'. Returns ------- list or None List of search results, or None if no matches. """ # do not allow combination of search logics if ' + ' in tags and ',' in tags: logerr("Cannot use both '+' and ',' in same search") return tags, search_operator, excluded_tags = prep_tag_search(tags) query = "SELECT id, url, metadata, tags, desc FROM bookmarks WHERE tags LIKE '%' || ? || '%' " for tag in tags[1:]: query += "{} tags LIKE '%' || ? || '%' ".format(search_operator) if excluded_tags: tags.append(excluded_tags) query = query.replace('WHERE tags', 'WHERE (tags') query += ') AND tags NOT REGEXP ? ' query += 'ORDER BY id ASC' logdbg('query: "%s", args: %s', query, tags) self.cur.execute(query, tuple(tags, )) return self.cur.fetchall() def compactdb(self, index, delay_commit=False): """When an entry at index is deleted, move the last entry in DB to index, if index is lesser. Parameters ---------- index : int DB index of deleted entry. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. """ # Return if the last index left in DB was just deleted max_id = self.get_max_id() if max_id == -1: return query1 = 'SELECT id, URL, metadata, tags, desc FROM bookmarks WHERE id = ? LIMIT 1' query2 = 'DELETE FROM bookmarks WHERE id = ?' query3 = 'INSERT INTO bookmarks(id, URL, metadata, tags, desc) VALUES (?, ?, ?, ?, ?)' if max_id > index: self.cur.execute(query1, (max_id,)) results = self.cur.fetchall() for row in results: self.cur.execute(query2, (row[0],)) self.cur.execute(query3, (index, row[1], row[2], row[3], row[4],)) if not delay_commit: self.conn.commit() if self.chatty: print('Index %d moved to %d' % (row[0], index)) def delete_rec(self, index, low=0, high=0, is_range=False, delay_commit=False): """Delete a single record or remove the table if index is None. Parameters ---------- index : int DB index of deleted entry. low : int, optional Actual lower index of range. high : int, optional Actual higher index of range. is_range : bool, optional A range is passed using low and high arguments. An index is ignored if is_range is True (use dummy index). Default is False. delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- bool True on success, False on failure. 
""" if is_range: # Delete a range of indices if low < 0 or high < 0: logerr('Negative range boundary') return False if low > high: low, high = high, low # If range starts from 0, delete all records if low == 0: return self.cleardb() try: query = 'DELETE from bookmarks where id BETWEEN ? AND ?' self.cur.execute(query, (low, high)) print('Index %d-%d: %d deleted' % (low, high, self.cur.rowcount)) if not self.cur.rowcount: return False # Compact DB by ascending order of index to ensure # the existing higher indices move only once # Delayed commit is forced for index in range(low, high + 1): self.compactdb(index, delay_commit=True) if not delay_commit: self.conn.commit() except IndexError: logerr('No matching index') return False elif index == 0: # Remove the table return self.cleardb() else: # Remove a single entry try: query = 'DELETE FROM bookmarks WHERE id = ?' self.cur.execute(query, (index,)) if self.cur.rowcount == 1: print('Index %d deleted' % index) self.compactdb(index, delay_commit=True) if not delay_commit: self.conn.commit() else: logerr('No matching index %d', index) return False except IndexError: logerr('No matching index %d', index) return False return True def delete_resultset(self, results): """Delete search results in descending order of DB index. Indices are expected to be unique and in ascending order. Notes ----- This API forces a delayed commit. Parameters ---------- results : list of tuples List of results to delete from DB. Returns ------- bool True on success, False on failure. """ resp = read_in('Delete the search results? (y/n): ') if resp != 'y': return False # delete records in reverse order pos = len(results) - 1 while pos >= 0: idx = results[pos][0] self.delete_rec(idx, delay_commit=True) # Commit at every 200th removal if pos % 200 == 0: self.conn.commit() pos -= 1 return True def delete_rec_all(self, delay_commit=False): """Removes all records in the Bookmarks table. Parameters ---------- delay_commit : bool, optional True if record should not be committed to the DB, leaving commit responsibility to caller. Default is False. Returns ------- bool True on success, False on failure. """ try: self.cur.execute('DELETE FROM bookmarks') if not delay_commit: self.conn.commit() return True except Exception as e: logerr('delete_rec_all(): %s', e) return False def cleardb(self): """Drops the bookmark table if it exists. Returns ------- bool True on success, False on failure. """ resp = read_in('Remove ALL bookmarks? (y/n): ') if resp != 'y': print('No bookmarks deleted') return False self.cur.execute('DROP TABLE if exists bookmarks') self.conn.commit() print('All bookmarks deleted') return True def print_rec(self, index=0, low=0, high=0, is_range=False): """Print bookmark details at index or all bookmarks if index is 0. A negative index behaves like tail, if title is blank show "Untitled". Parameters ----------- index : int, optional DB index of record to print. 0 prints all records. low : int, optional Actual lower index of range. high : int, optional Actual higher index of range. is_range : bool, optional A range is passed using low and high arguments. An index is ignored if is_range is True (use dummy index). Default is False. 
""" if (index < 0): # Show the last n records _id = self.get_max_id() if _id == -1: logerr('Empty database') return False low = (1 if _id <= -index else _id + index + 1) high = _id is_range = True if is_range: if low < 0 or high < 0: logerr('Negative range boundary') return False if low > high: low, high = high, low try: # If range starts from 0 print all records if low == 0: query = 'SELECT * from bookmarks' resultset = self.cur.execute(query) else: query = 'SELECT * from bookmarks where id BETWEEN ? AND ?' resultset = self.cur.execute(query, (low, high)) except IndexError: logerr('Index out of range') return elif index != 0: # Show record at index try: query = 'SELECT * FROM bookmarks WHERE id = ? LIMIT 1' self.cur.execute(query, (index,)) results = self.cur.fetchall() if not results: logerr('No matching index %d', index) return except IndexError: logerr('No matching index %d', index) return if not self.json: for row in results: if self.field_filter == 0: print_single_rec(row) elif self.field_filter == 1: print('%s\t%s' % (row[0], row[1])) elif self.field_filter == 2: print('%s\t%s\t%s' % (row[0], row[1], row[3][1:-1])) elif self.field_filter == 3: print('%s\t%s' % (row[0], row[2])) elif self.field_filter == 4: print('%s\t%s\t%s\t%s' % (row[0], row[1], row[2], row[3][1:-1])) else: print(format_json(results, True, self.field_filter)) return else: # Show all entries self.cur.execute('SELECT * FROM bookmarks') resultset = self.cur.fetchall() if not resultset: logerr('0 records') return if not self.json: if self.field_filter == 0: for row in resultset: print_single_rec(row) elif self.field_filter == 1: for row in resultset: print('%s\t%s' % (row[0], row[1])) elif self.field_filter == 2: for row in resultset: print('%s\t%s\t%s' % (row[0], row[1], row[3][1:-1])) elif self.field_filter == 3: for row in resultset: print('%s\t%s' % (row[0], row[2])) elif self.field_filter == 4: for row in resultset: print('%s\t%s\t%s\t%s' % (row[0], row[1], row[2], row[3][1:-1])) else: print(format_json(resultset, field_filter=self.field_filter)) def get_tag_all(self): """Get list of tags in DB. Returns ------- tuple (list of unique tags sorted alphabetically, dictionary of {tag: usage_count}). """ tags = [] unique_tags = [] dic = {} qry = 'SELECT DISTINCT tags, COUNT(tags) FROM bookmarks GROUP BY tags' for row in self.cur.execute(qry): tagset = row[0].strip(DELIM).split(DELIM) for tag in tagset: if tag not in tags: dic[tag] = row[1] tags += (tag,) else: dic[tag] += row[1] if not tags: return tags, dic if tags[0] == '': unique_tags = sorted(tags[1:]) else: unique_tags = sorted(tags) return unique_tags, dic def suggest_similar_tag(self, tagstr): """Show list of tags those go together in DB. Parameters ---------- tagstr : str Original tag string. Returns ------- str DELIM separated string of tags. """ tags = tagstr.split(',') if not len(tags): return tagstr qry = 'SELECT DISTINCT tags FROM bookmarks WHERE tags LIKE ?' tagset = [] unique_tags = [] for tag in tags: if tag == '': continue self.cur.execute(qry, ('%' + delim_wrap(tag) + '%',)) results = self.cur.fetchall() if results: for row in results: tagset += row[0].strip(DELIM).split(DELIM) if len(tagset): for tag in tagset: if tag not in tags and tag not in unique_tags: unique_tags += (tag, ) if not len(unique_tags): return tagstr unique_tags = sorted(unique_tags) print('similar tags:\n') count = 0 for tag in unique_tags: print('%d. 
%s' % (count + 1, unique_tags[count])) count += 1 resp = input('\nselect: ') print() if not resp: return tagstr tagset = resp.split() tags = [tagstr] for index in tagset: try: tags.append(delim_wrap(unique_tags[int(index) - 1])) except: continue return parse_tags(tags) def replace_tag(self, orig, new=None): """Replace original tag by new tags in all records. Remove original tag if new tag is empty. Parameters ---------- orig : str Original tag. new : list Replacement tags. Returns ------- bool True on success, False on failure. """ newtags = DELIM orig = delim_wrap(orig) if new is not None: newtags = parse_tags(new) if orig == newtags: print('Tags are same.') return False # Remove original tag from DB if new tagset reduces to delimiter if newtags == DELIM: return self.delete_tag_at_index(0, orig) # Update bookmarks with original tag query = 'SELECT id, tags FROM bookmarks WHERE tags LIKE ?' self.cur.execute(query, ('%' + orig + '%',)) results = self.cur.fetchall() if results: query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in results: tags = row[1].replace(orig, newtags) tags = parse_tags([tags]) self.cur.execute(query, (tags, row[0],)) print('Index %d updated' % row[0]) self.conn.commit() return True def set_tag(self, cmdstr, taglist): """Append, overwrite, remove tags using the symbols >>, > and << respectively. Parameters ---------- cmdstr : str Command pattern. taglist : list List of tags. Returns ------- int Number of indices updated on success, -1 on failure. """ if not cmdstr or not taglist: return -1 flag = 0 # 0: invalid, 1: append, 2: overwrite, 3: remove index = cmdstr.find('>>') if index == -1: index = cmdstr.find('>') if index != -1: flag = 2 else: index = cmdstr.find('<<') if index != -1: flag = 3 else: flag = 1 if not flag: return -1 tags = DELIM id_list = cmdstr[:index].split() try: for id in id_list: if is_int(id) and int(id) > 0: tags += taglist[int(id) - 1] + DELIM elif '-' in id: vals = [int(x) for x in id.split('-')] if vals[0] > vals[-1]: vals[0], vals[-1] = vals[-1], vals[0] for _id in range(vals[0], vals[-1] + 1): tags += taglist[_id - 1] + DELIM else: return -1 except ValueError: return -1 if flag != 2: index += 1 update_count = 0 query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' try: db_id_list = cmdstr[index + 1:].split() for id in db_id_list: if is_int(id) and int(id) > 0: if flag == 1: if self.append_tag_at_index(id, tags, True): update_count += 1 elif flag == 2: tags = parse_tags([tags]) self.cur.execute(query, (tags, id,)) update_count += self.cur.rowcount else: self.delete_tag_at_index(id, tags, True) update_count += 1 elif '-' in id: vals = [int(x) for x in id.split('-')] if vals[0] > vals[-1]: vals[0], vals[-1] = vals[-1], vals[0] for _id in range(vals[0], vals[-1] + 1): if flag == 1: if self.append_tag_at_index(_id, tags, True): update_count += 1 elif flag == 2: tags = parse_tags([tags]) self.cur.execute(query, (tags, _id,)) update_count += self.cur.rowcount else: if self.delete_tag_at_index(_id, tags, True): update_count += 1 else: return -1 except ValueError: return -1 except sqlite3.IntegrityError: return -1 try: self.conn.commit() except: return -1 return update_count def browse_by_index(self, index=0, low=0, high=0, is_range=False): """Open URL at index or range of indies in browser. Parameters ---------- index : int Index to browse. 0 opens a random bookmark. low : int Actual lower index of range. high : int Higher index of range. is_range : bool A range is passed using low and high arguments. If True, index is ignored. 
            Default is False.

        Returns
        -------
        bool
            True on success, False on failure.
        """

        if is_range:
            if low < 0 or high < 0:
                logerr('Negative range boundary')
                return False

            if low > high:
                low, high = high, low

            try:
                # If range starts from 0 throw an error
                if low <= 0:
                    raise IndexError
                else:
                    qry = 'SELECT URL from bookmarks where id BETWEEN ? AND ?'
                    for row in self.cur.execute(qry, (low, high)):
                        browse(row[0])
                    return True
            except IndexError:
                logerr('Index out of range')
                return False

        if index < 0:
            logerr('Invalid index %d', index)
            return False

        if index == 0:
            qry = 'SELECT id from bookmarks ORDER BY RANDOM() LIMIT 1'
            self.cur.execute(qry)
            result = self.cur.fetchone()

            # Return if no entries in DB
            if result is None:
                print('No bookmarks added yet ...')
                return False

            index = result[0]
            logdbg('Opening random index %d', index)

        qry = 'SELECT URL FROM bookmarks WHERE id = ? LIMIT 1'
        try:
            for row in self.cur.execute(qry, (index,)):
                browse(row[0])
                return True

            logerr('No matching index %d', index)
        except IndexError:
            logerr('No matching index %d', index)

        return False

    def exportdb(self, filepath, taglist=None):
        """Export DB bookmarks to file.

        If destination file name ends with '.db', bookmarks are
        exported to a Buku database file.
        If destination file name ends with '.md', bookmarks are
        exported to a markdown file.
        Otherwise, bookmarks are exported to a Firefox bookmarks.html
        formatted file.

        Parameters
        ----------
        filepath : str
            Path to export destination file.
        taglist : list, optional
            Specific tags to export.

        Returns
        -------
        bool
            True on success, False on failure.
        """

        count = 0
        timestamp = str(int(time.time()))
        arguments = []
        query = 'SELECT * FROM bookmarks'
        is_tag_valid = False

        if taglist is not None:
            tagstr = parse_tags(taglist)

            if not tagstr or tagstr == DELIM:
                logerr('Invalid tag')
                return False

            tags = tagstr.split(DELIM)
            query += ' WHERE'
            for tag in tags:
                if tag != '':
                    is_tag_valid = True
                    query += " tags LIKE '%' || ? || '%' OR"
                    tag = delim_wrap(tag)
                    arguments += (tag,)

            if is_tag_valid:
                query = query[:-3]
            else:
                query = query[:-6]

        logdbg('(%s), %s', query, arguments)
        self.cur.execute(query, arguments)
        resultset = self.cur.fetchall()
        if not resultset:
            print('No records found')
            return False

        if os.path.exists(filepath):
            resp = read_in(filepath + ' exists. Overwrite? (y/n): ')
            if resp != 'y':
                return False

            if filepath.endswith('.db'):
                os.remove(filepath)

        if filepath.endswith('.db'):
            outdb = BukuDb(dbfile=filepath)
            qry = 'INSERT INTO bookmarks(URL, metadata, tags, desc, flags) VALUES (?, ?, ?, ?, ?)'
            for row in resultset:
                outdb.cur.execute(qry, (row[1], row[2], row[3], row[4], row[5]))
            outdb.conn.commit()
            outdb.close()
            return True

        try:
            outfp = open(filepath, mode='w', encoding='utf-8')
        except Exception as e:
            logerr(e)
            return False

        if filepath.endswith('.md'):
            for row in resultset:
                if row[2] == '':
                    out = '- [Untitled](' + row[1] + ')\n'
                else:
                    out = '- [' + row[2] + '](' + row[1] + ')\n'

                outfp.write(out)
                count += 1
        else:
            # Netscape bookmark file header
            outfp.write('<!DOCTYPE NETSCAPE-Bookmark-file-1>\n\n'
                        '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">\n'
                        '<TITLE>Bookmarks</TITLE>\n'
                        '<H1>Bookmarks</H1>\n\n'
                        '<DL><p>\n'
                        '    <DT><H3 ADD_DATE="%s" LAST_MODIFIED="%s" '
                        'PERSONAL_TOOLBAR_FOLDER="true">Buku bookmarks</H3>\n'
                        '    <DL><p>\n'
                        % (timestamp, timestamp))

            for row in resultset:
                out = ('        <DT><A HREF="%s" ADD_DATE="%s" LAST_MODIFIED="%s"'
                       % (row[1], timestamp, timestamp))
                if row[3] != DELIM:
                    out += ' TAGS="' + row[3][1:-1] + '"'
                out += '>' + row[2] + '</A>\n'
                if row[4] != '':
                    out += '        <DD>' + row[4] + '\n'

                outfp.write(out)
                count += 1

            outfp.write('    </DL><p>\n</DL><p>')

        outfp.close()
        print('%s exported' % count)
        return True

    def traverse_bm_folder(self, sublist, unique_tag, folder_name, add_parent_folder_as_tag):
        """Traverse bookmark folders recursively and find bookmarks.

        Parameters
        ----------
        sublist : list
            List of child entries in bookmark folder.
        unique_tag : str
            Timestamp tag in YYYYMonDD format.
        folder_name : str
            Name of the parent folder.
        add_parent_folder_as_tag : bool
            True if bookmark parent folders should be added as tags else False.

        Returns
        -------
        tuple
            Bookmark record data.
        """

        for item in sublist:
            if item['type'] == 'folder':
                for i in self.traverse_bm_folder(item['children'],
                                                 unique_tag,
                                                 item['name'],
                                                 add_parent_folder_as_tag):
                    yield (i)
            elif item['type'] == 'url':
                try:
                    if is_nongeneric_url(item['url']):
                        continue
                except KeyError:
                    continue

                tags = ''
                if add_parent_folder_as_tag:
                    tags += folder_name
                if unique_tag:
                    tags += DELIM + unique_tag
                yield (item['url'], item['name'], parse_tags([tags]), None, 0, True)

    def load_chrome_database(self, path, unique_tag, add_parent_folder_as_tag):
        """Open Chrome Bookmarks json file and import data.

        Parameters
        ----------
        path : str
            Path to Google Chrome bookmarks file.
        unique_tag : str
            Timestamp tag in YYYYMonDD format.
        add_parent_folder_as_tag : bool
            True if bookmark parent folders should be added as tags else False.
        """

        with open(path, 'r') as datafile:
            data = json.load(datafile)

        roots = data['roots']
        for entry in roots:
            for item in self.traverse_bm_folder(roots[entry]['children'],
                                                unique_tag,
                                                roots[entry]['name'],
                                                add_parent_folder_as_tag):
                self.add_rec(*item)

    def load_firefox_database(self, path, unique_tag, add_parent_folder_as_tag):
        """Connect to Firefox sqlite db and import bookmarks into BukuDb.

        Parameters
        ----------
        path : str
            Path to Firefox bookmarks sqlite database.
        unique_tag : str
            Timestamp tag in YYYYMonDD format.
        add_parent_folder_as_tag : bool
            True if bookmark parent folders should be added as tags else False.
        """

        # Connect to input DB
        if sys.version_info >= (3, 4, 4):
            # Python 3.4.4 and above
            conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True)
        else:
            conn = sqlite3.connect(path)

        cur = conn.cursor()
        res = cur.execute('SELECT DISTINCT fk, parent, title FROM moz_bookmarks WHERE type=1')
        # get id's and remove duplicates
        for row in res.fetchall():
            # get the url
            res = cur.execute('SELECT url FROM moz_places where id={}'.format(row[0]))
            url = res.fetchone()[0]
            if is_nongeneric_url(url):
                continue

            # get tags
            res = cur.execute('SELECT parent FROM moz_bookmarks WHERE fk={} AND title IS NULL'.format(row[0]))
            bm_tag_ids = [tid for item in res.fetchall() for tid in item]

            bookmark_tags = []
            for bm_tag_id in bm_tag_ids:
                res = cur.execute('SELECT title FROM moz_bookmarks WHERE id={}'.format(bm_tag_id))
                bookmark_tags.append(res.fetchone()[0])

            if add_parent_folder_as_tag:
                # add folder name
                res = cur.execute('SELECT title FROM moz_bookmarks WHERE id={}'.format(row[1]))
                bookmark_tags.append(res.fetchone()[0])

            if unique_tag:
                # add timestamp tag
                bookmark_tags.append(unique_tag)

            formatted_tags = [DELIM + tag for tag in bookmark_tags]
            tags = parse_tags(formatted_tags)

            # get the title
            if row[2]:
                title = row[2]
            else:
                title = ''

            self.add_rec(url, title, tags, None, 0, True)
        try:
            cur.close()
            conn.close()
        except Exception as e:
            logerr(e)

    def auto_import_from_browser(self):
        """Import bookmarks from a browser default database file.

        Supports Firefox and Google Chrome.

        Returns
        -------
        bool
            True on success, False on failure.
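
        Notes
        -----
        Only default browser profiles are probed: Chrome's 'Default'
        profile and the Firefox profile folder ending in '.default'.
        Custom profile locations are not auto-detected.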
""" FF_BM_DB_PATH = None if sys.platform.startswith('linux'): GC_BM_DB_PATH = '~/.config/google-chrome/Default/Bookmarks' DEFAULT_FF_FOLDER = os.path.expanduser('~/.mozilla/firefox') profile = get_firefox_profile_name(DEFAULT_FF_FOLDER) if profile: FF_BM_DB_PATH = '~/.mozilla/firefox/{}.default/places.sqlite'.format(profile) elif sys.platform == 'darwin': GC_BM_DB_PATH = '~/Library/Application Support/Google/Chrome/Default/Bookmarks' DEFAULT_FF_FOLDER = os.path.expanduser('~/Library/Application Support/Firefox') profile = get_firefox_profile_name(DEFAULT_FF_FOLDER) if profile: FF_BM_DB_PATH = '~/Library/Application Support/Firefox/{}.default/places.sqlite'.format(profile) elif sys.platform == 'win32': username = os.getlogin() GC_BM_DB_PATH = 'C:/Users/{}/AppData/Local/Google/Chrome/User Data/Default/Bookmarks'.format(username) DEFAULT_FF_FOLDER = 'C:/Users/{}/AppData/Roaming/Mozilla/Firefox/Profiles'.format(username) profile = get_firefox_profile_name(DEFAULT_FF_FOLDER) if profile: FF_BM_DB_PATH = os.path.join(DEFAULT_FF_FOLDER, '{}.default/places.sqlite'.format(profile)) else: logerr('Buku does not support {} yet'.format(sys.platform)) self.close_quit(1) if self.chatty: newtag = gen_auto_tag() resp = input('Add parent folder names as tags? (y/n): ') else: newtag = None resp = 'y' add_parent_folder_as_tag = (resp == 'y') resp = 'y' try: if self.chatty: resp = input('Import bookmarks from google chrome? (y/n): ') if resp == 'y': bookmarks_database = os.path.expanduser(GC_BM_DB_PATH) if not os.path.exists(bookmarks_database): raise FileNotFoundError self.load_chrome_database(bookmarks_database, newtag, add_parent_folder_as_tag) except Exception: print('Could not import bookmarks from google-chrome') try: if self.chatty: resp = input('Import bookmarks from firefox? (y/n): ') if resp == 'y': bookmarks_database = os.path.expanduser(FF_BM_DB_PATH) if not os.path.exists(bookmarks_database): raise FileNotFoundError self.load_firefox_database(bookmarks_database, newtag, add_parent_folder_as_tag) except Exception: print('Could not import bookmarks from firefox') self.conn.commit() if newtag: print('\nAuto-generated tag: %s' % newtag) def importdb(self, filepath, tacit=False): """Import bookmarks from a html or a markdown file. Supports Firefox, Google Chrome, and IE exported html bookmarks. Supports markdown files with extension '.md'. Supports importing bookmarks from another Buku database file. Parameters ---------- filepath : str Path to file to import. tacit : bool, optional If True, no questions asked and folder names are automatically imported as tags from bookmarks html. If True, automatic timestamp tag is NOT added. Default is False. Returns ------- bool True on success, False on failure. """ if filepath.endswith('.db'): return self.mergedb(filepath) if not tacit: newtag = gen_auto_tag() else: newtag = None if filepath.endswith('.md'): for item in import_md(filepath=filepath, newtag=newtag): self.add_rec(*item) self.conn.commit() else: try: import bs4 with open(filepath, mode='r', encoding='utf-8') as infp: soup = bs4.BeautifulSoup(infp, 'html.parser') except ImportError: logerr('Beautiful Soup not found') return False except Exception as e: logerr(e) return False if not tacit: resp = input('Add parent folder names as tags? 
(y/n): ') else: resp = 'y' add_parent_folder_as_tag = (resp == 'y') for item in import_html(soup, add_parent_folder_as_tag, newtag): self.add_rec(*item) self.conn.commit() infp.close() if newtag: print('\nAuto-generated tag: %s' % newtag) return True def mergedb(self, path): """Merge bookmarks from another Buku database file. Parameters ---------- path : str Path to DB file to merge. Returns ------- bool True on success, False on failure. """ try: # Connect to input DB if sys.version_info >= (3, 4, 4): # Python 3.4.4 and above indb_conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True) else: indb_conn = sqlite3.connect(path) indb_cur = indb_conn.cursor() indb_cur.execute('SELECT * FROM bookmarks') except Exception as e: logerr(e) return False resultset = indb_cur.fetchall() if resultset: for row in resultset: self.add_rec(row[1], row[2], row[3], row[4], row[5], True) self.conn.commit() try: indb_cur.close() indb_conn.close() except Exception: pass return True def tnyfy_url(self, index=0, url=None, shorten=True): """Shorten a URL using Google URL shortener. Parameters ---------- index : int, optional (if URL is provided) DB index of the bookmark with the URL to shorten. Default is 0. url : str, optional (if index is provided) URL to shorten. shorten : bool, optional True to shorten, False to expand. Default is False. Returns ------- str Shortened url on success, None on failure. """ if not index and not url: logerr('Either a valid DB index or URL required') return None if index: self.cur.execute('SELECT url FROM bookmarks WHERE id = ? LIMIT 1', (index,)) results = self.cur.fetchall() if not results: return None url = results[0][0] proxies = { 'https': os.environ.get('https_proxy'), } from urllib.parse import quote_plus as qp urlbase = 'https://tny.im/yourls-api.php?action=' if shorten: _u = urlbase + 'shorturl&format=simple&url=' + qp(url) else: _u = urlbase + 'expand&format=simple&shorturl=' + qp(url) try: r = requests.post(_u, headers={ 'content-type': 'application/json', 'User-Agent': USER_AGENT }, proxies=proxies) except Exception as e: logerr(e) return None if r.status_code != 200: logerr('[%s] %s', r.status_code, r.reason) return None return r.text def fixtags(self): """Undocumented API to fix tags set in earlier versions. Functionalities: 1. Remove duplicate tags 2. Sort tags 3. Use lower case to store tags """ to_commit = False self.cur.execute('SELECT id, tags FROM bookmarks ORDER BY id ASC') resultset = self.cur.fetchall() query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in resultset: oldtags = row[1] if oldtags == DELIM: continue tags = parse_tags([oldtags]) if tags == oldtags: continue self.cur.execute(query, (tags, row[0],)) to_commit = True if to_commit: self.conn.commit() def close(self): """Close a DB connection.""" if self.conn is not None: try: self.cur.close() self.conn.close() except Exception: # ignore errors here, we're closing down pass def close_quit(self, exitval=0): """Close a DB connection and exit. Parameters ---------- exitval : int, optional Program exit value. """ if self.conn is not None: try: self.cur.close() self.conn.close() except Exception: # ignore errors here, we're closing down pass sys.exit(exitval) class ExtendedArgumentParser(argparse.ArgumentParser): """Extend classic argument parser.""" @staticmethod def program_info(file=sys.stdout): """Print program info. Parameters ---------- file : file, optional File to write program info to. Default is sys.stdout. 
""" if sys.platform == 'win32' and file == sys.stdout: file = sys.stderr file.write(''' SYMBOLS: > url + comment # tags Version %s Copyright © 2015-2017 %s License: %s Webpage: https://github.com/jarun/Buku ''' % (__version__, __author__, __license__)) @staticmethod def prompt_help(file=sys.stdout): """Print prompt help. Parameters ---------- file : file, optional File to write program info to. Default is sys.stdout. """ file.write(''' PROMPT KEYS: 1-N browse search result indices and/or ranges a open all results in browser s keyword [...] search for records with ANY keyword S keyword [...] search for records with ALL keywords d match substrings ('pen' matches 'opened') r expression run a regex search t [...] search bookmarks by tags or show taglist list index after a tag listing shows records with the tag o id|range [...] browse bookmarks by indices and/or ranges p id|range [...] print bookmarks by indices and/or ranges g [taglist id|range ...] [>>|>|<<] record id|range [...] append, set, remove (all or specific) tags w [editor|id] edit and add or update a bookmark ? show this help q, ^D, double Enter exit buku ''') @staticmethod def is_colorstr(arg): """Check if a string is a valid color string. Parameters ---------- arg : str Color string to validate. Returns ------- str Same color string that was passed as an argument. Raises ------ ArgumentTypeError If the arg is not a valid color string. """ try: assert len(arg) == 5 for c in arg: assert c in COLORMAP except AssertionError: raise argparse.ArgumentTypeError('%s is not a valid color string' % arg) return arg # Help def print_help(self, file=sys.stdout): """Print help prompt. Parameters ---------- file : file, optional File to write program info to. Default is sys.stdout. """ super(ExtendedArgumentParser, self).print_help(file) self.program_info(file) # ---------------- # Helper functions # ---------------- def get_firefox_profile_name(path): """List folder and detect default Firefox profile name. Returns ------- profile : str Firefox profile name. """ try: names = os.listdir(path) profile = [name[:-8] for name in names if name.endswith('.default')][0] except FileNotFoundError: profile = None return profile def walk(root): """Recursively iterate over json. Parameters ---------- root : json element Base node of the json data. """ for element in root['children']: if element['type'] == 'url': url = element['url'] title = element['name'] yield (url, title, None, None, 0, True) else: walk(element) def is_bad_url(url): """Check if URL is malformed. .. note:: This API is not bulletproof but works in most cases. Parameters ---------- url : str URL to scan. Returns ------- bool True if URL is malformed, False otherwise. """ # Get the netloc token netloc = parse_url(url).netloc if not netloc: # Try of prepend '//' and get netloc netloc = parse_url('//' + url).netloc if not netloc: return True logdbg('netloc: %s', netloc) # netloc cannot start or end with a '.' if netloc.startswith('.') or netloc.endswith('.'): return True # netloc should have at least one '.' if netloc.rfind('.') < 0: return True return False def is_nongeneric_url(url): """Returns True for URLs which are non-http and non-generic. Parameters ---------- url : str URL to scan. Returns ------- bool True if URL is a non-generic URL, False otherwise. """ ignored_prefix = ['place:', 'file://', 'apt:'] for prefix in ignored_prefix: if url.startswith(prefix): return True return False def is_ignored_mime(url): """Check if URL links to ignored MIME. .. 
def is_bad_url(url):
    """Check if URL is malformed.

    .. note:: This API is not bulletproof but works in most cases.

    Parameters
    ----------
    url : str
        URL to scan.

    Returns
    -------
    bool
        True if URL is malformed, False otherwise.
    """
    # Get the netloc token
    netloc = parse_url(url).netloc
    if not netloc:
        # Try to prepend '//' and get netloc
        netloc = parse_url('//' + url).netloc
        if not netloc:
            return True

    logdbg('netloc: %s', netloc)

    # netloc cannot start or end with a '.'
    if netloc.startswith('.') or netloc.endswith('.'):
        return True

    # netloc should have at least one '.'
    if netloc.rfind('.') < 0:
        return True

    return False


def is_nongeneric_url(url):
    """Returns True for URLs which are non-http and non-generic.

    Parameters
    ----------
    url : str
        URL to scan.

    Returns
    -------
    bool
        True if URL is a non-generic URL, False otherwise.
    """
    ignored_prefix = ['place:', 'file://', 'apt:']

    for prefix in ignored_prefix:
        if url.startswith(prefix):
            return True

    return False


def is_ignored_mime(url):
    """Check if URL links to ignored MIME.

    .. note:: Only a 'HEAD' request is made for these URLs.

    Parameters
    ----------
    url : str
        URL to scan.

    Returns
    -------
    bool
        True if URL links to ignored MIME, False otherwise.
    """
    for mime in SKIP_MIMES:
        if url.lower().endswith(mime):
            logdbg('matched MIME: %s', mime)
            return True

    return False


def get_page_title(resp):
    """Invoke HTML parser and extract title from HTTP response.

    Parameters
    ----------
    resp : HTTP response
        Response from GET request.

    Returns
    -------
    str
        Title fetched from parsed page.
    """
    parser = BukuHTMLParser()
    try:
        parser.feed(resp.data.decode(errors='replace'))
    except Exception as e:
        # Suppress Exception due to intentional self.reset() in BHTMLParser
        if (logger.isEnabledFor(logging.DEBUG) and
                str(e) != 'we should not get here!'):
            logerr('get_page_title(): %s', e)
    finally:
        return parser.parsed_title


def gen_headers():
    """Generate headers for network connection."""
    global myheaders, myproxy

    myheaders = {
        'Accept-Encoding': 'gzip,deflate',
        'User-Agent': USER_AGENT,
        'Accept': '*/*',
        'Cookie': '',
        'DNT': '1'
    }

    myproxy = os.environ.get('https_proxy')
    if myproxy:
        try:
            url = parse_url(myproxy)
        except Exception as e:
            logerr(e)
            return

        # Strip username and password (if present) and update headers
        if url.auth:
            myproxy = myproxy.replace(url.auth + '@', '')
            auth_headers = make_headers(basic_auth=url.auth)
            myheaders.update(auth_headers)

        logdbg('proxy: [%s]', myproxy)


def get_PoolManager():
    """Creates a pool manager with proxy support, if applicable.

    Returns
    -------
    ProxyManager or PoolManager
        ProxyManager if https_proxy is defined, PoolManager otherwise.
    """
    if myproxy:
        return urllib3.ProxyManager(myproxy, num_pools=1, headers=myheaders)

    return urllib3.PoolManager(num_pools=1, headers=myheaders)


def network_handler(url, http_head=False):
    """Handle server connection and redirections.

    Parameters
    ----------
    url : str
        URL to fetch.
    http_head : bool
        If True, send only HTTP HEAD request. Default is False.

    Returns
    -------
    tuple
        (title, recognized mime, bad url).
    """
    page_title = None
    # Initialize to None so the finally block below cannot hit
    # an unbound name if get_PoolManager() itself raises
    http_handler = None

    if is_bad_url(url):
        return ('', 0, 1)

    if is_ignored_mime(url) or http_head:
        method = 'HEAD'
    else:
        method = 'GET'

    if not myheaders:
        gen_headers()

    try:
        http_handler = get_PoolManager()

        while True:
            resp = http_handler.request(method, url, timeout=40)

            if resp.status == 200:
                if method == 'GET':
                    page_title = get_page_title(resp)
            elif resp.status == 403 and url.endswith('/'):
                # HTTP response Forbidden
                # Handle URLs in the form of https://www.domain.com/
                # which fail when trying to fetch resource '/'
                # retry without trailing '/'
                logdbg('Received status 403: retrying...')
                # Remove trailing /
                url = url[:-1]
                resp.release_conn()
                continue
            else:
                logerr('[%s] %s', resp.status, resp.reason)

            if resp:
                resp.release_conn()

            break
    except Exception as e:
        logerr('network_handler(): %s', e)
    finally:
        # Note: returning from finally intentionally suppresses any
        # in-flight exception (it has already been logged above)
        if http_handler:
            http_handler.clear()
        if method == 'HEAD':
            return ('', 1, 0)
        if page_title is None:
            return ('', 0, 0)
        return (page_title.strip().replace('\n', ''), 0, 0)


def parse_tags(keywords=[]):
    """Format and get tag string from tokens.

    Parameters
    ----------
    keywords : list, optional
        List of tags to parse. Default is empty list.

    Returns
    -------
    str
        Comma-delimited string of tags.
    DELIM : str
        If no keywords, returns the delimiter.
    None
        If keywords is None.
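
    Examples
    --------
    A sketch of the expected behavior: duplicate and case-variant tags
    collapse, and the result is sorted and wrapped in DELIM:

    >>> parse_tags(['fruit,', 'Fruit,', 'veg'])
    ',fruit,veg,'
    >>> parse_tags([])
    ','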
""" if keywords is None: return None if not keywords: return DELIM tags = DELIM # Cleanse and get the tags tagstr = ' '.join(keywords) marker = tagstr.find(DELIM) while marker >= 0: token = tagstr[0:marker] tagstr = tagstr[marker + 1:] marker = tagstr.find(DELIM) token = token.strip() if token == '': continue tags += token + DELIM tagstr = tagstr.strip() if tagstr != '': tags += tagstr + DELIM logdbg('keywords: %s', keywords) logdbg('parsed tags: [%s]', tags) if tags == DELIM: return tags orig_tags = tags.strip(DELIM).split(DELIM) # Add unique tags in lower case unique_tags = [] for tag in orig_tags: tag = tag.lower() if tag not in unique_tags: unique_tags += (tag, ) # Sort the tags sorted_tags = sorted(unique_tags) # Wrap with delimiter return delim_wrap(DELIM.join(sorted_tags)) def prep_tag_search(tags): """Prepare list of tags to search and determine search operator. Parameters ---------- tags : str String list of tags to search. Returns ------- tuple (list of formatted tags to search, a string indicating query search operator (either OR or AND), a regex string of tags or None if ' - ' delimiter not in tags). """ excluded_tags = None if ' - ' in tags: tags, excluded_tags = tags.split(' - ', 1) excluded_taglist = [delim_wrap(t.strip()) for t in excluded_tags.split(',')] # join with pipe to construct regex string excluded_tags = '|'.join(excluded_taglist) search_operator = 'OR' tag_delim = ',' if ' + ' in tags: search_operator = 'AND' tag_delim = ' + ' tags = [delim_wrap(t.strip()) for t in tags.split(tag_delim)] return tags, search_operator, excluded_tags def gen_auto_tag(): """Generate a tag in Year-Month-Date format. Returns ------- str New tag as YYYYMonDD. """ import calendar as cal t = time.localtime() return ('%d%s%02d' % (t.tm_year, cal.month_abbr[t.tm_mon], t.tm_mday)) def edit_at_prompt(obj, nav, suggest=False): """Edit and add or update a bookmark. Parameters ---------- obj : BukuDb instance A valid instance of BukuDb class. nav : str Navigation command argument passed at prompt by user. suggest : bool, optional If True, suggest similar tags on new bookmark addition. """ if nav == 'w': editor = get_system_editor() if not is_editor_valid(editor): return elif is_int(nav[2:]): obj.edit_update_rec(int(nav[2:])) return else: editor = nav[2:] result = edit_rec(editor, '', None, DELIM, None) if result is not None: url, title, tags, desc = result if suggest: tags = obj.suggest_similar_tag(tags) obj.add_rec(url, title, tags, desc) def taglist_subprompt(obj, noninteractive=False): """Additional prompt to show unique tag list. Parameters ---------- obj : BukuDb instance A valid instance of BukuDb class. noninteractive : bool, optional If True, does not seek user input. Default is False. Returns ------- str New command string. """ unique_tags, dic = obj.get_tag_all() new_results = True while True: if new_results: if not unique_tags: count = 0 print('0 tags') else: count = 1 for tag in unique_tags: print('%6d. %s (%d)' % (count, tag, dic[tag])) count += 1 print() if noninteractive: return try: nav = read_in(promptmsg) if not nav: nav = read_in(promptmsg) if not nav: # Quit on double enter return 'q' nav = nav.strip() except EOFError: return 'q' if is_int(nav) and int(nav) > 0 and int(nav) < count: return 't ' + unique_tags[int(nav) - 1] elif is_int(nav): print('No matching index %s' % nav) new_results = False elif nav == 't': new_results = True elif (nav == 'q' or nav == 'd' or nav == '?' 
or nav.startswith('s ') or nav.startswith('S ') or nav.startswith('r ') or nav.startswith('t ') or nav.startswith('o ') or nav.startswith('p ') or nav.startswith('g ')) or nav == 'w' or nav.startswith('w '): return nav else: print('Invalid input') new_results = False def prompt(obj, results, noninteractive=False, deep=False, subprompt=False, suggest=False): """Show each matching result from a search and prompt. Parameters ---------- obj : BukuDb instance A valid instance of BukuDb class. results : list Search result set from a DB query. noninteractive : bool, optional If True, does not seek user input. Default is False. deep : bool, optional Use deep search. Default is False. subprompt : bool, optional If True, jump directly to subprompt. suggest : bool, optional If True, suggest similar tags on edit and add bookmark. """ if not type(obj) is BukuDb: logerr('Not a BukuDb instance') return new_results = True while True: if not subprompt: if new_results: if results: count = 0 for row in results: count += 1 print_single_rec(row, count) else: print('0 results') if noninteractive: return try: nav = read_in(promptmsg) if not nav: nav = read_in(promptmsg) if not nav: # Quit on double enter break nav = nav.strip() except EOFError: return else: nav = 't' subprompt = False # list tags with 't' if nav == 't': nav = taglist_subprompt(obj, noninteractive) if noninteractive: return # search ANY match with new keywords if nav.startswith('s '): results = obj.searchdb(nav[2:].split(), False, deep) new_results = True continue # search ALL match with new keywords if nav.startswith('S '): results = obj.searchdb(nav[2:].split(), True, deep) new_results = True continue # regular expressions search with new keywords if nav.startswith('r '): results = obj.searchdb(nav[2:].split(), True, regex=True) new_results = True continue # tag search with new keywords if nav.startswith('t '): results = obj.search_by_tag(nav[2:]) new_results = True continue # quit with 'q' if nav == 'q': return # No new results fetched beyond this point new_results = False # toggle deep search with 'd' if nav == 'd': deep = not deep if deep: print('deep search on') else: print('deep search off') continue # Show help with '?' 
if nav == '?': ExtendedArgumentParser.prompt_help(sys.stdout) continue # Edit and add or update if nav == 'w' or nav.startswith('w '): edit_at_prompt(obj, nav, suggest) continue # Append or overwrite tags if nav.startswith('g '): unique_tags, dic = obj.get_tag_all() _count = obj.set_tag(nav[2:], unique_tags) if _count == -1: print('Invalid input') else: print('%d updated' % _count) continue # Print bookmarks by DB index if nav.startswith('p '): id_list = nav[2:].split() try: for id in id_list: if is_int(id): obj.print_rec(int(id)) elif '-' in id: vals = [int(x) for x in id.split('-')] obj.print_rec(0, vals[0], vals[-1], True) else: print('Invalid input') except ValueError: print('Invalid input') continue # Browse bookmarks by DB index if nav.startswith('o '): id_list = nav[2:].split() try: for id in id_list: if is_int(id): obj.browse_by_index(int(id)) elif '-' in id: vals = [int(x) for x in id.split('-')] obj.browse_by_index(0, vals[0], vals[-1], True) else: print('Invalid input') except ValueError: print('Invalid input') continue # Nothing to browse if there are no results if not results: print('Not in a search context') continue # open all results and re-prompt with 'a' if nav == 'a': for index in range(0, count): browse(results[index][1]) continue # iterate over white-space separated indices for nav in nav.split(): if is_int(nav): index = int(nav) - 1 if index < 0 or index >= count: print('No matching index %s' % nav) continue browse(results[index][1]) elif '-' in nav: try: vals = [int(x) for x in nav.split('-')] if vals[0] > vals[-1]: vals[0], vals[-1] = vals[-1], vals[0] for _id in range(vals[0]-1, vals[-1]): if 0 <= _id < count: browse(results[_id][1]) else: print('No matching index %d' % (_id + 1)) except ValueError: print('Invalid input') break else: print('Invalid input') break def print_single_rec(row, idx=0): # NOQA """Print a single DB record. Handles both search results and individual record. Parameters ---------- row : tuple Tuple representing bookmark record data. idx : int, optional Search result index. If 0, print with DB index. Default is 0. """ str_list = [] # Start with index and title if idx != 0: id_title_res = ID_str % (idx, row[2] if row[2] else 'Untitled', row[0]) else: id_title_res = ID_DB_str % (row[0], row[2] if row[2] else 'Untitled') # Indicate if record is immutable if row[5] & 1: id_title_res = MUTE_str % (id_title_res) else: id_title_res += '\n' str_list.append(id_title_res) str_list.append(URL_str % (row[1])) if row[4]: str_list.append(DESC_str % (row[4])) if row[3] != DELIM: str_list.append(TAG_str % (row[3][1:-1])) print(''.join(str_list)) def format_json(resultset, single_record=False, field_filter=0): """Return results in json format. Parameters ---------- resultset : list Search results from DB query. single_record : bool, optional If True, indicates only one record. Default is False. Returns ------- json Record(s) in json format. 
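
    Examples
    --------
    A minimal sketch with a hand-rolled result row of the form
    (index, URL, title, DELIM-wrapped tags, description):

    >>> print(format_json([(1, 'https://example.com', 'Example', ',news,', '')],
    ...                   field_filter=1))
    [
        {
            "uri": "https://example.com"
        }
    ]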
""" if single_record: marks = {} for row in resultset: if field_filter == 1: marks['uri'] = row[1] elif field_filter == 2: marks['uri'] = row[1] marks['tags'] = row[3][1:-1] elif field_filter == 3: marks['title'] = row[2] elif field_filter == 4: marks['uri'] = row[1] marks['tags'] = row[3][1:-1] marks['title'] = row[2] else: marks['index'] = row[0] marks['uri'] = row[1] marks['title'] = row[2] marks['description'] = row[4] marks['tags'] = row[3][1:-1] else: marks = [] for row in resultset: if field_filter == 1: record = {'uri': row[1]} elif field_filter == 2: record = {'uri': row[1], 'tags': row[3][1:-1]} elif field_filter == 3: record = {'title': row[2]} elif field_filter == 4: record = {'uri': row[1], 'title': row[2], 'tags': row[3][1:-1]} else: record = {'index': row[0], 'uri': row[1], 'title': row[2], 'description': row[4], 'tags': row[3][1:-1]} marks.append(record) return json.dumps(marks, sort_keys=True, indent=4) def is_int(string): """Check if a string is a digit. string : str Input string to check. Returns ------- bool True on success, False on exception. """ try: int(string) return True except Exception: return False def browse(url): """Duplicate stdin, stdout and open URL in default browser. .. note:: Duplicates stdin and stdout in order to suppress showing errors on the terminal. Parameters ---------- url : str URL to open in browser. Attributes ---------- suppress_browser_output : bool True if a text based browser is detected. Must be initialized (as applicable) to use the API. """ if not parse_url(url).scheme: # Prefix with 'http://' if no scheme # Otherwise, opening in browser fails anyway # We expect http to https redirection # will happen for https-only websites logerr('scheme missing in URI, trying http') url = 'http://' + url if browse.suppress_browser_output: _stderr = os.dup(2) os.close(2) _stdout = os.dup(1) os.close(1) fd = os.open(os.devnull, os.O_RDWR) os.dup2(fd, 2) os.dup2(fd, 1) try: if sys.platform != 'win32': webbrowser.open(url, new=2) else: # On Windows, the webbrowser module does not fork. # Use threads instead. def browserthread(): webbrowser.open(url, new=2) t = threading.Thread(target=browserthread) t.start() except Exception as e: logerr('browse(): %s', e) finally: if browse.suppress_browser_output: os.close(fd) os.dup2(_stderr, 2) os.dup2(_stdout, 1) def check_upstream_release(): """Check and report the latest upstream release version.""" proxies = { 'https': os.environ.get('https_proxy'), } try: r = requests.get( 'https://api.github.com/repos/jarun/buku/releases?per_page=1', proxies=proxies ) except Exception as e: logerr(e) return if r.status_code != 200: logerr('[%s] %s', r.status_code, r.reason) else: latest = r.json()[0]['tag_name'] if latest == 'v' + __version__: print('This is the latest release') else: print('Latest upstream release is %s' % latest) def regexp(expr, item): """Perform a regular expression search. Parameters ---------- expr : regex Regular expression to search for. item : str Item on which to perform regex search. Returns ------- bool True if result of search is not None, returns None otherwise. """ return re.search(expr, item, re.IGNORECASE) is not None def delim_wrap(token): """Returns token string wrapped in delimiters. Parameters ---------- token : str String item to wrap with DELIM. Returns ------- str Token string wrapped by DELIM. """ return DELIM + token + DELIM def read_in(msg): """A wrapper to handle input() with interrupts disabled. Parameters ---------- msg : str String to pass to to input(). 
""" disable_sigint_handler() message = None try: message = input(msg) except KeyboardInterrupt: print('Interrupted.') enable_sigint_handler() return message def sigint_handler(signum, frame): """Custom SIGINT handler. .. note:: Neither signum nor frame are used in this custom handler. However, they are required parameters for signal handlers. Parameters ---------- signum : int Signal number. frame : frame object or None. """ global interrupted interrupted = True print('\nInterrupted.', file=sys.stderr) # Do a hard exit from here os._exit(1) DEFAULT_HANDLER = signal.signal(signal.SIGINT, sigint_handler) def disable_sigint_handler(): """Disable signint handler.""" signal.signal(signal.SIGINT, DEFAULT_HANDLER) def enable_sigint_handler(): """Enable sigint handler.""" signal.signal(signal.SIGINT, sigint_handler) # --------------------- # Editor mode functions # --------------------- def get_system_editor(): """Returns default system editor is $EDITOR is set.""" return os.environ.get('EDITOR', 'none') def is_editor_valid(editor): """Check if the editor string is valid. Parameters ---------- editor : str Editor string. Returns ------- bool True if string is valid, else False. """ if editor == 'none': logerr('EDITOR is not set') return False if editor == '0': logerr('Cannot edit index 0') return False return True def to_temp_file_content(url, title_in, tags_in, desc): """Generate temporary file content string. Parameters ---------- url : str URL to open. title_in : str Title to add manually. tags_in : str Comma-separated tags to add manually. desc : str String description. Returns ------- str Lines as newline separated string. """ strings = [('# Lines beginning with "#" will be stripped.\n' '# Add URL in next line (single line).'), ] # URL if url is not None: strings += (url,) # TITLE strings += (('# Add TITLE in next line (single line). Leave blank to web fetch, "-" for no title.'),) if title_in is None: title_in = '' elif title_in == '': title_in = '-' strings += (title_in,) # TAGS strings += ('# Add comma-separated TAGS in next line (single line).',) strings += (tags_in.strip(DELIM),) if not None else '' # DESC strings += ('# Add COMMENTS in next line(s).',) if desc is not None and desc != '': strings += (desc,) else: strings += ('\n',) return '\n'.join(strings) def parse_temp_file_content(content): """Parse and return temporary file content. Parameters ---------- content : str String of content. Returns ------- tuple (url, title, tags, comments) url: URL to open title: string title to add manually tags: string of comma-separated tags to add manually comments: string description """ content = content.split('\n') content = [c for c in content if not c or c[0] != '#'] if not content or content[0].strip() == '': print('Edit aborted') return None url = content[0] title = None if len(content) > 1: title = content[1] if title == '': title = None elif title == '-': title = '' tags = DELIM if len(content) > 2: tags = parse_tags([content[2]]) comments = [] if len(content) > 3: comments = [c for c in content[3:]] # need to remove all empty line that are at the end # and not those in the middle of the text for i in range(len(comments) - 1, -1, -1): if comments[i].strip() != '': break if i == -1: comments = [] else: comments = comments[0:i+1] comments = '\n'.join(comments) return url, title, tags, comments def edit_rec(editor, url, title_in, tags_in, desc): """Edit a bookmark record. Parameters ---------- editor : str Editor to open. URL : str URL to open. title_in : str Title to add manually. 
tags_in : str Comma-separated tags to add manually. desc : str Bookmark description. Returns ------- tuple Parsed results from parse_temp_file_content(). """ import tempfile import subprocess temp_file_content = to_temp_file_content(url, title_in, tags_in, desc) fd, tmpfile = tempfile.mkstemp(prefix='buku-edit-') os.close(fd) try: with open(tmpfile, 'w+', encoding='utf-8') as fp: fp.write(temp_file_content) fp.flush() logdbg('Edited content written to %s', tmpfile) cmd = editor.split(' ') cmd += (tmpfile,) subprocess.call(cmd) with open(tmpfile, 'r', encoding='utf-8') as f: content = f.read() os.remove(tmpfile) except FileNotFoundError: if os.path.exists(tmpfile): os.remove(tmpfile) logerr('Cannot open editor') else: logerr('Cannot open tempfile') return None parsed_content = parse_temp_file_content(content) return parsed_content def setup_logger(logger): """Setup logger with color. Parameters ---------- logger : logger object Logger to colorize. """ def decorate_emit(fn): def new(*args): levelno = args[0].levelno if levelno == logging.DEBUG: color = '\x1b[35m' elif levelno == logging.ERROR: color = '\x1b[31m' elif levelno == logging.WARNING: color = '\x1b[33m' elif levelno == logging.INFO: color = '\x1b[32m' elif levelno == logging.CRITICAL: color = '\x1b[31m' else: color = '\x1b[0m' args[0].msg = '{}[{}]\x1b[0m {}'.format(color, args[0].levelname, args[0].msg) return fn(*args) return new sh = logging.StreamHandler() sh.emit = decorate_emit(sh.emit) logger.addHandler(sh) def piped_input(argv, pipeargs=None): """Handle piped input. Parameters ---------- pipeargs : str """ if not sys.stdin.isatty(): pipeargs += argv print('waiting for input') for s in sys.stdin: pipeargs += s.split() def setcolors(args): """Get colors from user and separate into 'result' list for use in arg.colors. Parameters ---------- args : str Color string. """ Colors = collections.namedtuple('Colors', ' ID_srch, ID_str, URL_str, DESC_str, TAG_str') colors = Colors(*[COLORMAP[c] for c in args]) id_col = colors.ID_srch id_str_col = colors.ID_str url_col = colors.URL_str desc_col = colors.DESC_str tag_col = colors.TAG_str result = [id_col, id_str_col, url_col, desc_col, tag_col] return result # main starts here def main(): """Main.""" global ID_str, ID_DB_str, MUTE_str, URL_str, DESC_str, TAG_str, promptmsg title_in = None tags_in = None desc_in = None pipeargs = [] colorstr_env = os.getenv('BUKU_COLORS') try: piped_input(sys.argv, pipeargs) except KeyboardInterrupt: pass # If piped input, set argument vector if pipeargs: sys.argv = pipeargs # Setup custom argument parser argparser = ExtendedArgumentParser( description='''Powerful command-line bookmark manager. POSITIONAL ARGUMENTS: KEYWORD search keywords''', formatter_class=argparse.RawTextHelpFormatter, usage='''buku [OPTIONS] [KEYWORD [KEYWORD ...]]''', add_help=False ) HIDE = argparse.SUPPRESS argparser.add_argument('keywords', nargs='*', metavar='KEYWORD', help=HIDE) # --------------------- # GENERAL OPTIONS GROUP # --------------------- general_grp = argparser.add_argument_group( title='GENERAL OPTIONS', description=''' -a, --add URL [tag, ...] bookmark URL with comma-separated tags -u, --update [...] update fields of an existing bookmark accepts indices and ranges refresh the title, if no edit options if no arguments: - update results when used with search - otherwise refresh all titles -w, --write [editor|index] open editor to edit a fresh bookmark to update by index, EDITOR must be set -d, --delete [...] 
remove bookmarks from DB accepts indices or a single range if no arguments: - delete results when used with search - otherwise delete all bookmarks -h, --help show this information and exit -v, --version show the program version and exit''') addarg = general_grp.add_argument addarg('-a', '--add', nargs='+', help=HIDE) addarg('-u', '--update', nargs='*', help=HIDE) addarg('-w', '--write', nargs='?', const=get_system_editor(), help=HIDE) addarg('-d', '--delete', nargs='*', help=HIDE) addarg('-h', '--help', action='store_true', help=HIDE) addarg('-v', '--version', action='version', version=__version__, help=HIDE) # ------------------ # EDIT OPTIONS GROUP # ------------------ edit_grp = argparser.add_argument_group( title='EDIT OPTIONS', description=''' --url keyword bookmark link --tag [+|-] [...] comma-separated tags clear bookmark tagset, if no arguments '+' appends to, '-' removes from tagset --title [...] bookmark title; if no arguments: -a: do not set title, -u: clear title -c, --comment [...] notes or description of the bookmark clears description, if no arguments --immutable N disable title fetch from web on update N=0: mutable (default), N=1: immutable''') addarg = edit_grp.add_argument addarg('--url', nargs=1, help=HIDE) addarg('--tag', nargs='*', help=HIDE) addarg('--title', nargs='*', help=HIDE) addarg('-c', '--comment', nargs='*', help=HIDE) addarg('--immutable', type=int, default=-1, choices={0, 1}, help=HIDE) # -------------------- # SEARCH OPTIONS GROUP # -------------------- search_grp = argparser.add_argument_group( title='SEARCH OPTIONS', description=''' -s, --sany find records with ANY matching keyword this is the default search option -S, --sall find records matching ALL the keywords special keywords - "blank": entries with empty title/tag "immutable": entries with locked title --deep match substrings ('pen' matches 'opens') -r, --sreg run a regex search -t, --stag [tag [,|+] ...] [- tag, ...] search bookmarks by tags use ',' to find entries matching ANY tag use '+' to find entries matching ALL tags excludes entries with tags after ' - ' list all tags, if no search keywords''') addarg = search_grp.add_argument addarg('-s', '--sany', action='store_true', help=HIDE) addarg('-S', '--sall', action='store_true', help=HIDE) addarg('-r', '--sreg', action='store_true', help=HIDE) addarg('--deep', action='store_true', help=HIDE) addarg('-t', '--stag', action='store_true', help=HIDE) # ------------------------ # ENCRYPTION OPTIONS GROUP # ------------------------ crypto_grp = argparser.add_argument_group( title='ENCRYPTION OPTIONS', description=''' -l, --lock [N] encrypt DB in N (default 8) # iterations -k, --unlock [N] decrypt DB in N (default 8) # iterations''') addarg = crypto_grp.add_argument addarg('-k', '--unlock', nargs='?', type=int, const=8, help=HIDE) addarg('-l', '--lock', nargs='?', type=int, const=8, help=HIDE) # ---------------- # POWER TOYS GROUP # ---------------- power_grp = argparser.add_argument_group( title='POWER TOYS', description=''' --ai auto-import from Firefox and Chrome -e, --export file export bookmarks in Firefox format html export markdown, if file ends with '.md' format: [title](url), 1 entry per line export buku DB, if file ends with '.db' use --tag to export only specific tags -i, --import file import Firefox or Chrome bookmarks html import markdown, if file ends with '.md' import buku DB, if file ends with '.db' -p, --print [...] 
show record details by indices, ranges print all bookmarks, if no arguments -n shows the last n results (like tail) -f, --format N limit fields in -p or Json search output N=1: URL, N=2: URL and tag, N=3: title, N=4: URL, title and tag -j, --json Json formatted output for -p and search --colors COLORS set output colors in five-letter string --nc disable color output --np do not show the prompt, run and exit -o, --open [...] browse bookmarks by indices and ranges open a random bookmark, if no arguments --oa browse all search results immediately --replace old new replace old tag with new tag everywhere delete old tag, if new tag not specified --shorten index|URL fetch shortened url from tny.im service --expand index|URL expand a tny.im shortened url --suggest show similar tags when adding bookmarks --tacit reduce verbosity --threads N max network connections in full refresh default N=4, min N=1, max N=10 -V check latest upstream version available -z, --debug show debug information and verbose logs''') addarg = power_grp.add_argument addarg('--ai', action='store_true', help=HIDE) addarg('-e', '--export', nargs=1, help=HIDE) addarg('-i', '--import', nargs=1, dest='importfile', help=HIDE) addarg('-p', '--print', nargs='*', help=HIDE) addarg('-f', '--format', type=int, default=0, choices={1, 2, 3, 4}, help=HIDE) addarg('-j', '--json', action='store_true', help=HIDE) addarg('--colors', dest='colorstr', type=argparser.is_colorstr, default=colorstr_env if colorstr_env else 'oKlxm', metavar='COLORS', help=HIDE) addarg('--nc', action='store_true', help=HIDE) addarg('--np', action='store_true', help=HIDE) addarg('-o', '--open', nargs='*', help=HIDE) addarg('--oa', action='store_true', help=HIDE) addarg('--replace', nargs='+', help=HIDE) addarg('--shorten', nargs=1, help=HIDE) addarg('--expand', nargs=1, help=HIDE) addarg('--suggest', action='store_true', help=HIDE) addarg('--tacit', action='store_true', help=HIDE) addarg('--threads', type=int, default=4, choices=range(1, 11), help=HIDE) addarg('-V', dest='upstream', action='store_true', help=HIDE) addarg('-z', '--debug', action='store_true', help=HIDE) # Undocumented APIs addarg('--fixtags', action='store_true', help=HIDE) addarg('--db', nargs=1, help=HIDE) # Show help and exit if no arguments if len(sys.argv) == 1: argparser.print_help(sys.stdout) sys.exit(1) # Parse the arguments args = argparser.parse_args() # Show help and exit if help requested if args.help: argparser.print_help(sys.stdout) sys.exit(0) # Handle color output preference if args.nc: logging.basicConfig(format='[%(levelname)s] %(message)s') else: # Set colors ID = setcolors(args.colorstr)[0] + '%d. ' + COLORMAP['x'] ID_DB_dim = COLORMAP['z'] + '[%s]\n' + COLORMAP['x'] ID_str = ID + setcolors(args.colorstr)[1] + '%s ' + COLORMAP['x'] + ID_DB_dim ID_DB_str = ID + setcolors(args.colorstr)[1] + '%s' + COLORMAP['x'] MUTE_str = '%s \x1b[2m(L)\x1b[0m\n' URL_str = COLORMAP['j'] + ' > ' + setcolors(args.colorstr)[2] + '%s\n' + COLORMAP['x'] DESC_str = COLORMAP['j'] + ' + ' + setcolors(args.colorstr)[3] + '%s\n' + COLORMAP['x'] TAG_str = COLORMAP['j'] + ' # ' + setcolors(args.colorstr)[4] + '%s\n' + COLORMAP['x'] # Enable color in logs setup_logger(logger) # Enable prompt with reverse video promptmsg = '\x1b[7mbuku (? 
for help)\x1b[0m ' # Set up debugging if args.debug: logger.setLevel(logging.DEBUG) logdbg('Version %s', __version__) else: logging.disable(logging.WARNING) urllib3.disable_warnings() # Handle encrypt/decrypt options at top priority if args.lock is not None: BukuCrypt.encrypt_file(args.lock) elif args.unlock is not None: BukuCrypt.decrypt_file(args.unlock) # Set up title if args.title is not None: if args.title: title_in = ' '.join(args.title) else: title_in = '' # Set up tags if args.tag is not None: if args.tag: tags_in = args.tag else: tags_in = [DELIM, ] # Set up comment if args.comment is not None: if args.comment: desc_in = ' '.join(args.comment) else: desc_in = '' # Initialize the database and get handles, set verbose by default bdb = BukuDb(args.json, args.format, not args.tacit, dbfile=args.db[0] if args.db is not None else None, colorize=not args.nc) # Editor mode if args.write is not None: if not is_editor_valid(args.write): bdb.close_quit(1) if is_int(args.write): if not bdb.edit_update_rec(int(args.write), args.immutable): bdb.close_quit(1) elif args.add is None: # Edit and add a new bookmark # Parse tags into a comma-separated string if tags_in: if tags_in[0] == '+': tags = '+' + parse_tags(tags_in[1:]) elif tags_in[0] == '-': tags = '-' + parse_tags(tags_in[1:]) else: tags = parse_tags(tags_in) else: tags = DELIM result = edit_rec(args.write, '', title_in, tags, desc_in) if result is not None: url, title_in, tags, desc_in = result if args.suggest: tags = bdb.suggest_similar_tag(tags) bdb.add_rec(url, title_in, tags, desc_in, args.immutable) # Add record if args.add is not None: if args.url is not None and args.update is None: logerr('Bookmark a single URL at a time') bdb.close_quit(1) # Parse tags into a comma-separated string tags = DELIM keywords = args.add if tags_in is not None: if tags_in[0] == '+': if len(tags_in) > 1: # The case: buku -a url tag1, tag2 --tag + tag3, tag4 tags_in = tags_in[1:] # In case of add, args.add may have URL followed by tags # Add delimiter as url+tags may not end with one keywords = args.add + [DELIM] + tags_in else: keywords = args.add + [DELIM] + tags_in if len(keywords) > 1: tags = parse_tags(keywords[1:]) url = args.add[0] if args.write and not is_int(args.write): result = edit_rec(args.write, url, title_in, tags, desc_in) if result is not None: url, title_in, tags, desc_in = result if args.suggest: tags = bdb.suggest_similar_tag(tags) bdb.add_rec(url, title_in, tags, desc_in, args.immutable) # Enable browser output in case of a text based browser if os.getenv('BROWSER') in ['elinks', 'links', 'lynx', 'w3m', 'links2']: browse.suppress_browser_output = False else: browse.suppress_browser_output = True # Search record search_results = None search_opted = True update_search_results = False if args.sany: # Search URLs, titles, tags for any keyword search_results = bdb.searchdb(args.keywords, False, args.deep) elif args.sall: # Search URLs, titles, tags with all keywords search_results = bdb.searchdb(args.keywords, True, args.deep) elif args.sreg: # Run a regular expression search search_results = bdb.searchdb(args.keywords, regex=True) elif args.stag: # Search bookmarks by tag if args.keywords: search_results = bdb.search_by_tag(' '.join(args.keywords)) else: # Use sub prompt to list all tags prompt(bdb, None, args.np, subprompt=True, suggest=args.suggest) elif args.keywords: search_results = bdb.searchdb(args.keywords, False, args.deep) else: search_opted = False # Add cmdline search options to readline history if search_opted and 
args.keywords: try: readline.add_history(' '.join(args.keywords)) except Exception: pass if search_results: oneshot = args.np to_delete = False # Open all results in browser right away if args.oa # is specified. The has priority over delete/update. # URLs are opened first and updated/deleted later. if args.oa: for row in search_results: browse(row[1]) # In case of search and delete/update, # prompt should be non-interactive # delete gets priority over update if args.delete is not None and not args.delete: oneshot = True to_delete = True elif args.update is not None and not args.update: oneshot = True update_search_results = True if not args.json: prompt(bdb, search_results, oneshot, args.deep) else: # Printing in Json format is non-interactive print(format_json(search_results, field_filter=args.format)) # Delete search results if opted if to_delete: bdb.delete_resultset(search_results) # Update record if args.update is not None: if args.url is not None: url_in = args.url[0] else: url_in = '' # Parse tags into a comma-separated string if tags_in: if tags_in[0] == '+': tags = '+' + parse_tags(tags_in[1:]) elif tags_in[0] == '-': tags = '-' + parse_tags(tags_in[1:]) else: tags = parse_tags(tags_in) else: tags = None # No arguments to --update, update all if not args.update: # Update all records only if search was not opted if not search_opted: bdb.update_rec(0, url_in, title_in, tags, desc_in, args.immutable, args.threads) elif update_search_results and search_results is not None: if not args.tacit: print('Updated results:\n') pos = len(search_results) - 1 while pos >= 0: idx = search_results[pos][0] bdb.update_rec(idx, url_in, title_in, tags, desc_in, args.immutable, args.threads) # Commit at every 200th removal if pos % 200 == 0: bdb.conn.commit() pos -= 1 else: for idx in args.update: if is_int(idx): bdb.update_rec(int(idx), url_in, title_in, tags, desc_in, args.immutable, args.threads) elif '-' in idx: try: vals = [int(x) for x in idx.split('-')] if vals[0] > vals[1]: vals[0], vals[1] = vals[1], vals[0] # Update only once if range starts from 0 (all) if vals[0] == 0: bdb.update_rec(0, url_in, title_in, tags, desc_in, args.immutable, args.threads) else: for _id in range(vals[0], vals[1] + 1): bdb.update_rec(_id, url_in, title_in, tags, desc_in, args.immutable, args.threads) if interrupted: break except ValueError: logerr('Invalid index or range to update') bdb.close_quit(1) if interrupted: break # Delete record if args.delete is not None: if not args.delete: # Attempt delete-all only if search was not opted if not search_opted: bdb.cleardb() elif len(args.delete) == 1 and '-' in args.delete[0]: try: vals = [int(x) for x in args.delete[0].split('-')] if len(vals) == 2: bdb.delete_rec(0, vals[0], vals[1], True) except ValueError: logerr('Invalid index or range to delete') bdb.close_quit(1) else: ids = [] # Select the unique indices for idx in args.delete: if idx not in ids: ids += (idx,) try: # Index delete order - highest to lowest ids.sort(key=lambda x: int(x), reverse=True) for idx in ids: bdb.delete_rec(int(idx)) except ValueError: logerr('Invalid index or range or combination') bdb.close_quit(1) # Print record if args.print is not None: if not args.print: bdb.print_rec(0) else: try: for idx in args.print: if is_int(idx): bdb.print_rec(int(idx)) elif '-' in idx: vals = [int(x) for x in idx.split('-')] bdb.print_rec(0, vals[0], vals[-1], True) except ValueError: logerr('Invalid index or range to print') bdb.close_quit(1) # Replace a tag in DB if args.replace is not None: if 
len(args.replace) == 1: bdb.delete_tag_at_index(0, args.replace[0]) else: bdb.replace_tag(args.replace[0], args.replace[1:]) # Export bookmarks if args.export is not None: if args.tag is None: bdb.exportdb(args.export[0]) elif not args.tag: logerr('Missing tag') else: bdb.exportdb(args.export[0], args.tag) # Import bookmarks if args.importfile is not None: bdb.importdb(args.importfile[0], args.tacit) # Import bookmarks from browser if args.ai: bdb.auto_import_from_browser() # Open URL in browser if args.open is not None: if not args.open: bdb.browse_by_index(0) else: try: for idx in args.open: if is_int(idx): bdb.browse_by_index(int(idx)) elif '-' in idx: vals = [int(x) for x in idx.split('-')] bdb.browse_by_index(0, vals[0], vals[-1], True) except ValueError: logerr('Invalid index or range to open') bdb.close_quit(1) # Shorten URL if args.shorten: if is_int(args.shorten[0]): shorturl = bdb.tnyfy_url(index=int(args.shorten[0])) else: shorturl = bdb.tnyfy_url(url=args.shorten[0]) if shorturl: print(shorturl) # Expand URL if args.expand: if is_int(args.expand[0]): url = bdb.tnyfy_url(index=int(args.expand[0]), shorten=False) else: url = bdb.tnyfy_url(url=args.expand[0], shorten=False) if url: print(url) # Report upstream version if args.upstream: check_upstream_release() # Fix tags if args.fixtags: bdb.fixtags() # Close DB connection and quit bdb.close_quit(0) if __name__ == '__main__': main()
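
# Illustrative command lines mirroring the option groups above
# (a sketch, not an exhaustive reference; run buku --help for details):
#
#   buku -a https://example.com news, daily    # add a bookmark with two tags
#   buku -s kernel debugging                   # records matching ANY keyword
#   buku -S kernel debugging                   # records matching ALL keywords
#   buku -u 15 --title 'New title'             # update title of index 15
#   buku -p 1-5                                # print records at indices 1..5
#   buku -l                                    # encrypt DB (default 8 iterations)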