#!/usr/bin/env python3 # # Bookmark management utility # # Copyright © 2015-2017 Arun Prakash Jana # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Buku. If not, see . import argparse import html.parser as HTMLParser import json import logging import os import re try: import readline readline except ImportError: pass import requests import signal import sqlite3 import sys import threading import urllib3 from urllib3.util import parse_url, make_headers import webbrowser __version__ = '2.9' __author__ = 'Arun Prakash Jana ' __license__ = 'GPLv3' # Global variables interrupted = False # Received SIGINT DELIM = ',' # Delimiter used to store tags in DB SKIP_MIMES = {'.pdf', '.txt'} colorize = True # Allow color output by default # Default colour to print records ID_str = '\x1b[1m\x1b[93m%d. \x1b[0m\x1b[92m%s\x1b[0m \x1b[1m[%s]\x1b[0m\n' ID_DB_str = '\x1b[1m\x1b[93m%d. \x1b[0m\x1b[92m%s\x1b[0m' MUTE_str = '%s \x1b[1m(L)\x1b[0m\n' TITLE_str = '%s \x1b[91m>\x1b[0m %s\n' DESC_str = '%s \x1b[91m+\x1b[0m %s\n' TAG_str = '%s \x1b[91m#\x1b[0m %s\n' # Disguise as Firefox on Ubuntu USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 \ Firefox/51.0' myheaders = None # Default dictionary of headers myproxy = None # Default proxy # Set up logging logging.basicConfig(format='[%(levelname)s] %(message)s') logger = logging.getLogger() logdbg = logger.debug logerr = logger.error class BukuHTMLParser(HTMLParser.HTMLParser): '''Class to parse and fetch the title from a HTML page, if available ''' def __init__(self): HTMLParser.HTMLParser.__init__(self) self.in_title_tag = False self.data = '' self.prev_tag = None self.parsed_title = None def handle_starttag(self, tag, attrs): self.in_title_tag = False if tag == 'title': self.in_title_tag = True self.prev_tag = tag def handle_endtag(self, tag): if tag == 'title': self.in_title_tag = False if self.data != '': self.parsed_title = self.data self.reset() # We have received title data, exit parsing def handle_data(self, data): if self.prev_tag == 'title' and self.in_title_tag: self.data = '%s%s' % (self.data, data) def error(self, message): pass class BukuCrypt: '''Class to handle encryption and decryption of the database file. Functionally a separate entity. Involves late imports in the static functions but it saves ~100ms each time. Given that encrypt/decrypt are not done automatically and any one should be called at a time, this doesn't seem to be an outrageous approach. ''' # Crypto constants BLOCKSIZE = 0x10000 # 64 KB blocks SALT_SIZE = 0x20 CHUNKSIZE = 0x80000 # Read/write 512 KB chunks @staticmethod def get_filehash(filepath): '''Get the SHA256 hash of a file :param filepath: path to the file :return: hash digest of the file ''' from hashlib import sha256 with open(filepath, 'rb') as fp: hasher = sha256() buf = fp.read(BukuCrypt.BLOCKSIZE) while len(buf) > 0: hasher.update(buf) buf = fp.read(BukuCrypt.BLOCKSIZE) return hasher.digest() @staticmethod def encrypt_file(iterations, dbfile=None): '''Encrypt the bookmarks database file :param iterations: number of iterations for key generation :param dbfile: custom database file path (including filename) ''' try: from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.ciphers import (Cipher, modes, algorithms) from getpass import getpass from hashlib import sha256 import struct except ImportError: logerr('cryptography lib(s) missing') sys.exit(1) if iterations < 1: logerr('Iterations must be >= 1') sys.exit(1) if not dbfile: dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db') encfile = '%s.enc' % dbfile db_exists = os.path.exists(dbfile) enc_exists = os.path.exists(encfile) if db_exists and not enc_exists: pass elif not db_exists: logerr('%s missing. Already encrypted?', dbfile) sys.exit(1) else: # db_exists and enc_exists logerr('Both encrypted and flat DB files exist!') sys.exit(1) password = getpass() passconfirm = getpass() if not password or not passconfirm: logerr('Empty password') sys.exit(1) if password != passconfirm: logerr('Passwords do not match') sys.exit(1) try: # Get SHA256 hash of DB file dbhash = BukuCrypt.get_filehash(dbfile) except Exception as e: logerr(e) sys.exit(1) # Generate random 256-bit salt and key salt = os.urandom(BukuCrypt.SALT_SIZE) key = ('%s%s' % (password, salt.decode('utf-8', 'replace'))).encode('utf-8') for _ in range(iterations): key = sha256(key).digest() iv = os.urandom(16) encryptor = Cipher( algorithms.AES(key), modes.CBC(iv), backend=default_backend() ).encryptor() filesize = os.path.getsize(dbfile) try: with open(dbfile, 'rb') as infp, open(encfile, 'wb') as outfp: outfp.write(struct.pack(' index: self.cur.execute(query1, (row[0],)) results = self.cur.fetchall() for row in results: self.cur.execute(query2, (row[0],)) self.cur.execute(query3, (index, row[1], row[2], row[3], row[4],)) if not delay_commit: self.conn.commit() if self.chatty: print('Index %d moved to %d' % (row[0], index)) def delete_rec(self, index, low=0, high=0, is_range=False, delay_commit=False): '''Delete a single record or remove the table if index is None :param index: DB index of deleted entry :param low: actual lower index of range :param high: actual higher index of range :param is_range: a range is passed using low and high arguments :param delay_commit: do not commit to DB, caller's responsibility :return: True on success, False on failure ''' if is_range: # Delete a range of indices # If range starts from 0, delete all records if low == 0: return self.cleardb() try: query = 'DELETE from bookmarks where id BETWEEN ? AND ?' self.cur.execute(query, (low, high)) print('Index %d-%d: %d deleted' % (low, high, self.cur.rowcount)) if not self.cur.rowcount: return False # Compact DB by ascending order of index to ensure # the existing higher indices move only once # Delayed commit is forced for index in range(low, high + 1): self.compactdb(index, delay_commit=True) if not delay_commit: self.conn.commit() except IndexError: logerr('No matching index') return False elif index == 0: # Remove the table return self.cleardb() else: # Remove a single entry try: query = 'DELETE FROM bookmarks WHERE id = ?' self.cur.execute(query, (index,)) if self.cur.rowcount == 1: print('Index %d deleted' % index) self.compactdb(index, delay_commit=True) if not delay_commit: self.conn.commit() else: logerr('No matching index %d', index) return False except IndexError: logerr('No matching index %d', index) return False return True def delete_resultset(self, results): '''Delete search results in descending order of DB index. Indices are expected to be unique and in ascending order. This API forces a delayed commit. :param results: set of results to delete :return: True on success, False on failure ''' resp = read_in('Delete the search results? (y/n): ') if resp != 'y': return False # delete records in reverse order pos = len(results) - 1 while pos >= 0: idx = results[pos][0] self.delete_rec(idx, delay_commit=True) # Commit at every 200th removal if pos % 200 == 0: self.conn.commit() pos -= 1 return True def delete_rec_all(self, delay_commit=False): '''Removes all records in the Bookmarks table :param delay_commit: do not commit to DB, caller responsibility :return: True on success, False on failure ''' try: self.cur.execute('DELETE FROM bookmarks') if not delay_commit: self.conn.commit() return True except Exception as e: logerr('delete_rec_all(): %s', e) return False def cleardb(self): '''Drops the bookmark table if it exists :return: True on success, False on failure ''' resp = read_in('Remove ALL bookmarks? (y/n): ') if resp != 'y': print('No bookmarks deleted') return False self.cur.execute('DROP TABLE if exists bookmarks') self.conn.commit() print('All bookmarks deleted') return True def print_rec(self, index): '''Print bookmark details at index or all bookmarks if index is 0 Note: URL is printed on top because title may be blank :param index: index to print, 0 prints all ''' if index != 0: # Show record at index try: query = 'SELECT * FROM bookmarks WHERE id = ? LIMIT 1' self.cur.execute(query, (index,)) results = self.cur.fetchall() if not results: logerr('No matching index %d', index) return except IndexError: logerr('No matching index %d', index) return if not self.json: for row in results: if self.field_filter == 0: print_record(row) elif self.field_filter == 1: print('%s\t%s' % (row[0], row[1])) elif self.field_filter == 2: print('%s\t%s\t%s' % (row[0], row[1], row[3][1:-1])) elif self.field_filter == 3: print('%s\t%s' % (row[0], row[2])) else: print(format_json(results, True, self.field_filter)) else: # Show all entries self.cur.execute('SELECT * FROM bookmarks') resultset = self.cur.fetchall() if not self.json: if self.field_filter == 0: for row in resultset: print_record(row) elif self.field_filter == 1: for row in resultset: print('%s\t%s' % (row[0], row[1])) elif self.field_filter == 2: for row in resultset: print('%s\t%s\t%s' % (row[0], row[1], row[3][1:-1])) elif self.field_filter == 3: for row in resultset: print('%s\t%s' % (row[0], row[2])) else: print(format_json(resultset, field_filter=self.field_filter)) def get_tag_all(self): '''Get list of tags in DB :return: tuple (list of unique tags sorted alphabetically, a dictionary of {tag:usage_count}) ''' tags = [] unique_tags = [] dic = {} qry = 'SELECT DISTINCT tags, COUNT(tags) FROM bookmarks GROUP BY tags' for row in self.cur.execute(qry): tagset = row[0].strip(DELIM).split(DELIM) for tag in tagset: if tag not in tags: dic[tag] = row[1] tags += (tag,) else: dic[tag] += row[1] if not tags: return tags, dic if tags[0] == '': unique_tags = sorted(tags[1:]) else: unique_tags = sorted(tags) return unique_tags, dic def replace_tag(self, orig, new=None): '''Replace original tag by new tags in all records. Remove original tag if new tag is empty. :param orig: original tag as string :param new: replacement tags as list :return: True on success, False on failure ''' newtags = DELIM orig = '%s%s%s' % (DELIM, orig, DELIM) if new is not None: newtags = parse_tags(new) if orig == newtags: print('Tags are same.') return False if newtags == DELIM: return self.delete_tag_at_index(0, orig) query = 'SELECT id, tags FROM bookmarks WHERE tags LIKE ?' self.cur.execute(query, ('%' + orig + '%',)) results = self.cur.fetchall() if results: query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in results: tags = row[1].replace(orig, newtags) tags = parse_tags([tags]) self.cur.execute(query, (tags, row[0],)) print('Index %d updated' % row[0]) self.conn.commit() return True def browse_by_index(self, index): '''Open URL at index in browser :param index: DB index :return: True on success, False on failure ''' if index == 0: query = 'SELECT id from bookmarks ORDER BY RANDOM() LIMIT 1' self.cur.execute(query) result = self.cur.fetchone() # Return if no entries in DB if result is None: print('No bookmarks added yet ...') return False index = result[0] logdbg('Opening random index %d', index) query = 'SELECT URL FROM bookmarks WHERE id = ? LIMIT 1' try: for row in self.cur.execute(query, (index,)): browse(row[0]) return True logerr('No matching index %d', index) except IndexError: logerr('No matching index %d', index) return False def exportdb(self, filepath, taglist=None): '''Export bookmarks to a Firefox bookmarks formatted html or a markdown file, if destination file name ends with '.md'. :param filepath: path to file to export to :param taglist: list of specific tags to export :return: True on success, False on failure ''' import time count = 0 timestamp = int(time.time()) arguments = [] query = 'SELECT * FROM bookmarks' is_tag_valid = False if taglist is not None: tagstr = parse_tags(taglist) if not tagstr or tagstr == DELIM: logerr('Invalid tag') return False tags = tagstr.split(DELIM) query = '%s WHERE' % query for tag in tags: if tag != '': is_tag_valid = True query += " tags LIKE '%' || ? || '%' OR" tag = '%s%s%s' % (DELIM, tag, DELIM) arguments += (tag,) if is_tag_valid: query = query[:-3] else: query = query[:-6] logdbg('(%s), %s', query, arguments) self.cur.execute(query, arguments) resultset = self.cur.fetchall() if not resultset: print('No bookmarks exported') return False if os.path.exists(filepath): resp = read_in('%s exists. Overwrite? (y/n): ' % filepath) if resp != 'y': return False try: outfp = open(filepath, mode='w', encoding='utf-8') except Exception as e: logerr(e) return False if filepath.endswith('.md'): outfp.write('List of buku bookmarks:\n\n') for row in resultset: if row[2] == '': out = '- [Untitled](%s)\n' % (row[1]) else: out = '- [%s](%s)\n' % (row[2], row[1]) outfp.write(out) count += 1 else: outfp.write(''' Bookmarks

Bookmarks

\ Buku bookmarks

''' % (timestamp, timestamp)) for row in resultset: out = '%s

\n

') outfp.close() print('%s exported' % count) return True def importdb(self, filepath): '''Import bookmarks from a html or a markdown file (with extension '.md'). Supports Firefox, Google Chrome and IE exported html :param filepath: path to file to import :return: True on success, False on failure ''' if filepath.endswith('.md'): with open(filepath, mode='r', encoding='utf-8') as infp: for line in infp: # Supported markdown format: [title](url) # Find position of title end, url start delimiter combo index = line.find('](') if index != -1: # Find title start delimiter title_start_delim = line[:index].find('[') # Reverse find the url end delimiter url_end_delim = line[index + 2:].rfind(')') if title_start_delim != -1 and url_end_delim > 0: # Parse title title = line[title_start_delim + 1:index] # Parse url url = line[index + 2:index + 2 + url_end_delim] self.add_rec(url, title, None, None, 0, True) self.conn.commit() infp.close() else: try: import bs4 with open(filepath, mode='r', encoding='utf-8') as infp: soup = bs4.BeautifulSoup(infp, 'html.parser') except ImportError: logerr('Beautiful Soup not found') return False except Exception as e: logerr(e) return False html_tags = soup.findAll('a') for tag in html_tags: # Extract comment from

tag desc = None comment_tag = tag.findNextSibling('dd') if comment_tag: desc = comment_tag.text[0:comment_tag.text.find('\n')] self.add_rec(tag['href'], tag.string, ('%s%s%s' % (DELIM, tag['tags'], DELIM)) if tag.has_attr('tags') else None, desc, 0, True) self.conn.commit() infp.close() return True def mergedb(self, path): '''Merge bookmarks from another Buku database file :param path: path to DB file to merge :return: True on success, False on failure ''' try: # Connect to input DB if sys.version_info >= (3, 4, 4): # Python 3.4.4 and above indb_conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True) else: indb_conn = sqlite3.connect(path) indb_cur = indb_conn.cursor() indb_cur.execute('SELECT * FROM bookmarks') except Exception as e: logerr(e) return False resultset = indb_cur.fetchall() if resultset: for row in resultset: self.add_rec(row[1], row[2], row[3], row[4], row[5], True) self.conn.commit() try: indb_cur.close() indb_conn.close() except Exception: pass return True def tnyfy_url(self, index=0, url=None, shorten=True): '''Shorted a URL using Google URL shortener :param index: shorten the URL at DB index (int) :param url: pass a URL (string) :param shorten: True (default) to shorten, False to expand (boolean) :return: shortened url string on success, None on failure ''' if not index and not url: logerr('Either a valid DB index or URL required') return None if index: self.cur.execute('SELECT url FROM bookmarks WHERE id = ? LIMIT 1', (index,)) results = self.cur.fetchall() if not results: return None url = results[0][0] proxies = { 'https': os.environ.get('https_proxy'), } urlbase = 'https://tny.im/yourls-api.php?action=' if shorten: from urllib.parse import quote_plus as qp _u = '%s%s%s' % (urlbase, 'shorturl&format=simple&url=', qp(url)) else: _u = '%s%s%s' % (urlbase, 'expand&format=simple&shorturl=', url) try: r = requests.post(_u, headers={ 'content-type': 'application/json', 'User-Agent': USER_AGENT }, proxies=proxies) except Exception as e: logerr(e) return None if r.status_code != 200: logerr('[%s] %s', r.status_code, r.reason) return None return r.text def fixtags(self): '''Undocumented API to fix tags set in earlier versions. Functionalities: 1. Remove duplicate tags 2. Sort tags 3. Use lower case to store tags ''' to_commit = False self.cur.execute('SELECT id, tags FROM bookmarks ORDER BY id ASC') resultset = self.cur.fetchall() query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in resultset: oldtags = row[1] if oldtags == ',': continue tags = parse_tags([oldtags]) if tags == oldtags: continue self.cur.execute(query, (tags, row[0],)) to_commit = True if to_commit: self.conn.commit() def close_quit(self, exitval=0): '''Close a DB connection and exit :param exitval: program exit value ''' if self.conn is not None: try: self.cur.close() self.conn.close() except Exception: # ignore errors here, we're closing down pass sys.exit(exitval) class ExtendedArgumentParser(argparse.ArgumentParser): '''Extend classic argument parser''' # Print program info @staticmethod def program_info(file=sys.stdout): if sys.platform == 'win32' and file == sys.stdout: file = sys.stderr file.write(''' SYMBOLS: > title + comment # tags Version %s Copyright © 2015-2017 %s License: %s Webpage: https://github.com/jarun/Buku ''' % (__version__, __author__, __license__)) # Print prompt help @staticmethod def prompt_help(file=sys.stdout): file.write(''' keys: 1-N browse search result indices and/or ranges a open all results in browser s keyword [...] search for records with ANY keyword S keyword [...] search for records with ALL keywords d match substrings ('pen' matches 'opened') r expression run a regex search t [...] search bookmarks by a tag or show tag list (tag list index fetches bookmarks by tag) ? show this help q, ^D, double Enter exit buku ''') # Help def print_help(self, file=sys.stdout): super(ExtendedArgumentParser, self).print_help(file) self.program_info(file) # ---------------- # Helper functions # ---------------- def is_bad_url(url): '''Check if URL is malformed This API is not bulletproof but works in most cases. :param url: URL to scan :return: True or False ''' # Get the netloc token netloc = parse_url(url).netloc if not netloc: # Try of prepend '//' and get netloc netloc = parse_url('//' + url).netloc if not netloc: return True logdbg('netloc: %s', netloc) # netloc cannot start or end with a '.' if netloc.startswith('.') or netloc.endswith('.'): return True # netloc should have at least one '.' if netloc.rfind('.') < 0: return True return False def is_ignored_mime(url): '''Check if URL links to ignored mime Only a 'HEAD' request is made for these URLs :param url: URL to scan :return: True or False ''' for mime in SKIP_MIMES: if url.lower().endswith(mime): return True return False def get_page_title(resp): '''Invoke HTML parser and extract title from HTTP response :param resp: HTTP(S) GET response :return: title fetched from parsed page ''' parser = BukuHTMLParser() try: parser.feed(resp.data.decode(errors='replace')) except Exception as e: # Suppress Exception due to intentional self.reset() in BHTMLParser if logger.isEnabledFor(logging.DEBUG) \ and str(e) != 'we should not get here!': logerr('get_page_title(): %s', e) finally: return parser.parsed_title def gen_headers(): '''Generate headers for network connection''' global myheaders, myproxy myheaders = { 'Accept-Encoding': 'gzip,deflate', 'User-Agent': USER_AGENT, 'Accept': '*/*', 'Cookie': '', 'DNT': '1' } myproxy = os.environ.get('https_proxy') if myproxy: try: url = parse_url(myproxy) except Exception as e: logerr(e) return # Strip username and password (if present) and update headers if url.auth: myproxy = myproxy.replace(url.auth + '@', '') auth_headers = make_headers(basic_auth=url.auth) myheaders.update(auth_headers) logdbg('proxy: [%s]', myproxy) def get_PoolManager(): '''Creates a pool manager with proxy support, if applicable :return: ProxyManager if https_proxy is defined, else PoolManager. ''' if myproxy: return urllib3.ProxyManager(myproxy, num_pools=1, headers=myheaders) return urllib3.PoolManager(num_pools=1, headers=myheaders) def network_handler(url): '''Handle server connection and redirections :param url: URL to fetch :return: (title, recognized mime, bad url) tuple ''' http_handler = None page_title = None resp = None method = 'GET' if is_bad_url(url): return ('', 0, 1) if is_ignored_mime(url): method = 'HEAD' if not myheaders: gen_headers() try: http_handler = get_PoolManager() while True: resp = http_handler.request(method, url, timeout=40) if resp.status == 200: if method == 'GET': page_title = get_page_title(resp) elif resp.status == 403 and url.endswith('/'): # HTTP response Forbidden # Handle URLs in the form of https://www.domain.com/ # which fail when trying to fetch resource '/' # retry without trailing '/' logdbg('Received status 403: retrying...') # Remove trailing / url = url[:-1] resp.release_conn() continue else: logerr('[%s] %s', resp.status, resp.reason) if resp: resp.release_conn() break except Exception as e: logerr('network_handler(): %s', e) finally: if http_handler: http_handler.clear() if method == 'HEAD': return ('', 1, 0) if page_title is None: return ('', 0, 0) return (page_title.strip().replace('\n', ''), 0, 0) def parse_tags(keywords=[]): '''Format and get tag string from tokens :param keywords: list of tags :return: comma-delimited string of tags :return: just delimiter, if no keywords :return: None, if keyword is None ''' if keywords is None: return None if not keywords: return DELIM tags = DELIM orig_tags = [] unique_tags = [] # Cleanse and get the tags tagstr = ' '.join(keywords) marker = tagstr.find(DELIM) while marker >= 0: token = tagstr[0:marker] tagstr = tagstr[marker + 1:] marker = tagstr.find(DELIM) token = token.strip() if token == '': continue tags = '%s%s%s' % (tags, token, DELIM) tagstr = tagstr.strip() if tagstr != '': tags = '%s%s%s' % (tags, tagstr, DELIM) logdbg('keywords: %s', keywords) logdbg('parsed tags: [%s]', tags) if tags == DELIM: return tags orig_tags += tags.strip(DELIM).split(DELIM) for tag in orig_tags: if tag.lower() not in unique_tags: # Add unique tags in lower case unique_tags += (tag.lower(), ) # Sort the tags sorted_tags = sorted(unique_tags) # Wrap with delimiter return '%s%s%s' % (DELIM, DELIM.join(sorted_tags), DELIM) def taglist_subprompt(obj, msg, noninteractive=False): '''Additional prompt to show unique tag list :param obj: a valid instance of BukuDb class :param msg: sub-prompt message :param noninteractive: do not seek user input :return: new command string ''' unique_tags, dic = obj.get_tag_all() new_results = True while True: if new_results: if not unique_tags: count = 0 print('0 tags') else: count = 1 for tag in unique_tags: print('%6d. %s (%d)' % (count, tag, dic[tag])) count += 1 print() if noninteractive: return try: nav = read_in(msg) if not nav: nav = read_in(msg) if not nav: # Quit on double enter return 'q' nav = nav.strip() except EOFError: return 'q' if is_int(nav) and int(nav) > 0 and int(nav) < count: return 't ' + unique_tags[int(nav) - 1] elif is_int(nav): print('No matching index %s' % nav) new_results = False elif is_int(nav[0]): print('Invalid input') new_results = False elif nav == 't': new_results = True continue elif (nav == 'q' or nav == 'd' or nav == '?' or nav.startswith('s ') or nav.startswith('S ') or nav.startswith('r ') or nav.startswith('t ')): return nav else: print('Invalid input') new_results = False def prompt(obj, results, noninteractive=False, deep=False, subprompt=False): '''Show each matching result from a search and prompt :param obj: a valid instance of BukuDb class :param results: result set from a DB query :param noninteractive: do not seek user input :param deep: use deep search :param subprompt: jump directly to sub prompt ''' if not type(obj) is BukuDb: logerr('Not a BukuDb instance') return new_results = True if colorize: msg = '\x1b[7mbuku (? for help)\x1b[0m ' else: msg = 'buku (? for help): ' while True: if not subprompt: if new_results: if results: count = 0 for row in results: count += 1 print_record(row, count) else: print('0 results') if noninteractive: return try: nav = read_in(msg) if not nav: nav = read_in(msg) if not nav: # Quit on double enter break nav = nav.strip() except EOFError: return else: nav = 't' subprompt = False # list tags with 't' if nav == 't': nav = taglist_subprompt(obj, msg, noninteractive) if noninteractive: return # search ANY match with new keywords if nav.startswith('s '): results = obj.searchdb(nav[2:].split(), False, deep) new_results = True continue # search ALL match with new keywords if nav.startswith('S '): results = obj.searchdb(nav[2:].split(), True, deep) new_results = True continue # regular expressions search with new keywords if nav.startswith('r '): results = obj.searchdb(nav[2:].split(), True, regex=True) new_results = True continue # tag search with new keywords if nav.startswith('t '): results = obj.search_by_tag(nav[2:]) new_results = True continue # quit with 'q' if nav == 'q': return # toggle deep search with 'd' if nav == 'd': deep = not deep if deep: print('deep search on') else: print('deep search off') new_results = False continue # Show help with '?' if nav == '?': ExtendedArgumentParser.prompt_help(sys.stdout) new_results = False continue new_results = False # Nothing to browse if there are no results if not results: print('Not in a search context') continue # open all results and re-prompt with 'a' if nav == 'a': for index in range(0, count): browse(results[index][1]) continue # iterate over white-space separated indices for nav in nav.split(): if is_int(nav): index = int(nav) - 1 if index < 0 or index >= count: print('No matching index %s' % nav) continue browse(results[index][1]) elif '-' in nav and is_int(nav.split('-')[0]) \ and is_int(nav.split('-')[1]): lower = int(nav.split('-')[0]) upper = int(nav.split('-')[1]) if lower > upper: lower, upper = upper, lower for index in range(lower-1, upper): if 0 <= index < count: browse(results[index][1]) else: print('No matching index %d' % (index + 1)) else: print('Invalid input') break def print_record(row, idx=0): '''Print a single DB record Handles both search result and individual record :param idx: search result index. If 0, print with DB index ''' # Start with index and URL if idx != 0: pr = ID_str % (idx, row[1], row[0]) else: pr = ID_DB_str % (row[0], row[1]) # Indicate if record is immutable if row[5] & 1: pr = MUTE_str % (pr) else: pr = '%s\n' % (pr) # Append title if row[2] != '': pr = TITLE_str % (pr, row[2]) # Append description if row[4] != '': pr = DESC_str % (pr, row[4]) # Append tags IF not default (delimiter) if row[3] != DELIM: pr = TAG_str % (pr, row[3][1:-1]) print(pr) def format_json(resultset, single_record=False, field_filter=0): '''Return results in Json format :param single_record: indicates only one record :param field_filter: determines fields to show :return: record(s) in Json format ''' if single_record: marks = {} for row in resultset: if field_filter == 1: marks['uri'] = row[1] elif field_filter == 2: marks['uri'] = row[1] marks['tags'] = row[3][1:-1] elif field_filter == 3: marks['title'] = row[2] else: marks['index'] = row[0] marks['uri'] = row[1] marks['title'] = row[2] marks['description'] = row[4] marks['tags'] = row[3][1:-1] else: marks = [] for row in resultset: if field_filter == 1: record = {'uri': row[1]} elif field_filter == 2: record = {'uri': row[1], 'tags': row[3][1:-1]} elif field_filter == 3: record = {'title': row[2]} else: record = {'index': row[0], 'uri': row[1], 'title': row[2], 'description': row[4], 'tags': row[3][1:-1]} marks.append(record) return json.dumps(marks, sort_keys=True, indent=4) def is_int(string): '''Check if a string is a digit :param string: input string :return: True on success, False on exception ''' try: int(string) return True except Exception: return False def browse(url): '''Duplicate stdin, stdout (to suppress showing errors on the terminal) and open URL in default browser :param url: URL to open ''' if not parse_url(url).scheme: # Prefix with 'http://' is no scheme # Otherwise, opening in browser fails anyway # We expect http to https redirection # will happen for https-only websites logerr('scheme missing in URI, trying http') url = '%s%s' % ('http://', url) _stderr = os.dup(2) os.close(2) _stdout = os.dup(1) os.close(1) fd = os.open(os.devnull, os.O_RDWR) os.dup2(fd, 2) os.dup2(fd, 1) try: webbrowser.open(url) except Exception as e: logerr('browse(): %s', e) finally: os.close(fd) os.dup2(_stderr, 2) os.dup2(_stdout, 1) def check_upstream_release(): '''Check and report the latest upstream release version''' proxies = { 'https': os.environ.get('https_proxy'), } try: r = requests.get( 'https://api.github.com/repos/jarun/buku/tags?per_page=1', proxies=proxies ) except Exception as e: logerr(e) return if r.status_code != 200: logerr('[%s] %s', r.status_code, r.reason) else: latest = r.json()[0]['name'] if latest == 'v' + __version__: print('This is the latest release') else: print('Latest upstream release is %s' % latest) def regexp(expr, item): '''Perform a regular expression search''' return re.search(expr, item, re.IGNORECASE) is not None def read_in(msg): disable_sigint_handler() message = None try: message = input(msg) except KeyboardInterrupt: print('Interrupted.') enable_sigint_handler() return message def sigint_handler(signum, frame): '''Custom SIGINT handler''' global interrupted interrupted = True print('\nInterrupted.', file=sys.stderr) # Do a hard exit from here os._exit(1) DEFAULT_HANDLER = signal.signal(signal.SIGINT, sigint_handler) def disable_sigint_handler(): signal.signal(signal.SIGINT, DEFAULT_HANDLER) def enable_sigint_handler(): signal.signal(signal.SIGINT, sigint_handler) # --------------------- # Editor mode functions # --------------------- def get_system_editor(): '''Returns default system editor is $EDITOR is set''' return os.environ.get('EDITOR', 'none') def to_temp_file_content(url, title_in, tags_in, desc): '''Generate temporary file content string :param url: URL to open :param title_in: string title to add manually :param tags_in: string of comma-separated tags to add manually :param desc: string description :return: lines as newline separated string ''' strings = [] # URL strings.extend(['# Lines beginning with "#" will be stripped.\n\ # Add URL in next line (single line).', ]) if url is not None: strings.append(url) # TITLE strings.extend(['# Add TITLE in next line (single line). \ Leave blank to web fetch, "-" for no title.']) if title_in is None: title_in = '' elif title_in == '': title_in = '-' strings.append(title_in) # TAGS strings.extend(['# Add comma-separated TAGS in next line (single line).']) strings.append(tags_in.strip(DELIM) if not None else '') # DESC strings.append('# Add COMMENTS in next line(s).') if desc is not None and desc != '': strings.append(desc) else: strings.append('\n') return '\n'.join(strings) def parse_temp_file_content(content): '''Parse and return temporary file content :param content: string of content :return: tuple url: URL to open title: string title to add manually tags: string of comma-separated tags to add manually comments: string description ''' content = content.split('\n') content = [c for c in content if len(c) == 0 or c[0] != '#'] if len(content) == 0 or content[0].strip() == '': print('Edit aborted') return None url = content[0] title = None if len(content) > 1: title = content[1] if title == '': title = None elif title == '-': title = '' tags = ',' if len(content) > 2: tags = parse_tags([content[2]]) comments = [] if len(content) > 3: comments = [c for c in content[3:]] # need to remove all empty line that are at the end # and not those in the middle of the text for i in range(len(comments) - 1, -1, -1): if comments[i].strip() != '': break if i == -1: comments = [] else: comments = comments[0:i+1] comments = '\n'.join(comments) return url, title, tags, comments def edit_rec(editor, url, title_in, tags_in, desc): '''Edit a bookmark record :param editor: editor to open :param url: URL to open :param title_in: string title to add manually :param tags_in: string of comma-separated tags to add manually :param desc: string description :return: parsed content ''' import tempfile import subprocess temp_file_content = to_temp_file_content(url, title_in, tags_in, desc) fd, tmpfile = tempfile.mkstemp(prefix='buku-edit-') os.close(fd) try: with open(tmpfile, 'w+', encoding='utf-8') as fp: fp.write(temp_file_content) fp.flush() logdbg('Edited content written to %s', tmpfile) cmd = editor.split(' ') cmd.append(tmpfile) subprocess.call(cmd) with open(tmpfile, 'r', encoding='utf-8') as f: content = f.read() os.remove(tmpfile) except FileNotFoundError: if os.path.exists(tmpfile): os.remove(tmpfile) logerr('Cannot open editor') else: logerr('Cannot open tempfile') return None parsed_content = parse_temp_file_content(content) return parsed_content # Handle piped input def piped_input(argv, pipeargs=None): if not sys.stdin.isatty(): pipeargs.extend(argv) for s in sys.stdin.readlines(): pipeargs.extend(s.split()) # main starts here def main(): global colorize, ID_str, ID_DB_str, MUTE_str, TITLE_str, DESC_str, TAG_str title_in = None tags_in = None desc_in = None pipeargs = [] try: piped_input(sys.argv, pipeargs) except KeyboardInterrupt: pass # If piped input, set argument vector if pipeargs: sys.argv = pipeargs # Setup custom argument parser argparser = ExtendedArgumentParser( description='''Powerful command-line bookmark manager. Your mini web! POSITIONAL ARGUMENTS: KEYWORD search keywords''', formatter_class=argparse.RawTextHelpFormatter, usage='''buku [OPTIONS] [KEYWORD [KEYWORD ...]]''', add_help=False ) HIDE = argparse.SUPPRESS argparser.add_argument('keywords', nargs='*', metavar='KEYWORD', help=HIDE) # --------------------- # GENERAL OPTIONS GROUP # --------------------- general_grp = argparser.add_argument_group( title='GENERAL OPTIONS', description=''' -a, --add URL [tag, ...] bookmark URL with comma-separated tags -u, --update [...] update fields of an existing bookmark accepts indices and ranges refresh the title, if no edit options if no arguments: - update results when used with search - otherwise refresh all titles -w, --write [editor|index] open editor to edit a fresh bookmark to update by index, EDITOR must be set -d, --delete [...] remove bookmarks from DB accepts indices or a single range if no arguments: - delete results when used with search - otherwise delete all bookmarks -h, --help show this information and exit -v, --version show the program version and exit''') addarg = general_grp.add_argument addarg('-a', '--add', nargs='+', help=HIDE) addarg('-u', '--update', nargs='*', help=HIDE) addarg('-w', '--write', nargs='?', const=get_system_editor(), help=HIDE) addarg('-d', '--delete', nargs='*', help=HIDE) addarg('-h', '--help', action='store_true', help=HIDE) addarg('-v', '--version', action='version', version=__version__, help=HIDE) # ------------------ # EDIT OPTIONS GROUP # ------------------ edit_grp = argparser.add_argument_group( title='EDIT OPTIONS', description=''' --url keyword bookmark link --tag [+|-] [...] comma-separated tags clear bookmark tagset, if no arguments '+' appends to, '-' removes from tagset -t, --title [...] bookmark title; if no arguments: -a: do not set title, -u: clear title -c, --comment [...] description of the bookmark clears description, if no arguments --immutable N disable title fetch from web on update N=0: mutable (default), N=1: immutable''') addarg = edit_grp.add_argument addarg('--url', nargs=1, help=HIDE) addarg('--tag', nargs='*', help=HIDE) addarg('-t', '--title', nargs='*', help=HIDE) addarg('-c', '--comment', nargs='*', help=HIDE) addarg('--immutable', type=int, default=-1, choices={0, 1}, help=HIDE) # -------------------- # SEARCH OPTIONS GROUP # -------------------- search_grp = argparser.add_argument_group( title='SEARCH OPTIONS', description=''' -s, --sany find records with ANY search keyword this is the default search option -S, --sall find records with ALL search keywords special keywords - "blank": entries with empty title/tag "immutable": entries with locked title --deep match substrings ('pen' matches 'opens') --sreg run a regex search --stag search bookmarks by a tag list all tags, if no search keywords''') addarg = search_grp.add_argument addarg('-s', '--sany', action='store_true', help=HIDE) addarg('-S', '--sall', action='store_true', help=HIDE) addarg('--sreg', action='store_true', help=HIDE) addarg('--deep', action='store_true', help=HIDE) addarg('--stag', action='store_true', help=HIDE) # ------------------------ # ENCRYPTION OPTIONS GROUP # ------------------------ crypto_grp = argparser.add_argument_group( title='ENCRYPTION OPTIONS', description=''' -l, --lock [N] encrypt DB file with N (> 0, default 8) hash iterations to generate key -k, --unlock [N] decrypt DB file with N (> 0, default 8) hash iterations to generate key''') addarg = crypto_grp.add_argument addarg('-k', '--unlock', nargs='?', type=int, const=8, help=HIDE) addarg('-l', '--lock', nargs='?', type=int, const=8, help=HIDE) # ---------------- # POWER TOYS GROUP # ---------------- power_grp = argparser.add_argument_group( title='POWER TOYS', description=''' -e, --export file export bookmarks in Firefox format html export markdown, if file ends with '.md' format: [title](url), 1 entry per line use --tag to export only specific tags -i, --import file import Firefox or Chrome bookmarks html import markdown, if file ends with '.md' -m, --merge file add bookmarks from another buku DB file -p, --print [...] show record details by indices, ranges print all bookmarks, if no arguments -f, --format N limit fields in -p or Json search output N=1: URL, N=2: URL and tag, N=3: title -r, --replace oldtag [newtag ...] replace oldtag with newtag everywhere delete oldtag, if newtag not specified -j, --json Json formatted output for -p and search --nc disable color output --np do not show the prompt, run and exit -o, --open [...] browse bookmarks by indices and ranges open a random bookmark, if no arguments --oa browse all search results immediately --shorten index|URL fetch shortened url from tny.im service --expand index|URL expand a tny.im shortened url --tacit reduce verbosity --threads N max network connections in full refresh default N=4, min N=1, max N=10 -V check latest upstream version available -z, --debug show debug information and verbose logs''') addarg = power_grp.add_argument addarg('-e', '--export', nargs=1, help=HIDE) addarg('-i', '--import', nargs=1, dest='importfile', help=HIDE) addarg('-m', '--merge', nargs=1, help=HIDE) addarg('-p', '--print', nargs='*', help=HIDE) addarg('-f', '--format', type=int, default=0, choices={1, 2, 3}, help=HIDE) addarg('-r', '--replace', nargs='+', help=HIDE) addarg('-j', '--json', action='store_true', help=HIDE) addarg('--nc', action='store_true', help=HIDE) addarg('--np', action='store_true', help=HIDE) addarg('-o', '--open', nargs='*', help=HIDE) addarg('--oa', action='store_true', help=HIDE) addarg('--shorten', nargs=1, help=HIDE) addarg('--expand', nargs=1, help=HIDE) addarg('--tacit', action='store_true', help=HIDE) addarg('--threads', type=int, default=4, choices=range(1, 11), help=HIDE) addarg('-V', dest='upstream', action='store_true', help=HIDE) addarg('-z', '--debug', action='store_true', help=HIDE) # Undocumented API addarg('--fixtags', action='store_true', help=HIDE) # Show help and exit if no arguments if len(sys.argv) == 1: argparser.print_help(sys.stdout) sys.exit(1) # Parse the arguments args = argparser.parse_args() # Show help and exit if help requested if args.help: argparser.print_help(sys.stdout) sys.exit(0) # Set up debugging if args.debug: logger.setLevel(logging.DEBUG) logdbg('Version %s', __version__) else: logging.disable(logging.WARNING) urllib3.disable_warnings() # Handle color output preference if args.nc: colorize = False ID_str = '%d. %s [%s]\n' ID_DB_str = '%d. %s' MUTE_str = '%s (L)\n' TITLE_str = '%s > %s\n' DESC_str = '%s + %s\n' TAG_str = '%s # %s\n' # Handle encrypt/decrypt options at top priority if args.lock is not None: BukuCrypt.encrypt_file(args.lock) elif args.unlock is not None: BukuCrypt.decrypt_file(args.unlock) # Set up title if args.title is not None: if args.title: title_in = ' '.join(args.title) else: title_in = '' # Set up tags if args.tag is not None: if args.tag: tags_in = args.tag else: tags_in = [DELIM, ] # Set up comment if args.comment is not None: if args.comment: desc_in = ' '.join(args.comment) else: desc_in = '' # Initialize the database and get handles, set verbose by default bdb = BukuDb(args.json, args.format, not args.tacit, colorize=not args.nc) # Editor mode if args.write is not None: if args.write == 'none': logerr('EDITOR is not set') bdb.close_quit(1) elif args.write == '0': logerr('Cannot edit index 0') bdb.close_quit(1) if is_int(args.write): editor = get_system_editor() if editor == 'none': logerr('EDITOR must be set to use index with -w') bdb.close_quit() idx = int(args.write) rec = bdb.get_rec_by_id(idx) if not rec: logerr('No matching index %d', idx) bdb.close_quit(1) result = edit_rec(editor, rec[1], rec[2], rec[3], rec[4]) if result is not None: url, title, tags, desc = result bdb.update_rec(idx, url, title, tags, desc) elif args.add is None: # Edit and add a new bookmark # Parse tags into a comma-separated string if tags_in: if tags_in[0] == '+': tags = '+%s' % parse_tags(tags_in[1:]) elif tags_in[0] == '-': tags = '-%s' % parse_tags(tags_in[1:]) else: tags = parse_tags(tags_in) else: tags = DELIM result = edit_rec(args.write, '', title_in, tags, desc_in) if result is not None: url, title_in, tags, desc_in = result bdb.add_rec(url, title_in, tags, desc_in, args.immutable) # Add record if args.add is not None: if args.url is not None and args.update is None: logerr('Bookmark a single URL at a time') bdb.close_quit(1) # Parse tags into a comma-separated string tags = DELIM keywords = args.add if tags_in is not None: if tags_in[0] == '+': if len(tags_in) > 1: # The case: buku -a url tag1, tag2 --tag + tag3, tag4 tags_in = tags_in[1:] # In case of add, args.add may have URL followed by tags # Add delimiter as url+tags may not end with one keywords = args.add + [DELIM] + tags_in else: keywords = args.add + [DELIM] + tags_in if len(keywords) > 1: tags = parse_tags(keywords[1:]) url = args.add[0] if args.write and not is_int(args.write): result = edit_rec(args.write, url, title_in, tags, desc_in) if result is not None: url, title_in, tags, desc_in = result bdb.add_rec(url, title_in, tags, desc_in, args.immutable) # Search record search_results = None search_opted = True update_search_results = False if args.sany: # Search URLs, titles, tags for any keyword search_results = bdb.searchdb(args.keywords, False, args.deep) elif args.sall: # Search URLs, titles, tags with all keywords search_results = bdb.searchdb(args.keywords, True, args.deep) elif args.sreg: # Run a regular expression search search_results = bdb.searchdb(args.keywords, regex=True) elif args.stag: # Search bookmarks by tag if args.keywords: search_results = bdb.search_by_tag(' '.join(args.keywords)) else: # Use sub prompt to list all tags prompt(bdb, None, args.np, subprompt=True) elif args.keywords: search_results = bdb.searchdb(args.keywords, False, args.deep) else: search_opted = False if search_results: oneshot = args.np to_delete = False # Open all results in browser right away if args.oa # is specified. The has priority over delete/update. # URLs are opened first and updated/deleted later. if args.oa: for row in search_results: browse(row[1]) # In case of search and delete/update, # prompt should be non-interactive # delete gets priority over update if args.delete is not None and not args.delete: oneshot = True to_delete = True elif args.update is not None and not args.update: oneshot = True update_search_results = True if not args.json: prompt(bdb, search_results, oneshot, args.deep) else: # Printing in Json format is non-interactive print(format_json(search_results, field_filter=args.format)) # Delete search results if opted if to_delete: bdb.delete_resultset(search_results) # Update record if args.update is not None: if args.url is not None: url_in = args.url[0] else: url_in = '' # Parse tags into a comma-separated string if tags_in: if tags_in[0] == '+': tags = '+%s' % parse_tags(tags_in[1:]) elif tags_in[0] == '-': tags = '-%s' % parse_tags(tags_in[1:]) else: tags = parse_tags(tags_in) else: tags = None # No arguments to --update, update all if not args.update: # Update all records only if search was not opted if not search_opted: bdb.update_rec(0, url_in, title_in, tags, desc_in, args.immutable, args.threads) elif update_search_results and search_results is not None: if not args.tacit: print('Updated results:\n') pos = len(search_results) - 1 while pos >= 0: idx = search_results[pos][0] bdb.update_rec(idx, url_in, title_in, tags, desc_in, args.immutable, args.threads) # Commit at every 200th removal if pos % 200 == 0: bdb.conn.commit() pos -= 1 else: for idx in args.update: if is_int(idx): bdb.update_rec(int(idx), url_in, title_in, tags, desc_in, args.immutable, args.threads) elif '-' in idx and is_int(idx.split('-')[0]) \ and is_int(idx.split('-')[1]): lower = int(idx.split('-')[0]) upper = int(idx.split('-')[1]) if lower > upper: lower, upper = upper, lower # Update only once if range starts from 0 (all) if lower == 0: bdb.update_rec(0, url_in, title_in, tags, desc_in, args.immutable, args.threads) else: for _id in range(lower, upper + 1): bdb.update_rec(_id, url_in, title_in, tags, desc_in, args.immutable, args.threads) if interrupted: break if interrupted: break # Delete record if args.delete is not None: if not args.delete: # Attempt delete-all only if search was not opted if not search_opted: bdb.cleardb() elif len(args.delete) == 1 and '-' in args.delete[0]: vals = str(args.delete[0]).split('-') if len(vals) == 2 and is_int(vals[0]) and is_int(vals[1]): if int(vals[0]) == int(vals[1]): bdb.delete_rec(int(vals[0])) elif int(vals[0]) < int(vals[1]): bdb.delete_rec(0, int(vals[0]), int(vals[1]), True) else: bdb.delete_rec(0, int(vals[1]), int(vals[0]), True) else: logerr('Invalid index or range') bdb.close_quit(1) else: ids = [] # Select the unique indices for idx in args.delete: if idx not in ids: ids += (idx,) try: # Index delete order - highest to lowest ids.sort(key=lambda x: int(x), reverse=True) for idx in ids: bdb.delete_rec(int(idx)) except ValueError: logerr('Invalid index or range') # Print record if args.print is not None: if not args.print: bdb.print_rec(0) else: for idx in args.print: if is_int(idx): bdb.print_rec(int(idx)) elif '-' in idx and is_int(idx.split('-')[0]) \ and is_int(idx.split('-')[1]): lower = int(idx.split('-')[0]) upper = int(idx.split('-')[1]) if lower > upper: lower, upper = upper, lower for _id in range(lower, upper + 1): bdb.print_rec(_id) else: logerr('Invalid index or range to print') bdb.close_quit(1) # Replace a tag in DB if args.replace is not None: if len(args.replace) == 1: bdb.delete_tag_at_index(0, args.replace[0]) else: bdb.replace_tag(args.replace[0], args.replace[1:]) # Export bookmarks if args.export is not None: if args.tag is None: bdb.exportdb(args.export[0]) elif not args.tag: logerr('Missing tag') else: bdb.exportdb(args.export[0], args.tag) # Import bookmarks if args.importfile is not None: bdb.importdb(args.importfile[0]) # Merge a database file and exit if args.merge is not None: bdb.mergedb(args.merge[0]) # Open URL in browser if args.open is not None: if not args.open: bdb.browse_by_index(0) else: for idx in args.open: if is_int(idx): bdb.browse_by_index(int(idx)) elif '-' in idx and is_int(idx.split('-')[0]) \ and is_int(idx.split('-')[1]): lower = int(idx.split('-')[0]) upper = int(idx.split('-')[1]) if lower > upper: lower, upper = upper, lower for _id in range(lower, upper + 1): bdb.browse_by_index(_id) else: logerr('Invalid index or range to open') bdb.close_quit(1) # Shorten URL if args.shorten: if is_int(args.shorten[0]): shorturl = bdb.tnyfy_url(index=int(args.shorten[0])) else: shorturl = bdb.tnyfy_url(url=args.shorten[0]) if shorturl: print(shorturl) # Expand URL if args.expand: if is_int(args.expand[0]): url = bdb.tnyfy_url(index=int(args.expand[0]), shorten=False) else: url = bdb.tnyfy_url(url=args.expand[0], shorten=False) if url: print(url) # Report upstream version if args.upstream: check_upstream_release() # Fix tags if args.fixtags: bdb.fixtags() # Close DB connection and quit bdb.close_quit(0) if __name__ == '__main__': main()