#!/usr/bin/env python3
#
# Bookmark management utility
#
# Copyright (C) 2015-2016 Arun Prakash Jana <engineerarun@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Buku.  If not, see <http://www.gnu.org/licenses/>.

import argparse
import html.parser as HTMLParser
import json
import logging
import os
import re
import requests
import signal
import sqlite3
import sys
import threading
import urllib3
from urllib3.util import parse_url, make_headers
import webbrowser

try:
    import readline
    readline
except ImportError:
    pass

__version__ = '2.7'
__author__ = 'Arun Prakash Jana <engineerarun@gmail.com>'
__license__ = 'GPLv3'

# Globals
update = False        # Update a bookmark in DB
title_in = None       # Input title specified at cmdline
tags_in = None        # Input tags specified at cmdline
desc_in = None        # Description of the bookmark
tagsearch = False     # Search bookmarks by tag
interrupted = False   # Received SIGINT
DELIM = ','           # Delimiter used to store tags in DB
SKIP_MIMES = {'.pdf', '.txt'}

# Disguise as Firefox on Ubuntu
USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 \
Firefox/50.0'
myheaders = None  # Default dictionary of headers
myproxy = None    # Default proxy

# Crypto globals
BLOCKSIZE = 0x10000  # 64 KB blocks
SALT_SIZE = 0x20
CHUNKSIZE = 0x80000  # Read/write 512 KB chunks

# Set up logging
logging.basicConfig(format='[%(levelname)s] %(message)s')
logger = logging.getLogger()
logdbg = logger.debug
logerr = logger.error


class BukuHTMLParser(HTMLParser.HTMLParser):
    '''Class to parse and fetch the title
    from a HTML page, if available
    '''

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.in_title_tag = False
        self.data = ''
        self.prev_tag = None
        self.parsed_title = None

    def handle_starttag(self, tag, attrs):
        self.in_title_tag = False
        if tag == 'title':
            self.in_title_tag = True
            self.prev_tag = tag

    def handle_endtag(self, tag):
        if tag == 'title':
            self.in_title_tag = False
            if self.data != '':
                self.parsed_title = self.data
                self.reset()  # We have received title data, exit parsing

    def handle_data(self, data):
        if self.prev_tag == 'title' and self.in_title_tag:
            self.data = '%s%s' % (self.data, data)

    def error(self, message):
        pass
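
# A minimal illustration of how BukuHTMLParser is driven (hypothetical
# helper, mirroring get_page_title() further below). feed() may raise once
# the title is captured, because handle_endtag() calls reset() mid-parse;
# the exception is swallowed and parsed_title is read afterwards.
def _demo_title_parse():
    parser = BukuHTMLParser()
    try:
        parser.feed('<html><head><title>Example Domain</title></head></html>')
    except Exception:
        pass  # expected: reset() aborts parsing once the title is found
    return parser.parsed_title  # 'Example Domain'
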
class BukuCrypt:
    '''Class to handle encryption and decryption of
    the database file. Functionally a separate entity.

    Involves late imports in the static functions but it
    saves ~100ms each time. Given that encrypt/decrypt are
    not done automatically and any one should be called at
    a time, this doesn't seem to be an outrageous approach.
    '''

    @staticmethod
    def get_filehash(filepath):
        '''Get the SHA256 hash of a file

        :param filepath: path to the file
        :return: hash digest of the file
        '''

        from hashlib import sha256

        with open(filepath, 'rb') as fp:
            hasher = sha256()
            buf = fp.read(BLOCKSIZE)
            while len(buf) > 0:
                hasher.update(buf)
                buf = fp.read(BLOCKSIZE)

            return hasher.digest()

    @staticmethod
    def encrypt_file(iterations, dbfile=None):
        '''Encrypt the bookmarks database file

        :param iterations: number of iterations for key generation
        :param dbfile: custom database file path (including filename)
        '''

        try:
            from getpass import getpass
            import struct
            from hashlib import sha256
            from cryptography.hazmat.backends import default_backend
            from cryptography.hazmat.primitives.ciphers import (Cipher, modes,
                                                                 algorithms)
        except ImportError:
            logerr('cryptography lib(s) missing')
            sys.exit(1)

        if iterations < 1:
            logerr('Iterations must be >= 1')
            sys.exit(1)

        if not dbfile:
            dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db')
        encfile = '%s.enc' % dbfile

        db_exists = os.path.exists(dbfile)
        enc_exists = os.path.exists(encfile)

        if db_exists and not enc_exists:
            pass
        elif not db_exists:
            logerr('%s missing. Already encrypted?', dbfile)
            sys.exit(1)
        else:
            # db_exists and enc_exists
            logerr('Both encrypted and flat DB files exist!')
            sys.exit(1)

        password = getpass()
        passconfirm = getpass()
        if password == '':
            logerr('Empty password')
            sys.exit(1)
        if password != passconfirm:
            logerr('Passwords do not match')
            sys.exit(1)

        try:
            # Get SHA256 hash of DB file
            dbhash = BukuCrypt.get_filehash(dbfile)
        except Exception as e:
            logerr(e)
            sys.exit(1)

        # Generate random 256-bit salt and key
        salt = os.urandom(SALT_SIZE)
        key = ('%s%s' % (password,
               salt.decode('utf-8', 'replace'))).encode('utf-8')
        for _ in range(iterations):
            key = sha256(key).digest()

        iv = os.urandom(16)
        encryptor = Cipher(
            algorithms.AES(key),
            modes.CBC(iv),
            backend=default_backend()
        ).encryptor()

        filesize = os.path.getsize(dbfile)

        try:
            with open(dbfile, 'rb') as infp, open(encfile, 'wb') as outfp:
                # Encrypted file layout: file size, salt, IV, DB hash, then
                # the AES-CBC ciphertext written in CHUNKSIZE chunks.
                # (The remainder of this function did not survive in this
                # copy; the write loop below is a minimal reconstruction.)
                outfp.write(struct.pack('<Q', filesize))
                outfp.write(salt)
                outfp.write(iv)

                # Embed DB file hash in the encrypted file
                outfp.write(dbhash)

                while True:
                    chunk = infp.read(CHUNKSIZE)
                    if len(chunk) == 0:
                        break
                    elif len(chunk) % 16 != 0:
                        # Pad the last chunk to the AES block size
                        chunk = chunk + b' ' * (16 - len(chunk) % 16)

                    outfp.write(encryptor.update(chunk))

                outfp.write(encryptor.finalize())

            os.remove(dbfile)
            print('File encrypted')
            sys.exit(0)
        except Exception as e:
            logerr(e)
            sys.exit(1)

    # NOTE: decrypt_file() and the head of the BukuDb class (constructor,
    # initdb, record add/update APIs and the setup of refreshdb) did not
    # survive in this copy; a minimal skeleton is restored below so the
    # surviving refreshdb() worker code stays in context.


class BukuDb:

    def refreshdb(self, index, threads):
        '''Refresh titles of bookmarks in DB using a thread pool
        (reconstructed opening; the original query setup was lost)

        :param index: DB index of record to refresh, 0 refreshes all
        :param threads: number of network worker threads
        '''

        if index == 0:
            self.cur.execute('SELECT id, url FROM bookmarks ORDER BY id ASC')
        else:
            self.cur.execute('SELECT id, url FROM bookmarks WHERE id = ?',
                             (index,))
        resultset = self.cur.fetchall()
        recs = len(resultset)
        if not recs:
            logerr('No matching index or empty DB')
            return False

        query = 'UPDATE bookmarks SET metadata = ? WHERE id = ?'
        done = {'value': 0}       # count of threads that have finished
        processed = {'value': 0}  # count of records processed
        cond = threading.Condition()

        def refresh(i, cond):
            count = 0
            while True:
                cond.acquire()
                if len(resultset) > 0:
                    row = resultset.pop()
                else:
                    cond.release()
                    break
                cond.release()

                title, mime, bad = network_handler(row[1])
                count += 1

                cond.acquire()
                if bad:
                    print('\x1b[1mIndex %d: malformed URL\x1b[0m\n' % row[0])
                    cond.release()
                    continue
                elif mime:
                    print('\x1b[1mIndex %d: mime HEAD requested\x1b[0m\n'
                          % row[0])
                    cond.release()
                    continue
                elif title == '':
                    print('\x1b[1mIndex %d: no title\x1b[0m\n' % row[0])
                    cond.release()
                    continue

                self.cur.execute(query, (title, row[0],))
                # Save after fetching 32 titles per thread
                if count & 0b11111 == 0:
                    self.conn.commit()

                if self.chatty:
                    print('Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n'
                          % (title, row[0]))
                cond.release()

                if interrupted:
                    break

            logdbg('Thread %d: processed %d', threading.get_ident(), count)
            with cond:
                done['value'] += 1
                processed['value'] += count
                cond.notify()

        if recs < threads:
            threads = recs

        for i in range(threads):
            thread = threading.Thread(target=refresh, args=(i, cond))
            thread.start()

        cond.acquire()  # the Condition must be held before wait()
        while done['value'] < threads:
            cond.wait()
        logdbg('%d threads completed', done['value'])

        # Guard: records found == total records processed
        if recs != processed['value']:
            logerr('Records: %d, processed: %d !!!', recs, processed['value'])

        cond.release()
        self.conn.commit()
        return True
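
    # How the refresh pool above coordinates (descriptive note): each worker
    # pops one row at a time from the shared resultset while holding `cond`,
    # fetches the title over the network without holding the lock, then
    # re-acquires `cond` to write the result. The main thread sleeps in
    # cond.wait() until done['value'] equals the thread count, then commits
    # once more to flush any titles not yet saved by the 32-title batches.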
    def searchdb(self, keywords, all_keywords=False, deep=False, regex=False):
        '''Search the database for entries with tags, URL or title
        info matching the keywords and list them.

        :param keywords: keywords to search
        :param all_keywords: True to match all keywords, False to match any
        :param deep: search for matching substrings
        :param regex: match a regular expression
        :return: search results, or None, if no matches
        '''

        qry = 'SELECT id, url, metadata, tags, desc FROM bookmarks WHERE'
        # Deep query string (substring match)
        q1 = "(tags LIKE ('%' || ? || '%') OR URL LIKE ('%' || ? || '%') OR \
metadata LIKE ('%' || ? || '%') OR desc LIKE ('%' || ? || '%'))"
        # Non-deep query string (word boundary or regex match)
        q2 = '(tags REGEXP ? OR URL REGEXP ? OR metadata REGEXP ? OR desc \
REGEXP ?)'
        qargs = []

        if regex:
            for token in keywords:
                qry = '%s %s OR' % (qry, q2)
                qargs += (token, token, token, token,)
            qry = qry[:-3]
        elif all_keywords:
            if len(keywords) == 1 and keywords[0] == 'blank':
                qry = "SELECT * FROM bookmarks WHERE metadata = '' OR tags = ?"
                qargs += (DELIM,)
            elif len(keywords) == 1 and keywords[0] == 'immutable':
                qry = 'SELECT * FROM bookmarks WHERE flags & 1 == 1'
            else:
                for token in keywords:
                    if deep:
                        qry = '%s %s AND' % (qry, q1)
                    else:
                        token = '\\b' + token.rstrip('/') + '\\b'
                        qry = '%s %s AND' % (qry, q2)
                    qargs += (token, token, token, token,)
                qry = qry[:-4]
        elif not all_keywords:
            for token in keywords:
                if deep:
                    qry = '%s %s OR' % (qry, q1)
                else:
                    token = '\\b' + token.rstrip('/') + '\\b'
                    qry = '%s %s OR' % (qry, q2)
                qargs += (token, token, token, token,)
            qry = qry[:-3]
        else:
            logerr('Invalid search option')
            return None

        qry = '%s ORDER BY id ASC' % qry
        logdbg('query: "%s", args: %s', qry, qargs)

        try:
            self.cur.execute(qry, qargs)
        except sqlite3.OperationalError as e:
            logerr(e)
            return None

        results = self.cur.fetchall()
        if len(results) == 0:
            return None

        return results

    def search_by_tag(self, tag):
        '''Search and list bookmarks with a tag

        :param tag: a tag to search as string
        :return: search results, or None, if no matches
        '''

        tag = '%s%s%s' % (DELIM, tag.strip(DELIM), DELIM)
        query = "SELECT id, url, metadata, tags, desc FROM bookmarks \
WHERE tags LIKE '%' || ? || '%' ORDER BY id ASC"
        logdbg('query: "%s", args: %s', query, tag)
        self.cur.execute(query, (tag,))
        results = self.cur.fetchall()
        if len(results) == 0:
            return None

        return results
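
    # For illustration, a non-deep ANY search in searchdb() above for
    # ['linux', 'kernel'] builds:
    #   SELECT id, url, metadata, tags, desc FROM bookmarks WHERE
    #   (tags REGEXP ? OR URL REGEXP ? OR metadata REGEXP ? OR desc REGEXP ?)
    #   OR (tags REGEXP ? OR ...) ORDER BY id ASC
    # with each token passed as '\blinux\b', '\bkernel\b' etc.; REGEXP is
    # implemented by the regexp() helper defined near the end of this file.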
    def compactdb(self, index, delay_commit=False):
        '''When an entry at index is deleted, move the last
        entry in the DB to the vacated index, if the last
        index is greater.

        :param index: DB index of deleted entry
        :param delay_commit: do not commit to DB, caller's responsibility
        '''

        self.cur.execute('SELECT MAX(id) from bookmarks')
        results = self.cur.fetchall()
        # Return if the last index left in DB was just deleted
        if len(results) == 1 and results[0][0] is None:
            return

        query1 = 'SELECT id, URL, metadata, tags, \
desc FROM bookmarks WHERE id = ?'
        query2 = 'DELETE FROM bookmarks WHERE id = ?'
        query3 = 'INSERT INTO bookmarks(id, URL, metadata, \
tags, desc) VALUES (?, ?, ?, ?, ?)'

        for row in results:
            if row[0] > index:
                self.cur.execute(query1, (row[0],))
                results = self.cur.fetchall()
                for row in results:
                    self.cur.execute(query2, (row[0],))
                    self.cur.execute(query3,
                                     (index, row[1], row[2], row[3], row[4],))
                    if not delay_commit:
                        self.conn.commit()
                    print('Index %d moved to %d' % (row[0], index))

    def delete_rec(self, index, low=0, high=0, is_range=False,
                   delay_commit=False):
        '''Delete a single record, a range, or remove the
        table if index is 0

        :param index: DB index of entry to delete
        :param low: actual lower index of range
        :param high: actual higher index of range
        :param is_range: a range is passed using low and high arguments
        :param delay_commit: do not commit to DB, caller's responsibility
        :return: True on success, False on failure
        '''

        if is_range:  # Delete a range of indices
            # If range starts from 0, delete all records
            if low == 0:
                return self.cleardb()

            try:
                query = 'DELETE from bookmarks where id BETWEEN ? AND ?'
                self.cur.execute(query, (low, high))
                print('Index %d-%d: %d deleted'
                      % (low, high, self.cur.rowcount))
                if not self.cur.rowcount:
                    return False

                # Compact DB by ascending order of index to ensure
                # the existing higher indices move only once
                # Delayed commit is forced
                for index in range(low, high + 1):
                    self.compactdb(index, delay_commit=True)

                if not delay_commit:
                    self.conn.commit()
            except IndexError:
                logerr('No matching index')
                return False
        elif index == 0:  # Remove the table
            return self.cleardb()
        else:  # Remove a single entry
            try:
                query = 'DELETE FROM bookmarks WHERE id = ?'
                self.cur.execute(query, (index,))
                if self.cur.rowcount == 1:
                    print('Index %d deleted' % index)
                    self.compactdb(index, delay_commit=True)
                    if not delay_commit:
                        self.conn.commit()
                else:
                    logerr('No matching index %d', index)
                    return False
            except IndexError:
                logerr('No matching index %d', index)
                return False

        return True

    def delete_resultset(self, results):
        '''Delete search results in descending order of DB index.
        Indices are expected to be unique and in ascending order.
        This API forces a delayed commit.

        :param results: set of results to delete
        :return: True on success, False on failure
        '''

        resp = input('Delete the search results? (y/n): ')
        if resp != 'y':
            return False

        # Delete records in reverse order
        pos = len(results) - 1
        while pos >= 0:
            idx = results[pos][0]
            self.delete_rec(idx, delay_commit=True)

            # Commit at every 200th removal
            if pos % 200 == 0:
                self.conn.commit()

            pos -= 1

        return True

    def cleardb(self):
        '''Drops the bookmark table if it exists

        :return: True on success, False on failure
        '''

        resp = input('Remove ALL bookmarks? (y/n): ')
        if resp != 'y':
            print('No bookmarks deleted')
            return False

        self.cur.execute('DROP TABLE if exists bookmarks')
        self.conn.commit()
        print('All bookmarks deleted')
        return True
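
    # Example of the compaction scheme above: with live indices 1..5,
    # deleting index 2 frees that slot; compactdb(2) then moves record 5
    # into id 2, so the set of live indices stays contiguous (1..4) after
    # every delete, and a range delete compacts once per freed slot.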
    def print_rec(self, index):
        '''Print bookmark details at index or all bookmarks if index is 0

        Note: URL is printed on top because title may be blank

        :param index: index to print, 0 prints all
        '''

        if index != 0:  # Show record at index
            try:
                query = 'SELECT * FROM bookmarks WHERE id = ?'
                self.cur.execute(query, (index,))
                results = self.cur.fetchall()
                if len(results) == 0:
                    logerr('No matching index %d', index)
                    return
            except IndexError:
                logerr('No matching index %d', index)
                return

            if not self.json:
                for row in results:
                    if self.field_filter == 0:
                        print_record(row)
                    elif self.field_filter == 1:
                        print('%s\t%s' % (row[0], row[1]))
                    elif self.field_filter == 2:
                        print('%s\t%s\t%s' % (row[0], row[1], row[3][1:-1]))
                    elif self.field_filter == 3:
                        print('%s\t%s' % (row[0], row[2]))
            else:
                print(format_json(results, True, self.field_filter))
        else:  # Show all entries
            self.cur.execute('SELECT * FROM bookmarks')
            resultset = self.cur.fetchall()

            if not self.json:
                if self.field_filter == 0:
                    for row in resultset:
                        print_record(row)
                elif self.field_filter == 1:
                    for row in resultset:
                        print('%s\t%s' % (row[0], row[1]))
                elif self.field_filter == 2:
                    for row in resultset:
                        print('%s\t%s\t%s' % (row[0], row[1], row[3][1:-1]))
                elif self.field_filter == 3:
                    for row in resultset:
                        print('%s\t%s' % (row[0], row[2]))
            else:
                print(format_json(resultset, field_filter=self.field_filter))

    def get_all_tags(self):
        '''Get list of tags in DB

        :return: a tuple of (list of unique tags sorted alphabetically,
                 dictionary of {tag: usage count})
        '''

        tags = []
        unique_tags = []
        dic = {}
        qry = 'SELECT DISTINCT tags, COUNT(tags) FROM bookmarks GROUP BY tags'
        for row in self.cur.execute(qry):
            tagset = row[0].strip(DELIM).split(DELIM)
            for tag in tagset:
                if tag not in tags:
                    dic[tag] = row[1]
                    tags += (tag,)
                else:
                    dic[tag] += row[1]

        if len(tags) == 0:
            # Return the empty structures as a pair so callers can
            # always unpack the result
            return tags, dic

        if tags[0] == '':
            unique_tags = sorted(tags[1:])
        else:
            unique_tags = sorted(tags)

        return unique_tags, dic

    def replace_tag(self, orig, new=None):
        '''Replace original tag with new tags in all records.
        Remove the original tag if new tag is empty.

        :param orig: original tag as string
        :param new: replacement tags as list
        :return: True on success, False on failure
        '''

        newtags = DELIM
        orig = '%s%s%s' % (DELIM, orig, DELIM)

        if new is not None:
            newtags = parse_tags(new)

        if orig == newtags:
            print('Tags are same.')
            return False

        if newtags == DELIM:
            return self.delete_tag_at_index(0, orig)

        query = 'SELECT id, tags FROM bookmarks WHERE tags LIKE ?'
        self.cur.execute(query, ('%' + orig + '%',))
        results = self.cur.fetchall()

        query = 'UPDATE bookmarks SET tags = ? WHERE id = ?'
        for row in results:
            tags = row[1].replace(orig, newtags)
            tags = parse_tags([tags])
            self.cur.execute(query, (tags, row[0],))
            print('Index %d updated' % row[0])

        if len(results):
            self.conn.commit()

        return True

    def browse_by_index(self, index):
        '''Open URL at index in browser

        :param index: DB index
        :return: True on success, False on failure
        '''

        if index == 0:
            query = 'SELECT id from bookmarks ORDER BY RANDOM() LIMIT 1'
            self.cur.execute(query)
            result = self.cur.fetchone()

            # Return if no entries in DB
            if result is None:
                print('No bookmarks added yet ...')
                return False

            index = result[0]
            logdbg('Opening random index %d', index)

        query = 'SELECT URL FROM bookmarks WHERE id = ?'
        try:
            for row in self.cur.execute(query, (index,)):
                open_in_browser(row[0])
                return True
            logerr('No matching index %d', index)
        except IndexError:
            logerr('No matching index %d', index)

        return False
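
    # Tag storage format (as used by replace_tag() and the search APIs):
    # tags are kept wrapped in DELIM, e.g. ',linux,news,', so a whole-tag
    # match is simply LIKE '%,tag,%', and 'news' will not match a record
    # tagged 'usenews'.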
    def exportdb(self, filepath, markdown=False, taglist=None):
        '''Export bookmarks to a Firefox bookmarks formatted
        html or a markdown file.

        :param filepath: path to file to export to
        :param markdown: use markdown syntax
        :param taglist: list of specific tags to export
        :return: True on success, False on failure
        '''

        import time

        count = 0
        timestamp = int(time.time())
        arguments = []
        query = 'SELECT * FROM bookmarks'
        is_tag_valid = False

        if taglist is not None:
            tagstr = parse_tags(taglist)

            if len(tagstr) == 0 or tagstr == DELIM:
                logerr('Invalid tag')
                return False

            if len(tagstr) > 0:
                tags = tagstr.split(DELIM)
                query = '%s WHERE' % query
                for tag in tags:
                    if tag != '':
                        is_tag_valid = True
                        query += " tags LIKE '%' || ? || '%' OR"
                        tag = '%s%s%s' % (DELIM, tag, DELIM)
                        arguments += (tag,)

                if is_tag_valid:
                    query = query[:-3]
                else:
                    query = query[:-6]

        logdbg('(%s), %s', query, arguments)
        self.cur.execute(query, arguments)
        resultset = self.cur.fetchall()

        if len(resultset) == 0:
            print('No bookmarks exported')
            return False

        if os.path.exists(filepath):
            resp = input('%s exists. Overwrite? (y/n): ' % filepath)
            if resp != 'y':
                return False

        try:
            outfp = open(filepath, mode='w', encoding='utf-8')
        except Exception as e:
            logerr(e)
            return False

        if not markdown:
            # NETSCAPE bookmark file header (markup reconstructed from the
            # surviving fragments and the standard Firefox export format)
            outfp.write('''<!DOCTYPE NETSCAPE-Bookmark-file-1>

<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
<TITLE>Bookmarks</TITLE>
<H1>Bookmarks</H1>

<DL><p>
    <DT><H3 ADD_DATE="%s" LAST_MODIFIED="%s" PERSONAL_TOOLBAR_FOLDER="true">\
Buku bookmarks</H3>
    <DL><p>
''' % (timestamp, timestamp))

            for row in resultset:
                out = '%s<DT><A HREF="%s" ADD_DATE="%s" LAST_MODIFIED="%s"' \
                      % ('        ', row[1], timestamp, timestamp)
                if row[3] != DELIM:
                    out = '%s TAGS="%s"' % (out, row[3][1:-1])
                out = '%s>%s</A>\n' % (out, row[2])
                if row[4] != '':
                    out = '%s        <DD>%s\n' % (out, row[4])

                outfp.write(out)
                count += 1

            outfp.write('    </DL><p>\n</DL><p>')
        else:
            outfp.write('List of buku bookmarks:\n\n')
            for row in resultset:
                if row[2] == '':
                    out = '- [Untitled](%s)\n' % (row[1])
                else:
                    out = '- [%s](%s)\n' % (row[2], row[1])
                outfp.write(out)
                count += 1

        outfp.close()
        print('%s exported' % count)
        return True

    def importdb(self, filepath, markdown=False):
        '''Import bookmarks from a html or a markdown file.
        Supports Firefox, Google Chrome and IE exported html

        :param filepath: path to file to import
        :param markdown: use markdown syntax
        :return: True on success, False on failure
        '''

        if not markdown:
            try:
                import bs4
                with open(filepath, mode='r', encoding='utf-8') as infp:
                    soup = bs4.BeautifulSoup(infp, 'html.parser')
            except ImportError:
                logerr('Beautiful Soup not found')
                return False
            except Exception as e:
                logerr(e)
                return False

            html_tags = soup.findAll('a')
            for tag in html_tags:
                # Extract comment from the <DD> tag following the <DT> tag
                desc = None
                comment_tag = tag.findNextSibling('dd')
                if comment_tag:
                    desc = comment_tag.text[0:comment_tag.text.find('\n')]

                self.add_rec(tag['href'], tag.string,
                             ('%s%s%s' % (DELIM, tag['tags'], DELIM))
                             if tag.has_attr('tags') else None,
                             desc, 0, True)

            self.conn.commit()
        else:
            with open(filepath, mode='r', encoding='utf-8') as infp:
                for line in infp:
                    # Supported markdown format: [title](url)
                    # Find position of title end, url start delimiter combo
                    index = line.find('](')
                    if index != -1:
                        # Find title start delimiter
                        title_start_delim = line[:index].find('[')
                        # Reverse find the url end delimiter
                        url_end_delim = line[index + 2:].rfind(')')

                        if title_start_delim != -1 and url_end_delim > 0:
                            # Parse title
                            title = line[title_start_delim + 1:index]
                            # Parse url
                            url = line[index + 2:index + 2 + url_end_delim]

                            self.add_rec(url, title, None, None, 0, True)

            self.conn.commit()

        return True

    def mergedb(self, path):
        '''Merge bookmarks from another Buku database file

        :param path: path to DB file to merge
        :return: True on success, False on failure
        '''

        try:
            # Connect to input DB
            if sys.version_info >= (3, 4, 4):
                # Python 3.4.4 and above
                indb_conn = sqlite3.connect('file:%s?mode=ro' % path,
                                            uri=True)
            else:
                indb_conn = sqlite3.connect(path)

            indb_cur = indb_conn.cursor()
            indb_cur.execute('SELECT * FROM bookmarks')
        except Exception as e:
            logerr(e)
            return False

        resultset = indb_cur.fetchall()
        for row in resultset:
            self.add_rec(row[1], row[2], row[3], row[4], row[5], True)

        if len(resultset):
            self.conn.commit()

        try:
            indb_cur.close()
            indb_conn.close()
        except Exception:
            pass

        return True

    def tnyfy_url(self, index=0, url=None, shorten=True):
        '''Shorten a URL using the tny.im URL shortener service

        :param index: shorten the URL at DB index (int)
        :param url: pass a URL (string)
        :param shorten: True (default) to shorten, False to expand (boolean)
        :return: shortened url string on success, None on failure
        '''

        if not index and not url:
            logerr('Either a valid DB index or URL required')
            return None

        if index:
            self.cur.execute('SELECT url FROM bookmarks WHERE id = ?',
                             (index,))
            results = self.cur.fetchall()
            if len(results):
                url = results[0][0]
            else:
                return None

        proxies = {
            'https': os.environ.get('https_proxy'),
        }

        urlbase = 'https://tny.im/yourls-api.php?action='
        if shorten:
            _u = '%s%s%s' % (urlbase, 'shorturl&format=simple&url=', url)
        else:
            _u = '%s%s%s' % (urlbase, 'expand&format=simple&shorturl=', url)

        try:
            r = requests.post(_u,
                              headers={
                                  'content-type': 'application/json',
                                  'User-Agent': USER_AGENT
                              },
                              proxies=proxies)
        except Exception as e:
            logerr(e)
            return None

        if r.status_code != 200:
            logerr('[%s] %s', r.status_code, r.reason)
            return None

        return r.text
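
    # For example, shortening via tnyfy_url() above issues a POST to
    #   https://tny.im/yourls-api.php?action=shorturl&format=simple&url=<URL>
    # and the plain-text response body is the shortened URL; the expand
    # branch swaps in action=expand and passes the short URL instead.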
    def fixtags(self):
        '''Undocumented API to fix tags set
        in earlier versions. Functionalities:

        1. Remove duplicate tags
        2. Sort tags
        3. Use lower case to store tags
        '''

        to_commit = False
        self.cur.execute('SELECT id, tags FROM bookmarks ORDER BY id ASC')
        resultset = self.cur.fetchall()
        query = 'UPDATE bookmarks SET tags = ? WHERE id = ?'
        for row in resultset:
            oldtags = row[1]
            if oldtags == ',':
                continue

            tags = parse_tags([oldtags])
            if tags == oldtags:
                continue

            self.cur.execute(query, (tags, row[0],))
            to_commit = True

        if to_commit:
            self.conn.commit()

    def close_quit(self, exitval=0):
        '''Close a DB connection and exit

        :param exitval: program exit value
        '''

        if self.conn is not None:
            try:
                self.cur.close()
                self.conn.close()
            except Exception:
                # ignore errors here, we're closing down
                pass
        sys.exit(exitval)


# Generic functions

def is_bad_url(url):
    '''Check if URL is malformed
    This API is not bulletproof but works in most cases.

    :param url: URL to scan
    :return: True or False
    '''

    # Get the netloc token
    netloc = parse_url(url).netloc
    if not netloc:
        # Try to prepend '//' and get netloc
        netloc = parse_url('//' + url).netloc
        if not netloc:
            return True

    logdbg('netloc: %s', netloc)

    # netloc cannot start or end with a '.'
    if netloc.startswith('.') or netloc.endswith('.'):
        return True

    # netloc should have at least one '.'
    index = netloc.rfind('.')
    if index < 0:
        return True

    return False


def is_ignored_mime(url):
    '''Check if URL links to ignored mime
    Only a 'HEAD' request is made for these URLs

    :param url: URL to scan
    :return: True or False
    '''

    for mime in SKIP_MIMES:
        if url.lower().endswith(mime):
            return True

    return False


def get_page_title(resp):
    '''Invoke HTML parser and extract title from HTTP response

    :param resp: HTTP(S) GET response
    :return: title fetched from parsed page
    '''

    parser = BukuHTMLParser()
    try:
        parser.feed(resp.data.decode(errors='replace'))
    except Exception as e:
        # Suppress Exception due to intentional self.reset() in BHTMLParser
        if logger.isEnabledFor(logging.DEBUG) \
                and str(e) != 'we should not get here!':
            logerr('get_page_title(): %s', e)
    finally:
        return parser.parsed_title


def gen_headers():
    '''Generate headers for network connection'''

    global myheaders, myproxy

    myheaders = {
        'Accept-Encoding': 'gzip,deflate',
        'User-Agent': USER_AGENT,
        'Accept': '*/*',
        'Cookie': '',
        'DNT': '1'
    }

    myproxy = os.environ.get('https_proxy')
    if myproxy:
        try:
            url = parse_url(myproxy)
        except Exception as e:
            logerr(e)
            return

        # Strip username and password (if present) and update headers
        if url.auth:
            myproxy = myproxy.replace(url.auth + '@', '')
            auth_headers = make_headers(basic_auth=url.auth)
            myheaders.update(auth_headers)

        logdbg('proxy: [%s]', myproxy)
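
# A few concrete outcomes of the is_bad_url() heuristic above (a sketch;
# netloc parsing is delegated to urllib3):
#   is_bad_url('http://example.com')  -> False
#   is_bad_url('example')             -> True  (netloc has no '.')
#   is_bad_url('.example.com')        -> True  (netloc starts with '.')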
def get_PoolManager():
    '''Creates a pool manager with proxy support, if applicable

    :return: ProxyManager if https_proxy is defined, else PoolManager.
    '''

    if myproxy:
        return urllib3.ProxyManager(myproxy, num_pools=1, headers=myheaders)

    return urllib3.PoolManager(num_pools=1, headers=myheaders)


def network_handler(url):
    '''Handle server connection and redirections

    :param url: URL to fetch
    :return: (title, recognized mime, bad url) tuple
    '''

    http_handler = None
    page_title = None
    resp = None
    method = 'GET'

    if is_bad_url(url):
        return ('', 0, 1)

    if is_ignored_mime(url):
        method = 'HEAD'

    if not myheaders:
        gen_headers()

    try:
        http_handler = get_PoolManager()

        while True:
            resp = http_handler.request(method, url, timeout=40)

            if resp.status == 200:
                if method == 'GET':
                    page_title = get_page_title(resp)
            elif resp.status == 403 and url.endswith('/'):
                # HTTP response Forbidden
                # Handle URLs in the form of https://www.domain.com/
                # which fail when trying to fetch resource '/'
                # retry without trailing '/'
                logdbg('Received status 403: retrying...')
                # Remove trailing /
                url = url[:-1]
                resp.release_conn()
                continue
            else:
                logerr('[%s] %s', resp.status, resp.reason)

            if resp:
                resp.release_conn()

            break
    except Exception as e:
        logerr('network_handler(): %s', e)
    finally:
        if http_handler:
            http_handler.clear()
        if method == 'HEAD':
            return ('', 1, 0)
        if page_title is None:
            return ('', 0, 0)
        return (page_title.strip().replace('\n', ''), 0, 0)


def parse_tags(keywords=None):
    '''Format and get tag string from tokens

    :param keywords: list of tags
    :return: comma-delimited string of tags
    :return: just delimiter, if no keywords
    :return: None, if keyword is None
    '''

    if keywords is None:
        return None

    tags = DELIM
    orig_tags = []
    unique_tags = []

    # Cleanse and get the tags
    tagstr = ' '.join(keywords)
    marker = tagstr.find(DELIM)

    while marker >= 0:
        token = tagstr[0:marker]
        tagstr = tagstr[marker + 1:]
        marker = tagstr.find(DELIM)
        token = token.strip()
        if token == '':
            continue

        tags = '%s%s%s' % (tags, token, DELIM)

    tagstr = tagstr.strip()
    if tagstr != '':
        tags = '%s%s%s' % (tags, tagstr, DELIM)

    logdbg('keywords: %s', keywords)
    logdbg('parsed tags: [%s]', tags)

    if tags == DELIM:
        return tags

    orig_tags += tags.strip(DELIM).split(DELIM)
    for tag in orig_tags:
        if tag.lower() not in unique_tags:
            # Add unique tags in lower case
            unique_tags += (tag.lower(), )

    # Sort the tags
    sorted_tags = sorted(unique_tags)

    # Wrap with delimiter
    return '%s%s%s' % (DELIM, DELIM.join(sorted_tags), DELIM)


def taglist_subprompt(obj, noninteractive=False):
    '''Additional prompt to show unique tag list

    :param obj: a valid instance of BukuDb class
    :param noninteractive: do not seek user input
    :return: new command string
    '''

    unique_tags, dic = obj.get_all_tags()
    msg = '\x1b[7mbuku (? for help)\x1b[0m '
    new_results = True

    while True:
        if new_results:
            if len(unique_tags) == 0:
                count = 0
                print('0 tags')
            else:
                count = 1
                for tag in unique_tags:
                    print('%6d. %s (%d)' % (count, tag, dic[tag]))
                    count += 1
                print()

        if noninteractive:
            return

        try:
            nav = input(msg)
            if not nav:
                nav = input(msg)
                if not nav:
                    # Quit on double enter
                    return 'q'
            nav = nav.strip()
        except EOFError:
            return 'q'

        if is_int(nav) and int(nav) > 0 and int(nav) < count:
            return 't ' + unique_tags[int(nav) - 1]
        elif is_int(nav):
            print('No matching index %s' % nav)
            new_results = False
        elif is_int(nav[0]):
            print('Invalid input')
            new_results = False
        elif nav == 't':
            new_results = True
            continue
        elif (nav == 'q' or nav == 'd' or nav == '?'
              or nav.startswith('s ') or nav.startswith('S ')
              or nav.startswith('r ') or nav.startswith('t ')):
            return nav
        else:
            print('Invalid input')
            new_results = False
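
# A worked example for parse_tags() above (hypothetical helper): tokens are
# joined, split on DELIM, stripped, lowercased, deduplicated and sorted,
# then wrapped in delimiters again.
def _demo_parse_tags():
    # 'News' and ',Linux' fold into lowercase unique tags
    assert parse_tags(['linux,', 'News', ',Linux']) == ',linux,news,'
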
def prompt(obj, results, noninteractive=False, deep=False, subprompt=False):
    '''Show each matching result from a search and prompt

    :param obj: a valid instance of BukuDb class
    :param results: result set from a DB query
    :param noninteractive: do not seek user input
    :param deep: use deep search
    :param subprompt: jump directly to sub prompt
    '''

    if not type(obj) is BukuDb:
        logerr('Not a BukuDb instance')
        return

    new_results = True
    msg = '\x1b[7mbuku (? for help)\x1b[0m '

    while True:
        if not subprompt:
            if new_results:
                if results:
                    count = 0
                    for row in results:
                        count += 1
                        print_record(row, count)
                else:
                    print('0 results')

            if noninteractive:
                return

            try:
                nav = input(msg)
                if not nav:
                    nav = input(msg)
                    if not nav:
                        # Quit on double enter
                        break
                nav = nav.strip()
            except EOFError:
                return
        else:
            nav = 't'
            subprompt = False

        # list tags with 't'
        if nav == 't':
            nav = taglist_subprompt(obj, noninteractive)
            if noninteractive:
                return

        # search ANY match with new keywords
        if nav.startswith('s '):
            results = obj.searchdb(nav[2:].split(), False, deep)
            new_results = True
            continue

        # search ALL match with new keywords
        if nav.startswith('S '):
            results = obj.searchdb(nav[2:].split(), True, deep)
            new_results = True
            continue

        # regular expressions search with new keywords
        if nav.startswith('r '):
            results = obj.searchdb(nav[2:].split(), True, regex=True)
            new_results = True
            continue

        # tag search with new keywords
        if nav.startswith('t '):
            results = obj.search_by_tag(nav[2:])
            new_results = True
            continue

        # quit with 'q'
        if nav == 'q':
            return

        # toggle deep search with 'd'
        if nav == 'd':
            deep = not deep
            if deep:
                print('deep search on')
            else:
                print('deep search off')
            new_results = False
            continue

        # Show help with '?'
        if nav == '?':
            ExtendedArgumentParser.print_prompt_help(sys.stdout)
            new_results = False
            continue

        new_results = False

        # Nothing to browse if there are no results
        if not results:
            print('Not in a search context')
            continue

        # open all results and re-prompt with 'a'
        if nav == 'a':
            for index in range(0, count):
                try:
                    open_in_browser(results[index][1])
                except Exception as e:
                    logerr('prompt() 1: %s', e)
            continue

        # iterate over white-space separated indices
        for nav in nav.split():
            if is_int(nav):
                index = int(nav) - 1
                if index < 0 or index >= count:
                    print('No matching index %s' % nav)
                    continue
                try:
                    open_in_browser(results[index][1])
                except Exception as e:
                    logerr('prompt() 2: %s', e)
            elif '-' in nav and is_int(nav.split('-')[0]) \
                    and is_int(nav.split('-')[1]):
                lower = int(nav.split('-')[0])
                upper = int(nav.split('-')[1])
                if lower > upper:
                    lower, upper = upper, lower
                for index in range(lower - 1, upper):
                    try:
                        if 0 <= index < count:
                            open_in_browser(results[index][1])
                        else:
                            print('No matching index %d' % (index + 1))
                    except Exception as e:
                        logerr('prompt() 3: %s', e)
            else:
                print('Invalid input')
                break


def print_record(row, idx=0):
    '''Print a single DB record
    Handles both search result and individual record

    :param row: bookmark record to print
    :param idx: search result index. If 0, print with DB index
    '''

    # Start with index and URL
    if idx != 0:
        pr = '\x1b[1m\x1b[93m%d. \x1b[0m\x1b[92m%s\x1b[0m \
\x1b[1m[%s]\x1b[0m\n' % (idx, row[1], row[0])
    else:
        pr = '\x1b[1m\x1b[93m%d. \x1b[0m\x1b[92m%s\x1b[0m' % (row[0], row[1])
        # Indicate if record is immutable
        if row[5] & 1:
            pr = '%s \x1b[1m(L)\x1b[0m\n' % (pr)
        else:
            pr = '%s\n' % (pr)

    # Append title
    if row[2] != '':
        pr = '%s   \x1b[91m>\x1b[0m %s\n' % (pr, row[2])

    # Append description
    if row[4] != '':
        pr = '%s   \x1b[91m+\x1b[0m %s\n' % (pr, row[4])

    # Append tags IF not default (delimiter)
    if row[3] != DELIM:
        pr = '%s   \x1b[91m#\x1b[0m %s\n' % (pr, row[3][1:-1])

    print(pr)


def format_json(resultset, single_record=False, field_filter=0):
    '''Return results in Json format

    :param single_record: indicates only one record
    :param field_filter: determines fields to show
    :return: record(s) in Json format
    '''

    if single_record:
        marks = {}
        for row in resultset:
            if field_filter == 1:
                marks['uri'] = row[1]
            elif field_filter == 2:
                marks['uri'] = row[1]
                marks['tags'] = row[3][1:-1]
            elif field_filter == 3:
                marks['title'] = row[2]
            else:
                marks['index'] = row[0]
                marks['uri'] = row[1]
                marks['title'] = row[2]
                marks['description'] = row[4]
                marks['tags'] = row[3][1:-1]
    else:
        marks = []
        for row in resultset:
            if field_filter == 1:
                record = {'uri': row[1]}
            elif field_filter == 2:
                record = {'uri': row[1], 'tags': row[3][1:-1]}
            elif field_filter == 3:
                record = {'title': row[2]}
            else:
                record = {'index': row[0], 'uri': row[1], 'title': row[2],
                          'description': row[4], 'tags': row[3][1:-1]}

            marks.append(record)

    return json.dumps(marks, sort_keys=True, indent=4)


def is_int(string):
    '''Check if a string is a digit

    :param string: input string
    :return: True on success, False on exception
    '''

    try:
        int(string)
        return True
    except Exception:
        return False


def open_in_browser(url):
    '''Duplicate stdin, stdout (to suppress showing errors
    on the terminal) and open URL in default browser

    :param url: URL to open
    '''

    if not parse_url(url).scheme:
        # Prefix with 'http://' if no scheme
        # Otherwise, opening in browser fails anyway
        # We expect http to https redirection
        # will happen for https-only websites
        logerr('scheme missing in URI, trying http')
        url = '%s%s' % ('http://', url)

    _stderr = os.dup(2)
    os.close(2)
    _stdout = os.dup(1)
    os.close(1)
    fd = os.open(os.devnull, os.O_RDWR)
    os.dup2(fd, 2)
    os.dup2(fd, 1)
    try:
        webbrowser.open(url)
    except Exception as e:
        logerr('open_in_browser(): %s', e)
    finally:
        os.close(fd)
        os.dup2(_stderr, 2)
        os.dup2(_stdout, 1)


def check_upstream_release():
    '''Check and report the latest upstream release version'''

    proxies = {
        'https': os.environ.get('https_proxy'),
    }

    try:
        r = requests.get(
            'https://api.github.com/repos/jarun/buku/tags?per_page=1',
            proxies=proxies
        )
    except Exception as e:
        logerr(e)
        return

    if r.status_code != 200:
        logerr('[%s] %s', r.status_code, r.reason)
    else:
        latest = r.json()[0]['name']
        if latest == 'v' + __version__:
            print('This is the latest release')
        else:
            print('Latest upstream release is %s' % latest)


def sigint_handler(signum, frame):
    '''Custom SIGINT handler'''

    global interrupted

    interrupted = True
    print('\nInterrupted.', file=sys.stderr)

    # Do a hard exit from here
    os._exit(1)

signal.signal(signal.SIGINT, sigint_handler)


def regexp(expr, item):
    '''Perform a regular expression search'''

    return re.search(expr, item, re.IGNORECASE) is not None


# Custom Action classes for argparse

class CustomUpdateAction(argparse.Action):
    '''Class to capture if optional param 'update'
    is actually used, even if sans arguments
    '''

    def __call__(self, parser, args, values, option_string=None):
        global update

        update = True
        # NOTE: the following converts a None argument to an empty array []
        setattr(args, self.dest, values)


class CustomTagAction(argparse.Action):
    '''Class to capture if optional param 'tag'
    is actually used, even if sans arguments
    '''

    def __call__(self, parser, args, values, option_string=None):
        global tags_in

        tags_in = [DELIM, ]
        setattr(args, self.dest, values)


class CustomTitleAction(argparse.Action):
    '''Class to capture if optional param 'title'
    is actually used, even if sans arguments
    '''

    def __call__(self, parser, args, values, option_string=None):
        global title_in

        title_in = ''
        setattr(args, self.dest, values)


class CustomDescAction(argparse.Action):
    '''Class to capture if optional param 'comment'
    is actually used, even if sans arguments
    '''

    def __call__(self, parser, args, values, option_string=None):
        global desc_in

        desc_in = ''
        setattr(args, self.dest, values)


class CustomTagSearchAction(argparse.Action):
    '''Class to capture if optional param 'stag'
    is actually used, even if sans arguments
    '''

    def __call__(self, parser, args, values, option_string=None):
        global tagsearch

        tagsearch = True
        setattr(args, self.dest, values)


class ExtendedArgumentParser(argparse.ArgumentParser):
    '''Extend classic argument parser'''

    # Print program info
    @staticmethod
    def print_program_info(file=None):
        file.write('''
symbols:
  >                    title
  +                    comment
  #                    tags

Version %s
© 2015-2016 Arun Prakash Jana <engineerarun@gmail.com>
License: GPLv3
Webpage: https://github.com/jarun/Buku
''' % __version__)

    # Print prompt help
    @staticmethod
    def print_prompt_help(file=None):
        file.write('''
keys:
  1-N                  browse search result indices and/or ranges
  a                    open all results in browser
  s keyword [...]      search for records with ANY keyword
  S keyword [...]      search for records with ALL keywords
  d                    match substrings ('pen' matches 'opened')
  r expression         run a regex search
  t [...]              search bookmarks by a tag or show tag list
                       (tag list index fetches bookmarks by tag)
  ?                    show this help
  q, ^D, double Enter  exit buku
''')

    # Help
    def print_help(self, file=None):
        super(ExtendedArgumentParser, self).print_help(file)
        self.print_program_info(file)


# Handle piped input
def piped_input(argv, pipeargs=None):
    if not sys.stdin.isatty():
        pipeargs.extend(argv)
        for s in sys.stdin.readlines():
            pipeargs.extend(s.split())
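
# piped_input() above lets buku be scripted: e.g. `echo '-s python' | buku`
# appends the piped tokens to sys.argv before parsing, so the search runs
# exactly as if the options had been typed on the command line.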
'''main starts here'''


def main():
    global tags_in, title_in, desc_in

    pipeargs = []
    try:
        piped_input(sys.argv, pipeargs)
    except KeyboardInterrupt:
        pass

    # If piped input, set argument vector
    if len(pipeargs) > 0:
        sys.argv = pipeargs

    # Setup custom argument parser
    argparser = ExtendedArgumentParser(
        description='Powerful command-line bookmark manager. Your mini web!',
        formatter_class=argparse.RawTextHelpFormatter,
        usage='''buku [OPTIONS] [KEYWORD [KEYWORD ...]]''',
        add_help=False
    )
    HIDE = argparse.SUPPRESS

    # ---------------------
    # GENERAL OPTIONS GROUP
    # ---------------------

    general_grp = argparser.add_argument_group(
        title='general options',
        description='''-a, --add URL [tag, ...]
                     bookmark URL with comma-separated tags
-u, --update [...]   update fields of bookmark at DB indices
                     accepts indices and ranges
                     refresh all titles, if no arguments
                     refresh titles of bookmarks at indices,
                     if no edit options are specified
-d, --delete [...]   delete bookmarks. Valid inputs: either
                     a hyphenated single range (100-200),
                     OR space-separated indices (100 15 200)
                     delete results with search options
                     delete all bookmarks, if no arguments
-h, --help           show this information and exit''')
    addarg = general_grp.add_argument
    addarg('-a', '--add', nargs='+', help=HIDE)
    addarg('-u', '--update', nargs='*', action=CustomUpdateAction, help=HIDE)
    addarg('-d', '--delete', nargs='*', help=HIDE)
    addarg('-h', '--help', action='store_true', help=HIDE)

    # ------------------
    # EDIT OPTIONS GROUP
    # ------------------

    edit_grp = argparser.add_argument_group(
        title='edit options',
        description='''--url keyword        specify url, works only with -u option
--tag [+|-] [...]    set comma-separated tags with -a and -u
                     clear tags, if no arguments
                     works with -a, -u
                     append to tags, if preceded by '+'
                     remove from tags, if preceded by '-'
-t, --title [...]    manually set title, works with -a, -u
                     if no arguments:
                     -a: do not set title, -u: clear title
-c, --comment [...]  description of the bookmark, works with
                     -a, -u; clears comment, if no arguments
--immutable N        disable title fetch from web on update
                     works with -a, -u
                     N=0: mutable (default), N=1: immutable''')
    addarg = edit_grp.add_argument
    addarg('--url', nargs=1, help=HIDE)
    addarg('--tag', nargs='*', action=CustomTagAction, help=HIDE)
    addarg('-t', '--title', nargs='*', action=CustomTitleAction, help=HIDE)
    addarg('-c', '--comment', nargs='*', action=CustomDescAction, help=HIDE)
    addarg('--immutable', type=int, default=-1, choices={0, 1}, help=HIDE)

    # --------------------
    # SEARCH OPTIONS GROUP
    # --------------------

    search_grp = argparser.add_argument_group(
        title='search options',
        description='''-s, --sany keyword [...]
                     find records with ANY search keyword
-S, --sall keyword [...]
                     find records with ALL search keywords
                     special keywords -
                     "blank": entries with empty title/tag
                     "immutable": entries with locked title
--deep               match substrings ('pen' matches 'opens')
--sreg expression    run a regex search
--stag [...]         search bookmarks by a tag
                     list all tags, if no arguments''')
    addarg = search_grp.add_argument
    addarg('-s', '--sany', nargs='+', help=HIDE)
    addarg('-S', '--sall', nargs='+', help=HIDE)
    addarg('--sreg', nargs=1, help=HIDE)
    addarg('--deep', action='store_true', help=HIDE)
    addarg('--stag', nargs='*', action=CustomTagSearchAction, help=HIDE)

    # ------------------------
    # ENCRYPTION OPTIONS GROUP
    # ------------------------

    crypto_grp = argparser.add_argument_group(
        title='encryption options',
        description='''-l, --lock [N]       encrypt DB file with N (> 0, default 8)
                     hash iterations to generate key
-k, --unlock [N]     decrypt DB file with N (> 0, default 8)
                     hash iterations to generate key''')
    addarg = crypto_grp.add_argument
    addarg('-k', '--unlock', nargs='?', type=int, const=8, help=HIDE)
    addarg('-l', '--lock', nargs='?', type=int, const=8, help=HIDE)
    # ----------------
    # POWER TOYS GROUP
    # ----------------

    power_grp = argparser.add_argument_group(
        title='power toys',
        description='''-e, --export file    export bookmarks to Firefox format html
                     use --tag to export only specific tags
-i, --import file    import bookmarks from html file
                     FF and Google Chrome formats supported
--markdown           use markdown with -e and -i
                     format: [title](url), 1 per line
-m, --merge file     add bookmarks from another buku DB file
-p, --print [...]    show details of bookmark by DB index
                     accepts indices and ranges
                     show all bookmarks, if no arguments
-f, --format N       limit fields in -p or Json search output
                     1: URL, 2: URL and tag, 3: title
-r, --replace oldtag [newtag ...]
                     replace oldtag with newtag everywhere
                     delete oldtag, if no newtag
-j, --json           Json formatted output for -p and search
--noprompt           do not show the prompt, run and exit
-o, --open [N]       open bookmark at DB index N in browser
                     open a random index if N is omitted
--shorten N/URL      fetch shortened url from tny.im service
                     accepts either a DB index or a URL
--expand N/URL       expand a tny.im shortened url
--tacit              reduce verbosity
--threads N          max network connections in full refresh
                     default 4, min 1, max 10
--upstream           check latest upstream version available
-v, --version        show program version and exit
-z, --debug          show debug information and verbose logs''')
    addarg = power_grp.add_argument
    addarg('-e', '--export', nargs=1, help=HIDE)
    addarg('-i', '--import', nargs=1, dest='importfile', help=HIDE)
    addarg('--markdown', action='store_true', help=HIDE)
    addarg('-m', '--merge', nargs=1, help=HIDE)
    addarg('-p', '--print', nargs='*', help=HIDE)
    addarg('-f', '--format', type=int, default=0, choices={1, 2, 3},
           help=HIDE)
    addarg('-r', '--replace', nargs='+', help=HIDE)
    addarg('-j', '--json', action='store_true', help=HIDE)
    addarg('--noprompt', action='store_true', help=HIDE)
    addarg('-o', '--open', nargs='?', type=int, const=0, help=HIDE)
    addarg('--shorten', nargs=1, help=HIDE)
    addarg('--expand', nargs=1, help=HIDE)
    addarg('--tacit', action='store_true', help=HIDE)
    addarg('--threads', type=int, default=4, choices=range(1, 11),
           help=HIDE)
    addarg('--upstream', action='store_true', help=HIDE)
    addarg('-v', '--version', action='version', version=__version__,
           help=HIDE)
    addarg('-z', '--debug', action='store_true', help=HIDE)
    # Undocumented API
    addarg('--fixtags', action='store_true', help=HIDE)

    # Show help and exit if no arguments
    if len(sys.argv) < 2:
        argparser.print_help(sys.stdout)
        sys.exit(1)

    # Parse the arguments
    args = argparser.parse_args()

    # Show help and exit if help requested
    if args.help:
        argparser.print_help(sys.stdout)
        sys.exit(0)

    # Assign the values to globals
    if tags_in is not None and len(args.tag) > 0:
        tags_in = args.tag
    if title_in is not None and len(args.title) > 0:
        title_in = ' '.join(args.title)
    if desc_in is not None and len(args.comment) > 0:
        desc_in = ' '.join(args.comment)

    if args.debug:
        logger.setLevel(logging.DEBUG)
        logdbg('Version %s', __version__)
    else:
        logging.disable(logging.WARNING)
        urllib3.disable_warnings()

    # Handle encrypt/decrypt options at top priority
    if args.lock is not None:
        BukuCrypt.encrypt_file(args.lock)

    if args.unlock is not None:
        BukuCrypt.decrypt_file(args.unlock)

    # Initialize the database and get handles, set verbose by default
    bdb = BukuDb(args.json, args.format, not args.tacit)

    # Add a record
    if args.add is not None:
        # Parse tags into a comma-separated string
        tags = DELIM
        keywords = args.add
        if tags_in is not None:
            if tags_in[0] == '+' and len(tags_in) == 1:
                pass
            elif tags_in[0] == '+':
                # The case: buku -a url tag1, tag2 --tag + tag3, tag4
                tags_in = tags_in[1:]
                # In case of add, args.add may have URL followed by tags
                # Add delimiter as url+tags may not end with one
                keywords = args.add + [DELIM] + tags_in
            else:
                keywords = args.add + [DELIM] + tags_in

        if len(keywords) > 1:
            tags = parse_tags(keywords[1:])

        bdb.add_rec(args.add[0], title_in, tags, desc_in, args.immutable)

    # Update record
    if update:
        if args.url is not None:
            url_in = args.url[0]
        else:
            url_in = ''

        # Parse tags into a comma-separated string
        if tags_in and len(tags_in):
            if tags_in[0] == '+':
                tags = '+%s' % parse_tags(tags_in[1:])
            elif tags_in[0] == '-':
                tags = '-%s' % parse_tags(tags_in[1:])
            else:
                tags = parse_tags(tags_in)
        else:
            tags = None

        if len(args.update) == 0:
            bdb.update_rec(0, url_in, title_in, tags, desc_in,
                           args.immutable, args.threads)
        else:
            for idx in args.update:
                if is_int(idx):
                    bdb.update_rec(int(idx), url_in, title_in, tags,
                                   desc_in, args.immutable, args.threads)
                elif '-' in idx and is_int(idx.split('-')[0]) \
                        and is_int(idx.split('-')[1]):
                    lower = int(idx.split('-')[0])
                    upper = int(idx.split('-')[1])
                    if lower > upper:
                        lower, upper = upper, lower

                    # Update only once if range starts from 0 (all)
                    if lower == 0:
                        bdb.update_rec(0, url_in, title_in, tags, desc_in,
                                       args.immutable, args.threads)
                    else:
                        for _id in range(lower, upper + 1):
                            bdb.update_rec(_id, url_in, title_in, tags,
                                           desc_in, args.immutable,
                                           args.threads)
                            if interrupted:
                                break

                if interrupted:
                    break

    # Search operations
    search_results = None
    search_opted = True

    if args.sany is not None:
        # Search URLs, titles, tags for any keyword
        search_results = bdb.searchdb(args.sany, False, args.deep)
    elif args.sall is not None:
        # Search URLs, titles, tags with all keywords
        search_results = bdb.searchdb(args.sall, True, args.deep)
    elif args.sreg is not None:
        # Run a regular expression search
        search_results = bdb.searchdb(args.sreg, regex=True)
    elif tagsearch:
        # Search bookmarks by tag
        if len(args.stag) > 0:
            search_results = bdb.search_by_tag(' '.join(args.stag))
        else:
            # Use sub prompt to list all tags
            prompt(bdb, None, args.noprompt, subprompt=True)
            search_opted = False
    else:
        search_opted = False

    if search_results:
        oneshot = args.noprompt
        # In case of search and delete, prompt should be non-interactive
        if args.delete is not None and len(args.delete) == 0:
            oneshot = True

        if not args.json:
            prompt(bdb, search_results, oneshot, args.deep)
        else:
            # Printing in Json format is non-interactive
            print(format_json(search_results, field_filter=args.format))

        # Delete search results if opted
        if args.delete is not None and len(args.delete) == 0:
            bdb.delete_resultset(search_results)

    # Delete record(s)
    if args.delete is not None:
        if len(args.delete) == 0:
            # Attempt delete-all only if search was not opted
            if not search_opted:
                bdb.cleardb()
        elif len(args.delete) == 1 and '-' in args.delete[0]:
            vals = str(args.delete[0]).split('-')
            if len(vals) == 2 and is_int(vals[0]) and is_int(vals[1]):
                if int(vals[0]) == int(vals[1]):
                    bdb.delete_rec(int(vals[0]))
                elif int(vals[0]) < int(vals[1]):
                    bdb.delete_rec(0, int(vals[0]), int(vals[1]), True)
                else:
                    bdb.delete_rec(0, int(vals[1]), int(vals[0]), True)
            else:
                logerr('Invalid index or range')
                bdb.close_quit(1)
        else:
            ids = []
            # Select the unique indices
            for idx in args.delete:
                if idx not in ids:
                    ids += (idx,)

            try:
                # Index delete order - highest to lowest
                ids.sort(key=lambda x: int(x), reverse=True)
                for idx in ids:
                    bdb.delete_rec(int(idx))
            except ValueError:
                logerr('Invalid index or range')

    # Print records
    if args.print is not None:
        if len(args.print) == 0:
            bdb.print_rec(0)
        else:
            for idx in args.print:
                if is_int(idx):
                    bdb.print_rec(int(idx))
                elif '-' in idx and is_int(idx.split('-')[0]) \
                        and is_int(idx.split('-')[1]):
                    lower = int(idx.split('-')[0])
                    upper = int(idx.split('-')[1])
                    if lower > upper:
                        lower, upper = upper, lower
                    for _id in range(lower, upper + 1):
                        bdb.print_rec(_id)
                else:
                    logerr('Invalid index or range')
                    bdb.close_quit(1)

    # Replace a tag in DB
    if args.replace is not None:
        if len(args.replace) == 1:
            bdb.delete_tag_at_index(0, args.replace[0])
        else:
            bdb.replace_tag(args.replace[0], args.replace[1:])

    # Export bookmarks
    if args.export is not None:
        if args.tag is None:
            bdb.exportdb(args.export[0], args.markdown)
        elif len(args.tag) == 0:
            logerr('Missing tag')
        else:
            bdb.exportdb(args.export[0], args.markdown, args.tag)

    # Import bookmarks
    if args.importfile is not None:
        bdb.importdb(args.importfile[0], args.markdown)

    # Merge a database file and exit
    if args.merge is not None:
        bdb.mergedb(args.merge[0])

    # Open URL in browser
    if args.open is not None:
        if args.open < 0:
            logerr('Index must be >= 0')
            bdb.close_quit(1)
        bdb.browse_by_index(args.open)

    # Shorten URL
    if args.shorten:
        if is_int(args.shorten[0]):
            shorturl = bdb.tnyfy_url(index=int(args.shorten[0]))
        else:
            shorturl = bdb.tnyfy_url(url=args.shorten[0])

        if shorturl:
            print(shorturl)

    # Expand URL
    if args.expand:
        if is_int(args.expand[0]):
            url = bdb.tnyfy_url(index=int(args.expand[0]), shorten=False)
        else:
            url = bdb.tnyfy_url(url=args.expand[0], shorten=False)

        if url:
            print(url)

    # Report upstream version
    if args.upstream:
        check_upstream_release()

    # Fix tags
    if args.fixtags:
        bdb.fixtags()

    # Close DB connection and quit
    bdb.close_quit(0)


if __name__ == '__main__':
    main()
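
# Example invocations (derived from the option groups in main() above):
#   buku -a https://example.com linux, news   # add a bookmark with two tags
#   buku -u 15 -t 'Example site'              # set the title of index 15
#   buku -s kernel debugging                  # records matching ANY keyword
#   buku --stag                               # interactive tag list
#   buku -l 10                                # encrypt DB with 10 iterations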