#!/usr/bin/env python3 # # Bookmark management utility # # Copyright © 2015-2017 Arun Prakash Jana # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Buku. If not, see . import argparse import html.parser as HTMLParser import json import logging import os import re try: import readline readline except ImportError: pass import requests import signal import sqlite3 import sys import threading import urllib3 from urllib3.util import parse_url, make_headers import webbrowser __version__ = '3.1' __author__ = 'Arun Prakash Jana ' __license__ = 'GPLv3' # Global variables interrupted = False # Received SIGINT DELIM = ',' # Delimiter used to store tags in DB SKIP_MIMES = {'.pdf', '.txt'} colorize = True # Allow color output by default # Default colour to print records ID_str = '\x1b[96;1m%d. \x1b[1;92m%s\x1b[0;2m [%s]\x1b[0m\n' ID_DB_str = '\x1b[96;1m%d. \x1b[1;92m%s\x1b[0m' MUTE_str = '%s \x1b[2m(L)\x1b[0m\n' URL_str = '%s \x1b[91m>\x1b[0m \x1b[2m%s\x1b[0m\n' DESC_str = '%s \x1b[91m+\x1b[0m %s\n' TAG_str = '%s \x1b[91m#\x1b[0m %s\n' # Disguise as Firefox on Ubuntu USER_AGENT = ('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) ' 'Gecko/20100101 Firefox/51.0') myheaders = None # Default dictionary of headers myproxy = None # Default proxy # Set up logging logger = logging.getLogger() logdbg = logger.debug logerr = logger.error class BukuHTMLParser(HTMLParser.HTMLParser): '''Class to parse and fetch the title from a HTML page, if available ''' def __init__(self): HTMLParser.HTMLParser.__init__(self) self.in_title_tag = False self.data = '' self.prev_tag = None self.parsed_title = None def handle_starttag(self, tag, attrs): self.in_title_tag = False if tag == 'title': self.in_title_tag = True self.prev_tag = tag def handle_endtag(self, tag): if tag == 'title': self.in_title_tag = False if self.data != '': self.parsed_title = self.data self.reset() # We have received title data, exit parsing def handle_data(self, data): if self.prev_tag == 'title' and self.in_title_tag: self.data += data def error(self, message): pass class BukuCrypt: '''Class to handle encryption and decryption of the database file. Functionally a separate entity. Involves late imports in the static functions but it saves ~100ms each time. Given that encrypt/decrypt are not done automatically and any one should be called at a time, this doesn't seem to be an outrageous approach. ''' # Crypto constants BLOCKSIZE = 0x10000 # 64 KB blocks SALT_SIZE = 0x20 CHUNKSIZE = 0x80000 # Read/write 512 KB chunks @staticmethod def get_filehash(filepath): '''Get the SHA256 hash of a file :param filepath: path to the file :return: hash digest of the file ''' from hashlib import sha256 with open(filepath, 'rb') as fp: hasher = sha256() buf = fp.read(BukuCrypt.BLOCKSIZE) while len(buf) > 0: hasher.update(buf) buf = fp.read(BukuCrypt.BLOCKSIZE) return hasher.digest() @staticmethod def encrypt_file(iterations, dbfile=None): '''Encrypt the bookmarks database file :param iterations: number of iterations for key generation :param dbfile: custom database file path (including filename) ''' try: from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.ciphers import (Cipher, modes, algorithms) from getpass import getpass from hashlib import sha256 import struct except ImportError: logerr('cryptography lib(s) missing') sys.exit(1) if iterations < 1: logerr('Iterations must be >= 1') sys.exit(1) if not dbfile: dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db') encfile = dbfile + '.enc' db_exists = os.path.exists(dbfile) enc_exists = os.path.exists(encfile) if db_exists and not enc_exists: pass elif not db_exists: logerr('%s missing. Already encrypted?', dbfile) sys.exit(1) else: # db_exists and enc_exists logerr('Both encrypted and flat DB files exist!') sys.exit(1) password = getpass() passconfirm = getpass() if not password or not passconfirm: logerr('Empty password') sys.exit(1) if password != passconfirm: logerr('Passwords do not match') sys.exit(1) try: # Get SHA256 hash of DB file dbhash = BukuCrypt.get_filehash(dbfile) except Exception as e: logerr(e) sys.exit(1) # Generate random 256-bit salt and key salt = os.urandom(BukuCrypt.SALT_SIZE) key = ('%s%s' % (password, salt.decode('utf-8', 'replace'))).encode('utf-8') for _ in range(iterations): key = sha256(key).digest() iv = os.urandom(16) encryptor = Cipher( algorithms.AES(key), modes.CBC(iv), backend=default_backend() ).encryptor() filesize = os.path.getsize(dbfile) try: with open(dbfile, 'rb') as infp, open(encfile, 'wb') as outfp: outfp.write(struct.pack(' index: self.cur.execute(query1, (max_id,)) results = self.cur.fetchall() for row in results: self.cur.execute(query2, (row[0],)) self.cur.execute(query3, (index, row[1], row[2], row[3], row[4],)) if not delay_commit: self.conn.commit() if self.chatty: print('Index %d moved to %d' % (row[0], index)) def delete_rec(self, index, low=0, high=0, is_range=False, delay_commit=False): '''Delete a single record or remove the table if index is None :param index: DB index of deleted entry :param low: actual lower index of range :param high: actual higher index of range :param is_range: a range is passed using low and high arguments index is ignored if is_range is True (use dummy index) :param delay_commit: do not commit to DB, caller's responsibility :return: True on success, False on failure ''' if is_range: # Delete a range of indices if low < 0 or high < 0: logerr('Negative range boundary') return False if low > high: low, high = high, low # If range starts from 0, delete all records if low == 0: return self.cleardb() try: query = 'DELETE from bookmarks where id BETWEEN ? AND ?' self.cur.execute(query, (low, high)) print('Index %d-%d: %d deleted' % (low, high, self.cur.rowcount)) if not self.cur.rowcount: return False # Compact DB by ascending order of index to ensure # the existing higher indices move only once # Delayed commit is forced for index in range(low, high + 1): self.compactdb(index, delay_commit=True) if not delay_commit: self.conn.commit() except IndexError: logerr('No matching index') return False elif index == 0: # Remove the table return self.cleardb() else: # Remove a single entry try: query = 'DELETE FROM bookmarks WHERE id = ?' self.cur.execute(query, (index,)) if self.cur.rowcount == 1: print('Index %d deleted' % index) self.compactdb(index, delay_commit=True) if not delay_commit: self.conn.commit() else: logerr('No matching index %d', index) return False except IndexError: logerr('No matching index %d', index) return False return True def delete_resultset(self, results): '''Delete search results in descending order of DB index. Indices are expected to be unique and in ascending order. This API forces a delayed commit. :param results: set of results to delete :return: True on success, False on failure ''' resp = read_in('Delete the search results? (y/n): ') if resp != 'y': return False # delete records in reverse order pos = len(results) - 1 while pos >= 0: idx = results[pos][0] self.delete_rec(idx, delay_commit=True) # Commit at every 200th removal if pos % 200 == 0: self.conn.commit() pos -= 1 return True def delete_rec_all(self, delay_commit=False): '''Removes all records in the Bookmarks table :param delay_commit: do not commit to DB, caller responsibility :return: True on success, False on failure ''' try: self.cur.execute('DELETE FROM bookmarks') if not delay_commit: self.conn.commit() return True except Exception as e: logerr('delete_rec_all(): %s', e) return False def cleardb(self): '''Drops the bookmark table if it exists :return: True on success, False on failure ''' resp = read_in('Remove ALL bookmarks? (y/n): ') if resp != 'y': print('No bookmarks deleted') return False self.cur.execute('DROP TABLE if exists bookmarks') self.conn.commit() print('All bookmarks deleted') return True def print_rec(self, index=0, low=0, high=0, is_range=False): '''Print bookmark details at index or all bookmarks if index is 0 A negative index behaves like tail, if title is blank show "Untitled" :param index: index to print, 0 prints all :param low: actual lower index of range :param high: actual higher index of range :param is_range: a range is passed using low and high arguments index is ignored if is_range is True ''' if (index < 0): # Show the last n records _id = self.get_max_id() if _id == -1: logerr('Empty database') return False low = (1 if _id <= -index else _id + index + 1) high = _id is_range = True if is_range: if low < 0 or high < 0: logerr('Negative range boundary') return False if low > high: low, high = high, low try: # If range starts from 0 print all records if low == 0: query = 'SELECT * from bookmarks' resultset = self.cur.execute(query) else: query = 'SELECT * from bookmarks where id BETWEEN ? AND ?' resultset = self.cur.execute(query, (low, high)) except IndexError: logerr('Index out of range') return elif index != 0: # Show record at index try: query = 'SELECT * FROM bookmarks WHERE id = ? LIMIT 1' self.cur.execute(query, (index,)) results = self.cur.fetchall() if not results: logerr('No matching index %d', index) return except IndexError: logerr('No matching index %d', index) return if not self.json: for row in results: if self.field_filter == 0: print_single_rec(row) elif self.field_filter == 1: print('%s\t%s' % (row[0], row[1])) elif self.field_filter == 2: print('%s\t%s\t%s' % (row[0], row[1], row[3][1:-1])) elif self.field_filter == 3: print('%s\t%s' % (row[0], row[2])) elif self.field_filter == 4: print('%s\t%s\t%s\t%s' % ( row[0], row[1], row[2], row[3][1:-1])) else: print(format_json(results, True, self.field_filter)) return else: # Show all entries self.cur.execute('SELECT * FROM bookmarks') resultset = self.cur.fetchall() if not resultset: logerr('0 records') return if not self.json: if self.field_filter == 0: for row in resultset: print_single_rec(row) elif self.field_filter == 1: for row in resultset: print('%s\t%s' % (row[0], row[1])) elif self.field_filter == 2: for row in resultset: print('%s\t%s\t%s' % (row[0], row[1], row[3][1:-1])) elif self.field_filter == 3: for row in resultset: print('%s\t%s' % (row[0], row[2])) elif self.field_filter == 4: for row in resultset: print('%s\t%s\t%s\t%s' % ( row[0], row[1], row[2], row[3][1:-1])) else: print(format_json(resultset, field_filter=self.field_filter)) def get_tag_all(self): '''Get list of tags in DB :return: tuple (list of unique tags sorted alphabetically, a dictionary of {tag:usage_count}) ''' tags = [] unique_tags = [] dic = {} qry = 'SELECT DISTINCT tags, COUNT(tags) FROM bookmarks GROUP BY tags' for row in self.cur.execute(qry): tagset = row[0].strip(DELIM).split(DELIM) for tag in tagset: if tag not in tags: dic[tag] = row[1] tags += (tag,) else: dic[tag] += row[1] if not tags: return tags, dic if tags[0] == '': unique_tags = sorted(tags[1:]) else: unique_tags = sorted(tags) return unique_tags, dic def replace_tag(self, orig, new=None): '''Replace original tag by new tags in all records. Remove original tag if new tag is empty. :param orig: original tag as string :param new: replacement tags as list :return: True on success, False on failure ''' newtags = DELIM orig = delim_wrap(orig) if new is not None: newtags = parse_tags(new) if orig == newtags: print('Tags are same.') return False # Remove original tag from DB if new tagset reduces to delimiter if newtags == DELIM: return self.delete_tag_at_index(0, orig) # Update bookmarks with original tag query = 'SELECT id, tags FROM bookmarks WHERE tags LIKE ?' self.cur.execute(query, ('%' + orig + '%',)) results = self.cur.fetchall() if results: query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in results: tags = row[1].replace(orig, newtags) tags = parse_tags([tags]) self.cur.execute(query, (tags, row[0],)) print('Index %d updated' % row[0]) self.conn.commit() return True def set_tag(self, cmdstr, taglist): '''Append, overwrite, remove tags using the symbols >>, > and << respectively. :param cmdstr: command pattern :param taglist: a list of tags :return: number of indices updated on success, -1 on failure ''' if not cmdstr or not taglist: return -1 flag = 0 # 0: invalid, 1: append, 2: overwrite, 3: remove index = cmdstr.find('>>') if index == -1: index = cmdstr.find('>') if index != -1: flag = 2 else: index = cmdstr.find('<<') if index != -1: flag = 3 else: flag = 1 if not flag: return -1 tags = DELIM id_list = cmdstr[:index].split() try: for id in id_list: if is_int(id) and int(id) > 0: tags += taglist[int(id) - 1] + DELIM elif '-' in id: vals = [int(x) for x in id.split('-')] if vals[0] > vals[-1]: vals[0], vals[-1] = vals[-1], vals[0] for _id in range(vals[0], vals[-1] + 1): tags += taglist[_id - 1] + DELIM else: return -1 except ValueError: return -1 if flag != 2: index += 1 update_count = 0 query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' try: db_id_list = cmdstr[index + 1:].split() for id in db_id_list: if is_int(id) and int(id) > 0: if flag == 1: if self.append_tag_at_index(id, tags, True): update_count += 1 elif flag == 2: tags = parse_tags([tags]) self.cur.execute(query, (tags, id,)) update_count += self.cur.rowcount else: self.delete_tag_at_index(id, tags, True) update_count += 1 elif '-' in id: vals = [int(x) for x in id.split('-')] if vals[0] > vals[-1]: vals[0], vals[-1] = vals[-1], vals[0] for _id in range(vals[0], vals[-1] + 1): if flag == 1: if self.append_tag_at_index(_id, tags, True): update_count += 1 elif flag == 2: tags = parse_tags([tags]) self.cur.execute(query, (tags, _id,)) update_count += self.cur.rowcount else: if self.delete_tag_at_index(_id, tags, True): update_count += 1 else: return -1 except ValueError: return -1 except sqlite3.IntegrityError: return -1 try: self.conn.commit() except: return -1 return update_count def browse_by_index(self, index=0, low=0, high=0, is_range=False): '''Open URL at index or range of indies in browser :param index: index to browse, 0 opens a random bookmark :param low: actual lower index of range :param high: actual higher index of range :param is_range: a range is passed using low and high arguments index is ignored if is_range is True :return: True on success, False on failure ''' if is_range: if low < 0 or high < 0: logerr('Negative range boundary') return False if low > high: low, high = high, low try: # If range starts from 0 throw an error if low <= 0: raise IndexError else: qry = 'SELECT URL from bookmarks where id BETWEEN ? AND ?' for row in self.cur.execute(qry, (low, high)): browse(row[0]) return True except IndexError: logerr('Index out of range') return False if index < 0: logerr('Invalid index %d', index) return False if index == 0: qry = 'SELECT id from bookmarks ORDER BY RANDOM() LIMIT 1' self.cur.execute(qry) result = self.cur.fetchone() # Return if no entries in DB if result is None: print('No bookmarks added yet ...') return False index = result[0] logdbg('Opening random index %d', index) qry = 'SELECT URL FROM bookmarks WHERE id = ? LIMIT 1' try: for row in self.cur.execute(qry, (index,)): browse(row[0]) return True logerr('No matching index %d', index) except IndexError: logerr('No matching index %d', index) return False def exportdb(self, filepath, taglist=None): '''Export bookmarks to a Firefox bookmarks formatted html or a markdown file, if destination file name ends with '.md'. :param filepath: path to file to export to :param taglist: list of specific tags to export :return: True on success, False on failure ''' import time count = 0 timestamp = str(int(time.time())) arguments = [] query = 'SELECT * FROM bookmarks' is_tag_valid = False if taglist is not None: tagstr = parse_tags(taglist) if not tagstr or tagstr == DELIM: logerr('Invalid tag') return False tags = tagstr.split(DELIM) query += ' WHERE' for tag in tags: if tag != '': is_tag_valid = True query += " tags LIKE '%' || ? || '%' OR" tag = delim_wrap(tag) arguments += (tag,) if is_tag_valid: query = query[:-3] else: query = query[:-6] logdbg('(%s), %s', query, arguments) self.cur.execute(query, arguments) resultset = self.cur.fetchall() if not resultset: print('No bookmarks exported') return False if os.path.exists(filepath): resp = read_in(filepath + ' exists. Overwrite? (y/n): ') if resp != 'y': return False try: outfp = open(filepath, mode='w', encoding='utf-8') except Exception as e: logerr(e) return False if filepath.endswith('.md'): outfp.write('List of buku bookmarks:\n\n') for row in resultset: if row[2] == '': out = '- [Untitled](' + row[1] + ')\n' else: out = '- [' + row[2] + '](' + row[1] + ')\n' outfp.write(out) count += 1 else: outfp.write('\n\n' '\n' 'Bookmarks\n' '

Bookmarks

\n\n' '

\n' '

Buku bookmarks

\n' '

\n' % (timestamp, timestamp)) for row in resultset: out = ('

\n' if row[4] != '': out += '
' + row[4] + '\n' outfp.write(out) count += 1 outfp.write('

\n

') outfp.close() print('%s exported' % count) return True def importdb(self, filepath, tacit=False): '''Import bookmarks from a html or a markdown file (with extension '.md'). Supports Firefox, Google Chrome and IE exported html :param filepath: path to file to import :return: True on success, False on failure ''' if not tacit: newtag = input('Specify unique tag for imports (Enter to skip): ') else: newtag = None if filepath.endswith('.md'): with open(filepath, mode='r', encoding='utf-8') as infp: for line in infp: # Supported markdown format: [title](url) # Find position of title end, url start delimiter combo index = line.find('](') if index != -1: # Find title start delimiter title_start_delim = line[:index].find('[') # Reverse find the url end delimiter url_end_delim = line[index + 2:].rfind(')') if title_start_delim != -1 and url_end_delim > 0: # Parse title title = line[title_start_delim + 1:index] # Parse url url = line[index + 2:index + 2 + url_end_delim] if (is_nongeneric_url(url)): continue self.add_rec(url, title, delim_wrap(newtag) if newtag else None, None, 0, True) self.conn.commit() infp.close() else: try: import bs4 with open(filepath, mode='r', encoding='utf-8') as infp: soup = bs4.BeautifulSoup(infp, 'html.parser') except ImportError: logerr('Beautiful Soup not found') return False except Exception as e: logerr(e) return False resp = input('Add imported folders names as tags? (y/n): ') for tag in soup.findAll('a'): # Extract comment from

tag try: if (is_nongeneric_url(tag['href'])): continue except KeyError as e: continue desc = None comment_tag = tag.findNextSibling('dd') if comment_tag: desc = comment_tag.find(text=True, recursive=False) # add parent folder as tag if resp == 'y': # could be its folder or not possible_folder = tag.find_previous('h3') # get list of tags within that folder tag_list = tag.parent.parent.find_parent('dl') if ((possible_folder) and possible_folder.parent in list(tag_list.parents)): # then it's the folder of this bookmark if tag.has_attr('tags'): tag['tags'] += (DELIM + possible_folder.text) else: tag['tags'] = possible_folder.text # add unique tag if opted if newtag: if tag.has_attr('tags'): tag['tags'] += (DELIM + newtag) else: tag['tags'] = newtag self.add_rec(tag['href'], tag.string, parse_tags([tag['tags']]) if tag.has_attr('tags') else None, desc, 0, True) self.conn.commit() infp.close() return True def mergedb(self, path): '''Merge bookmarks from another Buku database file :param path: path to DB file to merge :return: True on success, False on failure ''' try: # Connect to input DB if sys.version_info >= (3, 4, 4): # Python 3.4.4 and above indb_conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True) else: indb_conn = sqlite3.connect(path) indb_cur = indb_conn.cursor() indb_cur.execute('SELECT * FROM bookmarks') except Exception as e: logerr(e) return False resultset = indb_cur.fetchall() if resultset: for row in resultset: self.add_rec(row[1], row[2], row[3], row[4], row[5], True) self.conn.commit() try: indb_cur.close() indb_conn.close() except Exception: pass return True def tnyfy_url(self, index=0, url=None, shorten=True): '''Shorted a URL using Google URL shortener :param index: shorten the URL at DB index (int) :param url: pass a URL (string) :param shorten: True (default) to shorten, False to expand (boolean) :return: shortened url string on success, None on failure ''' if not index and not url: logerr('Either a valid DB index or URL required') return None if index: self.cur.execute('SELECT url FROM bookmarks WHERE id = ? LIMIT 1', (index,)) results = self.cur.fetchall() if not results: return None url = results[0][0] proxies = { 'https': os.environ.get('https_proxy'), } from urllib.parse import quote_plus as qp urlbase = 'https://tny.im/yourls-api.php?action=' if shorten: _u = urlbase + 'shorturl&format=simple&url=' + qp(url) else: _u = urlbase + 'expand&format=simple&shorturl=' + qp(url) try: r = requests.post(_u, headers={ 'content-type': 'application/json', 'User-Agent': USER_AGENT }, proxies=proxies) except Exception as e: logerr(e) return None if r.status_code != 200: logerr('[%s] %s', r.status_code, r.reason) return None return r.text def fixtags(self): '''Undocumented API to fix tags set in earlier versions. Functionalities: 1. Remove duplicate tags 2. Sort tags 3. Use lower case to store tags ''' to_commit = False self.cur.execute('SELECT id, tags FROM bookmarks ORDER BY id ASC') resultset = self.cur.fetchall() query = 'UPDATE bookmarks SET tags = ? WHERE id = ?' for row in resultset: oldtags = row[1] if oldtags == DELIM: continue tags = parse_tags([oldtags]) if tags == oldtags: continue self.cur.execute(query, (tags, row[0],)) to_commit = True if to_commit: self.conn.commit() def close_quit(self, exitval=0): '''Close a DB connection and exit :param exitval: program exit value ''' if self.conn is not None: try: self.cur.close() self.conn.close() except Exception: # ignore errors here, we're closing down pass sys.exit(exitval) class ExtendedArgumentParser(argparse.ArgumentParser): '''Extend classic argument parser''' # Print program info @staticmethod def program_info(file=sys.stdout): if sys.platform == 'win32' and file == sys.stdout: file = sys.stderr file.write(''' SYMBOLS: > url + comment # tags Version %s Copyright © 2015-2017 %s License: %s Webpage: https://github.com/jarun/Buku ''' % (__version__, __author__, __license__)) # Print prompt help @staticmethod def prompt_help(file=sys.stdout): file.write(''' keys: 1-N browse search result indices and/or ranges a open all results in browser s keyword [...] search for records with ANY keyword S keyword [...] search for records with ALL keywords d match substrings ('pen' matches 'opened') r expression run a regex search t [...] search bookmarks by a tag or show tag list g [...][>>|>|<<][...] append, remove tags to/from indices and/or ranges o [...] browse bookmarks by indices and/or ranges p [...] print bookmarks by indices and/or ranges w [editor|index] edit and add or update a bookmark (tag list index fetches bookmarks by tag) ? show this help q, ^D, double Enter exit buku ''') # Help def print_help(self, file=sys.stdout): super(ExtendedArgumentParser, self).print_help(file) self.program_info(file) # ---------------- # Helper functions # ---------------- def is_bad_url(url): '''Check if URL is malformed This API is not bulletproof but works in most cases. :param url: URL to scan :return: True or False ''' # Get the netloc token netloc = parse_url(url).netloc if not netloc: # Try of prepend '//' and get netloc netloc = parse_url('//' + url).netloc if not netloc: return True logdbg('netloc: %s', netloc) # netloc cannot start or end with a '.' if netloc.startswith('.') or netloc.endswith('.'): return True # netloc should have at least one '.' if netloc.rfind('.') < 0: return True return False def is_nongeneric_url(url): '''Returns true for URLs which are non-http and non-generic''' ignored_prefix = ['place:', 'file://', 'apt:'] for prefix in ignored_prefix: if url.startswith(prefix): return True return False def is_ignored_mime(url): '''Check if URL links to ignored mime Only a 'HEAD' request is made for these URLs :param url: URL to scan :return: True or False ''' for mime in SKIP_MIMES: if url.lower().endswith(mime): logdbg('matched MIME: %s', mime) return True return False def get_page_title(resp): '''Invoke HTML parser and extract title from HTTP response :param resp: HTTP(S) GET response :return: title fetched from parsed page ''' parser = BukuHTMLParser() try: parser.feed(resp.data.decode(errors='replace')) except Exception as e: # Suppress Exception due to intentional self.reset() in BHTMLParser if (logger.isEnabledFor(logging.DEBUG) and str(e) != 'we should not get here!'): logerr('get_page_title(): %s', e) finally: return parser.parsed_title def gen_headers(): '''Generate headers for network connection''' global myheaders, myproxy myheaders = { 'Accept-Encoding': 'gzip,deflate', 'User-Agent': USER_AGENT, 'Accept': '*/*', 'Cookie': '', 'DNT': '1' } myproxy = os.environ.get('https_proxy') if myproxy: try: url = parse_url(myproxy) except Exception as e: logerr(e) return # Strip username and password (if present) and update headers if url.auth: myproxy = myproxy.replace(url.auth + '@', '') auth_headers = make_headers(basic_auth=url.auth) myheaders.update(auth_headers) logdbg('proxy: [%s]', myproxy) def get_PoolManager(): '''Creates a pool manager with proxy support, if applicable :return: ProxyManager if https_proxy is defined, else PoolManager. ''' if myproxy: return urllib3.ProxyManager(myproxy, num_pools=1, headers=myheaders) return urllib3.PoolManager(num_pools=1, headers=myheaders) def network_handler(url, http_head=False): '''Handle server connection and redirections :param url: URL to fetch :param http_head: send only HTTP HEAD request :return: (title, recognized mime, bad url) tuple ''' page_title = None if is_bad_url(url): return ('', 0, 1) if is_ignored_mime(url) or http_head: method = 'HEAD' else: method = 'GET' if not myheaders: gen_headers() try: http_handler = get_PoolManager() while True: resp = http_handler.request(method, url, timeout=40) if resp.status == 200: if method == 'GET': page_title = get_page_title(resp) elif resp.status == 403 and url.endswith('/'): # HTTP response Forbidden # Handle URLs in the form of https://www.domain.com/ # which fail when trying to fetch resource '/' # retry without trailing '/' logdbg('Received status 403: retrying...') # Remove trailing / url = url[:-1] resp.release_conn() continue else: logerr('[%s] %s', resp.status, resp.reason) if resp: resp.release_conn() break except Exception as e: logerr('network_handler(): %s', e) finally: if http_handler: http_handler.clear() if method == 'HEAD': return ('', 1, 0) if page_title is None: return ('', 0, 0) return (page_title.strip().replace('\n', ''), 0, 0) def parse_tags(keywords=[]): '''Format and get tag string from tokens :param keywords: list of tags :return: comma-delimited string of tags :return: just delimiter, if no keywords :return: None, if keyword is None ''' if keywords is None: return None if not keywords: return DELIM tags = DELIM # Cleanse and get the tags tagstr = ' '.join(keywords) marker = tagstr.find(DELIM) while marker >= 0: token = tagstr[0:marker] tagstr = tagstr[marker + 1:] marker = tagstr.find(DELIM) token = token.strip() if token == '': continue tags += token + DELIM tagstr = tagstr.strip() if tagstr != '': tags += tagstr + DELIM logdbg('keywords: %s', keywords) logdbg('parsed tags: [%s]', tags) if tags == DELIM: return tags orig_tags = tags.strip(DELIM).split(DELIM) # Add unique tags in lower case unique_tags = [] for tag in orig_tags: tag = tag.lower() if tag not in unique_tags: unique_tags += (tag, ) # Sort the tags sorted_tags = sorted(unique_tags) # Wrap with delimiter return delim_wrap(DELIM.join(sorted_tags)) def edit_at_prompt(obj, nav): '''Edit and add or update a bookmark :param obj: a valid instance of BukuDb class ''' if nav == 'w': editor = get_system_editor() if not is_editor_valid(editor): return elif is_int(nav[2:]): obj.edit_update_rec(int(nav[2:])) return else: editor = nav[2:] result = edit_rec(editor, '', None, DELIM, None) if result is not None: url, title, tags, desc = result obj.add_rec(url, title, tags, desc) def taglist_subprompt(obj, msg, noninteractive=False): '''Additional prompt to show unique tag list :param obj: a valid instance of BukuDb class :param msg: sub-prompt message :param noninteractive: do not seek user input :return: new command string ''' unique_tags, dic = obj.get_tag_all() new_results = True while True: if new_results: if not unique_tags: count = 0 print('0 tags') else: count = 1 for tag in unique_tags: print('%6d. %s (%d)' % (count, tag, dic[tag])) count += 1 print() if noninteractive: return try: nav = read_in(msg) if not nav: nav = read_in(msg) if not nav: # Quit on double enter return 'q' nav = nav.strip() except EOFError: return 'q' if is_int(nav) and int(nav) > 0 and int(nav) < count: return 't ' + unique_tags[int(nav) - 1] elif is_int(nav): print('No matching index %s' % nav) new_results = False elif nav == 't': new_results = True elif (nav == 'q' or nav == 'd' or nav == '?' or nav.startswith('s ') or nav.startswith('S ') or nav.startswith('r ') or nav.startswith('t ') or nav.startswith('o ') or nav.startswith('p ') or nav.startswith('g ')): return nav elif nav == 'w' or nav.startswith('w '): edit_at_prompt(obj, nav) new_results = False else: print('Invalid input') new_results = False def prompt(obj, results, noninteractive=False, deep=False, subprompt=False): '''Show each matching result from a search and prompt :param obj: a valid instance of BukuDb class :param results: result set from a DB query :param noninteractive: do not seek user input :param deep: use deep search :param subprompt: jump directly to sub prompt ''' if not type(obj) is BukuDb: logerr('Not a BukuDb instance') return new_results = True if colorize: msg = '\x1b[7mbuku (? for help)\x1b[0m ' else: msg = 'buku (? for help): ' while True: if not subprompt: if new_results: if results: count = 0 for row in results: count += 1 print_single_rec(row, count) else: print('0 results') if noninteractive: return try: nav = read_in(msg) if not nav: nav = read_in(msg) if not nav: # Quit on double enter break nav = nav.strip() except EOFError: return else: nav = 't' subprompt = False # list tags with 't' if nav == 't': nav = taglist_subprompt(obj, msg, noninteractive) if noninteractive: return # search ANY match with new keywords if nav.startswith('s '): results = obj.searchdb(nav[2:].split(), False, deep) new_results = True continue # search ALL match with new keywords if nav.startswith('S '): results = obj.searchdb(nav[2:].split(), True, deep) new_results = True continue # regular expressions search with new keywords if nav.startswith('r '): results = obj.searchdb(nav[2:].split(), True, regex=True) new_results = True continue # tag search with new keywords if nav.startswith('t '): results = obj.search_by_tag(nav[2:]) new_results = True continue # quit with 'q' if nav == 'q': return # No new results fetched beyond this point new_results = False # toggle deep search with 'd' if nav == 'd': deep = not deep if deep: print('deep search on') else: print('deep search off') continue # Show help with '?' if nav == '?': ExtendedArgumentParser.prompt_help(sys.stdout) continue # Edit and add or update if nav == 'w' or nav.startswith('w '): edit_at_prompt(obj, nav) continue # Append or overwrite tags if nav.startswith('g '): unique_tags, dic = obj.get_tag_all() _count = obj.set_tag(nav[2:], unique_tags) if _count == -1: print('Invalid input') else: print('%d updated' % _count) continue # Print bookmarks by DB index if nav.startswith('p '): id_list = nav[2:].split() try: for id in id_list: if is_int(id): obj.print_rec(int(id)) elif '-' in id: vals = [int(x) for x in id.split('-')] obj.print_rec(0, vals[0], vals[-1], True) else: print('Invalid input') except ValueError: print('Invalid input') continue # Browse bookmarks by DB index if nav.startswith('o '): id_list = nav[2:].split() try: for id in id_list: if is_int(id): obj.browse_by_index(int(id)) elif '-' in id: vals = [int(x) for x in id.split('-')] obj.browse_by_index(0, vals[0], vals[-1], True) else: print('Invalid input') except ValueError: print('Invalid input') continue # Nothing to browse if there are no results if not results: print('Not in a search context') continue # open all results and re-prompt with 'a' if nav == 'a': for index in range(0, count): browse(results[index][1]) continue # iterate over white-space separated indices for nav in nav.split(): if is_int(nav): index = int(nav) - 1 if index < 0 or index >= count: print('No matching index %s' % nav) continue browse(results[index][1]) elif '-' in nav: try: vals = [int(x) for x in nav.split('-')] if vals[0] > vals[-1]: vals[0], vals[-1] = vals[-1], vals[0] for _id in range(vals[0]-1, vals[-1]): if 0 <= _id < count: browse(results[_id][1]) else: print('No matching index %d' % (_id + 1)) except ValueError: print('Invalid input') break else: print('Invalid input') break def print_single_rec(row, idx=0): # NOQA '''Print a single DB record Handles both search result and individual record :param idx: search result index. If 0, print with DB index ''' # Start with index and title if idx != 0: pr = ID_str % (idx, row[2] if row[2] else 'Untitled', row[0]) else: pr = ID_DB_str % (row[0], row[2] if row[2] else 'Untitled') # Indicate if record is immutable if row[5] & 1: pr = MUTE_str % (pr) else: pr += '\n' # Append URL pr = URL_str % (pr, row[1]) # Append description if row[4]: pr = DESC_str % (pr, row[4]) # Append tags IF not default (delimiter) if row[3] != DELIM: pr = TAG_str % (pr, row[3][1:-1]) print(pr) def format_json(resultset, single_record=False, field_filter=0): '''Return results in Json format :param single_record: indicates only one record :param field_filter: determines fields to show :return: record(s) in Json format ''' if single_record: marks = {} for row in resultset: if field_filter == 1: marks['uri'] = row[1] elif field_filter == 2: marks['uri'] = row[1] marks['tags'] = row[3][1:-1] elif field_filter == 3: marks['title'] = row[2] elif field_filter == 4: marks['uri'] = row[1] marks['tags'] = row[3][1:-1] marks['title'] = row[2] else: marks['index'] = row[0] marks['uri'] = row[1] marks['title'] = row[2] marks['description'] = row[4] marks['tags'] = row[3][1:-1] else: marks = [] for row in resultset: if field_filter == 1: record = {'uri': row[1]} elif field_filter == 2: record = {'uri': row[1], 'tags': row[3][1:-1]} elif field_filter == 3: record = {'title': row[2]} elif field_filter == 4: record = {'uri': row[1], 'title': row[2], 'tags': row[3][1:-1]} else: record = {'index': row[0], 'uri': row[1], 'title': row[2], 'description': row[4], 'tags': row[3][1:-1]} marks.append(record) return json.dumps(marks, sort_keys=True, indent=4) def is_int(string): '''Check if a string is a digit :param string: input string :return: True on success, False on exception ''' try: int(string) return True except Exception: return False def browse(url): '''Duplicate stdin, stdout (to suppress showing errors on the terminal) and open URL in default browser :param url: URL to open ''' if not parse_url(url).scheme: # Prefix with 'http://' if no scheme # Otherwise, opening in browser fails anyway # We expect http to https redirection # will happen for https-only websites logerr('scheme missing in URI, trying http') url = 'http://' + url _stderr = os.dup(2) os.close(2) _stdout = os.dup(1) os.close(1) fd = os.open(os.devnull, os.O_RDWR) os.dup2(fd, 2) os.dup2(fd, 1) try: if sys.platform != 'win32': webbrowser.open(url, new=2) else: # On Windows, the webbrowser module does not fork. # Use threads instead. def browserthread(): webbrowser.open(url, new=2) t = threading.Thread(target=browserthread) t.start() except Exception as e: logerr('browse(): %s', e) finally: os.close(fd) os.dup2(_stderr, 2) os.dup2(_stdout, 1) def check_upstream_release(): '''Check and report the latest upstream release version''' proxies = { 'https': os.environ.get('https_proxy'), } try: r = requests.get( 'https://api.github.com/repos/jarun/buku/releases?per_page=1', proxies=proxies ) except Exception as e: logerr(e) return if r.status_code != 200: logerr('[%s] %s', r.status_code, r.reason) else: latest = r.json()[0]['tag_name'] if latest == 'v' + __version__: print('This is the latest release') else: print('Latest upstream release is %s' % latest) def regexp(expr, item): '''Perform a regular expression search''' return re.search(expr, item, re.IGNORECASE) is not None def delim_wrap(token): '''Wrap a string with delimiters and return''' return DELIM + token + DELIM def read_in(msg): '''A wrapper to handle input() with interrupts disabled''' disable_sigint_handler() message = None try: message = input(msg) except KeyboardInterrupt: print('Interrupted.') enable_sigint_handler() return message def sigint_handler(signum, frame): '''Custom SIGINT handler''' global interrupted interrupted = True print('\nInterrupted.', file=sys.stderr) # Do a hard exit from here os._exit(1) DEFAULT_HANDLER = signal.signal(signal.SIGINT, sigint_handler) def disable_sigint_handler(): signal.signal(signal.SIGINT, DEFAULT_HANDLER) def enable_sigint_handler(): signal.signal(signal.SIGINT, sigint_handler) # --------------------- # Editor mode functions # --------------------- def get_system_editor(): '''Returns default system editor is $EDITOR is set''' return os.environ.get('EDITOR', 'none') def is_editor_valid(editor): '''Check if the editor string is valid :param editor: editor string :return: True if string is valid, else False ''' if editor == 'none': logerr('EDITOR is not set') return False if editor == '0': logerr('Cannot edit index 0') return False return True def to_temp_file_content(url, title_in, tags_in, desc): '''Generate temporary file content string :param url: URL to open :param title_in: string title to add manually :param tags_in: string of comma-separated tags to add manually :param desc: string description :return: lines as newline separated string ''' strings = [('# Lines beginning with "#" will be stripped.\n' '# Add URL in next line (single line).'), ] # URL if url is not None: strings += (url,) # TITLE strings += (('# Add TITLE in next line (single line). ' 'Leave blank to web fetch, "-" for no title.'),) if title_in is None: title_in = '' elif title_in == '': title_in = '-' strings += (title_in,) # TAGS strings += ('# Add comma-separated TAGS in next line (single line).',) strings += (tags_in.strip(DELIM),) if not None else '' # DESC strings += ('# Add COMMENTS in next line(s).',) if desc is not None and desc != '': strings += (desc,) else: strings += ('\n',) return '\n'.join(strings) def parse_temp_file_content(content): '''Parse and return temporary file content :param content: string of content :return: tuple url: URL to open title: string title to add manually tags: string of comma-separated tags to add manually comments: string description ''' content = content.split('\n') content = [c for c in content if not c or c[0] != '#'] if not content or content[0].strip() == '': print('Edit aborted') return None url = content[0] title = None if len(content) > 1: title = content[1] if title == '': title = None elif title == '-': title = '' tags = DELIM if len(content) > 2: tags = parse_tags([content[2]]) comments = [] if len(content) > 3: comments = [c for c in content[3:]] # need to remove all empty line that are at the end # and not those in the middle of the text for i in range(len(comments) - 1, -1, -1): if comments[i].strip() != '': break if i == -1: comments = [] else: comments = comments[0:i+1] comments = '\n'.join(comments) return url, title, tags, comments def edit_rec(editor, url, title_in, tags_in, desc): '''Edit a bookmark record :param editor: editor to open :param url: URL to open :param title_in: string title to add manually :param tags_in: string of comma-separated tags to add manually :param desc: string description :return: parsed content ''' import tempfile import subprocess temp_file_content = to_temp_file_content(url, title_in, tags_in, desc) fd, tmpfile = tempfile.mkstemp(prefix='buku-edit-') os.close(fd) try: with open(tmpfile, 'w+', encoding='utf-8') as fp: fp.write(temp_file_content) fp.flush() logdbg('Edited content written to %s', tmpfile) cmd = editor.split(' ') cmd += (tmpfile,) subprocess.call(cmd) with open(tmpfile, 'r', encoding='utf-8') as f: content = f.read() os.remove(tmpfile) except FileNotFoundError: if os.path.exists(tmpfile): os.remove(tmpfile) logerr('Cannot open editor') else: logerr('Cannot open tempfile') return None parsed_content = parse_temp_file_content(content) return parsed_content def setup_logger(logger): '''Setup logger with color :param logger: logger to colorize ''' def decorate_emit(fn): def new(*args): levelno = args[0].levelno if levelno == logging.DEBUG: color = '\x1b[35m' elif levelno == logging.ERROR: color = '\x1b[31m' elif levelno == logging.WARNING: color = '\x1b[33m' elif levelno == logging.INFO: color = '\x1b[32m' elif levelno == logging.CRITICAL: color = '\x1b[31m' else: color = '\x1b[0m' args[0].msg = '{}[{}]\x1b[0m {}'.format(color, args[0].levelname, args[0].msg) return fn(*args) return new sh = logging.StreamHandler() sh.emit = decorate_emit(sh.emit) logger.addHandler(sh) # Handle piped input def piped_input(argv, pipeargs=None): if not sys.stdin.isatty(): pipeargs += argv print('waiting for input') for s in sys.stdin: pipeargs += s.split() # main starts here def main(): global colorize, ID_str, ID_DB_str, MUTE_str, URL_str, DESC_str, TAG_str title_in = None tags_in = None desc_in = None pipeargs = [] try: piped_input(sys.argv, pipeargs) except KeyboardInterrupt: pass # If piped input, set argument vector if pipeargs: sys.argv = pipeargs # Setup custom argument parser argparser = ExtendedArgumentParser( description='''Powerful command-line bookmark manager. Your mini web! POSITIONAL ARGUMENTS: KEYWORD search keywords''', formatter_class=argparse.RawTextHelpFormatter, usage='''buku [OPTIONS] [KEYWORD [KEYWORD ...]]''', add_help=False ) HIDE = argparse.SUPPRESS argparser.add_argument('keywords', nargs='*', metavar='KEYWORD', help=HIDE) # --------------------- # GENERAL OPTIONS GROUP # --------------------- general_grp = argparser.add_argument_group( title='GENERAL OPTIONS', description=''' -a, --add URL [tag, ...] bookmark URL with comma-separated tags -u, --update [...] update fields of an existing bookmark accepts indices and ranges refresh the title, if no edit options if no arguments: - update results when used with search - otherwise refresh all titles -w, --write [editor|index] open editor to edit a fresh bookmark to update by index, EDITOR must be set -d, --delete [...] remove bookmarks from DB accepts indices or a single range if no arguments: - delete results when used with search - otherwise delete all bookmarks -h, --help show this information and exit -v, --version show the program version and exit''') addarg = general_grp.add_argument addarg('-a', '--add', nargs='+', help=HIDE) addarg('-u', '--update', nargs='*', help=HIDE) addarg('-w', '--write', nargs='?', const=get_system_editor(), help=HIDE) addarg('-d', '--delete', nargs='*', help=HIDE) addarg('-h', '--help', action='store_true', help=HIDE) addarg('-v', '--version', action='version', version=__version__, help=HIDE) # ------------------ # EDIT OPTIONS GROUP # ------------------ edit_grp = argparser.add_argument_group( title='EDIT OPTIONS', description=''' --url keyword bookmark link --tag [+|-] [...] comma-separated tags clear bookmark tagset, if no arguments '+' appends to, '-' removes from tagset --title [...] bookmark title; if no arguments: -a: do not set title, -u: clear title -c, --comment [...] notes or description of the bookmark clears description, if no arguments --immutable N disable title fetch from web on update N=0: mutable (default), N=1: immutable''') addarg = edit_grp.add_argument addarg('--url', nargs=1, help=HIDE) addarg('--tag', nargs='*', help=HIDE) addarg('--title', nargs='*', help=HIDE) addarg('-c', '--comment', nargs='*', help=HIDE) addarg('--immutable', type=int, default=-1, choices={0, 1}, help=HIDE) # -------------------- # SEARCH OPTIONS GROUP # -------------------- search_grp = argparser.add_argument_group( title='SEARCH OPTIONS', description=''' -s, --sany find records with ANY search keyword this is the default search option -S, --sall find records with ALL search keywords special keywords - "blank": entries with empty title/tag "immutable": entries with locked title --deep match substrings ('pen' matches 'opens') -r, --sreg run a regex search -t, --stag search bookmarks by a tag list all tags, if no search keywords''') addarg = search_grp.add_argument addarg('-s', '--sany', action='store_true', help=HIDE) addarg('-S', '--sall', action='store_true', help=HIDE) addarg('-r', '--sreg', action='store_true', help=HIDE) addarg('--deep', action='store_true', help=HIDE) addarg('-t', '--stag', action='store_true', help=HIDE) # ------------------------ # ENCRYPTION OPTIONS GROUP # ------------------------ crypto_grp = argparser.add_argument_group( title='ENCRYPTION OPTIONS', description=''' -l, --lock [N] encrypt DB file with N (> 0, default 8) hash iterations to generate key -k, --unlock [N] decrypt DB file with N (> 0, default 8) hash iterations to generate key''') addarg = crypto_grp.add_argument addarg('-k', '--unlock', nargs='?', type=int, const=8, help=HIDE) addarg('-l', '--lock', nargs='?', type=int, const=8, help=HIDE) # ---------------- # POWER TOYS GROUP # ---------------- power_grp = argparser.add_argument_group( title='POWER TOYS', description=''' -e, --export file export bookmarks in Firefox format html export markdown, if file ends with '.md' format: [title](url), 1 entry per line use --tag to export only specific tags -i, --import file import Firefox or Chrome bookmarks html import markdown, if file ends with '.md' -m, --merge file add bookmarks from another buku DB file -p, --print [...] show record details by indices, ranges print all bookmarks, if no arguments -n shows the last n results (like tail) -f, --format N limit fields in -p or Json search output N=1: URL, N=2: URL and tag, N=3: title, N=4: URL, title and tag -j, --json Json formatted output for -p and search --nc disable color output --np do not show the prompt, run and exit -o, --open [...] browse bookmarks by indices and ranges open a random bookmark, if no arguments --oa browse all search results immediately --replace old new replace old tag with new tag everywhere delete old tag, if new tag not specified --shorten index|URL fetch shortened url from tny.im service --expand index|URL expand a tny.im shortened url --tacit reduce verbosity --threads N max network connections in full refresh default N=4, min N=1, max N=10 -V check latest upstream version available -z, --debug show debug information and verbose logs''') addarg = power_grp.add_argument addarg('-e', '--export', nargs=1, help=HIDE) addarg('-i', '--import', nargs=1, dest='importfile', help=HIDE) addarg('-m', '--merge', nargs=1, help=HIDE) addarg('-p', '--print', nargs='*', help=HIDE) addarg('-f', '--format', type=int, default=0, choices={1, 2, 3, 4}, help=HIDE) addarg('-j', '--json', action='store_true', help=HIDE) addarg('--nc', action='store_true', help=HIDE) addarg('--np', action='store_true', help=HIDE) addarg('-o', '--open', nargs='*', help=HIDE) addarg('--oa', action='store_true', help=HIDE) addarg('--replace', nargs='+', help=HIDE) addarg('--shorten', nargs=1, help=HIDE) addarg('--expand', nargs=1, help=HIDE) addarg('--tacit', action='store_true', help=HIDE) addarg('--threads', type=int, default=4, choices=range(1, 11), help=HIDE) addarg('-V', dest='upstream', action='store_true', help=HIDE) addarg('-z', '--debug', action='store_true', help=HIDE) # Undocumented APIs addarg('--fixtags', action='store_true', help=HIDE) addarg('--db', nargs=1, help=HIDE) # Show help and exit if no arguments if len(sys.argv) == 1: argparser.print_help(sys.stdout) sys.exit(1) # Parse the arguments args = argparser.parse_args() # Show help and exit if help requested if args.help: argparser.print_help(sys.stdout) sys.exit(0) # Handle color output preference if args.nc: colorize = False ID_str = '%d. %s [%s]\n' ID_DB_str = '%d. %s' MUTE_str = '%s (L)\n' URL_str = '%s > %s\n' DESC_str = '%s + %s\n' TAG_str = '%s # %s\n' logging.basicConfig(format='[%(levelname)s] %(message)s') else: # Enable color in logs setup_logger(logger) # Set up debugging if args.debug: logger.setLevel(logging.DEBUG) logdbg('Version %s', __version__) else: logging.disable(logging.WARNING) urllib3.disable_warnings() # Handle encrypt/decrypt options at top priority if args.lock is not None: BukuCrypt.encrypt_file(args.lock) elif args.unlock is not None: BukuCrypt.decrypt_file(args.unlock) # Set up title if args.title is not None: if args.title: title_in = ' '.join(args.title) else: title_in = '' # Set up tags if args.tag is not None: if args.tag: tags_in = args.tag else: tags_in = [DELIM, ] # Set up comment if args.comment is not None: if args.comment: desc_in = ' '.join(args.comment) else: desc_in = '' # Initialize the database and get handles, set verbose by default bdb = BukuDb(args.json, args.format, not args.tacit, dbfile=args.db[0] if args.db is not None else None, colorize=not args.nc) # Editor mode if args.write is not None: if not is_editor_valid(args.write): bdb.close_quit(1) if is_int(args.write): if not bdb.edit_update_rec(int(args.write), args.immutable): bdb.close_quit(1) elif args.add is None: # Edit and add a new bookmark # Parse tags into a comma-separated string if tags_in: if tags_in[0] == '+': tags = '+' + parse_tags(tags_in[1:]) elif tags_in[0] == '-': tags = '-' + parse_tags(tags_in[1:]) else: tags = parse_tags(tags_in) else: tags = DELIM result = edit_rec(args.write, '', title_in, tags, desc_in) if result is not None: url, title_in, tags, desc_in = result bdb.add_rec(url, title_in, tags, desc_in, args.immutable) # Add record if args.add is not None: if args.url is not None and args.update is None: logerr('Bookmark a single URL at a time') bdb.close_quit(1) # Parse tags into a comma-separated string tags = DELIM keywords = args.add if tags_in is not None: if tags_in[0] == '+': if len(tags_in) > 1: # The case: buku -a url tag1, tag2 --tag + tag3, tag4 tags_in = tags_in[1:] # In case of add, args.add may have URL followed by tags # Add delimiter as url+tags may not end with one keywords = args.add + [DELIM] + tags_in else: keywords = args.add + [DELIM] + tags_in if len(keywords) > 1: tags = parse_tags(keywords[1:]) url = args.add[0] if args.write and not is_int(args.write): result = edit_rec(args.write, url, title_in, tags, desc_in) if result is not None: url, title_in, tags, desc_in = result bdb.add_rec(url, title_in, tags, desc_in, args.immutable) # Search record search_results = None search_opted = True update_search_results = False if args.sany: # Search URLs, titles, tags for any keyword search_results = bdb.searchdb(args.keywords, False, args.deep) elif args.sall: # Search URLs, titles, tags with all keywords search_results = bdb.searchdb(args.keywords, True, args.deep) elif args.sreg: # Run a regular expression search search_results = bdb.searchdb(args.keywords, regex=True) elif args.stag: # Search bookmarks by tag if args.keywords: search_results = bdb.search_by_tag(' '.join(args.keywords)) else: # Use sub prompt to list all tags prompt(bdb, None, args.np, subprompt=True) elif args.keywords: search_results = bdb.searchdb(args.keywords, False, args.deep) else: search_opted = False # Add cmdline search options to readline history if search_opted and args.keywords: try: readline.add_history(' '.join(args.keywords)) except Exception: pass if search_results: oneshot = args.np to_delete = False # Open all results in browser right away if args.oa # is specified. The has priority over delete/update. # URLs are opened first and updated/deleted later. if args.oa: for row in search_results: browse(row[1]) # In case of search and delete/update, # prompt should be non-interactive # delete gets priority over update if args.delete is not None and not args.delete: oneshot = True to_delete = True elif args.update is not None and not args.update: oneshot = True update_search_results = True if not args.json: prompt(bdb, search_results, oneshot, args.deep) else: # Printing in Json format is non-interactive print(format_json(search_results, field_filter=args.format)) # Delete search results if opted if to_delete: bdb.delete_resultset(search_results) # Update record if args.update is not None: if args.url is not None: url_in = args.url[0] else: url_in = '' # Parse tags into a comma-separated string if tags_in: if tags_in[0] == '+': tags = '+' + parse_tags(tags_in[1:]) elif tags_in[0] == '-': tags = '-' + parse_tags(tags_in[1:]) else: tags = parse_tags(tags_in) else: tags = None # No arguments to --update, update all if not args.update: # Update all records only if search was not opted if not search_opted: bdb.update_rec(0, url_in, title_in, tags, desc_in, args.immutable, args.threads) elif update_search_results and search_results is not None: if not args.tacit: print('Updated results:\n') pos = len(search_results) - 1 while pos >= 0: idx = search_results[pos][0] bdb.update_rec(idx, url_in, title_in, tags, desc_in, args.immutable, args.threads) # Commit at every 200th removal if pos % 200 == 0: bdb.conn.commit() pos -= 1 else: for idx in args.update: if is_int(idx): bdb.update_rec(int(idx), url_in, title_in, tags, desc_in, args.immutable, args.threads) elif '-' in idx: try: vals = [int(x) for x in idx.split('-')] if vals[0] > vals[1]: vals[0], vals[1] = vals[1], vals[0] # Update only once if range starts from 0 (all) if vals[0] == 0: bdb.update_rec(0, url_in, title_in, tags, desc_in, args.immutable, args.threads) else: for _id in range(vals[0], vals[1] + 1): bdb.update_rec(_id, url_in, title_in, tags, desc_in, args.immutable, args.threads) if interrupted: break except ValueError: logerr('Invalid index or range to update') bdb.close_quit(1) if interrupted: break # Delete record if args.delete is not None: if not args.delete: # Attempt delete-all only if search was not opted if not search_opted: bdb.cleardb() elif len(args.delete) == 1 and '-' in args.delete[0]: try: vals = [int(x) for x in args.delete[0].split('-')] if len(vals) == 2: bdb.delete_rec(0, vals[0], vals[1], True) except ValueError: logerr('Invalid index or range to delete') bdb.close_quit(1) else: ids = [] # Select the unique indices for idx in args.delete: if idx not in ids: ids += (idx,) try: # Index delete order - highest to lowest ids.sort(key=lambda x: int(x), reverse=True) for idx in ids: bdb.delete_rec(int(idx)) except ValueError: logerr('Invalid index or range or combination') bdb.close_quit(1) # Print record if args.print is not None: if not args.print: bdb.print_rec(0) else: try: for idx in args.print: if is_int(idx): bdb.print_rec(int(idx)) elif '-' in idx: vals = [int(x) for x in idx.split('-')] bdb.print_rec(0, vals[0], vals[-1], True) except ValueError: logerr('Invalid index or range to print') bdb.close_quit(1) # Replace a tag in DB if args.replace is not None: if len(args.replace) == 1: bdb.delete_tag_at_index(0, args.replace[0]) else: bdb.replace_tag(args.replace[0], args.replace[1:]) # Export bookmarks if args.export is not None: if args.tag is None: bdb.exportdb(args.export[0]) elif not args.tag: logerr('Missing tag') else: bdb.exportdb(args.export[0], args.tag) # Import bookmarks if args.importfile is not None: bdb.importdb(args.importfile[0], args.tacit) # Merge a database file and exit if args.merge is not None: bdb.mergedb(args.merge[0]) # Open URL in browser if args.open is not None: if not args.open: bdb.browse_by_index(0) else: try: for idx in args.open: if is_int(idx): bdb.browse_by_index(int(idx)) elif '-' in idx: vals = [int(x) for x in idx.split('-')] bdb.browse_by_index(0, vals[0], vals[-1], True) except ValueError: logerr('Invalid index or range to open') bdb.close_quit(1) # Shorten URL if args.shorten: if is_int(args.shorten[0]): shorturl = bdb.tnyfy_url(index=int(args.shorten[0])) else: shorturl = bdb.tnyfy_url(url=args.shorten[0]) if shorturl: print(shorturl) # Expand URL if args.expand: if is_int(args.expand[0]): url = bdb.tnyfy_url(index=int(args.expand[0]), shorten=False) else: url = bdb.tnyfy_url(url=args.expand[0], shorten=False) if url: print(url) # Report upstream version if args.upstream: check_upstream_release() # Fix tags if args.fixtags: bdb.fixtags() # Close DB connection and quit bdb.close_quit(0) if __name__ == '__main__': main()