buku/buku

926 lines
27 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env python3
#
# Bookmark management utility
#
# Copyright (C) 2015 Arun Prakash Jana <engineerarun@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with buku. If not, see <http://www.gnu.org/licenses/>.
import sys
import os
import sqlite3
from getopt import getopt, GetoptError
import readline
import webbrowser
import html.parser as HTMLParser
from http.client import HTTPConnection
from http.client import HTTPSConnection
from urllib.parse import urljoin, unquote
import signal
# Import libraries needed for encryption.
# Encryption is optional: if any of these imports fails, no_crypto is set and
# the size constants below are left undefined.
try:
    import getpass
    import hashlib
    from Crypto.Cipher import AES
    from Crypto import Random
    import struct
except ImportError:
    no_crypto = True
else:
    no_crypto = False
    BLOCKSIZE = 65536
    SALT_SIZE = 32
    CHUNKSIZE = 0x80000 # Read/write 512 KB chunks
# Globals
# Every value below is a default; the command line parser at the bottom of
# the file overwrites them according to the options given.

# Add / update / delete
addurl = False       # -a, -i
addindex = None      # index passed to -i
update = False       # -u
delete = False       # -d, -D
entry = None         # index passed to -d or -u
refresh = False      # -R
replace = False      # -r

# Title handling
online = False       # -w, -R
titleManual = None   # title passed to -m
titleData = None     # written by BMHTMLParser, read by fetchTitle

# Listing / searching / opening
show = False         # -p, -P
showindex = None     # index passed to -p
showOpt = 0          # value passed to -x
showTags = False     # -g
search = False       # -s, -S
searchAll = False    # -S
openurl = None       # index passed to -o

# Encryption
encrypt = False      # -l
decrypt = False      # -k
iterations = 8       # value passed to -t

# Diagnostics
debug = False        # -z
# Show usage of buku and exit
def usage():
    """Print the help text to stdout and terminate with exit status 1."""

    help_text = (
        "Usage: OPTIONS [URL] [TAGS] [KEYWORDS ...]\n",
        "Bookmark manager. Your private Google.\n",
        "Options",
        " -a URL tag 1, tag 2, ... add URL as bookmark with comma separated tags",
        " -d N delete entry at DB index N (from -P), move last entry to N",
        " -D delete ALL bookmarks",
        " -g show all tags (sorted alphabetically)",
        " -i N insert entry at DB index N, useful to fill deleted index",
        " -k decrypt (unlock) database file",
        " -l encrypt (lock) database file",
        " -m manually add or update the title offline",
        " -o N open URL at DB index N in browser",
        " -p N show details of bookmark record at DB index N",
        " -P show all bookmarks along with index from DB",
        " -R refresh all bookmarks, tags retained",
        " -r oldtag [newtag] replace oldtag with newtag in DB, deletes oldtag if newtag empty",
        " -s keyword(s) search all bookmarks for a (partial) tag or any keyword",
        " -S keyword(s) search all bookmarks for a (partial) tag or all keywords",
        " -t N use N (> 0) hash iterations to generate key, works with -k, -l",
        " -u N update all fields of entry at DB index N",
        " -w fetch title info from web, works with -a, -i, -u",
        " -x N works with -P, N=1: show only URL, N=2: show URL and tag",
        " -z show debug information",
        " any other option shows help and exits buku\n",
        "Keys",
        " 1-N open Nth search result in browser. Enter exits buku.\n",
        "Version 1.8",
        "Copyright (C) 2015 Arun Prakash Jana <engineerarun@gmail.com>",
        "License: GPLv3",
        "Webpage: https://github.com/jarun/buku",
    )

    # One print of the joined lines emits exactly what one print per line would.
    print("\n".join(help_text))
    sys.exit(1)
# Get page response data
def getPageResp(url, redir=False):
    """Issue a GET request for url and return (response, connection).

    url must start with "https://" or "http://"; for anything else
    (None, None) is returned and no connection is made.

    When redir is False only the path part of url (everything from the first
    "/" after the host) is sent as the request target. When redir is True,
    or when url has no path, the complete url is sent.

    The caller owns the returned connection and must close it. If sending the
    request or reading the response fails, the connection is closed here and
    the exception is re-raised.
    """

    # Match the scheme only at the start of the URL: a URL that merely
    # contains "https://" somewhere (e.g. in a query string) must not be
    # sliced as if it began with it.
    if url.startswith("https://"): # Secure connection
        prefix_len = 8
        connection = HTTPSConnection
    elif url.startswith("http://"): # Insecure connection
        prefix_len = 7
        connection = HTTPConnection
    else:
        return (None, None)

    server = url[prefix_len:]
    marker = server.find("/")
    if marker > 0:
        if not redir:
            url = server[marker:]
        server = server[:marker]

    urlconn = connection(server, timeout=30)

    if debug:
        print("server: [%s]" % server)
        print("URL: [%s]" % unquote(url))

    try:
        urlconn.request("GET", unquote(url))
        resp = urlconn.getresponse()
    except Exception:
        # The caller never receives urlconn in this case, so release it here
        urlconn.close()
        raise

    return (resp, urlconn)
# Fetch title from URL
def fetchTitle(url):
    """Fetch the page at url and return its title with newlines removed.

    One level of HTTP redirection is followed. An empty string is returned
    when the URL scheme is unsupported, the request fails, the final status
    is not 200 or no title is found. Errors are printed, not raised.
    """

    global titleData

    # titleData is filled in by the HTML parser. Clear what an earlier call
    # left behind, otherwise a failed fetch would return the previous page's
    # title (this function is called once per bookmark on a full refresh).
    titleData = None
    urlconn = None

    try:
        resp, urlconn = getPageResp(url, False)
        if resp is None:
            return ''

        if resp.status == 200:
            getTitleData(resp)
        elif resp.status in (301, 302, 303, 307, 308):
            # Handle first redirection
            location = resp.getheader('location', '')
            if debug:
                print("Location header: %s" % location)

            redirurl = urljoin(url, location)
            if redirurl.find("sorry/IndexRedirect?") >= 0:
                print("ERROR: Connection blocked due to unusual activity.")
            else:
                if debug:
                    print("Trying to fetch redirected URL.")
                urlconn.close()
                resp, urlconn = getPageResp(redirurl, True)
                if resp is not None:
                    if resp.status != 200:
                        print("ERROR on retry:", str(resp.status), ": ", resp.reason)
                    else:
                        getTitleData(resp)
        else:
            print("ERROR:", str(resp.status), ": ", resp.reason)
    except Exception as e:
        print("Exception: %s" % e)
    finally:
        if urlconn is not None:
            urlconn.close()

    if titleData is None:
        return ''
    return titleData.strip().replace("\n","")
# Initialize the database connection
# Create bookmarks table if it does not exist
def initdb():
    """Open ~/.cache/buku/bookmarks.db and return (connection, cursor).

    The directory and the bookmarks table are created when missing. If only
    the encrypted file bookmarks.db.enc exists, a hint is printed and the
    program exits with status 1.
    """

    # expanduser() equals $HOME when that is set and still resolves a home
    # directory when it is not (os.environ.get('HOME') would be None then).
    dbpath = os.path.join(os.path.expanduser('~'), '.cache', 'buku')
    os.makedirs(dbpath, exist_ok=True)

    dbfile = os.path.join(dbpath, 'bookmarks.db')
    encpath = os.path.join(dbpath, 'bookmarks.db.enc')

    # Notify if DB file needs to be decrypted first
    if os.path.exists(encpath) and not os.path.exists(dbfile):
        print("Unlock database first")
        sys.exit(1)

    # Show info on first creation
    if not os.path.exists(dbfile) and not no_crypto:
        print("DB file is being created. You may want to encrypt it later.")

    # Create a connection
    conn = sqlite3.connect(dbfile)
    cur = conn.cursor()

    # Create table if it doesn't exist
    cur.execute('CREATE TABLE if not exists bookmarks '
                '(id integer PRIMARY KEY, URL text NOT NULL UNIQUE, metadata text, tags text)')
    conn.commit()
    return (conn, cur)
# Add a new bookmark or update an existing record at index
def AddUpdateEntry(conn, cur, keywords, index):
    """Insert a bookmark, or overwrite the record at index.

    keywords[0] is the URL. The remaining items are tag words: a word ending
    in ',' closes the current tag, words without it are joined by single
    spaces into one tag. Tags are stored as ",tag 1,tag 2,".

    The title comes from the global titleManual when set, otherwise from the
    web when the global online is true, otherwise it is empty.

    When index is None a new row is inserted, at the global addindex if that
    is set. Otherwise URL, title and tags of row index are replaced.
    Outcomes, including duplicate URL or index, are printed.
    """

    tags = ','
    meta = ''
    url = keywords[0]

    # Cleanse and get the tags
    for token in keywords[1:]:
        word = token.strip(',')
        if not word:
            # Empty token or bare commas. A full refresh passes '' for an
            # untagged bookmark; indexing token[-1] would fail on it.
            continue

        if tags[-1] != ',':
            tags += ' '
        tags += word
        if token[-1] == ',':
            tags += ','

    if tags[-1] != ',':
        tags += ','

    if titleManual is not None:
        meta = titleManual
    elif online:
        meta = fetchTitle(url)
        if meta == '':
            print("\x1B[91mTitle: []\x1B[0m")
        else:
            print("Title: [%s]" % meta)

    if index is None: # Insert a new entry
        try:
            if addindex is None: # addindex is index number to insert record at
                cur.execute('INSERT INTO bookmarks(URL, metadata, tags) VALUES (?, ?, ?)', (url, meta, tags,))
            else:
                cur.execute('INSERT INTO bookmarks(id, URL, metadata, tags) VALUES (?, ?, ?, ?)', (int(addindex), url, meta, tags,))
            conn.commit()
            print("Added at index %d" % cur.lastrowid)
        except sqlite3.IntegrityError:
            # Either the URL or the requested index is taken. Compare with
            # '=': LIKE would treat '%' and '_' in the URL as wildcards.
            row = cur.execute("SELECT id from bookmarks where URL = ?", (url,)).fetchone()
            if row is not None:
                print("URL already exists at index %s" % row[0])
            else:
                print("Index %s exists" % addindex)
    else: # Update an existing entry
        try:
            cur.execute("UPDATE bookmarks SET URL = ?, metadata = ?, tags = ? WHERE id = ?", (url, meta, tags, int(index),))
            conn.commit()
            if cur.rowcount == 1:
                print("Updated index %d" % int(index))
            else:
                print("No matching index")
        except sqlite3.IntegrityError:
            print("URL already exists")
# Search the database for a tag or matching URL or Title info
def searchdb(cur, keywords):
    """Print bookmarks matching keywords, then prompt for results to open.

    A row matches when its tags contain all keywords joined by spaces, or
    when URL or title contain every keyword (global searchAll true) or any
    keyword (otherwise). After listing, result numbers are read from stdin
    and opened in the browser until a non-number or EOF is entered.
    """

    placeholder = "'%' || ? || '%'"
    arguments = [" ".join(keywords)]
    query = "SELECT id, url, metadata, tags FROM bookmarks WHERE tags LIKE (%s)" % placeholder

    if searchAll == True: # Match all keywords in URL or Title
        url_terms = " AND ".join("URL LIKE (%s)" % (placeholder) for _ in keywords)
        meta_terms = " AND ".join("metadata LIKE (%s)" % (placeholder) for _ in keywords)
        query += " OR (" + url_terms + ") OR (" + meta_terms + ")"
        arguments.extend(keywords)
        arguments.extend(keywords)
    else: # Match any keyword in URL or Title
        query += "".join(" OR URL LIKE (%s) OR metadata LIKE (%s)" % (placeholder, placeholder)
                         for _ in keywords)
        for word in keywords:
            arguments.extend((word, word))

    if debug:
        print("\"%s\", (%s)" % (query, arguments))

    results = []
    for number, record in enumerate(cur.execute(query, arguments), 1):
        results.append(record[1])
        print("\x1B[1m\x1B[93m%d. \x1B[0m\x1B[92m%s\x1B[0m (%d)\n\t%s\n\t\x1B[91m[TAGS]\x1B[0m %s" % (number, record[1], record[0], record[2], record[3][1:-1]))

    count = len(results)
    if count == 0:
        return

    print("")

    while True:
        try:
            nav = input("Result number to open: ")
        except EOFError:
            return

        if not is_int(nav):
            break

        position = int(nav) - 1
        if position < 0 or position >= count:
            print("Index out of bound")
            continue

        try:
            browser_open(unquote(results[position]))
        except Exception as e:
            print("Exception: %s" % e)
# Move last row to empty position to compact DB
def compactDB(conn, cur, index):
    """Renumber the row with the highest id to index.

    Nothing happens when the table is empty or its highest id is not greater
    than index. sqlite3.IntegrityError is raised, and no row is changed, if
    index is already in use.
    """

    cur.execute('SELECT MAX(id) from bookmarks')
    row = cur.fetchone()
    last = row[0] if row is not None else None

    # MAX(id) is NULL for an empty table; None cannot be compared to an int
    if last is None or last <= index:
        return

    # One UPDATE moves the row in a single step, so it can never be deleted
    # without being re-inserted.
    cur.execute('UPDATE bookmarks SET id = ? WHERE id = ?', (index, last,))
    conn.commit()
    print("Index %d moved to %d" % (last, index))
# Delete a single record or remove the table
def cleardb(conn, cur, index):
    """Delete the bookmark at index, or drop the whole table when index is None.

    Dropping the table requires the user to answer 'y' at a prompt; any other
    answer, or end of input, leaves the data untouched. After a single record
    is removed, compactDB() moves the last record into the freed index.
    """

    if index is None: # Remove the table
        try:
            # SGR 22 ends bold. The code 21 used previously is "double
            # underline" in ECMA-48 and is rendered as such by terminals.
            resp = input("ALL bookmarks will be removed. Enter \x1b[1my\x1b[22m to confirm: ")
        except EOFError:
            resp = ''

        if resp != 'y':
            print("No bookmarks deleted")
            return

        cur.execute('DROP TABLE if exists bookmarks')
        conn.commit()
        print("All bookmarks deleted")
        return

    # Remove a single entry
    cur.execute('DELETE FROM bookmarks WHERE id = ?', (int(index),))
    conn.commit()
    if cur.rowcount == 1:
        print("Removed index %d" % int(index))
        compactDB(conn, cur, int(index))
    else:
        print("No matching index")
# Print all records in the table
def printdb(cur, index):
    """Print the bookmark at index, or every bookmark when index is None.

    A single record is always printed in full. When listing everything the
    global showOpt selects the layout: 1 prints index and URL, 2 adds the
    tags, any other value prints the full coloured record.
    """

    full_record = "\x1B[1m\x1B[93m%s. \x1B[0m\x1B[92m%s\x1B[0m\n\t%s\n\t\x1B[91m[TAGS]\x1B[0m %s"

    if index is not None: # Show record at index
        try:
            match = cur.execute("SELECT * FROM bookmarks WHERE id = ?", (int(index),)).fetchone()
            if match is None:
                print("No matching index")
            else:
                print(full_record % (match[0], match[1], match[2], match[3][1:-1]))
        except IndexError:
            print("Index out of bound")
        return

    # Show all entries
    for record in cur.execute('SELECT * FROM bookmarks'):
        if showOpt == 1:
            print("%s %s" % (record[0], record[1]))
        elif showOpt == 2:
            print("%s %s %s" % (record[0], record[1], record[3][1:-1]))
        else:
            print(full_record % (record[0], record[1], record[2], record[3][1:-1]))
# Show all unique tags ordered alphabetically
def showUniqueTags(cur):
    """Print a numbered list of all distinct tags, sorted case-insensitively.

    Rows whose tags column is NULL, empty or just ',' contribute nothing, and
    empty tags inside a tag string are skipped.
    """

    # A dict is used as an insertion-ordered set: membership tests are cheap
    # and tags that differ only in case keep their first-seen order.
    unique = {}
    for (tagstr,) in cur.execute('SELECT DISTINCT tags FROM bookmarks'):
        if not tagstr:
            continue
        for tag in tagstr.strip(',').split(','):
            if tag:
                unique.setdefault(tag, None)

    for count, tag in enumerate(sorted(unique, key=str.lower), 1):
        print("%6d. %s" % (count, tag))
# Replace or delete tags in DB
def replaceTags(conn, cur, orig, new):
    """Replace tag orig with tag new in every bookmark, or delete orig.

    orig is deleted when new is empty (or only commas). A bookmark that
    already carries new just loses orig, so no duplicate tag is created.
    Each changed bookmark is reported and the changes are committed.
    """

    orig = ',' + orig + ','
    new = new.strip(',')
    delete = (new == '')
    new = ',' if delete else ',' + new + ','

    if orig == new:
        print("Tags are same.")
        return

    cur.execute("SELECT id, tags FROM bookmarks WHERE tags LIKE ?", ('%' + orig + '%',))
    updated = False
    for rowid, tags in cur.fetchall():
        # Decide per row. Overwriting 'new' here would turn the replacement
        # into a deletion for every following row as well.
        replacement = new
        if not delete and tags.find(new) >= 0:
            replacement = ','

        newtags = tags.replace(orig, replacement)
        if newtags == tags:
            # LIKE ignores ASCII case and treats '%' and '_' as wildcards,
            # so a selected row need not contain orig literally.
            continue

        cur.execute("UPDATE bookmarks SET tags = ? WHERE id = ?", (newtags, rowid,))
        print("Updated index %d" % rowid)
        updated = True

    if updated:
        conn.commit()
# Fetch index and open URL in browser
def fetchopen(index):
    """Open the URL stored at DB index in the browser.

    The lookup uses the module-level cursor cur. A message is printed when
    no record has that index.
    """

    try:
        match = cur.execute("SELECT URL FROM bookmarks WHERE id = ?", (int(index),)).fetchone()
        if match is None:
            print("No matching index")
            return

        browser_open(unquote(match[0]))
    except IndexError:
        print("Index out of bound")
# Check if a value is a digit
def is_int(string):
    """Return True if int(string) succeeds, False otherwise."""

    try:
        int(string)
    except (TypeError, ValueError):
        # Only conversion failures mean "not an integer"; anything else,
        # such as KeyboardInterrupt, must propagate.
        return False
    return True
# Fetch titleData from GET response
def getTitleData(resp):
    """Read the body of resp and feed it to the title parser.

    The body is decoded with the charset from the response headers, or UTF-8
    when none is declared or the declared one is unknown to Python.
    Undecodable bytes are replaced rather than aborting the whole page. The
    title is delivered by BMHTMLParser through the global titleData. Errors
    are only reported in debug mode.
    """

    charset = resp.headers.get_content_charset()
    if charset is None:
        charset = 'utf-8'
    if debug:
        print("charset: %s" % charset)

    parser = BMHTMLParser()
    try:
        raw = resp.read()
        try:
            text = raw.decode(charset, "replace")
        except LookupError:
            # Charset label not known to Python
            text = raw.decode('utf-8', "replace")
        parser.feed(text)
    except Exception as e:
        if debug:
            print("Exception: %s" % e)
# Parse HTML page for Title info
class BMHTMLParser(HTMLParser.HTMLParser):
    """HTML parser that captures the text of the first non-empty <title>.

    The captured text is kept in self.data and also stored in the
    module-level global titleData when the closing </title> is seen.
    Everything after that is ignored.
    """

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.inTitle = False    # True while inside a <title> being captured
        self.data = ""          # Title text collected so far
        self.lasttag = None
        self.titleDone = False  # True once a non-empty title has been stored

    def handle_starttag(self, tag, attrs):
        # Any start tag ends title collection; only <title> (re)starts it,
        # and only while no title has been captured yet.
        self.inTitle = (tag == "title" and not self.titleDone)

    def handle_endtag(self, tag):
        global titleData

        if tag == "title":
            self.inTitle = False
            if self.data != "" and not self.titleDone:
                titleData = self.data
                # Ignore the rest of the document from here on. reset() is
                # deliberately not called: doing so inside a callback wipes
                # the buffer feed() is still working on.
                self.titleDone = True

    def handle_data(self, data):
        if self.inTitle:
            self.data += data
# Open a URL in browser
def browser_open(url):
    """Open url in the web browser with stdout and stderr silenced.

    File descriptors 1 and 2 point at os.devnull while the browser is
    launched, which hides whatever it prints, and are restored afterwards.
    A failure to launch is reported once output is visible again.
    """

    url = url.replace("%22", "\"")

    # Push out buffered text before the descriptors are swapped
    sys.stdout.flush()
    sys.stderr.flush()

    # Duplicate both descriptors before touching either. Closing 2 first
    # would make dup(1) return 2, and the saved stdout would then be
    # overwritten by the redirection.
    saved_stderr = os.dup(2)
    saved_stdout = os.dup(1)
    devnull = os.open(os.devnull, os.O_RDWR)
    error = None

    try:
        os.dup2(devnull, 2)
        os.dup2(devnull, 1)
        try:
            webbrowser.open(url)
        except Exception as e:
            error = e
    finally:
        os.dup2(saved_stderr, 2)
        os.dup2(saved_stdout, 1)
        # Release the helper descriptors; this function can be called many
        # times from the search prompt.
        os.close(devnull)
        os.close(saved_stderr)
        os.close(saved_stdout)

    if error is not None:
        print("Browser Exception: %s" % error)
# Get the SHA256 hash of a file
def get_filehash(filepath):
    """Return the SHA256 digest (bytes) of the file at filepath.

    The file is read in pieces of BLOCKSIZE bytes.
    """

    digest = hashlib.sha256()
    with open(filepath, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF
        for block in iter(lambda: stream.read(BLOCKSIZE), b''):
            digest.update(block)
    return digest.digest()
# Encrypt the bookmarks database file
def encrypt_file():
    """Encrypt ~/.cache/buku/bookmarks.db to bookmarks.db.enc and exit.

    The password is asked for twice. The key is obtained by hashing password
    plus a random salt with SHA256 for 'iterations' rounds. The output holds,
    in this order: original size (8 bytes, little endian), salt, IV, SHA256
    of the plain file, AES-CBC ciphertext. On success the plain file is
    removed and the program exits with status 0; every failed precondition
    exits with status 1.
    """

    dbpath = os.path.join(os.path.expanduser('~'), '.cache', 'buku', 'bookmarks.db')
    encpath = dbpath + '.enc'

    if not os.path.exists(dbpath):
        print("%s missing. Already encrypted?" % dbpath)
        sys.exit(1)

    # If both encrypted file and flat file exist, error out
    if os.path.exists(encpath):
        print("ERROR: Both encrypted and flat DB files exist!")
        sys.exit(1)

    password = getpass.getpass()
    passconfirm = getpass.getpass()
    if password == '':
        print("Empty password")
        sys.exit(1)
    if password != passconfirm:
        print("Passwords don't match")
        sys.exit(1)

    # Get SHA256 hash of DB file
    dbhash = get_filehash(dbpath)

    # Generate random 256-bit salt and key
    # NOTE(review): decoding the salt with "replace" maps invalid byte
    # sequences to U+FFFD and so discards part of its randomness. It is kept
    # as is because decrypt_file() derives the key the same way and existing
    # encrypted files depend on it.
    salt = Random.get_random_bytes(SALT_SIZE)
    key = (password + salt.decode('utf-8', "replace")).encode('utf-8')
    for _ in range(iterations):
        key = hashlib.sha256(key).digest()

    iv = Random.get_random_bytes(16)
    cipher = AES.new(key, AES.MODE_CBC, iv)
    filesize = os.path.getsize(dbpath)

    try:
        with open(dbpath, 'rb') as infile, open(encpath, 'wb') as outfile:
            outfile.write(struct.pack('<Q', filesize))
            outfile.write(salt)
            outfile.write(iv)

            # Embed DB file hash in encrypted file
            outfile.write(dbhash)

            while True:
                chunk = infile.read(CHUNKSIZE)
                if len(chunk) == 0:
                    break
                if len(chunk) % 16 != 0:
                    # Pad to the AES block size. chunk is bytes, so the
                    # padding has to be bytes too.
                    chunk += b' ' * (16 - len(chunk) % 16)

                outfile.write(cipher.encrypt(chunk))
    except BaseException:
        # Do not leave a partial .enc file next to the intact DB; it would
        # trip the "both files exist" check on every later run.
        if os.path.exists(encpath):
            os.remove(encpath)
        raise

    os.remove(dbpath)
    print("File encrypted")
    sys.exit(0)
# Decrypt the bookmarks database file
def decrypt_file():
    """Decrypt ~/.cache/buku/bookmarks.db.enc to bookmarks.db and exit.

    The file layout is the one written by encrypt_file(). The result is
    accepted only if its SHA256 equals the hash embedded in the encrypted
    file; the encrypted file is then removed and the exit status is 0. On a
    wrong password or a damaged file the partial output is removed,
    "Decryption failed" is printed and the exit status is 1.
    """

    dbpath = os.path.join(os.path.expanduser('~'), '.cache', 'buku', 'bookmarks.db')
    encpath = dbpath + '.enc'

    if not os.path.exists(encpath):
        print("Error: %s missing" % encpath)
        sys.exit(1)

    # If both encrypted file and flat file exist, error out
    if os.path.exists(dbpath):
        print("ERROR: Both encrypted and flat DB files exist!")
        sys.exit(1)

    password = getpass.getpass()
    if password == '':
        print("Decryption failed")
        sys.exit(1)

    try:
        with open(encpath, 'rb') as infile:
            origsize = struct.unpack('<Q', infile.read(struct.calcsize('<Q')))[0]

            # Read 256-bit salt and generate key
            salt = infile.read(SALT_SIZE)
            key = (password + salt.decode('utf-8', "replace")).encode('utf-8')
            for _ in range(iterations):
                key = hashlib.sha256(key).digest()

            iv = infile.read(16)
            cipher = AES.new(key, AES.MODE_CBC, iv)

            # Get original DB file's SHA256 hash from encrypted file
            enchash = infile.read(32)

            with open(dbpath, 'wb') as outfile:
                while True:
                    chunk = infile.read(CHUNKSIZE)
                    if len(chunk) == 0:
                        break

                    outfile.write(cipher.decrypt(chunk))

                # Drop the padding added during encryption
                outfile.truncate(origsize)

        # Match hash of generated file with that of original DB file
        decrypted = (get_filehash(dbpath) == enchash)
    except (struct.error, ValueError):
        # Short header, short IV or ciphertext that is not a whole number of
        # blocks: the encrypted file is truncated or corrupt.
        decrypted = False

    if not decrypted:
        if os.path.exists(dbpath):
            os.remove(dbpath)
        print("Decryption failed")
        sys.exit(1)

    os.remove(encpath)
    print("File decrypted")
    sys.exit(0)
# SIGINT handler
def sigint_handler(signum, frame):
    """Report the interruption on stderr and leave with exit status 1."""

    print('\nInterrupted.', file=sys.stderr)
    raise SystemExit(1)


# Installed at import time, so Ctrl-C ends the program without a traceback
signal.signal(signal.SIGINT, sigint_handler)
# Main starts here
# ----------------

def _conflict_exit(conflicting):
    """Print the add/update/delete conflict message and usage if conflicting is true."""
    if conflicting:
        print("You can either add or update or delete in one instance\n")
        usage()


def _positive_number(value):
    """Return value if it is a decimal string greater than zero, else show usage and exit.

    isdecimal() is used because isdigit() also accepts characters such as
    superscript digits, which int() cannot convert.
    """
    if not value.isdecimal() or int(value) <= 0:
        usage()
    return value


optlist = None
keywords = None

if len(sys.argv) < 2:
    usage()

# Check cmdline options
try:
    optlist, keywords = getopt(sys.argv[1:], "d:i:m:o:p:t:u:x:aDgklPRrsSwz")
except GetoptError as e:
    print("buku:", e)
    sys.exit(1)

if len(optlist) < 1:
    usage()

# Options are handled in the order given; the conflict checks therefore only
# see the options that came earlier on the command line.
for option, value in optlist:
    if option == "-a":
        _conflict_exit(update or delete)
        addurl = True
    elif option == "-d":
        _conflict_exit(addurl or update)
        entry = _positive_number(value)
        delete = True
    elif option == "-D":
        _conflict_exit(addurl or update)
        delete = True
    elif option == "-g":
        showTags = True
    elif option == "-i":
        _conflict_exit(update or delete)
        addindex = _positive_number(value)
        addurl = True
    elif option in ("-k", "-l"):
        if no_crypto:
            print("Error: PyCrypto missing")
            # The requested operation cannot be carried out: report failure
            sys.exit(1)
        if option == "-k":
            decrypt = True
        else:
            encrypt = True
    elif option == "-m":
        titleManual = value
    elif option == "-o":
        openurl = _positive_number(value)
    elif option == "-p":
        showindex = _positive_number(value)
        show = True
    elif option == "-P":
        show = True
    elif option == "-R":
        _conflict_exit(addurl or delete)
        online = True
        refresh = True
    elif option == "-r":
        replace = True
    elif option == "-s":
        search = True
    elif option == "-S":
        searchAll = True
        search = True
    elif option == "-t":
        iterations = int(_positive_number(value))
    elif option == "-u":
        _conflict_exit(addurl or delete)
        entry = _positive_number(value)
        update = True
    elif option == "-w":
        online = True
    elif option == "-x":
        showOpt = int(_positive_number(value))
        if showOpt > 2:
            usage()
    elif option == "-z":
        debug = True
# Update and full DB refresh are mutually exclusive
if update and refresh:
    print("You can either update a single index or refresh full DB at once.\n")
    usage()

# Online title update conflicts with manual title option
if online and titleManual is not None:
    print("You can either fetch title from web or add/update title manually.\n")
    usage()

# Handle encrypt/decrypt options at top priority
# (both functions end the program themselves)
if encrypt:
    encrypt_file()

if decrypt:
    decrypt_file()

# Initialize the database and get handles
conn, cur = initdb()

# Replace a tag in DB
if replace:
    numargs = len(keywords)

    if addurl or update or delete:
        print("Tag replace doesn't work with add or update or delete.\n")
        conn.close()
        usage()
    elif numargs < 1 or numargs > 2:
        print("Tag replace accepts 1 or 2 arguments\n")
        conn.close()
        usage()
    elif numargs == 1:
        replaceTags(conn, cur, keywords[0], "")
    else:
        replaceTags(conn, cur, keywords[0], keywords[1])

# Call add or update record
if addurl or update:
    if len(keywords) < 1:
        conn.close()
        usage()

    AddUpdateEntry(conn, cur, keywords, entry)

# Refresh full DB
if refresh:
    cur.execute("SELECT id, url, tags FROM bookmarks")
    for rowid, url, tagstr in cur.fetchall():
        olddata = [url]

        # Omit the initial ',' already in DB. An untagged bookmark is stored
        # as ','; pass no tag argument at all for it instead of an empty
        # string, which AddUpdateEntry cannot index.
        stored = (tagstr or '')[1:]
        if stored:
            olddata.append(stored)

        AddUpdateEntry(conn, cur, olddata, rowid)
        print("")

# Search tags, URLs, Title info
if search:
    if len(keywords) < 1:
        conn.close()
        usage()

    searchdb(cur, keywords)

# Print all records
if show:
    printdb(cur, showindex)

# Show all unique tags
if showTags:
    showUniqueTags(cur)

# Open URL in browser
if openurl is not None:
    fetchopen(openurl)

# Remove a single record or all records
if delete:
    cleardb(conn, cur, entry)

# Close the connection before exiting
conn.close()