Merge branch 'master' into tests

This commit is contained in:
poikjhn 2016-06-02 02:31:40 +02:00
commit 9a614ae066
3 changed files with 225 additions and 180 deletions

View File

@ -60,15 +60,15 @@ Copyright (C) 2015-2016 [Arun Prakash Jana](mailto:engineerarun@gmail.com).
`buku` requires Python 3.x to work. `buku` requires Python 3.x to work.
Optional dependencies: Optional dependencies:
- Encryption: PyCrypto - Encryption: cryptography
- Import bookmarks: Beautiful Soup - Import bookmarks: Beautiful Soup
Run: Run:
$ sudo pip3 install pycrypto beautifulsoup4 $ sudo pip3 install cryptography beautifulsoup4
or on Ubuntu: or on Ubuntu:
$ sudo apt-get install python3-crypto python3-bs4 $ sudo apt-get install python3-cryptography python3-bs4
## Installing from this repository ## Installing from this repository

397
buku
View File

@ -29,21 +29,6 @@ import gzip
import io import io
import signal import signal
# Import libraries needed for encryption
try:
import getpass
import hashlib
from Crypto.Cipher import AES
from Crypto import Random
import struct
no_crypto = False
BLOCKSIZE = 65536
SALT_SIZE = 32
CHUNKSIZE = 0x80000 # Read/write 512 KB chunks
except ImportError:
no_crypto = True
# Globals # Globals
update = False # Update a bookmark in DB update = False # Update a bookmark in DB
@ -57,10 +42,17 @@ showOpt = 0 # Modify show. 1: show only URL, 2: show URL and tag
debug = False # Enable debug logs debug = False # Enable debug logs
pipeargs = [] # Holds arguments piped to the program pipeargs = [] # Holds arguments piped to the program
noninteractive = False # Show the prompt or not noninteractive = False # Show the prompt or not
interrupted = False # Received SIGINT
DELIMITER = ',' # Delimiter used to store tags in DB DELIMITER = ',' # Delimiter used to store tags in DB
_VERSION_ = '2.1' # Program version _VERSION_ = '2.1' # Program version
# Crypto globals
BLOCKSIZE = 65536
SALT_SIZE = 32
CHUNKSIZE = 0x80000 # Read/write 512 KB chunks
class BMHTMLParser(HTMLParser.HTMLParser): class BMHTMLParser(HTMLParser.HTMLParser):
"""Class to parse and fetch the title from a HTML page, if available""" """Class to parse and fetch the title from a HTML page, if available"""
@ -93,6 +85,187 @@ class BMHTMLParser(HTMLParser.HTMLParser):
pass pass
class BukuCrypt:
    """Encrypt and decrypt the bookmarks database file.

    Functionally a separate entity from the rest of the program.
    The crypto-related imports are done late, inside the static
    methods; per the original author this saves ~100ms on each run.
    Encryption and decryption are never triggered automatically and
    only one of them is invoked per run, so the late imports are an
    acceptable trade-off.

    Layout of the encrypted file, as written by encrypt_file():
        8 bytes          size of the plain DB file, packed as '<Q'
        SALT_SIZE bytes  random salt
        16 bytes         AES-CBC initialization vector
        32 bytes         SHA256 digest of the plain DB file
        remainder        AES-CBC ciphertext, space-padded to 16 bytes
    """

    @staticmethod
    def get_filehash(filepath):
        """Get the SHA256 hash of a file

        Params: path to the file
        Returns: the raw (binary) SHA256 digest of the file contents
        """

        from hashlib import sha256

        hasher = sha256()
        with open(filepath, 'rb') as f:
            # Hash in BLOCKSIZE pieces so the whole file is never in memory
            buf = f.read(BLOCKSIZE)
            while len(buf) > 0:
                hasher.update(buf)
                buf = f.read(BLOCKSIZE)

        return hasher.digest()

    @staticmethod
    def _pad_chunk(chunk):
        """Right-pad a bytes chunk with spaces to a multiple of 16 bytes

        CBC mode without a padder only accepts whole 16-byte AES blocks.
        The padding is stripped on decryption by truncating the output
        to the original file size stored in the header.

        Params: bytes read from the DB file
        Returns: the chunk itself if already aligned, else a padded copy
        """

        remainder = len(chunk) % 16
        if remainder:
            # Must be bytes: '%s' formatting of a bytes object would embed
            # its repr ("b'...'") and produce a str instead
            chunk += b' ' * (16 - remainder)

        return chunk

    @staticmethod
    def encrypt_file(iterations):
        """Encrypt the bookmarks database file

        Prompts twice for a password, writes '<dbpath>.enc', removes
        the plain DB file and exits the process. Every failure path
        exits with status 1; success exits with status 0.

        Params: number of SHA256 rounds applied to derive the key,
                must be >= 1
        """

        try:
            from getpass import getpass
            import struct
            from hashlib import sha256
            from cryptography.hazmat.backends import default_backend
            from cryptography.hazmat.primitives.ciphers import (
                Cipher, algorithms, modes)
        except ImportError:
            print('cryptography missing')
            sys.exit(1)

        if iterations < 1:
            printmsg('Iterations must be >= 1', 'ERROR')
            sys.exit(1)

        dbpath = os.path.join(BukuDb.get_dbdir_path(), 'bookmarks.db')
        encpath = '%s.enc' % dbpath
        if not os.path.exists(dbpath):
            print('%s missing. Already encrypted?' % dbpath)
            sys.exit(1)

        # If both encrypted file and flat file exist, error out
        # (the flat file is known to exist at this point)
        if os.path.exists(encpath):
            printmsg('Both encrypted and flat DB files exist!', 'ERROR')
            sys.exit(1)

        password = getpass()
        passconfirm = getpass()
        if password == '':
            print('Empty password')
            sys.exit(1)
        if password != passconfirm:
            print("Passwords don't match")
            sys.exit(1)

        # Get SHA256 hash of DB file
        dbhash = BukuCrypt.get_filehash(dbpath)

        # Generate random 256-bit salt and key
        # NOTE: the lossy salt.decode(..., 'replace') is part of the
        # on-disk format; decrypt_file() derives the key the same way,
        # so it must not be changed on one side only.
        salt = os.urandom(SALT_SIZE)
        key = ('%s%s' % (password,
                         salt.decode('utf-8', 'replace'))).encode('utf-8')
        for _ in range(iterations):
            key = sha256(key).digest()

        iv = os.urandom(16)
        encryptor = Cipher(
            algorithms.AES(key),
            modes.CBC(iv),
            backend=default_backend()
        ).encryptor()
        filesize = os.path.getsize(dbpath)

        with open(dbpath, 'rb') as infile:
            with open(encpath, 'wb') as outfile:
                outfile.write(struct.pack('<Q', filesize))
                outfile.write(salt)
                outfile.write(iv)

                # Embed DB file hash in encrypted file
                outfile.write(dbhash)

                while True:
                    chunk = infile.read(CHUNKSIZE)
                    if len(chunk) == 0:
                        break

                    outfile.write(
                        encryptor.update(BukuCrypt._pad_chunk(chunk)))

                # Finalize exactly once, after the last chunk: a finalized
                # cipher context cannot process any further data
                outfile.write(encryptor.finalize())

        os.remove(dbpath)
        print('File encrypted')
        sys.exit(0)

    @staticmethod
    def decrypt_file(iterations):
        """Decrypt the bookmarks database file

        Prompts for the password, recreates the plain DB file from
        '<dbpath>.enc' and verifies it against the SHA256 digest stored
        in the encrypted file. On a mismatch the generated file is
        removed and the process exits with status 1; on success the
        encrypted file is removed and the function returns.

        Params: number of SHA256 rounds applied to derive the key,
                must match the value used for encryption
        """

        try:
            from getpass import getpass
            import struct
            from hashlib import sha256
            from cryptography.hazmat.backends import default_backend
            from cryptography.hazmat.primitives.ciphers import (
                Cipher, algorithms, modes)
        except ImportError:
            print('cryptography missing')
            sys.exit(1)

        if iterations < 1:
            printmsg('Decryption failed', 'ERROR')
            sys.exit(1)

        dbpath = os.path.join(BukuDb.get_dbdir_path(), 'bookmarks.db')
        encpath = '%s.enc' % dbpath
        if not os.path.exists(encpath):
            printmsg('%s missing' % encpath, 'ERROR')
            sys.exit(1)

        # If both encrypted file and flat file exist, error out
        # (the encrypted file is known to exist at this point)
        if os.path.exists(dbpath):
            printmsg('Both encrypted and flat DB files exist!', 'ERROR')
            sys.exit(1)

        password = getpass()
        if password == '':
            printmsg('Decryption failed', 'ERROR')
            sys.exit(1)

        with open(encpath, 'rb') as infile:
            origsize = struct.unpack(
                '<Q', infile.read(struct.calcsize('Q')))[0]

            # Read 256-bit salt and generate key
            salt = infile.read(SALT_SIZE)
            key = ('%s%s' % (password,
                             salt.decode('utf-8', 'replace'))).encode('utf-8')
            for _ in range(iterations):
                key = sha256(key).digest()

            iv = infile.read(16)
            decryptor = Cipher(
                algorithms.AES(key),
                modes.CBC(iv),
                backend=default_backend(),
            ).decryptor()

            # Get original DB file's SHA256 hash from encrypted file
            enchash = infile.read(32)

            with open(dbpath, 'wb') as outfile:
                while True:
                    chunk = infile.read(CHUNKSIZE)
                    if len(chunk) == 0:
                        break

                    outfile.write(decryptor.update(chunk))

                # Finalize exactly once, after the last chunk
                outfile.write(decryptor.finalize())

                # Drop the space padding added during encryption
                outfile.truncate(origsize)

        # Match hash of generated file with that of original DB file
        dbhash = BukuCrypt.get_filehash(dbpath)
        if dbhash != enchash:
            os.remove(dbpath)
            printmsg('Decryption failed', 'ERROR')
            sys.exit(1)
        else:
            os.remove(encpath)
            print('File decrypted')
class BukuDb: class BukuDb:
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -205,7 +378,7 @@ class BukuDb:
return resultset[0][0] return resultset[0][0]
def add_bookmark(self, url, title_manual=None, tag_manual=None, desc=None): def add_bookmark(self, url, title_manual=None, tag_manual=None, desc=None, delayed_commit=False):
"""Add a new bookmark """Add a new bookmark
:param url: url to bookmark :param url: url to bookmark
@ -245,7 +418,8 @@ class BukuDb:
try: try:
self.cur.execute('INSERT INTO bookmarks(URL, metadata, tags, desc) VALUES (?, ?, ?, ?)', (url, meta, tag_manual, desc)) self.cur.execute('INSERT INTO bookmarks(URL, metadata, tags, desc) VALUES (?, ?, ?, ?)', (url, meta, tag_manual, desc))
self.conn.commit() if not delayed_commit:
self.conn.commit()
self.print_bookmark(self.cur.lastrowid) self.print_bookmark(self.cur.lastrowid)
except Exception as e: except Exception as e:
print('\x1b[1mEXCEPTION\x1b[21m [add_bookmark]: (%s) %s' % (type(e).__name__, e)) print('\x1b[1mEXCEPTION\x1b[21m [add_bookmark]: (%s) %s' % (type(e).__name__, e))
@ -352,16 +526,19 @@ class BukuDb:
print('Title: [%s]' % title) print('Title: [%s]' % title)
self.cur.execute('UPDATE bookmarks SET metadata = ? WHERE id = ?', (title, row[0],)) self.cur.execute('UPDATE bookmarks SET metadata = ? WHERE id = ?', (title, row[0],))
self.conn.commit()
print('Index %d updated\n' % row[0]) print('Index %d updated\n' % row[0])
if interrupted:
printmsg('Aborting refreshdb ...', 'WARNING')
break
else: else:
title = title_manual title = title_manual
for row in resultset: for row in resultset:
self.cur.execute('UPDATE bookmarks SET metadata = ? WHERE id = ?', (title, row[0],)) self.cur.execute('UPDATE bookmarks SET metadata = ? WHERE id = ?', (title, row[0],))
self.conn.commit()
print('Index %d updated\n' % row[0]) print('Index %d updated\n' % row[0])
self.conn.commit()
def searchdb(self, keywords, all_keywords=False, json=False): def searchdb(self, keywords, all_keywords=False, json=False):
"""Search the database for an entries with tags or URL """Search the database for an entries with tags or URL
or title info matching keywords and list those. or title info matching keywords and list those.
@ -539,7 +716,7 @@ class BukuDb:
count = 1 count = 1
Tags = [] Tags = []
uniqueTags = [] uniqueTags = []
for row in self.cur.execute('SELECT DISTINCT tags FROM bookmarks ORDER BY tags ASC'): for row in self.cur.execute('SELECT DISTINCT tags FROM bookmarks ORDER BY tags'):
tagset = row[0].strip(DELIMITER).split(DELIMITER) tagset = row[0].strip(DELIMITER).split(DELIMITER)
for tag in tagset: for tag in tagset:
if tag not in Tags: if tag not in Tags:
@ -621,12 +798,14 @@ class BukuDb:
try: try:
import bs4 import bs4
with open(fp, mode='r', encoding='utf-8') as f:
soup = bs4.BeautifulSoup(f, 'html.parser')
except ImportError: except ImportError:
printmsg('Beautiful Soup not found', 'ERROR') printmsg('Beautiful Soup not found', 'ERROR')
self.close_quit(1) self.close_quit(1)
except Exception as e:
with open(fp, encoding='utf-8') as f: print('\x1b[1mEXCEPTION\x1b[21m [import_bookmark]: (%s) %s' % (type(e).__name__, e))
soup = bs4.BeautifulSoup(f, 'html.parser') self.close_quit(1)
html_tags = soup.findAll('a') html_tags = soup.findAll('a')
for tag in html_tags: for tag in html_tags:
@ -637,9 +816,13 @@ class BukuDb:
desc = comment_tag.text[0:comment_tag.text.find('\n')] desc = comment_tag.text[0:comment_tag.text.find('\n')]
self.add_bookmark(tag['href'], self.add_bookmark(tag['href'],
tag.string, tag.string,
('%s%s%s' % (DELIMITER, tag['tags'], DELIMITER)) if tag.has_attr('tags') else None, ('%s%s%s' % (DELIMITER, tag['tags'], DELIMITER)) if tag.has_attr('tags') else None,
desc) desc,
True)
self.conn.commit()
f.close()
def mergedb(self, fp): def mergedb(self, fp):
"""Merge bookmarks from another Buku database file """Merge bookmarks from another Buku database file
@ -654,6 +837,8 @@ class BukuDb:
try: try:
# Create a connection # Create a connection
connfp = sqlite3.connect(fp) connfp = sqlite3.connect(fp)
# Python 3.4.4 and above
# connfp = sqlite3.connect('file:%s?mode=ro' % fp, uri=True)
curfp = connfp.cursor() curfp = connfp.cursor()
except Exception as e: except Exception as e:
print('\x1b[1mEXCEPTION\x1b[21m [mergedb]: (%s) %s' % (type(e).__name__, e)) print('\x1b[1mEXCEPTION\x1b[21m [mergedb]: (%s) %s' % (type(e).__name__, e))
@ -662,7 +847,9 @@ class BukuDb:
curfp.execute('SELECT * FROM bookmarks') curfp.execute('SELECT * FROM bookmarks')
resultset = curfp.fetchall() resultset = curfp.fetchall()
for row in resultset: for row in resultset:
self.add_bookmark(row[1], row[2], row[3], row[4]) self.add_bookmark(row[1], row[2], row[3], row[4], True)
self.conn.commit()
try: try:
curfp.close() curfp.close()
@ -670,7 +857,6 @@ class BukuDb:
except Exception: except Exception:
pass pass
def close_quit(self, exitval=0): def close_quit(self, exitval=0):
"""Close a DB connection and exit""" """Close a DB connection and exit"""
@ -717,7 +903,7 @@ def connect_server(url, fullurl=False, forced=False):
server = server[:marker] server = server[:marker]
elif not forced: # Handle domain name without trailing / elif not forced: # Handle domain name without trailing /
url = '/' url = '/'
urlconn = HTTPSConnection(server, timeout=45) urlconn = HTTPSConnection(server, timeout=30)
elif url.find('http://') >= 0: # Insecure connection elif url.find('http://') >= 0: # Insecure connection
server = url[7:] server = url[7:]
marker = server.find('/') marker = server.find('/')
@ -727,7 +913,7 @@ def connect_server(url, fullurl=False, forced=False):
server = server[:marker] server = server[:marker]
elif not forced: elif not forced:
url = '/' url = '/'
urlconn = HTTPConnection(server, timeout=45) urlconn = HTTPConnection(server, timeout=30)
else: else:
printmsg('Not a valid HTTP(S) url', 'WARNING') printmsg('Not a valid HTTP(S) url', 'WARNING')
if url.find(':') == -1: if url.find(':') == -1:
@ -1045,141 +1231,12 @@ def browser_open(url):
os.dup2(_stdout, 1) os.dup2(_stdout, 1)
def get_filehash(filepath):
    """Compute the SHA256 digest of a file

    Params: path to the file
    Returns: the raw (binary) digest of the file contents
    """

    digest = hashlib.sha256()
    with open(filepath, 'rb') as stream:
        # Feed the file to the hasher BLOCKSIZE bytes at a time;
        # an empty read marks end of file
        for block in iter(lambda: stream.read(BLOCKSIZE), b''):
            digest.update(block)

    return digest.digest()
def encrypt_file(iterations):
    """Encrypt the bookmarks database file

    Prompts twice for a password, writes '<dbpath>.enc', removes the
    plain DB file and exits the process. Failure paths exit with
    status 1; success exits with status 0.

    Layout of the encrypted file:
        8 bytes          size of the plain DB file, packed as '<Q'
        SALT_SIZE bytes  random salt
        16 bytes         AES-CBC initialization vector
        32 bytes         SHA256 digest of the plain DB file
        remainder        AES-CBC ciphertext, space-padded to 16 bytes

    Params: number of SHA256 rounds applied to derive the key
    """

    dbpath = os.path.join(BukuDb.get_dbdir_path(), 'bookmarks.db')
    encpath = '%s.enc' % dbpath
    if not os.path.exists(dbpath):
        print('%s missing. Already encrypted?' % dbpath)
        sys.exit(1)

    # If both encrypted file and flat file exist, error out
    # (the flat file is known to exist at this point)
    if os.path.exists(encpath):
        printmsg('Both encrypted and flat DB files exist!', 'ERROR')
        sys.exit(1)

    password = getpass.getpass()
    passconfirm = getpass.getpass()
    if password == '':
        print('Empty password')
        sys.exit(1)
    if password != passconfirm:
        print("Passwords don't match")
        sys.exit(1)

    # Get SHA256 hash of DB file
    dbhash = get_filehash(dbpath)

    # Generate random 256-bit salt and key
    # NOTE: the lossy salt.decode(..., 'replace') is part of the on-disk
    # format; decrypt_file() derives the key the same way.
    salt = Random.get_random_bytes(SALT_SIZE)
    key = ('%s%s' % (password,
                     salt.decode('utf-8', 'replace'))).encode('utf-8')
    for _ in range(iterations):
        key = hashlib.sha256(key).digest()

    iv = Random.get_random_bytes(16)
    cipher = AES.new(key, AES.MODE_CBC, iv)
    filesize = os.path.getsize(dbpath)

    with open(dbpath, 'rb') as infile:
        with open(encpath, 'wb') as outfile:
            outfile.write(struct.pack('<Q', filesize))
            outfile.write(salt)
            outfile.write(iv)

            # Embed DB file hash in encrypted file
            outfile.write(dbhash)

            while True:
                chunk = infile.read(CHUNKSIZE)
                if len(chunk) == 0:
                    break

                remainder = len(chunk) % 16
                if remainder:
                    # Pad with bytes, not str: '%s' formatting of a bytes
                    # chunk would embed its repr ("b'...'") and hand the
                    # cipher a str. The padding is dropped on decryption
                    # by truncating to the stored file size.
                    chunk += b' ' * (16 - remainder)

                outfile.write(cipher.encrypt(chunk))

    os.remove(dbpath)
    print('File encrypted')
    sys.exit(0)
def decrypt_file(iterations):
    """Restore the plain bookmarks database from its encrypted copy

    Asks for the password, decrypts '<dbpath>.enc' into the plain DB
    file and compares the result against the SHA256 digest embedded in
    the encrypted file. A mismatch removes the generated file and exits
    with status 1; a match removes the encrypted file.

    Params: number of SHA256 rounds applied to derive the key
    """

    plain_path = os.path.join(BukuDb.get_dbdir_path(), 'bookmarks.db')
    cipher_path = '%s.enc' % plain_path

    # Nothing to decrypt without the encrypted file
    if not os.path.exists(cipher_path):
        printmsg('%s missing' % cipher_path, 'ERROR')
        sys.exit(1)

    # Refuse to overwrite an existing flat file
    if os.path.exists(plain_path):
        printmsg('Both encrypted and flat DB files exist!', 'ERROR')
        sys.exit(1)

    password = getpass.getpass()
    if password == '':
        printmsg('Decryption failed', 'ERROR')
        sys.exit(1)

    with open(cipher_path, 'rb') as src:
        # Header field 1: size of the original DB file
        size_field = src.read(struct.calcsize('Q'))
        origsize = struct.unpack('<Q', size_field)[0]

        # Header field 2: 256-bit salt, from which the key is derived
        salt = src.read(32)
        salted = '%s%s' % (password, salt.decode('utf-8', 'replace'))
        key = salted.encode('utf-8')
        for _ in range(iterations):
            key = hashlib.sha256(key).digest()

        # Header field 3: initialization vector
        iv = src.read(16)
        cipher = AES.new(key, AES.MODE_CBC, iv)

        # Header field 4: SHA256 hash of the original DB file
        enchash = src.read(32)

        with open(plain_path, 'wb') as dst:
            # An empty read marks end of file
            for chunk in iter(lambda: src.read(CHUNKSIZE), b''):
                dst.write(cipher.decrypt(chunk))

            # Cut the output back to the original file size
            dst.truncate(origsize)

    # The generated file must hash to the value stored at encryption time
    if get_filehash(plain_path) == enchash:
        os.remove(cipher_path)
        print('File decrypted')
    else:
        os.remove(plain_path)
        printmsg('Decryption failed', 'ERROR')
        sys.exit(1)
def sigint_handler(signum, frame): def sigint_handler(signum, frame):
"""Custom SIGINT handler""" """Custom SIGINT handler"""
global interrupted
interrupted = True
print('\nInterrupted.', file=sys.stderr) print('\nInterrupted.', file=sys.stderr)
sys.exit(1) sys.exit(1)
@ -1438,22 +1495,10 @@ if __name__ == '__main__':
# Handle encrypt/decrypt options at top priority # Handle encrypt/decrypt options at top priority
if args.encrypt is not None: if args.encrypt is not None:
if no_crypto: BukuCrypt.encrypt_file(args.encrypt)
printmsg('PyCrypto missing', 'ERROR')
sys.exit(1)
if args.encrypt < 1:
printmsg('Iterations must be >= 1', 'ERROR')
sys.exit(1)
encrypt_file(args.encrypt)
if args.decrypt is not None: if args.decrypt is not None:
if no_crypto: BukuCrypt.decrypt_file(args.decrypt)
printmsg('PyCrypto missing', 'ERROR')
sys.exit(1)
if args.decrypt < 1:
printmsg('Decryption failed', 'ERROR')
sys.exit(1)
decrypt_file(args.decrypt)
# Initialize the database and get handles # Initialize the database and get handles
bdb = BukuDb() bdb = BukuDb()

View File

@ -1,2 +1,2 @@
beautifulsoup4>=4.4.1 beautifulsoup4>=4.4.1
pycrypto>=2.6.1 cryptography>=1.3.2