Merge branch 'master' into tests

This commit is contained in:
poikjhn 2016-06-02 02:31:40 +02:00
commit 9a614ae066
3 changed files with 225 additions and 180 deletions

View File

@ -60,15 +60,15 @@ Copyright (C) 2015-2016 [Arun Prakash Jana](mailto:engineerarun@gmail.com).
`buku` requires Python 3.x to work.
Optional dependencies:
- Encryption: PyCrypto
- Encryption: cryptography
- Import bookmarks: Beautiful Soup
Run:
$ sudo pip3 install pycrypto beautifulsoup4
$ sudo pip3 install cryptography beautifulsoup4
or on Ubuntu:
$ sudo apt-get install python3-crypto python3-bs4
$ sudo apt-get install python3-cryptography python3-bs4
## Installing from this repository

397
buku
View File

@ -29,21 +29,6 @@ import gzip
import io
import signal
# Import libraries needed for encryption
try:
import getpass
import hashlib
from Crypto.Cipher import AES
from Crypto import Random
import struct
no_crypto = False
BLOCKSIZE = 65536
SALT_SIZE = 32
CHUNKSIZE = 0x80000 # Read/write 512 KB chunks
except ImportError:
no_crypto = True
# Globals
update = False # Update a bookmark in DB
@ -57,10 +42,17 @@ showOpt = 0 # Modify show. 1: show only URL, 2: show URL and tag
debug = False # Enable debug logs
pipeargs = [] # Holds arguments piped to the program
noninteractive = False # Show the prompt or not
interrupted = False # Received SIGINT
DELIMITER = ',' # Delimiter used to store tags in DB
_VERSION_ = '2.1' # Program version
# Crypto globals
BLOCKSIZE = 65536
SALT_SIZE = 32
CHUNKSIZE = 0x80000 # Read/write 512 KB chunks
class BMHTMLParser(HTMLParser.HTMLParser):
"""Class to parse and fetch the title from a HTML page, if available"""
@ -93,6 +85,187 @@ class BMHTMLParser(HTMLParser.HTMLParser):
pass
class BukuCrypt:
    """Class to handle encryption and decryption
    of the database file. Functionally a separate entity.

    Involves late imports in the static functions but it
    saves ~100ms each time. Given that encrypt/decrypt are
    not done automatically and any one should be called at
    a time, this doesn't seem to be an outrageous approach.

    Layout of the encrypted file, as written by encrypt_file():
    8-byte little-endian original size, SALT_SIZE-byte salt,
    16-byte IV, 32-byte SHA256 digest of the plain DB file,
    then the AES-CBC ciphertext.
    """

    @staticmethod
    def get_filehash(filepath):
        """Get the SHA256 hash of a file

        Params: path to the file
        Returns: the raw SHA256 digest (bytes) of the file contents
        """

        from hashlib import sha256

        hasher = sha256()
        with open(filepath, 'rb') as f:
            # Hash in BLOCKSIZE pieces so the whole file is never in memory
            buf = f.read(BLOCKSIZE)
            while len(buf) > 0:
                hasher.update(buf)
                buf = f.read(BLOCKSIZE)

        return hasher.digest()

    @staticmethod
    def encrypt_file(iterations):
        """Encrypt the bookmarks database file

        Params: number of SHA256 rounds used to derive the key (>= 1)

        Prompts twice for a password, writes '<dbpath>.enc', removes
        the plain DB file and exits the program (status 0 on success,
        1 on any failure).
        """

        try:
            from getpass import getpass
            import struct
            from hashlib import sha256
            from cryptography.hazmat.backends import default_backend
            from cryptography.hazmat.primitives.ciphers import (
                Cipher, algorithms, modes)
        except ImportError:
            print('cryptography missing')
            sys.exit(1)

        if iterations < 1:
            printmsg('Iterations must be >= 1', 'ERROR')
            sys.exit(1)

        dbpath = os.path.join(BukuDb.get_dbdir_path(), 'bookmarks.db')
        encpath = '%s.enc' % dbpath
        if not os.path.exists(dbpath):
            print('%s missing. Already encrypted?' % dbpath)
            sys.exit(1)

        # If both encrypted file and flat file exist, error out
        # (the flat file is known to exist at this point)
        if os.path.exists(encpath):
            printmsg('Both encrypted and flat DB files exist!', 'ERROR')
            sys.exit(1)

        password = getpass()
        passconfirm = getpass()
        if password == '':
            print('Empty password')
            sys.exit(1)
        if password != passconfirm:
            print("Passwords don't match")
            sys.exit(1)

        # Get SHA256 hash of DB file
        dbhash = BukuCrypt.get_filehash(dbpath)

        # Generate random 256-bit salt and key
        salt = os.urandom(SALT_SIZE)
        key = ('%s%s' % (password, salt.decode('utf-8', 'replace'))).encode('utf-8')
        for _ in range(iterations):
            key = sha256(key).digest()

        iv = os.urandom(16)
        encryptor = Cipher(
            algorithms.AES(key),
            modes.CBC(iv),
            backend=default_backend()
        ).encryptor()
        filesize = os.path.getsize(dbpath)

        with open(dbpath, 'rb') as infile:
            with open(encpath, 'wb') as outfile:
                outfile.write(struct.pack('<Q', filesize))
                outfile.write(salt)
                outfile.write(iv)

                # Embed DB file hash in encrypted file
                outfile.write(dbhash)

                while True:
                    chunk = infile.read(CHUNKSIZE)
                    if len(chunk) == 0:
                        break
                    elif len(chunk) % 16 != 0:
                        # Pad with bytes: '%s' formatting of a bytes
                        # object would yield its repr as a str
                        chunk += b' ' * (16 - len(chunk) % 16)

                    outfile.write(encryptor.update(chunk))

                # A cipher context can be finalized only once, so this
                # must happen after the last chunk, not per chunk
                outfile.write(encryptor.finalize())

        os.remove(dbpath)
        print('File encrypted')
        sys.exit(0)

    @staticmethod
    def decrypt_file(iterations):
        """Decrypt the bookmarks database file

        Params: number of SHA256 rounds used to derive the key; must
                match the value given when the file was encrypted

        Prompts for the password, writes the plain DB file and checks
        its SHA256 against the digest embedded in the encrypted file.
        On mismatch the plain file is removed and the program exits
        with status 1; on success the encrypted file is removed.
        """

        try:
            from getpass import getpass
            import struct
            from hashlib import sha256
            from cryptography.hazmat.backends import default_backend
            from cryptography.hazmat.primitives.ciphers import (
                Cipher, algorithms, modes)
        except ImportError:
            print('cryptography missing')
            sys.exit(1)

        if iterations < 1:
            printmsg('Decryption failed', 'ERROR')
            sys.exit(1)

        dbpath = os.path.join(BukuDb.get_dbdir_path(), 'bookmarks.db')
        encpath = '%s.enc' % dbpath
        if not os.path.exists(encpath):
            printmsg('%s missing' % encpath, 'ERROR')
            sys.exit(1)

        # If both encrypted file and flat file exist, error out
        # (the encrypted file is known to exist at this point)
        if os.path.exists(dbpath):
            printmsg('Both encrypted and flat DB files exist!', 'ERROR')
            sys.exit(1)

        password = getpass()
        if password == '':
            printmsg('Decryption failed', 'ERROR')
            sys.exit(1)

        with open(encpath, 'rb') as infile:
            origsize = struct.unpack('<Q', infile.read(struct.calcsize('Q')))[0]

            # Read 256-bit salt and generate key
            salt = infile.read(SALT_SIZE)
            key = ('%s%s' % (password, salt.decode('utf-8', 'replace'))).encode('utf-8')
            for _ in range(iterations):
                key = sha256(key).digest()

            iv = infile.read(16)
            decryptor = Cipher(
                algorithms.AES(key),
                modes.CBC(iv),
                backend=default_backend(),
            ).decryptor()

            # Get original DB file's SHA256 hash from encrypted file
            enchash = infile.read(32)

            with open(dbpath, 'wb') as outfile:
                while True:
                    chunk = infile.read(CHUNKSIZE)
                    if len(chunk) == 0:
                        break

                    outfile.write(decryptor.update(chunk))

                # Finalize once, after all chunks have been processed
                outfile.write(decryptor.finalize())
                # Drop the padding added at encryption time
                outfile.truncate(origsize)

        # Match hash of generated file with that of original DB file
        dbhash = BukuCrypt.get_filehash(dbpath)
        if dbhash != enchash:
            os.remove(dbpath)
            printmsg('Decryption failed', 'ERROR')
            sys.exit(1)
        else:
            os.remove(encpath)
            print('File decrypted')
class BukuDb:
def __init__(self, *args, **kwargs):
@ -205,7 +378,7 @@ class BukuDb:
return resultset[0][0]
def add_bookmark(self, url, title_manual=None, tag_manual=None, desc=None):
def add_bookmark(self, url, title_manual=None, tag_manual=None, desc=None, delayed_commit=False):
"""Add a new bookmark
:param url: url to bookmark
@ -245,7 +418,8 @@ class BukuDb:
try:
self.cur.execute('INSERT INTO bookmarks(URL, metadata, tags, desc) VALUES (?, ?, ?, ?)', (url, meta, tag_manual, desc))
self.conn.commit()
if not delayed_commit:
self.conn.commit()
self.print_bookmark(self.cur.lastrowid)
except Exception as e:
print('\x1b[1mEXCEPTION\x1b[21m [add_bookmark]: (%s) %s' % (type(e).__name__, e))
@ -352,16 +526,19 @@ class BukuDb:
print('Title: [%s]' % title)
self.cur.execute('UPDATE bookmarks SET metadata = ? WHERE id = ?', (title, row[0],))
self.conn.commit()
print('Index %d updated\n' % row[0])
if interrupted:
printmsg('Aborting refreshdb ...', 'WARNING')
break
else:
title = title_manual
for row in resultset:
self.cur.execute('UPDATE bookmarks SET metadata = ? WHERE id = ?', (title, row[0],))
self.conn.commit()
print('Index %d updated\n' % row[0])
self.conn.commit()
def searchdb(self, keywords, all_keywords=False, json=False):
"""Search the database for an entries with tags or URL
or title info matching keywords and list those.
@ -539,7 +716,7 @@ class BukuDb:
count = 1
Tags = []
uniqueTags = []
for row in self.cur.execute('SELECT DISTINCT tags FROM bookmarks ORDER BY tags ASC'):
for row in self.cur.execute('SELECT DISTINCT tags FROM bookmarks ORDER BY tags'):
tagset = row[0].strip(DELIMITER).split(DELIMITER)
for tag in tagset:
if tag not in Tags:
@ -621,12 +798,14 @@ class BukuDb:
try:
import bs4
with open(fp, mode='r', encoding='utf-8') as f:
soup = bs4.BeautifulSoup(f, 'html.parser')
except ImportError:
printmsg('Beautiful Soup not found', 'ERROR')
self.close_quit(1)
with open(fp, encoding='utf-8') as f:
soup = bs4.BeautifulSoup(f, 'html.parser')
except Exception as e:
print('\x1b[1mEXCEPTION\x1b[21m [import_bookmark]: (%s) %s' % (type(e).__name__, e))
self.close_quit(1)
html_tags = soup.findAll('a')
for tag in html_tags:
@ -637,9 +816,13 @@ class BukuDb:
desc = comment_tag.text[0:comment_tag.text.find('\n')]
self.add_bookmark(tag['href'],
tag.string,
('%s%s%s' % (DELIMITER, tag['tags'], DELIMITER)) if tag.has_attr('tags') else None,
desc)
tag.string,
('%s%s%s' % (DELIMITER, tag['tags'], DELIMITER)) if tag.has_attr('tags') else None,
desc,
True)
self.conn.commit()
f.close()
def mergedb(self, fp):
"""Merge bookmarks from another Buku database file
@ -654,6 +837,8 @@ class BukuDb:
try:
# Create a connection
connfp = sqlite3.connect(fp)
# Python 3.4.4 and above
# connfp = sqlite3.connect('file:%s?mode=ro' % fp, uri=True)
curfp = connfp.cursor()
except Exception as e:
print('\x1b[1mEXCEPTION\x1b[21m [mergedb]: (%s) %s' % (type(e).__name__, e))
@ -662,7 +847,9 @@ class BukuDb:
curfp.execute('SELECT * FROM bookmarks')
resultset = curfp.fetchall()
for row in resultset:
self.add_bookmark(row[1], row[2], row[3], row[4])
self.add_bookmark(row[1], row[2], row[3], row[4], True)
self.conn.commit()
try:
curfp.close()
@ -670,7 +857,6 @@ class BukuDb:
except Exception:
pass
def close_quit(self, exitval=0):
"""Close a DB connection and exit"""
@ -717,7 +903,7 @@ def connect_server(url, fullurl=False, forced=False):
server = server[:marker]
elif not forced: # Handle domain name without trailing /
url = '/'
urlconn = HTTPSConnection(server, timeout=45)
urlconn = HTTPSConnection(server, timeout=30)
elif url.find('http://') >= 0: # Insecure connection
server = url[7:]
marker = server.find('/')
@ -727,7 +913,7 @@ def connect_server(url, fullurl=False, forced=False):
server = server[:marker]
elif not forced:
url = '/'
urlconn = HTTPConnection(server, timeout=45)
urlconn = HTTPConnection(server, timeout=30)
else:
printmsg('Not a valid HTTP(S) url', 'WARNING')
if url.find(':') == -1:
@ -1045,141 +1231,12 @@ def browser_open(url):
os.dup2(_stdout, 1)
def get_filehash(filepath):
    """Compute the SHA256 digest of a file

    Params: path to the file
    Returns: the raw digest bytes
    """

    digest = hashlib.sha256()
    with open(filepath, 'rb') as stream:
        # Feed the hasher BLOCKSIZE bytes at a time until read() hits EOF
        for piece in iter(lambda: stream.read(BLOCKSIZE), b''):
            digest.update(piece)

    return digest.digest()
def encrypt_file(iterations):
    """Encrypt the bookmarks database file

    Params: number of SHA256 rounds used to derive the key

    Prompts twice for a password, writes '<dbpath>.enc' (original
    size, salt, IV, SHA256 of the plain file, then the AES-CBC
    ciphertext), removes the plain DB file and exits the program.
    """

    dbpath = os.path.join(BukuDb.get_dbdir_path(), 'bookmarks.db')
    encpath = '%s.enc' % dbpath
    if not os.path.exists(dbpath):
        print('%s missing. Already encrypted?' % dbpath)
        sys.exit(1)

    # If both encrypted file and flat file exist, error out
    if os.path.exists(dbpath) and os.path.exists(encpath):
        printmsg('Both encrypted and flat DB files exist!', 'ERROR')
        sys.exit(1)

    password = getpass.getpass()
    passconfirm = getpass.getpass()
    if password == '':
        print('Empty password')
        sys.exit(1)
    if password != passconfirm:
        print("Passwords don't match")
        sys.exit(1)

    # Get SHA256 hash of DB file
    dbhash = get_filehash(dbpath)

    # Generate random 256-bit salt and key
    salt = Random.get_random_bytes(SALT_SIZE)
    key = ('%s%s' % (password, salt.decode('utf-8', 'replace'))).encode('utf-8')
    for _ in range(iterations):
        key = hashlib.sha256(key).digest()

    iv = Random.get_random_bytes(16)
    cipher = AES.new(key, AES.MODE_CBC, iv)
    filesize = os.path.getsize(dbpath)

    with open(dbpath, 'rb') as infile:
        with open(encpath, 'wb') as outfile:
            outfile.write(struct.pack('<Q', filesize))
            outfile.write(salt)
            outfile.write(iv)

            # Embed DB file hash in encrypted file
            outfile.write(dbhash)

            while True:
                chunk = infile.read(CHUNKSIZE)
                if len(chunk) == 0:
                    break
                elif len(chunk) % 16 != 0:
                    # The file is read in binary mode, so pad with bytes;
                    # '%s' formatting would embed the bytes repr in a str
                    chunk += b' ' * (16 - len(chunk) % 16)

                outfile.write(cipher.encrypt(chunk))

    os.remove(dbpath)
    print('File encrypted')
    sys.exit(0)
def decrypt_file(iterations):
    """Restore the plain bookmarks database from its encrypted copy

    Params: number of SHA256 rounds used to derive the key

    Prompts for the password, writes the decrypted DB file and
    compares its SHA256 with the digest stored in the encrypted
    file; the losing file of that comparison is removed.
    """

    plain_path = os.path.join(BukuDb.get_dbdir_path(), 'bookmarks.db')
    cipher_path = '%s.enc' % plain_path

    if not os.path.exists(cipher_path):
        printmsg('%s missing' % cipher_path, 'ERROR')
        sys.exit(1)

    # Refuse to continue when a flat DB sits next to the encrypted one
    if os.path.exists(plain_path) and os.path.exists(cipher_path):
        printmsg('Both encrypted and flat DB files exist!', 'ERROR')
        sys.exit(1)

    password = getpass.getpass()
    if password == '':
        printmsg('Decryption failed', 'ERROR')
        sys.exit(1)

    with open(cipher_path, 'rb') as src:
        size_field = src.read(struct.calcsize('Q'))
        (origsize,) = struct.unpack('<Q', size_field)

        # The 256-bit salt follows the size; derive the key from it
        salt = src.read(32)
        salted = '%s%s' % (password, salt.decode('utf-8', 'replace'))
        key = salted.encode('utf-8')
        for _ in range(iterations):
            key = hashlib.sha256(key).digest()

        iv = src.read(16)
        cipher = AES.new(key, AES.MODE_CBC, iv)

        # Digest of the original DB file, stored at encryption time
        stored_hash = src.read(32)

        with open(plain_path, 'wb') as dst:
            # Decrypt CHUNKSIZE bytes at a time until EOF
            for chunk in iter(lambda: src.read(CHUNKSIZE), b''):
                dst.write(cipher.decrypt(chunk))

            # Cut the output back to the recorded original size
            dst.truncate(origsize)

    # Keep the result only when it hashes to the stored digest
    if get_filehash(plain_path) == stored_hash:
        os.remove(cipher_path)
        print('File decrypted')
    else:
        os.remove(plain_path)
        printmsg('Decryption failed', 'ERROR')
        sys.exit(1)
def sigint_handler(signum, frame):
    """SIGINT handler: record the interrupt, report it and exit

    Params: signal number and current stack frame (both unused)
    """

    global interrupted

    # Set the module-level flag before leaving
    interrupted = True
    print('\nInterrupted.', file=sys.stderr)
    raise SystemExit(1)
@ -1438,22 +1495,10 @@ if __name__ == '__main__':
# Handle encrypt/decrypt options at top priority
if args.encrypt is not None:
if no_crypto:
printmsg('PyCrypto missing', 'ERROR')
sys.exit(1)
if args.encrypt < 1:
printmsg('Iterations must be >= 1', 'ERROR')
sys.exit(1)
encrypt_file(args.encrypt)
BukuCrypt.encrypt_file(args.encrypt)
if args.decrypt is not None:
if no_crypto:
printmsg('PyCrypto missing', 'ERROR')
sys.exit(1)
if args.decrypt < 1:
printmsg('Decryption failed', 'ERROR')
sys.exit(1)
decrypt_file(args.decrypt)
BukuCrypt.decrypt_file(args.decrypt)
# Initialize the database and get handles
bdb = BukuDb()

View File

@ -1,2 +1,2 @@
beautifulsoup4>=4.4.1
pycrypto>=2.6.1
cryptography>=1.3.2