draft: multithreaded refreshdb

Damian 2016-11-27 23:00:42 -05:00
parent 82551c5706
commit b8cc710461

buku.py (159 lines changed)

@@ -32,6 +32,7 @@ import json
import logging
import inspect
import atexit
import threading
try:
    import readline
@@ -53,7 +54,6 @@ interrupted = False # Received SIGINT
DELIM = ',' # Delimiter used to store tags in DB
SKIP_MIMES = {'.pdf', '.txt'}
http_handler = None # urllib3 PoolManager handler
htmlparser = None # Use a single HTML Parser instance
# Disguise as Firefox on Ubuntu
USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:48.0) Gecko/20100101 \
@@ -421,7 +421,7 @@ class BukuDb:
        try:
            # Create a connection
            conn = sqlite3.connect(dbfile)
            conn = sqlite3.connect(dbfile, timeout = 30, check_same_thread = False)
            conn.create_function('REGEXP', 2, regexp)
            cur = conn.cursor()
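For context on the new connect arguments: check_same_thread=False lets a connection created in the main thread be used from worker threads (the sqlite3 module otherwise raises ProgrammingError), and timeout=30 makes a write wait up to 30 seconds on a locked database before raising sqlite3.OperationalError. A minimal sketch of the resulting usage pattern, with an explicit lock serializing writes; the table and helper names are illustrative, not buku's:

import sqlite3
import threading

conn = sqlite3.connect('bookmarks.db', timeout=30, check_same_thread=False)
write_lock = threading.Lock()

def update_title(index, title):
    # Serialize writes ourselves: SQLite allows only one writer at a time.
    with write_lock:
        conn.execute('UPDATE bookmarks SET metadata = ? WHERE id = ?', (title, index))
        conn.commit()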
@@ -751,13 +751,13 @@ class BukuDb:
        return True

    # single connection attempt
    def refreshdb(self, index):
        '''Refresh ALL records in the database. Fetch title for each
        bookmark from the web and update the records. Doesn't update
        the record if title is empty.
        This API doesn't change DB index, URL or tags of a bookmark.
        This API is verbose.
        :param index: index of record to update, or 0 for all records
        '''
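As the docstring notes, index 0 refreshes every record whose immutable flag is unset, while a positive index targets a single bookmark. A hedged usage sketch, assuming a BukuDb handle (the database path is illustrative):

bdb = BukuDb(dbfile='/path/to/bookmarks.db')
bdb.refreshdb(0)     # refresh titles of all non-immutable records
bdb.refreshdb(15)    # refresh only the record at index 15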
@@ -767,34 +767,138 @@ class BukuDb:
        else:
            self.cur.execute('SELECT id, url FROM bookmarks WHERE id = ? AND \
                             flags & 1 != 1', (index,))
        resultset = self.cur.fetchall()
        if not len(resultset):
            logerr('No matching index or title immutable or empty DB')
            return False
        query = 'UPDATE bookmarks SET metadata = ? WHERE id = ?'
        for row in resultset:
            title, mime, bad = network_handler(row[1])
            if bad:
                print('\x1b[1mIndex %d: malformed URL\x1b[0m\n' % row[0])
                continue
            elif mime:
                print('\x1b[1mIndex %d: mime HEAD requested\x1b[0m\n' % row[0])
                continue
            elif title == '':
                print('\x1b[1mIndex %d: no title\x1b[0m\n' % row[0])
                continue
            self.cur.execute(query, (title, row[0],))
        LOCK = threading.Lock()
            if self.chatty:
                print('Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n'
                      % (title, row[0]))
            if interrupted:
                break
        def refresh():
            '''call network_handler and update db
            :param db: a BukuDb object
            '''
            while len(resultset) > 0:
                row = resultset.pop(0)
                title, mime, bad = network_handler(row[1])
                if bad:
                    print('\x1b[1mIndex %d: malformed URL\x1b[0m\n' % row[0])
                    continue
                elif mime:
                    print('\x1b[1mIndex %d: mime HEAD requested\x1b[0m\n' % row[0])
                    continue
                elif title == '':
                    print('\x1b[1mIndex %d: no title\x1b[0m\n' % row[0])
                    continue
                LOCK.acquire()
                self.cur.execute(query, (title, row[0],))
                self.conn.commit()
                LOCK.release()
                if self.chatty:
                    print('Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n'
                          % (title, row[0]))
                '''
                # do we need this?
                if interrupted:
                    break
                '''
        for thread in range(4):
            thread = threading.Thread(target=refresh)
            thread.start()
        return True
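In this single-connection variant, four threads pop rows off the shared resultset and write through self.cur while holding LOCK. Two things to note: the draft never joins the workers, so refreshdb() returns before the titles finish updating, and the pop(0) happens outside the lock (atomic under CPython's GIL, but easy to make explicit). A sketch of the same worker pattern with the lock widened and the threads joined; fetch_title and update_title stand in for network_handler and the UPDATE query, they are not buku functions:

import threading

def refresh_all(resultset, fetch_title, update_title, nthreads=4):
    lock = threading.Lock()

    def worker():
        while True:
            with lock:                   # take the next (id, url) row safely
                if not resultset:
                    return
                row = resultset.pop(0)
            title = fetch_title(row[1])  # network I/O happens outside the lock
            if title:
                with lock:               # serialize the DB write and commit
                    update_title(row[0], title)

    threads = [threading.Thread(target=worker) for _ in range(nthreads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()                         # don't return until every row is done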
    # multi-connection attempt
    def other_refreshdb(self, index):
        '''Refresh ALL records in the database. Fetch title for each
        bookmark from the web and update the records. Doesn't update
        the record if title is empty.
        This API doesn't change DB index, URL or tags of a bookmark.
        This API is verbose.
        :param index: index of record to update, or 0 for all records
        '''
        if index == 0:
            self.cur.execute('SELECT id, url FROM bookmarks WHERE \
                             flags & 1 != 1 ORDER BY id ASC')
        else:
            self.cur.execute('SELECT id, url FROM bookmarks WHERE id = ? AND \
                             flags & 1 != 1', (index,))
        resultset = self.cur.fetchall()
        if not len(resultset):
            logerr('No matching index or title immutable or empty DB')
            return False
        query = 'UPDATE bookmarks SET metadata = ? WHERE id = ?'
        LOCK = threading.Lock()
        dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db')
        bdb2 = BukuDb(dbfile = dbfile)
        bdb3 = BukuDb(dbfile = dbfile)
        bdb4 = BukuDb(dbfile = dbfile)
        def refresh(db):
            '''call network_handler and update db
            :param db: a BukuDb object
            '''
            while len(resultset) > 0:
                row = resultset.pop(0)
                title, mime, bad = network_handler(row[1])
                if bad:
                    print('\x1b[1mIndex %d: malformed URL\x1b[0m\n' % row[0])
                    continue
                elif mime:
                    print('\x1b[1mIndex %d: mime HEAD requested\x1b[0m\n' % row[0])
                    continue
                elif title == '':
                    print('\x1b[1mIndex %d: no title\x1b[0m\n' % row[0])
                    continue
                LOCK.acquire()
                db.cur.execute(query, (title, row[0],))
                db.conn.commit()
                LOCK.release()
                if db.chatty:
                    print('Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n'
                          % (title, row[0]))
                '''
                # do we need this?
                if interrupted:
                    break
                '''
        for thread in range(4):
            if thread == 0:
                first_thread = threading.Thread(target=refresh, args = (self,))
                first_thread.start()
            elif thread == 1:
                second_thread = threading.Thread(target=refresh, args = (bdb2,))
                second_thread.start()
            elif thread == 2:
                third_thread = threading.Thread(target=refresh, args = (bdb3,))
                third_thread.start()
            elif thread == 3:
                fourth_thread = threading.Thread(target=refresh, args = (bdb4,))
                fourth_thread.start()
        self.conn.commit()
        return True
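The multi-connection variant gives each thread its own BukuDb handle over the same database file; SQLite still permits only one writer at a time, so the timeout=30 set in initdb() is what keeps the other connections waiting on the file lock instead of failing immediately. One way the fan-out could be written without the if/elif ladder, with joins added for completeness (the join calls are an addition for illustration, not part of this draft):

handles = [self, bdb2, bdb3, bdb4]
threads = [threading.Thread(target=refresh, args=(db,)) for db in handles]
for t in threads:
    t.start()
for t in threads:
    t.join()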
    def searchdb(self, keywords, all_keywords=False, deep=False, regex=False):
@@ -1338,9 +1442,9 @@ Buku bookmarks</H3>
        # Connect to input DB
        if sys.version_info >= (3, 4, 4):
            # Python 3.4.4 and above
            indb_conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True)
            indb_conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True, timeout = 30, check_same_thread = False)
        else:
            indb_conn = sqlite3.connect(path)
            indb_conn = sqlite3.connect(path, timeout = 30, check_same_thread = False)
        indb_cur = indb_conn.cursor()
        indb_cur.execute('SELECT * FROM bookmarks')
@@ -1496,10 +1600,7 @@ def get_page_title(resp):
    :return: title fetched from parsed page
    '''
    global htmlparser
    if not htmlparser:
        htmlparser = BMHTMLParser()
    htmlparser = BMHTMLParser()
    try:
        htmlparser.feed(resp.data.decode(errors='replace'))
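Dropping the cached global parser and building a fresh BMHTMLParser on every call removes shared mutable state, which matters once several refresh threads fetch pages concurrently. If per-call construction ever proved costly, a thread-local cache would be one alternative; a sketch only, since safe reuse across calls depends on how BMHTMLParser resets its internal state:

import threading

_parser_store = threading.local()

def get_parser():
    # One BMHTMLParser per thread; the draft simply constructs a new one per call.
    if not hasattr(_parser_store, 'parser'):
        _parser_store.parser = BMHTMLParser()
    return _parser_store.parser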