draft: multithreaded refreshdb
This commit is contained in:
parent
82551c5706
commit
b8cc710461
159
buku.py
159
buku.py
@ -32,6 +32,7 @@ import json
|
||||
import logging
|
||||
import inspect
|
||||
import atexit
|
||||
import threading
|
||||
|
||||
try:
|
||||
import readline
|
||||
@ -53,7 +54,6 @@ interrupted = False # Received SIGINT
|
||||
DELIM = ',' # Delimiter used to store tags in DB
|
||||
SKIP_MIMES = {'.pdf', '.txt'}
|
||||
http_handler = None # urllib3 PoolManager handler
|
||||
htmlparser = None # Use a single HTML Parser instance
|
||||
|
||||
# Disguise as Firefox on Ubuntu
|
||||
USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:48.0) Gecko/20100101 \
|
||||
@ -421,7 +421,7 @@ class BukuDb:
|
||||
|
||||
try:
|
||||
# Create a connection
|
||||
conn = sqlite3.connect(dbfile)
|
||||
conn = sqlite3.connect(dbfile, timeout = 30, check_same_thread = False)
|
||||
conn.create_function('REGEXP', 2, regexp)
|
||||
cur = conn.cursor()
|
||||
|
||||
@ -751,13 +751,13 @@ class BukuDb:
|
||||
|
||||
return True
|
||||
|
||||
# single connection attempt
|
||||
def refreshdb(self, index):
|
||||
'''Refresh ALL records in the database. Fetch title for each
|
||||
bookmark from the web and update the records. Doesn't update
|
||||
the record if title is empty.
|
||||
This API doesn't change DB index, URL or tags of a bookmark.
|
||||
This API is verbose.
|
||||
|
||||
:param index: index of record to update, or 0 for all records
|
||||
'''
|
||||
|
||||
@ -767,34 +767,138 @@ class BukuDb:
|
||||
else:
|
||||
self.cur.execute('SELECT id, url FROM bookmarks WHERE id = ? AND \
|
||||
flags & 1 != 1', (index,))
|
||||
|
||||
|
||||
resultset = self.cur.fetchall()
|
||||
if not len(resultset):
|
||||
logerr('No matching index or title immutable or empty DB')
|
||||
return False
|
||||
return False
|
||||
|
||||
query = 'UPDATE bookmarks SET metadata = ? WHERE id = ?'
|
||||
for row in resultset:
|
||||
title, mime, bad = network_handler(row[1])
|
||||
if bad:
|
||||
print('\x1b[1mIndex %d: malformed URL\x1b[0m\n' % row[0])
|
||||
continue
|
||||
elif mime:
|
||||
print('\x1b[1mIndex %d: mime HEAD requested\x1b[0m\n' % row[0])
|
||||
continue
|
||||
elif title == '':
|
||||
print('\x1b[1mIndex %d: no title\x1b[0m\n' % row[0])
|
||||
continue
|
||||
|
||||
self.cur.execute(query, (title, row[0],))
|
||||
LOCK = threading.Lock()
|
||||
|
||||
if self.chatty:
|
||||
print('Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n'
|
||||
% (title, row[0]))
|
||||
if interrupted:
|
||||
break
|
||||
def refresh():
|
||||
'''call network_handler and update db
|
||||
|
||||
:param db: a BukuDb object
|
||||
'''
|
||||
|
||||
while len(resultset) > 0:
|
||||
|
||||
row = resultset.pop(0)
|
||||
title, mime, bad = network_handler(row[1])
|
||||
if bad:
|
||||
print('\x1b[1mIndex %d: malformed URL\x1b[0m\n' % row[0])
|
||||
continue
|
||||
elif mime:
|
||||
print('\x1b[1mIndex %d: mime HEAD requested\x1b[0m\n' % row[0])
|
||||
continue
|
||||
elif title == '':
|
||||
print('\x1b[1mIndex %d: no title\x1b[0m\n' % row[0])
|
||||
continue
|
||||
|
||||
|
||||
LOCK.acquire()
|
||||
self.cur.execute(query, (title, row[0],))
|
||||
self.conn.commit()
|
||||
LOCK.release()
|
||||
|
||||
if self.chatty:
|
||||
print('Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n'
|
||||
% (title, row[0]))
|
||||
'''
|
||||
# do we need this?
|
||||
if interrupted:
|
||||
break
|
||||
'''
|
||||
|
||||
for thread in range(4):
|
||||
thread = threading.Thread(target=refresh)
|
||||
thread.start()
|
||||
|
||||
return True
|
||||
|
||||
# multi-connection attempt
|
||||
def other_refreshdb(self, index):
|
||||
'''Refresh ALL records in the database. Fetch title for each
|
||||
bookmark from the web and update the records. Doesn't update
|
||||
the record if title is empty.
|
||||
This API doesn't change DB index, URL or tags of a bookmark.
|
||||
This API is verbose.
|
||||
:param index: index of record to update, or 0 for all records
|
||||
'''
|
||||
|
||||
if index == 0:
|
||||
self.cur.execute('SELECT id, url FROM bookmarks WHERE \
|
||||
flags & 1 != 1 ORDER BY id ASC')
|
||||
else:
|
||||
self.cur.execute('SELECT id, url FROM bookmarks WHERE id = ? AND \
|
||||
flags & 1 != 1', (index,))
|
||||
|
||||
resultset = self.cur.fetchall()
|
||||
if not len(resultset):
|
||||
logerr('No matching index or title immutable or empty DB')
|
||||
return False
|
||||
|
||||
query = 'UPDATE bookmarks SET metadata = ? WHERE id = ?'
|
||||
|
||||
LOCK = threading.Lock()
|
||||
|
||||
dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db')
|
||||
|
||||
bdb2 = BukuDb(dbfile = dbfile)
|
||||
bdb3 = BukuDb(dbfile = dbfile)
|
||||
bdb4 = BukuDb(dbfile = dbfile)
|
||||
|
||||
def refresh(db):
|
||||
'''call network_handler and update db
|
||||
|
||||
:param db: a BukuDb object
|
||||
'''
|
||||
|
||||
while len(resultset) > 0:
|
||||
|
||||
row = resultset.pop(0)
|
||||
title, mime, bad = network_handler(row[1])
|
||||
if bad:
|
||||
print('\x1b[1mIndex %d: malformed URL\x1b[0m\n' % row[0])
|
||||
continue
|
||||
elif mime:
|
||||
print('\x1b[1mIndex %d: mime HEAD requested\x1b[0m\n' % row[0])
|
||||
continue
|
||||
elif title == '':
|
||||
print('\x1b[1mIndex %d: no title\x1b[0m\n' % row[0])
|
||||
continue
|
||||
|
||||
|
||||
LOCK.acquire()
|
||||
db.cur.execute(query, (title, row[0],))
|
||||
db.conn.commit()
|
||||
LOCK.release()
|
||||
|
||||
if db.chatty:
|
||||
print('Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n'
|
||||
% (title, row[0]))
|
||||
'''
|
||||
# do we need this?
|
||||
if interrupted:
|
||||
break
|
||||
'''
|
||||
|
||||
for thread in range(4):
|
||||
if thread == 0:
|
||||
first_thread = threading.Thread(target=refresh, args = (self,))
|
||||
first_thread.start()
|
||||
elif thread == 1:
|
||||
second_thread = threading.Thread(target=refresh, args = (bdb2,))
|
||||
second_thread.start()
|
||||
elif thread == 2:
|
||||
third_thread = threading.Thread(target=refresh, args = (bdb3,))
|
||||
third_thread.start()
|
||||
elif thread == 3:
|
||||
fourth_thread = threading.Thread(target=refresh, args = (bdb4,))
|
||||
fourth_thread.start()
|
||||
|
||||
self.conn.commit()
|
||||
return True
|
||||
|
||||
def searchdb(self, keywords, all_keywords=False, deep=False, regex=False):
|
||||
@ -1338,9 +1442,9 @@ Buku bookmarks</H3>
|
||||
# Connect to input DB
|
||||
if sys.version_info >= (3, 4, 4):
|
||||
# Python 3.4.4 and above
|
||||
indb_conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True)
|
||||
indb_conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True, timeout = 30, check_same_thread = False)
|
||||
else:
|
||||
indb_conn = sqlite3.connect(path)
|
||||
indb_conn = sqlite3.connect(path, timeout = 30, check_same_thread = False)
|
||||
|
||||
indb_cur = indb_conn.cursor()
|
||||
indb_cur.execute('SELECT * FROM bookmarks')
|
||||
@ -1496,10 +1600,7 @@ def get_page_title(resp):
|
||||
:return: title fetched from parsed page
|
||||
'''
|
||||
|
||||
global htmlparser
|
||||
|
||||
if not htmlparser:
|
||||
htmlparser = BMHTMLParser()
|
||||
htmlparser = BMHTMLParser()
|
||||
|
||||
try:
|
||||
htmlparser.feed(resp.data.decode(errors='replace'))
|
||||
|
Loading…
x
Reference in New Issue
Block a user