diff --git a/buku.py b/buku.py index 97ec301..5602770 100755 --- a/buku.py +++ b/buku.py @@ -32,6 +32,7 @@ import json import logging import inspect import atexit +import threading try: import readline @@ -53,7 +54,6 @@ interrupted = False # Received SIGINT DELIM = ',' # Delimiter used to store tags in DB SKIP_MIMES = {'.pdf', '.txt'} http_handler = None # urllib3 PoolManager handler -htmlparser = None # Use a single HTML Parser instance # Disguise as Firefox on Ubuntu USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:48.0) Gecko/20100101 \ @@ -421,7 +421,7 @@ class BukuDb: try: # Create a connection - conn = sqlite3.connect(dbfile) + conn = sqlite3.connect(dbfile, timeout = 30, check_same_thread = False) conn.create_function('REGEXP', 2, regexp) cur = conn.cursor() @@ -751,13 +751,13 @@ class BukuDb: return True +# single connection attempt def refreshdb(self, index): '''Refresh ALL records in the database. Fetch title for each bookmark from the web and update the records. Doesn't update the record if title is empty. This API doesn't change DB index, URL or tags of a bookmark. This API is verbose. - :param index: index of record to update, or 0 for all records ''' @@ -767,34 +767,138 @@ class BukuDb: else: self.cur.execute('SELECT id, url FROM bookmarks WHERE id = ? AND \ flags & 1 != 1', (index,)) - + resultset = self.cur.fetchall() if not len(resultset): logerr('No matching index or title immutable or empty DB') - return False + return False query = 'UPDATE bookmarks SET metadata = ? WHERE id = ?' - for row in resultset: - title, mime, bad = network_handler(row[1]) - if bad: - print('\x1b[1mIndex %d: malformed URL\x1b[0m\n' % row[0]) - continue - elif mime: - print('\x1b[1mIndex %d: mime HEAD requested\x1b[0m\n' % row[0]) - continue - elif title == '': - print('\x1b[1mIndex %d: no title\x1b[0m\n' % row[0]) - continue - self.cur.execute(query, (title, row[0],)) + LOCK = threading.Lock() - if self.chatty: - print('Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n' - % (title, row[0])) - if interrupted: - break + def refresh(): + '''call network_handler and update db + + :param db: a BukuDb object + ''' + + while len(resultset) > 0: + + row = resultset.pop(0) + title, mime, bad = network_handler(row[1]) + if bad: + print('\x1b[1mIndex %d: malformed URL\x1b[0m\n' % row[0]) + continue + elif mime: + print('\x1b[1mIndex %d: mime HEAD requested\x1b[0m\n' % row[0]) + continue + elif title == '': + print('\x1b[1mIndex %d: no title\x1b[0m\n' % row[0]) + continue + + + LOCK.acquire() + self.cur.execute(query, (title, row[0],)) + self.conn.commit() + LOCK.release() + + if self.chatty: + print('Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n' + % (title, row[0])) + ''' + # do we need this? + if interrupted: + break + ''' + + for thread in range(4): + thread = threading.Thread(target=refresh) + thread.start() + + return True + +# multi-connection attempt + def other_refreshdb(self, index): + '''Refresh ALL records in the database. Fetch title for each + bookmark from the web and update the records. Doesn't update + the record if title is empty. + This API doesn't change DB index, URL or tags of a bookmark. + This API is verbose. + :param index: index of record to update, or 0 for all records + ''' + + if index == 0: + self.cur.execute('SELECT id, url FROM bookmarks WHERE \ + flags & 1 != 1 ORDER BY id ASC') + else: + self.cur.execute('SELECT id, url FROM bookmarks WHERE id = ? AND \ + flags & 1 != 1', (index,)) + + resultset = self.cur.fetchall() + if not len(resultset): + logerr('No matching index or title immutable or empty DB') + return False + + query = 'UPDATE bookmarks SET metadata = ? WHERE id = ?' + + LOCK = threading.Lock() + + dbfile = os.path.join(BukuDb.get_default_dbdir(), 'bookmarks.db') + + bdb2 = BukuDb(dbfile = dbfile) + bdb3 = BukuDb(dbfile = dbfile) + bdb4 = BukuDb(dbfile = dbfile) + + def refresh(db): + '''call network_handler and update db + + :param db: a BukuDb object + ''' + + while len(resultset) > 0: + + row = resultset.pop(0) + title, mime, bad = network_handler(row[1]) + if bad: + print('\x1b[1mIndex %d: malformed URL\x1b[0m\n' % row[0]) + continue + elif mime: + print('\x1b[1mIndex %d: mime HEAD requested\x1b[0m\n' % row[0]) + continue + elif title == '': + print('\x1b[1mIndex %d: no title\x1b[0m\n' % row[0]) + continue + + + LOCK.acquire() + db.cur.execute(query, (title, row[0],)) + db.conn.commit() + LOCK.release() + + if db.chatty: + print('Title: [%s]\n\x1b[92mIndex %d: updated\x1b[0m\n' + % (title, row[0])) + ''' + # do we need this? + if interrupted: + break + ''' + + for thread in range(4): + if thread == 0: + first_thread = threading.Thread(target=refresh, args = (self,)) + first_thread.start() + elif thread == 1: + second_thread = threading.Thread(target=refresh, args = (bdb2,)) + second_thread.start() + elif thread == 2: + third_thread = threading.Thread(target=refresh, args = (bdb3,)) + third_thread.start() + elif thread == 3: + fourth_thread = threading.Thread(target=refresh, args = (bdb4,)) + fourth_thread.start() - self.conn.commit() return True def searchdb(self, keywords, all_keywords=False, deep=False, regex=False): @@ -1338,9 +1442,9 @@ Buku bookmarks # Connect to input DB if sys.version_info >= (3, 4, 4): # Python 3.4.4 and above - indb_conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True) + indb_conn = sqlite3.connect('file:%s?mode=ro' % path, uri=True, timeout = 30, check_same_thread = False) else: - indb_conn = sqlite3.connect(path) + indb_conn = sqlite3.connect(path, timeout = 30, check_same_thread = False) indb_cur = indb_conn.cursor() indb_cur.execute('SELECT * FROM bookmarks') @@ -1496,10 +1600,7 @@ def get_page_title(resp): :return: title fetched from parsed page ''' - global htmlparser - - if not htmlparser: - htmlparser = BMHTMLParser() + htmlparser = BMHTMLParser() try: htmlparser.feed(resp.data.decode(errors='replace'))