Merge pull request #298 from rachmadaniHaryono/feature/title

Feature/title
Arun Prakash Jana 2018-08-01 18:47:08 +05:30 committed by GitHub
commit 2ad2cbb02b
2 changed files with 52 additions and 8 deletions

buku.py (29 lines changed)

@@ -1387,8 +1387,6 @@ class BukuDb:
resp = input('Delete these bookmarks? (y/n): ')
if resp != 'y':
return False
else:
return False
query = 'DELETE from bookmarks where id BETWEEN ? AND ?'
self.cur.execute(query, (low, high))
@@ -1566,8 +1564,7 @@ class BukuDb:
resultset = self.cur.execute(query)
else:
query = 'SELECT * from bookmarks where id BETWEEN ? AND ?'
self.cur.execute(query, (low, high))
resultset = self.cur.fetchall()
resultset = self.cur.execute(query, (low, high))
except IndexError:
logerr('Index out of range')
return False
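For reference, the simplification above leans on the sqlite3 API: Cursor.execute() returns the cursor itself, and a cursor is directly iterable, so the intermediate fetchall() call is not needed just to walk the rows. A small standalone illustration; the in-memory database and sample row are hypothetical:

import sqlite3

conn = sqlite3.connect(':memory:')
cur = conn.cursor()
cur.execute('CREATE TABLE bookmarks (id INTEGER PRIMARY KEY, URL TEXT)')
cur.execute("INSERT INTO bookmarks (URL) VALUES ('http://example.com')")

# Cursor.execute() returns the cursor, which can be iterated directly;
# an explicit fetchall() is only needed when a materialized list is required.
resultset = cur.execute('SELECT * FROM bookmarks WHERE id BETWEEN ? AND ?', (1, 1))
for row in resultset:
    print(row)  # (1, 'http://example.com')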
@@ -1593,9 +1590,9 @@ class BukuDb:
self.cur.execute('SELECT * FROM bookmarks')
resultset = self.cur.fetchall()
if len(resultset) < 1:
if not resultset:
logerr('0 records')
return False
return True
if not self.json:
print_rec_with_filter(resultset, self.field_filter)
@@ -2953,19 +2950,37 @@ def get_page_title(resp):
parser = BukuHTMLParser()
charset = 'utf-8'
soup = None
parsed_title = None
try:
from bs4 import BeautifulSoup
soup = BeautifulSoup(resp.data, 'html.parser')
except Exception as e:
logerr('get_page_title(): %s', e)
try:
charset_found = False
if 'content-type' in resp.headers:
_, params = cgi.parse_header(resp.headers['content-type'])
if params.get('charset') is not None:
charset = params.get('charset')
charset_found = True
if not charset_found and soup:
meta_tag = soup.find('meta', attrs={'http-equiv': 'Content-Type'})
if meta_tag:
_, params = cgi.parse_header(meta_tag.attrs['content'])
charset = params.get('charset', charset)
parser.feed(resp.data.decode(charset))
except Exception as e:
if isinstance(e, UnicodeDecodeError) and soup:
parsed_title = soup.find('title').text
# Suppress Exception due to intentional self.reset() in BHTMLParser
if (logger.isEnabledFor(logging.DEBUG) and str(e) != 'we should not get here!'):
logerr('get_page_title(): %s', e)
finally:
return re.sub('\s{2,}', ' ', parser.parsed_title)
if not parsed_title:
parsed_title = parser.parsed_title
return re.sub('\s{2,}', ' ', parsed_title)
def gen_headers():
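For context, the new charset handling in get_page_title() relies on cgi.parse_header() to split a Content-Type value into its MIME type and parameters, falling back to the page's <meta http-equiv="Content-Type"> tag and finally to utf-8. A minimal sketch of that detection order; the detect_charset() helper below is illustrative only and not part of this patch:

import cgi
from bs4 import BeautifulSoup

def detect_charset(headers, body, default='utf-8'):
    """Illustrative helper: header charset first, then <meta> tag, then default."""
    if 'content-type' in headers:
        _, params = cgi.parse_header(headers['content-type'])
        if params.get('charset'):
            return params['charset']
    soup = BeautifulSoup(body, 'html.parser')
    meta_tag = soup.find('meta', attrs={'http-equiv': 'Content-Type'})
    if meta_tag and 'content' in meta_tag.attrs:
        _, params = cgi.parse_header(meta_tag.attrs['content'])
        return params.get('charset', default)
    return default

# e.g. detect_charset({'content-type': 'text/html; charset=ISO-8859-1'}, b'<html></html>')
# returns 'ISO-8859-1'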

tests/test_buku.py

@@ -1,6 +1,7 @@
"""test module."""
from itertools import product
from unittest import mock
from urllib.parse import urlparse
import json
import os
import signal
@@ -52,10 +53,11 @@ def test_get_page_title():
"""test func."""
resp = mock.Mock()
parser = mock.Mock()
parser.parsed_title = 'doubled whitespace'
with mock.patch('buku.BukuHTMLParser', return_value=parser):
import buku
res = buku.get_page_title(resp)
assert res == parser.parsed_title
assert res == 'doubled whitespace'
def test_gen_headers():
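The assertion now compares against a plain string because get_page_title() pushes the parsed title through re.sub('\s{2,}', ' ', ...), which collapses any run of whitespace into a single space. A quick illustration of that normalization step; the sample title is made up:

import re

# Runs of two or more whitespace characters collapse to a single space,
# mirroring the final step of get_page_title().
title = 'doubled \t  whitespace'
print(re.sub(r'\s{2,}', ' ', title))  # -> 'doubled whitespace'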
@@ -553,6 +555,31 @@ def test_sigint_handler(capsys):
['http://example.com/page1.txt', (('', 1, 0))],
['about:new_page', (('', 0, 1))],
['chrome://version/', (('', 0, 1))],
['chrome://version/', (('', 0, 1))],
[
'http://4pda.ru/forum/index.php?showtopic=182463&st=1640#entry6044923',
('Samsung GT-I5800 Galaxy 580 - Обсуждение - 4PDA', 0, 0)
],
[
'https://www.google.ru/search?'
'newwindow=1&safe=off&q=xkbcomp+alt+gr&'
'oq=xkbcomp+alt+gr&'
'gs_l=serp.3..33i21.28976559.28977886.0.'
'28978017.6.6.0.0.0.0.167.668.0j5.5.0....0...1c.1.64.'
'serp..1.2.311.06cSKPTLo18',
('xkbcomp alt gr', 0, 0)
],
[
'http://www.vim.org/scripts/script.php?script_id=4641',
(
'mlessnau_case - "in-case" selection, deletion and substitution '
'for underscore, camel, mixed case : vim online', 0, 0
)
],
[
'http://www.kadrof.ru/cat_exchange.shtml',
('Все биржи фриланса и удаленной работы - больше 110 сайтов | Kadrof.ru', 0, 0)
],
]
)
def test_network_handler_with_url(url, exp_res):
@@ -562,6 +589,8 @@ def test_network_handler_with_url(url, exp_res):
buku.urllib3 = urllib3
buku.myproxy = None
res = buku.network_handler(url)
if urlparse(url).netloc == 'www.google.ru':
res = (res[0].split(" - ")[0], res[1], res[2])
assert res == exp_res
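The extra branch uses urllib.parse.urlparse() to single out the live Google query: netloc is the host part of the URL, and only that parametrized case has its returned title trimmed to the text before the first ' - ', presumably to drop a locale-dependent suffix from the live page. A small sketch of the pieces involved; the sample result tuple is hypothetical:

from urllib.parse import urlparse

url = 'https://www.google.ru/search?newwindow=1&safe=off&q=xkbcomp+alt+gr'
print(urlparse(url).netloc)  # 'www.google.ru'

res = ('xkbcomp alt gr - Google Search', 0, 0)   # hypothetical live result
print((res[0].split(' - ')[0], res[1], res[2]))  # ('xkbcomp alt gr', 0, 0)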