From 567763238a005a1d5f8f683be38ed69f9b7ccfc4 Mon Sep 17 00:00:00 2001 From: rachmadani haryono Date: Sat, 14 Oct 2017 19:13:53 +0800 Subject: [PATCH] Feature/exception on malformed url (#221) * fix: dev: malformed url * new: test; about and chrome protocol * fix: dev: fix lint * fix: dev: import error on urllib3 * fix: dev: urllib3 exceptions * fix: test: test on protocols * fix: dev: return value of is_bad_url * chg: dev: update buku - change logerr msg - sort ignored_prefix - check is_nongeneric_url and is_bad_url --- buku.py | 17 ++++++++++++++--- tests/test_buku.py | 4 ++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/buku.py b/buku.py index 80c7e89..f497d22 100755 --- a/buku.py +++ b/buku.py @@ -36,6 +36,7 @@ import sys import threading import time import urllib3 +from urllib3.exceptions import LocationParseError from urllib3.util import parse_url, make_headers import webbrowser @@ -2611,7 +2612,11 @@ def is_bad_url(url): """ # Get the netloc token - netloc = parse_url(url).netloc + try: + netloc = parse_url(url).netloc + except LocationParseError as e: + logerr('%s, URL: %s', (e, url)) + return True if not netloc: # Try of prepend '//' and get netloc netloc = parse_url('//' + url).netloc @@ -2645,7 +2650,13 @@ def is_nongeneric_url(url): True if URL is a non-generic URL, False otherwise. """ - ignored_prefix = ['place:', 'file://', 'apt:'] + ignored_prefix = [ + 'about:', + 'apt:', + 'chrome://', + 'file://', + 'place:', + ] for prefix in ignored_prefix: if url.startswith(prefix): @@ -2767,7 +2778,7 @@ def network_handler(url, http_head=False): page_title = None - if is_bad_url(url): + if is_nongeneric_url(url) or is_bad_url(url): return ('', 0, 1) if is_ignored_mime(url) or http_head: diff --git a/tests/test_buku.py b/tests/test_buku.py index fb3ac47..4a3e3fe 100644 --- a/tests/test_buku.py +++ b/tests/test_buku.py @@ -22,6 +22,8 @@ only_python_3_5 = pytest.mark.skipif( ['http://examplecom.', True], ['http://.example.com', True], ['http://example.com.', True], + ['about:newtab', True], + ['chrome://version/', True], ] ) def test_is_bad_url(url, exp_res): @@ -543,6 +545,8 @@ def test_sigint_handler(capsys): ['http://example.com.', ('', 0, 1)], ['http://example.com', ('Example Domain', 0, 0)], ['http://example.com/page1.txt', (('', 1, 0))], + ['about:new_page', (('', 0, 1))], + ['chrome://version/', (('', 0, 1))], ] ) def test_network_handler_with_url(url, exp_res):