diff --git a/README.md b/README.md index 9076eff..e99f2c1 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ If you find `buku` useful, please consider donating via PayPal. - Delete all bookmarks from DB - Add a bookmark at Nth index, to fill deleted bookmark indices - Secure parameterized SQLite3 queries to access database -- Handle multiple HTTP redirections (reports redireted URL and IP blocking) +- Handle multiple HTTP redirections (reports redirected URL, loops, IP blocking) - Unicode in URL works - UTF-8 request and response, page character set detection - Works with Python 3.x diff --git a/buku b/buku index 7a7b553..b4b1db7 100755 --- a/buku +++ b/buku @@ -140,6 +140,13 @@ def initdb(): # Get page response data def getPageResp(url, redir=False): + if url.find("%20") != -1: + url = unquote(url) + url = url.replace(" ", "%20") + else: + url = unquote(url) + print("unquote: %s" % url) + if url.find("https://") >= 0: # Secure connection server = url[8:] marker = server.find("/") @@ -161,9 +168,9 @@ def getPageResp(url, redir=False): if debug: print("server: [%s]" % server) - print("URL: [%s]" % quote(unquote(url))) + print("URL: [%s]" % url) - urlconn.request("GET", quote(unquote(url))) + urlconn.request("GET", url) resp = urlconn.getresponse() return (resp, urlconn) @@ -185,16 +192,25 @@ def fetchTitle(url): getTitleData(resp) break if resp.status in (301,302,): - print("\x1b[1mREDIRECTION:\x1b[21m %s" % resp.getheader('location', '')) + redirurl = urljoin(url, resp.getheader('location', '')) + print("\x1b[1mREDIRECTION:\x1b[21m %s" % url) - url = urljoin(url, resp.getheader('location', '')) - if url.find("sorry/IndexRedirect?") >= 0: # graecefully handle Google blocks + if redirurl.find("sorry/IndexRedirect?") >= 0: # gracefully handle Google blocks print("ERROR: Connection blocked due to unusual activity.") break - else: - urlconn.close() - resp, urlconn = getPageResp(url, True) - continue + + marker = redirurl.find("redirectUrl=") + if marker != -1: + 
redirurl = redirurl[marker + 12:] + + # break same URL redirection loop + if url == redirurl: + print("ERROR: Detected repeated reirection to same URL") + break + + url = redirurl + urlconn.close() + resp, urlconn = getPageResp(url, True) else: print("ERROR in response:", str(resp.status), ": ", resp.reason) break