From 3ea64f3a5f113ba81ce25fd3f2874397c834d78d Mon Sep 17 00:00:00 2001 From: Arun Prakash Jana Date: Fri, 25 Mar 2016 02:08:38 +0530 Subject: [PATCH] Better redirection handling. Signed-off-by: Arun Prakash Jana --- README.md | 2 +- buku | 39 ++++++++++++++++----------------------- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index a557f12..9076eff 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ If you find `buku` useful, please consider donating via PayPal. - Delete all bookmarks from DB - Add a bookmark at Nth index, to fill deleted bookmark indices - Secure parameterized SQLite3 queries to access database -- Handle first level of redirections (reports IP blocking) +- Handle multiple HTTP redirections (reports redirected URL and IP blocking) - Unicode in URL works - UTF-8 request and response, page character set detection - Works with Python 3.x diff --git a/buku b/buku index 4e2b04d..ea26c28 100755 --- a/buku +++ b/buku @@ -177,34 +177,27 @@ def fetchTitle(url): try: resp, urlconn = getPageResp(url, False) - if resp is None: - return '' - if resp.status != 200: - # Handle first redirection - while 1: + while 1: + if resp is None: + break + if resp.status == 200: + getTitleData(resp) + break + if resp.status in (301,302,): print("\x1b[1mREDIRECTION:\x1b[21m %s" % resp.getheader('location', '')) - redirurl = urljoin(url, resp.getheader('location', '')) - if redirurl.find("sorry/IndexRedirect?") >= 0: # graecefully handle Google blocks + url = urljoin(url, resp.getheader('location', '')) + if url.find("sorry/IndexRedirect?") >= 0: # gracefully handle Google blocks print("ERROR: Connection blocked due to unusual activity.") - else: - if debug: - print("Trying to fetch redirected URL.") - urlconn.close() - resp, urlconn = getPageResp(redirurl, True) - if resp is None: - break - if resp.status in (301,302,): - continue - if resp.status != 200: - print("ERROR on retry:", str(resp.status), ": ", resp.reason) - else: - 
getTitleData(resp) - break - else: # if resp.status != 200: - getTitleData(resp) + else: + urlconn.close() + resp, urlconn = getPageResp(url, True) + continue + else: + print("ERROR in response:", str(resp.status), ": ", resp.reason) + break except Exception as e: print("Exception: %s" % e) finally: