From 3ea64f3a5f113ba81ce25fd3f2874397c834d78d Mon Sep 17 00:00:00 2001
From: Arun Prakash Jana <engineerarun@gmail.com>
Date: Fri, 25 Mar 2016 02:08:38 +0530
Subject: [PATCH] Better redirection handling.

Signed-off-by: Arun Prakash Jana <engineerarun@gmail.com>
---
 README.md |  2 +-
 buku      | 39 ++++++++++++++++-----------------------
 2 files changed, 17 insertions(+), 24 deletions(-)
diff --git a/README.md b/README.md
index a557f12..9076eff 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ If you find `buku` useful, please consider donating via PayPal.
 - Delete all bookmarks from DB
 - Add a bookmark at N<sup>th</sup> index, to fill deleted bookmark indices
 - Secure parameterized SQLite3 queries to access database
-- Handle first level of redirections (reports IP blocking)
+- Handle multiple HTTP redirections (reports redirected URL and IP blocking)
 - Unicode in URL works
 - UTF-8 request and response, page character set detection
 - Works with Python 3.x
diff --git a/buku b/buku
index 4e2b04d..ea26c28 100755
--- a/buku
+++ b/buku
@@ -177,34 +177,27 @@ def fetchTitle(url):
 
     try:
         resp, urlconn = getPageResp(url, False)
-        if resp is None:
-            return ''
 
-        if resp.status != 200:
-            # Handle first redirection
-            while 1:
+        while 1:
+            if resp is None:
+                break
+            if resp.status == 200:
+                getTitleData(resp)
+                break
+            if resp.status in (301,302,):
                 print("\x1b[1mREDIRECTION:\x1b[21m %s" % resp.getheader('location', ''))
 
-                redirurl = urljoin(url, resp.getheader('location', ''))
-                if redirurl.find("sorry/IndexRedirect?") >= 0:          # graecefully handle Google blocks
+                url = urljoin(url, resp.getheader('location', ''))
+                if url.find("sorry/IndexRedirect?") >= 0:          # graecefully handle Google blocks
                     print("ERROR: Connection blocked due to unusual activity.")
-                else:
-                    if debug:
-                        print("Trying to fetch redirected URL.")
-                    urlconn.close()
-                    resp, urlconn = getPageResp(redirurl, True)
-                    if resp is None:
-                        break
-                    if resp.status in (301,302,):
-                        continue
-                    if resp.status != 200:
-                        print("ERROR on retry:", str(resp.status), ": ", resp.reason)
-                    else:
-                        getTitleData(resp)
-
                     break
-        else: # if resp.status != 200:
-            getTitleData(resp)
+                else:
+                    urlconn.close()
+                    resp, urlconn = getPageResp(url, True)
+                    continue
+            else:
+                print("ERROR in response:", str(resp.status), ": ", resp.reason)
+                break
     except Exception as e:
         print("Exception: %s" % e)
     finally: