Better redirection handling.

Signed-off-by: Arun Prakash Jana <engineerarun@gmail.com>
This commit is contained in:
Arun Prakash Jana 2016-03-25 02:08:38 +05:30
parent 1ea332ba18
commit 3ea64f3a5f
2 changed files with 17 additions and 24 deletions

View File

@ -48,7 +48,7 @@ If you find `buku` useful, please consider donating via PayPal.
- Delete all bookmarks from DB
- Add a bookmark at N<sup>th</sup> index, to fill deleted bookmark indices
- Secure parameterized SQLite3 queries to access database
- Handle first level of redirections (reports IP blocking)
- Handle multiple HTTP redirections (reports redirected URL and IP blocking)
- Unicode in URL works
- UTF-8 request and response, page character set detection
- Works with Python 3.x

39
buku
View File

@ -177,34 +177,27 @@ def fetchTitle(url):
try:
resp, urlconn = getPageResp(url, False)
if resp is None:
return ''
if resp.status != 200:
# Handle first redirection
while 1:
while 1:
if resp is None:
break
if resp.status == 200:
getTitleData(resp)
break
if resp.status in (301,302,):
print("\x1b[1mREDIRECTION:\x1b[21m %s" % resp.getheader('location', ''))
redirurl = urljoin(url, resp.getheader('location', ''))
if redirurl.find("sorry/IndexRedirect?") >= 0: # graecefully handle Google blocks
url = urljoin(url, resp.getheader('location', ''))
if url.find("sorry/IndexRedirect?") >= 0: # graecefully handle Google blocks
print("ERROR: Connection blocked due to unusual activity.")
else:
if debug:
print("Trying to fetch redirected URL.")
urlconn.close()
resp, urlconn = getPageResp(redirurl, True)
if resp is None:
break
if resp.status in (301,302,):
continue
if resp.status != 200:
print("ERROR on retry:", str(resp.status), ": ", resp.reason)
else:
getTitleData(resp)
break
else: # if resp.status != 200:
getTitleData(resp)
else:
urlconn.close()
resp, urlconn = getPageResp(url, True)
continue
else:
print("ERROR in response:", str(resp.status), ": ", resp.reason)
break
except Exception as e:
print("Exception: %s" % e)
finally: