Detect and break redirection loops.
Unquote text before connection check and GET request. Signed-off-by: Arun Prakash Jana <engineerarun@gmail.com>
This commit is contained in:
parent
ea02856bc1
commit
a56fa0381d
@ -48,7 +48,7 @@ If you find `buku` useful, please consider donating via PayPal.
|
|||||||
- Delete all bookmarks from DB
|
- Delete all bookmarks from DB
|
||||||
- Add a bookmark at N<sup>th</sup> index, to fill deleted bookmark indices
|
- Add a bookmark at N<sup>th</sup> index, to fill deleted bookmark indices
|
||||||
- Secure parameterized SQLite3 queries to access database
|
- Secure parameterized SQLite3 queries to access database
|
||||||
- Handle multiple HTTP redirections (reports redirected URL and IP blocking)
|
- Handle multiple HTTP redirections (reports redirected URL, loops, IP blocking)
|
||||||
- Unicode in URL works
|
- Unicode in URL works
|
||||||
- UTF-8 request and response, page character set detection
|
- UTF-8 request and response, page character set detection
|
||||||
- Works with Python 3.x
|
- Works with Python 3.x
|
||||||
|
30
buku
30
buku
@ -140,6 +140,13 @@ def initdb():
|
|||||||
|
|
||||||
# Get page response data
|
# Get page response data
|
||||||
def getPageResp(url, redir=False):
|
def getPageResp(url, redir=False):
|
||||||
|
if url.find("%20") != -1:
|
||||||
|
url = unquote(url)
|
||||||
|
url = url.replace(" ", "%20")
|
||||||
|
else:
|
||||||
|
url = unquote(url)
|
||||||
|
print("unquote: %s" % url)
|
||||||
|
|
||||||
if url.find("https://") >= 0: # Secure connection
|
if url.find("https://") >= 0: # Secure connection
|
||||||
server = url[8:]
|
server = url[8:]
|
||||||
marker = server.find("/")
|
marker = server.find("/")
|
||||||
@ -161,9 +168,9 @@ def getPageResp(url, redir=False):
|
|||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
print("server: [%s]" % server)
|
print("server: [%s]" % server)
|
||||||
print("URL: [%s]" % quote(unquote(url)))
|
print("URL: [%s]" % url)
|
||||||
|
|
||||||
urlconn.request("GET", quote(unquote(url)))
|
urlconn.request("GET", url)
|
||||||
resp = urlconn.getresponse()
|
resp = urlconn.getresponse()
|
||||||
return (resp, urlconn)
|
return (resp, urlconn)
|
||||||
|
|
||||||
@ -185,16 +192,25 @@ def fetchTitle(url):
|
|||||||
getTitleData(resp)
|
getTitleData(resp)
|
||||||
break
|
break
|
||||||
if resp.status in (301,302,):
|
if resp.status in (301,302,):
|
||||||
print("\x1b[1mREDIRECTION:\x1b[21m %s" % resp.getheader('location', ''))
|
redirurl = urljoin(url, resp.getheader('location', ''))
|
||||||
|
print("\x1b[1mREDIRECTION:\x1b[21m %s" % url)
|
||||||
|
|
||||||
url = urljoin(url, resp.getheader('location', ''))
|
if redirurl.find("sorry/IndexRedirect?") >= 0: # gracefully handle Google blocks
|
||||||
if url.find("sorry/IndexRedirect?") >= 0: # gracefully handle Google blocks
|
|
||||||
print("ERROR: Connection blocked due to unusual activity.")
|
print("ERROR: Connection blocked due to unusual activity.")
|
||||||
break
|
break
|
||||||
else:
|
|
||||||
|
marker = redirurl.find("redirectUrl=")
|
||||||
|
if marker != -1:
|
||||||
|
redirurl = redirurl[marker + 12:]
|
||||||
|
|
||||||
|
# break same URL redirection loop
|
||||||
|
if url == redirurl:
|
||||||
|
print("ERROR: Detected repeated reirection to same URL")
|
||||||
|
break
|
||||||
|
|
||||||
|
url = redirurl
|
||||||
urlconn.close()
|
urlconn.close()
|
||||||
resp, urlconn = getPageResp(url, True)
|
resp, urlconn = getPageResp(url, True)
|
||||||
continue
|
|
||||||
else:
|
else:
|
||||||
print("ERROR in response:", str(resp.status), ": ", resp.reason)
|
print("ERROR in response:", str(resp.status), ": ", resp.reason)
|
||||||
break
|
break
|
||||||
|
Loading…
x
Reference in New Issue
Block a user