Reformat: distinct API to fetch page data.
Signed-off-by: Arun Prakash Jana <engineerarun@gmail.com>
This commit is contained in:
parent
e832826118
commit
2708e70004
106
buku
106
buku
@ -107,86 +107,80 @@ def usage():
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Get page response data
|
||||||
|
def getPageResp(url, redir=False):
    """Send a GET request for url and return the response with its connection.

    Params:
        url:   absolute URL. It must begin with "https://" or "http://";
               the scheme selects HTTPSConnection or HTTPConnection.
        redir: when False (default) and the URL has a path, only the part
               from the first "/" after the server name is requested.
               When True the complete URL is used as the request target.

    Returns:
        A (response, connection) tuple. The connection is left open and the
        caller is responsible for closing it. (None, None) is returned when
        the URL does not start with a supported scheme.

    Raises:
        Any exception raised while sending the request or reading the
        response; the connection is closed before the exception propagates.
    """
    # The scheme must be a prefix: the slicing below assumes it sits at
    # index 0, so a scheme found elsewhere in the string is not acceptable.
    if url.startswith("https://"):  # Secure connection
        connection = HTTPSConnection
        server = url[8:]
    elif url.startswith("http://"):  # Insecure connection
        connection = HTTPConnection
        server = url[7:]
    else:
        return (None, None)

    # Split "server/path" at the first slash. Without a path the URL is
    # left untouched and the remainder is used as the server name.
    marker = server.find("/")
    if marker > 0:
        if not redir:
            url = server[marker:]
        server = server[:marker]

    urlconn = connection(server, timeout=30)

    if debug:
        print("server: [%s]" % server)
        print("URL: [%s]" % unquote(url))

    try:
        urlconn.request("GET", unquote(url))
        resp = urlconn.getresponse()
    except Exception:
        # The caller never receives the handle on failure, so release it here.
        urlconn.close()
        raise

    return (resp, urlconn)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Fetch title from URL
|
# Fetch title from URL
|
||||||
def fetchTitle(url):
    """Fetch the page at url and return its title text.

    The page is requested through getPageResp(). A 301 or 302 response is
    followed once, using the Location header resolved against url. Other
    non-200 responses are reported on stdout.

    Params:
        url: absolute URL of the page.

    Returns:
        The global titleData with surrounding whitespace and newlines
        removed, or '' when no title was obtained (unsupported URL, error
        status, blocked redirect or an exception during the fetch).
    """
    global titleData

    # Forget the title left by an earlier call so that a failed fetch
    # cannot return it.
    titleData = None
    urlconn = None

    try:
        resp, urlconn = getPageResp(url, False)
        if resp is None:
            return ''

        if resp.status == 200:
            # Presumably stores the parsed title in the global titleData.
            getTitleData(resp)
        elif resp.status in (301, 302):
            # Handle first redirection
            location = resp.getheader('location', '')
            if debug:
                print("Location header: %s" % location)

            redirurl = urljoin(url, location)
            if "sorry/IndexRedirect?" in redirurl:
                print("ERROR: Connection blocked due to unusual activity.")
            else:
                if debug:
                    print("Trying to fetch redirected URL.")

                # Release the first connection before opening the next one.
                urlconn.close()
                urlconn = None

                resp, urlconn = getPageResp(redirurl, True)
                if resp is not None:
                    if resp.status != 200:
                        print("ERROR on retry:", str(resp.status), ": ", resp.reason)
                    else:
                        getTitleData(resp)
        else:
            print("ERROR:", str(resp.status), ": ", resp.reason)
    except Exception as e:
        print("Exception: %s" % e)
    finally:
        if urlconn is not None:
            urlconn.close()

    if titleData is None:
        return ''
    return titleData.strip().replace("\n", "")
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user