Request gzip compressed data in HTTP(S) fetch.
This commit is contained in:
parent
4c25e0b1c0
commit
e5ca44e251
19
buku
19
buku
@ -26,6 +26,8 @@ import webbrowser
|
|||||||
import html.parser as HTMLParser
|
import html.parser as HTMLParser
|
||||||
from http.client import HTTPConnection, HTTPSConnection
|
from http.client import HTTPConnection, HTTPSConnection
|
||||||
from urllib.parse import urljoin, quote, unquote
|
from urllib.parse import urljoin, quote, unquote
|
||||||
|
import gzip
|
||||||
|
import io
|
||||||
import signal
|
import signal
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
@ -176,6 +178,7 @@ def initdb():
|
|||||||
|
|
||||||
def getPageResp(url, redir=False):
|
def getPageResp(url, redir=False):
|
||||||
"""Connect to a server and fetch the requested page data.
|
"""Connect to a server and fetch the requested page data.
|
||||||
|
Supports gzip compression.
|
||||||
|
|
||||||
Params: URL to fetch, redirection status
|
Params: URL to fetch, redirection status
|
||||||
Returns: connection, HTTP(S) GET response
|
Returns: connection, HTTP(S) GET response
|
||||||
@ -220,7 +223,9 @@ def getPageResp(url, redir=False):
|
|||||||
except:
|
except:
|
||||||
url = quote(url)
|
url = quote(url)
|
||||||
|
|
||||||
urlconn.request("GET", url)
|
urlconn.request("GET", url, None, {
|
||||||
|
"Accept-encoding": "gzip",
|
||||||
|
})
|
||||||
resp = urlconn.getresponse()
|
resp = urlconn.getresponse()
|
||||||
return (urlconn, resp)
|
return (urlconn, resp)
|
||||||
|
|
||||||
@ -232,8 +237,16 @@ def getTitleData(resp):
|
|||||||
Params: GET response
|
Params: GET response
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
data = None
|
||||||
charset = ''
|
charset = ''
|
||||||
charset = resp.headers.get_content_charset()
|
charset = resp.headers.get_content_charset()
|
||||||
|
|
||||||
|
if resp.headers.get('Content-Encoding') == 'gzip':
|
||||||
|
print("gzip")
|
||||||
|
data = gzip.GzipFile(fileobj=io.BytesIO(resp.read())).read()
|
||||||
|
else:
|
||||||
|
data = resp.read()
|
||||||
|
|
||||||
if charset == None:
|
if charset == None:
|
||||||
charset = 'utf-8'
|
charset = 'utf-8'
|
||||||
if debug:
|
if debug:
|
||||||
@ -242,9 +255,9 @@ def getTitleData(resp):
|
|||||||
parser = BMHTMLParser()
|
parser = BMHTMLParser()
|
||||||
try:
|
try:
|
||||||
if charset == 'utf-8':
|
if charset == 'utf-8':
|
||||||
parser.feed(resp.read().decode(charset, "replace"))
|
parser.feed(data.decode(charset, "replace"))
|
||||||
else:
|
else:
|
||||||
parser.feed(resp.read().decode(charset))
|
parser.feed(data.decode(charset))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if debug:
|
if debug:
|
||||||
print("Exception [getTitleData]: %s" % e)
|
print("Exception [getTitleData]: %s" % e)
|
||||||
|
Loading…
Reference in New Issue
Block a user