Implement function error() for HTMLParser

1. Handle EXCEPTION [getTitleData]: (NotImplementedError) subclasses of ParserBase must override error()
URL: http://www.mycplus.com/featured-articles/top-ten-c-language-resources/

2. Add a debug log to show when the server's response is gzip-encoded
This commit is contained in:
Arun Prakash Jana 2016-04-10 12:39:51 +05:30
parent 7bde1f7ec6
commit 894b16a7ef
No known key found for this signature in database
GPG Key ID: C0A712ED95043DCB

5
buku
View File

@ -103,6 +103,9 @@ class BMHTMLParser(HTMLParser.HTMLParser):
if self.lasttag == "title" and self.inTitle == True: if self.lasttag == "title" and self.inTitle == True:
self.data += data self.data += data
# Deliberate no-op override of the parser's error() hook. Per the commit
# message, the base implementation raises NotImplementedError ("subclasses
# of ParserBase must override error()"), which surfaced as an exception in
# getTitleData. The message argument is discarded.
# NOTE(review): presumably this lets parsing continue past bad markup so
# the title can still be extracted — confirm against the parser in use.
def error(self, message):
pass
def getDataPath(): def getDataPath():
@ -240,6 +243,8 @@ def getTitleData(resp):
charset = resp.headers.get_content_charset() charset = resp.headers.get_content_charset()
if resp.headers.get('Content-Encoding') == 'gzip': if resp.headers.get('Content-Encoding') == 'gzip':
if debug:
print("gzip response")
data = gzip.GzipFile(fileobj=io.BytesIO(resp.read())).read() data = gzip.GzipFile(fileobj=io.BytesIO(resp.read())).read()
else: else:
data = resp.read() data = resp.read()