summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Mark Hammond <mhammond@skippinet.com.au> 2003-02-27 06:59:10 (GMT)
committer Mark Hammond <mhammond@skippinet.com.au> 2003-02-27 06:59:10 (GMT)
commit ce56c377a0f548cdac3ab9c66117df654f934484 (patch)
tree b5bd56edb43d9b59db7203b80b13c42d22337b3a
parent 05595e9d73b2c05fcd9492cf8f5d126282b82053 (diff)
download cpython-ce56c377a0f548cdac3ab9c66117df654f934484.zip
cpython-ce56c377a0f548cdac3ab9c66117df654f934484.tar.gz
cpython-ce56c377a0f548cdac3ab9c66117df654f934484.tar.bz2
When bad HTML is encountered, ignore the page rather than failing with
a traceback.
-rwxr-xr-x Tools/webchecker/webchecker.py 10
1 files changed, 9 insertions, 1 deletions
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index e8d0ed7..e89529e 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -400,7 +400,15 @@ class Checker:
if local_fragment and self.nonames:
self.markdone(url_pair)
return
- page = self.getpage(url_pair)
+ try:
+ page = self.getpage(url_pair)
+ except sgmllib.SGMLParseError, msg:
+ msg = self.sanitize(msg)
+ self.note(0, "Error parsing %s: %s",
+ self.format_url(url_pair), msg)
+ # Don't actually mark the URL as bad - it exists, we just
+ # can't parse it!
+ page = None
if page:
# Store the page which corresponds to this URL.
self.name_table[url] = page