diff options
author | Mark Hammond <mhammond@skippinet.com.au> | 2003-02-27 06:59:10 (GMT) |
---|---|---|
committer | Mark Hammond <mhammond@skippinet.com.au> | 2003-02-27 06:59:10 (GMT) |
commit | ce56c377a0f548cdac3ab9c66117df654f934484 (patch) | |
tree | b5bd56edb43d9b59db7203b80b13c42d22337b3a /Tools | |
parent | 05595e9d73b2c05fcd9492cf8f5d126282b82053 (diff) | |
download | cpython-ce56c377a0f548cdac3ab9c66117df654f934484.zip cpython-ce56c377a0f548cdac3ab9c66117df654f934484.tar.gz cpython-ce56c377a0f548cdac3ab9c66117df654f934484.tar.bz2 |
When bad HTML is encountered, ignore the page rather than failing with
a traceback.
Diffstat (limited to 'Tools')
-rwxr-xr-x | Tools/webchecker/webchecker.py | 10 |
1 file changed, 9 insertions, 1 deletion
```diff
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index e8d0ed7..e89529e 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -400,7 +400,15 @@ class Checker:
         if local_fragment and self.nonames:
             self.markdone(url_pair)
             return
-        page = self.getpage(url_pair)
+        try:
+            page = self.getpage(url_pair)
+        except sgmllib.SGMLParseError, msg:
+            msg = self.sanitize(msg)
+            self.note(0, "Error parsing %s: %s",
+                      self.format_url(url_pair), msg)
+            # Dont actually mark the URL as bad - it exists, just
+            # we can't parse it!
+            page = None
         if page:
             # Store the page which corresponds to this URL.
             self.name_table[url] = page
```