diff options
author | Guido van Rossum <guido@python.org> | 1997-01-30 04:26:57 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1997-01-30 04:26:57 (GMT) |
commit | 2739cd74b3c2221c7f77f274027650dc272b386f (patch) | |
tree | 30ac7cf1a1339cfbac222cd49959a785ffc08d49 /Tools/webchecker/webchecker.py | |
parent | de662685885603d578e4edea83dac3ff8e9904b2 (diff) | |
download | cpython-2739cd74b3c2221c7f77f274027650dc272b386f.zip cpython-2739cd74b3c2221c7f77f274027650dc272b386f.tar.gz cpython-2739cd74b3c2221c7f77f274027650dc272b386f.tar.bz2 |
Some refinements of the external-link checking code: record the errors
in the 'bad' dictionary (sanitizing them first so they are picklable; the
sanitizing code is now a subroutine); don't check mailto: URLs; omit the
colon in the "Error" message.
Diffstat (limited to 'Tools/webchecker/webchecker.py')
-rwxr-xr-x | Tools/webchecker/webchecker.py | 31 |
1 files changed, 22 insertions, 9 deletions
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py index d6c81cc..7eb9a25 100755 --- a/Tools/webchecker/webchecker.py +++ b/Tools/webchecker/webchecker.py @@ -305,13 +305,18 @@ class Checker: show("HREF ", url, " from", self.ext[url]) if not checkext: continue + if url[:7] == 'mailto:': + if verbose > 2: print "Not checking", url + continue if verbose > 2: print "Checking", url, "..." try: f = self.urlopener.open(url) f.close() if verbose > 3: print "OK" except IOError, msg: - print "Error:", msg + msg = sanitize(msg) + print "Error", msg + self.bad[url] = msg def report_errors(self): if not self.bad: @@ -327,7 +332,10 @@ class Checker: try: origins = self.done[url] except KeyError: - origins = self.todo[url] + try: + origins = self.todo[url] + except KeyError: + origins = self.ext[url] for source, rawlink in origins: triple = url, rawlink, self.bad[url] try: @@ -406,13 +414,7 @@ class Checker: try: f = self.urlopener.open(url) except IOError, msg: - if (type(msg) == TupleType and - len(msg) >= 4 and - msg[0] == 'http error' and - type(msg[3]) == InstanceType): - # Remove the Message instance -- it may contain - # a file object which prevents pickling. - msg = msg[:3] + msg[4:] + msg = sanitize(msg) if verbose > 0: print "Error ", msg if verbose > 0: @@ -549,5 +551,16 @@ def show(p1, link, p2, origins): print +def sanitize(msg): + if (type(msg) == TupleType and + len(msg) >= 4 and + msg[0] == 'http error' and + type(msg[3]) == InstanceType): + # Remove the Message instance -- it may contain + # a file object which prevents pickling. + msg = msg[:3] + msg[4:] + return msg + + if __name__ == '__main__': main() |