From 2739cd74b3c2221c7f77f274027650dc272b386f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 30 Jan 1997 04:26:57 +0000 Subject: Some refinements of the external-link checking code: insert the errors in the 'bad' dictionary (sanitize them so they are picklable; the sanitation code is now a subroutine); don't check mailto: URLs; omit colon in Error message. --- Tools/webchecker/webchecker.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py index d6c81cc..7eb9a25 100755 --- a/Tools/webchecker/webchecker.py +++ b/Tools/webchecker/webchecker.py @@ -305,13 +305,18 @@ class Checker: show("HREF ", url, " from", self.ext[url]) if not checkext: continue + if url[:7] == 'mailto:': + if verbose > 2: print "Not checking", url + continue if verbose > 2: print "Checking", url, "..." try: f = self.urlopener.open(url) f.close() if verbose > 3: print "OK" except IOError, msg: - print "Error:", msg + msg = sanitize(msg) + print "Error", msg + self.bad[url] = msg def report_errors(self): if not self.bad: @@ -327,7 +332,10 @@ class Checker: try: origins = self.done[url] except KeyError: - origins = self.todo[url] + try: + origins = self.todo[url] + except KeyError: + origins = self.ext[url] for source, rawlink in origins: triple = url, rawlink, self.bad[url] try: @@ -406,13 +414,7 @@ class Checker: try: f = self.urlopener.open(url) except IOError, msg: - if (type(msg) == TupleType and - len(msg) >= 4 and - msg[0] == 'http error' and - type(msg[3]) == InstanceType): - # Remove the Message instance -- it may contain - # a file object which prevents pickling. - msg = msg[:3] + msg[4:] + msg = sanitize(msg) if verbose > 0: print "Error ", msg if verbose > 0: @@ -549,5 +551,16 @@ def show(p1, link, p2, origins): print +def sanitize(msg): + if (type(msg) == TupleType and + len(msg) >= 4 and + msg[0] == 'http error' and + type(msg[3]) == InstanceType): + # Remove the Message instance -- it may contain + # a file object which prevents pickling. + msg = msg[:3] + msg[4:] + return msg + + if __name__ == '__main__': main() -- cgit v0.12