summary | refs | log | tree | commit | diff | stats
path: root/Tools/webchecker
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 1997-01-30 04:26:57 (GMT)
committer: Guido van Rossum <guido@python.org> 1997-01-30 04:26:57 (GMT)
commit: 2739cd74b3c2221c7f77f274027650dc272b386f (patch)
tree: 30ac7cf1a1339cfbac222cd49959a785ffc08d49 /Tools/webchecker
parent: de662685885603d578e4edea83dac3ff8e9904b2 (diff)
download: cpython-2739cd74b3c2221c7f77f274027650dc272b386f.zip
cpython-2739cd74b3c2221c7f77f274027650dc272b386f.tar.gz
cpython-2739cd74b3c2221c7f77f274027650dc272b386f.tar.bz2
Some refinements of the external-link checking code: insert the errors
in the 'bad' dictionary (sanitize them so they are picklable; the sanitation code is now a subroutine); don't check mailto: URLs; omit colon in Error message.
Diffstat (limited to 'Tools/webchecker')
-rwxr-xr-x Tools/webchecker/webchecker.py 31
1 files changed, 22 insertions, 9 deletions
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index d6c81cc..7eb9a25 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -305,13 +305,18 @@ class Checker:
show("HREF ", url, " from", self.ext[url])
if not checkext:
continue
+ if url[:7] == 'mailto:':
+ if verbose > 2: print "Not checking", url
+ continue
if verbose > 2: print "Checking", url, "..."
try:
f = self.urlopener.open(url)
f.close()
if verbose > 3: print "OK"
except IOError, msg:
- print "Error:", msg
+ msg = sanitize(msg)
+ print "Error", msg
+ self.bad[url] = msg
def report_errors(self):
if not self.bad:
@@ -327,7 +332,10 @@ class Checker:
try:
origins = self.done[url]
except KeyError:
- origins = self.todo[url]
+ try:
+ origins = self.todo[url]
+ except KeyError:
+ origins = self.ext[url]
for source, rawlink in origins:
triple = url, rawlink, self.bad[url]
try:
@@ -406,13 +414,7 @@ class Checker:
try:
f = self.urlopener.open(url)
except IOError, msg:
- if (type(msg) == TupleType and
- len(msg) >= 4 and
- msg[0] == 'http error' and
- type(msg[3]) == InstanceType):
- # Remove the Message instance -- it may contain
- # a file object which prevents pickling.
- msg = msg[:3] + msg[4:]
+ msg = sanitize(msg)
if verbose > 0:
print "Error ", msg
if verbose > 0:
@@ -549,5 +551,16 @@ def show(p1, link, p2, origins):
print
+def sanitize(msg):
+ if (type(msg) == TupleType and
+ len(msg) >= 4 and
+ msg[0] == 'http error' and
+ type(msg[3]) == InstanceType):
+ # Remove the Message instance -- it may contain
+ # a file object which prevents pickling.
+ msg = msg[:3] + msg[4:]
+ return msg
+
+
if __name__ == '__main__':
main()