summary | refs | log | tree | commit | diff | stats
path: root/Tools/webchecker
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 1997-01-31 18:57:23 (GMT)
committer: Guido van Rossum <guido@python.org> 1997-01-31 18:57:23 (GMT)
commit: 0b0b5f0279683b6b48762cf11df05a0f1ffc1bbc (patch)
tree: 296e85c2e864806162aa8380eaaa57b9e1c10279 /Tools/webchecker
parent: 42218ce33cd11fb2dc4f7a4e2eef092c4d0f2bbf (diff)
download:
cpython-0b0b5f0279683b6b48762cf11df05a0f1ffc1bbc.zip
cpython-0b0b5f0279683b6b48762cf11df05a0f1ffc1bbc.tar.gz
cpython-0b0b5f0279683b6b48762cf11df05a0f1ffc1bbc.tar.bz2
Spin off checking of external page in a subroutine.
Increase MAXPAGE to 150K. Add back printing of __doc__ for usage message.
Diffstat (limited to 'Tools/webchecker')
-rwxr-xr-x Tools/webchecker/webchecker.py 37
1 files changed, 20 insertions, 17 deletions
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index c454861..9e676ca 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -121,7 +121,7 @@ import robotparser
# Tunable parameters
DEFROOT = "file:/usr/local/etc/httpd/htdocs/" # Default root URL
-MAXPAGE = 50000 # Ignore files bigger than this
+MAXPAGE = 150000 # Ignore files bigger than this
ROUNDSIZE = 50 # Number of links processed per round
DUMPFILE = "@webchecker.pickle" # Pickled checkpoint
AGENTNAME = "webchecker" # Agent name for robots.txt parser
@@ -145,6 +145,7 @@ def main():
except getopt.error, msg:
sys.stdout = sys.stderr
print msg
+ print __doc__%globals()
sys.exit(2)
for o, a in opts:
if o == '-R':
@@ -314,22 +315,24 @@ class Checker:
for url in urls:
if verbose > 0:
show("HREF ", url, " from", self.ext[url])
- if not checkext:
- continue
- if url[:7] == 'mailto:':
- if verbose > 2: print "Not checking", url
- continue
- if verbose > 2: print "Checking", url, "..."
- try:
- f = self.urlopener.open(url)
- safeclose(f)
- if verbose > 3: print "OK"
- if self.bad.has_key(url):
- self.setgood(url)
- except IOError, msg:
- msg = sanitize(msg)
- if verbose > 0: print "Error", msg
- self.setbad(url, msg)
+ if checkext:
+ self.checkextpage(url)
+
+ def checkextpage(self, url):
+ if url[:7] == 'mailto:' or url[:5] == 'news:':
+ if verbose > 2: print "Not checking", url
+ return
+ if verbose > 2: print "Checking", url, "..."
+ try:
+ f = self.urlopener.open(url)
+ safeclose(f)
+ if verbose > 3: print "OK"
+ if self.bad.has_key(url):
+ self.setgood(url)
+ except IOError, msg:
+ msg = sanitize(msg)
+ if verbose > 0: print "Error", msg
+ self.setbad(url, msg)
def report_errors(self):
if not self.bad: