summary refs log tree commit diff stats
path: root/Tools/webchecker/webchecker.py
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 1997-01-30 03:30:20 (GMT)
committer Guido van Rossum <guido@python.org> 1997-01-30 03:30:20 (GMT)
commit 325a64f2072802fe9597a372d5f6223156cd8189 (patch)
tree d5633790efd62adca4006b32e4a78b08eb827bf4 /Tools/webchecker/webchecker.py
parent df47bafa1c3c30bc97d7f68c23e21cbb8be3f3e3 (diff)
download cpython-325a64f2072802fe9597a372d5f6223156cd8189.zip
cpython-325a64f2072802fe9597a372d5f6223156cd8189.tar.gz
cpython-325a64f2072802fe9597a372d5f6223156cd8189.tar.bz2
Catch I/O errors when parsing robots.txt file.
Add version number, printed at startup in non-quiet mode.
Diffstat (limited to 'Tools/webchecker/webchecker.py')
-rwxr-xr-x Tools/webchecker/webchecker.py 18
1 file changed, 13 insertions, 5 deletions
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index 6b6fbf5..2ec9b61 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -93,6 +93,8 @@ rooturl -- URL to start checking
"""
+__version__ = "0.1"
+
import sys
import os
@@ -135,7 +137,6 @@ def main():
except getopt.error, msg:
sys.stdout = sys.stderr
print msg
- print __doc__ % globals()
sys.exit(2)
for o, a in opts:
if o == '-R':
@@ -151,6 +152,9 @@ def main():
if o == '-v':
verbose = verbose + 1
+ if verbose:
+ print AGENTNAME, "version", __version__
+
if restart:
if verbose > 0:
print "Loading checkpoint from %s ..." % dumpfile
@@ -234,13 +238,17 @@ class Checker:
self.addrobot(root)
def addrobot(self, root):
+ url = urlparse.urljoin(root, "/robots.txt")
self.robots[root] = rp = robotparser.RobotFileParser()
- if verbose > 3:
- print "Parsing robots.txt file"
+ if verbose > 2:
+ print "Parsing", url
rp.debug = 1
- url = urlparse.urljoin(root, "/robots.txt")
rp.set_url(url)
- rp.read()
+ try:
+ rp.read()
+ except IOError, msg:
+ if verbose > 1:
+ print "I/O error parsing", url, ":", msg
def run(self):
while self.todo: