diff options
author | Guido van Rossum <guido@python.org> | 1997-01-30 03:30:20 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1997-01-30 03:30:20 (GMT) |
commit | 325a64f2072802fe9597a372d5f6223156cd8189 (patch) | |
tree | d5633790efd62adca4006b32e4a78b08eb827bf4 /Tools/webchecker/webchecker.py | |
parent | df47bafa1c3c30bc97d7f68c23e21cbb8be3f3e3 (diff) | |
download | cpython-325a64f2072802fe9597a372d5f6223156cd8189.zip cpython-325a64f2072802fe9597a372d5f6223156cd8189.tar.gz cpython-325a64f2072802fe9597a372d5f6223156cd8189.tar.bz2 |
Catch I/O errors when parsing robots.txt file.
Add version number, printed at startup in non-quiet mode.
Diffstat (limited to 'Tools/webchecker/webchecker.py')
-rwxr-xr-x | Tools/webchecker/webchecker.py | 18 |
1 files changed, 13 insertions, 5 deletions
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index 6b6fbf5..2ec9b61 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -93,6 +93,8 @@ rooturl -- URL to start checking
 
 """
 
+__version__ = "0.1"
+
 import sys
 import os
 
@@ -135,7 +137,6 @@ def main():
     except getopt.error, msg:
         sys.stdout = sys.stderr
         print msg
-        print __doc__ % globals()
         sys.exit(2)
     for o, a in opts:
         if o == '-R':
@@ -151,6 +152,9 @@ def main():
         if o == '-v':
             verbose = verbose + 1
 
+    if verbose:
+        print AGENTNAME, "version", __version__
+
     if restart:
         if verbose > 0:
             print "Loading checkpoint from %s ..." % dumpfile
@@ -234,13 +238,17 @@ class Checker:
         self.addrobot(root)
 
     def addrobot(self, root):
+        url = urlparse.urljoin(root, "/robots.txt")
         self.robots[root] = rp = robotparser.RobotFileParser()
-        if verbose > 3:
-            print "Parsing robots.txt file"
+        if verbose > 2:
+            print "Parsing", url
             rp.debug = 1
-        url = urlparse.urljoin(root, "/robots.txt")
         rp.set_url(url)
-        rp.read()
+        try:
+            rp.read()
+        except IOError, msg:
+            if verbose > 1:
+                print "I/O error parsing", url, ":", msg
 
     def run(self):
         while self.todo: