From 325a64f2072802fe9597a372d5f6223156cd8189 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 30 Jan 1997 03:30:20 +0000 Subject: Catch I/O errors when parsing robots.txt file. Add version number, printed at startup in non-quiet mode. --- Tools/webchecker/webchecker.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py index 6b6fbf5..2ec9b61 100755 --- a/Tools/webchecker/webchecker.py +++ b/Tools/webchecker/webchecker.py @@ -93,6 +93,8 @@ rooturl -- URL to start checking """ +__version__ = "0.1" + import sys import os @@ -135,7 +137,6 @@ def main(): except getopt.error, msg: sys.stdout = sys.stderr print msg - print __doc__ % globals() sys.exit(2) for o, a in opts: if o == '-R': @@ -151,6 +152,9 @@ def main(): if o == '-v': verbose = verbose + 1 + if verbose: + print AGENTNAME, "version", __version__ + if restart: if verbose > 0: print "Loading checkpoint from %s ..." % dumpfile @@ -234,13 +238,17 @@ class Checker: self.addrobot(root) def addrobot(self, root): + url = urlparse.urljoin(root, "/robots.txt") self.robots[root] = rp = robotparser.RobotFileParser() - if verbose > 3: - print "Parsing robots.txt file" + if verbose > 2: + print "Parsing", url rp.debug = 1 - url = urlparse.urljoin(root, "/robots.txt") rp.set_url(url) - rp.read() + try: + rp.read() + except IOError, msg: + if verbose > 1: + print "I/O error parsing", url, ":", msg def run(self): while self.todo: -- cgit v0.12