diff options
-rw-r--r-- | Lib/robotparser.py | 4 | ||||
-rw-r--r-- | Misc/cheatsheet | 2 |
2 files changed, 3 insertions, 3 deletions
diff --git a/Lib/robotparser.py b/Lib/robotparser.py
index e2af545..6b23188 100644
--- a/Lib/robotparser.py
+++ b/Lib/robotparser.py
@@ -83,7 +83,7 @@ class RobotFileParser:
         self.entries.append(entry)
 
     def parse(self, lines):
-        """parse the input lines from a robot.txt file.
+        """parse the input lines from a robots.txt file.
            We allow that a user-agent: line is not preceded by
            one or more blank lines."""
         state = 0
@@ -148,7 +148,7 @@ class RobotFileParser:
 
     def can_fetch(self, useragent, url):
         """using the parsed robots.txt decide if useragent can fetch url"""
-        _debug("Checking robot.txt allowance for:\n user agent: %s\n url: %s" %
+        _debug("Checking robots.txt allowance for:\n user agent: %s\n url: %s" %
                (useragent, url))
         if self.disallow_all:
             return False
diff --git a/Misc/cheatsheet b/Misc/cheatsheet
index 0c16ddb..487949a 100644
--- a/Misc/cheatsheet
+++ b/Misc/cheatsheet
@@ -1962,7 +1962,7 @@ repr Redo repr() but with limits on most sizes.
 rexec Restricted execution facilities ("safe" exec, eval, etc).
 rfc822 RFC-822 message manipulation class.
 rlcompleter Word completion for GNU readline 2.0.
-robotparser Parse robot.txt files, useful for web spiders.
+robotparser Parse robots.txt files, useful for web spiders.
 sched A generally useful event scheduler class.
 sets Module for a set datatype.
 sgmllib A parser for SGML.