diff options
author | Raymond Hettinger <python@rcn.com> | 2002-05-29 16:18:42 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2002-05-29 16:18:42 (GMT) |
commit | aef22fb9cdf31fb7f0afc28ad049f08a89e23761 (patch) | |
tree | cf1771f344aef5d404a83b7e3b9a5086ac80ca43 /Lib/robotparser.py | |
parent | d68f5171ebb2f3404548c846523e9e43308a4130 (diff) | |
download | cpython-aef22fb9cdf31fb7f0afc28ad049f08a89e23761.zip cpython-aef22fb9cdf31fb7f0afc28ad049f08a89e23761.tar.gz cpython-aef22fb9cdf31fb7f0afc28ad049f08a89e23761.tar.bz2 |
Patch 560023 adding docstrings. 2.2 Candidate (after verifying modules were not updated after 2.2).
Diffstat (limited to 'Lib/robotparser.py')
-rw-r--r-- | Lib/robotparser.py | 17 |
1 file changed, 17 insertions, 0 deletions
```diff
diff --git a/Lib/robotparser.py b/Lib/robotparser.py
index 99bcdae..7940586 100644
--- a/Lib/robotparser.py
+++ b/Lib/robotparser.py
@@ -20,6 +20,11 @@ def _debug(msg):
 
 
 class RobotFileParser:
+    """ This class provides a set of methods to read, parse and answer
+    questions about a single robots.txt file.
+
+    """
+
     def __init__(self, url=''):
         self.entries = []
         self.default_entry = None
@@ -29,17 +34,29 @@ class RobotFileParser:
         self.last_checked = 0
 
     def mtime(self):
+        """Returns the time the robots.txt file was last fetched.
+
+        This is useful for long-running web spiders that need to
+        check for new robots.txt files periodically.
+
+        """
         return self.last_checked
 
     def modified(self):
+        """Sets the time the robots.txt file was last fetched to the
+        current time.
+
+        """
         import time
         self.last_checked = time.time()
 
     def set_url(self, url):
+        """Sets the URL referring to a robots.txt file."""
         self.url = url
         self.host, self.path = urlparse.urlparse(url)[1:3]
 
     def read(self):
+        """Reads the robots.txt URL and feeds it to the parser."""
         opener = URLopener()
         f = opener.open(self.url)
         lines = []
```