summary refs log tree commit diff stats
path: root/Lib/robotparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/robotparser.py')
-rw-r--r-- Lib/robotparser.py 17
1 files changed, 17 insertions, 0 deletions
diff --git a/Lib/robotparser.py b/Lib/robotparser.py
index 99bcdae..7940586 100644
--- a/Lib/robotparser.py
+++ b/Lib/robotparser.py
@@ -20,6 +20,11 @@ def _debug(msg):
class RobotFileParser:
+ """ This class provides a set of methods to read, parse and answer
+ questions about a single robots.txt file.
+
+ """
+
def __init__(self, url=''):
self.entries = []
self.default_entry = None
@@ -29,17 +34,29 @@ class RobotFileParser:
self.last_checked = 0
def mtime(self):
+ """Returns the time the robots.txt file was last fetched.
+
+ This is useful for long-running web spiders that need to
+ check for new robots.txt files periodically.
+
+ """
return self.last_checked
def modified(self):
+ """Sets the time the robots.txt file was last fetched to the
+ current time.
+
+ """
import time
self.last_checked = time.time()
def set_url(self, url):
+ """Sets the URL referring to a robots.txt file."""
self.url = url
self.host, self.path = urlparse.urlparse(url)[1:3]
def read(self):
+ """Reads the robots.txt URL and feeds it to the parser."""
opener = URLopener()
f = opener.open(self.url)
lines = []