summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- Lib/MimeWriter.py 59
-rw-r--r-- Lib/cmd.py 62
-rw-r--r-- Lib/dumbdbm.py 12
-rw-r--r-- Lib/formatter.py 37
-rw-r--r-- Lib/gzip.py 38
-rw-r--r-- Lib/htmllib.py 49
-rw-r--r-- Lib/pickle.py 72
-rw-r--r-- Lib/rexec.py 161
-rw-r--r-- Lib/robotparser.py 17
9 files changed, 499 insertions, 8 deletions
diff --git a/Lib/MimeWriter.py b/Lib/MimeWriter.py
index bb878c1..58c0a0b 100644
--- a/Lib/MimeWriter.py
+++ b/Lib/MimeWriter.py
@@ -1,8 +1,11 @@
"""Generic MIME writer.
-Classes:
-
-MimeWriter - the only thing here.
+This module defines the class MimeWriter. The MimeWriter class implements
+a basic formatter for creating MIME multi-part files. It doesn't seek around
+the output file nor does it use large amounts of buffer space. You must write
+the parts out in the order that they should occur in the final file.
+MimeWriter does buffer the headers you add, allowing you to rearrange their
+order.
"""
@@ -86,6 +89,14 @@ class MimeWriter:
self._headers = []
def addheader(self, key, value, prefix=0):
+ """Add a header line to the MIME message.
+
+ The key is the name of the header, and the value provides the value
+ of the header. The optional argument prefix determines
+ where the header is inserted; 0 means append at the end, 1 means
+ insert at the start. The default is to append.
+
+ """
lines = value.split("\n")
while lines and not lines[-1]: del lines[-1]
while lines and not lines[0]: del lines[0]
@@ -99,10 +110,26 @@ class MimeWriter:
self._headers.append(line)
def flushheaders(self):
+ """Writes out and forgets all headers accumulated so far.
+
+ This is useful if you don't need a body part at all; for example,
+ for a subpart of type message/rfc822 that's (mis)used to store some
+ header-like information.
+
+ """
self._fp.writelines(self._headers)
self._headers = []
def startbody(self, ctype, plist=[], prefix=1):
+ """Returns a file-like object for writing the body of the message.
+
+ The content-type is set to the provided ctype, and the optional
+ parameter, plist, provides additional parameters for the
+ content-type declaration. The optional argument prefix determines
+ where the header is inserted; 0 means append at the end, 1 means
+ insert at the start. The default is to insert at the start.
+
+ """
for name, value in plist:
ctype = ctype + ';\n %s=\"%s\"' % (name, value)
self.addheader("Content-Type", ctype, prefix=prefix)
@@ -111,16 +138,42 @@ class MimeWriter:
return self._fp
def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1):
+ """Returns a file-like object for writing the body of the message.
+
+ Additionally, this method initializes the multi-part code, where the
+ subtype parameter provides the multipart subtype, the boundary
+ parameter may provide a user-defined boundary specification, and the
+ plist parameter provides optional parameters for the subtype. The
+ optional argument, prefix, determines where the header is inserted;
+ 0 means append at the end, 1 means insert at the start. The default
+ is to insert at the start. Subparts should be created using the
+ nextpart() method.
+
+ """
self._boundary = boundary or mimetools.choose_boundary()
return self.startbody("multipart/" + subtype,
[("boundary", self._boundary)] + plist,
prefix=prefix)
def nextpart(self):
+ """Returns a new instance of MimeWriter which represents an
+ individual part in a multipart message.
+
+ This may be used to write the part as well as used for creating
+ recursively complex multipart messages. The message must first be
+ initialized with the startmultipartbody() method before using the
+ nextpart() method.
+
+ """
self._fp.write("\n--" + self._boundary + "\n")
return self.__class__(self._fp)
def lastpart(self):
+ """This is used to designate the last part of a multipart message.
+
+ It should always be used when writing multipart messages.
+
+ """
self._fp.write("\n--" + self._boundary + "--\n")
diff --git a/Lib/cmd.py b/Lib/cmd.py
index 5bbf4bc..c4caf5b 100644
--- a/Lib/cmd.py
+++ b/Lib/cmd.py
@@ -53,6 +53,17 @@ PROMPT = '(Cmd) '
IDENTCHARS = string.ascii_letters + string.digits + '_'
class Cmd:
+ """A simple framework for writing line-oriented command interpreters.
+
+ These are often useful for test harnesses, administrative tools, and
+ prototypes that will later be wrapped in a more sophisticated interface.
+
+ A Cmd instance or subclass instance is a line-oriented interpreter
+ framework. There is no good reason to instantiate Cmd itself; rather,
+ it's useful as a superclass of an interpreter class you define yourself
+ in order to inherit Cmd's methods and encapsulate action methods.
+
+ """
prompt = PROMPT
identchars = IDENTCHARS
ruler = '='
@@ -67,6 +78,14 @@ class Cmd:
use_rawinput = 1
def __init__(self, completekey='tab'):
+ """Instantiate a line-oriented interpreter framework.
+
+ The optional argument is the readline name of a completion key;
+ it defaults to the Tab key. If completekey is not None and the
+ readline module is available, command completion is done
+ automatically.
+
+ """
if completekey:
try:
import readline
@@ -76,6 +95,12 @@ class Cmd:
pass
def cmdloop(self, intro=None):
+ """Repeatedly issue a prompt, accept input, parse an initial prefix
+ off the received input, and dispatch to action methods, passing them
+ the remainder of the line as argument.
+
+ """
+
self.preloop()
if intro is not None:
self.intro = intro
@@ -106,15 +131,25 @@ class Cmd:
self.postloop()
def precmd(self, line):
+ """Hook method executed just before the command line is
+ interpreted, but after the input prompt is generated and issued.
+
+ """
return line
def postcmd(self, stop, line):
+ """Hook method executed just after a command dispatch is finished."""
return stop
def preloop(self):
+ """Hook method executed once when the cmdloop() method is called."""
pass
def postloop(self):
+ """Hook method executed once when the cmdloop() method is about to
+ return.
+
+ """
pass
def parseline(self, line):
@@ -134,6 +169,15 @@ class Cmd:
return cmd, arg, line
def onecmd(self, line):
+ """Interpret the argument as though it had been typed in response
+ to the prompt.
+
+ This may be overridden, but should not normally need to be;
+ see the precmd() and postcmd() methods for useful execution hooks.
+ The return value is a flag indicating whether interpretation of
+ commands by the interpreter should stop.
+
+ """
cmd, arg, line = self.parseline(line)
if not line:
return self.emptyline()
@@ -150,13 +194,31 @@ class Cmd:
return func(arg)
def emptyline(self):
+ """Called when an empty line is entered in response to the prompt.
+
+ If this method is not overridden, it repeats the last nonempty
+ command entered.
+
+ """
if self.lastcmd:
return self.onecmd(self.lastcmd)
def default(self, line):
+ """Called on an input line when the command prefix is not recognized.
+
+ If this method is not overridden, it prints an error message and
+ returns.
+
+ """
print '*** Unknown syntax:', line
def completedefault(self, *ignored):
+ """Method called to complete an input line when no command-specific
+ complete_*() method is available.
+
+ By default, it returns an empty list.
+
+ """
return []
def completenames(self, text, *ignored):
diff --git a/Lib/dumbdbm.py b/Lib/dumbdbm.py
index 3fb6e1d..c2b7952 100644
--- a/Lib/dumbdbm.py
+++ b/Lib/dumbdbm.py
@@ -154,5 +154,17 @@ class _Database:
def open(file, flag=None, mode=0666):
+ """Open the database named by file and return the corresponding object.
+
+ The flag argument, used to control how the database is opened in the
+ other DBM implementations, is ignored in the dumbdbm module; the
+ database is always opened for update, and will be created if it does
+ not exist.
+
+ The optional mode argument is the UNIX mode of the file, used only when
+ the database has to be created. It defaults to octal code 0666 (and
+ will be modified by the prevailing umask).
+
+ """
# flag, mode arguments are currently ignored
return _Database(file, mode)
diff --git a/Lib/formatter.py b/Lib/formatter.py
index 0607526..75f4718 100644
--- a/Lib/formatter.py
+++ b/Lib/formatter.py
@@ -27,6 +27,15 @@ AS_IS = None
class NullFormatter:
+ """A formatter which does nothing.
+
+ If the writer parameter is omitted, a NullWriter instance is created.
+ No methods of the writer are called by NullFormatter instances.
+
+ Implementations should inherit from this class if they implement a
+ formatter interface but do not need to inherit any implementation.
+
+ """
def __init__(self, writer=None):
if not writer:
@@ -52,6 +61,13 @@ class NullFormatter:
class AbstractFormatter:
+ """The standard formatter.
+
+ This implementation has demonstrated wide applicability to many writers,
+ and may be used directly in most circumstances. It has been used to
+ implement a full-featured World Wide Web browser.
+
+ """
# Space handling policy: blank spaces at the boundary between elements
# are handled by the outermost context. "Literal" data is not checked
@@ -283,7 +299,13 @@ class AbstractFormatter:
class NullWriter:
- """Minimal writer interface to use in testing & inheritance."""
+ """Minimal writer interface to use in testing & inheritance.
+
+ A writer which only provides the interface definition; no actions are
+ taken on any methods. This should be the base class for all writers
+ which do not need to inherit any implementation methods.
+
+ """
def __init__(self): pass
def flush(self): pass
def new_alignment(self, align): pass
@@ -300,6 +322,12 @@ class NullWriter:
class AbstractWriter(NullWriter):
+ """A writer which can be used in debugging formatters, but not much else.
+
+ Each method simply announces itself by printing its name and
+ arguments on standard output.
+
+ """
def new_alignment(self, align):
print "new_alignment(%s)" % `align`
@@ -336,6 +364,13 @@ class AbstractWriter(NullWriter):
class DumbWriter(NullWriter):
+ """Simple writer class which writes output on the file object passed in
+ as the file parameter or, if file is omitted, on standard output. The
+ output is simply word-wrapped to the number of columns specified by
+ the maxcol parameter. This class is suitable for reflowing a sequence
+ of paragraphs.
+
+ """
def __init__(self, file=None, maxcol=72):
self.file = file or sys.stdout
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 9e198c7..6838d19 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -27,14 +27,52 @@ def read32(input):
return struct.unpack("<l", input.read(4))[0]
def open(filename, mode="rb", compresslevel=9):
+ """Shorthand for GzipFile(filename, mode, compresslevel).
+
+ The filename argument is required; mode defaults to 'rb'
+ and compresslevel defaults to 9.
+
+ """
return GzipFile(filename, mode, compresslevel)
class GzipFile:
+ """The GzipFile class simulates most of the methods of a file object with
+ the exception of the readinto(), truncate(), and xreadlines() methods.
+
+ """
myfileobj = None
def __init__(self, filename=None, mode=None,
compresslevel=9, fileobj=None):
+ """Constructor for the GzipFile class.
+
+ At least one of fileobj and filename must be given a
+ non-trivial value.
+
+ The new class instance is based on fileobj, which can be a regular
+ file, a StringIO object, or any other object which simulates a file.
+ It defaults to None, in which case filename is opened to provide
+ a file object.
+
+ When fileobj is not None, the filename argument is only used to be
+ included in the gzip file header, which may include the original
+ filename of the uncompressed file. It defaults to the filename of
+ fileobj, if discernible; otherwise, it defaults to the empty string,
+ and in this case the original filename is not included in the header.
+
+ The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
+ depending on whether the file will be read or written. The default
+ is the mode of fileobj if discernible; otherwise, the default is 'rb'.
+ Be aware that only the 'rb', 'ab', and 'wb' values should be used
+ for cross-platform portability.
+
+ The compresslevel argument is an integer from 1 to 9 controlling the
+ level of compression; 1 is fastest and produces the least compression,
+ and 9 is slowest and produces the most compression. The default is 9.
+
+ """
+
# guarantee the file is opened in binary mode on platforms
# that care about that sort of thing
if mode and 'b' not in mode:
diff --git a/Lib/htmllib.py b/Lib/htmllib.py
index 446192f..6219bf0 100644
--- a/Lib/htmllib.py
+++ b/Lib/htmllib.py
@@ -11,10 +11,23 @@ from formatter import AS_IS
__all__ = ["HTMLParser"]
class HTMLParser(SGMLParser):
+ """This is the basic HTML parser class.
+
+ It supports all entity names required by the HTML 2.0 specification
+ (RFC 1866). It also defines handlers for all HTML 2.0 and many HTML 3.0
+ and 3.2 elements.
+
+ """
from htmlentitydefs import entitydefs
def __init__(self, formatter, verbose=0):
+ """Creates an instance of the HTMLParser class.
+
+ The formatter parameter is the formatter instance associated with
+ the parser.
+
+ """
SGMLParser.__init__(self, verbose)
self.formatter = formatter
self.savedata = None
@@ -43,9 +56,24 @@ class HTMLParser(SGMLParser):
# --- Hooks to save data; shouldn't need to be overridden
def save_bgn(self):
+ """Begins saving character data in a buffer instead of sending it
+ to the formatter object.
+
+ Retrieve the stored data via the save_end() method. Use of the
+ save_bgn() / save_end() pair may not be nested.
+
+ """
self.savedata = ''
def save_end(self):
+ """Ends buffering character data and returns all data saved since
+ the preceding call to the save_bgn() method.
+
+ If the nofill flag is false, whitespace is collapsed to single
+ spaces. A call to this method without a preceding call to the
+ save_bgn() method will raise a TypeError exception.
+
+ """
data = self.savedata
self.savedata = None
if not self.nofill:
@@ -55,11 +83,26 @@ class HTMLParser(SGMLParser):
# --- Hooks for anchors; should probably be overridden
def anchor_bgn(self, href, name, type):
+ """This method is called at the start of an anchor region.
+
+ The arguments correspond to the attributes of the <A> tag with
+ the same names. The default implementation maintains a list of
+ hyperlinks (defined by the HREF attribute for <A> tags) within
+ the document. The list of hyperlinks is available as the data
+ attribute anchorlist.
+
+ """
self.anchor = href
if self.anchor:
self.anchorlist.append(href)
def anchor_end(self):
+ """This method is called at the end of an anchor region.
+
+ The default implementation adds a textual footnote marker using an
+ index into the list of hyperlinks created by the anchor_bgn() method.
+
+ """
if self.anchor:
self.handle_data("[%d]" % len(self.anchorlist))
self.anchor = None
@@ -67,6 +110,12 @@ class HTMLParser(SGMLParser):
# --- Hook for images; should probably be overridden
def handle_image(self, src, alt, *args):
+ """This method is called to handle images.
+
+ The default implementation simply passes the alt value to the
+ handle_data() method.
+
+ """
self.handle_data(alt)
# --------- Top level elememts
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 6a162a9..a303465 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -41,9 +41,31 @@ compatible_formats = ["1.0", "1.1", "1.2"] # Old format versions we can read
mdumps = marshal.dumps
mloads = marshal.loads
-class PickleError(Exception): pass
-class PicklingError(PickleError): pass
-class UnpicklingError(PickleError): pass
+class PickleError(Exception):
+ """A common base class for the other pickling exceptions.
+
+ Inherits from Exception.
+
+ """
+ pass
+
+class PicklingError(PickleError):
+ """This exception is raised when an unpicklable object is passed to the
+ dump() method.
+
+ """
+ pass
+
+class UnpicklingError(PickleError):
+ """This exception is raised when there is a problem unpickling an object,
+ such as a security violation.
+
+ Note that other exceptions may also be raised during unpickling, including
+ (but not necessarily limited to) AttributeError, EOFError, ImportError,
+ and IndexError.
+
+ """
+ pass
class _Stop(Exception):
def __init__(self, value):
@@ -111,14 +133,39 @@ del x
class Pickler:
def __init__(self, file, bin = 0):
+ """This takes a file-like object for writing a pickle data stream.
+
+ The optional bin parameter, if true, tells the pickler to use the more
+ efficient binary pickle format, otherwise the ASCII format is used
+ (this is the default).
+
+ The file parameter must have a write() method that accepts a single
+ string argument. It can thus be an open file object, a StringIO
+ object, or any other custom object that meets this interface.
+
+ """
self.write = file.write
self.memo = {}
self.bin = bin
def clear_memo(self):
+ """Clears the pickler's "memo".
+
+ The memo is the data structure that remembers which objects the
+ pickler has already seen, so that shared or recursive objects are
+ pickled by reference and not by value. This method is useful when
+ re-using picklers.
+
+ """
self.memo.clear()
def dump(self, object):
+ """Write a pickled representation of object to the open file object.
+
+ Either the binary or ASCII format will be used, depending on the
+ value of the bin flag passed to the constructor.
+
+ """
self.save(object)
self.write(STOP)
@@ -594,11 +641,30 @@ def whichmodule(cls, clsname):
class Unpickler:
def __init__(self, file):
+ """This takes a file-like object for reading a pickle data stream.
+
+ This class automatically determines whether the data stream was
+ written in binary mode or not, so it does not need a flag as in
+ the Pickler class factory.
+
+ The file-like object must have two methods, a read() method that
+ takes an integer argument, and a readline() method that requires no
+ arguments. Both methods should return a string. Thus the file-like
+ object can be a file object opened for reading, a StringIO object,
+ or any other custom object that meets this interface.
+
+ """
self.readline = file.readline
self.read = file.read
self.memo = {}
def load(self):
+ """Read a pickled object representation from the open file object.
+
+ Return the reconstituted object hierarchy specified in the file
+ object.
+
+ """
self.mark = object() # any new unique object
self.stack = []
self.append = self.stack.append
diff --git a/Lib/rexec.py b/Lib/rexec.py
index 411fcc5..6dc1585a 100644
--- a/Lib/rexec.py
+++ b/Lib/rexec.py
@@ -114,8 +114,18 @@ RModuleImporter = ihooks.ModuleImporter
class RExec(ihooks._Verbose):
+ """Basic restricted execution framework.
- """Restricted Execution environment."""
+ Code executed in this restricted environment will only have access to
+ modules and functions that are deemed safe; you can subclass RExec to
+ add or remove capabilities as desired.
+
+ The RExec class can prevent code from performing unsafe operations like
+ reading or writing disk files, or using TCP/IP sockets. However, it does
+ not protect against code using extremely large amounts of memory or
+ processor time.
+
+ """
ok_path = tuple(sys.path) # That's a policy decision
@@ -135,6 +145,33 @@ class RExec(ihooks._Verbose):
nok_builtin_names = ('open', 'file', 'reload', '__import__')
def __init__(self, hooks = None, verbose = 0):
+ """Returns an instance of the RExec class.
+
+ The hooks parameter is an instance of the RHooks class or a subclass
+ of it. If it is omitted or None, the default RHooks class is
+ instantiated.
+
+ Whenever the RExec module searches for a module (even a built-in one)
+ or reads a module's code, it doesn't actually go out to the file
+ system itself. Rather, it calls methods of an RHooks instance that
+ was passed to or created by its constructor. (Actually, the RExec
+ object doesn't make these calls --- they are made by a module loader
+ object that's part of the RExec object. This allows another level of
+ flexibility, which can be useful when changing the mechanics of
+ import within the restricted environment.)
+
+ By providing an alternate RHooks object, we can control the file
+ system accesses made to import a module, without changing the
+ actual algorithm that controls the order in which those accesses are
+ made. For instance, we could substitute an RHooks object that
+ passes all filesystem requests to a file server elsewhere, via some
+ RPC mechanism such as ILU. Grail's applet loader uses this to support
+ importing applets from a URL for a directory.
+
+ If the verbose parameter is true, additional debugging output may be
+ sent to standard output.
+
+ """
ihooks._Verbose.__init__(self, verbose)
# XXX There's a circular reference here:
self.hooks = hooks or RHooks(verbose)
@@ -250,24 +287,67 @@ class RExec(ihooks._Verbose):
# The r* methods are public interfaces
def r_exec(self, code):
+ """Execute code within a restricted environment.
+
+ The code parameter must either be a string containing one or more
+ lines of Python code, or a compiled code object, which will be
+ executed in the restricted environment's __main__ module.
+
+ """
m = self.add_module('__main__')
exec code in m.__dict__
def r_eval(self, code):
+ """Evaluate code within a restricted environment.
+
+ The code parameter must either be a string containing a Python
+ expression, or a compiled code object, which will be evaluated in
+ the restricted environment's __main__ module. The value of the
+ expression or code object will be returned.
+
+ """
m = self.add_module('__main__')
return eval(code, m.__dict__)
def r_execfile(self, file):
+ """Execute the Python code in the file in the restricted
+ environment's __main__ module.
+
+ """
m = self.add_module('__main__')
execfile(file, m.__dict__)
def r_import(self, mname, globals={}, locals={}, fromlist=[]):
+ """Import a module, raising an ImportError exception if the module
+ is considered unsafe.
+
+ This method is implicitly called by code executing in the
+ restricted environment. Overriding this method in a subclass is
+ used to change the policies enforced by a restricted environment.
+
+ """
return self.importer.import_module(mname, globals, locals, fromlist)
def r_reload(self, m):
+ """Reload the module object, re-parsing and re-initializing it.
+
+ This method is implicitly called by code executing in the
+ restricted environment. Overriding this method in a subclass is
+ used to change the policies enforced by a restricted environment.
+
+ """
return self.importer.reload(m)
def r_unload(self, m):
+ """Unload the module.
+
+ Removes it from the restricted environment's sys.modules dictionary.
+
+ This method is implicitly called by code executing in the
+ restricted environment. Overriding this method in a subclass is
+ used to change the policies enforced by a restricted environment.
+
+ """
return self.importer.unload(m)
# The s_* methods are similar but also swap std{in,out,err}
@@ -325,26 +405,105 @@ class RExec(ihooks._Verbose):
return r
def s_exec(self, *args):
+ """Execute code within a restricted environment.
+
+ Similar to the r_exec() method, but the code will be granted access
+ to restricted versions of the standard I/O streams sys.stdin,
+ sys.stderr, and sys.stdout.
+
+ The code parameter must either be a string containing one or more
+ lines of Python code, or a compiled code object, which will be
+ executed in the restricted environment's __main__ module.
+
+ """
return self.s_apply(self.r_exec, args)
def s_eval(self, *args):
+ """Evaluate code within a restricted environment.
+
+ Similar to the r_eval() method, but the code will be granted access
+ to restricted versions of the standard I/O streams sys.stdin,
+ sys.stderr, and sys.stdout.
+
+ The code parameter must either be a string containing a Python
+ expression, or a compiled code object, which will be evaluated in
+ the restricted environment's __main__ module. The value of the
+ expression or code object will be returned.
+
+ """
return self.s_apply(self.r_eval, args)
def s_execfile(self, *args):
+ """Execute the Python code in the file in the restricted
+ environment's __main__ module.
+
+ Similar to the r_execfile() method, but the code will be granted
+ access to restricted versions of the standard I/O streams sys.stdin,
+ sys.stderr, and sys.stdout.
+
+ """
return self.s_apply(self.r_execfile, args)
def s_import(self, *args):
+ """Import a module, raising an ImportError exception if the module
+ is considered unsafe.
+
+ This method is implicitly called by code executing in the
+ restricted environment. Overriding this method in a subclass is
+ used to change the policies enforced by a restricted environment.
+
+ Similar to the r_import() method, but has access to restricted
+ versions of the standard I/O streams sys.stdin, sys.stderr, and
+ sys.stdout.
+
+ """
return self.s_apply(self.r_import, args)
def s_reload(self, *args):
+ """Reload the module object, re-parsing and re-initializing it.
+
+ This method is implicitly called by code executing in the
+ restricted environment. Overriding this method in a subclass is
+ used to change the policies enforced by a restricted environment.
+
+ Similar to the r_reload() method, but has access to restricted
+ versions of the standard I/O streams sys.stdin, sys.stderr, and
+ sys.stdout.
+
+ """
return self.s_apply(self.r_reload, args)
def s_unload(self, *args):
+ """Unload the module.
+
+ Removes it from the restricted environment's sys.modules dictionary.
+
+ This method is implicitly called by code executing in the
+ restricted environment. Overriding this method in a subclass is
+ used to change the policies enforced by a restricted environment.
+
+ Similar to the r_unload() method, but has access to restricted
+ versions of the standard I/O streams sys.stdin, sys.stderr, and
+ sys.stdout.
+
+ """
return self.s_apply(self.r_unload, args)
# Restricted open(...)
def r_open(self, file, mode='r', buf=-1):
+ """Method called when open() is called in the restricted environment.
+
+ The arguments are identical to those of the open() function, and a
+ file object (or a class instance compatible with file objects)
+ should be returned. RExec's default behaviour is to allow opening
+ any file for reading, but to forbid any attempt to write a file.
+
+ This method is implicitly called by code executing in the
+ restricted environment. Overriding this method in a subclass is
+ used to change the policies enforced by a restricted environment.
+
+ """
if mode not in ('r', 'rb'):
raise IOError, "can't open files for writing in restricted mode"
return open(file, mode, buf)
diff --git a/Lib/robotparser.py b/Lib/robotparser.py
index 99bcdae..7940586 100644
--- a/Lib/robotparser.py
+++ b/Lib/robotparser.py
@@ -20,6 +20,11 @@ def _debug(msg):
class RobotFileParser:
+ """This class provides a set of methods to read, parse and answer
+ questions about a single robots.txt file.
+
+ """
+
def __init__(self, url=''):
self.entries = []
self.default_entry = None
@@ -29,17 +34,29 @@ class RobotFileParser:
self.last_checked = 0
def mtime(self):
+ """Returns the time the robots.txt file was last fetched.
+
+ This is useful for long-running web spiders that need to
+ check for new robots.txt files periodically.
+
+ """
return self.last_checked
def modified(self):
+ """Sets the time the robots.txt file was last fetched to the
+ current time.
+
+ """
import time
self.last_checked = time.time()
def set_url(self, url):
+ """Sets the URL referring to a robots.txt file."""
self.url = url
self.host, self.path = urlparse.urlparse(url)[1:3]
def read(self):
+ """Reads the robots.txt URL and feeds it to the parser."""
opener = URLopener()
f = opener.open(self.url)
lines = []