diff options
-rw-r--r-- | Lib/MimeWriter.py | 59 | ||||
-rw-r--r-- | Lib/cmd.py | 62 | ||||
-rw-r--r-- | Lib/dumbdbm.py | 12 | ||||
-rw-r--r-- | Lib/formatter.py | 37 | ||||
-rw-r--r-- | Lib/gzip.py | 38 | ||||
-rw-r--r-- | Lib/htmllib.py | 49 | ||||
-rw-r--r-- | Lib/pickle.py | 72 | ||||
-rw-r--r-- | Lib/rexec.py | 161 | ||||
-rw-r--r-- | Lib/robotparser.py | 17 |
9 files changed, 499 insertions, 8 deletions
diff --git a/Lib/MimeWriter.py b/Lib/MimeWriter.py index bb878c1..58c0a0b 100644 --- a/Lib/MimeWriter.py +++ b/Lib/MimeWriter.py @@ -1,8 +1,11 @@ """Generic MIME writer. -Classes: - -MimeWriter - the only thing here. +This module defines the class MimeWriter. The MimeWriter class implements +a basic formatter for creating MIME multi-part files. It doesn't seek around +the output file nor does it use large amounts of buffer space. You must write +the parts out in the order that they should occur in the final file. +MimeWriter does buffer the headers you add, allowing you to rearrange their +order. """ @@ -86,6 +89,14 @@ class MimeWriter: self._headers = [] def addheader(self, key, value, prefix=0): + """Add a header line to the MIME message. + + The key is the name of the header, where the value obviously provides + the value of the header. The optional argument prefix determines + where the header is inserted; 0 means append at the end, 1 means + insert at the start. The default is to append. + + """ lines = value.split("\n") while lines and not lines[-1]: del lines[-1] while lines and not lines[0]: del lines[0] @@ -99,10 +110,26 @@ class MimeWriter: self._headers.append(line) def flushheaders(self): + """Writes out and forgets all headers accumulated so far. + + This is useful if you don't need a body part at all; for example, + for a subpart of type message/rfc822 that's (mis)used to store some + header-like information. + + """ self._fp.writelines(self._headers) self._headers = [] def startbody(self, ctype, plist=[], prefix=1): + """Returns a file-like object for writing the body of the message. + + The content-type is set to the provided ctype, and the optional + parameter, plist, provides additional parameters for the + content-type declaration. The optional argument prefix determines + where the header is inserted; 0 means append at the end, 1 means + insert at the start. The default is to insert at the start. 
+ + """ for name, value in plist: ctype = ctype + ';\n %s=\"%s\"' % (name, value) self.addheader("Content-Type", ctype, prefix=prefix) @@ -111,16 +138,42 @@ class MimeWriter: return self._fp def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1): + """Returns a file-like object for writing the body of the message. + + Additionally, this method initializes the multi-part code, where the + subtype parameter provides the multipart subtype, the boundary + parameter may provide a user-defined boundary specification, and the + plist parameter provides optional parameters for the subtype. The + optional argument, prefix, determines where the header is inserted; + 0 means append at the end, 1 means insert at the start. The default + is to insert at the start. Subparts should be created using the + nextpart() method. + + """ self._boundary = boundary or mimetools.choose_boundary() return self.startbody("multipart/" + subtype, [("boundary", self._boundary)] + plist, prefix=prefix) def nextpart(self): + """Returns a new instance of MimeWriter which represents an + individual part in a multipart message. + + This may be used to write the part as well as used for creating + recursively complex multipart messages. The message must first be + initialized with the startmultipartbody() method before using the + nextpart() method. + + """ self._fp.write("\n--" + self._boundary + "\n") return self.__class__(self._fp) def lastpart(self): + """This is used to designate the last part of a multipart message. + + It should always be used when writing multipart messages. + + """ self._fp.write("\n--" + self._boundary + "--\n") @@ -53,6 +53,17 @@ PROMPT = '(Cmd) ' IDENTCHARS = string.ascii_letters + string.digits + '_' class Cmd: + """A simple framework for writing line-oriented command interpreters. + + These are often useful for test harnesses, administrative tools, and + prototypes that will later be wrapped in a more sophisticated interface. 
+ + A Cmd instance or subclass instance is a line-oriented interpreter + framework. There is no good reason to instantiate Cmd itself; rather, + it's useful as a superclass of an interpreter class you define yourself + in order to inherit Cmd's methods and encapsulate action methods. + + """ prompt = PROMPT identchars = IDENTCHARS ruler = '=' @@ -67,6 +78,14 @@ class Cmd: use_rawinput = 1 def __init__(self, completekey='tab'): + """Instantiate a line-oriented interpreter framework. + + The optional argument is the readline name of a completion key; + it defaults to the Tab key. If completekey is not None and the + readline module is available, command completion is done + automatically. + + """ if completekey: try: import readline @@ -76,6 +95,12 @@ class Cmd: pass def cmdloop(self, intro=None): + """Repeatedly issue a prompt, accept input, parse an initial prefix + off the received input, and dispatch to action methods, passing them + the remainder of the line as argument. + + """ + self.preloop() if intro is not None: self.intro = intro @@ -106,15 +131,25 @@ class Cmd: self.postloop() def precmd(self, line): + """Hook method executed just before the command line is + interpreted, but after the input prompt is generated and issued. + + """ return line def postcmd(self, stop, line): + """Hook method executed just after a command dispatch is finished.""" return stop def preloop(self): + """Hook method executed once when the cmdloop() method is called.""" pass def postloop(self): + """Hook method executed once when the cmdloop() method is about to + return. + + """ pass def parseline(self, line): @@ -134,6 +169,15 @@ class Cmd: return cmd, arg, line def onecmd(self, line): + """Interpret the argument as though it had been typed in response + to the prompt. + + This may be overridden, but should not normally need to be; + see the precmd() and postcmd() methods for useful execution hooks. 
+ The return value is a flag indicating whether interpretation of + commands by the interpreter should stop. + + """ cmd, arg, line = self.parseline(line) if not line: return self.emptyline() @@ -150,13 +194,31 @@ class Cmd: return func(arg) def emptyline(self): + """Called when an empty line is entered in response to the prompt. + + If this method is not overridden, it repeats the last nonempty + command entered. + + """ if self.lastcmd: return self.onecmd(self.lastcmd) def default(self, line): + """Called on an input line when the command prefix is not recognized. + + If this method is not overridden, it prints an error message and + returns. + + """ print '*** Unknown syntax:', line def completedefault(self, *ignored): + """Method called to complete an input line when no command-specific + complete_*() method is available. + + By default, it returns an empty list. + + """ return [] def completenames(self, text, *ignored): diff --git a/Lib/dumbdbm.py b/Lib/dumbdbm.py index 3fb6e1d..c2b7952 100644 --- a/Lib/dumbdbm.py +++ b/Lib/dumbdbm.py @@ -154,5 +154,17 @@ class _Database: def open(file, flag=None, mode=0666): + """Open the database file, filename, and return corresponding object. + + The flag argument, used to control how the database is opened in the + other DBM implementations, is ignored in the dumbdbm module; the + database is always opened for update, and will be created if it does + not exist. + + The optional mode argument is the UNIX mode of the file, used only when + the database has to be created. It defaults to octal code 0666 (and + will be modified by the prevailing umask). + + """ # flag, mode arguments are currently ignored return _Database(file, mode) diff --git a/Lib/formatter.py b/Lib/formatter.py index 0607526..75f4718 100644 --- a/Lib/formatter.py +++ b/Lib/formatter.py @@ -27,6 +27,15 @@ AS_IS = None class NullFormatter: + """A formatter which does nothing. + + If the writer parameter is omitted, a NullWriter instance is created. 
+ No methods of the writer are called by NullFormatter instances. + + Implementations should inherit from this class if implementing a writer + interface but don't need to inherit any implementation. + + """ def __init__(self, writer=None): if not writer: @@ -52,6 +61,13 @@ class NullFormatter: class AbstractFormatter: + """The standard formatter. + + This implementation has demonstrated wide applicability to many writers, + and may be used directly in most circumstances. It has been used to + implement a full-featured World Wide Web browser. + + """ # Space handling policy: blank spaces at the boundary between elements # are handled by the outermost context. "Literal" data is not checked @@ -283,7 +299,13 @@ class AbstractFormatter: class NullWriter: - """Minimal writer interface to use in testing & inheritance.""" + """Minimal writer interface to use in testing & inheritance. + + A writer which only provides the interface definition; no actions are + taken on any methods. This should be the base class for all writers + which do not need to inherit any implementation methods. + + """ def __init__(self): pass def flush(self): pass def new_alignment(self, align): pass @@ -300,6 +322,12 @@ class NullWriter: class AbstractWriter(NullWriter): + """A writer which can be used in debugging formatters, but not much else. + + Each method simply announces itself by printing its name and + arguments on standard output. + + """ def new_alignment(self, align): print "new_alignment(%s)" % `align` @@ -336,6 +364,13 @@ class AbstractWriter(NullWriter): class DumbWriter(NullWriter): + """Simple writer class which writes output on the file object passed in + as the file parameter or, if file is omitted, on standard output. The + output is simply word-wrapped to the number of columns specified by + the maxcol parameter. This class is suitable for reflowing a sequence + of paragraphs. 
+ + """ def __init__(self, file=None, maxcol=72): self.file = file or sys.stdout diff --git a/Lib/gzip.py b/Lib/gzip.py index 9e198c7..6838d19 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -27,14 +27,52 @@ def read32(input): return struct.unpack("<l", input.read(4))[0] def open(filename, mode="rb", compresslevel=9): + """Shorthand for GzipFile(filename, mode, compresslevel). + + The filename argument is required; mode defaults to 'rb' + and compresslevel defaults to 9. + + """ return GzipFile(filename, mode, compresslevel) class GzipFile: + """The GzipFile class simulates most of the methods of a file object with + the exception of the readinto(), truncate(), and xreadlines() methods. + + """ myfileobj = None def __init__(self, filename=None, mode=None, compresslevel=9, fileobj=None): + """Constructor for the GzipFile class. + + At least one of fileobj and filename must be given a + non-trivial value. + + The new class instance is based on fileobj, which can be a regular + file, a StringIO object, or any other object which simulates a file. + It defaults to None, in which case filename is opened to provide + a file object. + + When fileobj is not None, the filename argument is only used to be + included in the gzip file header, which may include the original + filename of the uncompressed file. It defaults to the filename of + fileobj, if discernible; otherwise, it defaults to the empty string, + and in this case the original filename is not included in the header. + + The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb', + depending on whether the file will be read or written. The default + is the mode of fileobj if discernible; otherwise, the default is 'rb'. + Be aware that only the 'rb', 'ab', and 'wb' values should be used + for cross-platform portability. 
+ + The compresslevel argument is an integer from 1 to 9 controlling the + level of compression; 1 is fastest and produces the least compression, + and 9 is slowest and produces the most compression. The default is 9. + + """ + # guarantee the file is opened in binary mode on platforms # that care about that sort of thing if mode and 'b' not in mode: diff --git a/Lib/htmllib.py b/Lib/htmllib.py index 446192f..6219bf0 100644 --- a/Lib/htmllib.py +++ b/Lib/htmllib.py @@ -11,10 +11,23 @@ from formatter import AS_IS __all__ = ["HTMLParser"] class HTMLParser(SGMLParser): + """This is the basic HTML parser class. + + It supports all entity names required by the HTML 2.0 specification + RFC 1866. It also defines handlers for all HTML 2.0 and many HTML 3.0 + and 3.2 elements. + + """ from htmlentitydefs import entitydefs def __init__(self, formatter, verbose=0): + """Creates an instance of the HTMLParser class. + + The formatter parameter is the formatter instance associated with + the parser. + + """ SGMLParser.__init__(self, verbose) self.formatter = formatter self.savedata = None @@ -43,9 +56,24 @@ class HTMLParser(SGMLParser): # --- Hooks to save data; shouldn't need to be overridden def save_bgn(self): + """Begins saving character data in a buffer instead of sending it + to the formatter object. + + Retrieve the stored data via the save_end() method. Use of the + save_bgn() / save_end() pair may not be nested. + + """ self.savedata = '' def save_end(self): + """Ends buffering character data and returns all data saved since + the preceding call to the save_bgn() method. + + If the nofill flag is false, whitespace is collapsed to single + spaces. A call to this method without a preceding call to the + save_bgn() method will raise a TypeError exception. 
+ + """ data = self.savedata self.savedata = None if not self.nofill: @@ -55,11 +83,26 @@ class HTMLParser(SGMLParser): # --- Hooks for anchors; should probably be overridden def anchor_bgn(self, href, name, type): + """This method is called at the start of an anchor region. + + The arguments correspond to the attributes of the <A> tag with + the same names. The default implementation maintains a list of + hyperlinks (defined by the HREF attribute for <A> tags) within + the document. The list of hyperlinks is available as the data + attribute anchorlist. + + """ self.anchor = href if self.anchor: self.anchorlist.append(href) def anchor_end(self): + """This method is called at the end of an anchor region. + + The default implementation adds a textual footnote marker using an + index into the list of hyperlinks created by the anchor_bgn() method. + + """ if self.anchor: self.handle_data("[%d]" % len(self.anchorlist)) self.anchor = None @@ -67,6 +110,12 @@ class HTMLParser(SGMLParser): # --- Hook for images; should probably be overridden def handle_image(self, src, alt, *args): + """This method is called to handle images. + + The default implementation simply passes the alt value to the + handle_data() method. + + """ self.handle_data(alt) # --------- Top level elememts diff --git a/Lib/pickle.py b/Lib/pickle.py index 6a162a9..a303465 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -41,9 +41,31 @@ compatible_formats = ["1.0", "1.1", "1.2"] # Old format versions we can read mdumps = marshal.dumps mloads = marshal.loads -class PickleError(Exception): pass -class PicklingError(PickleError): pass -class UnpicklingError(PickleError): pass +class PickleError(Exception): + """A common base class for the other pickling exceptions. + + Inherits from Exception. + + """ + pass + +class PicklingError(PickleError): + """This exception is raised when an unpicklable object is passed to the + dump() method. 
+ + """ + pass + +class UnpicklingError(PickleError): + """This exception is raised when there is a problem unpickling an object, + such as a security violation. + + Note that other exceptions may also be raised during unpickling, including + (but not necessarily limited to) AttributeError, EOFError, ImportError, + and IndexError. + + """ + pass class _Stop(Exception): def __init__(self, value): @@ -111,14 +133,39 @@ del x class Pickler: def __init__(self, file, bin = 0): + """This takes a file-like object for writing a pickle data stream. + + The optional bin parameter, if true, tells the pickler to use the more + efficient binary pickle format; otherwise the ASCII format is used + (this is the default). + + The file parameter must have a write() method that accepts a single + string argument. It can thus be an open file object, a StringIO + object, or any other custom object that meets this interface. + + """ self.write = file.write self.memo = {} self.bin = bin def clear_memo(self): + """Clears the pickler's "memo". + + The memo is the data structure that remembers which objects the + pickler has already seen, so that shared or recursive objects are + pickled by reference and not by value. This method is useful when re-using + picklers. + + """ self.memo.clear() def dump(self, object): + """Write a pickled representation of object to the open file object. + + Either the binary or ASCII format will be used, depending on the + value of the bin flag passed to the constructor. + + """ self.save(object) self.write(STOP) @@ -594,11 +641,30 @@ def whichmodule(cls, clsname): class Unpickler: def __init__(self, file): + """This takes a file-like object for reading a pickle data stream. + + This class automatically determines whether the data stream was + written in binary mode or not, so it does not need a flag as in + the Pickler class factory. 
+ + The file-like object must have two methods, a read() method that + takes an integer argument, and a readline() method that requires no + arguments. Both methods should return a string. Thus the file-like + object can be a file object opened for reading, a StringIO object, + or any other custom object that meets this interface. + + """ self.readline = file.readline self.read = file.read self.memo = {} def load(self): + """Read a pickled object representation from the open file object. + + Return the reconstituted object hierarchy specified in the file + object. + + """ self.mark = object() # any new unique object self.stack = [] self.append = self.stack.append diff --git a/Lib/rexec.py b/Lib/rexec.py index 411fcc5..6dc1585a 100644 --- a/Lib/rexec.py +++ b/Lib/rexec.py @@ -114,8 +114,18 @@ RModuleImporter = ihooks.ModuleImporter class RExec(ihooks._Verbose): + """Basic restricted execution framework. - """Restricted Execution environment.""" + Code executed in this restricted environment will only have access to + modules and functions that are deemed safe; you can subclass RExec to + add or remove capabilities as desired. + + The RExec class can prevent code from performing unsafe operations like + reading or writing disk files, or using TCP/IP sockets. However, it does + not protect against code using extremely large amounts of memory or + processor time. + + """ ok_path = tuple(sys.path) # That's a policy decision @@ -135,6 +145,33 @@ class RExec(ihooks._Verbose): nok_builtin_names = ('open', 'file', 'reload', '__import__') def __init__(self, hooks = None, verbose = 0): + """Returns an instance of the RExec class. + + The hooks parameter is an instance of the RHooks class or a subclass + of it. If it is omitted or None, the default RHooks class is + instantiated. + + Whenever the RExec module searches for a module (even a built-in one) + or reads a module's code, it doesn't actually go out to the file + system itself. 
Rather, it calls methods of an RHooks instance that + was passed to or created by its constructor. (Actually, the RExec + object doesn't make these calls --- they are made by a module loader + object that's part of the RExec object. This allows another level of + flexibility, which can be useful when changing the mechanics of + import within the restricted environment.) + + By providing an alternate RHooks object, we can control the file + system accesses made to import a module, without changing the + actual algorithm that controls the order in which those accesses are + made. For instance, we could substitute an RHooks object that + passes all filesystem requests to a file server elsewhere, via some + RPC mechanism such as ILU. Grail's applet loader uses this to support + importing applets from a URL for a directory. + + If the verbose parameter is true, additional debugging output may be + sent to standard output. + + """ ihooks._Verbose.__init__(self, verbose) # XXX There's a circular reference here: self.hooks = hooks or RHooks(verbose) @@ -250,24 +287,67 @@ class RExec(ihooks._Verbose): # The r* methods are public interfaces def r_exec(self, code): + """Execute code within a restricted environment. + + The code parameter must either be a string containing one or more + lines of Python code, or a compiled code object, which will be + executed in the restricted environment's __main__ module. + + """ m = self.add_module('__main__') exec code in m.__dict__ def r_eval(self, code): + """Evaluate code within a restricted environment. + + The code parameter must either be a string containing a Python + expression, or a compiled code object, which will be evaluated in + the restricted environment's __main__ module. The value of the + expression or code object will be returned. + + """ m = self.add_module('__main__') return eval(code, m.__dict__) def r_execfile(self, file): + """Execute the Python code in the file in the restricted + environment's __main__ module. 
+ + """ m = self.add_module('__main__') execfile(file, m.__dict__) def r_import(self, mname, globals={}, locals={}, fromlist=[]): + """Import a module, raising an ImportError exception if the module + is considered unsafe. + + This method is implicitly called by code executing in the + restricted environment. Overriding this method in a subclass is + used to change the policies enforced by a restricted environment. + + """ return self.importer.import_module(mname, globals, locals, fromlist) def r_reload(self, m): + """Reload the module object, re-parsing and re-initializing it. + + This method is implicitly called by code executing in the + restricted environment. Overriding this method in a subclass is + used to change the policies enforced by a restricted environment. + + """ return self.importer.reload(m) def r_unload(self, m): + """Unload the module. + + Removes it from the restricted environment's sys.modules dictionary. + + This method is implicitly called by code executing in the + restricted environment. Overriding this method in a subclass is + used to change the policies enforced by a restricted environment. + + """ return self.importer.unload(m) # The s_* methods are similar but also swap std{in,out,err} @@ -325,26 +405,105 @@ class RExec(ihooks._Verbose): return r def s_exec(self, *args): + """Execute code within a restricted environment. + + Similar to the r_exec() method, but the code will be granted access + to restricted versions of the standard I/O streams sys.stdin, + sys.stderr, and sys.stdout. + + The code parameter must either be a string containing one or more + lines of Python code, or a compiled code object, which will be + executed in the restricted environment's __main__ module. + + """ return self.s_apply(self.r_exec, args) def s_eval(self, *args): + """Evaluate code within a restricted environment. 
+ + Similar to the r_eval() method, but the code will be granted access + to restricted versions of the standard I/O streams sys.stdin, + sys.stderr, and sys.stdout. + + The code parameter must either be a string containing a Python + expression, or a compiled code object, which will be evaluated in + the restricted environment's __main__ module. The value of the + expression or code object will be returned. + + """ return self.s_apply(self.r_eval, args) def s_execfile(self, *args): + """Execute the Python code in the file in the restricted + environment's __main__ module. + + Similar to the r_execfile() method, but the code will be granted + access to restricted versions of the standard I/O streams sys.stdin, + sys.stderr, and sys.stdout. + + """ return self.s_apply(self.r_execfile, args) def s_import(self, *args): + """Import a module, raising an ImportError exception if the module + is considered unsafe. + + This method is implicitly called by code executing in the + restricted environment. Overriding this method in a subclass is + used to change the policies enforced by a restricted environment. + + Similar to the r_import() method, but has access to restricted + versions of the standard I/O streams sys.stdin, sys.stderr, and + sys.stdout. + + """ return self.s_apply(self.r_import, args) def s_reload(self, *args): + """Reload the module object, re-parsing and re-initializing it. + + This method is implicitly called by code executing in the + restricted environment. Overriding this method in a subclass is + used to change the policies enforced by a restricted environment. + + Similar to the r_reload() method, but has access to restricted + versions of the standard I/O streams sys.stdin, sys.stderr, and + sys.stdout. + + """ return self.s_apply(self.r_reload, args) def s_unload(self, *args): + """Unload the module. + + Removes it from the restricted environment's sys.modules dictionary. 
+ + This method is implicitly called by code executing in the + restricted environment. Overriding this method in a subclass is + used to change the policies enforced by a restricted environment. + + Similar to the r_unload() method, but has access to restricted + versions of the standard I/O streams sys.stdin, sys.stderr, and + sys.stdout. + + """ return self.s_apply(self.r_unload, args) # Restricted open(...) def r_open(self, file, mode='r', buf=-1): + """Method called when open() is called in the restricted environment. + + The arguments are identical to those of the open() function, and a + file object (or a class instance compatible with file objects) + should be returned. RExec's default behaviour is to allow opening + any file for reading, but to forbid any attempt to write a file. + + This method is implicitly called by code executing in the + restricted environment. Overriding this method in a subclass is + used to change the policies enforced by a restricted environment. + + """ if mode not in ('r', 'rb'): raise IOError, "can't open files for writing in restricted mode" return open(file, mode, buf) diff --git a/Lib/robotparser.py b/Lib/robotparser.py index 99bcdae..7940586 100644 --- a/Lib/robotparser.py +++ b/Lib/robotparser.py @@ -20,6 +20,11 @@ def _debug(msg): class RobotFileParser: + """This class provides a set of methods to read, parse and answer + questions about a single robots.txt file. + + """ + def __init__(self, url=''): self.entries = [] self.default_entry = None @@ -29,17 +34,29 @@ class RobotFileParser: self.last_checked = 0 def mtime(self): + """Returns the time the robots.txt file was last fetched. + + This is useful for long-running web spiders that need to + check for new robots.txt files periodically. + + """ return self.last_checked def modified(self): + """Sets the time the robots.txt file was last fetched to the + current time. 
+ + """ import time self.last_checked = time.time() def set_url(self, url): + """Sets the URL referring to a robots.txt file.""" self.url = url self.host, self.path = urlparse.urlparse(url)[1:3] def read(self): + """Reads the robots.txt URL and feeds it to the parser.""" opener = URLopener() f = opener.open(self.url) lines = [] |