Lib/
Lib/
+"""HTTP server base class.
+Note: the class in this module doesn't implement any HTTP request itself;
+see SimpleHTTPServer for simple implementations of GET and HEAD, and
+CGIHTTPServer for POST (including CGI scripts).
+- BaseHTTPRequestHandler: HTTP request handler base class
+- test: test function
+XXX To do:
+- send server version
+- log requests even later (to capture byte count)
+- log user-agent header and other interesting goodies
+- send error log to separate file
+- are request names really case sensitive?
+"""
+# See also:
+# HTTP Working Group T. Berners-Lee
+# INTERNET-DRAFT R. T. Fielding
+# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
+# Expires September 8, 1995 March 8, 1995
+# URL:
+# Log files
+# ---------
+# Here's a quote from the NCSA httpd docs about log file format.
+# | The logfile format is as follows. Each line consists of:
+# |
+# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
+# |
+# | host: Either the DNS name or the IP number of the remote client
+# | rfc931: Any information returned by identd for this person,
+# | - otherwise.
+# | authuser: If user sent a userid for authentication, the user name,
+# | - otherwise.
+# | DD: Day
+# | Mon: Month (calendar name)
+# | YYYY: Year
+# | hh: hour (24-hour format, the machine's timezone)
+# | mm: minutes
+# | ss: seconds
+# | request: The first line of the HTTP request as sent by the client.
+# | ddd: the status code returned by the server, - if not available.
+# | bbbb: the total number of bytes sent,
+# | *not including the HTTP/1.0 header*, - if not available
+# |
+# | You can determine the name of the file accessed through request.
+# (Actually, the latter is only true if you know the server configuration
+# at the time the request was made!)
+__version__ = "0.2"
+import sys
+import time
+import socket # For gethostbyaddr()
+import string
+import rfc822
+import mimetools
+import SocketServer
# Default error message.
# This is the %-format template used by
# BaseHTTPRequestHandler.error_message_format; it is filled in from a
# dictionary with the keys 'code' (integer status code), 'message'
# (short text) and 'explain' (longer explanation).
DEFAULT_ERROR_MESSAGE = """\
<title>Error response</title>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
"""
class HTTPServer(SocketServer.TCPServer):

    """TCP server that remembers its own host name and port.

    After binding, the instance variables server_name and server_port
    are set; request handlers can reach them through self.server.
    """

    def server_bind(self):
        """Override server_bind to store the server name."""
        SocketServer.TCPServer.server_bind(self)
        host, port = self.socket.getsockname()
        # A socket bound to the wildcard address reports '0.0.0.0',
        # which cannot be resolved to a useful name; fall back to the
        # local host name in that case (and when no host is reported).
        if not host or host == '0.0.0.0':
            host = socket.gethostname()
        hostname, hostnames, hostaddrs = socket.gethostbyaddr(host)
        if '.' not in hostname:
            # Prefer an alias that looks fully qualified.
            for alias in hostnames:
                if '.' in alias:
                    hostname = alias
                    break
        self.server_name = hostname
        self.server_port = port
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0".  <path> is encoded
    using the URL encoding scheme (using %xx to signify the ASCII
    character with hex code xx).

    The protocol is vague about whether lines are separated by LF
    characters or by CRLF pairs -- for compatibility with the widest
    range of clients, both should be accepted.  Similarly, whitespace
    in the request line should be treated sensibly (allowing multiple
    spaces between components and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.0 protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version (always "HTTP/1.0"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and request_version are the broken-down request
    line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + string.split(sys.version)[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    def handle(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        The request line is read from self.rfile and split into
        self.command, self.path and self.request_version; the headers
        are parsed into self.headers; then the method named
        'do_' + command is called.  Malformed or unsupported requests
        are answered through send_error().

        """

        # Assume HTTP/0.9 until the request line proves otherwise, so
        # that send_error() (which consults self.request_version) can
        # be used for any parse failure below.
        self.request_version = version = "HTTP/0.9"
        self.raw_requestline = self.rfile.readline()
        requestline = self.raw_requestline
        # Accept both CRLF and bare LF line terminators.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = string.split(requestline)
        if len(words) == 3:
            [command, path, version] = words
            if version != self.protocol_version:
                self.send_error(400,
                                "Bad request version (%s)" % repr(version))
                return
        elif len(words) == 2:
            # No version given: HTTP/0.9, which only knows GET.
            [command, path] = words
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%s)"
                                % repr(command))
                return
        else:
            # Neither command nor path is known here; report the raw
            # request line instead.
            self.send_error(400,
                            "Bad request syntax (%s)" % repr(requestline))
            return
        self.command, self.path, self.request_version = command, path, version
        self.headers = self.MessageClass(self.rfile, 0)
        mname = 'do_' + command
        if not hasattr(self, mname):
            self.send_error(501,
                            "Unsupported method (%s)" % repr(command))
            return
        method = getattr(self, mname)
        method()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            short, explain = self.responses[code]
        except KeyError:
            short, explain = '???', '???'
        if not message:
            message = short
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.end_headers()
        self.wfile.write(self.error_message_format %
                         {'code': code,
                          'message': message,
                          'explain': explain})

    # Format template for the HTML body written by send_error().
    error_message_format = DEFAULT_ERROR_MESSAGE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        If MESSAGE is omitted, the short message registered for CODE
        in self.responses is used (empty if the code is unknown).

        """
        self.log_request(code)
        if message is None:
            if self.responses.has_key(code):
                # The status line carries the short reason phrase, not
                # the long explanation.
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %s %s\r\n" %
                             (self.protocol_version, str(code), message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header (nothing is sent for HTTP/0.9 requests)."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        apply(self.log_message, args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self):
        """Return the current date and time formatted for a message header."""
        now = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday field of a time tuple (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple (January is 1).
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version looks up the full hostname using gethostbyaddr(),
        and tries to find a name that contains at least one dot.
        If the lookup fails the bare host address is returned.

        """

        (host, port) = self.client_address
        try:
            name, names, addresses = socket.gethostbyaddr(host)
        except socket.error:
            return host
        names.insert(0, name)
        for name in names:
            if '.' in name: return name
        return names[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Don't override unless you know what you're doing (hint: incoming
    # requests are required to have exactly this version string).
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    responses = {
        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Partial information', 'Request fulfilled from cache'),
        204: ('No response', 'Request fulfilled, nothing follows'),

        301: ('Moved', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('Method', 'Object moved -- see Method and URL list'),
        304: ('Not modified',
              'Document has not changed since given time'),

        400: ('Bad request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not found', 'Nothing matches the given URI'),

        500: ('Internal error', 'Server got itself in trouble'),
        501: ('Not implemented',
              'Server does not support this operation'),
        502: ('Service temporarily overloaded',
              'The server cannot process the request due to a high load'),
        503: ('Gateway timeout',
              'The gateway server did not receive a timely response'),

        }
+def test(HandlerClass = BaseHTTPRequestHandler,
+ ServerClass = HTTPServer):
+ """Test the HTTP request handler class.
+ This runs an HTTP server on port 8000 (or the first command line
+ argument).
+ """
+ if sys.argv[1:]:
+ port = string.atoi(sys.argv[1])
+ else:
+ port = 8000
+ server_address = ('', port)
+ httpd = ServerClass(server_address, HandlerClass)
+ print "Serving HTTP on port", port, "..."
+ httpd.serve_forever()
+if __name__ == '__main__':
+ test()
Lib/
Lib/
+"""CGI-savvy HTTP Server.
+This module builds on SimpleHTTPServer by implementing GET and POST
+requests to cgi-bin scripts.
+"""
+__version__ = "0.2"
+import os
+import sys
+import time
+import socket
+import string
+import urllib
+import BaseHTTPServer
+import SimpleHTTPServer
class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts."""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether PATH corresponds to a CGI script.

        Return 1 if self.path requires running a CGI script, 0 if
        not.  As a side effect, on success the tuple (dir, rest) is
        stored in self.cgi_info, where dir is the matching CGI
        directory and rest is what follows the slash after it
        (possibly empty).

        The default implementation tests whether the path
        begins with one of the strings in the list
        self.cgi_directories (and the next character is a '/'
        or the end of the string).

        """

        path = self.path

        for x in self.cgi_directories:
            i = len(x)
            if path[:i] == x and (not path[i:] or path[i] == '/'):
                self.cgi_info = path[:i], path[i+1:]
                return 1
        return 0

    # URL path prefixes that are treated as containing CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info as set by is_cgi().  The script is run in
        a forked child with the CGI environment built from the
        request; the parent waits for it and logs a non-zero exit
        status.  Always returns None.

        """
        cgidir, rest = self.cgi_info
        # Split off the query string, then the script name; whatever
        # remains of the path becomes PATH_INFO.
        i = string.rfind(rest, '?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''
        i = string.find(rest, '/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''
        scriptname = cgidir + '/' + script
        scriptfile = self.translate_path(scriptname)
        # send_error() takes a single, already formatted message.
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%s)"
                                 % repr(scriptname))
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%s)"
                                 % repr(scriptname))
            return
        if not executable(scriptfile):
            self.send_error(403, "CGI script is not executable (%s)"
                                 % repr(scriptname))
            return
        nobody = nobody_uid()
        self.send_response(200, "Script output follows")
        self.wfile.flush() # Always flush before forking
        pid = os.fork()
        if pid != 0:
            # Parent
            pid, sts = os.waitpid(pid, 0)
            if sts:
                self.log_error("CGI script exit status x%x" % sts)
            return
        # Child
        try:
            # Reference: the CGI/1.1 environment variables
            # XXX Much of the following could be prepared ahead of time!
            env = {}
            env['SERVER_SOFTWARE'] = self.version_string()
            env['SERVER_NAME'] = self.server.server_name
            env['GATEWAY_INTERFACE'] = 'CGI/1.1'
            env['SERVER_PROTOCOL'] = self.protocol_version
            env['SERVER_PORT'] = str(self.server.server_port)
            env['REQUEST_METHOD'] = self.command
            uqrest = urllib.unquote(rest)
            env['PATH_INFO'] = uqrest
            env['PATH_TRANSLATED'] = self.translate_path(uqrest)
            env['SCRIPT_NAME'] = scriptname
            if query:
                env['QUERY_STRING'] = query
            host = self.address_string()
            if host != self.client_address[0]:
                env['REMOTE_HOST'] = host
            env['REMOTE_ADDR'] = self.client_address[0]
            # AUTH_TYPE
            # REMOTE_USER
            # REMOTE_IDENT
            env['CONTENT_TYPE'] = self.headers.type
            length = self.headers.getheader('content-length')
            if length:
                env['CONTENT_LENGTH'] = length
            accept = []
            for line in self.headers.getallmatchingheaders('accept'):
                if line[:1] in string.whitespace:
                    # Continuation line of a folded Accept header.
                    accept.append(string.strip(line))
                else:
                    # Skip the leading "Accept:" (7 characters).
                    accept = accept + string.split(line[7:])
            env['HTTP_ACCEPT'] = string.joinfields(accept, ',')
            ua = self.headers.getheader('user-agent')
            if ua:
                env['HTTP_USER_AGENT'] = ua
            # XXX Other HTTP_* headers
            import regsub
            decoded_query = regsub.gsub('+', ' ', query)
            try:
                os.setuid(nobody)
            except os.error:
                # Best effort only: without the privilege to change
                # uid the script simply runs as the current user.
                pass
            # Connect the script's stdin/stdout to the client.  Nothing
            # may be printed from here on: stdout is the HTTP response.
            os.dup2(self.rfile.fileno(), 0)
            os.dup2(self.wfile.fileno(), 1)
            os.execve(scriptfile,
                      [script, decoded_query],
                      env)
        except:
            # The child must never return into the server loop.
            self.server.handle_error(self.request, self.client_address)
            os._exit(127)
# Cached result of nobody_uid(); None until the first call.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    The uid of the password database entry named 'nobody' is used;
    if there is no such entry, one more than the largest uid in the
    database is used instead.  The result is remembered in the
    module-level variable nobody.

    """
    global nobody
    if not nobody:
        import pwd
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except pwd.error:
            uids = map(lambda entry: entry[2], pwd.getpwall())
            nobody = 1 + max(uids)
    return nobody
def executable(path):
    """Test for executable file.

    Return 1 if PATH can be stat'ed and has at least one execute
    permission bit (user, group or other) set; return 0 otherwise,
    including when PATH does not exist.

    """
    import stat
    try:
        st = os.stat(path)
    except os.error:
        return 0
    # Merely existing is not enough: look at the execute bits.
    if st[stat.ST_MODE] & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH):
        return 1
    return 0
def test(HandlerClass = CGIHTTPRequestHandler,
         ServerClass = BaseHTTPServer.HTTPServer):
    """Run the CGI-capable server via SimpleHTTPServer.test().

    HandlerClass and ServerClass are passed through unchanged.

    """
    # The former '-r' option referred to a MyArchive class that does
    # not exist in this module and could only raise NameError, so it
    # has been dropped.
    SimpleHTTPServer.test(HandlerClass, ServerClass)
+if __name__ == '__main__':
+ test()
Lib/
Lib/
+"""Simple HTTP Server.
+This module builds on BaseHTTPServer by implementing the standard GET
+and HEAD requests in a fairly straightforward manner.
+"""
+__version__ = "0.2"
+import os
+import pwd
+import sys
+import time
+import socket
+import string
+import posixpath
+import SocketServer
+import BaseHTTPServer
def nobody_uid():
    """Internal routine to get nobody's uid.

    Return the uid of the password database entry named 'nobody',
    or one more than the largest uid in the database when there is
    no such entry.

    """
    try:
        uid = pwd.getpwnam('nobody')[2]
    except pwd.error:
        all_uids = map(lambda entry: entry[2], pwd.getpwall())
        uid = 1 + max(all_uids)
    return uid

# Computed once, at import time.
nobody = nobody_uid()
class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  It assumes that all files are plain text files
    unless they have the extension ".html" in which case it assumes
    they are HTML files.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            self.copyfile(f, self.wfile)
            f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        if os.path.isdir(path):
            self.send_error(403, "Directory listing not supported")
            return None
        try:
            f = open(path)
        except IOError:
            self.send_error(404, "File not found")
            return None
        self.send_response(200)
        self.send_header("Content-type", self.guess_type(path))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        The result is always located below the current directory.
        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        """
        path = posixpath.normpath(path)
        words = string.splitfields(path, '/')
        words = filter(None, words)
        path = os.getcwd()
        for word in words:
            # Keep only the last plain component of each word.
            drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            if word in (os.curdir, os.pardir): continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the OUTPUTFILE
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """

        BLOCKSIZE = 8192
        while 1:
            data = source.read(BLOCKSIZE)
            if not data: break
            outputfile.write(data)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using text/plain
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """

        base, ext = posixpath.splitext(path)
        if self.extensions_map.has_key(ext):
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    # Maps a file extension (including the dot) to a MIME type.
    extensions_map = {
            '': 'text/plain',   # Default, *must* be present
            '.html': 'text/html',
            }
def test(HandlerClass = SimpleHTTPRequestHandler,
         ServerClass = SocketServer.TCPServer):
    """Run the simple file server by delegating to BaseHTTPServer.test().

    Both arguments are handed on unchanged.

    """
    BaseHTTPServer.test(HandlerClass, ServerClass)
+if __name__ == '__main__':
+ test()
Lib/
Lib/
+"""Generic socket server classes.
+This module tries to capture the various aspects of defining a server:
+- address family:
+ - AF_INET: IP (Internet Protocol) sockets (default)
+ - AF_UNIX: Unix domain sockets
+ - others, e.g. AF_DECNET are conceivable (see <socket.h>)
+- socket type:
+ - SOCK_STREAM (reliable stream, e.g. TCP)
+ - SOCK_DGRAM (datagrams, e.g. UDP)
+- client address verification before further looking at the request
+ (This is actually a hook for any processing that needs to look
+ at the request before anything else, e.g. logging)
+- how to handle multiple requests:
+ - synchronous (one request is handled at a time)
+ - forking (each request is handled by a new process)
+ - threading (each request is handled by a new thread)
+The classes in this module favor the server type that is simplest to
+write: a synchronous TCP/IP server. This is bad class design, but
+saves some typing. (There's also the issue that a deep class hierarchy
+slows down method lookups.)
+There are four classes in an inheritance diagram that represent
+synchronous servers of four types:
+ +-----------+ +------------------+
+ | TCPServer |------->| UnixStreamServer |
+ +-----------+ +------------------+
+ |
+ v
+ +-----------+ +--------------------+
+ | UDPServer |------->| UnixDatagramServer |
+ +-----------+ +--------------------+
+(Note that UnixDatagramServer derives from UDPServer, not from
+UnixStreamServer -- the only difference between an IP and a Unix
+stream server is the address family, which is simply repeated in both
+unix server classes.)
+Forking and threading versions of each type of server can be created
+using the ForkingMixIn and ThreadingMixIn mix-in classes. For
+instance, a threading UDP server class is created as follows:
+ class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
+(The Mix-in class must come first, since it overrides a method defined
+in UDPServer!)
+To implement a service, you must derive a class from
+BaseRequestHandler and redefine its handle() method. You can then run
+various versions of the service by combining one of the server classes
+with your request handler class.
+The request handler class must be different for datagram or stream
+services. This can be hidden by using the mix-in request handler
+classes StreamRequestHandler or DatagramRequestHandler.
+Of course, you still have to use your head!
+For instance, it makes no sense to use a forking server if the service
+contains state in memory that can be modified by requests (since the
+modifications in the child process would never reach the initial state
+kept in the parent process and passed to each child). In this case,
+you can use a threading server, but you will probably have to use
+locks to prevent two requests that come in nearly simultaneously from
+applying conflicting changes to the server state.
+On the other hand, if you are building e.g. an HTTP server, where all
+data is stored externally (e.g. in the file system), a synchronous
+class will essentially render the service "deaf" while one request is
+being handled -- which may be for a very long time if a client is slow
+to read all the data it has requested. Here a threading or forking
+server is appropriate.
+In some cases, it may be appropriate to process part of a request
+synchronously, but to finish processing in a forked child depending on
+the request data. This can be implemented by using a synchronous
+server and doing an explicit fork in the request handler class's
+handle() method.
+Another approach to handling multiple simultaneous requests in an
+environment that supports neither threads nor fork (or where these are
+too expensive or inappropriate for the service) is to maintain an
+explicit table of partially finished requests and to use select() to
+decide which request to work on next (or whether to handle a new
+incoming request). This is particularly important for stream services
+where each client can potentially be connected for a long time (if
+threads or subprocesses can't be used).
+Future work:
+- Standard classes for Sun RPC (which uses either UDP or TCP)
+- Standard mix-in classes to implement various authentication
+ and encryption schemes
+- Standard framework for select-based multiplexing
+XXX Open problems:
+- What to do with out-of-band data?
+"""
+__version__ = "0.2"
+import socket
+import sys
+import os
+class TCPServer:
+ """Base class for various socket-based server classes.
+ Defaults to synchronous IP stream (i.e., TCP).
+ Methods for the caller:
+ - __init__(server_address, RequestHandlerClass)
+ - serve_forever()
+ - handle_request() # if you don't use serve_forever()
+ - fileno() -> int # for select()
+ Methods that may be overridden:
+ - server_bind()
+ - server_activate()
+ - get_request() -> request, client_address
+ - verify_request(request, client_address)
+ - process_request(request, client_address)
+ - handle_error()
+ Methods for derived classes:
+ - finish_request(request, client_address)
+ Class variables that may be overridden by derived classes or
+ instances:
+ - address_family
+ - socket_type
+ - request_queue_size (only for stream sockets)
+ Instance variables:
+ - server_address
+ - RequestHandlerClass
+ - socket
+ """
+ address_family = socket.AF_INET
+ socket_type = socket.SOCK_STREAM
+ request_queue_size = 5
+ def __init__(self, server_address, RequestHandlerClass):
+ """Constructor. May be extended, do not override."""
+ self.server_address = server_address
+ self.RequestHandlerClass = RequestHandlerClass
+ self.socket = socket.socket(self.address_family,
+ self.socket_type)
+ self.server_bind()
+ self.server_activate()
+ def server_bind(self):
+ """Called by constructor to bind the socket.
+ May be overridden.
+ """
+ self.socket.bind(self.server_address)
+ def server_activate(self):
+ """Called by constructor to activate the server.
+ May be overridden.
+ """
+ self.socket.listen(self.request_queue_size)
+ def fileno(self):
+ """Return socket file number.
+ Interface required by select().
+ """
+ return self.socket.fileno()
+ def serve_forever(self):
+ """Handle one request at a time until doomsday."""
+ while 1:
+ self.handle_request()
+ # The distinction between handling, getting, processing and
+ # finishing a request is fairly arbitrary. Remember:
+ #
+ # - handle_request() is the top-level call. It calls
+ # get_request(), verify_request() and process_request()
+ # - get_request() is different for stream or datagram sockets
+ # - process_request() is the place that may fork a new process
+ # or create a new thread to finish the request
+ # - finish_request() instantiates the request handler class;
+ # this constructor will handle the request all by itself
+ def handle_request(self):
+ """Handle one request, possibly blocking."""
+ request, client_address = self.get_request()
+ if self.verify_request(request, client_address):
+ try:
+ self.process_request(request, client_address)
+ except:
+ self.handle_error(request, client_address)
+ def get_request(self):
+ """Get the request and client address from the socket.
+ May be overridden.
+ """
+ return self.socket.accept()
+ def verify_request(self, request, client_address):
+ """Verify the request. May be overridden.
+ Return true if we should proceed with this request.
+ """
+ return 1
+ def process_request(self, request, client_address):
+ """Call finish_request.
+ Overridden by ForkingMixIn and ThreadingMixIn.
+ """
+ self.finish_request(request, client_address)
+ def finish_request(self, request, client_address):
+ """Finish one request by instantiating RequestHandlerClass."""
+ self.RequestHandlerClass(request, client_address, self)
+ def handle_error(self, request, client_address):
+ """Handle an error gracefully. May be overridden.
+ The default is to print a traceback and continue.
+ """
+ exc, value, tb = sys.exc_type, sys.exc_value, sys.exc_traceback
+ print '-'*40
+ print 'Exception happened during processing of request from',
+ print client_address
+ import traceback
+ traceback.print_exception(exc, value, tb)
+ print '-'*40
class UDPServer(TCPServer):

    """UDP server class.

    A request is the pair returned by recvfrom() on the server
    socket; at most max_packet_size bytes are read per datagram.

    """

    socket_type = socket.SOCK_DGRAM

    # Largest datagram that get_request() will read.
    max_packet_size = 8192

    def get_request(self):
        """Return the result of recvfrom() on the server socket."""
        # max_packet_size is a class attribute, not a global name.
        return self.socket.recvfrom(self.max_packet_size)

    def server_activate(self):
        """Do nothing: listen() does not apply to datagram sockets."""
        pass
# The Unix-domain variants only exist where the socket module has
# the AF_UNIX address family.
if hasattr(socket, 'AF_UNIX'):

    class UnixStreamServer(TCPServer):

        """Stream server using the AF_UNIX address family."""

        address_family = socket.AF_UNIX


    class UnixDatagramServer(UDPServer):

        """Datagram server using the AF_UNIX address family."""

        address_family = socket.AF_UNIX
class ForkingMixIn:

    """Mix-in class to handle each request in a new process."""

    # List of pids of children not yet waited for; None until the
    # first child has been forked.
    active_children = None

    def collect_children(self):
        """Internal routine to wait for died children.

        Reaps, without blocking, every child that has already exited
        and drops its pid from self.active_children.

        """
        while self.active_children:
            try:
                # waitpid() returns a (pid, status) pair; the pid is
                # 0 when no child has exited yet.
                pid, status = os.waitpid(0, os.WNOHANG)
            except os.error:
                # Nothing left to wait for.
                pid = None
            if not pid:
                break
            # The reaped process may not be one of ours (e.g. a
            # child forked by a request handler).
            if pid in self.active_children:
                self.active_children.remove(pid)

    def process_request(self, request, client_address):
        """Fork a new subprocess to process the request."""
        self.collect_children()
        pid = os.fork()
        if pid:
            # Parent process
            if self.active_children is None:
                self.active_children = []
            self.active_children.append(pid)
            return
        else:
            # Child process.
            # This must never return, hence os._exit()!
            try:
                self.finish_request(request, client_address)
                os._exit(0)
            except:
                try:
                    self.handle_error(request,
                                      client_address)
                finally:
                    os._exit(1)
+class ThreadingMixIn:
+ """Mix-in class to handle each request in a new thread."""
+ def process_request(self, request, client_address):
+ """Start a new thread to process the request."""
+ import thread
+ thread.start_new_thread(self.finish_request,
+ (request, client_address))
+class ForkingUDPServer(ForkingMixIn, UDPServer): pass
+class ForkingTCPServer(ForkingMixIn, TCPServer): pass
+class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
+class ThreadingTCPServer(ThreadingMixIn, TCPServer): pass
+class BaseRequestHandler:
+ """Base class for request handler classes.
+ This class is instantiated for each request to be handled. The
+ constructor sets the instance variables request, client_address
+ and server, and then calls the handle() method. To implement a
+ specific service, all you need to do is to derive a class which
+ defines a handle() method.
+ The handle() method can find the request as self.request, the
+ client address as self.client_request, and the server (in case it
+ needs access to per-server information) as self.server. Since a
+ separate instance is created for each request, the handle() method
+ can define arbitrary other instance variariables.
+ """
+ def __init__(self, request, client_address, server):
+ self.request = request
+ self.client_address = client_address
+ self.server = server
+ try:
+ self.setup()
+ self.handle()
+ self.finish()
+ finally:
+ sys.exc_traceback = None # Help garbage collection
+ def setup(self):
+ pass
+ def __del__(self):
+ pass
+ def handle(self):
+ pass
+ def finish(self):
+ pass
+# The following two classes make it possible to use the same service
+# class for stream or datagram servers.
+# Each class sets up these instance variables:
+# - rfile: a file object from which the request is read
+# - wfile: a file object to which the reply is written
+# When the handle() method returns, wfile is flushed properly
+class StreamRequestHandler(BaseRequestHandler):
+ """Define self.rfile and self.wfile for stream sockets."""
+ def setup(self):
+ self.connection = self.request
+ self.rfile = self.connection.makefile('r')
+ self.wfile = self.connection.makefile('w', 0)
+ def finish(self):
+ self.wfile.flush()
+class DatagramRequestHandler(BaseRequestHandler):
+ """Define self.rfile and self.wfile for datagram sockets."""
+ def setup(self):
+ import StringIO
+ self.packet, self.socket = self.request
+ self.rfile = StringIO.StringIO(self.packet)
+ self.wfile = StringIO.StringIO(self.packet)
+ def finish(self):
+ self.socket.send(self.wfile.getvalue())
Lib/
Lib/
+"""Import hook support.
+Consistent use of this module will make it possible to change the
+different mechanisms involved in loading modules independently.
+While the built-in module imp exports interfaces to the built-in
+module searching and loading algorithm, and it is possible to replace
+the built-in function __import__ in order to change the semantics of
+the import statement, until now it has been difficult to combine the
+effect of different __import__ hacks, like loading modules from URLs,
+implementing a hierarchical module namespace, or restricted execution.
+This module defines three new concepts:
+(1) A "file system hooks" class provides an interface to a filesystem.
+One hooks class is defined (Hooks), which uses the interface provided
+by standard modules os and os.path. It should be used as the base
+class for other hooks classes.
+(2) A "module loader" class provides an interface to search for a
+module in a search path and to load it. It defines a method which
+searches for a module in a single directory; by overriding this method
+one can redefine the details of the search. If the directory is None,
+built-in and frozen modules are searched instead.
+Two module loader classes are defined, both implementing the search
+strategy used by the built-in __import__ function: BasicModuleLoader
+uses the imp module's find_module interface, while ModuleLoader uses
+a file system hooks class to interact with the file system. Both
+use the imp module's load_* interfaces to actually load the module.
+(3) A "module importer" class provides an interface to import a
+module, as well as interfaces to reload and unload a module. It also
+provides interfaces to install and uninstall itself instead of the
+default __import__ and reload (and unload) functions.
+One module importer class is defined (ModuleImporter), which uses a
+module loader instance passed in (by default a ModuleLoader is
+instantiated).
+The classes defined here should be used as base classes for extended
+functionality along those lines.
+If a module importer class supports dotted names, its import_module()
+must return a different value depending on whether it is called on
+behalf of a "from ... import ..." statement or not. (This is caused
+by the way the __import__ hook is used by the Python interpreter.) It
+would also be wise to install a different version of reload().
+XXX Should the imp.load_* functions also be called via the hooks?
+"""
+import __builtin__
+import imp
+import os
+import sys
+class _Verbose:
+ def __init__(self, verbose = 0):
+ self.verbose = verbose
+ def get_verbose(self):
+ return self.verbose
+ def set_verbose(self, verbose):
+ self.verbose = verbose
+ # XXX The following is an experimental interface
+ def note(self, *args):
+ if self.verbose:
+ apply(self.message, args)
+ def message(self, format, *args):
+ print format%args
+class BasicModuleLoader(_Verbose):
+ """Basic module loader.
+ This provides the same functionality as built-in import. It
+ doesn't deal with checking sys.modules -- all it provides is
+ find_module() and a load_module(), as well as find_module_in_dir()
+ which searches just one directory, and can be overridden by a
+ derived class to change the module search algorithm when the basic
+ dependency on sys.path is unchanged.
+ The interface is a little more convenient than imp's:
+ find_module(name, [path]) returns None or 'stuff', and
+ load_module(name, stuff) loads the module.
+ """
+ def find_module(self, name, path = None):
+ if path is None:
+ path = [None] + self.default_path()
+ for dir in path:
+ stuff = self.find_module_in_dir(name, dir)
+ if stuff: return stuff
+ return None
+ def default_path(self):
+ return sys.path
+ def find_module_in_dir(self, name, dir):
+ if dir is None:
+ return self.find_builtin_module(name)
+ else:
+ try:
+ return imp.find_module(name, [dir])
+ except ImportError:
+ return None
+ def find_builtin_module(self, name):
+ if imp.is_builtin(name):
+ return None, '', ('', '', BUILTIN_MODULE)
+ if imp.is_frozen(name):
+ return None, '', ('', '', FROZEN_MODULE)
+ return None
+ def load_module(self, name, stuff):
+ file, filename, (suff, mode, type) = stuff
+ if type == BUILTIN_MODULE:
+ return imp.init_builtin(name)
+ if type == FROZEN_MODULE:
+ return imp.init_frozen(name)
+ if type == C_EXTENSION:
+ return imp.load_dynamic(name, filename, file)
+ if type == PY_SOURCE:
+ return imp.load_source(name, filename, file)
+ if type == PY_COMPILED:
+ return imp.load_compiled(name, filename, file)
+ raise ImportError, "Unrecognized module type (%s) for %s" % \
+ (`type`, name)
+class Hooks(_Verbose):
+ """Hooks into the filesystem and interpreter.
+ By deriving a subclass you can redefine your filesystem interface,
+ e.g. to merge it with the URL space.
+ This base class behaves just like the native filesystem.
+ """
+ # imp interface
+ def get_suffixes(self): return imp.get_suffixes()
+ def new_module(self, name): return imp.new_module(name)
+ def is_builtin(self, name): return imp.is_builtin(name)
+ def init_builtin(self, name): return imp.init_builtin(name)
+ def is_frozen(self, name): return imp.is_frozen(name)
+ def init_frozen(self, name): return imp.init_frozen(name)
+ def get_frozen_object(self, name): return imp.get_frozen_object(name)
+ def load_source(self, name, filename, file=None):
+ return imp.load_source(name, filename, file)
+ def load_compiled(self, name, filename, file=None):
+ return imp.load_compiled(name, filename, file)
+ def load_dynamic(self, name, filename, file=None):
+ return imp.load_dynamic(name, filename, file)
+ def add_module(self, name):
+ d = self.modules_dict()
+ if d.has_key(name): return d[name]
+ d[name] = m = self.new_module(name)
+ return m
+ # sys interface
+ def modules_dict(self): return sys.modules
+ def default_path(self): return sys.path
+ def path_split(self, x): return os.path.split(x)
+ def path_join(self, x, y): return os.path.join(x, y)
+ def path_isabs(self, x): return os.path.isabs(x)
+ # etc.
+ def path_exists(self, x): return os.path.exists(x)
+ def path_isdir(self, x): return os.path.isdir(x)
+ def path_isfile(self, x): return os.path.isfile(x)
+ def path_islink(self, x): return os.path.islink(x)
+ # etc.
+ def openfile(self, *x): return apply(open, x)
+ openfile_error = IOError
+ def listdir(self, x): return os.listdir(x)
+ listdir_error = os.error
+ # etc.
+class ModuleLoader(BasicModuleLoader):
+ """Default module loader; uses file system hooks.
+ By defining suitable hooks, you might be able to load modules from
+ other sources than the file system, e.g. from compressed or
+ encrypted files, tar files or (if you're brave!) URLs.
+ """
+ def __init__(self, hooks = None, verbose = 0):
+ BasicModuleLoader.__init__(self, verbose)
+ self.hooks = hooks or Hooks(verbose)
+ def default_path(self):
+ return self.hooks.default_path()
+ def modules_dict(self):
+ return self.hooks.modules_dict()
+ def get_hooks(self):
+ return self.hooks
+ def set_hooks(self, hooks):
+ self.hooks = hooks
+ def find_builtin_module(self, name):
+ if self.hooks.is_builtin(name):
+ return None, '', ('', '', BUILTIN_MODULE)
+ if self.hooks.is_frozen(name):
+ return None, '', ('', '', FROZEN_MODULE)
+ return None
+ def find_module_in_dir(self, name, dir):
+ if dir is None:
+ return self.find_builtin_module(name)
+ for info in self.hooks.get_suffixes():
+ suff, mode, type = info
+ fullname = self.hooks.path_join(dir, name+suff)
+ try:
+ fp = self.hooks.openfile(fullname, mode)
+ return fp, fullname, info
+ except self.hooks.openfile_error:
+ pass
+ return None
+ def load_module(self, name, stuff):
+ file, filename, (suff, mode, type) = stuff
+ if type == BUILTIN_MODULE:
+ return self.hooks.init_builtin(name)
+ if type == FROZEN_MODULE:
+ return self.hooks.init_frozen(name)
+ if type == C_EXTENSION:
+ return self.hooks.load_dynamic(name, filename, file)
+ if type == PY_SOURCE:
+ return self.hooks.load_source(name, filename, file)
+ if type == PY_COMPILED:
+ return self.hooks.load_compiled(name, filename, file)
+ raise ImportError, "Unrecognized module type (%s) for %s" % \
+ (`type`, name)
+class FancyModuleLoader(ModuleLoader):
+ """Fancy module loader -- parses and execs the code itself."""
+ def load_module(self, name, stuff):
+ file, filename, (suff, mode, type) = stuff
+ if type == FROZEN_MODULE:
+ code = self.hooks.get_frozen_object(name)
+ elif type == PY_COMPILED:
+ code = marshal.load(file)
+ elif type == PY_SOURCE:
+ data =
+ code = compile(data, filename, 'exec')
+ else:
+ return ModuleLoader.load_module(self, name, stuff)
+ m = self.hooks.add_module(name)
+ exec code in m.__dict__
+ return m
+class ModuleImporter(_Verbose):
+ """Default module importer; uses module loader.
+ This provides the same functionality as built-in import, when
+ combined with ModuleLoader.
+ """
+ def __init__(self, loader = None, verbose = 0):
+ _Verbose.__init__(self, verbose)
+ self.loader = loader or ModuleLoader(None, verbose)
+ self.modules = self.loader.modules_dict()
+ def get_loader(self):
+ return self.loader
+ def set_loader(self, loader):
+ self.loader = loader
+ def get_hooks(self):
+ return self.loader.get_hooks()
+ def set_hooks(self, hooks):
+ return self.loader.set_hooks(hooks)
+ def import_module(self, name, globals={}, locals={}, fromlist=[]):
+ if self.modules.has_key(name):
+ return self.modules[name] # Fast path
+ stuff = self.loader.find_module(name)
+ if not stuff:
+ raise ImportError, "No module named %s" % name
+ return self.loader.load_module(name, stuff)
+ def reload(self, module, path = None):
+ stuff = self.loader.find_module(name, path)
+ if not stuff:
+ raise ImportError, "Module %s not found for reload" % name
+ return self.loader.load_module(name, stuff)
+ def unload(self, module):
+ del self.modules[module.__name__]
+ # XXX Should this try to clear the module's namespace?
+ def install(self):
+ self.save_import_module = __builtin__.__import__
+ self.save_reload = __builtin__.reload
+ if not hasattr(__builtin__, 'unload'):
+ __builtin__.unload = None
+ self.save_unload = __builtin__.unload
+ __builtin__.__import__ = self.import_module
+ __builtin__.reload = self.reload
+ __builtin__.unload = self.unload
+ def uninstall(self):
+ __builtin__.__import__ = self.save_import_module
+ __builtin__.reload = self.save_reload
+ __builtin__.unload = self.save_unload
+ if not __builtin__.unload:
+ del __builtin__.unload
+# XXX Some experimental hacks -- importing ihooks auto-installs!
+# XXX (That's supposed to be transparent anyway...)
+# Importer that install() falls back on when it is called without one;
+# while this is None a fresh ModuleImporter is created instead.
+default_importer = None
+# Importer most recently installed through install(); None until then.
+current_importer = None
+def install(importer = None):
+ global current_importer
+ current_importer = importer or default_importer or ModuleImporter()
+ current_importer.install()
+def uninstall():
+ global current_importer
+ current_importer.uninstall()
Lib/
Lib/
+"""New import scheme with package support.
+A Package is a module that can contain other modules. Packages can be
+nested. Packages introduce dotted names for modules, like P.Q.M, which
+could correspond to a file P/Q/M.py found somewhere on sys.path. It
+is possible to import a package itself, though this makes little sense
+unless the package contains a module called __init__.
+A package has two variables that control the namespace used for
+packages and modules, both initialized to sensible defaults the first
+time the package is referenced.
+(1) A package's *module search path*, contained in the per-package
+variable __path__, defines a list of *directories* where submodules or
+subpackages of the package are searched. It is initialized to the
+directory containing the package. Setting this variable to None makes
+the module search path default to sys.path (this is not quite the same
+as setting it to sys.path, since the latter won't track later
+assignments to sys.path).
+(2) A package's *import domain*, contained in the per-package variable
+__domain__, defines a list of *packages* that are searched (using
+their respective module search paths) to satisfy imports. It is
+initialized to the list consisting of the package itself, its parent
+package, its parent's parent, and so on, ending with the root package
+(the nameless package containing all top-level packages and modules,
+whose module search path is None, implying sys.path).
+The default domain implements a search algorithm called "expanding
+search". An alternative search algorithm called "explicit search"
+fixes the import search path to contain only the root package,
+requiring the modules in the package to name all imported modules by
+their full name. The convention of using '__' to refer to the current
+package (both as a per-module variable and in module names) can be
+used by packages using explicit search to refer to modules in the same
+package; this combination is known as "explicit-relative search".
+The PackageImporter and PackageLoader classes together implement the
+following policies:
+- There is a root package, whose name is ''. It cannot be imported
+ directly but may be referenced, e.g. by using '__' from a top-level
+ module.
+- In each module or package, the variable '__' contains a reference to
+ the parent package; in the root package, '__' points to itself.
+- In the name for imported modules (e.g. M in "import M" or "from M
+ import ..."), a leading '__' refers to the current package (i.e.
+ the package containing the current module); leading '__.__' and so
+ on refer to the current package's parent, and so on. The use of
+ '__' elsewhere in the module name is not supported.
+- Modules are searched using the "expanding search" algorithm by
+ virtue of the default value for __domain__.
+- If A.B.C is imported, A is searched using __domain__; then
+ subpackage B is searched in A using its __path__, and so on.
+- Built-in modules have priority: even if a file exists in a
+ package, "import sys" imports the built-in sys module.
+- The same holds for frozen modules, for better or for worse.
+- Submodules and subpackages are not automatically loaded when their
+ parent package is loaded.
+- The construct "from package import *" is illegal. (It can still be
+ used to import names from a module.)
+- When "from package import module1, module2, ..." is used, those
+ modules are explicitly loaded.
+- When a package is loaded, if it has a submodule __init__, that
+ module is loaded. This is the place where required submodules can
+ be loaded, the __path__ variable extended, etc. The __init__ module
+ is loaded even if the package was loaded only in order to create a
+ stub for a sub-package: if "import P.Q.R" is the first reference to
+ P, and P has a submodule __init__, P.__init__ is loaded before P.Q
+ is even searched.
+- It is possible to import a package that has no __init__ submodule;
+ this is not particularly useful but there may be useful applications
+ for it (e.g. to manipulate its search paths from the outside!).
+- There are no special provisions for os.chdir(). If you plan to use
+ os.chdir() before you have imported all your modules, it is better
+ not to have relative pathnames in sys.path. (This could actually be
+ fixed by changing the implementation of path_join() in the hook to
+ absolutize paths.)
+- Packages and modules are introduced in sys.modules as soon as their
+ loading is started. When the loading is terminated by an exception,
+ the sys.modules entries remain around.
+- There are no special measures to support mutually recursive modules,
+ but it will work under the same conditions where it works in the
+ flat module space system.
+- Sometimes dummy entries (whose value is None) are entered in
+ sys.modules, to indicate that a particular module does not exist --
+ this is done to speed up the expanding search algorithm when a
+ module residing at a higher level is repeatedly imported (Python
+ promises that importing a previously imported module is cheap!)
+- Although dynamically loaded extensions are allowed inside packages,
+ the current implementation (hardcoded in the interpreter) of their
+ initialization may cause problems if an extension invokes the
+ interpreter during its initialization.
+- reload() may find another version of the module only if it occurs on
+ the package search path. Thus, it keeps the connection to the
+ package to which the module belongs, but may find a different file.
+XXX Need to have an explicit name for '', e.g. '__root__'.
+"""
+import imp
+import string
+import sys
+import __builtin__
+import ihooks
+from ihooks import ModuleLoader, ModuleImporter
+class PackageLoader(ModuleLoader):
+ """A subclass of ModuleLoader with package support.
+ find_module_in_dir() will succeed if there's a subdirectory with
+ the given name; load_module() will create a stub for a package and
+ load its __init__ module if it exists.
+ """
+ def find_module_in_dir(self, name, dir):
+ if dir is not None:
+ dirname = self.hooks.path_join(dir, name)
+ if self.hooks.path_isdir(dirname):
+ return None, dirname, ('', '', 'PACKAGE')
+ return ModuleLoader.find_module_in_dir(self, name, dir)
+ def load_module(self, name, stuff):
+ file, filename, info = stuff
+ suff, mode, type = info
+ if type == 'PACKAGE':
+ return self.load_package(name, stuff)
+ if sys.modules.has_key(name):
+ m = sys.modules[name]
+ else:
+ sys.modules[name] = m = imp.new_module(name)
+ self.set_parent(m)
+ if type == imp.C_EXTENSION and '.' in name:
+ return self.load_dynamic(name, stuff)
+ else:
+ return ModuleLoader.load_module(self, name, stuff)
+ def load_dynamic(self, name, stuff):
+ file, filename, (suff, mode, type) = stuff
+ # Hack around restriction in imp.load_dynamic()
+ i = string.rfind(name, '.')
+ tail = name[i+1:]
+ if sys.modules.has_key(tail):
+ save = sys.modules[tail]
+ else:
+ save = None
+ sys.modules[tail] = imp.new_module(name)
+ try:
+ m = imp.load_dynamic(tail, filename, file)
+ finally:
+ if save:
+ sys.modules[tail] = save
+ else:
+ del sys.modules[tail]
+ sys.modules[name] = m
+ return m
+ def load_package(self, name, stuff):
+ file, filename, info = stuff
+ if sys.modules.has_key(name):
+ package = sys.modules[name]
+ else:
+ sys.modules[name] = package = imp.new_module(name)
+ package.__path__ = [filename]
+ self.init_package(package)
+ return package
+ def init_package(self, package):
+ self.set_parent(package)
+ self.set_domain(package)
+ self.call_init_module(package)
+ def set_parent(self, m):
+ name = m.__name__
+ if '.' in name:
+ name = name[:string.rfind(name, '.')]
+ else:
+ name = ''
+ m.__ = sys.modules[name]
+ def set_domain(self, package):
+ name = package.__name__
+ package.__domain__ = domain = [name]
+ while '.' in name:
+ name = name[:string.rfind(name, '.')]
+ domain.append(name)
+ if name:
+ domain.append('')
+ def call_init_module(self, package):
+ stuff = self.find_module('__init__', package.__path__)
+ if stuff:
+ m = self.load_module(package.__name__ + '.__init__', stuff)
+ package.__init__ = m
+class PackageImporter(ModuleImporter):
+ """Importer that understands packages and '__'."""
+ def __init__(self, loader = None, verbose = 0):
+ ModuleImporter.__init__(self,
+ loader or PackageLoader(None, verbose), verbose)
+ def import_module(self, name, globals={}, locals={}, fromlist=[]):
+ if globals.has_key('__'):
+ package = globals['__']
+ else:
+ # No calling context, assume in root package
+ package = sys.modules['']
+ if name[:3] in ('__.', '__'):
+ p = package
+ name = name[3:]
+ while name[:3] in ('__.', '__'):
+ p = package.__
+ name = name[3:]
+ if not name:
+ return self.finish(package, p, '', fromlist)
+ if '.' in name:
+ i = string.find(name, '.')
+ name, tail = name[:i], name[i:]
+ else:
+ tail = ''
+ mname = p.__name__ and p.__name__+'.'+name or name
+ m = self.get1(mname)
+ return self.finish(package, m, tail, fromlist)
+ if '.' in name:
+ i = string.find(name, '.')
+ name, tail = name[:i], name[i:]
+ else:
+ tail = ''
+ for pname in package.__domain__:
+ mname = pname and pname+'.'+name or name
+ m = self.get0(mname)
+ if m: break
+ else:
+ raise ImportError, "No such module %s" % name
+ return self.finish(m, m, tail, fromlist)
+ def finish(self, module, m, tail, fromlist):
+ # Got ....A; now get ....A.B.C.D
+ yname = m.__name__
+ if tail and sys.modules.has_key(yname + tail): # Fast path
+ yname, tail = yname + tail, ''
+ m = self.get1(yname)
+ while tail:
+ i = string.find(tail, '.', 1)
+ if i > 0:
+ head, tail = tail[:i], tail[i:]
+ else:
+ head, tail = tail, ''
+ yname = yname + head
+ m = self.get1(yname)
+ # Got ....A.B.C.D; now finalize things depending on fromlist
+ if not fromlist:
+ return module
+ if '__' in fromlist:
+ raise ImportError, "Can't import __ from anywhere"
+ if not hasattr(m, '__path__'): return m
+ if '*' in fromlist:
+ raise ImportError, "Can't import * from a package"
+ for f in fromlist:
+ if hasattr(m, f): continue
+ fname = yname + '.' + f
+ self.get1(fname)
+ return m
+ def get1(self, name):
+ m = self.get(name)
+ if not m:
+ raise ImportError, "No module named %s" % name
+ return m
+ def get0(self, name):
+ m = self.get(name)
+ if not m:
+ sys.modules[name] = None
+ return m
+ def get(self, name):
+ # Internal routine to get or load a module when its parent exists
+ if sys.modules.has_key(name):
+ return sys.modules[name]
+ if '.' in name:
+ i = string.rfind(name, '.')
+ head, tail = name[:i], name[i+1:]
+ else:
+ head, tail = '', name
+ path = sys.modules[head].__path__
+ stuff = self.loader.find_module(tail, path)
+ if not stuff:
+ return None
+ sys.modules[name] = m = self.loader.load_module(name, stuff)
+ if head:
+ setattr(sys.modules[head], tail, m)
+ return m
+ def reload(self, module):
+ name = module.__name__
+ if '.' in name:
+ i = string.rfind(name, '.')
+ head, tail = name[:i], name[i+1:]
+ path = sys.modules[head].__path__
+ else:
+ tail = name
+ path = sys.modules[''].__path__
+ stuff = self.loader.find_module(tail, path)
+ if not stuff:
+ raise ImportError, "No module named %s" % name
+ return self.loader.load_module(name, stuff)
+ def unload(self, module):
+ if hasattr(module, '__path__'):
+ raise ImportError, "don't know how to unload packages yet"
+ PackageImporter.unload(self, module)
+ def install(self):
+ if not sys.modules.has_key(''):
+ sys.modules[''] = package = imp.new_module('')
+ package.__path__ = None
+ self.loader.init_package(package)
+ for m in sys.modules.values():
+ if not m: continue
+ if not hasattr(m, '__'):
+ self.loader.set_parent(m)
+ ModuleImporter.install(self)
+def install(v = 0):
+ ihooks.install(PackageImporter(None, v))
+def uninstall():
+ ihooks.uninstall()
+def ni(v = 0):
+ install(v)
+def no():
+ uninstall()
+def test():
+ import pdb
+ try:
+ testproper()
+ except:
+ sys.last_type, sys.last_value, sys.last_traceback = (
+ sys.exc_type, sys.exc_value, sys.exc_traceback)
+ print
+ print sys.last_type, ':', sys.last_value
+ print
+def testproper():
+ install(1)
+ try:
+ import mactest
+ print dir(mactest)
+ raw_input('OK?')
+ finally:
+ uninstall()
+# Run the interactive self-test when this file is executed as a script.
+if __name__ == '__main__':
+ test()
+"""New import scheme with package support.
+A Package is a module that can contain other modules. Packages can be
+nested. Packages introduce dotted names for modules, like P.Q.M, which
+could correspond to a file P/Q/M.py found somewhere on sys.path. It
+is possible to import a package itself, though this makes little sense
+unless the package contains a module called __init__.
+A package has two variables that control the namespace used for
+packages and modules, both initialized to sensible defaults the first
+time the package is referenced.
+(1) A package's *module search path*, contained in the per-package
+variable __path__, defines a list of *directories* where submodules or
+subpackages of the package are searched. It is initialized to the
+directory containing the package. Setting this variable to None makes
+the module search path default to sys.path (this is not quite the same
+as setting it to sys.path, since the latter won't track later
+assignments to sys.path).
+(2) A package's *import domain*, contained in the per-package variable
+__domain__, defines a list of *packages* that are searched (using
+their respective module search paths) to satisfy imports. It is
+initialized to the list consisting of the package itself, its parent
+package, its parent's parent, and so on, ending with the root package
+(the nameless package containing all top-level packages and modules,
+whose module search path is None, implying sys.path).
+The default domain implements a search algorithm called "expanding
+search". An alternative search algorithm called "explicit search"
+fixes the import search path to contain only the root package,
+requiring the modules in the package to name all imported modules by
+their full name. The convention of using '__' to refer to the current
+package (both as a per-module variable and in module names) can be
+used by packages using explicit search to refer to modules in the same
+package; this combination is known as "explicit-relative search".
+The PackageImporter and PackageLoader classes together implement the
+following policies:
+- There is a root package, whose name is ''. It cannot be imported
+ directly but may be referenced, e.g. by using '__' from a top-level
+ module.
+- In each module or package, the variable '__' contains a reference to
+ the parent package; in the root package, '__' points to itself.
+- In the name for imported modules (e.g. M in "import M" or "from M
+ import ..."), a leading '__' refers to the current package (i.e.
+ the package containing the current module); leading '__.__' and so
+ on refer to the current package's parent, and so on. The use of
+ '__' elsewhere in the module name is not supported.
+- Modules are searched using the "expanding search" algorithm by
+ virtue of the default value for __domain__.
+- If A.B.C is imported, A is searched using __domain__; then
+ subpackage B is searched in A using its __path__, and so on.
+- Built-in modules have priority: even if a file exists in a
+ package, "import sys" imports the built-in sys module.
+- The same holds for frozen modules, for better or for worse.
+- Submodules and subpackages are not automatically loaded when their
+ parent package is loaded.
+- The construct "from package import *" is illegal. (It can still be
+ used to import names from a module.)
+- When "from package import module1, module2, ..." is used, those
+ modules are explicitly loaded.
+- When a package is loaded, if it has a submodule __init__, that
+ module is loaded. This is the place where required submodules can
+ be loaded, the __path__ variable extended, etc. The __init__ module
+ is loaded even if the package was loaded only in order to create a
+ stub for a sub-package: if "import P.Q.R" is the first reference to
+ P, and P has a submodule __init__, P.__init__ is loaded before P.Q
+ is even searched.
+- It is possible to import a package that has no __init__ submodule;
+ this is not particularly useful but there may be useful applications
+ for it (e.g. to manipulate its search paths from the outside!).
+- There are no special provisions for os.chdir(). If you plan to use
+ os.chdir() before you have imported all your modules, it is better
+ not to have relative pathnames in sys.path. (This could actually be
+ fixed by changing the implementation of path_join() in the hook to
+ absolutize paths.)
+- Packages and modules are introduced in sys.modules as soon as their
+ loading is started. When the loading is terminated by an exception,
+ the sys.modules entries remain around.
+- There are no special measures to support mutually recursive modules,
+ but it will work under the same conditions where it works in the
+ flat module space system.
+- Sometimes dummy entries (whose value is None) are entered in
+ sys.modules, to indicate that a particular module does not exist --
+ this is done to speed up the expanding search algorithm when a
+ module residing at a higher level is repeatedly imported (Python
+ promises that importing a previously imported module is cheap!)
+- Although dynamically loaded extensions are allowed inside packages,
+ the current implementation (hardcoded in the interpreter) of their
+ initialization may cause problems if an extension invokes the
+ interpreter during its initialization.
+- reload() may find another version of the module only if it occurs on
+ the package search path. Thus, it keeps the connection to the
+ package to which the module belongs, but may find a different file.
+XXX Need to have an explicit name for '', e.g. '__root__'.
+"""
+import imp
+import string
+import sys
+import __builtin__
+import ihooks
+from ihooks import ModuleLoader, ModuleImporter
+class PackageLoader(ModuleLoader):
+ """A subclass of ModuleLoader with package support.
+ find_module_in_dir() will succeed if there's a subdirectory with
+ the given name; load_module() will create a stub for a package and
+ load its __init__ module if it exists.
+ """
+ def find_module_in_dir(self, name, dir):
+ if dir is not None:
+ dirname = self.hooks.path_join(dir, name)
+ if self.hooks.path_isdir(dirname):
+ return None, dirname, ('', '', 'PACKAGE')
+ return ModuleLoader.find_module_in_dir(self, name, dir)
+ def load_module(self, name, stuff):
+ file, filename, info = stuff
+ suff, mode, type = info
+ if type == 'PACKAGE':
+ return self.load_package(name, stuff)
+ if sys.modules.has_key(name):
+ m = sys.modules[name]
+ else:
+ sys.modules[name] = m = imp.new_module(name)
+ self.set_parent(m)
+ if type == imp.C_EXTENSION and '.' in name:
+ return self.load_dynamic(name, stuff)
+ else:
+ return ModuleLoader.load_module(self, name, stuff)
+ def load_dynamic(self, name, stuff):
+ file, filename, (suff, mode, type) = stuff
+ # Hack around restriction in imp.load_dynamic()
+ i = string.rfind(name, '.')
+ tail = name[i+1:]
+ if sys.modules.has_key(tail):
+ save = sys.modules[tail]
+ else:
+ save = None
+ sys.modules[tail] = imp.new_module(name)
+ try:
+ m = imp.load_dynamic(tail, filename, file)
+ finally:
+ if save:
+ sys.modules[tail] = save
+ else:
+ del sys.modules[tail]
+ sys.modules[name] = m
+ return m
+ def load_package(self, name, stuff):
+ file, filename, info = stuff
+ if sys.modules.has_key(name):
+ package = sys.modules[name]
+ else:
+ sys.modules[name] = package = imp.new_module(name)
+ package.__path__ = [filename]
+ self.init_package(package)
+ return package
+ def init_package(self, package):
+ self.set_parent(package)
+ self.set_domain(package)
+ self.call_init_module(package)
+ def set_parent(self, m):
+ name = m.__name__
+ if '.' in name:
+ name = name[:string.rfind(name, '.')]
+ else:
+ name = ''
+ m.__ = sys.modules[name]
+ def set_domain(self, package):
+ name = package.__name__
+ package.__domain__ = domain = [name]
+ while '.' in name:
+ name = name[:string.rfind(name, '.')]
+ domain.append(name)
+ if name:
+ domain.append('')
+ def call_init_module(self, package):
+ stuff = self.find_module('__init__', package.__path__)
+ if stuff:
+ m = self.load_module(package.__name__ + '.__init__', stuff)
+ package.__init__ = m
+class PackageImporter(ModuleImporter):
+ """Importer that understands packages and '__'."""
+ def __init__(self, loader = None, verbose = 0):
+ ModuleImporter.__init__(self,
+ loader or PackageLoader(None, verbose), verbose)
+ def import_module(self, name, globals={}, locals={}, fromlist=[]):
+ if globals.has_key('__'):
+ package = globals['__']
+ else:
+ # No calling context, assume in root package
+ package = sys.modules['']
+ if name[:3] in ('__.', '__'):
+ p = package
+ name = name[3:]
+ while name[:3] in ('__.', '__'):
+ p = package.__
+ name = name[3:]
+ if not name:
+ return self.finish(package, p, '', fromlist)
+ if '.' in name:
+ i = string.find(name, '.')
+ name, tail = name[:i], name[i:]
+ else:
+ tail = ''
+ mname = p.__name__ and p.__name__+'.'+name or name
+ m = self.get1(mname)
+ return self.finish(package, m, tail, fromlist)
+ if '.' in name:
+ i = string.find(name, '.')
+ name, tail = name[:i], name[i:]
+ else:
+ tail = ''
+ for pname in package.__domain__:
+ mname = pname and pname+'.'+name or name
+ m = self.get0(mname)
+ if m: break
+ else:
+ raise ImportError, "No such module %s" % name
+ return self.finish(m, m, tail, fromlist)
+ def finish(self, module, m, tail, fromlist):
+ # Got ....A; now get ....A.B.C.D
+ yname = m.__name__
+ if tail and sys.modules.has_key(yname + tail): # Fast path
+ yname, tail = yname + tail, ''
+ m = self.get1(yname)
+ while tail:
+ i = string.find(tail, '.', 1)
+ if i > 0:
+ head, tail = tail[:i], tail[i:]
+ else:
+ head, tail = tail, ''
+ yname = yname + head
+ m = self.get1(yname)
+ # Got ....A.B.C.D; now finalize things depending on fromlist
+ if not fromlist:
+ return module
+ if '__' in fromlist:
+ raise ImportError, "Can't import __ from anywhere"
+ if not hasattr(m, '__path__'): return m
+ if '*' in fromlist:
+ raise ImportError, "Can't import * from a package"
+ for f in fromlist:
+ if hasattr(m, f): continue
+ fname = yname + '.' + f
+ self.get1(fname)
+ return m
+ def get1(self, name):
+ m = self.get(name)
+ if not m:
+ raise ImportError, "No module named %s" % name
+ return m
+ def get0(self, name):
+ m = self.get(name)
+ if not m:
+ sys.modules[name] = None
+ return m
+ def get(self, name):
+ # Internal routine to get or load a module when its parent exists
+ if sys.modules.has_key(name):
+ return sys.modules[name]
+ if '.' in name:
+ i = string.rfind(name, '.')
+ head, tail = name[:i], name[i+1:]
+ else:
+ head, tail = '', name
+ path = sys.modules[head].__path__
+ stuff = self.loader.find_module(tail, path)
+ if not stuff:
+ return None
+ sys.modules[name] = m = self.loader.load_module(name, stuff)
+ if head:
+ setattr(sys.modules[head], tail, m)
+ return m
+ def reload(self, module):
+ name = module.__name__
+ if '.' in name:
+ i = string.rfind(name, '.')
+ head, tail = name[:i], name[i+1:]
+ path = sys.modules[head].__path__
+ else:
+ tail = name
+ path = sys.modules[''].__path__
+ stuff = self.loader.find_module(tail, path)
+ if not stuff:
+ raise ImportError, "No module named %s" % name
+ return self.loader.load_module(name, stuff)
+ def unload(self, module):
+ if hasattr(module, '__path__'):
+ raise ImportError, "don't know how to unload packages yet"
+ PackageImporter.unload(self, module)
+ def install(self):
+ if not sys.modules.has_key(''):
+ sys.modules[''] = package = imp.new_module('')
+ package.__path__ = None
+ self.loader.init_package(package)
+ for m in sys.modules.values():
+ if not m: continue
+ if not hasattr(m, '__'):
+ self.loader.set_parent(m)
+ ModuleImporter.install(self)
+def install(v = 0):
+ ihooks.install(PackageImporter(None, v))
+def uninstall():
+ """Undo install() by calling ihooks.uninstall()."""
+ ihooks.uninstall()
+def ni(v = 0):
+ """Short alias for install(v)."""
+ install(v)
+def no():
+ """Short alias for uninstall()."""
+ uninstall()
+def test():
+ import pdb
+ try:
+ testproper()
+ except:
+ sys.last_type, sys.last_value, sys.last_traceback = (
+ sys.exc_type, sys.exc_value, sys.exc_traceback)
+ print
+ print sys.last_type, ':', sys.last_value
+ print
+def testproper():
+ install(1)
+ try:
+ import mactest
+ print dir(mactest)
+ raw_input('OK?')
+ finally:
+ uninstall()
+# Run test() when this file is executed as a script.
+if __name__ == '__main__':
+ test()