diff options
author | Guido van Rossum <guido@python.org> | 1995-08-04 04:00:20 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1995-08-04 04:00:20 (GMT) |
commit | e7e578ffe042ca84f5b57e8056cb5598c7b44e5a (patch) | |
tree | 4a3b2e705ae78d7030280aef3f5839579755a698 /Lib | |
parent | 40d1ea3b9ccf9e779f253685d1f5f6cf530945fa (diff) | |
download | cpython-e7e578ffe042ca84f5b57e8056cb5598c7b44e5a.zip cpython-e7e578ffe042ca84f5b57e8056cb5598c7b44e5a.tar.gz cpython-e7e578ffe042ca84f5b57e8056cb5598c7b44e5a.tar.bz2 |
Initial revision
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/BaseHTTPServer.py | 482 | ||||
-rw-r--r-- | Lib/CGIHTTPServer.py | 203 | ||||
-rw-r--r-- | Lib/SimpleHTTPServer.py | 161 | ||||
-rw-r--r-- | Lib/SocketServer.py | 413 | ||||
-rw-r--r-- | Lib/ihooks.py | 365 | ||||
-rw-r--r-- | Lib/ni.py | 390 | ||||
-rw-r--r-- | Lib/ni1.py | 390 |
7 files changed, 2404 insertions, 0 deletions
diff --git a/Lib/BaseHTTPServer.py b/Lib/BaseHTTPServer.py new file mode 100644 index 0000000..681139b --- /dev/null +++ b/Lib/BaseHTTPServer.py @@ -0,0 +1,482 @@ +"""HTTP server base class. + +Note: the class in this module doesn't implement any HTTP request; see +SimpleHTTPServer for simple implementations of GET, HEAD and POST +(including CGI scripts). + +Contents: + +- BaseHTTPRequestHandler: HTTP request handler base class +- test: test function + +XXX To do: + +- send server version +- log requests even later (to capture byte count) +- log user-agent header and other interesting goodies +- send error log to separate file +- are request names really case sensitive? + +""" + + +# See also: +# +# HTTP Working Group T. Berners-Lee +# INTERNET-DRAFT R. T. Fielding +# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen +# Expires September 8, 1995 March 8, 1995 +# +# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt + + +# Log files +# --------- +# +# Here's a quote from the NCSA httpd docs about log file format. +# +# | The logfile format is as follows. Each line consists of: +# | +# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb +# | +# | host: Either the DNS name or the IP number of the remote client +# | rfc931: Any information returned by identd for this person, +# | - otherwise. +# | authuser: If user sent a userid for authentication, the user name, +# | - otherwise. +# | DD: Day +# | Mon: Month (calendar name) +# | YYYY: Year +# | hh: hour (24-hour format, the machine's timezone) +# | mm: minutes +# | ss: seconds +# | request: The first line of the HTTP request as sent by the client. +# | ddd: the status code returned by the server, - if not available. +# | bbbb: the total number of bytes sent, +# | *not including the HTTP/1.0 header*, - if not available +# | +# | You can determine the name of the file accessed through request. 
+# +# (Actually, the latter is only true if you know the server configuration +# at the time the request was made!) + + +__version__ = "0.2" + + +import sys +import time +import socket # For gethostbyaddr() +import string +import rfc822 +import mimetools +import SocketServer + +# Default error message +DEFAULT_ERROR_MESSAGE = """\ +<head> +<title>Error response</title> +</head> +<body> +<h1>Error response</h1> +<p>Error code %(code)d. +<p>Message: %(message)s. +<p>Error code explanation: %(code)s = %(explain)s. +</body> +""" + + +class HTTPServer(SocketServer.TCPServer): + + def server_bind(self): + """Override server_bind to store the server name.""" + SocketServer.TCPServer.server_bind(self) + host, port = self.socket.getsockname() + if not host or host == '0.0.0.0': + host = socket.gethostname() + hostname, hostnames, hostaddrs = socket.gethostbyaddr(host) + if '.' not in hostname: + for host in hostnames: + if '.' in host: + hostname = host + break + self.server_name = hostname + self.server_port = port + + +class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler): + + """HTTP request handler base class. + + The following explanation of HTTP serves to guide you through the + code as well as to expose any misunderstandings I may have about + HTTP (so you don't need to read the code to figure out I'm wrong + :-). + + HTTP (HyperText Transfer Protocol) is an extensible protocol on + top of a reliable stream transport (e.g. TCP/IP). The protocol + recognizes three parts to a request: + + 1. One line identifying the request type and path + 2. An optional set of RFC-822-style headers + 3. An optional data part + + The headers and data are separated by a blank line. + + The first line of the request has the form + + <command> <path> <version> + + where <command> is a (case-sensitive) keyword such as GET or POST, + <path> is a string containing path information for the request, + and <version> should be the string "HTTP/1.0". 
<path> is encoded + using the URL encoding scheme (using %xx to signify the ASCII + character with hex code xx). + + The protocol is vague about whether lines are separated by LF + characters or by CRLF pairs -- for compatibility with the widest + range of clients, both should be accepted. Similarly, whitespace + in the request line should be treated sensibly (allowing multiple + spaces between components and allowing trailing whitespace). + + Similarly, for output, lines ought to be separated by CRLF pairs + but most clients grok LF characters just fine. + + If the first line of the request has the form + + <command> <path> + + (i.e. <version> is left out) then this is assumed to be an HTTP + 0.9 request; this form has no optional headers and data part and + the reply consists of just the data. + + The reply form of the HTTP 1.0 protocol again has three parts: + + 1. One line giving the response code + 2. An optional set of RFC-822-style headers + 3. The data + + Again, the headers and data are separated by a blank line. + + The response code line has the form + + <version> <responsecode> <responsestring> + + where <version> is the protocol version (always "HTTP/1.0"), + <responsecode> is a 3-digit response code indicating success or + failure of the request, and <responsestring> is an optional + human-readable string explaining what the response code means. + + This server parses the request and the headers, and then calls a + function specific to the request type (<command>). Specifically, + a request SPAM will be handled by a method do_SPAM(). If no + such method exists the server sends an error response to the + client. If it exists, it is called with no arguments: + + do_SPAM() + + Note that the request name is case sensitive (i.e. SPAM and spam + are different requests). 
+ + The various request details are stored in instance variables: + + - client_address is the client IP address in the form (host, + port); + + - command, path and version are the broken-down request line; + + - headers is an instance of mimetools.Message (or a derived + class) containing the header information; + + - rfile is a file object open for reading positioned at the + start of the optional input data part; + + - wfile is a file object open for writing. + + IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! + + The first thing to be written must be the response line. Then + follow 0 or more header lines, then a blank line, and then the + actual data (if any). The meaning of the header lines depends on + the command executed by the server; in most cases, when data is + returned, there should be at least one header line of the form + + Content-type: <type>/<subtype> + + where <type> and <subtype> should be registered MIME types, + e.g. "text/html" or "text/plain". + + """ + + # The Python system version, truncated to its first component. + sys_version = "Python/" + string.split(sys.version)[0] + + # The server software version. You may want to override this. + # The format is multiple whitespace-separated strings, + # where each string is of the form name[/version]. + server_version = "BaseHTTP/" + __version__ + + def handle(self): + """Handle a single HTTP request. + + You normally don't need to override this method; see the class + __doc__ string for information on how to handle specific HTTP + commands such as GET and POST. 
+ + """ + + self.raw_requestline = self.rfile.readline() + requestline = self.raw_requestline + if requestline[-2:] == '\r\n': + requestline = requestline[:-2] + elif requestline[-1:] == '\n': + requestline = requestline[:-1] + self.requestline = requestline + words = string.split(requestline) + if len(words) == 3: + [command, path, version] = words + if version != self.protocol_version: + self.send_error(400, "Bad request version (%s)" % `version`) + return + elif len(words) == 2: + [command, path] = words + if command != 'GET': + self.send_error(400, + "Bad HTTP/0.9 request type (%s)" % `command`) + return + version = "HTTP/0.9" + else: + self.send_error(400, "Bad request syntax (%s)" % `command`) + return + self.command, self.path, self.request_version = command, path, version + self.headers = self.MessageClass(self.rfile, 0) + mname = 'do_' + command + if not hasattr(self, mname): + self.send_error(501, "Unsupported method (%s)" % `command`) + return + method = getattr(self, mname) + method() + + def send_error(self, code, message=None): + """Send and log an error reply. + + Arguments are the error code, and a detailed message. + The detailed message defaults to the short entry matching the + response code. + + This sends an error response (so it must be called before any + output has been generated), logs the error, and finally sends + a piece of HTML explaining the error to the user. + + """ + + try: + short, long = self.responses[code] + except KeyError: + short, long = '???', '???' + if not message: + message = short + explain = long + self.log_error("code %d, message %s", code, message) + self.send_response(code, message) + self.end_headers() + self.wfile.write(self.error_message_format % + {'code': code, + 'message': message, + 'explain': explain}) + + error_message_format = DEFAULT_ERROR_MESSAGE + + def send_response(self, code, message=None): + """Send the response header and log the response code. 
+ + Also send two standard headers with the server software + version and the current date. + + """ + self.log_request(code) + if message is None: + if self.responses.has_key(code): + message = self.responses[code][1] + else: + message = '' + if self.request_version != 'HTTP/0.9': + self.wfile.write("%s %s %s\r\n" % + (self.protocol_version, str(code), message)) + self.send_header('Server', self.version_string()) + self.send_header('Date', self.date_time_string()) + + def send_header(self, keyword, value): + """Send a MIME header.""" + if self.request_version != 'HTTP/0.9': + self.wfile.write("%s: %s\r\n" % (keyword, value)) + + def end_headers(self): + """Send the blank line ending the MIME headers.""" + if self.request_version != 'HTTP/0.9': + self.wfile.write("\r\n") + + def log_request(self, code='-', size='-'): + """Log an accepted request. + + This is called by send_reponse(). + + """ + + self.log_message('"%s" %s %s', + self.requestline, str(code), str(size)) + + def log_error(self, *args): + """Log an error. + + This is called when a request cannot be fulfilled. By + default it passes the message on to log_message(). + + Arguments are the same as for log_message(). + + XXX This should go to the separate error log. + + """ + + apply(self.log_message, args) + + def log_message(self, format, *args): + """Log an arbitrary message. + + This is used by all other logging functions. Override + it if you have specific logging wishes. + + The first argument, FORMAT, is a format string for the + message to be logged. If the format string contains + any % escapes requiring parameters, they should be + specified as subsequent arguments (it's just like + printf!). + + The client host and current date/time are prefixed to + every message. 
+ + """ + + sys.stderr.write("%s - - [%s] %s\n" % + (self.address_string(), + self.log_date_time_string(), + format%args)) + + def version_string(self): + """Return the server software version string.""" + return self.server_version + ' ' + self.sys_version + + def date_time_string(self): + """Return the current date and time formatted for a message header.""" + now = time.time() + year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now) + s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( + self.weekdayname[wd], + day, self.monthname[month], year, + hh, mm, ss) + return s + + def log_date_time_string(self): + """Return the current time formatted for logging.""" + now = time.time() + year, month, day, hh, mm, ss, x, y, z = time.localtime(now) + s = "%02d/%3s/%04d %02d:%02d:%02d" % ( + day, self.monthname[month], year, hh, mm, ss) + return s + + weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + + monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + + def address_string(self): + """Return the client address formatted for logging. + + This version looks up the full hostname using gethostbyaddr(), + and tries to find a name that contains at least one dot. + + """ + + (host, port) = self.client_address + try: + name, names, addresses = socket.gethostbyaddr(host) + except socket.error, msg: + return host + names.insert(0, name) + for name in names: + if '.' in name: return name + return names[0] + + + # Essentially static class variables + + # The version of the HTTP protocol we support. + # Don't override unless you know what you're doing (hint: incoming + # requests are required to have exactly this version string). + protocol_version = "HTTP/1.0" + + # The Message-like class used to parse headers + MessageClass = mimetools.Message + + # Table mapping response codes to messages; entries have the + # form {code: (shortmessage, longmessage)}. 
+ # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html + responses = { + 200: ('OK', 'Request fulfilled, document follows'), + 201: ('Created', 'Document created, URL follows'), + 202: ('Accepted', + 'Request accepted, processing continues off-line'), + 203: ('Partial information', 'Request fulfilled from cache'), + 204: ('No response', 'Request fulfilled, nothing follows'), + + 301: ('Moved', 'Object moved permanently -- see URI list'), + 302: ('Found', 'Object moved temporarily -- see URI list'), + 303: ('Method', 'Object moved -- see Method and URL list'), + 304: ('Not modified', + 'Document has not changed singe given time'), + + 400: ('Bad request', + 'Bad request syntax or unsupported method'), + 401: ('Unauthorized', + 'No permission -- see authorization schemes'), + 402: ('Payment required', + 'No payment -- see charging schemes'), + 403: ('Forbidden', + 'Request forbidden -- authorization will not help'), + 404: ('Not found', 'Nothing matches the given URI'), + + 500: ('Internal error', 'Server got itself in trouble'), + 501: ('Not implemented', + 'Server does not support this operation'), + 502: ('Service temporarily overloaded', + 'The server cannot process the request due to a high load'), + 503: ('Gateway timeout', + 'The gateway server did not receive a timely response'), + + } + + +def test(HandlerClass = BaseHTTPRequestHandler, + ServerClass = HTTPServer): + """Test the HTTP request handler class. + + This runs an HTTP server on port 8000 (or the first command line + argument). + + """ + + if sys.argv[1:]: + port = string.atoi(sys.argv[1]) + else: + port = 8000 + server_address = ('', port) + + httpd = ServerClass(server_address, HandlerClass) + + print "Serving HTTP on port", port, "..." + httpd.serve_forever() + + +if __name__ == '__main__': + test() diff --git a/Lib/CGIHTTPServer.py b/Lib/CGIHTTPServer.py new file mode 100644 index 0000000..eeb51fe --- /dev/null +++ b/Lib/CGIHTTPServer.py @@ -0,0 +1,203 @@ +"""CGI-savvy HTTP Server. 
+ +This module builds on SimpleHTTPServer by implementing GET and POST +requests to cgi-bin scripts. + +""" + + +__version__ = "0.2" + + +import os +import sys +import time +import socket +import string +import urllib +import BaseHTTPServer +import SimpleHTTPServer + + +class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): + + """Complete HTTP server with GET, HEAD and POST commands. + + GET and HEAD also support running CGI scripts. + + The POST command is *only* implemented for CGI scripts. + + """ + + def do_POST(self): + """Serve a POST request. + + This is only implemented for CGI scripts. + + """ + + if self.is_cgi(): + self.run_cgi() + else: + self.send_error(501, "Can only POST to CGI scripts") + + def send_head(self): + """Version of send_head that support CGI scripts""" + if self.is_cgi(): + return self.run_cgi() + else: + return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self) + + def is_cgi(self): + """test whether PATH corresponds to a CGI script. + + Return a tuple (dir, rest) if PATH requires running a + CGI script, None if not. Note that rest begins with a + slash if it is not empty. + + The default implementation tests whether the path + begins with one of the strings in the list + self.cgi_directories (and the next character is a '/' + or the end of the string). 
+ + """ + + path = self.path + + for x in self.cgi_directories: + i = len(x) + if path[:i] == x and (not path[i:] or path[i] == '/'): + self.cgi_info = path[:i], path[i+1:] + return 1 + return 0 + + cgi_directories = ['/cgi-bin', '/htbin'] + + def run_cgi(self): + """Execute a CGI script.""" + dir, rest = self.cgi_info + i = string.rfind(rest, '?') + if i >= 0: + rest, query = rest[:i], rest[i+1:] + else: + query = '' + i = string.find(rest, '/') + if i >= 0: + script, rest = rest[:i], rest[i:] + else: + script, rest = rest, '' + scriptname = dir + '/' + script + scriptfile = self.translate_path(scriptname) + if not os.path.exists(scriptfile): + self.send_error(404, "No such CGI script (%s)", `scriptname`) + return + if not os.path.isfile(scriptfile): + self.send_error(403, "CGI script is not a plain file (%s)", + `scriptname`) + return + if not executable(scriptfile): + self.send_error(403, "CGI script is not executable (%s)", + `scriptname`) + return + nobody = nobody_uid() + self.send_response(200, "Script output follows") + self.wfile.flush() # Always flush before forking + pid = os.fork() + if pid != 0: + # Parent + pid, sts = os.waitpid(pid, 0) + if sts: + self.log_error("CGI script exit status x%x" % sts) + return + # Child + try: + # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html + # XXX Much of the following could be prepared ahead of time! 
+ env = {} + env['SERVER_SOFTWARE'] = self.version_string() + env['SERVER_NAME'] = self.server.server_name + env['GATEWAY_INTERFACE'] = 'CGI/1.1' + env['SERVER_PROTOCOL'] = self.protocol_version + env['SERVER_PORT'] = str(self.server.server_port) + env['REQUEST_METHOD'] = self.command + uqrest = urllib.unquote(rest) + env['PATH_INFO'] = uqrest + env['PATH_TRANSLATED'] = self.translate_path(uqrest) + env['SCRIPT_NAME'] = scriptname + if query: + env['QUERY_STRING'] = query + host = self.address_string() + if host != self.client_address[0]: + env['REMOTE_HOST'] = host + env['REMOTE_ADDR'] = self.client_address[0] + # AUTH_TYPE + # REMOTE_USER + # REMOTE_IDENT + env['CONTENT_TYPE'] = self.headers.type + length = self.headers.getheader('content-length') + if length: + env['CONTENT_LENGTH'] = length + accept = [] + for line in self.headers.getallmatchingheaders('accept'): + if line[:1] in string.whitespace: + accept.append(string.strip(line)) + else: + accept = accept + string.split(line[7:]) + env['HTTP_ACCEPT'] = string.joinfields(accept, ',') + ua = self.headers.getheader('user-agent') + if ua: + env['HTTP_USER_AGENT'] = ua + # XXX Other HTTP_* headers + import regsub + decoded_query = regsub.gsub('+', ' ', query) + try: + os.setuid(nobody) + except os.error: + pass + os.dup2(self.rfile.fileno(), 0) + os.dup2(self.wfile.fileno(), 1) + print scriptfile, script, decoded_query + os.execve(scriptfile, + [script, decoded_query], + env) + except: + self.server.handle_error(self.request, self.client_address) + os._exit(127) + + +nobody = None + +def nobody_uid(): + """Internal routine to get nobody's uid""" + global nobody + if nobody: + return nobody + import pwd + try: + nobody = pwd.getpwnam('nobody')[2] + except pwd.error: + nobody = 1 + max(map(lambda x: x[2], pwd.getpwall())) + return nobody + + +def executable(path): + """Test for executable file.""" + try: + st = os.stat(path) + except os.error: + return 0 + return 1 + + +def test(HandlerClass = 
CGIHTTPRequestHandler, + ServerClass = BaseHTTPServer.HTTPServer): + import sys + if sys.argv[1:2] == ['-r']: + db = MyArchive() + db.regenindices() + return + SimpleHTTPServer.test(HandlerClass, ServerClass) + + +if __name__ == '__main__': + test() diff --git a/Lib/SimpleHTTPServer.py b/Lib/SimpleHTTPServer.py new file mode 100644 index 0000000..67ec75a --- /dev/null +++ b/Lib/SimpleHTTPServer.py @@ -0,0 +1,161 @@ +"""Simple HTTP Server. + +This module builds on BaseHTTPServer by implementing the standard GET +and HEAD requests in a fairly straightforward manner. + +""" + + +__version__ = "0.2" + + +import os +import pwd +import sys +import time +import socket +import string +import posixpath +import SocketServer +import BaseHTTPServer + + +def nobody_uid(): + """Internal routine to get nobody's uid""" + try: + nobody = pwd.getpwnam('nobody')[2] + except pwd.error: + nobody = 1 + max(map(lambda x: x[2], pwd.getpwall())) + return nobody + +nobody = nobody_uid() + + +class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): + + """Simple HTTP request handler with GET and HEAD commands. + + This serves files from the current directory and any of its + subdirectories. It assumes that all files are plain text files + unless they have the extension ".html" in which case it assumes + they are HTML files. + + The GET and HEAD requests are identical except that the HEAD + request omits the actual contents of the file. + + """ + + server_version = "SimpleHTTP/" + __version__ + + def do_GET(self): + """Serve a GET request.""" + f = self.send_head() + if f: + self.copyfile(f, self.wfile) + f.close() + + def do_HEAD(self): + """Serve a HEAD request.""" + f = self.send_head() + if f: + f.close() + + def send_head(self): + """Common code for GET and HEAD commands. + + This sends the response code and MIME headers. 
+ + Return value is either a file object (which has to be copied + to the outputfile by the caller unless the command was HEAD, + and must be closed by the caller under all circumstances), or + None, in which case the caller has nothing further to do. + + """ + path = self.translate_path(self.path) + if os.path.isdir(path): + self.send_error(403, "Directory listing not supported") + return None + try: + f = open(path) + except IOError: + self.send_error(404, "File not found") + return None + self.send_response(200) + self.send_header("Content-type", self.guess_type(path)) + self.end_headers() + return f + + def translate_path(self, path): + """Translate a /-separated PATH to the local filename syntax. + + Components that mean special things to the local file system + (e.g. drive or directory names) are ignored. (XXX They should + probably be diagnosed.) + + """ + path = posixpath.normpath(path) + words = string.splitfields(path, '/') + words = filter(None, words) + path = os.getcwd() + for word in words: + drive, word = os.path.splitdrive(word) + head, word = os.path.split(word) + if word in (os.curdir, os.pardir): continue + path = os.path.join(path, word) + return path + + def copyfile(self, source, outputfile): + """Copy all data between two file objects. + + The SOURCE argument is a file object open for reading + (or anything with a read() method) and the DESTINATION + argument is a file object open for writing (or + anything with a write() method). + + The only reason for overriding this would be to change + the block size or perhaps to replace newlines by CRLF + -- note however that this the default server uses this + to copy binary data as well. + + """ + + BLOCKSIZE = 8192 + while 1: + data = source.read(BLOCKSIZE) + if not data: break + outputfile.write(data) + + def guess_type(self, path): + """Guess the type of a file. + + Argument is a PATH (a filename). + + Return value is a string of the form type/subtype, + usable for a MIME Content-type header. 
+ + The default implementation looks the file's extension + up in the table self.extensions_map, using text/plain + as a default; however it would be permissible (if + slow) to look inside the data to make a better guess. + + """ + + base, ext = posixpath.splitext(path) + if self.extensions_map.has_key(ext): + return self.extensions_map[ext] + else: + return self.extensions_map[''] + + extensions_map = { + '': 'text/plain', # Default, *must* be present + '.html': 'text/html', + } + + +def test(HandlerClass = SimpleHTTPRequestHandler, + ServerClass = SocketServer.TCPServer): + BaseHTTPServer.test(HandlerClass, ServerClass) + + +if __name__ == '__main__': + test() diff --git a/Lib/SocketServer.py b/Lib/SocketServer.py new file mode 100644 index 0000000..102d9b5 --- /dev/null +++ b/Lib/SocketServer.py @@ -0,0 +1,413 @@ +"""Generic socket server classes. + +This module tries to capture the various aspects of defining a server: + +- address family: + - AF_INET: IP (Internet Protocol) sockets (default) + - AF_UNIX: Unix domain sockets + - others, e.g. AF_DECNET are conceivable (see <socket.h> +- socket type: + - SOCK_STREAM (reliable stream, e.g. TCP) + - SOCK_DGRAM (datagrams, e.g. UDP) +- client address verification before further looking at the request + (This is actually a hook for any processing that needs to look + at the request before anything else, e.g. logging) +- how to handle multiple requests: + - synchronous (one request is handled at a time) + - forking (each request is handled by a new process) + - threading (each request is handled by a new thread) + +The classes in this module favor the server type that is simplest to +write: a synchronous TCP/IP server. This is bad class design, but +save some typing. (There's also the issue that a deep class hierarchy +slows down method lookups.) 
+There are four classes in an inheritance diagram that represent +synchronous servers of four types: + + +-----------+ +------------------+ + | TCPServer |------->| UnixStreamServer | + +-----------+ +------------------+ + | + v + +-----------+ +--------------------+ + | UDPServer |------->| UnixDatagramServer | + +-----------+ +--------------------+ + +(Note that UnixDatagramServer derives from UDPServer, not from +UnixStreamServer -- the only difference between an IP and a Unix +stream server is the address family, which is simply repeated in both +unix server classes.) + +Forking and threading versions of each type of server can be created +using the ForkingMixIn and ThreadingMixIn mix-in classes. For +instance, a threading UDP server class is created as follows: + + class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass + +(The Mix-in class must come first, since it overrides a method defined +in UDPServer!) + +To implement a service, you must derive a class from +BaseRequestHandler and redefine its handle() method. You can then run +various versions of the service by combining one of the server classes +with your request handler class. + +The request handler class must be different for datagram or stream +services. This can be hidden by using the mix-in request handler +classes StreamRequestHandler or DatagramRequestHandler. + +Of course, you still have to use your head! + +For instance, it makes no sense to use a forking server if the service +contains state in memory that can be modified by requests (since the +modifications in the child process would never reach the initial state +kept in the parent process and passed to each child). In this case, +you can use a threading server, but you will probably have to use +locks to prevent two requests that come in nearly simultaneously from +applying conflicting changes to the server state. + +On the other hand, if you are building e.g. an HTTP server, where all +data is stored externally (e.g. 
in the file system), a synchronous +class will essentially render the service "deaf" while one request is +being handled -- which may be for a very long time if a client is slow +to read all the data it has requested. Here a threading or forking +server is appropriate. + +In some cases, it may be appropriate to process part of a request +synchronously, but to finish processing in a forked child depending on +the request data. This can be implemented by using a synchronous +server and doing an explicit fork in the request handler class's +handle() method. + +Another approach to handling multiple simultaneous requests in an +environment that supports neither threads nor fork (or where these are +too expensive or inappropriate for the service) is to maintain an +explicit table of partially finished requests and to use select() to +decide which request to work on next (or whether to handle a new +incoming request). This is particularly important for stream services +where each client can potentially be connected for a long time (if +threads or subprocesses can't be used). + +Future work: +- Standard classes for Sun RPC (which uses either UDP or TCP) +- Standard mix-in classes to implement various authentication + and encryption schemes +- Standard framework for select-based multiplexing + +XXX Open problems: +- What to do with out-of-band data? + +""" + + +__version__ = "0.2" + + +import socket +import sys +import os + + +class TCPServer: + + """Base class for various socket-based server classes. + + Defaults to synchronous IP stream (i.e., TCP). 
+ + Methods for the caller: + + - __init__(server_address, RequestHandlerClass) + - serve_forever() + - handle_request() # if you don't use serve_forever() + - fileno() -> int # for select() + + Methods that may be overridden: + + - server_bind() + - server_activate() + - get_request() -> request, client_address + - verify_request(request, client_address) + - process_request(request, client_address) + - handle_error() + + Methods for derived classes: + + - finish_request(request, client_address) + + Class variables that may be overridden by derived classes or + instances: + + - address_family + - socket_type + - request_queue_size (only for stream sockets) + + Instance variables: + + - server_address + - RequestHandlerClass + - socket + + """ + + address_family = socket.AF_INET + + socket_type = socket.SOCK_STREAM + + request_queue_size = 5 + + def __init__(self, server_address, RequestHandlerClass): + """Constructor. May be extended, do not override.""" + self.server_address = server_address + self.RequestHandlerClass = RequestHandlerClass + self.socket = socket.socket(self.address_family, + self.socket_type) + self.server_bind() + self.server_activate() + + def server_bind(self): + """Called by constructor to bind the socket. + + May be overridden. + + """ + self.socket.bind(self.server_address) + + def server_activate(self): + """Called by constructor to activate the server. + + May be overridden. + + """ + self.socket.listen(self.request_queue_size) + + def fileno(self): + """Return socket file number. + + Interface required by select(). + + """ + return self.socket.fileno() + + def serve_forever(self): + """Handle one request at a time until doomsday.""" + while 1: + self.handle_request() + + # The distinction between handling, getting, processing and + # finishing a request is fairly arbitrary. Remember: + # + # - handle_request() is the top-level call. 
It calls + # get_request(), verify_request() and process_request() + # - get_request() is different for stream or datagram sockets + # - process_request() is the place that may fork a new process + # or create a new thread to finish the request + # - finish_request() instantiates the request handler class; + # this constructor will handle the request all by itself + + def handle_request(self): + """Handle one request, possibly blocking.""" + request, client_address = self.get_request() + if self.verify_request(request, client_address): + try: + self.process_request(request, client_address) + except: + self.handle_error(request, client_address) + + def get_request(self): + """Get the request and client address from the socket. + + May be overridden. + + """ + return self.socket.accept() + + def verify_request(self, request, client_address): + """Verify the request. May be overridden. + + Return true if we should proceed with this request. + + """ + return 1 + + def process_request(self, request, client_address): + """Call finish_request. + + Overridden by ForkingMixIn and ThreadingMixIn. + + """ + self.finish_request(request, client_address) + + def finish_request(self, request, client_address): + """Finish one request by instantiating RequestHandlerClass.""" + self.RequestHandlerClass(request, client_address, self) + + def handle_error(self, request, client_address): + """Handle an error gracefully. May be overridden. + + The default is to print a traceback and continue. 
+ + """ + exc, value, tb = sys.exc_type, sys.exc_value, sys.exc_traceback + print '-'*40 + print 'Exception happened during processing of request from', + print client_address + import traceback + traceback.print_exception(exc, value, tb) + print '-'*40 + + +class UDPServer(TCPServer): + + """UDP server class.""" + + socket_type = socket.SOCK_DGRAM + + max_packet_size = 8192 + + def get_request(self): + return self.socket.recvfrom(max_packet_size) + + +if hasattr(socket, 'AF_UNIX'): + + class UnixStreamServer(TCPServer): + + address_family = socket.AF_UNIX + + + class UnixDatagramServer(UDPServer): + + address_family = socket.AF_UNIX + + +class ForkingMixIn: + + """Mix-in class to handle each request in a new process.""" + + active_children = None + + def collect_children(self): + """Internal routine to wait for died children.""" + while self.active_children: + pid = os.waitpid(0, os.WNOHANG) + if not pid: break + self.active_children.remove(pid) + + def process_request(self, request, client_address): + """Fork a new subprocess to process the request.""" + self.collect_children() + pid = os.fork() + if pid: + # Parent process + if self.active_children is None: + self.active_children = [] + self.active_children.append(pid) + return + else: + # Child process. + # This must never return, hence os._exit()! 
+ try: + self.finish_request(request, client_address) + os._exit(0) + except: + try: + self.handle_error(request, + client_address) + finally: + os._exit(1) + + +class ThreadingMixIn: + + """Mix-in class to handle each request in a new thread.""" + + def process_request(self, request, client_address): + """Start a new thread to process the request.""" + import thread + thread.start_new_thread(self.finish_request, + (request, client_address)) + + +class ForkingUDPServer(ForkingMixIn, UDPServer): pass +class ForkingTCPServer(ForkingMixIn, TCPServer): pass + +class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass +class ThreadingTCPServer(ThreadingMixIn, TCPServer): pass + + +class BaseRequestHandler: + + """Base class for request handler classes. + + This class is instantiated for each request to be handled. The + constructor sets the instance variables request, client_address + and server, and then calls the handle() method. To implement a + specific service, all you need to do is to derive a class which + defines a handle() method. + + The handle() method can find the request as self.request, the + client address as self.client_address, and the server (in case it + needs access to per-server information) as self.server. Since a + separate instance is created for each request, the handle() method + can define arbitrary other instance variables. + + """ + + def __init__(self, request, client_address, server): + self.request = request + self.client_address = client_address + self.server = server + try: + self.setup() + self.handle() + self.finish() + finally: + sys.exc_traceback = None # Help garbage collection + + def setup(self): + pass + + def __del__(self): + pass + + def handle(self): + pass + + def finish(self): + pass + + +# The following two classes make it possible to use the same service +# class for stream or datagram servers.
+# Each class sets up these instance variables: +# - rfile: a file object from which receives the request is read +# - wfile: a file object to which the reply is written +# When the handle() method returns, wfile is flushed properly + + +class StreamRequestHandler(BaseRequestHandler): + + """Define self.rfile and self.wfile for stream sockets.""" + + def setup(self): + self.connection = self.request + self.rfile = self.connection.makefile('r') + self.wfile = self.connection.makefile('w', 0) + + def finish(self): + self.wfile.flush() + + +class DatagramRequestHandler(BaseRequestHandler): + + """Define self.rfile and self.wfile for datagram sockets.""" + + def setup(self): + import StringIO + self.packet, self.socket = self.request + self.rfile = StringIO.StringIO(self.packet) + self.wfile = StringIO.StringIO(self.packet) + + def finish(self): + self.socket.send(self.wfile.getvalue()) diff --git a/Lib/ihooks.py b/Lib/ihooks.py new file mode 100644 index 0000000..b379fc5 --- /dev/null +++ b/Lib/ihooks.py @@ -0,0 +1,365 @@ +"""Import hook support. + +Consistent use of this module will make it possible to change the +different mechanisms involved in loading modules independently. + +While the built-in module imp exports interfaces to the built-in +module searching and loading algorithm, and it is possible to replace +the built-in function __import__ in order to change the semantics of +the import statement, until now it has been difficult to combine the +effect of different __import__ hacks, like loading modules from URLs +(rimport.py), implementing a hierarchical module namespace (newimp.py) +or restricted execution (rexec.py). + +This module defines three new concepts: + +(1) A "file system hooks" class provides an interface to a filesystem. + +One hooks class is defined (Hooks), which uses the interface provided +by standard modules os and os.path. It should be used as the base +class for other hooks classes. 
+ +(2) A "module loader" class provides an interface to search for a +module in a search path and to load it. It defines a method which +searches for a module in a single directory; by overriding this method +one can redefine the details of the search. If the directory is None, +built-in and frozen modules are searched instead. + +Two module loader classes are defined, both implementing the search +strategy used by the built-in __import__ function: BasicModuleLoader uses +the imp module's find_module interface, while ModuleLoader +uses a file system hooks class to interact with the file system. Both +use the imp module's load_* interfaces to actually load the module. + +(3) A "module importer" class provides an interface to import a +module, as well as interfaces to reload and unload a module. It also +provides interfaces to install and uninstall itself instead of the +default __import__ and reload (and unload) functions. + +One module importer class is defined (ModuleImporter), which uses a +module loader instance passed in (by default ModuleLoader is +instantiated). + +The classes defined here should be used as base classes for extended +functionality along those lines. + +If a module importer class supports dotted names, its import_module() +must return a different value depending on whether it is called on +behalf of a "from ... import ..." statement or not. (This is caused +by the way the __import__ hook is used by the Python interpreter.) It +would also be wise to install a different version of reload(). + +XXX Should the imp.load_* functions also be called via the hooks +instance?
+ +""" + + +import __builtin__ +import imp +import os +import sys + + +from imp import C_EXTENSION, PY_SOURCE, PY_COMPILED +BUILTIN_MODULE = 32 +FROZEN_MODULE = 33 + + +class _Verbose: + + def __init__(self, verbose = 0): + self.verbose = verbose + + def get_verbose(self): + return self.verbose + + def set_verbose(self, verbose): + self.verbose = verbose + + # XXX The following is an experimental interface + + def note(self, *args): + if self.verbose: + apply(self.message, args) + + def message(self, format, *args): + print format%args + + +class BasicModuleLoader(_Verbose): + + """Basic module loader. + + This provides the same functionality as built-in import. It + doesn't deal with checking sys.modules -- all it provides is + find_module() and a load_module(), as well as find_module_in_dir() + which searches just one directory, and can be overridden by a + derived class to change the module search algorithm when the basic + dependency on sys.path is unchanged. + + The interface is a little more convenient than imp's: + find_module(name, [path]) returns None or 'stuff', and + load_module(name, stuff) loads the module. 
+ + """ + + def find_module(self, name, path = None): + if path is None: + path = [None] + self.default_path() + for dir in path: + stuff = self.find_module_in_dir(name, dir) + if stuff: return stuff + return None + + def default_path(self): + return sys.path + + def find_module_in_dir(self, name, dir): + if dir is None: + return self.find_builtin_module(name) + else: + try: + return imp.find_module(name, [dir]) + except ImportError: + return None + + def find_builtin_module(self, name): + if imp.is_builtin(name): + return None, '', ('', '', BUILTIN_MODULE) + if imp.is_frozen(name): + return None, '', ('', '', FROZEN_MODULE) + return None + + def load_module(self, name, stuff): + file, filename, (suff, mode, type) = stuff + if type == BUILTIN_MODULE: + return imp.init_builtin(name) + if type == FROZEN_MODULE: + return imp.init_frozen(name) + if type == C_EXTENSION: + return imp.load_dynamic(name, filename, file) + if type == PY_SOURCE: + return imp.load_source(name, filename, file) + if type == PY_COMPILED: + return imp.load_compiled(name, filename, file) + raise ImportError, "Unrecognized module type (%s) for %s" % \ + (`type`, name) + + +class Hooks(_Verbose): + + """Hooks into the filesystem and interpreter. + + By deriving a subclass you can redefine your filesystem interface, + e.g. to merge it with the URL space. + + This base class behaves just like the native filesystem. 
+ + """ + + # imp interface + def get_suffixes(self): return imp.get_suffixes() + def new_module(self, name): return imp.new_module(name) + def is_builtin(self, name): return imp.is_builtin(name) + def init_builtin(self, name): return imp.init_builtin(name) + def is_frozen(self, name): return imp.is_frozen(name) + def init_frozen(self, name): return imp.init_frozen(name) + def get_frozen_object(self, name): return imp.get_frozen_object(name) + def load_source(self, name, filename, file=None): + return imp.load_source(name, filename, file) + def load_compiled(self, name, filename, file=None): + return imp.load_compiled(name, filename, file) + def load_dynamic(self, name, filename, file=None): + return imp.load_dynamic(name, filename, file) + + def add_module(self, name): + d = self.modules_dict() + if d.has_key(name): return d[name] + d[name] = m = self.new_module(name) + return m + + # sys interface + def modules_dict(self): return sys.modules + def default_path(self): return sys.path + + def path_split(self, x): return os.path.split(x) + def path_join(self, x, y): return os.path.join(x, y) + def path_isabs(self, x): return os.path.isabs(x) + # etc. + + def path_exists(self, x): return os.path.exists(x) + def path_isdir(self, x): return os.path.isdir(x) + def path_isfile(self, x): return os.path.isfile(x) + def path_islink(self, x): return os.path.islink(x) + # etc. + + def openfile(self, *x): return apply(open, x) + openfile_error = IOError + def listdir(self, x): return os.listdir(x) + listdir_error = os.error + # etc. + + +class ModuleLoader(BasicModuleLoader): + + """Default module loader; uses file system hooks. + + By defining suitable hooks, you might be able to load modules from + other sources than the file system, e.g. from compressed or + encrypted files, tar files or (if you're brave!) URLs. 
+ + """ + + def __init__(self, hooks = None, verbose = 0): + BasicModuleLoader.__init__(self, verbose) + self.hooks = hooks or Hooks(verbose) + + def default_path(self): + return self.hooks.default_path() + + def modules_dict(self): + return self.hooks.modules_dict() + + def get_hooks(self): + return self.hooks + + def set_hooks(self, hooks): + self.hooks = hooks + + def find_builtin_module(self, name): + if self.hooks.is_builtin(name): + return None, '', ('', '', BUILTIN_MODULE) + if self.hooks.is_frozen(name): + return None, '', ('', '', FROZEN_MODULE) + return None + + def find_module_in_dir(self, name, dir): + if dir is None: + return self.find_builtin_module(name) + for info in self.hooks.get_suffixes(): + suff, mode, type = info + fullname = self.hooks.path_join(dir, name+suff) + try: + fp = self.hooks.openfile(fullname, mode) + return fp, fullname, info + except self.hooks.openfile_error: + pass + return None + + def load_module(self, name, stuff): + file, filename, (suff, mode, type) = stuff + if type == BUILTIN_MODULE: + return self.hooks.init_builtin(name) + if type == FROZEN_MODULE: + return self.hooks.init_frozen(name) + if type == C_EXTENSION: + return self.hooks.load_dynamic(name, filename, file) + if type == PY_SOURCE: + return self.hooks.load_source(name, filename, file) + if type == PY_COMPILED: + return self.hooks.load_compiled(name, filename, file) + raise ImportError, "Unrecognized module type (%s) for %s" % \ + (`type`, name) + + +class FancyModuleLoader(ModuleLoader): + + """Fancy module loader -- parses and execs the code itself.""" + + def load_module(self, name, stuff): + file, filename, (suff, mode, type) = stuff + if type == FROZEN_MODULE: + code = self.hooks.get_frozen_object(name) + elif type == PY_COMPILED: + file.seek(8) + code = marshal.load(file) + elif type == PY_SOURCE: + data = file.read() + code = compile(data, filename, 'exec') + else: + return ModuleLoader.load_module(self, name, stuff) + m = self.hooks.add_module(name) + exec 
code in m.__dict__ + return m + + +class ModuleImporter(_Verbose): + + """Default module importer; uses module loader. + + This provides the same functionality as built-in import, when + combined with ModuleLoader. + + """ + + def __init__(self, loader = None, verbose = 0): + _Verbose.__init__(self, verbose) + self.loader = loader or ModuleLoader(None, verbose) + self.modules = self.loader.modules_dict() + + def get_loader(self): + return self.loader + + def set_loader(self, loader): + self.loader = loader + + def get_hooks(self): + return self.loader.get_hooks() + + def set_hooks(self, hooks): + return self.loader.set_hooks(hooks) + + def import_module(self, name, globals={}, locals={}, fromlist=[]): + if self.modules.has_key(name): + return self.modules[name] # Fast path + stuff = self.loader.find_module(name) + if not stuff: + raise ImportError, "No module named %s" % name + return self.loader.load_module(name, stuff) + + def reload(self, module, path = None): + stuff = self.loader.find_module(name, path) + if not stuff: + raise ImportError, "Module %s not found for reload" % name + return self.loader.load_module(name, stuff) + + def unload(self, module): + del self.modules[module.__name__] + # XXX Should this try to clear the module's namespace? + + def install(self): + self.save_import_module = __builtin__.__import__ + self.save_reload = __builtin__.reload + if not hasattr(__builtin__, 'unload'): + __builtin__.unload = None + self.save_unload = __builtin__.unload + __builtin__.__import__ = self.import_module + __builtin__.reload = self.reload + __builtin__.unload = self.unload + + def uninstall(self): + __builtin__.__import__ = self.save_import_module + __builtin__.reload = self.save_reload + __builtin__.unload = self.save_unload + if not __builtin__.unload: + del __builtin__.unload + + +# XXX Some experimental hacks -- importing ihooks auto-installs! +# XXX (That's supposed to be transparent anyway...) 
+ +default_importer = None +current_importer = None + +def install(importer = None): + global current_importer + current_importer = importer or default_importer or ModuleImporter() + current_importer.install() + +def uninstall(): + global current_importer + current_importer.uninstall() + + +install() diff --git a/Lib/ni.py b/Lib/ni.py new file mode 100644 index 0000000..70b1c22 --- /dev/null +++ b/Lib/ni.py @@ -0,0 +1,390 @@ +"""New import scheme with package support. + +A Package is a module that can contain other modules. Packages can be +nested. Packages introduce dotted names for modules, like P.Q.M, which +could correspond to a file P/Q/M.py found somewhere on sys.path. It +is possible to import a package itself, though this makes little sense +unless the package contains a module called __init__. + +A package has two variables that control the namespace used for +packages and modules, both initialized to sensible defaults the first +time the package is referenced. + +(1) A package's *module search path*, contained in the per-package +variable __path__, defines a list of *directories* where submodules or +subpackages of the package are searched. It is initialized to the +directory containing the package. Setting this variable to None makes +the module search path default to sys.path (this is not quite the same +as setting it to sys.path, since the latter won't track later +assignments to sys.path). + +(2) A package's *import domain*, contained in the per-package variable +__domain__, defines a list of *packages* that are searched (using +their respective module search paths) to satisfy imports. It is +initialized to the list consisting of the package itself, its parent +package, its parent's parent, and so on, ending with the root package +(the nameless package containing all top-level packages and modules, +whose module search path is None, implying sys.path). + +The default domain implements a search algorithm called "expanding +search".
An alternative search algorithm called "explicit search" +fixes the import search path to contain only the root package, +requiring the modules in the package to name all imported modules by +their full name. The convention of using '__' to refer to the current +package (both as a per-module variable and in module names) can be +used by packages using explicit search to refer to modules in the same +package; this combination is known as "explicit-relative search". + +The PackageImporter and PackageLoader classes together implement the +following policies: + +- There is a root package, whose name is ''. It cannot be imported + directly but may be referenced, e.g. by using '__' from a top-level + module. + +- In each module or package, the variable '__' contains a reference to + the parent package; in the root package, '__' points to itself. + +- In the name for imported modules (e.g. M in "import M" or "from M + import ..."), a leading '__' refers to the current package (i.e. + the package containing the current module); leading '__.__' and so + on refer to the current package's parent, and so on. The use of + '__' elsewhere in the module name is not supported. + +- Modules are searched using the "expanding search" algorithm by + virtue of the default value for __domain__. + +- If A.B.C is imported, A is searched using __domain__; then + subpackage B is searched in A using its __path__, and so on. + +- Built-in modules have priority: even if a file sys.py exists in a + package, "import sys" imports the built-in sys module. + +- The same holds for frozen modules, for better or for worse. + +- Submodules and subpackages are not automatically loaded when their + parent package is loaded. + +- The construct "from package import *" is illegal. (It can still be + used to import names from a module.) + +- When "from package import module1, module2, ..." is used, those + modules are explicitly loaded.
+ +- When a package is loaded, if it has a submodule __init__, that + module is loaded. This is the place where required submodules can + be loaded, the __path__ variable extended, etc. The __init__ module + is loaded even if the package was loaded only in order to create a + stub for a sub-package: if "import P.Q.R" is the first reference to + P, and P has a submodule __init__, P.__init__ is loaded before P.Q + is even searched. + +Caveats: + +- It is possible to import a package that has no __init__ submodule; + this is not particularly useful but there may be useful applications + for it (e.g. to manipulate its search paths from the outside!). + +- There are no special provisions for os.chdir(). If you plan to use + os.chdir() before you have imported all your modules, it is better + not to have relative pathnames in sys.path. (This could actually be + fixed by changing the implementation of path_join() in the hook to + absolutize paths.) + +- Packages and modules are introduced in sys.modules as soon as their + loading is started. When the loading is terminated by an exception, + the sys.modules entries remain around. + +- There are no special measures to support mutually recursive modules, + but it will work under the same conditions where it works in the + flat module space system. + +- Sometimes dummy entries (whose value is None) are entered in + sys.modules, to indicate that a particular module does not exist -- + this is done to speed up the expanding search algorithm when a + module residing at a higher level is repeatedly imported (Python + promises that importing a previously imported module is cheap!) + +- Although dynamically loaded extensions are allowed inside packages, + the current implementation (hardcoded in the interpreter) of their + initialization may cause problems if an extension invokes the + interpreter during its initialization. + +- reload() may find another version of the module only if it occurs on + the package search path. 
Thus, it keeps the connection to the + package to which the module belongs, but may find a different file. + +XXX Need to have an explicit name for '', e.g. '__root__'. + +""" + + +import imp +import string +import sys +import __builtin__ + +import ihooks +from ihooks import ModuleLoader, ModuleImporter + + +class PackageLoader(ModuleLoader): + + """A subclass of ModuleLoader with package support. + + find_module_in_dir() will succeed if there's a subdirectory with + the given name; load_module() will create a stub for a package and + load its __init__ module if it exists. + + """ + + def find_module_in_dir(self, name, dir): + if dir is not None: + dirname = self.hooks.path_join(dir, name) + if self.hooks.path_isdir(dirname): + return None, dirname, ('', '', 'PACKAGE') + return ModuleLoader.find_module_in_dir(self, name, dir) + + def load_module(self, name, stuff): + file, filename, info = stuff + suff, mode, type = info + if type == 'PACKAGE': + return self.load_package(name, stuff) + if sys.modules.has_key(name): + m = sys.modules[name] + else: + sys.modules[name] = m = imp.new_module(name) + self.set_parent(m) + if type == imp.C_EXTENSION and '.' 
in name: + return self.load_dynamic(name, stuff) + else: + return ModuleLoader.load_module(self, name, stuff) + + def load_dynamic(self, name, stuff): + file, filename, (suff, mode, type) = stuff + # Hack around restriction in imp.load_dynamic() + i = string.rfind(name, '.') + tail = name[i+1:] + if sys.modules.has_key(tail): + save = sys.modules[tail] + else: + save = None + sys.modules[tail] = imp.new_module(name) + try: + m = imp.load_dynamic(tail, filename, file) + finally: + if save: + sys.modules[tail] = save + else: + del sys.modules[tail] + sys.modules[name] = m + return m + + def load_package(self, name, stuff): + file, filename, info = stuff + if sys.modules.has_key(name): + package = sys.modules[name] + else: + sys.modules[name] = package = imp.new_module(name) + package.__path__ = [filename] + self.init_package(package) + return package + + def init_package(self, package): + self.set_parent(package) + self.set_domain(package) + self.call_init_module(package) + + def set_parent(self, m): + name = m.__name__ + if '.' in name: + name = name[:string.rfind(name, '.')] + else: + name = '' + m.__ = sys.modules[name] + + def set_domain(self, package): + name = package.__name__ + package.__domain__ = domain = [name] + while '.' 
in name: + name = name[:string.rfind(name, '.')] + domain.append(name) + if name: + domain.append('') + + def call_init_module(self, package): + stuff = self.find_module('__init__', package.__path__) + if stuff: + m = self.load_module(package.__name__ + '.__init__', stuff) + package.__init__ = m + + +class PackageImporter(ModuleImporter): + + """Importer that understands packages and '__'.""" + + def __init__(self, loader = None, verbose = 0): + ModuleImporter.__init__(self, + loader or PackageLoader(None, verbose), verbose) + + def import_module(self, name, globals={}, locals={}, fromlist=[]): + if globals.has_key('__'): + package = globals['__'] + else: + # No calling context, assume in root package + package = sys.modules[''] + if name[:3] in ('__.', '__'): + p = package + name = name[3:] + while name[:3] in ('__.', '__'): + p = package.__ + name = name[3:] + if not name: + return self.finish(package, p, '', fromlist) + if '.' in name: + i = string.find(name, '.') + name, tail = name[:i], name[i:] + else: + tail = '' + mname = p.__name__ and p.__name__+'.'+name or name + m = self.get1(mname) + return self.finish(package, m, tail, fromlist) + if '.' 
in name: + i = string.find(name, '.') + name, tail = name[:i], name[i:] + else: + tail = '' + for pname in package.__domain__: + mname = pname and pname+'.'+name or name + m = self.get0(mname) + if m: break + else: + raise ImportError, "No such module %s" % name + return self.finish(m, m, tail, fromlist) + + def finish(self, module, m, tail, fromlist): + # Got ....A; now get ....A.B.C.D + yname = m.__name__ + if tail and sys.modules.has_key(yname + tail): # Fast path + yname, tail = yname + tail, '' + m = self.get1(yname) + while tail: + i = string.find(tail, '.', 1) + if i > 0: + head, tail = tail[:i], tail[i:] + else: + head, tail = tail, '' + yname = yname + head + m = self.get1(yname) + + # Got ....A.B.C.D; now finalize things depending on fromlist + if not fromlist: + return module + if '__' in fromlist: + raise ImportError, "Can't import __ from anywhere" + if not hasattr(m, '__path__'): return m + if '*' in fromlist: + raise ImportError, "Can't import * from a package" + for f in fromlist: + if hasattr(m, f): continue + fname = yname + '.' + f + self.get1(fname) + return m + + def get1(self, name): + m = self.get(name) + if not m: + raise ImportError, "No module named %s" % name + return m + + def get0(self, name): + m = self.get(name) + if not m: + sys.modules[name] = None + return m + + def get(self, name): + # Internal routine to get or load a module when its parent exists + if sys.modules.has_key(name): + return sys.modules[name] + if '.' in name: + i = string.rfind(name, '.') + head, tail = name[:i], name[i+1:] + else: + head, tail = '', name + path = sys.modules[head].__path__ + stuff = self.loader.find_module(tail, path) + if not stuff: + return None + sys.modules[name] = m = self.loader.load_module(name, stuff) + if head: + setattr(sys.modules[head], tail, m) + return m + + def reload(self, module): + name = module.__name__ + if '.' 
in name: + i = string.rfind(name, '.') + head, tail = name[:i], name[i+1:] + path = sys.modules[head].__path__ + else: + tail = name + path = sys.modules[''].__path__ + stuff = self.loader.find_module(tail, path) + if not stuff: + raise ImportError, "No module named %s" % name + return self.loader.load_module(name, stuff) + + def unload(self, module): + if hasattr(module, '__path__'): + raise ImportError, "don't know how to unload packages yet" + PackageImporter.unload(self, module) + + def install(self): + if not sys.modules.has_key(''): + sys.modules[''] = package = imp.new_module('') + package.__path__ = None + self.loader.init_package(package) + for m in sys.modules.values(): + if not m: continue + if not hasattr(m, '__'): + self.loader.set_parent(m) + ModuleImporter.install(self) + + +def install(v = 0): + ihooks.install(PackageImporter(None, v)) + +def uninstall(): + ihooks.uninstall() + +def ni(v = 0): + install(v) + +def no(): + uninstall() + +def test(): + import pdb + try: + testproper() + except: + sys.last_type, sys.last_value, sys.last_traceback = ( + sys.exc_type, sys.exc_value, sys.exc_traceback) + print + print sys.last_type, ':', sys.last_value + print + pdb.pm() + +def testproper(): + install(1) + try: + import mactest + print dir(mactest) + raw_input('OK?') + finally: + uninstall() + + +if __name__ == '__main__': + test() diff --git a/Lib/ni1.py b/Lib/ni1.py new file mode 100644 index 0000000..70b1c22 --- /dev/null +++ b/Lib/ni1.py @@ -0,0 +1,390 @@ +"""New import scheme with package support. + +A Package is a module that can contain other modules. Packages can be +nested. Package introduce dotted names for modules, like P.Q.M, which +could correspond to a file P/Q/M.py found somewhere on sys.path. It +is possible to import a package itself, though this makes little sense +unless the package contains a module called __init__. 
+ +A package has two variables that control the namespace used for +packages and modules, both initialized to sensible defaults the first +time the package is referenced. + +(1) A package's *module search path*, contained in the per-package +variable __path__, defines a list of *directories* where submodules or +subpackages of the package are searched. It is initialized to the +directory containing the package. Setting this variable to None makes +the module search path default to sys.path (this is not quite the same +as setting it to sys.path, since the latter won't track later +assignments to sys.path). + +(2) A package's *import domain*, contained in the per-package variable +__domain__, defines a list of *packages* that are searched (using +their respective module search paths) to satisfy imports. It is +initialized to the list consisting of the package itself, its parent +package, its parent's parent, and so on, ending with the root package +(the nameless package containing all top-level packages and modules, +whose module search path is None, implying sys.path). + +The default domain implements a search algorithm called "expanding +search". An alternative search algorithm called "explicit search" +fixes the import search path to contain only the root package, +requiring the modules in the package to name all imported modules by +their full name. The convention of using '__' to refer to the current +package (both as a per-module variable and in module names) can be +used by packages using explicit search to refer to modules in the same +package; this combination is known as "explicit-relative search". + +The PackageImporter and PackageLoader classes together implement the +following policies: + +- There is a root package, whose name is ''. It cannot be imported + directly but may be referenced, e.g. by using '__' from a top-level + module.
+ +- In each module or package, the variable '__' contains a reference to + the parent package; in the root package, '__' points to itself. + +- In the name for imported modules (e.g. M in "import M" or "from M + import ..."), a leading '__' refers to the current package (i.e. + the package containing the current module); leading '__.__' and so + on refer to the current package's parent, and so on. The use of + '__' elsewhere in the module name is not supported. + +- Modules are searched using the "expanding search" algorithm by + virtue of the default value for __domain__. + +- If A.B.C is imported, A is searched using __domain__; then + subpackage B is searched in A using its __path__, and so on. + +- Built-in modules have priority: even if a file sys.py exists in a + package, "import sys" imports the built-in sys module. + +- The same holds for frozen modules, for better or for worse. + +- Submodules and subpackages are not automatically loaded when their + parent package is loaded. + +- The construct "from package import *" is illegal. (It can still be + used to import names from a module.) + +- When "from package import module1, module2, ..." is used, those + modules are explicitly loaded. + +- When a package is loaded, if it has a submodule __init__, that + module is loaded. This is the place where required submodules can + be loaded, the __path__ variable extended, etc. The __init__ module + is loaded even if the package was loaded only in order to create a + stub for a sub-package: if "import P.Q.R" is the first reference to + P, and P has a submodule __init__, P.__init__ is loaded before P.Q + is even searched. + +Caveats: + +- It is possible to import a package that has no __init__ submodule; + this is not particularly useful but there may be useful applications + for it (e.g. to manipulate its search paths from the outside!). + +- There are no special provisions for os.chdir().
If you plan to use + os.chdir() before you have imported all your modules, it is better + not to have relative pathnames in sys.path. (This could actually be + fixed by changing the implementation of path_join() in the hook to + absolutize paths.) + +- Packages and modules are introduced in sys.modules as soon as their + loading is started. When the loading is terminated by an exception, + the sys.modules entries remain around. + +- There are no special measures to support mutually recursive modules, + but it will work under the same conditions where it works in the + flat module space system. + +- Sometimes dummy entries (whose value is None) are entered in + sys.modules, to indicate that a particular module does not exist -- + this is done to speed up the expanding search algorithm when a + module residing at a higher level is repeatedly imported (Python + promises that importing a previously imported module is cheap!) + +- Although dynamically loaded extensions are allowed inside packages, + the current implementation (hardcoded in the interpreter) of their + initialization may cause problems if an extension invokes the + interpreter during its initialization. + +- reload() may find another version of the module only if it occurs on + the package search path. Thus, it keeps the connection to the + package to which the module belongs, but may find a different file. + +XXX Need to have an explicit name for '', e.g. '__root__'. + +""" + + +import imp +import string +import sys +import __builtin__ + +import ihooks +from ihooks import ModuleLoader, ModuleImporter + + +class PackageLoader(ModuleLoader): + + """A subclass of ModuleLoader with package support. + + find_module_in_dir() will succeed if there's a subdirectory with + the given name; load_module() will create a stub for a package and + load its __init__ module if it exists. 
+ + """ + + def find_module_in_dir(self, name, dir): + if dir is not None: + dirname = self.hooks.path_join(dir, name) + if self.hooks.path_isdir(dirname): + return None, dirname, ('', '', 'PACKAGE') + return ModuleLoader.find_module_in_dir(self, name, dir) + + def load_module(self, name, stuff): + file, filename, info = stuff + suff, mode, type = info + if type == 'PACKAGE': + return self.load_package(name, stuff) + if sys.modules.has_key(name): + m = sys.modules[name] + else: + sys.modules[name] = m = imp.new_module(name) + self.set_parent(m) + if type == imp.C_EXTENSION and '.' in name: + return self.load_dynamic(name, stuff) + else: + return ModuleLoader.load_module(self, name, stuff) + + def load_dynamic(self, name, stuff): + file, filename, (suff, mode, type) = stuff + # Hack around restriction in imp.load_dynamic() + i = string.rfind(name, '.') + tail = name[i+1:] + if sys.modules.has_key(tail): + save = sys.modules[tail] + else: + save = None + sys.modules[tail] = imp.new_module(name) + try: + m = imp.load_dynamic(tail, filename, file) + finally: + if save: + sys.modules[tail] = save + else: + del sys.modules[tail] + sys.modules[name] = m + return m + + def load_package(self, name, stuff): + file, filename, info = stuff + if sys.modules.has_key(name): + package = sys.modules[name] + else: + sys.modules[name] = package = imp.new_module(name) + package.__path__ = [filename] + self.init_package(package) + return package + + def init_package(self, package): + self.set_parent(package) + self.set_domain(package) + self.call_init_module(package) + + def set_parent(self, m): + name = m.__name__ + if '.' in name: + name = name[:string.rfind(name, '.')] + else: + name = '' + m.__ = sys.modules[name] + + def set_domain(self, package): + name = package.__name__ + package.__domain__ = domain = [name] + while '.' 
in name: + name = name[:string.rfind(name, '.')] + domain.append(name) + if name: + domain.append('') + + def call_init_module(self, package): + stuff = self.find_module('__init__', package.__path__) + if stuff: + m = self.load_module(package.__name__ + '.__init__', stuff) + package.__init__ = m + + +class PackageImporter(ModuleImporter): + + """Importer that understands packages and '__'.""" + + def __init__(self, loader = None, verbose = 0): + ModuleImporter.__init__(self, + loader or PackageLoader(None, verbose), verbose) + + def import_module(self, name, globals={}, locals={}, fromlist=[]): + if globals.has_key('__'): + package = globals['__'] + else: + # No calling context, assume in root package + package = sys.modules[''] + if name[:3] in ('__.', '__'): + p = package + name = name[3:] + while name[:3] in ('__.', '__'): + p = package.__ + name = name[3:] + if not name: + return self.finish(package, p, '', fromlist) + if '.' in name: + i = string.find(name, '.') + name, tail = name[:i], name[i:] + else: + tail = '' + mname = p.__name__ and p.__name__+'.'+name or name + m = self.get1(mname) + return self.finish(package, m, tail, fromlist) + if '.' 
in name: + i = string.find(name, '.') + name, tail = name[:i], name[i:] + else: + tail = '' + for pname in package.__domain__: + mname = pname and pname+'.'+name or name + m = self.get0(mname) + if m: break + else: + raise ImportError, "No such module %s" % name + return self.finish(m, m, tail, fromlist) + + def finish(self, module, m, tail, fromlist): + # Got ....A; now get ....A.B.C.D + yname = m.__name__ + if tail and sys.modules.has_key(yname + tail): # Fast path + yname, tail = yname + tail, '' + m = self.get1(yname) + while tail: + i = string.find(tail, '.', 1) + if i > 0: + head, tail = tail[:i], tail[i:] + else: + head, tail = tail, '' + yname = yname + head + m = self.get1(yname) + + # Got ....A.B.C.D; now finalize things depending on fromlist + if not fromlist: + return module + if '__' in fromlist: + raise ImportError, "Can't import __ from anywhere" + if not hasattr(m, '__path__'): return m + if '*' in fromlist: + raise ImportError, "Can't import * from a package" + for f in fromlist: + if hasattr(m, f): continue + fname = yname + '.' + f + self.get1(fname) + return m + + def get1(self, name): + m = self.get(name) + if not m: + raise ImportError, "No module named %s" % name + return m + + def get0(self, name): + m = self.get(name) + if not m: + sys.modules[name] = None + return m + + def get(self, name): + # Internal routine to get or load a module when its parent exists + if sys.modules.has_key(name): + return sys.modules[name] + if '.' in name: + i = string.rfind(name, '.') + head, tail = name[:i], name[i+1:] + else: + head, tail = '', name + path = sys.modules[head].__path__ + stuff = self.loader.find_module(tail, path) + if not stuff: + return None + sys.modules[name] = m = self.loader.load_module(name, stuff) + if head: + setattr(sys.modules[head], tail, m) + return m + + def reload(self, module): + name = module.__name__ + if '.' 
in name: + i = string.rfind(name, '.') + head, tail = name[:i], name[i+1:] + path = sys.modules[head].__path__ + else: + tail = name + path = sys.modules[''].__path__ + stuff = self.loader.find_module(tail, path) + if not stuff: + raise ImportError, "No module named %s" % name + return self.loader.load_module(name, stuff) + + def unload(self, module): + if hasattr(module, '__path__'): + raise ImportError, "don't know how to unload packages yet" + PackageImporter.unload(self, module) + + def install(self): + if not sys.modules.has_key(''): + sys.modules[''] = package = imp.new_module('') + package.__path__ = None + self.loader.init_package(package) + for m in sys.modules.values(): + if not m: continue + if not hasattr(m, '__'): + self.loader.set_parent(m) + ModuleImporter.install(self) + + +def install(v = 0): + ihooks.install(PackageImporter(None, v)) + +def uninstall(): + ihooks.uninstall() + +def ni(v = 0): + install(v) + +def no(): + uninstall() + +def test(): + import pdb + try: + testproper() + except: + sys.last_type, sys.last_value, sys.last_traceback = ( + sys.exc_type, sys.exc_value, sys.exc_traceback) + print + print sys.last_type, ':', sys.last_value + print + pdb.pm() + +def testproper(): + install(1) + try: + import mactest + print dir(mactest) + raw_input('OK?') + finally: + uninstall() + + +if __name__ == '__main__': + test() |