diff options
author | Guido van Rossum <guido@python.org> | 1995-08-04 04:00:20 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1995-08-04 04:00:20 (GMT) |
commit | e7e578ffe042ca84f5b57e8056cb5598c7b44e5a (patch) | |
tree | 4a3b2e705ae78d7030280aef3f5839579755a698 /Lib/BaseHTTPServer.py | |
parent | 40d1ea3b9ccf9e779f253685d1f5f6cf530945fa (diff) | |
download | cpython-e7e578ffe042ca84f5b57e8056cb5598c7b44e5a.zip cpython-e7e578ffe042ca84f5b57e8056cb5598c7b44e5a.tar.gz cpython-e7e578ffe042ca84f5b57e8056cb5598c7b44e5a.tar.bz2 |
Initial revision
Diffstat (limited to 'Lib/BaseHTTPServer.py')
-rw-r--r-- | Lib/BaseHTTPServer.py | 482 |
1 files changed, 482 insertions, 0 deletions
diff --git a/Lib/BaseHTTPServer.py b/Lib/BaseHTTPServer.py new file mode 100644 index 0000000..681139b --- /dev/null +++ b/Lib/BaseHTTPServer.py @@ -0,0 +1,482 @@ +"""HTTP server base class. + +Note: the class in this module doesn't implement any HTTP request; see +SimpleHTTPServer for simple implementations of GET, HEAD and POST +(including CGI scripts). + +Contents: + +- BaseHTTPRequestHandler: HTTP request handler base class +- test: test function + +XXX To do: + +- send server version +- log requests even later (to capture byte count) +- log user-agent header and other interesting goodies +- send error log to separate file +- are request names really case sensitive? + +""" + + +# See also: +# +# HTTP Working Group T. Berners-Lee +# INTERNET-DRAFT R. T. Fielding +# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen +# Expires September 8, 1995 March 8, 1995 +# +# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt + + +# Log files +# --------- +# +# Here's a quote from the NCSA httpd docs about log file format. +# +# | The logfile format is as follows. Each line consists of: +# | +# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb +# | +# | host: Either the DNS name or the IP number of the remote client +# | rfc931: Any information returned by identd for this person, +# | - otherwise. +# | authuser: If user sent a userid for authentication, the user name, +# | - otherwise. +# | DD: Day +# | Mon: Month (calendar name) +# | YYYY: Year +# | hh: hour (24-hour format, the machine's timezone) +# | mm: minutes +# | ss: seconds +# | request: The first line of the HTTP request as sent by the client. +# | ddd: the status code returned by the server, - if not available. +# | bbbb: the total number of bytes sent, +# | *not including the HTTP/1.0 header*, - if not available +# | +# | You can determine the name of the file accessed through request. +# +# (Actually, the latter is only true if you know the server configuration +# at the time the request was made!) + + +__version__ = "0.2" + + +import sys +import time +import socket # For gethostbyaddr() +import string +import rfc822 +import mimetools +import SocketServer + +# Default error message +DEFAULT_ERROR_MESSAGE = """\ +<head> +<title>Error response</title> +</head> +<body> +<h1>Error response</h1> +<p>Error code %(code)d. +<p>Message: %(message)s. +<p>Error code explanation: %(code)s = %(explain)s. +</body> +""" + + +class HTTPServer(SocketServer.TCPServer): + + def server_bind(self): + """Override server_bind to store the server name.""" + SocketServer.TCPServer.server_bind(self) + host, port = self.socket.getsockname() + if not host or host == '0.0.0.0': + host = socket.gethostname() + hostname, hostnames, hostaddrs = socket.gethostbyaddr(host) + if '.' not in hostname: + for host in hostnames: + if '.' in host: + hostname = host + break + self.server_name = hostname + self.server_port = port + + +class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler): + + """HTTP request handler base class. + + The following explanation of HTTP serves to guide you through the + code as well as to expose any misunderstandings I may have about + HTTP (so you don't need to read the code to figure out I'm wrong + :-). + + HTTP (HyperText Transfer Protocol) is an extensible protocol on + top of a reliable stream transport (e.g. TCP/IP). The protocol + recognizes three parts to a request: + + 1. One line identifying the request type and path + 2. An optional set of RFC-822-style headers + 3. An optional data part + + The headers and data are separated by a blank line. + + The first line of the request has the form + + <command> <path> <version> + + where <command> is a (case-sensitive) keyword such as GET or POST, + <path> is a string containing path information for the request, + and <version> should be the string "HTTP/1.0". <path> is encoded + using the URL encoding scheme (using %xx to signify the ASCII + character with hex code xx). + + The protocol is vague about whether lines are separated by LF + characters or by CRLF pairs -- for compatibility with the widest + range of clients, both should be accepted. Similarly, whitespace + in the request line should be treated sensibly (allowing multiple + spaces between components and allowing trailing whitespace). + + Similarly, for output, lines ought to be separated by CRLF pairs + but most clients grok LF characters just fine. + + If the first line of the request has the form + + <command> <path> + + (i.e. <version> is left out) then this is assumed to be an HTTP + 0.9 request; this form has no optional headers and data part and + the reply consists of just the data. + + The reply form of the HTTP 1.0 protocol again has three parts: + + 1. One line giving the response code + 2. An optional set of RFC-822-style headers + 3. The data + + Again, the headers and data are separated by a blank line. + + The response code line has the form + + <version> <responsecode> <responsestring> + + where <version> is the protocol version (always "HTTP/1.0"), + <responsecode> is a 3-digit response code indicating success or + failure of the request, and <responsestring> is an optional + human-readable string explaining what the response code means. + + This server parses the request and the headers, and then calls a + function specific to the request type (<command>). Specifically, + a request SPAM will be handled by a method handle_SPAM(). If no + such method exists the server sends an error response to the + client. If it exists, it is called with no arguments: + + do_SPAM() + + Note that the request name is case sensitive (i.e. SPAM and spam + are different requests). + + The various request details are stored in instance variables: + + - client_address is the client IP address in the form (host, + port); + + - command, path and version are the broken-down request line; + + - headers is an instance of mimetools.Message (or a derived + class) containing the header information; + + - rfile is a file object open for reading positioned at the + start of the optional input data part; + + - wfile is a file object open for writing. + + IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! + + The first thing to be written must be the response line. Then + follow 0 or more header lines, then a blank line, and then the + actual data (if any). The meaning of the header lines depends on + the command executed by the server; in most cases, when data is + returned, there should be at least one header line of the form + + Content-type: <type>/<subtype> + + where <type> and <subtype> should be registered MIME types, + e.g. "text/html" or "text/plain". + + """ + + # The Python system version, truncated to its first component. + sys_version = "Python/" + string.split(sys.version)[0] + + # The server software version. You may want to override this. + # The format is multiple whitespace-separated strings, + # where each string is of the form name[/version]. + server_version = "BaseHTTP/" + __version__ + + def handle(self): + """Handle a single HTTP request. + + You normally don't need to override this method; see the class + __doc__ string for information on how to handle specific HTTP + commands such as GET and POST. + + """ + + self.raw_requestline = self.rfile.readline() + requestline = self.raw_requestline + if requestline[-2:] == '\r\n': + requestline = requestline[:-2] + elif requestline[-1:] == '\n': + requestline = requestline[:-1] + self.requestline = requestline + words = string.split(requestline) + if len(words) == 3: + [command, path, version] = words + if version != self.protocol_version: + self.send_error(400, "Bad request version (%s)" % `version`) + return + elif len(words) == 2: + [command, path] = words + if command != 'GET': + self.send_error(400, + "Bad HTTP/0.9 request type (%s)" % `command`) + return + version = "HTTP/0.9" + else: + self.send_error(400, "Bad request syntax (%s)" % `command`) + return + self.command, self.path, self.request_version = command, path, version + self.headers = self.MessageClass(self.rfile, 0) + mname = 'do_' + command + if not hasattr(self, mname): + self.send_error(501, "Unsupported method (%s)" % `command`) + return + method = getattr(self, mname) + method() + + def send_error(self, code, message=None): + """Send and log an error reply. + + Arguments are the error code, and a detailed message. + The detailed message defaults to the short entry matching the + response code. + + This sends an error response (so it must be called before any + output has been generated), logs the error, and finally sends + a piece of HTML explaining the error to the user. + + """ + + try: + short, long = self.responses[code] + except KeyError: + short, long = '???', '???' + if not message: + message = short + explain = long + self.log_error("code %d, message %s", code, message) + self.send_response(code, message) + self.end_headers() + self.wfile.write(self.error_message_format % + {'code': code, + 'message': message, + 'explain': explain}) + + error_message_format = DEFAULT_ERROR_MESSAGE + + def send_response(self, code, message=None): + """Send the response header and log the response code. + + Also send two standard headers with the server software + version and the current date. + + """ + self.log_request(code) + if message is None: + if self.responses.has_key(code): + message = self.responses[code][1] + else: + message = '' + if self.request_version != 'HTTP/0.9': + self.wfile.write("%s %s %s\r\n" % + (self.protocol_version, str(code), message)) + self.send_header('Server', self.version_string()) + self.send_header('Date', self.date_time_string()) + + def send_header(self, keyword, value): + """Send a MIME header.""" + if self.request_version != 'HTTP/0.9': + self.wfile.write("%s: %s\r\n" % (keyword, value)) + + def end_headers(self): + """Send the blank line ending the MIME headers.""" + if self.request_version != 'HTTP/0.9': + self.wfile.write("\r\n") + + def log_request(self, code='-', size='-'): + """Log an accepted request. + + This is called by send_reponse(). + + """ + + self.log_message('"%s" %s %s', + self.requestline, str(code), str(size)) + + def log_error(self, *args): + """Log an error. + + This is called when a request cannot be fulfilled. By + default it passes the message on to log_message(). + + Arguments are the same as for log_message(). + + XXX This should go to the separate error log. + + """ + + apply(self.log_message, args) + + def log_message(self, format, *args): + """Log an arbitrary message. + + This is used by all other logging functions. Override + it if you have specific logging wishes. + + The first argument, FORMAT, is a format string for the + message to be logged. If the format string contains + any % escapes requiring parameters, they should be + specified as subsequent arguments (it's just like + printf!). + + The client host and current date/time are prefixed to + every message. + + """ + + sys.stderr.write("%s - - [%s] %s\n" % + (self.address_string(), + self.log_date_time_string(), + format%args)) + + def version_string(self): + """Return the server software version string.""" + return self.server_version + ' ' + self.sys_version + + def date_time_string(self): + """Return the current date and time formatted for a message header.""" + now = time.time() + year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now) + s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( + self.weekdayname[wd], + day, self.monthname[month], year, + hh, mm, ss) + return s + + def log_date_time_string(self): + """Return the current time formatted for logging.""" + now = time.time() + year, month, day, hh, mm, ss, x, y, z = time.localtime(now) + s = "%02d/%3s/%04d %02d:%02d:%02d" % ( + day, self.monthname[month], year, hh, mm, ss) + return s + + weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + + monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + + def address_string(self): + """Return the client address formatted for logging. + + This version looks up the full hostname using gethostbyaddr(), + and tries to find a name that contains at least one dot. + + """ + + (host, port) = self.client_address + try: + name, names, addresses = socket.gethostbyaddr(host) + except socket.error, msg: + return host + names.insert(0, name) + for name in names: + if '.' in name: return name + return names[0] + + + # Essentially static class variables + + # The version of the HTTP protocol we support. + # Don't override unless you know what you're doing (hint: incoming + # requests are required to have exactly this version string). + protocol_version = "HTTP/1.0" + + # The Message-like class used to parse headers + MessageClass = mimetools.Message + + # Table mapping response codes to messages; entries have the + # form {code: (shortmessage, longmessage)}. + # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html + responses = { + 200: ('OK', 'Request fulfilled, document follows'), + 201: ('Created', 'Document created, URL follows'), + 202: ('Accepted', + 'Request accepted, processing continues off-line'), + 203: ('Partial information', 'Request fulfilled from cache'), + 204: ('No response', 'Request fulfilled, nothing follows'), + + 301: ('Moved', 'Object moved permanently -- see URI list'), + 302: ('Found', 'Object moved temporarily -- see URI list'), + 303: ('Method', 'Object moved -- see Method and URL list'), + 304: ('Not modified', + 'Document has not changed singe given time'), + + 400: ('Bad request', + 'Bad request syntax or unsupported method'), + 401: ('Unauthorized', + 'No permission -- see authorization schemes'), + 402: ('Payment required', + 'No payment -- see charging schemes'), + 403: ('Forbidden', + 'Request forbidden -- authorization will not help'), + 404: ('Not found', 'Nothing matches the given URI'), + + 500: ('Internal error', 'Server got itself in trouble'), + 501: ('Not implemented', + 'Server does not support this operation'), + 502: ('Service temporarily overloaded', + 'The server cannot process the request due to a high load'), + 503: ('Gateway timeout', + 'The gateway server did not receive a timely response'), + + } + + +def test(HandlerClass = BaseHTTPRequestHandler, + ServerClass = HTTPServer): + """Test the HTTP request handler class. + + This runs an HTTP server on port 8000 (or the first command line + argument). + + """ + + if sys.argv[1:]: + port = string.atoi(sys.argv[1]) + else: + port = 8000 + server_address = ('', port) + + httpd = ServerClass(server_address, HandlerClass) + + print "Serving HTTP on port", port, "..." + httpd.serve_forever() + + +if __name__ == '__main__': + test() |