diff options
Diffstat (limited to 'Lib/wsgiref')
| -rw-r--r-- | Lib/wsgiref/handlers.py | 203 | ||||
| -rw-r--r-- | Lib/wsgiref/headers.py | 62 | ||||
| -rw-r--r-- | Lib/wsgiref/simple_server.py | 59 | ||||
| -rw-r--r-- | Lib/wsgiref/util.py | 40 | ||||
| -rw-r--r-- | Lib/wsgiref/validate.py | 42 |
5 files changed, 167 insertions, 239 deletions
diff --git a/Lib/wsgiref/handlers.py b/Lib/wsgiref/handlers.py index 4a77137..67064a6 100644 --- a/Lib/wsgiref/handlers.py +++ b/Lib/wsgiref/handlers.py @@ -5,7 +5,10 @@ from .headers import Headers import sys, os, time -__all__ = ['BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler'] +__all__ = [ + 'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler', + 'IISCGIHandler', 'read_environ' +] # Weekday and month names for HTTP date/time formatting; always English! _weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] @@ -19,6 +22,73 @@ def format_date_time(timestamp): _weekdayname[wd], day, _monthname[month], year, hh, mm, ss ) +_is_request = { + 'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE', + 'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT', +}.__contains__ + +def _needs_transcode(k): + return _is_request(k) or k.startswith('HTTP_') or k.startswith('SSL_') \ + or (k.startswith('REDIRECT_') and _needs_transcode(k[9:])) + +def read_environ(): + """Read environment, fixing HTTP variables""" + enc = sys.getfilesystemencoding() + esc = 'surrogateescape' + try: + ''.encode('utf-8', esc) + except LookupError: + esc = 'replace' + environ = {} + + # Take the basic environment from native-unicode os.environ. Attempt to + # fix up the variables that come from the HTTP request to compensate for + # the bytes->unicode decoding step that will already have taken place. + for k, v in os.environ.items(): + if _needs_transcode(k): + + # On win32, the os.environ is natively Unicode. Different servers + # decode the request bytes using different encodings. + if sys.platform == 'win32': + software = os.environ.get('SERVER_SOFTWARE', '').lower() + + # On IIS, the HTTP request will be decoded as UTF-8 as long + # as the input is a valid UTF-8 sequence. Otherwise it is + # decoded using the system code page (mbcs), with no way to + # detect this has happened. Because UTF-8 is the more likely + # encoding, and mbcs is inherently unreliable (an mbcs string + # that happens to be valid UTF-8 will not be decoded as mbcs) + # always recreate the original bytes as UTF-8. + if software.startswith('microsoft-iis/'): + v = v.encode('utf-8').decode('iso-8859-1') + + # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct + # to the Unicode environ. No modification needed. + elif software.startswith('apache/'): + pass + + # Python 3's http.server.CGIHTTPRequestHandler decodes + # using the urllib.unquote default of UTF-8, amongst other + # issues. + elif ( + software.startswith('simplehttp/') + and 'python/3' in software + ): + v = v.encode('utf-8').decode('iso-8859-1') + + # For other servers, guess that they have written bytes to + # the environ using stdio byte-oriented interfaces, ending up + # with the system code page. + else: + v = v.encode(enc, 'replace').decode('iso-8859-1') + + # Recover bytes from unicode environ, using surrogate escapes + # where available (Python 3.1+). + else: + v = v.encode(enc, esc).decode('iso-8859-1') + + environ[k] = v + return environ class BaseHandler: @@ -37,7 +107,7 @@ class BaseHandler: # os_environ is used to supply configuration from the OS environment: # by default it's a copy of 'os.environ' as of import time, but you can # override this in e.g. your __init__ method. - os_environ = dict(os.environ.items()) + os_environ= read_environ() # Collaborator classes wsgi_file_wrapper = FileWrapper # set to None to disable @@ -45,9 +115,9 @@ class BaseHandler: # Error handling (also per-subclass or per-instance) traceback_limit = None # Print entire traceback to self.get_stderr() - error_status = "500 Dude, this is whack!" + error_status = "500 Internal Server Error" error_headers = [('Content-Type','text/plain')] - error_body = "A server error occurred. Please contact the administrator." + error_body = b"A server error occurred. Please contact the administrator." # State variables (don't mess with these) status = result = None @@ -55,13 +125,6 @@ class BaseHandler: headers = None bytes_sent = 0 - - - - - - - def run(self, application): """Invoke the application""" # Note to self: don't move the close()! Asynchronous servers shouldn't @@ -145,7 +208,7 @@ class BaseHandler: self.set_content_length() def start_response(self, status, headers,exc_info=None): - """'start_response()' callable as specified by PEP 333""" + """'start_response()' callable as specified by PEP 3333""" if exc_info: try: @@ -157,49 +220,48 @@ class BaseHandler: elif self.headers is not None: raise AssertionError("Headers already set!") + self.status = status + self.headers = self.headers_class(headers) status = self._convert_string_type(status, "Status") assert len(status)>=4,"Status must be at least 4 characters" assert int(status[:3]),"Status message must begin w/3-digit code" assert status[3]==" ", "Status message must have a space after code" - str_headers = [] - for name,val in headers: - name = self._convert_string_type(name, "Header name") - val = self._convert_string_type(val, "Header value") - str_headers.append((name, val)) - assert not is_hop_by_hop(name),"Hop-by-hop headers not allowed" + if __debug__: + for name, val in headers: + name = self._convert_string_type(name, "Header name") + val = self._convert_string_type(val, "Header value") + assert not is_hop_by_hop(name),"Hop-by-hop headers not allowed" - self.status = status - self.headers = self.headers_class(str_headers) return self.write def _convert_string_type(self, value, title): """Convert/check value type.""" - if isinstance(value, str): + if type(value) is str: return value - assert isinstance(value, bytes), \ - "{0} must be a string or bytes object (not {1})".format(title, value) - return str(value, "iso-8859-1") + raise AssertionError( + "{0} must be of type str (got {1})".format(title, repr(value)) + ) def send_preamble(self): """Transmit version/status/date/server, via self._write()""" if self.origin_server: if self.client_is_modern(): - self._write('HTTP/%s %s\r\n' % (self.http_version,self.status)) + self._write(('HTTP/%s %s\r\n' % (self.http_version,self.status)).encode('iso-8859-1')) if 'Date' not in self.headers: self._write( - 'Date: %s\r\n' % format_date_time(time.time()) + ('Date: %s\r\n' % format_date_time(time.time())).encode('iso-8859-1') ) if self.server_software and 'Server' not in self.headers: - self._write('Server: %s\r\n' % self.server_software) + self._write(('Server: %s\r\n' % self.server_software).encode('iso-8859-1')) else: - self._write('Status: %s\r\n' % self.status) + self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1')) def write(self, data): - """'write()' callable as specified by PEP 333""" + """'write()' callable as specified by PEP 3333""" - assert isinstance(data, (str, bytes)), \ - "write() argument must be a string or bytes" + assert type(data) is bytes, \ + "write() argument must be a bytes instance" if not self.status: raise AssertionError("write() before start_response()") @@ -266,7 +328,7 @@ class BaseHandler: self.headers_sent = True if not self.origin_server or self.client_is_modern(): self.send_preamble() - self._write(str(self.headers)) + self._write(bytes(self.headers)) def result_is_file(self): @@ -353,15 +415,6 @@ class BaseHandler: raise NotImplementedError - - - - - - - - - class SimpleHandler(BaseHandler): """Handler that's just initialized with streams, environment, etc. @@ -395,12 +448,6 @@ class SimpleHandler(BaseHandler): self.environ.update(self.base_env) def _write(self,data): - if isinstance(data, str): - try: - data = data.encode("iso-8859-1") - except UnicodeEncodeError: - raise ValueError("Unicode data must contain only code points" - " representable in ISO-8859-1 encoding") self.stdout.write(data) def _flush(self): @@ -432,23 +479,6 @@ class BaseCGIHandler(SimpleHandler): origin_server = False - - - - - - - - - - - - - - - - - class CGIHandler(BaseCGIHandler): """CGI-based invocation via sys.stdin/stdout/stderr and os.environ @@ -474,23 +504,42 @@ class CGIHandler(BaseCGIHandler): def __init__(self): BaseCGIHandler.__init__( - self, sys.stdin, sys.stdout, sys.stderr, dict(os.environ.items()), - multithread=False, multiprocess=True + self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr, + read_environ(), multithread=False, multiprocess=True ) +class IISCGIHandler(BaseCGIHandler): + """CGI-based invocation with workaround for IIS path bug + This handler should be used in preference to CGIHandler when deploying on + Microsoft IIS without having set the config allowPathInfo option (IIS>=7) + or metabase allowPathInfoForScriptMappings (IIS<7). + """ + wsgi_run_once = True + os_environ = {} + # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at + # the front, causing problems for WSGI applications that wish to implement + # routing. This handler strips any such duplicated path. + # IIS can be configured to pass the correct PATH_INFO, but this causes + # another bug where PATH_TRANSLATED is wrong. Luckily this variable is + # rarely used and is not guaranteed by WSGI. On IIS<7, though, the + # setting can only be made on a vhost level, affecting all other script + # mappings, many of which break when exposed to the PATH_TRANSLATED bug. + # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7 + # rarely uses it because there is still no UI for it.) - - - - - - - - - - -# + # There is no way for CGI code to tell whether the option was set, so a + # separate handler class is provided. + def __init__(self): + environ= read_environ() + path = environ.get('PATH_INFO', '') + script = environ.get('SCRIPT_NAME', '') + if (path+'/').startswith(script+'/'): + environ['PATH_INFO'] = path[len(script):] + BaseCGIHandler.__init__( + self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr, + environ, multithread=False, multiprocess=True + ) diff --git a/Lib/wsgiref/headers.py b/Lib/wsgiref/headers.py index 025067a..d939628 100644 --- a/Lib/wsgiref/headers.py +++ b/Lib/wsgiref/headers.py @@ -1,6 +1,6 @@ """Manage HTTP Response Headers -Much of this module is red-handedly pilfered from email.Message in the stdlib, +Much of this module is red-handedly pilfered from email.message in the stdlib, so portions are Copyright (C) 2001,2002 Python Software Foundation, and were written by Barry Warsaw. """ @@ -25,38 +25,25 @@ def _formatparam(param, value=None, quote=1): return param - - - - - - - - - - - - class Headers: """Manage a collection of HTTP response headers""" def __init__(self,headers): - if not isinstance(headers, list): + if type(headers) is not list: raise TypeError("Headers must be a list of name/value tuples") - self._headers = [] - for k, v in headers: - k = self._convert_string_type(k) - v = self._convert_string_type(v) - self._headers.append((k, v)) + self._headers = headers + if __debug__: + for k, v in headers: + self._convert_string_type(k) + self._convert_string_type(v) def _convert_string_type(self, value): """Convert/check value type.""" - if isinstance(value, str): + if type(value) is str: return value - assert isinstance(value, bytes), ("Header names/values must be" - " a string or bytes object (not {0})".format(value)) - return str(value, "iso-8859-1") + raise AssertionError("Header names/values must be" + " of type str (got {0})".format(repr(value))) def __len__(self): """Return the total number of headers, including duplicates.""" @@ -87,10 +74,6 @@ class Headers: """ return self.get(name) - - - - def __contains__(self, name): """Return true if the message contains the header.""" return self.get(name) is not None @@ -127,9 +110,6 @@ class Headers: """ return [k for k, v in self._headers] - - - def values(self): """Return a list of all header values. @@ -158,6 +138,9 @@ class Headers: suitable for direct HTTP transmission.""" return '\r\n'.join(["%s: %s" % kv for kv in self._headers]+['','']) + def __bytes__(self): + return str(self).encode('iso-8859-1') + def setdefault(self,name,value): """Return first matching header value for 'name', or 'value' @@ -171,7 +154,6 @@ class Headers: else: return result - def add_header(self, _name, _value, **_params): """Extended header setting. @@ -184,7 +166,7 @@ class Headers: h.add_header('content-disposition', 'attachment', filename='bud.gif') - Note that unlike the corresponding 'email.Message' method, this does + Note that unlike the corresponding 'email.message' method, this does *not* handle '(charset, language, value)' tuples: all values must be strings or None. """ @@ -200,19 +182,3 @@ class Headers: v = self._convert_string_type(v) parts.append(_formatparam(k.replace('_', '-'), v)) self._headers.append((self._convert_string_type(_name), "; ".join(parts))) - - - - - - - - - - - - - - - -# diff --git a/Lib/wsgiref/simple_server.py b/Lib/wsgiref/simple_server.py index da14144..af82f95 100644 --- a/Lib/wsgiref/simple_server.py +++ b/Lib/wsgiref/simple_server.py @@ -1,4 +1,4 @@ -"""BaseHTTPServer that implements the Python WSGI protocol (PEP 333, rev 1.21) +"""BaseHTTPServer that implements the Python WSGI protocol (PEP 3333) This is both an example of how WSGI can be implemented, and a basis for running simple web applications on a local machine, such as might be done when testing @@ -15,7 +15,7 @@ import sys import urllib.parse from wsgiref.handlers import SimpleHandler -__version__ = "0.1" +__version__ = "0.2" __all__ = ['WSGIServer', 'WSGIRequestHandler', 'demo_app', 'make_server'] @@ -38,8 +38,6 @@ class ServerHandler(SimpleHandler): - - class WSGIServer(HTTPServer): """BaseHTTPServer that implements the Python WSGI protocol""" @@ -69,18 +67,6 @@ class WSGIServer(HTTPServer): - - - - - - - - - - - - class WSGIRequestHandler(BaseHTTPRequestHandler): server_version = "WSGIServer/" + __version__ @@ -88,13 +74,14 @@ class WSGIRequestHandler(BaseHTTPRequestHandler): def get_environ(self): env = self.server.base_environ.copy() env['SERVER_PROTOCOL'] = self.request_version + env['SERVER_SOFTWARE'] = self.server_version env['REQUEST_METHOD'] = self.command if '?' in self.path: path,query = self.path.split('?',1) else: path,query = self.path,'' - env['PATH_INFO'] = urllib.parse.unquote(path) + env['PATH_INFO'] = urllib.parse.unquote_to_bytes(path).decode('iso-8859-1') env['QUERY_STRING'] = query host = self.address_string() @@ -139,29 +126,6 @@ class WSGIRequestHandler(BaseHTTPRequestHandler): - - - - - - - - - - - - - - - - - - - - - - - def demo_app(environ,start_response): from io import StringIO stdout = StringIO() @@ -170,7 +134,7 @@ def demo_app(environ,start_response): h = sorted(environ.items()) for k,v in h: print(k,'=',repr(v), file=stdout) - start_response(b"200 OK", [(b'Content-Type',b'text/plain; charset=utf-8')]) + start_response("200 OK", [('Content-Type','text/plain; charset=utf-8')]) return [stdout.getvalue().encode("utf-8")] @@ -190,16 +154,3 @@ if __name__ == '__main__': import webbrowser webbrowser.open('http://localhost:8000/xyz?abc') httpd.handle_request() # serve one request, then exit - - - - - - - - - - - - -# diff --git a/Lib/wsgiref/util.py b/Lib/wsgiref/util.py index 06eb956..1f1e6cc 100644 --- a/Lib/wsgiref/util.py +++ b/Lib/wsgiref/util.py @@ -32,13 +32,6 @@ class FileWrapper: return data raise StopIteration - - - - - - - def guess_scheme(environ): """Return a guess for whether 'wsgi.url_scheme' should be 'http' or 'https' """ @@ -161,7 +154,6 @@ def setup_testing_defaults(environ): - _hoppish = { 'connection':1, 'keep-alive':1, 'proxy-authenticate':1, 'proxy-authorization':1, 'te':1, 'trailers':1, 'transfer-encoding':1, @@ -171,35 +163,3 @@ _hoppish = { def is_hop_by_hop(header_name): """Return true if 'header_name' is an HTTP/1.1 "Hop-by-Hop" header""" return _hoppish(header_name.lower()) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# diff --git a/Lib/wsgiref/validate.py b/Lib/wsgiref/validate.py index 2df3f9f..05a485d 100644 --- a/Lib/wsgiref/validate.py +++ b/Lib/wsgiref/validate.py @@ -128,11 +128,10 @@ def assert_(cond, *args): raise AssertionError(*args) def check_string_type(value, title): - if isinstance(value, str): + if type (value) is str: return value - assert isinstance(value, bytes), \ - "{0} must be a string or bytes object (not {1})".format(title, value) - return str(value, "iso-8859-1") + raise AssertionError( + "{0} must be of type str (got {1})".format(title, repr(value))) def validator(application): @@ -197,20 +196,21 @@ class InputWrapper: def read(self, *args): assert_(len(args) == 1) v = self.input.read(*args) - assert_(isinstance(v, bytes)) + assert_(type(v) is bytes) return v - def readline(self): - v = self.input.readline() - assert_(isinstance(v, bytes)) + def readline(self, *args): + assert_(len(args) <= 1) + v = self.input.readline(*args) + assert_(type(v) is bytes) return v def readlines(self, *args): assert_(len(args) <= 1) lines = self.input.readlines(*args) - assert_(isinstance(lines, list)) + assert_(type(lines) is list) for line in lines: - assert_(isinstance(line, bytes)) + assert_(type(line) is bytes) return lines def __iter__(self): @@ -229,7 +229,7 @@ class ErrorWrapper: self.errors = wsgi_errors def write(self, s): - assert_(isinstance(s, str)) + assert_(type(s) is str) self.errors.write(s) def flush(self): @@ -248,7 +248,7 @@ class WriteWrapper: self.writer = wsgi_writer def __call__(self, s): - assert_(isinstance(s, (str, bytes))) + assert_(type(s) is bytes) self.writer(s) class PartialIteratorWrapper: @@ -275,6 +275,8 @@ class IteratorWrapper: assert_(not self.closed, "Iterator read after closed") v = next(self.iterator) + if type(v) is not bytes: + assert_(False, "Iterator yielded non-bytestring (%r)" % (v,)) if self.check_start_response is not None: assert_(self.check_start_response, "The application returns and we started iterating over its body, but start_response has not yet been called") @@ -294,7 +296,7 @@ class IteratorWrapper: "Iterator garbage collected without being closed") def check_environ(environ): - assert_(isinstance(environ, dict), + assert_(type(environ) is dict, "Environment is not of the right type: %r (environment: %r)" % (type(environ), environ)) @@ -321,11 +323,11 @@ def check_environ(environ): if '.' in key: # Extension, we don't care about its type continue - assert_(isinstance(environ[key], str), + assert_(type(environ[key]) is str, "Environmental variable %s is not a string: %r (value: %r)" % (key, type(environ[key]), environ[key])) - assert_(isinstance(environ['wsgi.version'], tuple), + assert_(type(environ['wsgi.version']) is tuple, "wsgi.version should be a tuple (%r)" % (environ['wsgi.version'],)) assert_(environ['wsgi.url_scheme'] in ('http', 'https'), "wsgi.url_scheme unknown: %r" % environ['wsgi.url_scheme']) @@ -385,12 +387,12 @@ def check_status(status): % status, WSGIWarning) def check_headers(headers): - assert_(isinstance(headers, list), + assert_(type(headers) is list, "Headers (%r) must be of type list: %r" % (headers, type(headers))) header_names = {} for item in headers: - assert_(isinstance(item, tuple), + assert_(type(item) is tuple, "Individual headers (%r) must be of type tuple: %r" % (item, type(item))) assert_(len(item) == 2) @@ -428,14 +430,14 @@ def check_content_type(status, headers): assert_(0, "No Content-Type header found in headers (%s)" % headers) def check_exc_info(exc_info): - assert_(exc_info is None or isinstance(exc_info, tuple), + assert_(exc_info is None or type(exc_info) is tuple, "exc_info (%r) is not a tuple: %r" % (exc_info, type(exc_info))) # More exc_info checks? def check_iterator(iterator): - # Technically a string is legal, which is why it's a really bad + # Technically a bytestring is legal, which is why it's a really bad # idea, because it may cause the response to be returned # character-by-character assert_(not isinstance(iterator, (str, bytes)), "You should not return a string as your application iterator, " - "instead return a single-item list containing that string.") + "instead return a single-item list containing a bytestring.") |
