diff options
Diffstat (limited to 'Lib/wsgiref/handlers.py')
| -rw-r--r-- | Lib/wsgiref/handlers.py | 215 |
1 files changed, 133 insertions, 82 deletions
diff --git a/Lib/wsgiref/handlers.py b/Lib/wsgiref/handlers.py index 4a77137..63d5993 100644 --- a/Lib/wsgiref/handlers.py +++ b/Lib/wsgiref/handlers.py @@ -5,7 +5,10 @@ from .headers import Headers import sys, os, time -__all__ = ['BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler'] +__all__ = [ + 'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler', + 'IISCGIHandler', 'read_environ' +] # Weekday and month names for HTTP date/time formatting; always English! _weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] @@ -19,6 +22,73 @@ def format_date_time(timestamp): _weekdayname[wd], day, _monthname[month], year, hh, mm, ss ) +_is_request = { + 'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE', + 'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT', +}.__contains__ + +def _needs_transcode(k): + return _is_request(k) or k.startswith('HTTP_') or k.startswith('SSL_') \ + or (k.startswith('REDIRECT_') and _needs_transcode(k[9:])) + +def read_environ(): + """Read environment, fixing HTTP variables""" + enc = sys.getfilesystemencoding() + esc = 'surrogateescape' + try: + ''.encode('utf-8', esc) + except LookupError: + esc = 'replace' + environ = {} + + # Take the basic environment from native-unicode os.environ. Attempt to + # fix up the variables that come from the HTTP request to compensate for + # the bytes->unicode decoding step that will already have taken place. + for k, v in os.environ.items(): + if _needs_transcode(k): + + # On win32, the os.environ is natively Unicode. Different servers + # decode the request bytes using different encodings. + if sys.platform == 'win32': + software = os.environ.get('SERVER_SOFTWARE', '').lower() + + # On IIS, the HTTP request will be decoded as UTF-8 as long + # as the input is a valid UTF-8 sequence. Otherwise it is + # decoded using the system code page (mbcs), with no way to + # detect this has happened. Because UTF-8 is the more likely + # encoding, and mbcs is inherently unreliable (an mbcs string + # that happens to be valid UTF-8 will not be decoded as mbcs) + # always recreate the original bytes as UTF-8. + if software.startswith('microsoft-iis/'): + v = v.encode('utf-8').decode('iso-8859-1') + + # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct + # to the Unicode environ. No modification needed. + elif software.startswith('apache/'): + pass + + # Python 3's http.server.CGIHTTPRequestHandler decodes + # using the urllib.unquote default of UTF-8, amongst other + # issues. + elif ( + software.startswith('simplehttp/') + and 'python/3' in software + ): + v = v.encode('utf-8').decode('iso-8859-1') + + # For other servers, guess that they have written bytes to + # the environ using stdio byte-oriented interfaces, ending up + # with the system code page. + else: + v = v.encode(enc, 'replace').decode('iso-8859-1') + + # Recover bytes from unicode environ, using surrogate escapes + # where available (Python 3.1+). + else: + v = v.encode(enc, esc).decode('iso-8859-1') + + environ[k] = v + return environ class BaseHandler: @@ -37,7 +107,7 @@ class BaseHandler: # os_environ is used to supply configuration from the OS environment: # by default it's a copy of 'os.environ' as of import time, but you can # override this in e.g. your __init__ method. - os_environ = dict(os.environ.items()) + os_environ= read_environ() # Collaborator classes wsgi_file_wrapper = FileWrapper # set to None to disable @@ -45,9 +115,9 @@ class BaseHandler: # Error handling (also per-subclass or per-instance) traceback_limit = None # Print entire traceback to self.get_stderr() - error_status = "500 Dude, this is whack!" + error_status = "500 Internal Server Error" error_headers = [('Content-Type','text/plain')] - error_body = "A server error occurred. Please contact the administrator." + error_body = b"A server error occurred. Please contact the administrator." # State variables (don't mess with these) status = result = None @@ -55,13 +125,6 @@ class BaseHandler: headers = None bytes_sent = 0 - - - - - - - def run(self, application): """Invoke the application""" # Note to self: don't move the close()! Asynchronous servers shouldn't @@ -111,11 +174,13 @@ class BaseHandler: in the event loop to iterate over the data, and to call 'self.close()' once the response is finished. """ - if not self.result_is_file() or not self.sendfile(): - for data in self.result: - self.write(data) - self.finish_content() - self.close() + try: + if not self.result_is_file() or not self.sendfile(): + for data in self.result: + self.write(data) + self.finish_content() + finally: + self.close() def get_scheme(self): @@ -145,7 +210,7 @@ class BaseHandler: self.set_content_length() def start_response(self, status, headers,exc_info=None): - """'start_response()' callable as specified by PEP 333""" + """'start_response()' callable as specified by PEP 3333""" if exc_info: try: @@ -157,49 +222,48 @@ class BaseHandler: elif self.headers is not None: raise AssertionError("Headers already set!") + self.status = status + self.headers = self.headers_class(headers) status = self._convert_string_type(status, "Status") assert len(status)>=4,"Status must be at least 4 characters" assert int(status[:3]),"Status message must begin w/3-digit code" assert status[3]==" ", "Status message must have a space after code" - str_headers = [] - for name,val in headers: - name = self._convert_string_type(name, "Header name") - val = self._convert_string_type(val, "Header value") - str_headers.append((name, val)) - assert not is_hop_by_hop(name),"Hop-by-hop headers not allowed" + if __debug__: + for name, val in headers: + name = self._convert_string_type(name, "Header name") + val = self._convert_string_type(val, "Header value") + assert not is_hop_by_hop(name),"Hop-by-hop headers not allowed" - self.status = status - self.headers = self.headers_class(str_headers) return self.write def _convert_string_type(self, value, title): """Convert/check value type.""" - if isinstance(value, str): + if type(value) is str: return value - assert isinstance(value, bytes), \ - "{0} must be a string or bytes object (not {1})".format(title, value) - return str(value, "iso-8859-1") + raise AssertionError( + "{0} must be of type str (got {1})".format(title, repr(value)) + ) def send_preamble(self): """Transmit version/status/date/server, via self._write()""" if self.origin_server: if self.client_is_modern(): - self._write('HTTP/%s %s\r\n' % (self.http_version,self.status)) + self._write(('HTTP/%s %s\r\n' % (self.http_version,self.status)).encode('iso-8859-1')) if 'Date' not in self.headers: self._write( - 'Date: %s\r\n' % format_date_time(time.time()) + ('Date: %s\r\n' % format_date_time(time.time())).encode('iso-8859-1') ) if self.server_software and 'Server' not in self.headers: - self._write('Server: %s\r\n' % self.server_software) + self._write(('Server: %s\r\n' % self.server_software).encode('iso-8859-1')) else: - self._write('Status: %s\r\n' % self.status) + self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1')) def write(self, data): - """'write()' callable as specified by PEP 333""" + """'write()' callable as specified by PEP 3333""" - assert isinstance(data, (str, bytes)), \ - "write() argument must be a string or bytes" + assert type(data) is bytes, \ + "write() argument must be a bytes instance" if not self.status: raise AssertionError("write() before start_response()") @@ -266,7 +330,7 @@ class BaseHandler: self.headers_sent = True if not self.origin_server or self.client_is_modern(): self.send_preamble() - self._write(str(self.headers)) + self._write(bytes(self.headers)) def result_is_file(self): @@ -353,15 +417,6 @@ class BaseHandler: raise NotImplementedError - - - - - - - - - class SimpleHandler(BaseHandler): """Handler that's just initialized with streams, environment, etc. @@ -395,12 +450,6 @@ class SimpleHandler(BaseHandler): self.environ.update(self.base_env) def _write(self,data): - if isinstance(data, str): - try: - data = data.encode("iso-8859-1") - except UnicodeEncodeError: - raise ValueError("Unicode data must contain only code points" - " representable in ISO-8859-1 encoding") self.stdout.write(data) def _flush(self): @@ -432,23 +481,6 @@ class BaseCGIHandler(SimpleHandler): origin_server = False - - - - - - - - - - - - - - - - - class CGIHandler(BaseCGIHandler): """CGI-based invocation via sys.stdin/stdout/stderr and os.environ @@ -474,23 +506,42 @@ class CGIHandler(BaseCGIHandler): def __init__(self): BaseCGIHandler.__init__( - self, sys.stdin, sys.stdout, sys.stderr, dict(os.environ.items()), - multithread=False, multiprocess=True + self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr, + read_environ(), multithread=False, multiprocess=True ) +class IISCGIHandler(BaseCGIHandler): + """CGI-based invocation with workaround for IIS path bug + This handler should be used in preference to CGIHandler when deploying on + Microsoft IIS without having set the config allowPathInfo option (IIS>=7) + or metabase allowPathInfoForScriptMappings (IIS<7). + """ + wsgi_run_once = True + os_environ = {} + # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at + # the front, causing problems for WSGI applications that wish to implement + # routing. This handler strips any such duplicated path. + # IIS can be configured to pass the correct PATH_INFO, but this causes + # another bug where PATH_TRANSLATED is wrong. Luckily this variable is + # rarely used and is not guaranteed by WSGI. On IIS<7, though, the + # setting can only be made on a vhost level, affecting all other script + # mappings, many of which break when exposed to the PATH_TRANSLATED bug. + # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7 + # rarely uses it because there is still no UI for it.) - - - - - - - - - - -# + # There is no way for CGI code to tell whether the option was set, so a + # separate handler class is provided. + def __init__(self): + environ= read_environ() + path = environ.get('PATH_INFO', '') + script = environ.get('SCRIPT_NAME', '') + if (path+'/').startswith(script+'/'): + environ['PATH_INFO'] = path[len(script):] + BaseCGIHandler.__init__( + self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr, + environ, multithread=False, multiprocess=True + ) |
