summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Phillip J. Eby <pje@telecommunity.com> 2010-11-03 22:39:01 (GMT)
committer Phillip J. Eby <pje@telecommunity.com> 2010-11-03 22:39:01 (GMT)
commit b6d4a8e4de715d7a7f1fa009f5b636643e76e292 (patch)
tree 8c3d0f29582a9191fe4819b2e25f843e7228fa5d
parent 3c6830ca8f406e4a18843aa15186048877555e45 (diff)
download cpython-b6d4a8e4de715d7a7f1fa009f5b636643e76e292.zip
cpython-b6d4a8e4de715d7a7f1fa009f5b636643e76e292.tar.gz
cpython-b6d4a8e4de715d7a7f1fa009f5b636643e76e292.tar.bz2
Implement http://bugs.python.org/issue10155 using And Clover's patch, with added
docs and support for more client-generated CGI variables. (This should complete the WSGI 1.0.1 compliance changes for Python 3.x.)
-rw-r--r-- Doc/library/wsgiref.rst 44
-rw-r--r-- Lib/test/test_wsgiref.py 4
-rw-r--r-- Lib/wsgiref/handlers.py 115
-rw-r--r-- Lib/wsgiref/simple_server.py 5
-rw-r--r-- Misc/NEWS 4
5 files changed, 164 insertions, 8 deletions
diff --git a/Doc/library/wsgiref.rst b/Doc/library/wsgiref.rst
index ceacbac..385652b 100644
--- a/Doc/library/wsgiref.rst
+++ b/Doc/library/wsgiref.rst
@@ -456,6 +456,32 @@ input, output, and error streams.
environment.
+.. class:: IISCGIHandler()
+
+ A specialized alternative to :class:`CGIHandler`, for use when deploying on
+ Microsoft's IIS web server, without having set the config allowPathInfo
+ option (IIS>=7) or metabase allowPathInfoForScriptMappings (IIS<7).
+
+ By default, IIS gives a ``PATH_INFO`` that duplicates the ``SCRIPT_NAME`` at
+ the front, causing problems for WSGI applications that wish to implement
+ routing. This handler strips any such duplicated path.
+
+ IIS can be configured to pass the correct ``PATH_INFO``, but this causes
+ another bug where ``PATH_TRANSLATED`` is wrong. Luckily this variable is
+ rarely used and is not guaranteed by WSGI. On IIS<7, though, the
+ setting can only be made on a vhost level, affecting all other script
+ mappings, many of which break when exposed to the ``PATH_TRANSLATED`` bug.
+ For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
+ rarely uses it because there is still no UI for it.)
+
+ There is no way for CGI code to tell whether the option was set, so a
+ separate handler class is provided. It is used in the same way as
+ :class:`CGIHandler`, i.e., by calling ``IISCGIHandler().run(app)``, where
+ ``app`` is the WSGI application object you wish to invoke.
+
+ .. versionadded:: 3.2
+
+
.. class:: BaseCGIHandler(stdin, stdout, stderr, environ, multithread=True, multiprocess=False)
Similar to :class:`CGIHandler`, but instead of using the :mod:`sys` and
@@ -696,6 +722,24 @@ input, output, and error streams.
version of the response sent to the client. It defaults to ``"1.0"``.
+.. function:: read_environ()
+
+ Transcode CGI variables from ``os.environ`` to PEP 3333 "bytes in unicode"
+ strings, returning a new dictionary. This function is used by
+ :class:`CGIHandler` and :class:`IISCGIHandler` in place of directly using
+ ``os.environ``, which is not necessarily WSGI-compliant on all platforms
+ and web servers using Python 3 -- specifically, ones where the OS's
+ actual environment is Unicode (i.e. Windows), or ones where the environment
+ is bytes, but the system encoding used by Python to decode it is anything
+ other than ISO-8859-1 (e.g. Unix systems using UTF-8).
+
+ If you are implementing a CGI-based handler of your own, you probably want
+ to use this routine instead of just copying values out of ``os.environ``
+ directly.
+
+ .. versionadded:: 3.2
+
+
Examples
--------
diff --git a/Lib/test/test_wsgiref.py b/Lib/test/test_wsgiref.py
index 49d372d..8051b4a 100644
--- a/Lib/test/test_wsgiref.py
+++ b/Lib/test/test_wsgiref.py
@@ -131,7 +131,7 @@ class IntegrationTests(TestCase):
def check_hello(self, out, has_length=True):
self.assertEqual(out,
("HTTP/1.0 200 OK\r\n"
- "Server: WSGIServer/0.1 Python/"+sys.version.split()[0]+"\r\n"
+ "Server: WSGIServer/0.2 Python/"+sys.version.split()[0]+"\r\n"
"Content-Type: text/plain\r\n"
"Date: Mon, 05 Jun 2006 18:49:54 GMT\r\n" +
(has_length and "Content-Length: 13\r\n" or "") +
@@ -187,7 +187,7 @@ class IntegrationTests(TestCase):
ver = sys.version.split()[0].encode('ascii')
self.assertEqual(
b"HTTP/1.0 200 OK\r\n"
- b"Server: WSGIServer/0.1 Python/" + ver + b"\r\n"
+ b"Server: WSGIServer/0.2 Python/" + ver + b"\r\n"
b"Content-Type: text/plain; charset=utf-8\r\n"
b"Date: Wed, 24 Dec 2008 13:29:32 GMT\r\n"
b"\r\n"
diff --git a/Lib/wsgiref/handlers.py b/Lib/wsgiref/handlers.py
index 3e11219..6d6f80f 100644
--- a/Lib/wsgiref/handlers.py
+++ b/Lib/wsgiref/handlers.py
@@ -5,7 +5,10 @@ from .headers import Headers
import sys, os, time
-__all__ = ['BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler']
+__all__ = [
+ 'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler',
+ 'IISCGIHandler', 'read_environ'
+]
# Weekday and month names for HTTP date/time formatting; always English!
_weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
@@ -19,6 +22,74 @@ def format_date_time(timestamp):
_weekdayname[wd], day, _monthname[month], year, hh, mm, ss
)
+_is_request = {
+ 'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE',
+ 'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT',
+}.__contains__
+
+def _needs_transcode(k):
+ return _is_request(k) or k.startswith('HTTP_') or k.startswith('SSL_') \
+ or (k.startswith('REDIRECT_') and _needs_transcode(k[9:]))
+
+def read_environ():
+ """Read environment, fixing HTTP variables"""
+ enc = sys.getfilesystemencoding()
+ esc = 'surrogateescape'
+ try:
+ ''.encode('utf-8', esc)
+ except LookupError:
+ esc = 'replace'
+ environ = {}
+
+ # Take the basic environment from native-unicode os.environ. Attempt to
+ # fix up the variables that come from the HTTP request to compensate for
+ # the bytes->unicode decoding step that will already have taken place.
+ for k, v in os.environ.items():
+ if _needs_transcode(k):
+
+ # On win32, the os.environ is natively Unicode. Different servers
+ # decode the request bytes using different encodings.
+ if sys.platform == 'win32':
+ software = os.environ.get('SERVER_SOFTWARE', '').lower()
+
+ # On IIS, the HTTP request will be decoded as UTF-8 as long
+ # as the input is a valid UTF-8 sequence. Otherwise it is
+ # decoded using the system code page (mbcs), with no way to
+ # detect this has happened. Because UTF-8 is the more likely
+ # encoding, and mbcs is inherently unreliable (an mbcs string
+ # that happens to be valid UTF-8 will not be decoded as mbcs),
+ # always recreate the original bytes as UTF-8.
+ if software.startswith('microsoft-iis/'):
+ v = v.encode('utf-8').decode('iso-8859-1')
+
+ # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct
+ # to the Unicode environ. No modification needed.
+ elif software.startswith('apache/'):
+ pass
+
+ # Python 3's http.server.CGIHTTPRequestHandler decodes
+ # using the urllib.parse.unquote default of UTF-8, amongst
+ # other issues.
+ elif (
+ software.startswith('simplehttp/')
+ and 'python/3' in software
+ ):
+ v = v.encode('utf-8').decode('iso-8859-1')
+
+ # For other servers, guess that they have written bytes to
+ # the environ using stdio byte-oriented interfaces, ending up
+ # with the system code page.
+ else:
+ v = v.encode(enc, 'replace').decode('iso-8859-1')
+
+ # Recover bytes from unicode environ, using surrogate escapes
+ # where available (Python 3.1+).
+ else:
+ v = v.encode(enc, esc).decode('iso-8859-1')
+
+ environ[k] = v
+ return environ
+
class BaseHandler:
"""Manage the invocation of a WSGI application"""
@@ -36,7 +107,7 @@ class BaseHandler:
# os_environ is used to supply configuration from the OS environment:
# by default it's a copy of 'os.environ' as of import time, but you can
# override this in e.g. your __init__ method.
- os_environ = dict(os.environ.items())
+ os_environ= read_environ()
# Collaborator classes
wsgi_file_wrapper = FileWrapper # set to None to disable
@@ -431,6 +502,42 @@ class CGIHandler(BaseCGIHandler):
def __init__(self):
BaseCGIHandler.__init__(
- self, sys.stdin, sys.stdout, sys.stderr, dict(os.environ.items()),
- multithread=False, multiprocess=True
+ self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
+ read_environ(), multithread=False, multiprocess=True
+ )
+
+
+class IISCGIHandler(BaseCGIHandler):
+ """CGI-based invocation with workaround for IIS path bug
+
+ This handler should be used in preference to CGIHandler when deploying on
+ Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
+ or metabase allowPathInfoForScriptMappings (IIS<7).
+ """
+ wsgi_run_once = True
+ os_environ = {}
+
+ # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
+ # the front, causing problems for WSGI applications that wish to implement
+ # routing. This handler strips any such duplicated path.
+
+ # IIS can be configured to pass the correct PATH_INFO, but this causes
+ # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
+ # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
+ # setting can only be made on a vhost level, affecting all other script
+ # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
+ # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
+ # rarely uses it because there is still no UI for it.)
+
+ # There is no way for CGI code to tell whether the option was set, so a
+ # separate handler class is provided.
+ def __init__(self):
+ environ= read_environ()
+ path = environ.get('PATH_INFO', '')
+ script = environ.get('SCRIPT_NAME', '')
+ if (path+'/').startswith(script+'/'):
+ environ['PATH_INFO'] = path[len(script):]
+ BaseCGIHandler.__init__(
+ self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
+ environ, multithread=False, multiprocess=True
)
diff --git a/Lib/wsgiref/simple_server.py b/Lib/wsgiref/simple_server.py
index 550f4d8..af82f95 100644
--- a/Lib/wsgiref/simple_server.py
+++ b/Lib/wsgiref/simple_server.py
@@ -15,7 +15,7 @@ import sys
import urllib.parse
from wsgiref.handlers import SimpleHandler
-__version__ = "0.1"
+__version__ = "0.2"
__all__ = ['WSGIServer', 'WSGIRequestHandler', 'demo_app', 'make_server']
@@ -74,13 +74,14 @@ class WSGIRequestHandler(BaseHTTPRequestHandler):
def get_environ(self):
env = self.server.base_environ.copy()
env['SERVER_PROTOCOL'] = self.request_version
+ env['SERVER_SOFTWARE'] = self.server_version
env['REQUEST_METHOD'] = self.command
if '?' in self.path:
path,query = self.path.split('?',1)
else:
path,query = self.path,''
- env['PATH_INFO'] = urllib.parse.unquote(path)
+ env['PATH_INFO'] = urllib.parse.unquote_to_bytes(path).decode('iso-8859-1')
env['QUERY_STRING'] = query
host = self.address_string()
diff --git a/Misc/NEWS b/Misc/NEWS
index ba5ba38..2b81371 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -59,6 +59,10 @@ Core and Builtins
Library
-------
+- Issue #10155: Add IISCGIHandler to wsgiref.handlers to better support
+ the IIS CGI environment, and add read_environ() to correct unicode
+ environment values for WSGI 1.0.1.
+
- Issue #10281: nntplib now returns None for absent fields in the OVER/XOVER
response, instead of raising an exception.