summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Martin Panter <vadmium+py@gmail.com> 2016-04-17 02:17:03 (GMT)
committer: Martin Panter <vadmium+py@gmail.com> 2016-04-17 02:17:03 (GMT)
commit: 50dd1f7dd68ed2f526adfebd5caa342e26da4517 (patch)
tree: 675aa1607bb0e537ef86793c9d9735f177486817
parent: 06172e7bd4e19ab002069998e315cd32139f475b (diff)
download: cpython-50dd1f7dd68ed2f526adfebd5caa342e26da4517.zip
cpython-50dd1f7dd68ed2f526adfebd5caa342e26da4517.tar.gz
cpython-50dd1f7dd68ed2f526adfebd5caa342e26da4517.tar.bz2
Issue #26717: Stop encoding Latin-1-ized WSGI paths with UTF-8
Patch by Anthony Sottile.
-rw-r--r-- Lib/test/test_wsgiref.py 24
-rw-r--r-- Lib/wsgiref/simple_server.py 2
-rw-r--r-- Misc/ACKS 1
-rw-r--r-- Misc/NEWS 3
4 files changed, 29 insertions, 1 deletions
diff --git a/Lib/test/test_wsgiref.py b/Lib/test/test_wsgiref.py
index 3f800ef..b7d02e8 100644
--- a/Lib/test/test_wsgiref.py
+++ b/Lib/test/test_wsgiref.py
@@ -1,3 +1,4 @@
+from unittest import mock
from unittest import TestCase
from wsgiref.util import setup_testing_defaults
from wsgiref.headers import Headers
@@ -221,6 +222,29 @@ class IntegrationTests(TestCase):
b"data",
out)
+ def test_cp1252_url(self):
+ def app(e, s):
+ s("200 OK", [
+ ("Content-Type", "text/plain"),
+ ("Date", "Wed, 24 Dec 2008 13:29:32 GMT"),
+ ])
+ # PEP3333 says environ variables are decoded as latin1.
+ # Encode as latin1 to get original bytes
+ return [e["PATH_INFO"].encode("latin1")]
+
+ out, err = run_amock(
+ validator(app), data=b"GET /\x80%80 HTTP/1.0")
+ self.assertEqual(
+ [
+ b"HTTP/1.0 200 OK",
+ mock.ANY,
+ b"Content-Type: text/plain",
+ b"Date: Wed, 24 Dec 2008 13:29:32 GMT",
+ b"",
+ b"/\x80\x80",
+ ],
+ out.splitlines())
+
class UtilityTests(TestCase):
diff --git a/Lib/wsgiref/simple_server.py b/Lib/wsgiref/simple_server.py
index 378b316..e396788 100644
--- a/Lib/wsgiref/simple_server.py
+++ b/Lib/wsgiref/simple_server.py
@@ -82,7 +82,7 @@ class WSGIRequestHandler(BaseHTTPRequestHandler):
else:
path,query = self.path,''
- env['PATH_INFO'] = urllib.parse.unquote_to_bytes(path).decode('iso-8859-1')
+ env['PATH_INFO'] = urllib.parse.unquote(path, 'iso-8859-1')
env['QUERY_STRING'] = query
host = self.address_string()
diff --git a/Misc/ACKS b/Misc/ACKS
index 01b42f4..e293ddc 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1376,6 +1376,7 @@ Nir Soffer
Paul Sokolovsky
Evgeny Sologubov
Cody Somerville
+Anthony Sottile
Edoardo Spadolini
Geoffrey Spear
Clay Spence
diff --git a/Misc/NEWS b/Misc/NEWS
index ec6626f..94d8255 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -107,6 +107,9 @@ Core and Builtins
Library
-------
+- Issue #26717: Stop encoding Latin-1-ized WSGI paths with UTF-8. Patch by
+ Anthony Sottile.
+
- Issue #26735: Fix :func:`os.urandom` on Solaris 11.3 and newer when reading
more than 1,024 bytes: call ``getrandom()`` multiple times with a limit of
1024 bytes per call.