Issue #17214: Percent-encode non-ASCII bytes in redirect targets

Some servers send Location header fields with non-ASCII bytes, but "http.client" requires the request target to be ASCII-encodable; otherwise a UnicodeEncodeError is raised. Based on patch by Christian Heimes. Python 2 does not suffer any problem because it allows non-ASCII bytes in the HTTP request target.
author: Martin Panter <vadmium+py@gmail.com> 2016-05-16 01:14:20 (GMT)
committer: Martin Panter <vadmium+py@gmail.com> 2016-05-16 01:14:20 (GMT)
commit: e6f060903cf2080b6570a87fde5021aa14d05530 (patch)
tree: 7aa104e7862ff4cb1f61baf74bf09d78f11094db /Lib/urllib
parent: ce6e06874b235f7825888c20fd2c6f4670a4aeba (diff)
download: cpython-e6f060903cf2080b6570a87fde5021aa14d05530.zip
cpython-e6f060903cf2080b6570a87fde5021aa14d05530.tar.gz
cpython-e6f060903cf2080b6570a87fde5021aa14d05530.tar.bz2
1 files changed, 11 insertions, 1 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index bbd2bdf..1731fe3 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -91,6 +91,7 @@ import os
 import posixpath
 import re
 import socket
+import string
 import sys
 import time
 import collections
@@ -616,8 +617,12 @@ class HTTPRedirectHandler(BaseHandler):
         # from the user (of urllib.request, in this case).  In practice,
         # essentially all clients do redirect in this case, so we do
         # the same.
-        # be conciliant with URIs containing a space
+
+        # Be conciliant with URIs containing a space.  This is mainly
+        # redundant with the more complete encoding done in http_error_302(),
+        # but it is kept for compatibility with other callers.
         newurl = newurl.replace(' ', '%20')
+
         CONTENT_HEADERS = ("content-length", "content-type")
         newheaders = dict((k, v) for k, v in req.headers.items()
                           if k.lower() not in CONTENT_HEADERS)
@@ -657,6 +662,11 @@ class HTTPRedirectHandler(BaseHandler):
             urlparts[2] = "/"
         newurl = urlunparse(urlparts)
 
+        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
+        # original bytes and percent-encode non-ASCII bytes, and any special
+        # characters such as the space.
+        newurl = quote(
+            newurl, encoding="iso-8859-1", safe=string.punctuation)
         newurl = urljoin(req.full_url, newurl)
 
         # XXX Probably want to forget about the state of the current
author	Martin Panter <vadmium+py@gmail.com>	2016-05-16 01:14:20 (GMT)
committer	Martin Panter <vadmium+py@gmail.com>	2016-05-16 01:14:20 (GMT)
commit	e6f060903cf2080b6570a87fde5021aa14d05530 (patch)
tree	7aa104e7862ff4cb1f61baf74bf09d78f11094db /Lib/urllib
parent	ce6e06874b235f7825888c20fd2c6f4670a4aeba (diff)
download	cpython-e6f060903cf2080b6570a87fde5021aa14d05530.zip cpython-e6f060903cf2080b6570a87fde5021aa14d05530.tar.gz cpython-e6f060903cf2080b6570a87fde5021aa14d05530.tar.bz2