summary | refs | log | tree | commit | diff | stats
path: root/Lib/urllib
diff options
context:
space:
mode:
author Martin Panter <vadmium+py@gmail.com> 2016-05-16 01:14:20 (GMT)
committer Martin Panter <vadmium+py@gmail.com> 2016-05-16 01:14:20 (GMT)
commit e6f060903cf2080b6570a87fde5021aa14d05530 (patch)
tree 7aa104e7862ff4cb1f61baf74bf09d78f11094db /Lib/urllib
parent ce6e06874b235f7825888c20fd2c6f4670a4aeba (diff)
download cpython-e6f060903cf2080b6570a87fde5021aa14d05530.zip
cpython-e6f060903cf2080b6570a87fde5021aa14d05530.tar.gz
cpython-e6f060903cf2080b6570a87fde5021aa14d05530.tar.bz2
Issue #17214: Percent-encode non-ASCII bytes in redirect targets
Some servers send Location header fields with non-ASCII bytes, but "http.client" requires the request target to be ASCII-encodable, otherwise a UnicodeEncodeError is raised. Based on patch by Christian Heimes. Python 2 does not suffer any problem because it allows non-ASCII bytes in the HTTP request target.
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- Lib/urllib/request.py 12
1 files changed, 11 insertions, 1 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index bbd2bdf..1731fe3 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -91,6 +91,7 @@ import os
import posixpath
import re
import socket
+import string
import sys
import time
import collections
@@ -616,8 +617,12 @@ class HTTPRedirectHandler(BaseHandler):
# from the user (of urllib.request, in this case). In practice,
# essentially all clients do redirect in this case, so we do
# the same.
- # be conciliant with URIs containing a space
+
+ # Be conciliant with URIs containing a space. This is mainly
+ # redundant with the more complete encoding done in http_error_302(),
+ # but it is kept for compatibility with other callers.
newurl = newurl.replace(' ', '%20')
+
CONTENT_HEADERS = ("content-length", "content-type")
newheaders = dict((k, v) for k, v in req.headers.items()
if k.lower() not in CONTENT_HEADERS)
@@ -657,6 +662,11 @@ class HTTPRedirectHandler(BaseHandler):
urlparts[2] = "/"
newurl = urlunparse(urlparts)
+ # http.client.parse_headers() decodes as ISO-8859-1. Recover the
+ # original bytes and percent-encode non-ASCII bytes, and any special
+ # characters such as the space.
+ newurl = quote(
+ newurl, encoding="iso-8859-1", safe=string.punctuation)
newurl = urljoin(req.full_url, newurl)
# XXX Probably want to forget about the state of the current