summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorMartin Panter <vadmium+py@gmail.com>2016-05-16 01:14:20 (GMT)
committerMartin Panter <vadmium+py@gmail.com>2016-05-16 01:14:20 (GMT)
commite6f060903cf2080b6570a87fde5021aa14d05530 (patch)
tree7aa104e7862ff4cb1f61baf74bf09d78f11094db /Lib
parentce6e06874b235f7825888c20fd2c6f4670a4aeba (diff)
downloadcpython-e6f060903cf2080b6570a87fde5021aa14d05530.zip
cpython-e6f060903cf2080b6570a87fde5021aa14d05530.tar.gz
cpython-e6f060903cf2080b6570a87fde5021aa14d05530.tar.bz2
Issue #17214: Percent-encode non-ASCII bytes in redirect targets
Some servers send Location header fields with non-ASCII bytes, but "http.client" requires the request target to be ASCII-encodable, otherwise a UnicodeEncodeError is raised. Based on patch by Christian Heimes. Python 2 does not suffer any problem because it allows non-ASCII bytes in the HTTP request target.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/test/test_urllib2.py35
-rw-r--r--Lib/urllib/request.py12
2 files changed, 46 insertions, 1 deletions
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index 58c3071..eda7ccc 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -1224,6 +1224,41 @@ class HandlerTests(unittest.TestCase):
fp = urllib.request.urlopen("http://python.org/path")
self.assertEqual(fp.geturl(), "http://python.org/path?query")
+ def test_redirect_encoding(self):
+ # Some characters in the redirect target may need special handling,
+ # but most ASCII characters should be treated as already encoded
+ class Handler(urllib.request.HTTPHandler):
+ def http_open(self, req):
+ result = self.do_open(self.connection, req)
+ self.last_buf = self.connection.buf
+ # Set up a normal response for the next request
+ self.connection = test_urllib.fakehttp(
+ b'HTTP/1.1 200 OK\r\n'
+ b'Content-Length: 3\r\n'
+ b'\r\n'
+ b'123'
+ )
+ return result
+ handler = Handler()
+ opener = urllib.request.build_opener(handler)
+ tests = (
+ (b'/p\xC3\xA5-dansk/', b'/p%C3%A5-dansk/'),
+ (b'/spaced%20path/', b'/spaced%20path/'),
+ (b'/spaced path/', b'/spaced%20path/'),
+ (b'/?p\xC3\xA5-dansk', b'/?p%C3%A5-dansk'),
+ )
+ for [location, result] in tests:
+ with self.subTest(repr(location)):
+ handler.connection = test_urllib.fakehttp(
+ b'HTTP/1.1 302 Redirect\r\n'
+ b'Location: ' + location + b'\r\n'
+ b'\r\n'
+ )
+ response = opener.open('http://example.com/')
+ expected = b'GET ' + result + b' '
+ request = handler.last_buf
+ self.assertTrue(request.startswith(expected), repr(request))
+
def test_proxy(self):
o = OpenerDirector()
ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index bbd2bdf..1731fe3 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -91,6 +91,7 @@ import os
import posixpath
import re
import socket
+import string
import sys
import time
import collections
@@ -616,8 +617,12 @@ class HTTPRedirectHandler(BaseHandler):
# from the user (of urllib.request, in this case). In practice,
# essentially all clients do redirect in this case, so we do
# the same.
- # be conciliant with URIs containing a space
+
+ # Be conciliant with URIs containing a space. This is mainly
+ # redundant with the more complete encoding done in http_error_302(),
+ # but it is kept for compatibility with other callers.
newurl = newurl.replace(' ', '%20')
+
CONTENT_HEADERS = ("content-length", "content-type")
newheaders = dict((k, v) for k, v in req.headers.items()
if k.lower() not in CONTENT_HEADERS)
@@ -657,6 +662,11 @@ class HTTPRedirectHandler(BaseHandler):
urlparts[2] = "/"
newurl = urlunparse(urlparts)
+ # http.client.parse_headers() decodes as ISO-8859-1. Recover the
+ # original bytes and percent-encode non-ASCII bytes, and any special
+ # characters such as the space.
+ newurl = quote(
+ newurl, encoding="iso-8859-1", safe=string.punctuation)
newurl = urljoin(req.full_url, newurl)
# XXX Probably want to forget about the state of the current