summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/test/test_urllib.py7
-rw-r--r--Lib/test/test_urllib2.py51
-rw-r--r--Lib/urllib/request.py14
-rw-r--r--Misc/NEWS9
4 files changed, 76 insertions, 5 deletions
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 49e2a2c..5d05f8d 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1,4 +1,4 @@
-"""Regresssion tests for urllib"""
+"""Regresssion tests for what was in Python 2's "urllib" module"""
import urllib.parse
import urllib.request
@@ -86,10 +86,11 @@ def fakehttp(fakedata):
# buffer to store data for verification in urlopen tests.
buf = None
- fakesock = FakeSocket(fakedata)
def connect(self):
- self.sock = self.fakesock
+ self.sock = FakeSocket(self.fakedata)
+ type(self).fakesock = self.sock
+ FakeHTTPConnection.fakedata = fakedata
return FakeHTTPConnection
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index 8716a5e..eda7ccc 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -1208,6 +1208,57 @@ class HandlerTests(unittest.TestCase):
fp = o.open('http://www.example.com')
self.assertEqual(fp.geturl(), redirected_url.strip())
+ def test_redirect_no_path(self):
+ # Issue 14132: Relative redirect strips original path
+ real_class = http.client.HTTPConnection
+ response1 = b"HTTP/1.1 302 Found\r\nLocation: ?query\r\n\r\n"
+ http.client.HTTPConnection = test_urllib.fakehttp(response1)
+ self.addCleanup(setattr, http.client, "HTTPConnection", real_class)
+ urls = iter(("/path", "/path?query"))
+ def request(conn, method, url, *pos, **kw):
+ self.assertEqual(url, next(urls))
+ real_class.request(conn, method, url, *pos, **kw)
+ # Change response for subsequent connection
+ conn.__class__.fakedata = b"HTTP/1.1 200 OK\r\n\r\nHello!"
+ http.client.HTTPConnection.request = request
+ fp = urllib.request.urlopen("http://python.org/path")
+ self.assertEqual(fp.geturl(), "http://python.org/path?query")
+
+ def test_redirect_encoding(self):
+ # Some characters in the redirect target may need special handling,
+ # but most ASCII characters should be treated as already encoded
+ class Handler(urllib.request.HTTPHandler):
+ def http_open(self, req):
+ result = self.do_open(self.connection, req)
+ self.last_buf = self.connection.buf
+ # Set up a normal response for the next request
+ self.connection = test_urllib.fakehttp(
+ b'HTTP/1.1 200 OK\r\n'
+ b'Content-Length: 3\r\n'
+ b'\r\n'
+ b'123'
+ )
+ return result
+ handler = Handler()
+ opener = urllib.request.build_opener(handler)
+ tests = (
+ (b'/p\xC3\xA5-dansk/', b'/p%C3%A5-dansk/'),
+ (b'/spaced%20path/', b'/spaced%20path/'),
+ (b'/spaced path/', b'/spaced%20path/'),
+ (b'/?p\xC3\xA5-dansk', b'/?p%C3%A5-dansk'),
+ )
+ for [location, result] in tests:
+ with self.subTest(repr(location)):
+ handler.connection = test_urllib.fakehttp(
+ b'HTTP/1.1 302 Redirect\r\n'
+ b'Location: ' + location + b'\r\n'
+ b'\r\n'
+ )
+ response = opener.open('http://example.com/')
+ expected = b'GET ' + result + b' '
+ request = handler.last_buf
+ self.assertTrue(request.startswith(expected), repr(request))
+
def test_proxy(self):
o = OpenerDirector()
ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 4373c26..4a3daec 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -91,6 +91,7 @@ import os
import posixpath
import re
import socket
+import string
import sys
import time
import collections
@@ -676,8 +677,12 @@ class HTTPRedirectHandler(BaseHandler):
# from the user (of urllib.request, in this case). In practice,
# essentially all clients do redirect in this case, so we do
# the same.
- # be conciliant with URIs containing a space
+
+ # Be conciliant with URIs containing a space. This is mainly
+ # redundant with the more complete encoding done in http_error_302(),
+ # but it is kept for compatibility with other callers.
newurl = newurl.replace(' ', '%20')
+
CONTENT_HEADERS = ("content-length", "content-type")
newheaders = dict((k, v) for k, v in req.headers.items()
if k.lower() not in CONTENT_HEADERS)
@@ -712,11 +717,16 @@ class HTTPRedirectHandler(BaseHandler):
"%s - Redirection to url '%s' is not allowed" % (msg, newurl),
headers, fp)
- if not urlparts.path:
+ if not urlparts.path and urlparts.netloc:
urlparts = list(urlparts)
urlparts[2] = "/"
newurl = urlunparse(urlparts)
+ # http.client.parse_headers() decodes as ISO-8859-1. Recover the
+ # original bytes and percent-encode non-ASCII bytes, and any special
+ # characters such as the space.
+ newurl = quote(
+ newurl, encoding="iso-8859-1", safe=string.punctuation)
newurl = urljoin(req.full_url, newurl)
# XXX Probably want to forget about the state of the current
diff --git a/Misc/NEWS b/Misc/NEWS
index fe75bf9..33dedeb 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -277,6 +277,15 @@ Core and Builtins
Library
-------
+- Issue #14132: Fix urllib.request redirect handling when the target only has
+ a query string. Original fix by Ján Janech.
+
+- Issue #17214: The "urllib.request" module now percent-encodes non-ASCII
+ bytes found in redirect target URLs. Some servers send Location header
+ fields with non-ASCII bytes, but "http.client" requires the request target
+ to be ASCII-encodable, otherwise a UnicodeEncodeError is raised. Based on
+ patch by Christian Heimes.
+
- Issue #27033: The default value of the decode_data parameter for
smtpd.SMTPChannel and smtpd.SMTPServer constructors is changed to False.