From 7774d7831e8809795c64ce27f7df52674581d298 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 28 Sep 2019 08:32:01 -0400 Subject: bpo-38216, bpo-36274: Allow subclasses to separately override validation and encoding behavior (GH-16448) * bpo-38216: Allow bypassing input validation * bpo-36274: Also allow the URL encoding to be overridden. * bpo-38216, bpo-36274: Add tests demonstrating a hook for overriding validation, test demonstrating override encoding, and a test to capture expectation of the interface for the URL. * Call with skip_host to avoid tripping on the host checking in the URL. * Remove obsolete comment. * Make _prepare_path_encoding its own attr. This makes overriding just that simpler. Also, don't use the := operator to make backporting easier. * Add a news entry. * _prepare_path_encoding -> _encode_prepared_path() * Once again separate the path validation and request encoding, drastically simplifying the behavior. Drop the guarantee that all processing happens in _prepare_path. --- Lib/http/client.py | 27 +++++++++++++------- Lib/test/test_httplib.py | 29 ++++++++++++++++++++++ .../2019-09-27-15-24-45.bpo-38216.-7yvZR.rst | 4 +++ 3 files changed, 51 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-09-27-15-24-45.bpo-38216.-7yvZR.rst diff --git a/Lib/http/client.py b/Lib/http/client.py index f61267e..33a4347 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -1085,18 +1085,15 @@ class HTTPConnection: else: raise CannotSendRequest(self.__state) - # Save the method we use, we need it later in the response phase + # Save the method for use later in the response phase self._method = method - if not url: - url = '/' - # Prevent CVE-2019-9740. - if match := _contains_disallowed_url_pchar_re.search(url): - raise InvalidURL(f"URL can't contain control characters. {url!r} " - f"(found at least {match.group()!r})") + + url = url or '/' + self._validate_path(url) + request = '%s %s %s' % (method, url, self._http_vsn_str) - # Non-ASCII characters should have been eliminated earlier - self._output(request.encode('ascii')) + self._output(self._encode_request(request)) if self._http_vsn == 11: # Issue some standard headers for better HTTP/1.1 compliance @@ -1174,6 +1171,18 @@ class HTTPConnection: # For HTTP/1.0, the server will assume "not chunked" pass + def _encode_request(self, request): + # ASCII also helps prevent CVE-2019-9740. + return request.encode('ascii') + + def _validate_path(self, url): + """Validate a url for putrequest.""" + # Prevent CVE-2019-9740. + match = _contains_disallowed_url_pchar_re.search(url) + if match: + raise InvalidURL(f"URL can't contain control characters. {url!r} " + f"(found at least {match.group()!r})") + def putheader(self, header, *values): """Send a request header line to the server. diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 1121224..e1aa414 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1155,6 +1155,34 @@ class BasicTest(TestCase): thread.join() self.assertEqual(result, b"proxied data\n") + def test_putrequest_override_validation(self): + """ + It should be possible to override the default validation + behavior in putrequest (bpo-38216). + """ + class UnsafeHTTPConnection(client.HTTPConnection): + def _validate_path(self, url): + pass + + conn = UnsafeHTTPConnection('example.com') + conn.sock = FakeSocket('') + conn.putrequest('GET', '/\x00') + + def test_putrequest_override_encoding(self): + """ + It should be possible to override the default encoding + to transmit bytes in another encoding even if invalid + (bpo-36274). + """ + class UnsafeHTTPConnection(client.HTTPConnection): + def _encode_request(self, str_url): + return str_url.encode('utf-8') + + conn = UnsafeHTTPConnection('example.com') + conn.sock = FakeSocket('') + conn.putrequest('GET', '/☃') + + class ExtendedReadTest(TestCase): """ Test peek(), read1(), readline() @@ -1279,6 +1307,7 @@ class ExtendedReadTest(TestCase): p = self.resp.peek(0) self.assertLessEqual(0, len(p)) + class ExtendedReadTestChunked(ExtendedReadTest): """ Test peek(), read1(), readline() in chunked mode diff --git a/Misc/NEWS.d/next/Library/2019-09-27-15-24-45.bpo-38216.-7yvZR.rst b/Misc/NEWS.d/next/Library/2019-09-27-15-24-45.bpo-38216.-7yvZR.rst new file mode 100644 index 0000000..ac8e2b0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-09-27-15-24-45.bpo-38216.-7yvZR.rst @@ -0,0 +1,4 @@ +Allow the rare code that wants to send invalid http requests from the +`http.client` library a way to do so. The fixes for bpo-30458 led to +breakage for some projects that were relying on this ability to test their +own behavior in the face of bad requests. -- cgit v0.12