From e176e0c105786e9f476758eb5438c57223b65e7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= Date: Thu, 19 Mar 2020 02:35:44 +0100 Subject: [2.7] closes bpo-38576: Disallow control characters in hostnames in http.client. (GH-19052) Add host validation for control characters for more CVE-2019-18348 protection. (cherry picked from commit 83fc70159b24) Co-authored-by: Ashwin Ramaswami --- Lib/httplib.py | 13 +++++++++ Lib/test/test_httplib.py | 13 ++++++++- Lib/test/test_urllib2.py | 32 +++++++++++++++++----- .../2020-03-18-01-30-50.bpo-38576.cvI68q.rst | 3 ++ 4 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst diff --git a/Lib/httplib.py b/Lib/httplib.py index 79532b9..fcc4152 100644 --- a/Lib/httplib.py +++ b/Lib/httplib.py @@ -745,6 +745,8 @@ class HTTPConnection: (self.host, self.port) = self._get_hostport(host, port) + self._validate_host(self.host) + # This is stored as an instance variable to allow unittests # to replace with a suitable mock self._create_connection = socket.create_connection @@ -1029,6 +1031,17 @@ class HTTPConnection: ).format(matched=match.group(), url=url) raise InvalidURL(msg) + def _validate_host(self, host): + """Validate a host so it doesn't contain control characters.""" + # Prevent CVE-2019-18348. + match = _contains_disallowed_url_pchar_re.search(host) + if match: + msg = ( + "URL can't contain control characters. {host!r} " + "(found at least {matched!r})" + ).format(matched=match.group(), host=host) + raise InvalidURL(msg) + def putheader(self, header, *values): """Send a request header line to the server. diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 5462fdd..d8a57f7 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -702,7 +702,7 @@ class BasicTest(TestCase): with self.assertRaisesRegexp(socket.error, "Invalid response"): conn._tunnel() - def test_putrequest_override_validation(self): + def test_putrequest_override_domain_validation(self): """ It should be possible to override the default validation behavior in putrequest (bpo-38216). @@ -715,6 +715,17 @@ class BasicTest(TestCase): conn.sock = FakeSocket('') conn.putrequest('GET', '/\x00') + def test_putrequest_override_host_validation(self): + class UnsafeHTTPConnection(httplib.HTTPConnection): + def _validate_host(self, url): + pass + + conn = UnsafeHTTPConnection('example.com\r\n') + conn.sock = FakeSocket('') + # set skip_host so a ValueError is not raised upon adding the + # invalid URL as the value of the "Host:" header + conn.putrequest('GET', '/', skip_host=1) + class OfflineTest(TestCase): def test_responses(self): diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 9531818..20a0f58 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1321,7 +1321,7 @@ class MiscTests(unittest.TestCase, FakeHTTPMixin): ) @unittest.skipUnless(ssl, "ssl module required") - def test_url_with_control_char_rejected(self): + def test_url_path_with_control_char_rejected(self): for char_no in range(0, 0x21) + range(0x7f, 0x100): char = chr(char_no) schemeless_url = "//localhost:7777/test%s/" % char @@ -1345,7 +1345,7 @@ class MiscTests(unittest.TestCase, FakeHTTPMixin): self.unfakehttp() @unittest.skipUnless(ssl, "ssl module required") - def test_url_with_newline_header_injection_rejected(self): + def test_url_path_with_newline_header_injection_rejected(self): self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123" schemeless_url = "//" + host + ":8080/test/?test=a" @@ -1357,14 +1357,32 @@ class MiscTests(unittest.TestCase, FakeHTTPMixin): # calls urllib.parse.quote() on the URL which makes all of the # above attempts at injection within the url _path_ safe. InvalidURL = httplib.InvalidURL - with self.assertRaisesRegexp( - InvalidURL, r"contain control.*\\r.*(found at least . .)"): - urllib2.urlopen("http:" + schemeless_url) - with self.assertRaisesRegexp(InvalidURL, r"contain control.*\\n"): - urllib2.urlopen("https:" + schemeless_url) + with self.assertRaisesRegexp(InvalidURL, + r"contain control.*\\r.*(found at least . .)"): + urllib2.urlopen("http:{}".format(schemeless_url)) + with self.assertRaisesRegexp(InvalidURL, + r"contain control.*\\n"): + urllib2.urlopen("https:{}".format(schemeless_url)) finally: self.unfakehttp() + @unittest.skipUnless(ssl, "ssl module required") + def test_url_host_with_control_char_rejected(self): + for char_no in list(range(0, 0x21)) + [0x7f]: + char = chr(char_no) + schemeless_url = "//localhost{}/test/".format(char) + self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") + try: + escaped_char_repr = repr(char).replace('\\', r'\\') + InvalidURL = httplib.InvalidURL + with self.assertRaisesRegexp(InvalidURL, + "contain control.*{}".format(escaped_char_repr)): + urllib2.urlopen("http:{}".format(schemeless_url)) + with self.assertRaisesRegexp(InvalidURL, + "contain control.*{}".format(escaped_char_repr)): + urllib2.urlopen("https:{}".format(schemeless_url)) + finally: + self.unfakehttp() class RequestTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst b/Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst new file mode 100644 index 0000000..96af32d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst @@ -0,0 +1,3 @@ +Disallow control characters in hostnames in http.client, addressing +CVE-2019-18348. Such potentially malicious header injection URLs now cause a +InvalidURL to be raised. -- cgit v0.12