summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2015-03-12 09:13:36 (GMT)
committer Serhiy Storchaka <storchaka@gmail.com> 2015-03-12 09:13:36 (GMT)
commit a112a8ae47813f75aa8ad27ee8c42a7c2e937d13 (patch)
tree ead86515e9462a009586cfbd5f68bda5398b03a7
parent c775ad615a6370ec8424422422bbec3f0976428b (diff)
download cpython-a112a8ae47813f75aa8ad27ee8c42a7c2e937d13.zip
cpython-a112a8ae47813f75aa8ad27ee8c42a7c2e937d13.tar.gz
cpython-a112a8ae47813f75aa8ad27ee8c42a7c2e937d13.tar.bz2
Issue #22928: Disabled HTTP header injections in http.client.
Original patch by Demian Brecht.
-rw-r--r-- Lib/http/client.py 37
-rw-r--r-- Lib/test/test_httplib.py 57
-rw-r--r-- Misc/NEWS 3
3 files changed, 97 insertions, 0 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py
index d3d9b30..3f9e67b 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -70,6 +70,7 @@ import email.parser
import email.message
import io
import os
+import re
import socket
import collections
from urllib.parse import urlsplit
@@ -217,6 +218,34 @@ MAXAMOUNT = 1048576
_MAXLINE = 65536
_MAXHEADERS = 100
+# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
+#
+# VCHAR = %x21-7E
+# obs-text = %x80-FF
+# header-field = field-name ":" OWS field-value OWS
+# field-name = token
+# field-value = *( field-content / obs-fold )
+# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+# field-vchar = VCHAR / obs-text
+#
+# obs-fold = CRLF 1*( SP / HTAB )
+# ; obsolete line folding
+# ; see Section 3.2.4
+
+# token = 1*tchar
+#
+# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+# / DIGIT / ALPHA
+# ; any VCHAR, except delimiters
+#
+# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
+
+# the patterns for both name and value are more lenient than the RFC
+# definitions to allow for backwards compatibility
+_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
+_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
+
class HTTPMessage(email.message.Message):
# XXX The only usage of this method is in
@@ -1060,12 +1089,20 @@ class HTTPConnection:
if hasattr(header, 'encode'):
header = header.encode('ascii')
+
+ if not _is_legal_header_name(header):
+ raise ValueError('Invalid header name %r' % (header,))
+
values = list(values)
for i, one_value in enumerate(values):
if hasattr(one_value, 'encode'):
values[i] = one_value.encode('latin-1')
elif isinstance(one_value, int):
values[i] = str(one_value).encode('ascii')
+
+ if _is_illegal_header_value(values[i]):
+ raise ValueError('Invalid header value %r' % (values[i],))
+
value = b'\r\n\t'.join(values)
header = header + b': ' + value
self._output(header)
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index 4c57867..e4911d9 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -148,6 +148,33 @@ class HeaderTests(TestCase):
conn.putheader('Content-length', 42)
self.assertIn(b'Content-length: 42', conn._buffer)
+ conn.putheader('Foo', ' bar ')
+ self.assertIn(b'Foo: bar ', conn._buffer)
+ conn.putheader('Bar', '\tbaz\t')
+ self.assertIn(b'Bar: \tbaz\t', conn._buffer)
+ conn.putheader('Authorization', 'Bearer mytoken')
+ self.assertIn(b'Authorization: Bearer mytoken', conn._buffer)
+ conn.putheader('IterHeader', 'IterA', 'IterB')
+ self.assertIn(b'IterHeader: IterA\r\n\tIterB', conn._buffer)
+ conn.putheader('LatinHeader', b'\xFF')
+ self.assertIn(b'LatinHeader: \xFF', conn._buffer)
+ conn.putheader('Utf8Header', b'\xc3\x80')
+ self.assertIn(b'Utf8Header: \xc3\x80', conn._buffer)
+ conn.putheader('C1-Control', b'next\x85line')
+ self.assertIn(b'C1-Control: next\x85line', conn._buffer)
+ conn.putheader('Embedded-Fold-Space', 'is\r\n allowed')
+ self.assertIn(b'Embedded-Fold-Space: is\r\n allowed', conn._buffer)
+ conn.putheader('Embedded-Fold-Tab', 'is\r\n\tallowed')
+ self.assertIn(b'Embedded-Fold-Tab: is\r\n\tallowed', conn._buffer)
+ conn.putheader('Key Space', 'value')
+ self.assertIn(b'Key Space: value', conn._buffer)
+ conn.putheader('KeySpace ', 'value')
+ self.assertIn(b'KeySpace : value', conn._buffer)
+ conn.putheader(b'Nonbreak\xa0Space', 'value')
+ self.assertIn(b'Nonbreak\xa0Space: value', conn._buffer)
+ conn.putheader(b'\xa0NonbreakSpace', 'value')
+ self.assertIn(b'\xa0NonbreakSpace: value', conn._buffer)
+
def test_ipv6host_header(self):
# Default host header on IPv6 transaction should be wrapped by [] if
# it is an actual IPv6 address
@@ -177,6 +204,36 @@ class HeaderTests(TestCase):
self.assertEqual(resp.getheader('First'), 'val')
self.assertEqual(resp.getheader('Second'), 'val')
+ def test_invalid_headers(self):
+ conn = client.HTTPConnection('example.com')
+ conn.sock = FakeSocket('')
+ conn.putrequest('GET', '/')
+
+ # http://tools.ietf.org/html/rfc7230#section-3.2.4 no longer allows whitespace in
+ # header names; leading whitespace, CR, LF and ':' in a name must be rejected
+ cases = (
+ (b'Invalid\r\nName', b'ValidValue'),
+ (b'Invalid\rName', b'ValidValue'),
+ (b'Invalid\nName', b'ValidValue'),
+ (b'\r\nInvalidName', b'ValidValue'),
+ (b'\rInvalidName', b'ValidValue'),
+ (b'\nInvalidName', b'ValidValue'),
+ (b' InvalidName', b'ValidValue'),
+ (b'\tInvalidName', b'ValidValue'),
+ (b'Invalid:Name', b'ValidValue'),
+ (b':InvalidName', b'ValidValue'),
+ (b'ValidName', b'Invalid\r\nValue'),
+ (b'ValidName', b'Invalid\rValue'),
+ (b'ValidName', b'Invalid\nValue'),
+ (b'ValidName', b'InvalidValue\r\n'),
+ (b'ValidName', b'InvalidValue\r'),
+ (b'ValidName', b'InvalidValue\n'),
+ )
+ for name, value in cases:
+ with self.subTest((name, value)):
+ with self.assertRaisesRegex(ValueError, 'Invalid header'):
+ conn.putheader(name, value)
+
class BasicTest(TestCase):
def test_status_lines(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index 2d69b76..282492c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -18,6 +18,9 @@ Core and Builtins
Library
-------
+- Issue #22928: Disabled HTTP header injections in http.client.
+ Original patch by Demian Brecht.
+
- Issue #23615: Modules bz2, tarfile and tokenize now can be reloaded with
imp.reload(). Patch by Thomas Kluyver.