summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xLib/cgi.py34
-rw-r--r--Lib/test/test_cgi.py49
-rw-r--r--Lib/test/test_urlparse.py7
-rw-r--r--Lib/urllib/parse.py22
-rw-r--r--Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst2
5 files changed, 102 insertions, 12 deletions
diff --git a/Lib/cgi.py b/Lib/cgi.py
index b655a05..adf4dcb 100755
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -311,7 +311,8 @@ class FieldStorage:
"""
def __init__(self, fp=None, headers=None, outerboundary=b'',
environ=os.environ, keep_blank_values=0, strict_parsing=0,
- limit=None, encoding='utf-8', errors='replace'):
+ limit=None, encoding='utf-8', errors='replace',
+ max_num_fields=None):
"""Constructor. Read multipart/* until last part.
Arguments, all optional:
@@ -351,10 +352,14 @@ class FieldStorage:
for the page sending the form (content-type : meta http-equiv or
header)
+ max_num_fields: int. If set, then __init__ throws a ValueError
+ if there are more than n fields read by parse_qsl().
+
"""
method = 'GET'
self.keep_blank_values = keep_blank_values
self.strict_parsing = strict_parsing
+ self.max_num_fields = max_num_fields
if 'REQUEST_METHOD' in environ:
method = environ['REQUEST_METHOD'].upper()
self.qs_on_post = None
@@ -578,12 +583,11 @@ class FieldStorage:
qs = qs.decode(self.encoding, self.errors)
if self.qs_on_post:
qs += '&' + self.qs_on_post
- self.list = []
query = urllib.parse.parse_qsl(
qs, self.keep_blank_values, self.strict_parsing,
- encoding=self.encoding, errors=self.errors)
- for key, value in query:
- self.list.append(MiniFieldStorage(key, value))
+ encoding=self.encoding, errors=self.errors,
+ max_num_fields=self.max_num_fields)
+ self.list = [MiniFieldStorage(key, value) for key, value in query]
self.skip_lines()
FieldStorageClass = None
@@ -597,9 +601,9 @@ class FieldStorage:
if self.qs_on_post:
query = urllib.parse.parse_qsl(
self.qs_on_post, self.keep_blank_values, self.strict_parsing,
- encoding=self.encoding, errors=self.errors)
- for key, value in query:
- self.list.append(MiniFieldStorage(key, value))
+ encoding=self.encoding, errors=self.errors,
+ max_num_fields=self.max_num_fields)
+ self.list.extend(MiniFieldStorage(key, value) for key, value in query)
klass = self.FieldStorageClass or self.__class__
first_line = self.fp.readline() # bytes
@@ -633,11 +637,23 @@ class FieldStorage:
if 'content-length' in headers:
del headers['content-length']
+ # Propagate max_num_fields into the sub class appropriately
+ sub_max_num_fields = self.max_num_fields
+ if sub_max_num_fields is not None:
+ sub_max_num_fields -= len(self.list)
+
part = klass(self.fp, headers, ib, environ, keep_blank_values,
strict_parsing,self.limit-self.bytes_read,
- self.encoding, self.errors)
+ self.encoding, self.errors, sub_max_num_fields)
+
+ max_num_fields = self.max_num_fields
+ if max_num_fields is not None and part.list:
+ max_num_fields -= len(part.list)
+
self.bytes_read += part.bytes_read
self.list.append(part)
+ if max_num_fields is not None and max_num_fields < len(self.list):
+ raise ValueError('Max number of fields exceeded')
if part.done or self.bytes_read >= self.length > 0:
break
self.skip_lines()
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
index b284a0d..8ea9d6a 100644
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -381,6 +381,55 @@ Larry
v = gen_result(data, environ)
self.assertEqual(self._qs_result, v)
+ def test_max_num_fields(self):
+ # For application/x-www-form-urlencoded
+ data = '&'.join(['a=a']*11)
+ environ = {
+ 'CONTENT_LENGTH': str(len(data)),
+ 'CONTENT_TYPE': 'application/x-www-form-urlencoded',
+ 'REQUEST_METHOD': 'POST',
+ }
+
+ with self.assertRaises(ValueError):
+ cgi.FieldStorage(
+ fp=BytesIO(data.encode()),
+ environ=environ,
+ max_num_fields=10,
+ )
+
+ # For multipart/form-data
+ data = """---123
+Content-Disposition: form-data; name="a"
+
+a
+---123
+Content-Type: application/x-www-form-urlencoded
+
+a=a&a=a
+---123--
+"""
+ environ = {
+ 'CONTENT_LENGTH': str(len(data)),
+ 'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
+ 'QUERY_STRING': 'a=a&a=a',
+ 'REQUEST_METHOD': 'POST',
+ }
+
+ # 2 GET entities
+ # 2 top level POST entities
+ # 2 entities within the second POST entity
+ with self.assertRaises(ValueError):
+ cgi.FieldStorage(
+ fp=BytesIO(data.encode()),
+ environ=environ,
+ max_num_fields=5,
+ )
+ cgi.FieldStorage(
+ fp=BytesIO(data.encode()),
+ environ=environ,
+ max_num_fields=6,
+ )
+
def testQSAndFormData(self):
data = """---123
Content-Disposition: form-data; name="key2"
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index cd3eabb..6738863 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -880,6 +880,13 @@ class UrlParseTestCase(unittest.TestCase):
errors="ignore")
self.assertEqual(result, [('key', '\u0141-')])
+ def test_parse_qsl_max_num_fields(self):
+ with self.assertRaises(ValueError):
+ urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
+ with self.assertRaises(ValueError):
+ urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
+ urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
+
def test_urlencode_sequences(self):
# Other tests incidentally urlencode things; test non-covered cases:
# Sequence and object values.
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index f21b8eb..dc21711 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -628,7 +628,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
+ encoding='utf-8', errors='replace', max_num_fields=None):
"""Parse a query given as a string argument.
Arguments:
@@ -649,11 +649,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
encoding and errors: specify how to decode percent-encoded sequences
into Unicode characters, as accepted by the bytes.decode() method.
+ max_num_fields: int. If set, then throws a ValueError if there
+ are more than n fields read by parse_qsl().
+
Returns a dictionary.
"""
parsed_result = {}
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
- encoding=encoding, errors=errors)
+ encoding=encoding, errors=errors,
+ max_num_fields=max_num_fields)
for name, value in pairs:
if name in parsed_result:
parsed_result[name].append(value)
@@ -663,7 +667,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
+ encoding='utf-8', errors='replace', max_num_fields=None):
"""Parse a query given as a string argument.
Arguments:
@@ -683,9 +687,21 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
encoding and errors: specify how to decode percent-encoded sequences
into Unicode characters, as accepted by the bytes.decode() method.
+ max_num_fields: int. If set, then throws a ValueError
+ if there are more than n fields read by parse_qsl().
+
Returns a list, as G-d intended.
"""
qs, _coerce_result = _coerce_args(qs)
+
+ # If max_num_fields is defined then check that the number of fields
+ # is less than max_num_fields. This prevents a memory exhaustion DOS
+ # attack via post bodies with many fields.
+ if max_num_fields is not None:
+ num_fields = 1 + qs.count('&') + qs.count(';')
+ if max_num_fields < num_fields:
+ raise ValueError('Max number of fields exceeded')
+
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
r = []
for name_value in pairs:
diff --git a/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst b/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst
new file mode 100644
index 0000000..90c146c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst
@@ -0,0 +1,2 @@
+Adding ``max_num_fields`` to ``cgi.FieldStorage`` to make DOS attacks harder by
+limiting the number of ``MiniFieldStorage`` objects created by ``FieldStorage``.