bpo-34866: Adding max_num_fields to cgi.FieldStorage (GH-9660)

Adding `max_num_fields` to `cgi.FieldStorage` to make DOS attacks harder by limiting the number of `MiniFieldStorage` objects created by `FieldStorage`. (cherry picked from commit 209144831b0a19715bda3bd72b14a3e6192d9cc1) Co-authored-by: matthewbelisle-wf <matthew.belisle@workiva.com>
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2018-10-19 11:16:57 (GMT)
committer: GitHub <noreply@github.com> 2018-10-19 11:16:57 (GMT)
commit: 322a914965368ffd7e4f97ede50b351fdf48d870 (patch)
tree: 6a79e88c4ef83a6df6fc5655b760db0f6a36056e /Lib/urllib
parent: d85c2726b9f305ac5128764bda773a78e52101cd (diff)
download: cpython-322a914965368ffd7e4f97ede50b351fdf48d870.zip
cpython-322a914965368ffd7e4f97ede50b351fdf48d870.tar.gz
cpython-322a914965368ffd7e4f97ede50b351fdf48d870.tar.bz2
1 files changed, 19 insertions, 3 deletions
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index f959212..85e68c8 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -624,7 +624,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
 
 
 def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
-             encoding='utf-8', errors='replace'):
+             encoding='utf-8', errors='replace', max_num_fields=None):
     """Parse a query given as a string argument.
 
         Arguments:
@@ -645,11 +645,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
         encoding and errors: specify how to decode percent-encoded sequences
             into Unicode characters, as accepted by the bytes.decode() method.
 
+        max_num_fields: int. If set, then raises a ValueError if there
+            are more than max_num_fields fields read by parse_qsl().
+
         Returns a dictionary.
     """
     parsed_result = {}
     pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
-                      encoding=encoding, errors=errors)
+                      encoding=encoding, errors=errors,
+                      max_num_fields=max_num_fields)
     for name, value in pairs:
         if name in parsed_result:
             parsed_result[name].append(value)
@@ -659,7 +663,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
 
 
 def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
-              encoding='utf-8', errors='replace'):
+              encoding='utf-8', errors='replace', max_num_fields=None):
     """Parse a query given as a string argument.
 
         Arguments:
@@ -679,9 +683,21 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
         encoding and errors: specify how to decode percent-encoded sequences
             into Unicode characters, as accepted by the bytes.decode() method.
 
+        max_num_fields: int. If set, then raises a ValueError
+            if there are more than max_num_fields fields read by parse_qsl().
+
         Returns a list, as G-d intended.
     """
     qs, _coerce_result = _coerce_args(qs)
+
+    # If max_num_fields is defined then check that the number of fields
+    # does not exceed max_num_fields. This prevents a memory exhaustion DoS
+    # attack via POST bodies with many fields.
+    if max_num_fields is not None:
+        num_fields = 1 + qs.count('&') + qs.count(';')
+        if max_num_fields < num_fields:
+            raise ValueError('Max number of fields exceeded')
+
     pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
     r = []
     for name_value in pairs:
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>	2018-10-19 11:16:57 (GMT)
committer	GitHub <noreply@github.com>	2018-10-19 11:16:57 (GMT)
commit	322a914965368ffd7e4f97ede50b351fdf48d870 (patch)
tree	6a79e88c4ef83a6df6fc5655b760db0f6a36056e /Lib/urllib
parent	d85c2726b9f305ac5128764bda773a78e52101cd (diff)
download	cpython-322a914965368ffd7e4f97ede50b351fdf48d870.zip cpython-322a914965368ffd7e4f97ede50b351fdf48d870.tar.gz cpython-322a914965368ffd7e4f97ede50b351fdf48d870.tar.bz2