bpo-42967: only use '&' as a query string separator (#24297)

bpo-42967: [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl(). urllib.parse will only use "&" as query string separator by default instead of both ";" and "&" as allowed in earlier versions. An optional argument, separator, with default value "&" is added to specify the separator. Co-authored-by: Éric Araujo <merwok@netwok.org> Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Co-authored-by: Éric Araujo <merwok@netwok.org>
author: Adam Goldschmidt <adamgold7@gmail.com> 2021-02-14 22:41:57 (GMT)
committer: GitHub <noreply@github.com> 2021-02-14 22:41:57 (GMT)
commit: fcbe0cb04d35189401c0c880ebfb4311e952d776 (patch)
tree: 48ca1701d13be00517881423fcfd99b8a9ae9445 /Lib/urllib
parent: 1b57426e3a7842b4e6f9fc13ffb657c78e5443d4 (diff)
download: cpython-fcbe0cb04d35189401c0c880ebfb4311e952d776.zip
cpython-fcbe0cb04d35189401c0c880ebfb4311e952d776.tar.gz
cpython-fcbe0cb04d35189401c0c880ebfb4311e952d776.tar.bz2
1 files changed, 15 insertions, 5 deletions
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index ea897c3..5bd0678 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -662,7 +662,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
 
 
 def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
-             encoding='utf-8', errors='replace', max_num_fields=None):
+             encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
     """Parse a query given as a string argument.
 
         Arguments:
@@ -686,12 +686,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
         max_num_fields: int. If set, then throws a ValueError if there
             are more than n fields read by parse_qsl().
 
+        separator: str. The symbol to use for separating the query arguments.
+            Defaults to &.
+
         Returns a dictionary.
     """
     parsed_result = {}
     pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
                       encoding=encoding, errors=errors,
-                      max_num_fields=max_num_fields)
+                      max_num_fields=max_num_fields, separator=separator)
     for name, value in pairs:
         if name in parsed_result:
             parsed_result[name].append(value)
@@ -701,7 +704,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
 
 
 def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
-              encoding='utf-8', errors='replace', max_num_fields=None):
+              encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
     """Parse a query given as a string argument.
 
         Arguments:
@@ -724,19 +727,26 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
         max_num_fields: int. If set, then throws a ValueError
             if there are more than n fields read by parse_qsl().
 
+        separator: str. The symbol to use for separating the query arguments.
+            Defaults to &.
+
         Returns a list, as G-d intended.
     """
     qs, _coerce_result = _coerce_args(qs)
 
+    if not separator or (not isinstance(separator, str)
+        and not isinstance(separator, bytes)):
+        raise ValueError("Separator must be of type string or bytes.")
+
     # If max_num_fields is defined then check that the number of fields
     # is less than max_num_fields. This prevents a memory exhaustion DOS
     # attack via post bodies with many fields.
     if max_num_fields is not None:
-        num_fields = 1 + qs.count('&') + qs.count(';')
+        num_fields = 1 + qs.count(separator)
         if max_num_fields < num_fields:
             raise ValueError('Max number of fields exceeded')
 
-    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+    pairs = [s1 for s1 in qs.split(separator)]
     r = []
     for name_value in pairs:
         if not name_value and not strict_parsing:
author	Adam Goldschmidt <adamgold7@gmail.com>	2021-02-14 22:41:57 (GMT)
committer	GitHub <noreply@github.com>	2021-02-14 22:41:57 (GMT)
commit	fcbe0cb04d35189401c0c880ebfb4311e952d776 (patch)
tree	48ca1701d13be00517881423fcfd99b8a9ae9445 /Lib/urllib
parent	1b57426e3a7842b4e6f9fc13ffb657c78e5443d4 (diff)
download	cpython-fcbe0cb04d35189401c0c880ebfb4311e952d776.zip cpython-fcbe0cb04d35189401c0c880ebfb4311e952d776.tar.gz cpython-fcbe0cb04d35189401c0c880ebfb4311e952d776.tar.bz2