summaryrefslogtreecommitdiffstats
path: root/Lib/urllib
diff options
context:
space:
mode:
authorAdam Goldschmidt <adamgold7@gmail.com>2021-02-14 22:41:57 (GMT)
committerGitHub <noreply@github.com>2021-02-14 22:41:57 (GMT)
commitfcbe0cb04d35189401c0c880ebfb4311e952d776 (patch)
tree48ca1701d13be00517881423fcfd99b8a9ae9445 /Lib/urllib
parent1b57426e3a7842b4e6f9fc13ffb657c78e5443d4 (diff)
downloadcpython-fcbe0cb04d35189401c0c880ebfb4311e952d776.zip
cpython-fcbe0cb04d35189401c0c880ebfb4311e952d776.tar.gz
cpython-fcbe0cb04d35189401c0c880ebfb4311e952d776.tar.bz2
bpo-42967: only use '&' as a query string separator (#24297)
bpo-42967: [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl(). urllib.parse will only use "&" as query string separator by default instead of both ";" and "&" as allowed in earlier versions. An optional argument separator with default value "&" is added to specify the separator. Co-authored-by: Éric Araujo <merwok@netwok.org> Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Co-authored-by: Éric Araujo <merwok@netwok.org>
Diffstat (limited to 'Lib/urllib')
-rw-r--r--Lib/urllib/parse.py20
1 files changed, 15 insertions, 5 deletions
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index ea897c3..5bd0678 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -662,7 +662,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace', max_num_fields=None):
+ encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
"""Parse a query given as a string argument.
Arguments:
@@ -686,12 +686,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
max_num_fields: int. If set, then throws a ValueError if there
are more than n fields read by parse_qsl().
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
+
Returns a dictionary.
"""
parsed_result = {}
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
encoding=encoding, errors=errors,
- max_num_fields=max_num_fields)
+ max_num_fields=max_num_fields, separator=separator)
for name, value in pairs:
if name in parsed_result:
parsed_result[name].append(value)
@@ -701,7 +704,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace', max_num_fields=None):
+ encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
"""Parse a query given as a string argument.
Arguments:
@@ -724,19 +727,26 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
max_num_fields: int. If set, then throws a ValueError
if there are more than n fields read by parse_qsl().
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
+
Returns a list, as G-d intended.
"""
qs, _coerce_result = _coerce_args(qs)
+ if not separator or (not isinstance(separator, str)
+ and not isinstance(separator, bytes)):
+ raise ValueError("Separator must be of type string or bytes.")
+
# If max_num_fields is defined then check that the number of fields
# is less than max_num_fields. This prevents a memory exhaustion DOS
# attack via post bodies with many fields.
if max_num_fields is not None:
- num_fields = 1 + qs.count('&') + qs.count(';')
+ num_fields = 1 + qs.count(separator)
if max_num_fields < num_fields:
raise ValueError('Max number of fields exceeded')
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ pairs = [s1 for s1 in qs.split(separator)]
r = []
for name_value in pairs:
if not name_value and not strict_parsing: