diff options
author | Adam Goldschmidt <adamgold7@gmail.com> | 2021-02-14 22:41:57 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-14 22:41:57 (GMT) |
commit | fcbe0cb04d35189401c0c880ebfb4311e952d776 (patch) | |
tree | 48ca1701d13be00517881423fcfd99b8a9ae9445 /Lib/urllib | |
parent | 1b57426e3a7842b4e6f9fc13ffb657c78e5443d4 (diff) | |
download | cpython-fcbe0cb04d35189401c0c880ebfb4311e952d776.zip cpython-fcbe0cb04d35189401c0c880ebfb4311e952d776.tar.gz cpython-fcbe0cb04d35189401c0c880ebfb4311e952d776.tar.bz2 |
bpo-42967: only use '&' as a query string separator (#24297)
bpo-42967: [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl().
urllib.parse will only use "&" as the query string separator by default, instead of both ";" and "&" as allowed in earlier versions. An optional argument, separator, with default value "&", is added to specify the separator.
Co-authored-by: Éric Araujo <merwok@netwok.org>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Co-authored-by: Éric Araujo <merwok@netwok.org>
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- | Lib/urllib/parse.py | 20 |
1 files changed, 15 insertions, 5 deletions
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index ea897c3..5bd0678 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -662,7 +662,7 @@ def unquote(string, encoding='utf-8', errors='replace'): def parse_qs(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace', max_num_fields=None): + encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): """Parse a query given as a string argument. Arguments: @@ -686,12 +686,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, max_num_fields: int. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). + separator: str. The symbol to use for separating the query arguments. + Defaults to &. + Returns a dictionary. """ parsed_result = {} pairs = parse_qsl(qs, keep_blank_values, strict_parsing, encoding=encoding, errors=errors, - max_num_fields=max_num_fields) + max_num_fields=max_num_fields, separator=separator) for name, value in pairs: if name in parsed_result: parsed_result[name].append(value) @@ -701,7 +704,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace', max_num_fields=None): + encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): """Parse a query given as a string argument. Arguments: @@ -724,19 +727,26 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, max_num_fields: int. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). + separator: str. The symbol to use for separating the query arguments. + Defaults to &. + Returns a list, as G-d intended. 
""" qs, _coerce_result = _coerce_args(qs) + if not separator or (not isinstance(separator, str) + and not isinstance(separator, bytes)): + raise ValueError("Separator must be of type string or bytes.") + # If max_num_fields is defined then check that the number of fields # is less than max_num_fields. This prevents a memory exhaustion DOS # attack via post bodies with many fields. if max_num_fields is not None: - num_fields = 1 + qs.count('&') + qs.count(';') + num_fields = 1 + qs.count(separator) if max_num_fields < num_fields: raise ValueError('Max number of fields exceeded') - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + pairs = [s1 for s1 in qs.split(separator)] r = [] for name_value in pairs: if not name_value and not strict_parsing: |