summary | refs | log | tree | commit | diff | stats
path: root/Lib/urllib
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2024-03-05 15:49:50 (GMT)
committer: GitHub <noreply@github.com> 2024-03-05 15:49:50 (GMT)
commit: bdba8ef42b15e651dc23374a08143cc2b4c4657d (patch)
tree: 8af049682491f0bb9bd8c49b0f7886b5544fb9c4 /Lib/urllib
parent: f97f25ef5dfcdfec0d9a359fd970abd139cf3428 (diff)
download: cpython-bdba8ef42b15e651dc23374a08143cc2b4c4657d.zip
cpython-bdba8ef42b15e651dc23374a08143cc2b4c4657d.tar.gz
cpython-bdba8ef42b15e651dc23374a08143cc2b4c4657d.tar.bz2
gh-74668: Fix support of bytes in urllib.parse.parse_qsl() (GH-115771)
urllib.parse functions parse_qs() and parse_qsl() now support bytes arguments containing raw and percent-encoded non-ASCII data.
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- Lib/urllib/parse.py 50
1 file changed, 26 insertions, 24 deletions
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index c129b0d..ec52821 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -763,42 +763,44 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
Returns a list, as G-d intended.
"""
- qs, _coerce_result = _coerce_args(qs)
- separator, _ = _coerce_args(separator)
- if not separator or (not isinstance(separator, (str, bytes))):
+ if not separator or not isinstance(separator, (str, bytes)):
raise ValueError("Separator must be of type string or bytes.")
+ if isinstance(qs, str):
+ if not isinstance(separator, str):
+ separator = str(separator, 'ascii')
+ eq = '='
+ def _unquote(s):
+ return unquote_plus(s, encoding=encoding, errors=errors)
+ else:
+ qs = bytes(qs)
+ if isinstance(separator, str):
+ separator = bytes(separator, 'ascii')
+ eq = b'='
+ def _unquote(s):
+ return unquote_to_bytes(s.replace(b'+', b' '))
+
+ if not qs:
+ return []
# If max_num_fields is defined then check that the number of fields
# is less than max_num_fields. This prevents a memory exhaustion DOS
# attack via post bodies with many fields.
if max_num_fields is not None:
- num_fields = 1 + qs.count(separator) if qs else 0
+ num_fields = 1 + qs.count(separator)
if max_num_fields < num_fields:
raise ValueError('Max number of fields exceeded')
r = []
- query_args = qs.split(separator) if qs else []
- for name_value in query_args:
- if not name_value and not strict_parsing:
- continue
- nv = name_value.split('=', 1)
- if len(nv) != 2:
- if strict_parsing:
+ for name_value in qs.split(separator):
+ if name_value or strict_parsing:
+ name, has_eq, value = name_value.partition(eq)
+ if not has_eq and strict_parsing:
raise ValueError("bad query field: %r" % (name_value,))
- # Handle case of a control-name with no equal sign
- if keep_blank_values:
- nv.append('')
- else:
- continue
- if len(nv[1]) or keep_blank_values:
- name = nv[0].replace('+', ' ')
- name = unquote(name, encoding=encoding, errors=errors)
- name = _coerce_result(name)
- value = nv[1].replace('+', ' ')
- value = unquote(value, encoding=encoding, errors=errors)
- value = _coerce_result(value)
- r.append((name, value))
+ if value or keep_blank_values:
+ name = _unquote(name)
+ value = _unquote(value)
+ r.append((name, value))
return r
def unquote_plus(string, encoding='utf-8', errors='replace'):