From c17686f071c6f5b5d20366ae32188327a36e282e Mon Sep 17 00:00:00 2001 From: R David Murray Date: Sun, 17 May 2015 20:44:50 -0400 Subject: Issue #13866: add *quote_via* argument to urlencode. Patch by samwyse, completed by Arnon Yaari, and reviewed by Martin Panter. --- Doc/library/urllib.parse.rst | 20 ++++++++++++++++---- Doc/whatsnew/3.5.rst | 4 ++++ Lib/test/test_urlparse.py | 10 ++++++++++ Lib/urllib/parse.py | 29 +++++++++++++++-------------- Misc/NEWS | 2 ++ 5 files changed, 47 insertions(+), 18 deletions(-) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 3ecdda1..800f830 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -519,7 +519,8 @@ task isn't already covered by the URL parsing functions above. Example: ``unquote_to_bytes('a%26%EF')`` yields ``b'a&\xef'``. -.. function:: urlencode(query, doseq=False, safe='', encoding=None, errors=None) +.. function:: urlencode(query, doseq=False, safe='', encoding=None, \ + errors=None, quote_via=quote_plus) Convert a mapping object or a sequence of two-element tuples, which may contain :class:`str` or :class:`bytes` objects, to a "percent-encoded" @@ -528,8 +529,16 @@ task isn't already covered by the URL parsing functions above. properly encoded to bytes, otherwise it would result in a :exc:`TypeError`. The resulting string is a series of ``key=value`` pairs separated by ``'&'`` - characters, where both *key* and *value* are quoted using :func:`quote_plus` - above. When a sequence of two-element tuples is used as the *query* + characters, where both *key* and *value* are quoted using the *quote_via* + function. By default, :func:`quote_plus` is used to quote the values, which + means spaces are quoted as a ``'+'`` character and '/' characters are + encoded as ``%2F``, which follows the standard for GET requests + (``application/x-www-form-urlencoded``). 
An alternate function that can be + passed as *quote_via* is :func:`quote`, which will encode spaces as ``%20`` + and not encode '/' characters. For maximum control of what is quoted, use + ``quote`` and specify a value for *safe*. + + When a sequence of two-element tuples is used as the *query* argument, the first element of each tuple is a key and the second is a value. The value element in itself can be a sequence and in that case, if the optional parameter *doseq* is evaluates to *True*, individual @@ -538,7 +547,7 @@ task isn't already covered by the URL parsing functions above. string will match the order of parameter tuples in the sequence. The *safe*, *encoding*, and *errors* parameters are passed down to - :func:`quote_plus` (the *encoding* and *errors* parameters are only passed + *quote_via* (the *encoding* and *errors* parameters are only passed when a query element is a :class:`str`). To reverse this encoding process, :func:`parse_qs` and :func:`parse_qsl` are @@ -550,6 +559,9 @@ task isn't already covered by the URL parsing functions above. .. versionchanged:: 3.2 Query parameter supports bytes and string objects. + .. versionchanged:: 3.5 + Added the *quote_via* parameter. + .. seealso:: diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index 762ad22..85027ed 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -622,6 +622,10 @@ urllib sent. (Contributed by Matej Cepl in :issue:`19494` and Akshit Khurana in :issue:`7159`.) +* A new :func:`~urllib.parse.urlencode` parameter *quote_via* provides a way to + control the encoding of query parts if needed. (Contributed by Samwyse and + Arnon Yaari in :issue:`13866`.) 
+ wsgiref ------- diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 156ccf5..4fa3dc6 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -785,6 +785,16 @@ class UrlParseTestCase(unittest.TestCase): result = urllib.parse.urlencode({'a': Trivial()}, True) self.assertEqual(result, 'a=trivial') + def test_urlencode_quote_via(self): + result = urllib.parse.urlencode({'a': 'some value'}) + self.assertEqual(result, "a=some+value") + result = urllib.parse.urlencode({'a': 'some value/another'}, + quote_via=urllib.parse.quote) + self.assertEqual(result, "a=some%20value%2Fanother") + result = urllib.parse.urlencode({'a': 'some value/another'}, + safe='/', quote_via=urllib.parse.quote) + self.assertEqual(result, "a=some%20value/another") + def test_quote_from_bytes(self): self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo') result = urllib.parse.quote_from_bytes(b'archaeological arcana') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index e313371..01c9e58 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -750,7 +750,8 @@ def quote_from_bytes(bs, safe='/'): _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ return ''.join([quoter(char) for char in bs]) -def urlencode(query, doseq=False, safe='', encoding=None, errors=None): +def urlencode(query, doseq=False, safe='', encoding=None, errors=None, + quote_via=quote_plus): """Encode a dict or sequence of two-element tuples into a URL query string. If any values in the query arg are sequences and doseq is true, each @@ -762,8 +763,8 @@ def urlencode(query, doseq=False, safe='', encoding=None, errors=None): The components of a query arg may each be either a string or a bytes type. - The safe, encoding, and errors parameters are passed down to quote_plus() - (encoding and errors only if a component is a str). 
+ The safe, encoding, and errors parameters are passed down to the function + specified by quote_via (encoding and errors only if a component is a str). """ if hasattr(query, "items"): @@ -789,27 +790,27 @@ def urlencode(query, doseq=False, safe='', encoding=None, errors=None): if not doseq: for k, v in query: if isinstance(k, bytes): - k = quote_plus(k, safe) + k = quote_via(k, safe) else: - k = quote_plus(str(k), safe, encoding, errors) + k = quote_via(str(k), safe, encoding, errors) if isinstance(v, bytes): - v = quote_plus(v, safe) + v = quote_via(v, safe) else: - v = quote_plus(str(v), safe, encoding, errors) + v = quote_via(str(v), safe, encoding, errors) l.append(k + '=' + v) else: for k, v in query: if isinstance(k, bytes): - k = quote_plus(k, safe) + k = quote_via(k, safe) else: - k = quote_plus(str(k), safe, encoding, errors) + k = quote_via(str(k), safe, encoding, errors) if isinstance(v, bytes): - v = quote_plus(v, safe) + v = quote_via(v, safe) l.append(k + '=' + v) elif isinstance(v, str): - v = quote_plus(v, safe, encoding, errors) + v = quote_via(v, safe, encoding, errors) l.append(k + '=' + v) else: try: @@ -817,15 +818,15 @@ def urlencode(query, doseq=False, safe='', encoding=None, errors=None): x = len(v) except TypeError: # not a sequence - v = quote_plus(str(v), safe, encoding, errors) + v = quote_via(str(v), safe, encoding, errors) l.append(k + '=' + v) else: # loop over the sequence for elt in v: if isinstance(elt, bytes): - elt = quote_plus(elt, safe) + elt = quote_via(elt, safe) else: - elt = quote_plus(str(elt), safe, encoding, errors) + elt = quote_via(str(elt), safe, encoding, errors) l.append(k + '=' + elt) return '&'.join(l) diff --git a/Misc/NEWS b/Misc/NEWS index 2ae5a6e..850ec50 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -47,6 +47,8 @@ Core and Builtins Library ------- +- Issue #13866: *quote_via* argument added to urllib.parse.urlencode. 
+ - Issue #20098: New mangle_from_ policy option for email, default True for compat32, but False for all other policies. -- cgit v0.12