From c17686f071c6f5b5d20366ae32188327a36e282e Mon Sep 17 00:00:00 2001
From: R David Murray <rdmurray@bitdance.com>
Date: Sun, 17 May 2015 20:44:50 -0400
Subject: Issue #13866: add *quote_via* argument to urlencode.

Patch by samwyse, completed by Arnon Yaari, and reviewed by
Martin Panter.
---
 Doc/library/urllib.parse.rst | 20 ++++++++++++++++----
 Doc/whatsnew/3.5.rst         |  4 ++++
 Lib/test/test_urlparse.py    | 10 ++++++++++
 Lib/urllib/parse.py          | 29 +++++++++++++++--------------
 Misc/NEWS                    |  2 ++
 5 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index 3ecdda1..800f830 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -519,7 +519,8 @@ task isn't already covered by the URL parsing functions above.
    Example: ``unquote_to_bytes('a%26%EF')`` yields ``b'a&\xef'``.
 
 
-.. function:: urlencode(query, doseq=False, safe='', encoding=None, errors=None)
+.. function:: urlencode(query, doseq=False, safe='', encoding=None, \
+                        errors=None, quote_via=quote_plus)
 
    Convert a mapping object or a sequence of two-element tuples, which may
    contain :class:`str` or :class:`bytes` objects, to a "percent-encoded"
@@ -528,8 +529,16 @@ task isn't already covered by the URL parsing functions above.
    properly encoded to bytes, otherwise it would result in a :exc:`TypeError`.
 
    The resulting string is a series of ``key=value`` pairs separated by ``'&'``
-   characters, where both *key* and *value* are quoted using :func:`quote_plus`
-   above. When a sequence of two-element tuples is used as the *query*
+   characters, where both *key* and *value* are quoted using the *quote_via*
+   function.  By default, :func:`quote_plus` is used for the quoting, which
+   means spaces are quoted as a ``'+'`` character and ``'/'`` characters are
+   encoded as ``%2F``, which follows the standard for GET requests
+   (``application/x-www-form-urlencoded``).  An alternate function that can be
+   passed as *quote_via* is :func:`quote`, which will encode spaces as ``%20``.
+   Note that ``'/'`` is still encoded as ``%2F`` unless it is listed in *safe*,
+   because *safe* defaults to ``''``; set *safe* to control what is quoted.
+
+   When a sequence of two-element tuples is used as the *query*
    argument, the first element of each tuple is a key and the second is a
    value. The value element in itself can be a sequence and in that case, if
    the optional parameter *doseq* is evaluates to *True*, individual
@@ -538,7 +547,7 @@ task isn't already covered by the URL parsing functions above.
    string will match the order of parameter tuples in the sequence.
 
    The *safe*, *encoding*, and *errors* parameters are passed down to
-   :func:`quote_plus` (the *encoding* and *errors* parameters are only passed
+   *quote_via* (the *encoding* and *errors* parameters are only passed
    when a query element is a :class:`str`).
 
    To reverse this encoding process, :func:`parse_qs` and :func:`parse_qsl` are
@@ -550,6 +559,9 @@ task isn't already covered by the URL parsing functions above.
    .. versionchanged:: 3.2
       Query parameter supports bytes and string objects.
 
+   .. versionadded:: 3.5
+      *quote_via* parameter.
+
 
 .. seealso::
 
diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
index 762ad22..85027ed 100644
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -622,6 +622,10 @@ urllib
   sent.  (Contributed by Matej Cepl in :issue:`19494` and Akshit Khurana in
   :issue:`7159`.)
 
+* A new :func:`~urllib.parse.urlencode` parameter *quote_via* provides a way to
+  control the encoding of query parts if needed.  (Contributed by Samwyse and
+  Arnon Yaari in :issue:`13866`.)
+
 wsgiref
 -------
 
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 156ccf5..4fa3dc6 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -785,6 +785,16 @@ class UrlParseTestCase(unittest.TestCase):
         result = urllib.parse.urlencode({'a': Trivial()}, True)
         self.assertEqual(result, 'a=trivial')
 
+    def test_urlencode_quote_via(self):
+        result = urllib.parse.urlencode({'a': 'some value'})
+        self.assertEqual(result, "a=some+value")
+        result = urllib.parse.urlencode({'a': 'some value/another'},
+                                        quote_via=urllib.parse.quote)
+        self.assertEqual(result, "a=some%20value%2Fanother")
+        result = urllib.parse.urlencode({'a': 'some value/another'},
+                                        safe='/', quote_via=urllib.parse.quote)
+        self.assertEqual(result, "a=some%20value/another")
+
     def test_quote_from_bytes(self):
         self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
         result = urllib.parse.quote_from_bytes(b'archaeological arcana')
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index e313371..01c9e58 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -750,7 +750,8 @@ def quote_from_bytes(bs, safe='/'):
         _safe_quoters[safe] = quoter = Quoter(safe).__getitem__
     return ''.join([quoter(char) for char in bs])
 
-def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
+def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
+              quote_via=quote_plus):
     """Encode a dict or sequence of two-element tuples into a URL query string.
 
     If any values in the query arg are sequences and doseq is true, each
@@ -762,8 +763,8 @@ def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
 
     The components of a query arg may each be either a string or a bytes type.
 
-    The safe, encoding, and errors parameters are passed down to quote_plus()
-    (encoding and errors only if a component is a str).
+    The safe, encoding, and errors parameters are passed down to the function
+    specified by quote_via (encoding and errors only if a component is a str).
     """
 
     if hasattr(query, "items"):
@@ -789,27 +790,27 @@ def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
     if not doseq:
         for k, v in query:
             if isinstance(k, bytes):
-                k = quote_plus(k, safe)
+                k = quote_via(k, safe)
             else:
-                k = quote_plus(str(k), safe, encoding, errors)
+                k = quote_via(str(k), safe, encoding, errors)
 
             if isinstance(v, bytes):
-                v = quote_plus(v, safe)
+                v = quote_via(v, safe)
             else:
-                v = quote_plus(str(v), safe, encoding, errors)
+                v = quote_via(str(v), safe, encoding, errors)
             l.append(k + '=' + v)
     else:
         for k, v in query:
             if isinstance(k, bytes):
-                k = quote_plus(k, safe)
+                k = quote_via(k, safe)
             else:
-                k = quote_plus(str(k), safe, encoding, errors)
+                k = quote_via(str(k), safe, encoding, errors)
 
             if isinstance(v, bytes):
-                v = quote_plus(v, safe)
+                v = quote_via(v, safe)
                 l.append(k + '=' + v)
             elif isinstance(v, str):
-                v = quote_plus(v, safe, encoding, errors)
+                v = quote_via(v, safe, encoding, errors)
                 l.append(k + '=' + v)
             else:
                 try:
@@ -817,15 +818,15 @@ def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
                     x = len(v)
                 except TypeError:
                     # not a sequence
-                    v = quote_plus(str(v), safe, encoding, errors)
+                    v = quote_via(str(v), safe, encoding, errors)
                     l.append(k + '=' + v)
                 else:
                     # loop over the sequence
                     for elt in v:
                         if isinstance(elt, bytes):
-                            elt = quote_plus(elt, safe)
+                            elt = quote_via(elt, safe)
                         else:
-                            elt = quote_plus(str(elt), safe, encoding, errors)
+                            elt = quote_via(str(elt), safe, encoding, errors)
                         l.append(k + '=' + elt)
     return '&'.join(l)
 
diff --git a/Misc/NEWS b/Misc/NEWS
index 2ae5a6e..850ec50 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -47,6 +47,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #13866: *quote_via* argument added to urllib.parse.urlencode.
+
 - Issue #20098: New mangle_from_ policy option for email, default True
   for compat32, but False for all other policies.
 
-- 
cgit v0.12