diff options
author | Stein Karlsen <karlsen.stein@gmail.com> | 2019-10-14 10:36:29 (GMT) |
---|---|---|
committer | Tal Einat <taleinat+github@gmail.com> | 2019-10-14 10:36:29 (GMT) |
commit | aad2ee01561f260c69af1951c0d6fcaf75c4d41b (patch) | |
tree | 6ffb53582b94065c4eb04012aaffa49a1af5422f | |
parent | 9cb51f4e20033f5fd4fed46036e347f263bb6d5b (diff) | |
download | cpython-aad2ee01561f260c69af1951c0d6fcaf75c4d41b.zip cpython-aad2ee01561f260c69af1951c0d6fcaf75c4d41b.tar.gz cpython-aad2ee01561f260c69af1951c0d6fcaf75c4d41b.tar.bz2 |
bpo-32498: urllib.parse.unquote also accepts bytes (GH-7768)
-rw-r--r-- | Doc/library/urllib.parse.rst | 7 | ||||
-rw-r--r-- | Lib/test/test_urllib.py | 25 | ||||
-rw-r--r-- | Lib/urllib/parse.py | 2 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2018-06-17-21-02-25.bpo-32498.La3TZz.rst | 2 |
4 files changed, 33 insertions, 3 deletions
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 49276da..84d289b 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -571,7 +571,7 @@ task isn't already covered by the URL parsing functions above. percent-encoded sequences into Unicode characters, as accepted by the :meth:`bytes.decode` method. - *string* must be a :class:`str`. + *string* may be either a :class:`str` or a :class:`bytes`. *encoding* defaults to ``'utf-8'``. *errors* defaults to ``'replace'``, meaning invalid sequences are replaced @@ -579,6 +579,11 @@ task isn't already covered by the URL parsing functions above. Example: ``unquote('/El%20Ni%C3%B1o/')`` yields ``'/El Niño/'``. + .. versionchanged:: 3.9 + *string* parameter supports bytes and str objects (previously only str). + + + .. function:: unquote_plus(string, encoding='utf-8', errors='replace') diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 9a6b5f6..3f59c66 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1049,8 +1049,6 @@ class UnquotingTests(unittest.TestCase): "%s" % result) self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None) self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ()) - with support.check_warnings(('', BytesWarning), quiet=True): - self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'') def test_unquoting_badpercent(self): # Test unquoting on bad percent-escapes @@ -1210,6 +1208,29 @@ class UnquotingTests(unittest.TestCase): self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) + def test_unquoting_with_bytes_input(self): + # ASCII characters decoded to a string + given = b'blueberryjam' + expect = 'blueberryjam' + result = urllib.parse.unquote(given) + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + + # A mix of non-ASCII hex-encoded characters and ASCII characters + given = 
b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y' + expect = 'bl\u00e5b\u00e6rsyltet\u00f8y' + result = urllib.parse.unquote(given) + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + + # A mix of non-ASCII percent-encoded characters and ASCII characters + given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j' + expect = 'bl\u00e5b\u00e6rsyltet\u00f8j' + result = urllib.parse.unquote(given) + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) + + class urlencode_Tests(unittest.TestCase): """Tests for urlencode()""" diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index b660878..3a38dc1 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -631,6 +631,8 @@ def unquote(string, encoding='utf-8', errors='replace'): unquote('abc%20def') -> 'abc def'. """ + if isinstance(string, bytes): + return unquote_to_bytes(string).decode(encoding, errors) if '%' not in string: string.split return string diff --git a/Misc/NEWS.d/next/Library/2018-06-17-21-02-25.bpo-32498.La3TZz.rst b/Misc/NEWS.d/next/Library/2018-06-17-21-02-25.bpo-32498.La3TZz.rst new file mode 100644 index 0000000..9df9e65 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-06-17-21-02-25.bpo-32498.La3TZz.rst @@ -0,0 +1,2 @@ +Made :func:`urllib.parse.unquote()` accept bytes in addition to strings. +Patch by Stein Karlsen. |