From ad2fa76ffa25cac1e7ddea05f23d9a8c4b222de4 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger
Date: Sat, 15 Oct 2005 16:44:57 +0000
Subject: Teach unquote() to handle unicode inputs

---
 Lib/test/test_urllib.py | 4 ++++
 Lib/urllib.py | 2 ++
 Misc/NEWS | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 3621476..e8a6c6a 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -409,6 +409,10 @@ class UnquotingTests(unittest.TestCase):
         self.assertEqual(expect, result,
                          "using unquote_plus(): %s != %s" % (expect, result))
 
+    def test_unquote_with_unicode(self):
+        r = urllib.unquote(u'br%C3%BCckner_sapporo_20050930.doc')
+        self.assertEqual(r, u'br\xc3\xbcckner_sapporo_20050930.doc')
+
 class urlencode_Tests(unittest.TestCase):
     """Tests for urlencode()"""
 
diff --git a/Lib/urllib.py b/Lib/urllib.py
index bc16be0..f00d02f 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -1061,6 +1061,8 @@ def unquote(s):
             res[i] = _hextochr[item[:2]] + item[2:]
         except KeyError:
             res[i] = '%' + item
+        except UnicodeDecodeError:
+            res[i] = unichr(int(item[:2], 16)) + item[2:]
     return "".join(res)
 
 def unquote_plus(s):
diff --git a/Misc/NEWS b/Misc/NEWS
index e0ddfe8..84ee619 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -30,6 +30,9 @@ Extension Modules
 Library
 -------
 
+- urllib.unquote() now handles Unicode strings correctly. Formerly, it would
+  either ignore the substitution or raise UnicodeDecodeError.
+
 - SF #1313496: the bisect module now accepts named arguments.
 
 - Bug #729103: pydoc.py: Fix docother() method to accept additional
-- 
cgit v0.12