diff options
-rw-r--r-- | Lib/test/test_unicode.py | 13 |
-rw-r--r-- | Misc/NEWS | 4 |
-rw-r--r-- | Objects/unicodeobject.c | 26 |
3 files changed, 36 insertions, 7 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 9ad9eed..65b26c5 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1459,6 +1459,19 @@ class UnicodeTest(string_tests.CommonTest, text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff') self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'") + text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz') + self.assertEqual(text, 'repr=abc') + + # Test string decode from parameter of %s using utf-8. + # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of + # '\u4eba\u6c11' + text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91') + self.assertEqual(text, 'repr=\u4eba\u6c11') + + #Test replace error handler. + text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff') + self.assertEqual(text, 'repr=abc\ufffd') + # Test PyUnicode_AsWideChar() def test_aswidechar(self): from _testcapi import unicode_aswidechar @@ -10,6 +10,10 @@ What's New in Python 3.2.1? Core and Builtins ----------------- +- Issue #11246: Fix PyUnicode_FromFormat("%V") to decode the byte string from + UTF-8 (with replace error handler) instead of ISO-8859-1 (in strict mode). + Patch written by Ray Allen. + - Issue #11286: Raise a ValueError from calling PyMemoryView_FromBuffer with a buffer struct having a NULL data pointer. 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 423a533..cbda725 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -752,7 +752,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) if (*f == '%') { if (*(f+1)=='%') continue; - if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A') + if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A' || *(f+1) == 'V') ++callcount; while (Py_ISDIGIT((unsigned)*f)) width = (width*10) + *f++ - '0'; @@ -872,12 +872,20 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) { PyObject *obj = va_arg(count, PyObject *); const char *str = va_arg(count, const char *); + PyObject *str_obj; assert(obj || str); assert(!obj || PyUnicode_Check(obj)); - if (obj) + if (obj) { n += PyUnicode_GET_SIZE(obj); - else - n += strlen(str); + *callresult++ = NULL; + } + else { + str_obj = PyUnicode_DecodeUTF8(str, strlen(str), "replace"); + if (!str_obj) + goto fail; + n += PyUnicode_GET_SIZE(str_obj); + *callresult++ = str_obj; + } break; } case 'S': @@ -1080,14 +1088,18 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) case 'V': { PyObject *obj = va_arg(vargs, PyObject *); - const char *str = va_arg(vargs, const char *); + va_arg(vargs, const char *); if (obj) { Py_ssize_t size = PyUnicode_GET_SIZE(obj); Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size); s += size; } else { - appendstring(str); + Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult), + PyUnicode_GET_SIZE(*callresult)); + s += PyUnicode_GET_SIZE(*callresult); + Py_DECREF(*callresult); } + ++callresult; break; } case 'S': @@ -1144,7 +1156,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) if (callresults) { PyObject **callresult2 = callresults; while (callresult2 < callresult) { - Py_DECREF(*callresult2); + Py_XDECREF(*callresult2); ++callresult2; } PyObject_Free(callresults); |