summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@haypocalc.com>2011-03-01 22:48:49 (GMT)
committerVictor Stinner <victor.stinner@haypocalc.com>2011-03-01 22:48:49 (GMT)
commit2b574a2332a6c909de619f480d02ca144fd2e517 (patch)
treea69c751120c35b319745387a1db0170cb08f10dc
parentd84dfee7c1cc08063725bd65b7abd67098b7104e (diff)
downloadcpython-2b574a2332a6c909de619f480d02ca144fd2e517.zip
cpython-2b574a2332a6c909de619f480d02ca144fd2e517.tar.gz
cpython-2b574a2332a6c909de619f480d02ca144fd2e517.tar.bz2
Merged revisions 88697 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r88697 | victor.stinner | 2011-03-01 23:46:52 +0100 (mar., 01 mars 2011) | 4 lines Issue #11246: Fix PyUnicode_FromFormat("%V"). Decode the byte string from UTF-8 (with replace error handler) instead of ISO-8859-1 (in strict mode). Patch written by Ray Allen. ........
-rw-r--r--Lib/test/test_unicode.py13
-rw-r--r--Misc/NEWS4
-rw-r--r--Objects/unicodeobject.c26
3 files changed, 36 insertions, 7 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 9ad9eed..65b26c5 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1459,6 +1459,19 @@ class UnicodeTest(string_tests.CommonTest,
text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
+ text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
+ self.assertEqual(text, 'repr=abc')
+
+ # Test string decode from parameter of %V using utf-8.
+ # b'\xe4\xba\xba\xe6\xb0\x91' is the utf-8 encoded byte sequence of
+ # '\u4eba\u6c11'
+ text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
+ self.assertEqual(text, 'repr=\u4eba\u6c11')
+
+ # Test the replace error handler.
+ text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff')
+ self.assertEqual(text, 'repr=abc\ufffd')
+
# Test PyUnicode_AsWideChar()
def test_aswidechar(self):
from _testcapi import unicode_aswidechar
diff --git a/Misc/NEWS b/Misc/NEWS
index 665f6c8..7336245 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@ What's New in Python 3.2.1?
Core and Builtins
-----------------
+- Issue #11246: Fix PyUnicode_FromFormat("%V") to decode the byte string from
+ UTF-8 (with replace error handler) instead of ISO-8859-1 (in strict mode).
+ Patch written by Ray Allen.
+
- Issue #11286: Raise a ValueError from calling PyMemoryView_FromBuffer with
a buffer struct having a NULL data pointer.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 423a533..cbda725 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -752,7 +752,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
if (*f == '%') {
if (*(f+1)=='%')
continue;
- if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A')
+ if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A' || *(f+1) == 'V')
++callcount;
while (Py_ISDIGIT((unsigned)*f))
width = (width*10) + *f++ - '0';
@@ -872,12 +872,20 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
{
PyObject *obj = va_arg(count, PyObject *);
const char *str = va_arg(count, const char *);
+ PyObject *str_obj;
assert(obj || str);
assert(!obj || PyUnicode_Check(obj));
- if (obj)
+ if (obj) {
n += PyUnicode_GET_SIZE(obj);
- else
- n += strlen(str);
+ *callresult++ = NULL;
+ }
+ else {
+ str_obj = PyUnicode_DecodeUTF8(str, strlen(str), "replace");
+ if (!str_obj)
+ goto fail;
+ n += PyUnicode_GET_SIZE(str_obj);
+ *callresult++ = str_obj;
+ }
break;
}
case 'S':
@@ -1080,14 +1088,18 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
case 'V':
{
PyObject *obj = va_arg(vargs, PyObject *);
- const char *str = va_arg(vargs, const char *);
+ va_arg(vargs, const char *);
if (obj) {
Py_ssize_t size = PyUnicode_GET_SIZE(obj);
Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
s += size;
} else {
- appendstring(str);
+ Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult),
+ PyUnicode_GET_SIZE(*callresult));
+ s += PyUnicode_GET_SIZE(*callresult);
+ Py_DECREF(*callresult);
}
+ ++callresult;
break;
}
case 'S':
@@ -1144,7 +1156,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
if (callresults) {
PyObject **callresult2 = callresults;
while (callresult2 < callresult) {
- Py_DECREF(*callresult2);
+ Py_XDECREF(*callresult2);
++callresult2;
}
PyObject_Free(callresults);