diff options
author | Walter Dörwald <walter@livinglogic.de> | 2009-05-03 22:38:54 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2009-05-03 22:38:54 (GMT) |
commit | f11232e5c57cced549291091b8509aac4272bd46 (patch) | |
tree | ebce58ceb000c1627dc55f56ad97e14dd603c3d8 /Objects/unicodeobject.c | |
parent | 921825233265979c58cd0d64967de616d0c8ecd8 (diff) | |
download | cpython-f11232e5c57cced549291091b8509aac4272bd46.zip cpython-f11232e5c57cced549291091b8509aac4272bd46.tar.gz cpython-f11232e5c57cced549291091b8509aac4272bd46.tar.bz2 |
Merged revisions 72260 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r72260 | walter.doerwald | 2009-05-04 00:36:33 +0200 (Mo, 04 Mai 2009) | 5 lines
Issue #5108: Handle %s like %S and %R in PyUnicode_FromFormatV(): Call
PyUnicode_DecodeUTF8() once, remember the result and output it in a second
step. This avoids miscounting the output length: the old code sized the buffer
by counting UTF-8 bytes by hand, which ignored the effect of the replace error
handler used by PyUnicode_DecodeUTF8().
........
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 80 |
1 files changed, 32 insertions, 48 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5c27b04..fc6b3e9 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -616,15 +616,25 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) count = vargs; #endif #endif - /* step 1: count the number of %S/%R format specifications - * (we call PyObject_Str()/PyObject_Repr() for these objects - * once during step 3 and put the result in an array) */ + /* step 1: count the number of %S/%R/%s format specifications + * (we call PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() for these + * objects once during step 3 and put the result in an array) */ for (f = format; *f; f++) { - if (*f == '%' && (*(f+1)=='S' || *(f+1)=='R')) - ++callcount; + if (*f == '%') { + if (*(f+1)=='%') + continue; + if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A') + ++callcount; + while (isdigit((unsigned)*f)) + width = (width*10) + *f++ - '0'; + while (*++f && *f != '%' && !isalpha((unsigned)*f)) + ; + if (*f == 's') + ++callcount; + } } /* step 2: allocate memory for the results of - * PyObject_Str()/PyObject_Repr() calls */ + * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */ if (callcount) { callresults = PyObject_Malloc(sizeof(PyObject *)*callcount); if (!callresults) { @@ -673,35 +683,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) case 's': { /* UTF-8 */ - unsigned char*s; - s = va_arg(count, unsigned char*); - while (*s) { - if (*s < 128) { - n++; s++; - } else if (*s < 0xc0) { - /* invalid UTF-8 */ - n++; s++; - } else if (*s < 0xc0) { - n++; - s++; if(!*s)break; - s++; - } else if (*s < 0xe0) { - n++; - s++; if(!*s)break; - s++; if(!*s)break; - s++; - } else { -#ifdef Py_UNICODE_WIDE - n++; -#else - n+=2; -#endif - s++; if(!*s)break; - s++; if(!*s)break; - s++; if(!*s)break; - s++; - } - } + unsigned char *s = va_arg(count, unsigned char*); + PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace"); + if (!str) + goto fail; + n += PyUnicode_GET_SIZE(str); + 
/* Remember the str and switch to the next slot */ + *callresult++ = str; break; } case 'U': @@ -857,19 +845,15 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) break; case 's': { - /* Parameter must be UTF-8 encoded. - In case of encoding errors, use - the replacement character. */ - PyObject *u; - p = va_arg(vargs, char*); - u = PyUnicode_DecodeUTF8(p, strlen(p), - "replace"); - if (!u) - goto fail; - Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u), - PyUnicode_GET_SIZE(u)); - s += PyUnicode_GET_SIZE(u); - Py_DECREF(u); + /* unused, since we already have the result */ + (void) va_arg(vargs, char *); + Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult), + PyUnicode_GET_SIZE(*callresult)); + s += PyUnicode_GET_SIZE(*callresult); + /* We're done with the unicode()/repr() => forget it */ + Py_DECREF(*callresult); + /* switch to next unicode()/repr() result */ + ++callresult; break; } case 'U': |