diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 80 |
1 files changed, 32 insertions, 48 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5c27b04..fc6b3e9 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -616,15 +616,25 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) count = vargs; #endif #endif - /* step 1: count the number of %S/%R format specifications - * (we call PyObject_Str()/PyObject_Repr() for these objects - * once during step 3 and put the result in an array) */ + /* step 1: count the number of %S/%R/%s format specifications + * (we call PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() for these + * objects once during step 3 and put the result in an array) */ for (f = format; *f; f++) { - if (*f == '%' && (*(f+1)=='S' || *(f+1)=='R')) - ++callcount; + if (*f == '%') { + if (*(f+1)=='%') + continue; + if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A') + ++callcount; + while (isdigit((unsigned)*f)) + width = (width*10) + *f++ - '0'; + while (*++f && *f != '%' && !isalpha((unsigned)*f)) + ; + if (*f == 's') + ++callcount; + } } /* step 2: allocate memory for the results of - * PyObject_Str()/PyObject_Repr() calls */ + * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */ if (callcount) { callresults = PyObject_Malloc(sizeof(PyObject *)*callcount); if (!callresults) { @@ -673,35 +683,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) case 's': { /* UTF-8 */ - unsigned char*s; - s = va_arg(count, unsigned char*); - while (*s) { - if (*s < 128) { - n++; s++; - } else if (*s < 0xc0) { - /* invalid UTF-8 */ - n++; s++; - } else if (*s < 0xc0) { - n++; - s++; if(!*s)break; - s++; - } else if (*s < 0xe0) { - n++; - s++; if(!*s)break; - s++; if(!*s)break; - s++; - } else { -#ifdef Py_UNICODE_WIDE - n++; -#else - n+=2; -#endif - s++; if(!*s)break; - s++; if(!*s)break; - s++; if(!*s)break; - s++; - } - } + unsigned char *s = va_arg(count, unsigned char*); + PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace"); + if (!str) + goto fail; + n += PyUnicode_GET_SIZE(str); + /* Remember the str and switch to the next slot */ + *callresult++ = str; break; } case 'U': @@ -857,19 +845,15 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) break; case 's': { - /* Parameter must be UTF-8 encoded. - In case of encoding errors, use - the replacement character. */ - PyObject *u; - p = va_arg(vargs, char*); - u = PyUnicode_DecodeUTF8(p, strlen(p), - "replace"); - if (!u) - goto fail; - Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u), - PyUnicode_GET_SIZE(u)); - s += PyUnicode_GET_SIZE(u); - Py_DECREF(u); + /* unused, since we already have the result */ + (void) va_arg(vargs, char *); + Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult), + PyUnicode_GET_SIZE(*callresult)); + s += PyUnicode_GET_SIZE(*callresult); + /* We're done with the unicode()/repr() => forget it */ + Py_DECREF(*callresult); + /* switch to next unicode()/repr() result */ + ++callresult; break; } case 'U': |