diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2007-08-31 11:01:23 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2007-08-31 11:01:23 (GMT) |
commit | 90d1fcd1011b7b5633eaeeec9c59779bc357c5b4 (patch) | |
tree | 31be0e376e887c05410a7339594aed933b830881 /Objects/unicodeobject.c | |
parent | bef9d21d14722bc43fa2161ae55088d152d42ff8 (diff) | |
download | cpython-90d1fcd1011b7b5633eaeeec9c59779bc357c5b4.zip cpython-90d1fcd1011b7b5633eaeeec9c59779bc357c5b4.tar.gz cpython-90d1fcd1011b7b5633eaeeec9c59779bc357c5b4.tar.bz2 |
Change the %s argument of PyUnicode_FromFormat to be
interpreted as a UTF-8 encoded string. Fixes #1070.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 48 |
1 file changed, 46 insertions, 2 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f9d3068..e9ce08c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -621,8 +621,39 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) abuffersize = width; break; case 's': - n += strlen(va_arg(count, char*)); + { + /* UTF-8 */ + unsigned char*s; + s = va_arg(count, unsigned char*); + while (*s) { + if (*s < 128) { + n++; s++; + } else if (*s < 0xc0) { + /* invalid UTF-8 */ + n++; s++; + } else if (*s < 0xc0) { + n++; + s++; if(!*s)break; + s++; + } else if (*s < 0xe0) { + n++; + s++; if(!*s)break; + s++; if(!*s)break; + s++; + } else { + #ifdef Py_UNICODE_WIDE + n++; + #else + n+=2; + #endif + s++; if(!*s)break; + s++; if(!*s)break; + s++; if(!*s)break; + s++; + } + } break; + } case 'U': { PyObject *obj = va_arg(count, PyObject *); @@ -775,9 +806,22 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) appendstring(realbuffer); break; case 's': + { + /* Parameter must be UTF-8 encoded. + In case of encoding errors, use + the replacement character. */ + PyObject *u; p = va_arg(vargs, char*); - appendstring(p); + u = PyUnicode_DecodeUTF8(p, strlen(p), + "replace"); + if (!u) + goto fail; + Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u), + PyUnicode_GET_SIZE(u)); + s += PyUnicode_GET_SIZE(u); + Py_DECREF(u); break; + } case 'U': { PyObject *obj = va_arg(vargs, PyObject *); |