summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de> 2007-08-31 11:01:23 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2007-08-31 11:01:23 (GMT)
commit: 90d1fcd1011b7b5633eaeeec9c59779bc357c5b4 (patch)
tree: 31be0e376e887c05410a7339594aed933b830881 /Objects/unicodeobject.c
parent: bef9d21d14722bc43fa2161ae55088d152d42ff8 (diff)
download: cpython-90d1fcd1011b7b5633eaeeec9c59779bc357c5b4.zip
cpython-90d1fcd1011b7b5633eaeeec9c59779bc357c5b4.tar.gz
cpython-90d1fcd1011b7b5633eaeeec9c59779bc357c5b4.tar.bz2
Change %s argument for PyUnicode_FromFormat to
be UTF-8. Fixes #1070.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 48
1 files changed, 46 insertions, 2 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f9d3068..e9ce08c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -621,8 +621,39 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
abuffersize = width;
break;
case 's':
- n += strlen(va_arg(count, char*));
+ {
+ /* UTF-8 */
+ unsigned char*s;
+ s = va_arg(count, unsigned char*);
+ while (*s) {
+ if (*s < 128) {
+ n++; s++;
+ } else if (*s < 0xc0) {
+ /* invalid UTF-8 */
+ n++; s++;
+ } else if (*s < 0xc0) {
+ n++;
+ s++; if(!*s)break;
+ s++;
+ } else if (*s < 0xe0) {
+ n++;
+ s++; if(!*s)break;
+ s++; if(!*s)break;
+ s++;
+ } else {
+ #ifdef Py_UNICODE_WIDE
+ n++;
+ #else
+ n+=2;
+ #endif
+ s++; if(!*s)break;
+ s++; if(!*s)break;
+ s++; if(!*s)break;
+ s++;
+ }
+ }
break;
+ }
case 'U':
{
PyObject *obj = va_arg(count, PyObject *);
@@ -775,9 +806,22 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
appendstring(realbuffer);
break;
case 's':
+ {
+ /* Parameter must be UTF-8 encoded.
+ In case of encoding errors, use
+ the replacement character. */
+ PyObject *u;
p = va_arg(vargs, char*);
- appendstring(p);
+ u = PyUnicode_DecodeUTF8(p, strlen(p),
+ "replace");
+ if (!u)
+ goto fail;
+ Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u),
+ PyUnicode_GET_SIZE(u));
+ s += PyUnicode_GET_SIZE(u);
+ Py_DECREF(u);
break;
+ }
case 'U':
{
PyObject *obj = va_arg(vargs, PyObject *);