diff options
author | Marc-André Lemburg <mal@egenix.com> | 2002-02-25 14:30:49 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2002-02-25 14:30:49 (GMT) |
commit | d53e226f819035d88c1d40c833d2d30bc3f154f1 (patch) | |
tree | 46e2a0afbbc267ca0b8dbb7aaf2466c1a578eac8 | |
parent | 858c9a1a5b3555dc8c95f009e4bc1863f34a3d14 (diff) | |
download | cpython-d53e226f819035d88c1d40c833d2d30bc3f154f1.zip cpython-d53e226f819035d88c1d40c833d2d30bc3f154f1.tar.gz cpython-d53e226f819035d88c1d40c833d2d30bc3f154f1.tar.bz2 |
Fix UTF-8 encoder pointer arithmetic and restore 2.2 behaviour.
-rw-r--r-- | Objects/unicodeobject.c | 23 |
1 file changed, 10 insertions, 13 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 388cfc66..459ebff 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1171,9 +1171,9 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s, { PyObject *v; char *p; + char *q; Py_UCS4 ch2; unsigned int cbAllocated = 3 * size; - unsigned int cbWritten = 0; int i = 0; v = PyString_FromStringAndSize(NULL, cbAllocated); @@ -1182,17 +1182,15 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s, if (size == 0) return v; - p = PyString_AS_STRING(v); + p = q = PyString_AS_STRING(v); while (i < size) { Py_UCS4 ch = s[i++]; if (ch < 0x80) { *p++ = (char) ch; - cbWritten++; } else if (ch < 0x0800) { *p++ = 0xc0 | (ch >> 6); *p++ = 0x80 | (ch & 0x3f); - cbWritten += 2; } else if (ch < 0x10000) { /* Check for high surrogate */ @@ -1201,13 +1199,13 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s, ch2 = s[i]; if (0xDC00 <= ch2 && ch2 <= 0xDFFF) { - if (cbWritten >= (cbAllocated - 4)) { + if ((p - q) >= (cbAllocated - 4)) { /* Provide enough room for some more surrogates */ cbAllocated += 4*10; if (_PyString_Resize(&v, cbAllocated)) goto onError; - p = PyString_AS_STRING(v) + cbWritten; + p = PyString_AS_STRING(v) + (p - q); } /* combine the two values */ @@ -1216,33 +1214,32 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s, *p++ = (char)((ch >> 18) | 0xf0); *p++ = (char)(0x80 | ((ch >> 12) & 0x3f)); i++; - cbWritten += 4; } } } else { *p++ = (char)(0xe0 | (ch >> 12)); - cbWritten += 3; } *p++ = (char)(0x80 | ((ch >> 6) & 0x3f)); *p++ = (char)(0x80 | (ch & 0x3f)); } else { - if (cbWritten >= (cbAllocated - 4)) { - /* Provide enough room for some more large characters. 
*/ + if ((p - q) >= (cbAllocated - 4)) { + /* Provide enough room for some more + surrogates */ cbAllocated += 4*10; if (_PyString_Resize(&v, cbAllocated)) goto onError; - p = PyString_AS_STRING(v) + cbWritten; + p = PyString_AS_STRING(v) + (p - q); } + *p++ = 0xf0 | (ch>>18); *p++ = 0x80 | ((ch>>12) & 0x3f); *p++ = 0x80 | ((ch>>6) & 0x3f); *p++ = 0x80 | (ch & 0x3f); - cbWritten += 4; } } *p = '\0'; - if (_PyString_Resize(&v, cbWritten)) + if (_PyString_Resize(&v, p - q)) goto onError; return v; |