diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-08-06 13:56:26 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-08-06 13:56:26 (GMT) |
commit | e822b034e766e03cd8fbe7ab52fbc2d46fff6d33 (patch) | |
tree | a2e79671034ae47c4a0d6d0d030aecda007c8123 /Python | |
parent | 5ad3514822a80c094f1cfe47ae59450001043482 (diff) | |
download | cpython-e822b034e766e03cd8fbe7ab52fbc2d46fff6d33.zip cpython-e822b034e766e03cd8fbe7ab52fbc2d46fff6d33.tar.gz cpython-e822b034e766e03cd8fbe7ab52fbc2d46fff6d33.tar.bz2 |
Issue #15866: The xmlcharrefreplace error handler no longer produces two XML
entities for a non-BMP character on a narrow build.
Diffstat (limited to 'Python')
-rw-r--r-- | Python/codecs.c | 66 |
1 files changed, 36 insertions, 30 deletions
```diff
diff --git a/Python/codecs.c b/Python/codecs.c
index 7334eb3..91147a0 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -556,6 +556,7 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
     PyObject *res;
     Py_UNICODE *p;
     Py_UNICODE *startp;
+    Py_UNICODE *e;
     Py_UNICODE *outp;
     int ressize;
     if (PyUnicodeEncodeError_GetStart(exc, &start))
@@ -565,26 +566,31 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
     if (!(object = PyUnicodeEncodeError_GetObject(exc)))
         return NULL;
     startp = PyUnicode_AS_UNICODE(object);
-    for (p = startp+start, ressize = 0; p < startp+end; ++p) {
-        if (*p<10)
+    e = startp + end;
+    for (p = startp+start, ressize = 0; p < e;) {
+        Py_UCS4 ch = *p++;
+#ifndef Py_UNICODE_WIDE
+        if ((0xD800 <= ch && ch <= 0xDBFF) &&
+            (p < e) &&
+            (0xDC00 <= *p && *p <= 0xDFFF)) {
+            ch = ((((ch & 0x03FF) << 10) |
+                   ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
+        }
+#endif
+        if (ch < 10)
             ressize += 2+1+1;
-        else if (*p<100)
+        else if (ch < 100)
             ressize += 2+2+1;
-        else if (*p<1000)
+        else if (ch < 1000)
             ressize += 2+3+1;
-        else if (*p<10000)
+        else if (ch < 10000)
             ressize += 2+4+1;
-#ifndef Py_UNICODE_WIDE
-        else
-            ressize += 2+5+1;
-#else
-        else if (*p<100000)
+        else if (ch < 100000)
             ressize += 2+5+1;
-        else if (*p<1000000)
+        else if (ch < 1000000)
             ressize += 2+6+1;
         else
             ressize += 2+7+1;
-#endif
     }
     /* allocate replacement */
     res = PyUnicode_FromUnicode(NULL, ressize);
@@ -593,40 +599,41 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
         return NULL;
     }
     /* generate replacement */
-    for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
-        p < startp+end; ++p) {
-        Py_UNICODE c = *p;
+    for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); p < e;) {
         int digits;
         int base;
+        Py_UCS4 ch = *p++;
+#ifndef Py_UNICODE_WIDE
+        if ((0xD800 <= ch && ch <= 0xDBFF) &&
+            (p < startp+end) &&
+            (0xDC00 <= *p && *p <= 0xDFFF)) {
+            ch = ((((ch & 0x03FF) << 10) |
+                   ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
+        }
+#endif
         *outp++ = '&';
         *outp++ = '#';
-        if (*p<10) {
+        if (ch < 10) {
             digits = 1;
             base = 1;
         }
-        else if (*p<100) {
+        else if (ch < 100) {
             digits = 2;
             base = 10;
         }
-        else if (*p<1000) {
+        else if (ch < 1000) {
             digits = 3;
             base = 100;
         }
-        else if (*p<10000) {
+        else if (ch < 10000) {
             digits = 4;
             base = 1000;
         }
-#ifndef Py_UNICODE_WIDE
-        else {
-            digits = 5;
-            base = 10000;
-        }
-#else
-        else if (*p<100000) {
+        else if (ch < 100000) {
             digits = 5;
             base = 10000;
         }
-        else if (*p<1000000) {
+        else if (ch < 1000000) {
             digits = 6;
             base = 100000;
         }
@@ -634,10 +641,9 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
             digits = 7;
             base = 1000000;
         }
-#endif
         while (digits-->0) {
-            *outp++ = '0' + c/base;
-            c %= base;
+            *outp++ = '0' + ch/base;
+            ch %= base;
             base /= 10;
         }
         *outp++ = ';';
```