summary | refs | log | tree | commit | diff | stats
path: root/Python/codecs.c
diff options
context:
space:
mode:
author Martin v. Löwis <martin@v.loewis.de> 2011-11-04 10:16:41 (GMT)
committer Martin v. Löwis <martin@v.loewis.de> 2011-11-04 10:16:41 (GMT)
commit b09af03b8acc54076762e84393e446b68c861500 (patch)
tree 441e94573e938e54cc2536478a80b2c9ecb64229 /Python/codecs.c
parent 495dcbd5c13007dfd679edc8067131228c65bccb (diff)
download cpython-b09af03b8acc54076762e84393e446b68c861500.zip
cpython-b09af03b8acc54076762e84393e446b68c861500.tar.gz
cpython-b09af03b8acc54076762e84393e446b68c861500.tar.bz2
Port error handlers from Py_UNICODE indexing to code point indexing.
Diffstat (limited to 'Python/codecs.c')
-rw-r--r-- Python/codecs.c 123
1 files changed, 46 insertions, 77 deletions
diff --git a/Python/codecs.c b/Python/codecs.c
index eca2b88..47cdb20 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -573,82 +573,72 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
PyObject *restuple;
PyObject *object;
+ Py_ssize_t i, o;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
- Py_UNICODE *p;
- Py_UNICODE *startp;
- Py_UNICODE *outp;
+ unsigned char *outp;
int ressize;
+ Py_UCS4 ch;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
if (PyUnicodeEncodeError_GetEnd(exc, &end))
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- startp = PyUnicode_AS_UNICODE(object);
- for (p = startp+start, ressize = 0; p < startp+end; ++p) {
- if (*p<10)
+ for (i = start, ressize = 0; i < end; ++i) {
+ /* object is guaranteed to be "ready" */
+ ch = PyUnicode_READ_CHAR(object, i);
+ if (ch<10)
ressize += 2+1+1;
- else if (*p<100)
+ else if (ch<100)
ressize += 2+2+1;
- else if (*p<1000)
+ else if (ch<1000)
ressize += 2+3+1;
- else if (*p<10000)
+ else if (ch<10000)
ressize += 2+4+1;
-#ifndef Py_UNICODE_WIDE
- else
- ressize += 2+5+1;
-#else
- else if (*p<100000)
+ else if (ch<100000)
ressize += 2+5+1;
- else if (*p<1000000)
+ else if (ch<1000000)
ressize += 2+6+1;
else
ressize += 2+7+1;
-#endif
}
/* allocate replacement */
- res = PyUnicode_FromUnicode(NULL, ressize);
+ res = PyUnicode_New(ressize, 127);
if (res == NULL) {
Py_DECREF(object);
return NULL;
}
+ outp = PyUnicode_1BYTE_DATA(res);
/* generate replacement */
- for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
- p < startp+end; ++p) {
- Py_UNICODE c = *p;
+ for (i = start, o = 0; i < end; ++i) {
+ ch = PyUnicode_READ_CHAR(object, i);
int digits;
int base;
*outp++ = '&';
*outp++ = '#';
- if (*p<10) {
+ if (ch<10) {
digits = 1;
base = 1;
}
- else if (*p<100) {
+ else if (ch<100) {
digits = 2;
base = 10;
}
- else if (*p<1000) {
+ else if (ch<1000) {
digits = 3;
base = 100;
}
- else if (*p<10000) {
+ else if (ch<10000) {
digits = 4;
base = 1000;
}
-#ifndef Py_UNICODE_WIDE
- else {
- digits = 5;
- base = 10000;
- }
-#else
- else if (*p<100000) {
+ else if (ch<100000) {
digits = 5;
base = 10000;
}
- else if (*p<1000000) {
+ else if (ch<1000000) {
digits = 6;
base = 100000;
}
@@ -656,10 +646,9 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
digits = 7;
base = 1000000;
}
-#endif
while (digits-->0) {
- *outp++ = '0' + c/base;
- c %= base;
+ *outp++ = '0' + ch/base;
+ ch %= base;
base /= 10;
}
*outp++ = ';';
@@ -677,58 +666,41 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
-#ifndef Py_UNICODE_WIDE
-#define IS_SURROGATE_PAIR(p, end) \
- (*p >= 0xD800 && *p <= 0xDBFF && (p + 1) < end && \
- *(p + 1) >= 0xDC00 && *(p + 1) <= 0xDFFF)
-#else
-#define IS_SURROGATE_PAIR(p, end) 0
-#endif
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
PyObject *restuple;
PyObject *object;
+ Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
- Py_UNICODE *p;
- Py_UNICODE *startp;
- Py_UNICODE *outp;
+ unsigned char *outp;
int ressize;
+ Py_UCS4 c;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
if (PyUnicodeEncodeError_GetEnd(exc, &end))
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- startp = PyUnicode_AS_UNICODE(object);
- for (p = startp+start, ressize = 0; p < startp+end; ++p) {
-#ifdef Py_UNICODE_WIDE
- if (*p >= 0x00010000)
+ for (i = start, ressize = 0; i < end; ++i) {
+ /* object is guaranteed to be "ready" */
+ c = PyUnicode_READ_CHAR(object, i);
+ if (c >= 0x10000) {
ressize += 1+1+8;
- else
-#endif
- if (*p >= 0x100) {
- if (IS_SURROGATE_PAIR(p, startp+end)) {
- ressize += 1+1+8;
- ++p;
- }
- else
- ressize += 1+1+4;
+ }
+ else if (c >= 0x100) {
+ ressize += 1+1+4;
}
else
ressize += 1+1+2;
}
- res = PyUnicode_FromUnicode(NULL, ressize);
+ res = PyUnicode_New(ressize, 127);
if (res==NULL)
return NULL;
- for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
- p < startp+end; ++p) {
- Py_UCS4 c = (Py_UCS4) *p;
+ for (i = start, outp = PyUnicode_1BYTE_DATA(res);
+ i < end; ++i) {
+ c = PyUnicode_READ_CHAR(object, i);
*outp++ = '\\';
- if (IS_SURROGATE_PAIR(p, startp+end)) {
- c = ((*p & 0x3FF) << 10) + (*(p + 1) & 0x3FF) + 0x10000;
- ++p;
- }
if (c >= 0x00010000) {
*outp++ = 'U';
*outp++ = Py_hexdigits[(c>>28)&0xf];
@@ -758,7 +730,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
wrong_exception_type(exc);
return NULL;
}
-#undef IS_SURROGATE_PAIR
}
/* This handler is declared static until someone demonstrates
@@ -768,12 +739,11 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
{
PyObject *restuple;
PyObject *object;
+ Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
- Py_UNICODE *p;
- Py_UNICODE *startp;
char *outp;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
@@ -781,15 +751,15 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- startp = PyUnicode_AS_UNICODE(object);
res = PyBytes_FromStringAndSize(NULL, 3*(end-start));
if (!res) {
Py_DECREF(object);
return NULL;
}
outp = PyBytes_AsString(res);
- for (p = startp+start; p < startp+end; p++) {
- Py_UNICODE ch = *p;
+ for (i = start; i < end; i++) {
+ /* object is guaranteed to be "ready" */
+ Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
if (ch < 0xd800 || ch > 0xdfff) {
/* Not a surrogate, fail with original exception */
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
@@ -847,12 +817,11 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
{
PyObject *restuple;
PyObject *object;
+ Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
- Py_UNICODE *p;
- Py_UNICODE *startp;
char *outp;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
@@ -860,15 +829,15 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- startp = PyUnicode_AS_UNICODE(object);
res = PyBytes_FromStringAndSize(NULL, end-start);
if (!res) {
Py_DECREF(object);
return NULL;
}
outp = PyBytes_AsString(res);
- for (p = startp+start; p < startp+end; p++) {
- Py_UNICODE ch = *p;
+ for (i = start; i < end; i++) {
+ /* object is guaranteed to be "ready" */
+ Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
if (ch < 0xdc80 || ch > 0xdcff) {
/* Not a UTF-8b surrogate, fail with original exception */
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);