summaryrefslogtreecommitdiffstats
path: root/Python/codecs.c
diff options
context:
space:
mode:
Diffstat (limited to 'Python/codecs.c')
-rw-r--r--Python/codecs.c230
1 files changed, 103 insertions, 127 deletions
diff --git a/Python/codecs.c b/Python/codecs.c
index 90f1cf6..5470500 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -11,6 +11,8 @@ Copyright (c) Corporation for National Research Initiatives.
#include "Python.h"
#include <ctype.h>
+const char *Py_hexdigits = "0123456789abcdef";
+
/* --- Codec Registry ----------------------------------------------------- */
/* Import the standard encodings package which will register the first
@@ -465,9 +467,11 @@ PyObject *PyCodec_LookupError(const char *name)
static void wrong_exception_type(PyObject *exc)
{
- PyObject *type = PyObject_GetAttrString(exc, "__class__");
+ _Py_IDENTIFIER(__class__);
+ _Py_IDENTIFIER(__name__);
+ PyObject *type = _PyObject_GetAttrId(exc, &PyId___class__);
if (type != NULL) {
- PyObject *name = PyObject_GetAttrString(type, "__name__");
+ PyObject *name = _PyObject_GetAttrId(type, &PyId___name__);
Py_DECREF(type);
if (name != NULL) {
PyErr_Format(PyExc_TypeError,
@@ -506,57 +510,58 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
wrong_exception_type(exc);
return NULL;
}
- /* ouch: passing NULL, 0, pos gives None instead of u'' */
- return Py_BuildValue("(u#n)", &end, 0, end);
+ return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end);
}
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
{
- PyObject *restuple;
- Py_ssize_t start;
- Py_ssize_t end;
- Py_ssize_t i;
+ Py_ssize_t start, end, i, len;
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
PyObject *res;
- Py_UNICODE *p;
+ int kind;
+ void *data;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
if (PyUnicodeEncodeError_GetEnd(exc, &end))
return NULL;
- res = PyUnicode_FromUnicode(NULL, end-start);
+ len = end - start;
+ res = PyUnicode_New(len, '?');
if (res == NULL)
return NULL;
- for (p = PyUnicode_AS_UNICODE(res), i = start;
- i<end; ++p, ++i)
- *p = '?';
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
- return restuple;
+ kind = PyUnicode_KIND(res);
+ data = PyUnicode_DATA(res);
+ for (i = 0; i < len; ++i)
+ PyUnicode_WRITE(kind, data, i, '?');
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ return Py_BuildValue("(Nn)", res, end);
}
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
- Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
if (PyUnicodeDecodeError_GetEnd(exc, &end))
return NULL;
- return Py_BuildValue("(u#n)", &res, 1, end);
+ return Py_BuildValue("(Cn)",
+ (int)Py_UNICODE_REPLACEMENT_CHARACTER,
+ end);
}
else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
PyObject *res;
- Py_UNICODE *p;
+ int kind;
+ void *data;
if (PyUnicodeTranslateError_GetStart(exc, &start))
return NULL;
if (PyUnicodeTranslateError_GetEnd(exc, &end))
return NULL;
- res = PyUnicode_FromUnicode(NULL, end-start);
+ len = end - start;
+ res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
if (res == NULL)
return NULL;
- for (p = PyUnicode_AS_UNICODE(res), i = start;
- i<end; ++p, ++i)
- *p = Py_UNICODE_REPLACEMENT_CHARACTER;
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
- return restuple;
+ kind = PyUnicode_KIND(res);
+ data = PyUnicode_DATA(res);
+ for (i=0; i < len; i++)
+ PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ return Py_BuildValue("(Nn)", res, end);
}
else {
wrong_exception_type(exc);
@@ -569,82 +574,72 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
PyObject *restuple;
PyObject *object;
+ Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
- Py_UNICODE *p;
- Py_UNICODE *startp;
- Py_UNICODE *outp;
+ unsigned char *outp;
int ressize;
+ Py_UCS4 ch;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
if (PyUnicodeEncodeError_GetEnd(exc, &end))
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- startp = PyUnicode_AS_UNICODE(object);
- for (p = startp+start, ressize = 0; p < startp+end; ++p) {
- if (*p<10)
+ for (i = start, ressize = 0; i < end; ++i) {
+ /* object is guaranteed to be "ready" */
+ ch = PyUnicode_READ_CHAR(object, i);
+ if (ch<10)
ressize += 2+1+1;
- else if (*p<100)
+ else if (ch<100)
ressize += 2+2+1;
- else if (*p<1000)
+ else if (ch<1000)
ressize += 2+3+1;
- else if (*p<10000)
+ else if (ch<10000)
ressize += 2+4+1;
-#ifndef Py_UNICODE_WIDE
- else
+ else if (ch<100000)
ressize += 2+5+1;
-#else
- else if (*p<100000)
- ressize += 2+5+1;
- else if (*p<1000000)
+ else if (ch<1000000)
ressize += 2+6+1;
else
ressize += 2+7+1;
-#endif
}
/* allocate replacement */
- res = PyUnicode_FromUnicode(NULL, ressize);
+ res = PyUnicode_New(ressize, 127);
if (res == NULL) {
Py_DECREF(object);
return NULL;
}
+ outp = PyUnicode_1BYTE_DATA(res);
/* generate replacement */
- for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
- p < startp+end; ++p) {
- Py_UNICODE c = *p;
+ for (i = start; i < end; ++i) {
int digits;
int base;
+ ch = PyUnicode_READ_CHAR(object, i);
*outp++ = '&';
*outp++ = '#';
- if (*p<10) {
+ if (ch<10) {
digits = 1;
base = 1;
}
- else if (*p<100) {
+ else if (ch<100) {
digits = 2;
base = 10;
}
- else if (*p<1000) {
+ else if (ch<1000) {
digits = 3;
base = 100;
}
- else if (*p<10000) {
+ else if (ch<10000) {
digits = 4;
base = 1000;
}
-#ifndef Py_UNICODE_WIDE
- else {
+ else if (ch<100000) {
digits = 5;
base = 10000;
}
-#else
- else if (*p<100000) {
- digits = 5;
- base = 10000;
- }
- else if (*p<1000000) {
+ else if (ch<1000000) {
digits = 6;
base = 100000;
}
@@ -652,16 +647,15 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
digits = 7;
base = 1000000;
}
-#endif
while (digits-->0) {
- *outp++ = '0' + c/base;
- c %= base;
+ *outp++ = '0' + ch/base;
+ ch %= base;
base /= 10;
}
*outp++ = ';';
}
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ restuple = Py_BuildValue("(Nn)", res, end);
Py_DECREF(object);
return restuple;
}
@@ -671,87 +665,65 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
}
}
-static Py_UNICODE hexdigits[] = {
- '0', '1', '2', '3', '4', '5', '6', '7',
- '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
-};
-
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
-#ifndef Py_UNICODE_WIDE
-#define IS_SURROGATE_PAIR(p, end) \
- (*p >= 0xD800 && *p <= 0xDBFF && (p + 1) < end && \
- *(p + 1) >= 0xDC00 && *(p + 1) <= 0xDFFF)
-#else
-#define IS_SURROGATE_PAIR(p, end) 0
-#endif
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
PyObject *restuple;
PyObject *object;
+ Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
- Py_UNICODE *p;
- Py_UNICODE *startp;
- Py_UNICODE *outp;
+ unsigned char *outp;
int ressize;
+ Py_UCS4 c;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
if (PyUnicodeEncodeError_GetEnd(exc, &end))
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- startp = PyUnicode_AS_UNICODE(object);
- for (p = startp+start, ressize = 0; p < startp+end; ++p) {
-#ifdef Py_UNICODE_WIDE
- if (*p >= 0x00010000)
+ for (i = start, ressize = 0; i < end; ++i) {
+ /* object is guaranteed to be "ready" */
+ c = PyUnicode_READ_CHAR(object, i);
+ if (c >= 0x10000) {
ressize += 1+1+8;
- else
-#endif
- if (*p >= 0x100) {
- if (IS_SURROGATE_PAIR(p, startp+end)) {
- ressize += 1+1+8;
- ++p;
- }
- else
- ressize += 1+1+4;
+ }
+ else if (c >= 0x100) {
+ ressize += 1+1+4;
}
else
ressize += 1+1+2;
}
- res = PyUnicode_FromUnicode(NULL, ressize);
+ res = PyUnicode_New(ressize, 127);
if (res==NULL)
return NULL;
- for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
- p < startp+end; ++p) {
- Py_UCS4 c = (Py_UCS4) *p;
+ for (i = start, outp = PyUnicode_1BYTE_DATA(res);
+ i < end; ++i) {
+ c = PyUnicode_READ_CHAR(object, i);
*outp++ = '\\';
- if (IS_SURROGATE_PAIR(p, startp+end)) {
- c = ((*p & 0x3FF) << 10) + (*(p + 1) & 0x3FF) + 0x10000;
- ++p;
- }
if (c >= 0x00010000) {
*outp++ = 'U';
- *outp++ = hexdigits[(c>>28)&0xf];
- *outp++ = hexdigits[(c>>24)&0xf];
- *outp++ = hexdigits[(c>>20)&0xf];
- *outp++ = hexdigits[(c>>16)&0xf];
- *outp++ = hexdigits[(c>>12)&0xf];
- *outp++ = hexdigits[(c>>8)&0xf];
+ *outp++ = Py_hexdigits[(c>>28)&0xf];
+ *outp++ = Py_hexdigits[(c>>24)&0xf];
+ *outp++ = Py_hexdigits[(c>>20)&0xf];
+ *outp++ = Py_hexdigits[(c>>16)&0xf];
+ *outp++ = Py_hexdigits[(c>>12)&0xf];
+ *outp++ = Py_hexdigits[(c>>8)&0xf];
}
else if (c >= 0x100) {
*outp++ = 'u';
- *outp++ = hexdigits[(c>>12)&0xf];
- *outp++ = hexdigits[(c>>8)&0xf];
+ *outp++ = Py_hexdigits[(c>>12)&0xf];
+ *outp++ = Py_hexdigits[(c>>8)&0xf];
}
else
*outp++ = 'x';
- *outp++ = hexdigits[(c>>4)&0xf];
- *outp++ = hexdigits[c&0xf];
+ *outp++ = Py_hexdigits[(c>>4)&0xf];
+ *outp++ = Py_hexdigits[c&0xf];
}
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ restuple = Py_BuildValue("(Nn)", res, end);
Py_DECREF(object);
return restuple;
}
@@ -759,7 +731,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
wrong_exception_type(exc);
return NULL;
}
-#undef IS_SURROGATE_PAIR
}
/* This handler is declared static until someone demonstrates
@@ -769,12 +740,11 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
{
PyObject *restuple;
PyObject *object;
+ Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
- Py_UNICODE *p;
- Py_UNICODE *startp;
char *outp;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
@@ -782,15 +752,15 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- startp = PyUnicode_AS_UNICODE(object);
res = PyBytes_FromStringAndSize(NULL, 3*(end-start));
if (!res) {
Py_DECREF(object);
return NULL;
}
outp = PyBytes_AsString(res);
- for (p = startp+start; p < startp+end; p++) {
- Py_UNICODE ch = *p;
+ for (i = start; i < end; i++) {
+ /* object is guaranteed to be "ready" */
+ Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
if (ch < 0xd800 || ch > 0xdfff) {
/* Not a surrogate, fail with original exception */
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
@@ -809,7 +779,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
}
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
unsigned char *p;
- Py_UNICODE ch = 0;
+ Py_UCS4 ch = 0;
if (PyUnicodeDecodeError_GetStart(exc, &start))
return NULL;
if (!(object = PyUnicodeDecodeError_GetObject(exc)))
@@ -836,7 +806,10 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
return NULL;
}
- return Py_BuildValue("(u#n)", &ch, 1, start+3);
+ res = PyUnicode_FromOrdinal(ch);
+ if (res == NULL)
+ return NULL;
+ return Py_BuildValue("(Nn)", res, start+3);
}
else {
wrong_exception_type(exc);
@@ -849,12 +822,11 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
{
PyObject *restuple;
PyObject *object;
+ Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
- Py_UNICODE *p;
- Py_UNICODE *startp;
char *outp;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
@@ -862,15 +834,15 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- startp = PyUnicode_AS_UNICODE(object);
res = PyBytes_FromStringAndSize(NULL, end-start);
if (!res) {
Py_DECREF(object);
return NULL;
}
outp = PyBytes_AsString(res);
- for (p = startp+start; p < startp+end; p++) {
- Py_UNICODE ch = *p;
+ for (i = start; i < end; i++) {
+ /* object is guaranteed to be "ready" */
+ Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
if (ch < 0xdc80 || ch > 0xdcff) {
/* Not a UTF-8b surrogate, fail with original exception */
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
@@ -886,8 +858,9 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
return restuple;
}
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+ PyObject *str;
unsigned char *p;
- Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */
+ Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */
int consumed = 0;
if (PyUnicodeDecodeError_GetStart(exc, &start))
return NULL;
@@ -912,7 +885,10 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
return NULL;
}
- return Py_BuildValue("(u#n)", ch, consumed, start+consumed);
+ str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
+ if (str == NULL)
+ return NULL;
+ return Py_BuildValue("(Nn)", str, start+consumed);
}
else {
wrong_exception_type(exc);
@@ -1049,7 +1025,7 @@ static int _PyCodecRegistry_Init(void)
interp->codec_error_registry = PyDict_New();
if (interp->codec_error_registry) {
- for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
+ for (i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
PyObject *func = PyCFunction_New(&methods[i].def, NULL);
int res;
if (!func)