diff options
Diffstat (limited to 'Python/codecs.c')
| -rw-r--r-- | Python/codecs.c | 272 |
1 files changed, 30 insertions, 242 deletions
diff --git a/Python/codecs.c b/Python/codecs.c index 45d9929..7334eb3 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -61,22 +61,18 @@ PyObject *normalizestring(const char *string) return NULL; } - p = PyMem_Malloc(len + 1); - if (p == NULL) + v = PyString_FromStringAndSize(NULL, len); + if (v == NULL) return NULL; + p = PyString_AS_STRING(v); for (i = 0; i < len; i++) { register char ch = string[i]; if (ch == ' ') ch = '-'; else - ch = tolower(Py_CHARMASK(ch)); + ch = Py_TOLOWER(Py_CHARMASK(ch)); p[i] = ch; } - p[i] = '\0'; - v = PyUnicode_FromString(p); - if (v == NULL) - return NULL; - PyMem_Free(p); return v; } @@ -116,7 +112,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) v = normalizestring(encoding); if (v == NULL) goto onError; - PyUnicode_InternInPlace(&v); + PyString_InternInPlace(&v); /* First, try to lookup the name in the registry dictionary */ result = PyDict_GetItem(interp->codec_search_cache, v); @@ -171,10 +167,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) } /* Cache and return the result */ - if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) { - Py_DECREF(result); - goto onError; - } + PyDict_SetItem(interp->codec_search_cache, v, result); Py_DECREF(args); return result; @@ -183,23 +176,6 @@ PyObject *_PyCodec_Lookup(const char *encoding) return NULL; } -/* Codec registry encoding check API. */ - -int PyCodec_KnownEncoding(const char *encoding) -{ - PyObject *codecs; - - codecs = _PyCodec_Lookup(encoding); - if (!codecs) { - PyErr_Clear(); - return 0; - } - else { - Py_DECREF(codecs); - return 1; - } -} - static PyObject *args_tuple(PyObject *object, const char *errors) @@ -214,7 +190,7 @@ PyObject *args_tuple(PyObject *object, if (errors) { PyObject *v; - v = PyUnicode_FromString(errors); + v = PyString_FromString(errors); if (v == NULL) { Py_DECREF(args); return NULL; @@ -341,7 +317,7 @@ PyObject *PyCodec_Encode(PyObject *object, { PyObject *encoder = NULL; PyObject *args = NULL, *result = NULL; - PyObject *v = NULL; + PyObject *v; encoder = PyCodec_Encoder(encoding); if (encoder == NULL) @@ -351,14 +327,14 @@ PyObject *PyCodec_Encode(PyObject *object, if (args == NULL) goto onError; - result = PyEval_CallObject(encoder, args); + result = PyEval_CallObject(encoder,args); if (result == NULL) goto onError; if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 2) { PyErr_SetString(PyExc_TypeError, - "encoder must return a tuple (object, integer)"); + "encoder must return a tuple (object,integer)"); goto onError; } v = PyTuple_GET_ITEM(result,0); @@ -470,9 +446,14 @@ static void wrong_exception_type(PyObject *exc) PyObject *name = PyObject_GetAttrString(type, "__name__"); Py_DECREF(type); if (name != NULL) { - PyErr_Format(PyExc_TypeError, - "don't know how to handle %S in error callback", name); + PyObject *string = PyObject_Str(name); Py_DECREF(name); + if (string != NULL) { + PyErr_Format(PyExc_TypeError, + "don't know how to handle %.400s in error callback", + PyString_AS_STRING(string)); + Py_DECREF(string); + } } } } @@ -487,6 +468,7 @@ PyObject *PyCodec_StrictErrors(PyObject *exc) } +#ifdef Py_USING_UNICODE PyObject *PyCodec_IgnoreErrors(PyObject *exc) { Py_ssize_t end; @@ -678,13 +660,6 @@ static Py_UNICODE hexdigits[] = { PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) { -#ifndef Py_UNICODE_WIDE -#define IS_SURROGATE_PAIR(p, end) \ - (*p >= 0xD800 && *p <= 0xDBFF && (p + 1) < end && \ - *(p + 1) >= 0xDC00 && *(p + 1) <= 0xDFFF) -#else -#define IS_SURROGATE_PAIR(p, end) 0 -#endif if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { PyObject *restuple; PyObject *object; @@ -709,12 +684,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) else #endif if (*p >= 0x100) { - if (IS_SURROGATE_PAIR(p, startp+end)) { - ressize += 1+1+8; - ++p; - } - else - ressize += 1+1+4; + ressize += 1+1+4; } else ressize += 1+1+2; @@ -724,12 +694,9 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) return NULL; for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); p < startp+end; ++p) { - Py_UCS4 c = (Py_UCS4) *p; + Py_UNICODE c = *p; *outp++ = '\\'; - if (IS_SURROGATE_PAIR(p, startp+end)) { - c = ((*p & 0x3FF) << 10) + (*(p + 1) & 0x3FF) + 0x10000; - ++p; - } +#ifdef Py_UNICODE_WIDE if (c >= 0x00010000) { *outp++ = 'U'; *outp++ = hexdigits[(c>>28)&0xf]; @@ -739,7 +706,9 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) *outp++ = hexdigits[(c>>12)&0xf]; *outp++ = hexdigits[(c>>8)&0xf]; } - else if (c >= 0x100) { + else +#endif + if (c >= 0x100) { *outp++ = 'u'; *outp++ = hexdigits[(c>>12)&0xf]; *outp++ = hexdigits[(c>>8)&0xf]; @@ -759,166 +728,8 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) wrong_exception_type(exc); return NULL; } -#undef IS_SURROGATE_PAIR -} - -/* This handler is declared static until someone demonstrates - a need to call it directly. */ -static PyObject * -PyCodec_SurrogatePassErrors(PyObject *exc) -{ - PyObject *restuple; - PyObject *object; - Py_ssize_t start; - Py_ssize_t end; - PyObject *res; - if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { - Py_UNICODE *p; - Py_UNICODE *startp; - char *outp; - if (PyUnicodeEncodeError_GetStart(exc, &start)) - return NULL; - if (PyUnicodeEncodeError_GetEnd(exc, &end)) - return NULL; - if (!(object = PyUnicodeEncodeError_GetObject(exc))) - return NULL; - startp = PyUnicode_AS_UNICODE(object); - res = PyBytes_FromStringAndSize(NULL, 3*(end-start)); - if (!res) { - Py_DECREF(object); - return NULL; - } - outp = PyBytes_AsString(res); - for (p = startp+start; p < startp+end; p++) { - Py_UNICODE ch = *p; - if (ch < 0xd800 || ch > 0xdfff) { - /* Not a surrogate, fail with original exception */ - PyErr_SetObject(PyExceptionInstance_Class(exc), exc); - Py_DECREF(res); - Py_DECREF(object); - return NULL; - } - *outp++ = (char)(0xe0 | (ch >> 12)); - *outp++ = (char)(0x80 | ((ch >> 6) & 0x3f)); - *outp++ = (char)(0x80 | (ch & 0x3f)); - } - restuple = Py_BuildValue("(On)", res, end); - Py_DECREF(res); - Py_DECREF(object); - return restuple; - } - else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { - unsigned char *p; - Py_UNICODE ch = 0; - if (PyUnicodeDecodeError_GetStart(exc, &start)) - return NULL; - if (!(object = PyUnicodeDecodeError_GetObject(exc))) - return NULL; - if (!(p = (unsigned char*)PyBytes_AsString(object))) { - Py_DECREF(object); - return NULL; - } - /* Try decoding a single surrogate character. If - there are more, let the codec call us again. */ - p += start; - if ((p[0] & 0xf0) == 0xe0 || - (p[1] & 0xc0) == 0x80 || - (p[2] & 0xc0) == 0x80) { - /* it's a three-byte code */ - ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f); - if (ch < 0xd800 || ch > 0xdfff) - /* it's not a surrogate - fail */ - ch = 0; - } - Py_DECREF(object); - if (ch == 0) { - PyErr_SetObject(PyExceptionInstance_Class(exc), exc); - return NULL; - } - return Py_BuildValue("(u#n)", &ch, 1, start+3); - } - else { - wrong_exception_type(exc); - return NULL; - } -} - -static PyObject * -PyCodec_SurrogateEscapeErrors(PyObject *exc) -{ - PyObject *restuple; - PyObject *object; - Py_ssize_t start; - Py_ssize_t end; - PyObject *res; - if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { - Py_UNICODE *p; - Py_UNICODE *startp; - char *outp; - if (PyUnicodeEncodeError_GetStart(exc, &start)) - return NULL; - if (PyUnicodeEncodeError_GetEnd(exc, &end)) - return NULL; - if (!(object = PyUnicodeEncodeError_GetObject(exc))) - return NULL; - startp = PyUnicode_AS_UNICODE(object); - res = PyBytes_FromStringAndSize(NULL, end-start); - if (!res) { - Py_DECREF(object); - return NULL; - } - outp = PyBytes_AsString(res); - for (p = startp+start; p < startp+end; p++) { - Py_UNICODE ch = *p; - if (ch < 0xdc80 || ch > 0xdcff) { - /* Not a UTF-8b surrogate, fail with original exception */ - PyErr_SetObject(PyExceptionInstance_Class(exc), exc); - Py_DECREF(res); - Py_DECREF(object); - return NULL; - } - *outp++ = ch - 0xdc00; - } - restuple = Py_BuildValue("(On)", res, end); - Py_DECREF(res); - Py_DECREF(object); - return restuple; - } - else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { - unsigned char *p; - Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */ - int consumed = 0; - if (PyUnicodeDecodeError_GetStart(exc, &start)) - return NULL; - if (PyUnicodeDecodeError_GetEnd(exc, &end)) - return NULL; - if (!(object = PyUnicodeDecodeError_GetObject(exc))) - return NULL; - if (!(p = (unsigned char*)PyBytes_AsString(object))) { - Py_DECREF(object); - return NULL; - } - while (consumed < 4 && consumed < end-start) { - /* Refuse to escape ASCII bytes. */ - if (p[start+consumed] < 128) - break; - ch[consumed] = 0xdc00 + p[start+consumed]; - consumed++; - } - Py_DECREF(object); - if (!consumed) { - /* codec complained about ASCII byte. */ - PyErr_SetObject(PyExceptionInstance_Class(exc), exc); - return NULL; - } - return Py_BuildValue("(u#n)", ch, consumed, start+consumed); - } - else { - wrong_exception_type(exc); - return NULL; - } } - +#endif static PyObject *strict_errors(PyObject *self, PyObject *exc) { @@ -926,6 +737,7 @@ static PyObject *strict_errors(PyObject *self, PyObject *exc) } +#ifdef Py_USING_UNICODE static PyObject *ignore_errors(PyObject *self, PyObject *exc) { return PyCodec_IgnoreErrors(exc); @@ -948,16 +760,7 @@ static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc) { return PyCodec_BackslashReplaceErrors(exc); } - -static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc) -{ - return PyCodec_SurrogatePassErrors(exc); -} - -static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) -{ - return PyCodec_SurrogateEscapeErrors(exc); -} +#endif static int _PyCodecRegistry_Init(void) { @@ -976,6 +779,7 @@ static int _PyCodecRegistry_Init(void) "raises a UnicodeError on coding errors.") } }, +#ifdef Py_USING_UNICODE { "ignore", { @@ -1017,23 +821,8 @@ static int _PyCodecRegistry_Init(void) "which replaces an unencodable character with a " "backslashed escape sequence.") } - }, - { - "surrogatepass", - { - "surrogatepass", - surrogatepass_errors, - METH_O - } - }, - { - "surrogateescape", - { - "surrogateescape", - surrogateescape_errors, - METH_O - } } +#endif }; PyInterpreterState *interp = PyThreadState_GET()->interp; @@ -1065,7 +854,7 @@ static int _PyCodecRegistry_Init(void) interp->codec_error_registry == NULL) Py_FatalError("can't initialize codec registry"); - mod = PyImport_ImportModuleNoBlock("encodings"); + mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0); if (mod == NULL) { if (PyErr_ExceptionMatches(PyExc_ImportError)) { /* Ignore ImportErrors... this is done so that @@ -1079,6 +868,5 @@ static int _PyCodecRegistry_Init(void) return -1; } Py_DECREF(mod); - interp->codecs_initialized = 1; return 0; } |
