diff options
Diffstat (limited to 'Modules/_codecsmodule.c')
-rw-r--r-- | Modules/_codecsmodule.c | 255 |
1 files changed, 157 insertions, 98 deletions
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index eb740f9..7818f9a 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -38,6 +38,10 @@ Copyright (c) Corporation for National Research Initiatives. #define PY_SSIZE_T_CLEAN #include "Python.h" +#ifdef MS_WINDOWS +#include <windows.h> +#endif + /* --- Registry ----------------------------------------------------------- */ PyDoc_STRVAR(register__doc__, @@ -162,7 +166,6 @@ static PyObject * escape_encode(PyObject *self, PyObject *args) { - static const char *hexdigits = "0123456789abcdef"; PyObject *str; Py_ssize_t size; Py_ssize_t newsize; @@ -205,8 +208,8 @@ escape_encode(PyObject *self, else if (c < ' ' || c >= 0x7f) { *p++ = '\\'; *p++ = 'x'; - *p++ = hexdigits[(c & 0xf0) >> 4]; - *p++ = hexdigits[c & 0xf]; + *p++ = Py_hexdigits[(c & 0xf0) >> 4]; + *p++ = Py_hexdigits[c & 0xf]; } else *p++ = c; @@ -236,8 +239,10 @@ unicode_internal_decode(PyObject *self, return NULL; if (PyUnicode_Check(obj)) { + if (PyUnicode_READY(obj) < 0) + return NULL; Py_INCREF(obj); - return codec_tuple(obj, PyUnicode_GET_SIZE(obj)); + return codec_tuple(obj, PyUnicode_GET_LENGTH(obj)); } else { if (PyObject_AsReadBuffer(obj, (const void **)&data, &size)) @@ -588,7 +593,7 @@ charmap_decode(PyObject *self, return codec_tuple(unicode, pbuf.len); } -#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) +#ifdef HAVE_MBCS static PyObject * mbcs_decode(PyObject *self, @@ -613,7 +618,32 @@ mbcs_decode(PyObject *self, return codec_tuple(decoded, consumed); } -#endif /* MS_WINDOWS */ +static PyObject * +code_page_decode(PyObject *self, + PyObject *args) +{ + Py_buffer pbuf; + const char *errors = NULL; + int final = 0; + Py_ssize_t consumed; + PyObject *decoded = NULL; + int code_page; + + if (!PyArg_ParseTuple(args, "iy*|zi:code_page_decode", + &code_page, &pbuf, &errors, &final)) + return NULL; + consumed = pbuf.len; + + decoded = PyUnicode_DecodeCodePageStateful(code_page, + pbuf.buf, pbuf.len, errors, + final ? NULL : &consumed); + PyBuffer_Release(&pbuf); + if (decoded == NULL) + return NULL; + return codec_tuple(decoded, consumed); +} + +#endif /* HAVE_MBCS */ /* --- Encoder ------------------------------------------------------------ */ @@ -645,17 +675,31 @@ unicode_internal_encode(PyObject *self, PyObject *obj; const char *errors = NULL; const char *data; - Py_ssize_t size; + Py_ssize_t len, size; + + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "unicode_internal codec has been deprecated", + 1)) + return NULL; if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode", &obj, &errors)) return NULL; if (PyUnicode_Check(obj)) { - data = PyUnicode_AS_DATA(obj); - size = PyUnicode_GET_DATA_SIZE(obj); - return codec_tuple(PyBytes_FromStringAndSize(data, size), - PyUnicode_GET_SIZE(obj)); + Py_UNICODE *u; + + if (PyUnicode_READY(obj) < 0) + return NULL; + + u = PyUnicode_AsUnicodeAndSize(obj, &len); + if (u == NULL) + return NULL; + if (len > PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) + return PyErr_NoMemory(); + size = len * sizeof(Py_UNICODE); + return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size), + PyUnicode_GET_LENGTH(obj)); } else { if (PyObject_AsReadBuffer(obj, (const void **)&data, &size)) @@ -676,14 +720,12 @@ utf_7_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) - return NULL; - v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str), - 0, - 0, - errors), - PyUnicode_GET_SIZE(str)); + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); + return NULL; + } + v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -700,12 +742,12 @@ utf_8_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); return NULL; - v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str), - errors), - PyUnicode_GET_SIZE(str)); + } + v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -730,13 +772,12 @@ utf_16_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); return NULL; - v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str), - errors, - byteorder), - PyUnicode_GET_SIZE(str)); + } + v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -753,13 +794,12 @@ utf_16_le_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); return NULL; - v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str), - errors, - -1), - PyUnicode_GET_SIZE(str)); + } + v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -776,13 +816,12 @@ utf_16_be_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); return NULL; - v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str), - errors, - +1), - PyUnicode_GET_SIZE(str)); + } + v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -807,13 +846,12 @@ utf_32_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); return NULL; - v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str), - errors, - byteorder), - PyUnicode_GET_SIZE(str)); + } + v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -830,13 +868,12 @@ utf_32_le_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); return NULL; - v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str), - errors, - -1), - PyUnicode_GET_SIZE(str)); + } + v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -853,13 +890,12 @@ utf_32_be_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); return NULL; - v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str), - errors, - +1), - PyUnicode_GET_SIZE(str)); + } + v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -876,11 +912,12 @@ unicode_escape_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); return NULL; - v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str)), - PyUnicode_GET_SIZE(str)); + } + v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -897,12 +934,12 @@ raw_unicode_escape_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); return NULL; - v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape( - PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str)), - PyUnicode_GET_SIZE(str)); + } + v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -919,13 +956,12 @@ latin_1_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); return NULL; - v = codec_tuple(PyUnicode_EncodeLatin1( - PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str), - errors), - PyUnicode_GET_SIZE(str)); + } + v = codec_tuple(_PyUnicode_AsLatin1String(str, errors), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -942,13 +978,12 @@ ascii_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); return NULL; - v = codec_tuple(PyUnicode_EncodeASCII( - PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str), - errors), - PyUnicode_GET_SIZE(str)); + } + v = codec_tuple(_PyUnicode_AsASCIIString(str, errors), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -968,14 +1003,12 @@ charmap_encode(PyObject *self, mapping = NULL; str = PyUnicode_FromObject(str); - if (str == NULL) - return NULL; - v = codec_tuple(PyUnicode_EncodeCharmap( - PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str), - mapping, - errors), - PyUnicode_GET_SIZE(str)); + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); + return NULL; + } + v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } @@ -989,7 +1022,7 @@ charmap_build(PyObject *self, PyObject *args) return PyUnicode_BuildEncodingMap(map); } -#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) +#ifdef HAVE_MBCS static PyObject * mbcs_encode(PyObject *self, @@ -1003,18 +1036,42 @@ mbcs_encode(PyObject *self, return NULL; str = PyUnicode_FromObject(str); - if (str == NULL) + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); return NULL; - v = codec_tuple(PyUnicode_EncodeMBCS( - PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str), - errors), - PyUnicode_GET_SIZE(str)); + } + v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors), + PyUnicode_GET_LENGTH(str)); + Py_DECREF(str); + return v; +} + +static PyObject * +code_page_encode(PyObject *self, + PyObject *args) +{ + PyObject *str, *v; + const char *errors = NULL; + int code_page; + + if (!PyArg_ParseTuple(args, "iO|z:code_page_encode", + &code_page, &str, &errors)) + return NULL; + + str = PyUnicode_FromObject(str); + if (str == NULL || PyUnicode_READY(str) < 0) { + Py_XDECREF(str); + return NULL; + } + v = codec_tuple(PyUnicode_EncodeCodePage(code_page, + str, + errors), + PyUnicode_GET_LENGTH(str)); Py_DECREF(str); return v; } -#endif /* MS_WINDOWS */ +#endif /* HAVE_MBCS */ /* --- Error handler registry --------------------------------------------- */ @@ -1101,9 +1158,11 @@ static PyMethodDef _codecs_functions[] = { {"charmap_decode", charmap_decode, METH_VARARGS}, {"charmap_build", charmap_build, METH_VARARGS}, {"readbuffer_encode", readbuffer_encode, METH_VARARGS}, -#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) +#ifdef HAVE_MBCS {"mbcs_encode", mbcs_encode, METH_VARARGS}, {"mbcs_decode", mbcs_decode, METH_VARARGS}, + {"code_page_encode", code_page_encode, METH_VARARGS}, + {"code_page_decode", code_page_decode, METH_VARARGS}, #endif {"register_error", register_error, METH_VARARGS, register_error__doc__}, |