diff options
Diffstat (limited to 'Modules/_codecsmodule.c')
-rw-r--r-- | Modules/_codecsmodule.c | 303 |
1 files changed, 99 insertions, 204 deletions
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 9b1194e..586b73a 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -20,10 +20,6 @@ <encoding>_decode(char_buffer_obj[,errors='strict']) -> (Unicode object, bytes consumed) - <encoding>_encode() interfaces also accept non-Unicode object as - input. The objects are then converted to Unicode using - PyUnicode_FromObject() prior to applying the conversion. - These <encoding>s are available: utf_8, unicode_escape, raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit), mbcs (on win32). @@ -608,7 +604,7 @@ _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data, return codec_tuple(decoded, data->len); } -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS /*[clinic input] _codecs.mbcs_decode @@ -630,6 +626,25 @@ _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data, } /*[clinic input] +_codecs.oem_decode + data: Py_buffer + errors: str(accept={str, NoneType}) = NULL + final: int(c_default="0") = False + / +[clinic start generated code]*/ + +static PyObject * +_codecs_oem_decode_impl(PyObject *module, Py_buffer *data, + const char *errors, int final) +/*[clinic end generated code: output=da1617612f3fcad8 input=95b8a92c446b03cd]*/ +{ + Py_ssize_t consumed = data->len; + PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP, + data->buf, data->len, errors, final ? NULL : &consumed); + return codec_tuple(decoded, consumed); +} + +/*[clinic input] _codecs.code_page_decode codepage: int data: Py_buffer @@ -651,7 +666,7 @@ _codecs_code_page_decode_impl(PyObject *module, int codepage, return codec_tuple(decoded, consumed); } -#endif /* HAVE_MBCS */ +#endif /* MS_WINDOWS */ /* --- Encoder ------------------------------------------------------------ */ @@ -718,7 +733,7 @@ _codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj, /*[clinic input] _codecs.utf_7_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL / [clinic start generated code]*/ @@ -726,24 +741,15 @@ _codecs.utf_7_encode static PyObject * _codecs_utf_7_encode_impl(PyObject *module, PyObject *str, const char *errors) -/*[clinic end generated code: output=0feda21ffc921bc8 input=fd91a78f103b0421]*/ +/*[clinic end generated code: output=0feda21ffc921bc8 input=d1a47579e79cbe15]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors), + PyUnicode_GET_LENGTH(str)); } /*[clinic input] _codecs.utf_8_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL / [clinic start generated code]*/ @@ -751,19 +757,10 @@ _codecs.utf_8_encode static PyObject * _codecs_utf_8_encode_impl(PyObject *module, PyObject *str, const char *errors) -/*[clinic end generated code: output=02bf47332b9c796c input=2c22d40532f071f3]*/ +/*[clinic end generated code: output=02bf47332b9c796c input=42e3ba73c4392eef]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(_PyUnicode_AsUTF8String(str, errors), + PyUnicode_GET_LENGTH(str)); } /* This version provides access to the byteorder parameter of the @@ -775,7 +772,7 @@ _codecs_utf_8_encode_impl(PyObject *module, PyObject *str, /*[clinic input] _codecs.utf_16_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL byteorder: int = 0 / @@ -784,24 +781,15 @@ _codecs.utf_16_encode static PyObject * _codecs_utf_16_encode_impl(PyObject *module, PyObject *str, const char *errors, int byteorder) -/*[clinic end generated code: output=c654e13efa2e64e4 input=3935a489b2d5385e]*/ +/*[clinic end generated code: output=c654e13efa2e64e4 input=ff46416b04edb944]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder), + PyUnicode_GET_LENGTH(str)); } /*[clinic input] _codecs.utf_16_le_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL / [clinic start generated code]*/ @@ -809,24 +797,15 @@ _codecs.utf_16_le_encode static PyObject * _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str, const char *errors) -/*[clinic end generated code: output=431b01e55f2d4995 input=bc27df05d1d20dfe]*/ +/*[clinic end generated code: output=431b01e55f2d4995 input=cb385455ea8f2fe0]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1), + PyUnicode_GET_LENGTH(str)); } /*[clinic input] _codecs.utf_16_be_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL / [clinic start generated code]*/ @@ -834,19 +813,10 @@ _codecs.utf_16_be_encode static PyObject * _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str, const char *errors) -/*[clinic end generated code: output=96886a6fd54dcae3 input=5a69d4112763462b]*/ +/*[clinic end generated code: output=96886a6fd54dcae3 input=9119997066bdaefd]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1), + PyUnicode_GET_LENGTH(str)); } /* This version provides access to the byteorder parameter of the @@ -858,7 +828,7 @@ _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str, /*[clinic input] _codecs.utf_32_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL byteorder: int = 0 / @@ -867,24 +837,15 @@ _codecs.utf_32_encode static PyObject * _codecs_utf_32_encode_impl(PyObject *module, PyObject *str, const char *errors, int byteorder) -/*[clinic end generated code: output=5c760da0c09a8b83 input=434a1efa492b8d58]*/ +/*[clinic end generated code: output=5c760da0c09a8b83 input=c5e77da82fbe5c2a]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder), + PyUnicode_GET_LENGTH(str)); } /*[clinic input] _codecs.utf_32_le_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL / [clinic start generated code]*/ @@ -892,24 +853,15 @@ _codecs.utf_32_le_encode static PyObject * _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str, const char *errors) -/*[clinic end generated code: output=b65cd176de8e36d6 input=dfa2d7dc78b99422]*/ +/*[clinic end generated code: output=b65cd176de8e36d6 input=9993b25fe0877848]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1), + PyUnicode_GET_LENGTH(str)); } /*[clinic input] _codecs.utf_32_be_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL / [clinic start generated code]*/ @@ -917,24 +869,15 @@ _codecs.utf_32_be_encode static PyObject * _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str, const char *errors) -/*[clinic end generated code: output=1d9e71a9358709e9 input=4595617b18169002]*/ +/*[clinic end generated code: output=1d9e71a9358709e9 input=d3e0ccaa02920431]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1), + PyUnicode_GET_LENGTH(str)); } /*[clinic input] _codecs.unicode_escape_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL / [clinic start generated code]*/ @@ -942,24 +885,15 @@ _codecs.unicode_escape_encode static PyObject * _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str, const char *errors) -/*[clinic end generated code: output=66271b30bc4f7a3c input=8273506f14076912]*/ +/*[clinic end generated code: output=66271b30bc4f7a3c input=65d9eefca65b455a]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(PyUnicode_AsUnicodeEscapeString(str), + PyUnicode_GET_LENGTH(str)); } /*[clinic input] _codecs.raw_unicode_escape_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL / [clinic start generated code]*/ @@ -967,24 +901,15 @@ _codecs.raw_unicode_escape_encode static PyObject * _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str, const char *errors) -/*[clinic end generated code: output=a66a806ed01c830a input=181755d5dfacef3c]*/ +/*[clinic end generated code: output=a66a806ed01c830a input=5aa33e4a133391ab]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str), + PyUnicode_GET_LENGTH(str)); } /*[clinic input] _codecs.latin_1_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL / [clinic start generated code]*/ @@ -992,24 +917,15 @@ _codecs.latin_1_encode static PyObject * _codecs_latin_1_encode_impl(PyObject *module, PyObject *str, const char *errors) -/*[clinic end generated code: output=2c28c83a27884e08 input=f03f6dcf1d84bee4]*/ +/*[clinic end generated code: output=2c28c83a27884e08 input=30b11c9e49a65150]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(_PyUnicode_AsLatin1String(str, errors), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(_PyUnicode_AsLatin1String(str, errors), + PyUnicode_GET_LENGTH(str)); } /*[clinic input] _codecs.ascii_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL / [clinic start generated code]*/ @@ -1017,24 +933,15 @@ _codecs.ascii_encode static PyObject * _codecs_ascii_encode_impl(PyObject *module, PyObject *str, const char *errors) -/*[clinic end generated code: output=b5e035182d33befc input=d87e25a10a593fee]*/ +/*[clinic end generated code: output=b5e035182d33befc input=843a1d268e6dfa8e]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(_PyUnicode_AsASCIIString(str, errors), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(_PyUnicode_AsASCIIString(str, errors), + PyUnicode_GET_LENGTH(str)); } /*[clinic input] _codecs.charmap_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL mapping: object = NULL / @@ -1043,22 +950,13 @@ _codecs.charmap_encode static PyObject * _codecs_charmap_encode_impl(PyObject *module, PyObject *str, const char *errors, PyObject *mapping) -/*[clinic end generated code: output=047476f48495a9e9 input=85f4172661e8dad9]*/ +/*[clinic end generated code: output=047476f48495a9e9 input=0752cde07a6d6d00]*/ { - PyObject *v; - if (mapping == Py_None) mapping = NULL; - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors), + PyUnicode_GET_LENGTH(str)); } /*[clinic input] @@ -1074,36 +972,42 @@ _codecs_charmap_build_impl(PyObject *module, PyObject *map) return PyUnicode_BuildEncodingMap(map); } -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS /*[clinic input] _codecs.mbcs_encode - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL / [clinic start generated code]*/ static PyObject * _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors) -/*[clinic end generated code: output=76e2e170c966c080 input=65c09ee1e4203263]*/ +/*[clinic end generated code: output=76e2e170c966c080 input=de471e0815947553]*/ { - PyObject *v; + return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors), + PyUnicode_GET_LENGTH(str)); +} - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; +/*[clinic input] +_codecs.oem_encode + str: unicode + errors: str(accept={str, NoneType}) = NULL + / +[clinic start generated code]*/ + +static PyObject * +_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors) +/*[clinic end generated code: output=65d5982c737de649 input=3fc5f0028aad3cda]*/ +{ + return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors), + PyUnicode_GET_LENGTH(str)); } /*[clinic input] _codecs.code_page_encode code_page: int - str: object + str: unicode errors: str(accept={str, NoneType}) = NULL / [clinic start generated code]*/ @@ -1111,24 +1015,13 @@ _codecs.code_page_encode static PyObject * _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str, const char *errors) -/*[clinic end generated code: output=45673f6085657a9e input=c8562ec460c2e309]*/ +/*[clinic end generated code: output=45673f6085657a9e input=786421ae617d680b]*/ { - PyObject *v; - - str = PyUnicode_FromObject(str); - if (str == NULL || PyUnicode_READY(str) < 0) { - Py_XDECREF(str); - return NULL; - } - v = codec_tuple(PyUnicode_EncodeCodePage(code_page, - str, - errors), - PyUnicode_GET_LENGTH(str)); - Py_DECREF(str); - return v; + return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors), + PyUnicode_GET_LENGTH(str)); } -#endif /* HAVE_MBCS */ +#endif /* MS_WINDOWS */ /* --- Error handler registry --------------------------------------------- */ @@ -1216,6 +1109,8 @@ static PyMethodDef _codecs_functions[] = { _CODECS_READBUFFER_ENCODE_METHODDEF _CODECS_MBCS_ENCODE_METHODDEF _CODECS_MBCS_DECODE_METHODDEF + _CODECS_OEM_ENCODE_METHODDEF + _CODECS_OEM_DECODE_METHODDEF _CODECS_CODE_PAGE_ENCODE_METHODDEF _CODECS_CODE_PAGE_DECODE_METHODDEF _CODECS_REGISTER_ERROR_METHODDEF |