diff options
author | Walter Dörwald <walter@livinglogic.de> | 2004-09-07 20:24:22 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2004-09-07 20:24:22 (GMT) |
commit | 69652035bc2cf22b0326bb00824f4b7e2674cc8b (patch) | |
tree | 088104a47f9c9cfc466a3e1c5f4d2560b2d41450 /Modules/_codecsmodule.c | |
parent | a708d6e3b0aa2d225d4e5ab338862f67994e1c45 (diff) | |
download | cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.zip cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.gz cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.bz2 |
SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support
decoding incomplete input (when the input stream is temporarily exhausted).
codecs.StreamReader now implements buffering, which enables proper
readline support for the UTF-16 decoders. codecs.StreamReader.read()
has a new argument, chars, which specifies the number of characters to
return. codecs.StreamReader.readline() and codecs.StreamReader.readlines()
have a new argument, keepends; trailing "\n"s are stripped from the lines
if keepends is false. Added the C APIs PyUnicode_DecodeUTF8Stateful and
PyUnicode_DecodeUTF16Stateful.
Diffstat (limited to 'Modules/_codecsmodule.c')
-rw-r--r-- | Modules/_codecsmodule.c | 81 |
1 file changed, 57 insertions, 24 deletions
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 7509c1b..ccad827 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -269,13 +269,20 @@ utf_8_decode(PyObject *self, const char *data; int size; const char *errors = NULL; - - if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode", - &data, &size, &errors)) - return NULL; + int final = 0; + int consumed; + PyObject *decoded = NULL; - return codec_tuple(PyUnicode_DecodeUTF8(data, size, errors), - size); + if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode", + &data, &size, &errors, &final)) + return NULL; + consumed = size; + + decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors, + final ? NULL : &consumed); + if (decoded == NULL) + return NULL; + return codec_tuple(decoded, consumed); } static PyObject * @@ -286,12 +293,19 @@ utf_16_decode(PyObject *self, int size; const char *errors = NULL; int byteorder = 0; - - if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode", - &data, &size, &errors)) + int final = 0; + int consumed; + PyObject *decoded; + + if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode", + &data, &size, &errors, &final)) return NULL; - return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder), - size); + consumed = size; + decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder, + final ? NULL : &consumed); + if (decoded == NULL) + return NULL; + return codec_tuple(decoded, consumed); } static PyObject * @@ -302,12 +316,20 @@ utf_16_le_decode(PyObject *self, int size; const char *errors = NULL; int byteorder = -1; + int final = 0; + int consumed; + PyObject *decoded = NULL; - if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode", - &data, &size, &errors)) + if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode", + &data, &size, &errors, &final)) return NULL; - return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder), - size); + consumed = size; + decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, + &byteorder, final ? 
NULL : &consumed); + if (decoded == NULL) + return NULL; + return codec_tuple(decoded, consumed); + } static PyObject * @@ -318,12 +340,19 @@ utf_16_be_decode(PyObject *self, int size; const char *errors = NULL; int byteorder = 1; + int final = 0; + int consumed; + PyObject *decoded = NULL; - if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode", - &data, &size, &errors)) + if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode", + &data, &size, &errors, &final)) return NULL; - return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder), - size); + consumed = size; + decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, + &byteorder, final ? NULL : &consumed); + if (decoded == NULL) + return NULL; + return codec_tuple(decoded, consumed); } /* This non-standard version also provides access to the byteorder @@ -343,15 +372,19 @@ utf_16_ex_decode(PyObject *self, const char *errors = NULL; int byteorder = 0; PyObject *unicode, *tuple; - - if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode", - &data, &size, &errors, &byteorder)) + int final = 0; + int consumed; + + if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode", + &data, &size, &errors, &byteorder, &final)) return NULL; - unicode = PyUnicode_DecodeUTF16(data, size, errors, &byteorder); + consumed = size; + unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder, + final ? NULL : &consumed); if (unicode == NULL) return NULL; - tuple = Py_BuildValue("Oii", unicode, size, byteorder); + tuple = Py_BuildValue("Oii", unicode, consumed, byteorder); Py_DECREF(unicode); return tuple; } |