summary refs log tree commit diff stats
path: root/Modules
diff options
context:
space:
mode:
author Walter Dörwald <walter@livinglogic.de> 2004-09-07 20:24:22 (GMT)
committer Walter Dörwald <walter@livinglogic.de> 2004-09-07 20:24:22 (GMT)
commit 69652035bc2cf22b0326bb00824f4b7e2674cc8b (patch)
tree 088104a47f9c9cfc466a3e1c5f4d2560b2d41450 /Modules
parent a708d6e3b0aa2d225d4e5ab338862f67994e1c45 (diff)
download cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.zip
cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.gz
cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.bz2
SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support
decoding incomplete input (when the input stream is temporarily exhausted). codecs.StreamReader now implements buffering, which enables proper readline support for the UTF-16 decoders. codecs.StreamReader.read() has a new argument chars which specifies the number of characters to return. codecs.StreamReader.readline() and codecs.StreamReader.readlines() have a new argument keepends. Trailing "\n"s will be stripped from the lines if keepends is false. Added C APIs PyUnicode_DecodeUTF8Stateful and PyUnicode_DecodeUTF16Stateful.
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/_codecsmodule.c 81
1 files changed, 57 insertions, 24 deletions
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 7509c1b..ccad827 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -269,13 +269,20 @@ utf_8_decode(PyObject *self,
const char *data;
int size;
const char *errors = NULL;
-
- if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
- &data, &size, &errors))
- return NULL;
+ int final = 0;
+ int consumed;
+ PyObject *decoded = NULL;
- return codec_tuple(PyUnicode_DecodeUTF8(data, size, errors),
- size);
+ if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
+ &data, &size, &errors, &final))
+ return NULL;
+ consumed = size;
+
+ decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
+ final ? NULL : &consumed);
+ if (decoded == NULL)
+ return NULL;
+ return codec_tuple(decoded, consumed);
}
static PyObject *
@@ -286,12 +293,19 @@ utf_16_decode(PyObject *self,
int size;
const char *errors = NULL;
int byteorder = 0;
-
- if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
- &data, &size, &errors))
+ int final = 0;
+ int consumed;
+ PyObject *decoded;
+
+ if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
+ &data, &size, &errors, &final))
return NULL;
- return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
- size);
+ consumed = size;
+ decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
+ final ? NULL : &consumed);
+ if (decoded == NULL)
+ return NULL;
+ return codec_tuple(decoded, consumed);
}
static PyObject *
@@ -302,12 +316,20 @@ utf_16_le_decode(PyObject *self,
int size;
const char *errors = NULL;
int byteorder = -1;
+ int final = 0;
+ int consumed;
+ PyObject *decoded = NULL;
- if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
- &data, &size, &errors))
+ if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
+ &data, &size, &errors, &final))
return NULL;
- return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
- size);
+ consumed = size;
+ decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
+ &byteorder, final ? NULL : &consumed);
+ if (decoded == NULL)
+ return NULL;
+ return codec_tuple(decoded, consumed);
+
}
static PyObject *
@@ -318,12 +340,19 @@ utf_16_be_decode(PyObject *self,
int size;
const char *errors = NULL;
int byteorder = 1;
+ int final = 0;
+ int consumed;
+ PyObject *decoded = NULL;
- if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
- &data, &size, &errors))
+ if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
+ &data, &size, &errors, &final))
return NULL;
- return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
- size);
+ consumed = size;
+ decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
+ &byteorder, final ? NULL : &consumed);
+ if (decoded == NULL)
+ return NULL;
+ return codec_tuple(decoded, consumed);
}
/* This non-standard version also provides access to the byteorder
@@ -343,15 +372,19 @@ utf_16_ex_decode(PyObject *self,
const char *errors = NULL;
int byteorder = 0;
PyObject *unicode, *tuple;
-
- if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
- &data, &size, &errors, &byteorder))
+ int final = 0;
+ int consumed;
+
+ if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
+ &data, &size, &errors, &byteorder, &final))
return NULL;
- unicode = PyUnicode_DecodeUTF16(data, size, errors, &byteorder);
+ consumed = size;
+ unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
+ final ? NULL : &consumed);
if (unicode == NULL)
return NULL;
- tuple = Py_BuildValue("Oii", unicode, size, byteorder);
+ tuple = Py_BuildValue("Oii", unicode, consumed, byteorder);
Py_DECREF(unicode);
return tuple;
}