SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support decoding incomplete input (when the input stream is temporarily exhausted). codecs.StreamReader now implements buffering, which enables proper readline support for the UTF-16 decoders. codecs.StreamReader.read() has a new argument chars which specifies the number of characters to return. codecs.StreamReader.readline() and codecs.StreamReader.readlines() have a new argument keepends. Trailing "\n"s will be stripped from the lines if keepends is false. Added C APIs PyUnicode_DecodeUTF8Stateful and PyUnicode_DecodeUTF16Stateful.
author: Walter Dörwald <walter@livinglogic.de> 2004-09-07 20:24:22 (GMT)
committer: Walter Dörwald <walter@livinglogic.de> 2004-09-07 20:24:22 (GMT)
commit: 69652035bc2cf22b0326bb00824f4b7e2674cc8b (patch)
tree: 088104a47f9c9cfc466a3e1c5f4d2560b2d41450 /Include
parent: a708d6e3b0aa2d225d4e5ab338862f67994e1c45 (diff)
download: cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.zip
cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.gz
cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.bz2
1 files changed, 21 insertions, 0 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 5126870..0ded1c7 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -160,7 +160,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
 # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
+# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
 # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
+# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
 # define PyUnicode_Encode PyUnicodeUCS2_Encode
 # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
@@ -233,7 +235,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
 # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
+# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
 # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
+# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
 # define PyUnicode_Encode PyUnicodeUCS4_Encode
 # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
@@ -658,6 +662,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
     const char *errors		/* error handling */
     );
 
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
+    const char *string, 	/* UTF-8 encoded string */
+    int length,	 		/* size of string */
+    const char *errors,		/* error handling */
+    int *consumed		/* bytes consumed */
+    );
+
 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
     PyObject *unicode	 	/* Unicode object */
     );
@@ -702,6 +713,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
 				   exit */
     );
 
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
+    const char *string, 	/* UTF-16 encoded string */
+    int length,	 		/* size of string */
+    const char *errors,		/* error handling */
+    int *byteorder,		/* pointer to byteorder to use
+				   0=native;-1=LE,1=BE; updated on
+				   exit */
+    int *consumed		/* bytes consumed */
+    );
+
 /* Returns a Python string using the UTF-16 encoding in native byte
    order. The string always starts with a BOM mark.  */
author	Walter Dörwald <walter@livinglogic.de>	2004-09-07 20:24:22 (GMT)
committer	Walter Dörwald <walter@livinglogic.de>	2004-09-07 20:24:22 (GMT)
commit	69652035bc2cf22b0326bb00824f4b7e2674cc8b (patch)
tree	088104a47f9c9cfc466a3e1c5f4d2560b2d41450 /Include
parent	a708d6e3b0aa2d225d4e5ab338862f67994e1c45 (diff)
download	cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.zip cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.gz cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.bz2