SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support

decoding incomplete input (when the input stream is temporarily exhausted). codecs.StreamReader now implements buffering, which enables proper readline support for the UTF-16 decoders. codecs.StreamReader.read() has a new argument chars which specifies the number of characters to return. codecs.StreamReader.readline() and codecs.StreamReader.readlines() have a new argument keepends. Trailing "\n"s will be stripped from the lines if keepends is false. Added C APIs PyUnicode_DecodeUTF8Stateful and PyUnicode_DecodeUTF16Stateful.
author: Walter Dörwald <walter@livinglogic.de> 2004-09-07 20:24:22 (GMT)
committer: Walter Dörwald <walter@livinglogic.de> 2004-09-07 20:24:22 (GMT)
commit: 69652035bc2cf22b0326bb00824f4b7e2674cc8b (patch)
tree: 088104a47f9c9cfc466a3e1c5f4d2560b2d41450 /Doc/api
parent: a708d6e3b0aa2d225d4e5ab338862f67994e1c45 (diff)
download: cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.zip
cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.gz
cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.bz2
1 files changed, 25 insertions, 0 deletions
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex
index a77a584..96b4faf 100644
--- a/Doc/api/concrete.tex
+++ b/Doc/api/concrete.tex
@@ -1076,6 +1076,17 @@ These are the UTF-8 codec APIs:
   by the codec.
 \end{cfuncdesc}
 
+\begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUTF8Stateful}{const char *s,
+                                               int size,
+                                               const char *errors,
+                                               int *consumed}
+  If \var{consumed} is \NULL{}, behaves like \cfunction{PyUnicode_DecodeUTF8()}.
+  If \var{consumed} is not \NULL{}, trailing incomplete UTF-8 byte sequences
+  will not be treated as an error. Those bytes will not be decoded and the
+  number of bytes that have been decoded will be stored in \var{consumed}.
+  \versionadded{2.4}
+\end{cfuncdesc}
+
 \begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF8}{const Py_UNICODE *s,
                                                int size,
                                                const char *errors}
@@ -1121,6 +1132,20 @@ These are the UTF-16 codec APIs:
   Returns \NULL{} if an exception was raised by the codec.
 \end{cfuncdesc}
 
+\begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUTF16Stateful}{const char *s,
+                                               int size,
+                                               const char *errors,
+                                               int *byteorder,
+                                               int *consumed}
+  If \var{consumed} is \NULL{}, behaves like
+  \cfunction{PyUnicode_DecodeUTF16()}. If \var{consumed} is not \NULL{},
+  \cfunction{PyUnicode_DecodeUTF16Stateful()} will not treat trailing incomplete
+  UTF-16 byte sequences (i.e. an odd number of bytes or a split surrogate pair)
+  as an error. Those bytes will not be decoded and the number of bytes that
+  have been decoded will be stored in \var{consumed}.
+  \versionadded{2.4}
+\end{cfuncdesc}
+
 \begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF16}{const Py_UNICODE *s,
                                                int size,
                                                const char *errors,
author	Walter Dörwald <walter@livinglogic.de>	2004-09-07 20:24:22 (GMT)
committer	Walter Dörwald <walter@livinglogic.de>	2004-09-07 20:24:22 (GMT)
commit	69652035bc2cf22b0326bb00824f4b7e2674cc8b (patch)
tree	088104a47f9c9cfc466a3e1c5f4d2560b2d41450 /Doc/api
parent	a708d6e3b0aa2d225d4e5ab338862f67994e1c45 (diff)
download	cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.zip cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.gz cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.bz2