summaryrefslogtreecommitdiffstats
path: root/Doc/api
diff options
context:
space:
mode:
author Walter Dörwald <walter@livinglogic.de> 2004-09-07 20:24:22 (GMT)
committer Walter Dörwald <walter@livinglogic.de> 2004-09-07 20:24:22 (GMT)
commit 69652035bc2cf22b0326bb00824f4b7e2674cc8b (patch)
tree 088104a47f9c9cfc466a3e1c5f4d2560b2d41450 /Doc/api
parent a708d6e3b0aa2d225d4e5ab338862f67994e1c45 (diff)
download cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.zip
cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.gz
cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.bz2
SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support
decoding incomplete input (when the input stream is temporarily exhausted). codecs.StreamReader now implements buffering, which enables proper readline support for the UTF-16 decoders. codecs.StreamReader.read() has a new argument chars which specifies the number of characters to return. codecs.StreamReader.readline() and codecs.StreamReader.readlines() have a new argument keepends. Trailing "\n"s will be stripped from the lines if keepends is false. Added C APIs PyUnicode_DecodeUTF8Stateful and PyUnicode_DecodeUTF16Stateful.
Diffstat (limited to 'Doc/api')
-rw-r--r-- Doc/api/concrete.tex 25
1 files changed, 25 insertions, 0 deletions
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex
index a77a584..96b4faf 100644
--- a/Doc/api/concrete.tex
+++ b/Doc/api/concrete.tex
@@ -1076,6 +1076,17 @@ These are the UTF-8 codec APIs:
by the codec.
\end{cfuncdesc}
+\begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUTF8Stateful}{const char *s,
+ int size,
+ const char *errors,
+ int *consumed}
+ If \var{consumed} is \NULL{}, behaves like \cfunction{PyUnicode_DecodeUTF8()}.
+ If \var{consumed} is not \NULL{}, trailing incomplete UTF-8 byte sequences
+ will not be treated as an error. Those bytes will not be decoded and the
+ number of bytes that have been decoded will be stored in \var{consumed}.
+ \versionadded{2.4}
+\end{cfuncdesc}
+
\begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF8}{const Py_UNICODE *s,
int size,
const char *errors}
@@ -1121,6 +1132,20 @@ These are the UTF-16 codec APIs:
Returns \NULL{} if an exception was raised by the codec.
\end{cfuncdesc}
+\begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUTF16Stateful}{const char *s,
+ int size,
+ const char *errors,
+ int *byteorder,
+ int *consumed}
+ If \var{consumed} is \NULL{}, behaves like
+ \cfunction{PyUnicode_DecodeUTF16()}. If \var{consumed} is not \NULL{},
+ \cfunction{PyUnicode_DecodeUTF16Stateful()} will not treat trailing incomplete
+ UTF-16 byte sequences (i.e.\ an odd number of bytes or a split surrogate pair)
+ as an error. Those bytes will not be decoded and the number of bytes that
+ have been decoded will be stored in \var{consumed}.
+ \versionadded{2.4}
+\end{cfuncdesc}
+
\begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF16}{const Py_UNICODE *s,
int size,
const char *errors,