diff options
author | Walter Dörwald <walter@livinglogic.de> | 2004-09-07 20:24:22 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2004-09-07 20:24:22 (GMT) |
commit | 69652035bc2cf22b0326bb00824f4b7e2674cc8b (patch) | |
tree | 088104a47f9c9cfc466a3e1c5f4d2560b2d41450 /Include | |
parent | a708d6e3b0aa2d225d4e5ab338862f67994e1c45 (diff) | |
download | cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.zip cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.gz cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.bz2 |
SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support
decoding incomplete input (when the input stream is temporarily exhausted).
codecs.StreamReader now implements buffering, which enables proper
readline support for the UTF-16 decoders. codecs.StreamReader.read()
has a new argument chars which specifies the number of characters to
return. codecs.StreamReader.readline() and codecs.StreamReader.readlines()
have a new argument keepends. Trailing "\n"s will be stripped from the lines
if keepends is false. Added C APIs PyUnicode_DecodeUTF8Stateful and
PyUnicode_DecodeUTF16Stateful.
Diffstat (limited to 'Include')
-rw-r--r-- | Include/unicodeobject.h | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 5126870..0ded1c7 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -160,7 +160,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16 +# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8 +# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape # define PyUnicode_Encode PyUnicodeUCS2_Encode # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII @@ -233,7 +235,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16 +# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8 +# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape # define PyUnicode_Encode PyUnicodeUCS4_Encode # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII @@ -658,6 +662,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( const char *errors /* error handling */ ); +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( + const char *string, /* UTF-8 encoded string */ + int length, /* size of string */ + const char *errors, /* error handling */ + int *consumed /* bytes consumed */ + ); + PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( PyObject *unicode /* Unicode object */ ); @@ -702,6 +713,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( exit */ ); +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( + const char *string, /* UTF-16 encoded string */ + int length, /* size of string */ + const char *errors, /* error handling */ + int *byteorder, /* pointer to byteorder to use + 0=native;-1=LE,1=BE; updated on + exit */ + int *consumed /* bytes consumed */ + ); + /* Returns a Python string using the UTF-16 encoding in native byte order. The string always starts with a BOM mark. */ |