diff options
author | Walter Dörwald <walter@livinglogic.de> | 2007-08-17 16:41:28 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2007-08-17 16:41:28 (GMT) |
commit | 6e390806495cf30c836615996b94e5ffa258cbef (patch) | |
tree | eef913ca3061a114ff6d301a042408d4d3243ecc /Include | |
parent | 437e6a3b1588ece44abbb4d65f74f9a841638e1d (diff) | |
download | cpython-6e390806495cf30c836615996b94e5ffa258cbef.zip cpython-6e390806495cf30c836615996b94e5ffa258cbef.tar.gz cpython-6e390806495cf30c836615996b94e5ffa258cbef.tar.bz2 |
Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.
Diffstat (limited to 'Include')
-rw-r--r-- | Include/unicodeobject.h | 82 |
1 files changed, 82 insertions, 0 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 5d9263b..3e9dcee 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -145,6 +145,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString +# define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode @@ -159,6 +160,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape +# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32 +# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8 @@ -170,6 +173,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape +# define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32 # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16 # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape @@ -223,6 +227,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString +# define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode @@ -237,6 +242,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape +# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32 +# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8 @@ -248,6 +255,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape +# define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32 # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16 # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape @@ -701,6 +709,80 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8( const char *errors /* error handling */ ); +/* --- UTF-32 Codecs ------------------------------------------------------ */ + +/* Decodes length bytes from a UTF-32 encoded buffer string and returns + the corresponding Unicode object. + + errors (if non-NULL) defines the error handling. It defaults + to "strict". + + If byteorder is non-NULL, the decoder starts decoding using the + given byte order: + + *byteorder == -1: little endian + *byteorder == 0: native order + *byteorder == 1: big endian + + In native mode, the first four bytes of the stream are checked for a + BOM mark. If found, the BOM mark is analysed, the byte order + adjusted and the BOM skipped. In the other modes, no BOM mark + interpretation is done. After completion, *byteorder is set to the + current byte order at the end of input data. + + If byteorder is NULL, the codec starts in native order mode. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( + const char *string, /* UTF-32 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + int *byteorder /* pointer to byteorder to use + 0=native;-1=LE,1=BE; updated on + exit */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( + const char *string, /* UTF-32 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + int *byteorder, /* pointer to byteorder to use + 0=native;-1=LE,1=BE; updated on + exit */ + Py_ssize_t *consumed /* bytes consumed */ + ); + +/* Returns a Python string using the UTF-32 encoding in native byte + order. The string always starts with a BOM mark. */ + +PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( + PyObject *unicode /* Unicode object */ + ); + +/* Returns a Python string object holding the UTF-32 encoded value of + the Unicode data. + + If byteorder is not 0, output is written according to the following + byte order: + + byteorder == -1: little endian + byteorder == 0: native byte order (writes a BOM mark) + byteorder == 1: big endian + + If byteorder is 0, the output string will always start with the + Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is + prepended. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* number of Py_UNICODE chars to encode */ + const char *errors, /* error handling */ + int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ + ); + /* --- UTF-16 Codecs ------------------------------------------------------ */ /* Decodes length bytes from a UTF-16 encoded buffer string and returns |