summaryrefslogtreecommitdiffstats
path: root/Include
diff options
context:
space:
mode:
authorWalter Dörwald <walter@livinglogic.de>2007-08-17 16:41:28 (GMT)
committerWalter Dörwald <walter@livinglogic.de>2007-08-17 16:41:28 (GMT)
commit6e390806495cf30c836615996b94e5ffa258cbef (patch)
treeeef913ca3061a114ff6d301a042408d4d3243ecc /Include
parent437e6a3b1588ece44abbb4d65f74f9a841638e1d (diff)
downloadcpython-6e390806495cf30c836615996b94e5ffa258cbef.zip
cpython-6e390806495cf30c836615996b94e5ffa258cbef.tar.gz
cpython-6e390806495cf30c836615996b94e5ffa258cbef.tar.bz2
Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.
Diffstat (limited to 'Include')
-rw-r--r--Include/unicodeobject.h82
1 files changed, 82 insertions, 0 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 5d9263b..3e9dcee 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -145,6 +145,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
+# define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
@@ -159,6 +160,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
+# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
+# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
@@ -170,6 +173,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
+# define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
@@ -223,6 +227,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
+# define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
@@ -237,6 +242,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
+# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
+# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
@@ -248,6 +255,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
+# define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
@@ -701,6 +709,80 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
const char *errors /* error handling */
);
+/* --- UTF-32 Codecs ------------------------------------------------------ */
+
+/* Decodes length bytes from a UTF-32 encoded buffer string and returns
+ the corresponding Unicode object.
+
+ errors (if non-NULL) defines the error handling. It defaults
+ to "strict".
+
+ If byteorder is non-NULL, the decoder starts decoding using the
+ given byte order:
+
+ *byteorder == -1: little endian
+ *byteorder == 0: native order
+ *byteorder == 1: big endian
+
+ In native mode, the first four bytes of the stream are checked for a
+ byte order mark (BOM). If one is found, it is analysed, the byte order
+ is adjusted accordingly and the BOM is skipped. In the other modes, no
+ BOM interpretation is done. After completion, *byteorder is set to the
+ current byte order at the end of input data.
+
+ If byteorder is NULL, the codec starts in native order mode.
+
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
+ const char *string, /* UTF-32 encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors, /* error handling */
+ int *byteorder /* pointer to byteorder to use
+ 0=native;-1=LE,1=BE; updated on
+ exit */
+ );
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
+ const char *string, /* UTF-32 encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors, /* error handling */
+ int *byteorder, /* pointer to byteorder to use
+ 0=native;-1=LE,1=BE; updated on
+ exit */
+ Py_ssize_t *consumed /* bytes consumed */
+ );
+
+/* Returns a Python string using the UTF-32 encoding in native byte
+ order. The string always starts with a byte order mark (BOM). */
+
+PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
+ PyObject *unicode /* Unicode object */
+ );
+
+/* Returns a Python string object holding the UTF-32 encoded value of
+ the Unicode data.
+
+ The output is written according to the value of byteorder, which
+ selects the byte order as follows:
+
+ byteorder == -1: little endian
+ byteorder == 0: native byte order (writes a BOM mark)
+ byteorder == 1: big endian
+
+ If byteorder is 0, the output string will always start with the
+ Unicode byte order mark (BOM, U+FEFF). In the other two modes, no
+ BOM is prepended.
+
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
+ const Py_UNICODE *data, /* Unicode char buffer */
+ Py_ssize_t length, /* number of Py_UNICODE chars to encode */
+ const char *errors, /* error handling */
+ int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+ );
+
/* --- UTF-16 Codecs ------------------------------------------------------ */
/* Decodes length bytes from a UTF-16 encoded buffer string and returns