summary | refs | log | tree | commit | diff | stats
path: root/Include
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de> 2001-06-26 22:22:37 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2001-06-26 22:22:37 (GMT)
commit: 0ba70cc3c893f70cc9deb09447277539d7625403 (patch)
tree: f26c28cc3ad9c594683c7c04e5b06786471d395c /Include
parent: ff1cc902fe80605fbc7d5fee8226e4c26832c5a9 (diff)
download: cpython-0ba70cc3c893f70cc9deb09447277539d7625403.zip
cpython-0ba70cc3c893f70cc9deb09447277539d7625403.tar.gz
cpython-0ba70cc3c893f70cc9deb09447277539d7625403.tar.bz2
Support using UCS-4 as the Py_UNICODE type:
Add configure option --enable-unicode. Add config.h macros Py_USING_UNICODE, PY_UNICODE_TYPE, Py_UNICODE_SIZE, SIZEOF_WCHAR_T. Define Py_UCS2. Encode and decode large UTF-8 characters into single Py_UNICODE values for wide Unicode types; likewise for UTF-16. Remove test whether sizeof Py_UNICODE is two.
Diffstat (limited to 'Include')
-rw-r--r-- Include/unicodeobject.h 53
1 file changed, 23 insertions, 30 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index e330fd1..32002e0 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -60,16 +60,9 @@ Copyright (c) Corporation for National Research Initiatives.
/* experimental UCS-4 support. enable at your own risk! */
#undef USE_UCS4_STORAGE
-
-/*
- * Use this typedef when you need to represent a UTF-16 surrogate pair
- * as single unsigned integer.
- */
-#if SIZEOF_INT >= 4
-typedef unsigned int Py_UCS4;
-#elif SIZEOF_LONG >= 4
-typedef unsigned long Py_UCS4;
-#endif
+#if Py_UNICODE_SIZE == 4
+#define USE_UCS4_STORAGE
+#endif
/* Set these flags if the platform has "wchar.h", "wctype.h" and the
wchar_t type is a 16-bit unsigned type */
@@ -77,11 +70,16 @@ typedef unsigned long Py_UCS4;
/* #define HAVE_USABLE_WCHAR_T */
/* Defaults for various platforms */
-#ifndef HAVE_USABLE_WCHAR_T
+#ifndef PY_UNICODE_TYPE
/* Windows has a usable wchar_t type (unless we're using UCS-4) */
# if defined(MS_WIN32) && !defined(USE_UCS4_STORAGE)
# define HAVE_USABLE_WCHAR_T
+# define PY_UNICODE_TYPE wchar_t
+# endif
+
+# if defined(USE_UCS4_STORAGE)
+# define PY_UNICODE_TYPE Py_UCS4
# endif
#endif
@@ -104,28 +102,23 @@ typedef unsigned long Py_UCS4;
# include "wchar.h"
#endif
-#ifdef HAVE_USABLE_WCHAR_T
-
-/* If the compiler defines whcar_t as a 16-bit unsigned type we can
- use the compiler type directly. Works fine with all modern Windows
- platforms. */
-
-typedef wchar_t Py_UNICODE;
-
-#else
-
-/* Use if you have a standard ANSI compiler, without wchar_t support.
- If a short is not 16 bits on your platform, you have to fix the
- typedef below, or the module initialization code will complain. */
-
-#ifdef USE_UCS4_STORAGE
-typedef Py_UCS4 Py_UNICODE;
-#else
-typedef unsigned short Py_UNICODE;
+/*
+ * Use this typedef when you need to represent a UTF-16 surrogate pair
+ * as single unsigned integer.
+ */
+#if SIZEOF_INT >= 4
+typedef unsigned int Py_UCS4;
+#elif SIZEOF_LONG >= 4
+typedef unsigned long Py_UCS4;
#endif
-#endif
+#if SIZEOF_SHORT == 2
+typedef unsigned short Py_UCS2;
+#else
+#error Cannot find a two-byte type
+#endif
+typedef PY_UNICODE_TYPE Py_UNICODE;
/* --- Internal Unicode Operations ---------------------------------------- */