From 1294ad0c59d4a8bfbdd3f4606653d7aa20b2969e Mon Sep 17 00:00:00 2001 From: Fredrik Lundh Date: Tue, 26 Jun 2001 17:17:07 +0000 Subject: experimental UCS-4 support: added USE_UCS4_STORAGE define to unicodeobject.h, which forces sizeof(Py_UNICODE) == sizeof(Py_UCS4). (this may be good enough for platforms that don't have a 16-bit type. the UTF-16 codecs don't work, though) --- Include/unicodeobject.h | 31 +++++++++++++++++++------------ Modules/sre.h | 4 ++++ Objects/unicodeobject.c | 2 ++ 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index f91a5a0..e330fd1 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -58,6 +58,19 @@ Copyright (c) Corporation for National Research Initiatives. /* --- Internal Unicode Format -------------------------------------------- */ +/* experimental UCS-4 support. enable at your own risk! */ +#undef USE_UCS4_STORAGE + +/* + * Use this typedef when you need to represent a UTF-16 surrogate pair + * as single unsigned integer. + */ +#if SIZEOF_INT >= 4 +typedef unsigned int Py_UCS4; +#elif SIZEOF_LONG >= 4 +typedef unsigned long Py_UCS4; +#endif + /* Set these flags if the platform has "wchar.h", "wctype.h" and the wchar_t type is a 16-bit unsigned type */ /* #define HAVE_WCHAR_H */ @@ -66,8 +79,8 @@ Copyright (c) Corporation for National Research Initiatives. /* Defaults for various platforms */ #ifndef HAVE_USABLE_WCHAR_T -/* Windows has a usable wchar_t type */ -# if defined(MS_WIN32) +/* Windows has a usable wchar_t type (unless we're using UCS-4) */ +# if defined(MS_WIN32) && !defined(USE_UCS4_STORAGE) # define HAVE_USABLE_WCHAR_T # endif @@ -105,19 +118,13 @@ typedef wchar_t Py_UNICODE; If a short is not 16 bits on your platform, you have to fix the typedef below, or the module initialization code will complain. 
*/ +#ifdef USE_UCS4_STORAGE +typedef Py_UCS4 Py_UNICODE; +#else typedef unsigned short Py_UNICODE; - #endif -/* - * Use this typedef when you need to represent a UTF-16 surrogate pair - * as single unsigned integer. - */ -#if SIZEOF_INT >= 4 -typedef unsigned int Py_UCS4; -#elif SIZEOF_LONG >= 4 -typedef unsigned long Py_UCS4; -#endif +#endif /* --- Internal Unicode Operations ---------------------------------------- */ diff --git a/Modules/sre.h b/Modules/sre.h index bf58eb5..61a0208 100644 --- a/Modules/sre.h +++ b/Modules/sre.h @@ -14,7 +14,11 @@ #include "sre_constants.h" /* size of a code word (must be unsigned short or larger) */ +#ifdef USE_UCS4_STORAGE +#define SRE_CODE unsigned long +#else #define SRE_CODE unsigned short +#endif typedef struct { PyObject_VAR_HEAD diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c62f65b..742c770 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5282,9 +5282,11 @@ void _PyUnicode_Init(void) int i; /* Doublecheck the configuration... */ +#ifndef USE_UCS4_STORAGE if (sizeof(Py_UNICODE) != 2) Py_FatalError("Unicode configuration error: " "sizeof(Py_UNICODE) != 2 bytes"); +#endif /* Init the implementation */ unicode_freelist = NULL; -- cgit v0.12