diff options
-rw-r--r-- | Include/unicodeobject.h | 13 |
-rw-r--r-- | Modules/sre.h | 5 |
-rw-r--r-- | Objects/unicodectype.c | 6 |
-rw-r--r-- | Objects/unicodeobject.c | 8 |
-rw-r--r-- | Python/bltinmodule.c | 2 |
5 files changed, 18 insertions, 16 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index d89537f..205b8bb 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -66,10 +66,11 @@ Copyright (c) Corporation for National Research Initiatives. #error Must define Py_UNICODE_SIZE #endif -/* experimental UCS-4 support. enable at your own risk! */ -#undef USE_UCS4_STORAGE -#if Py_UNICODE_SIZE == 4 -#define USE_UCS4_STORAGE +/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode + strings are stored as UCS-2 (with limited support for UTF-16) */ + +#if Py_UNICODE_SIZE >= 4 +#define Py_UNICODE_WIDE #endif /* Set these flags if the platform has "wchar.h", "wctype.h" and the @@ -81,12 +82,12 @@ Copyright (c) Corporation for National Research Initiatives. #ifndef PY_UNICODE_TYPE /* Windows has a usable wchar_t type (unless we're using UCS-4) */ -# if defined(MS_WIN32) && !defined(USE_UCS4_STORAGE) +# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2 # define HAVE_USABLE_WCHAR_T # define PY_UNICODE_TYPE wchar_t # endif -# if defined(USE_UCS4_STORAGE) +# if defined(Py_UNICODE_WIDE) # define PY_UNICODE_TYPE Py_UCS4 # endif diff --git a/Modules/sre.h b/Modules/sre.h index 61a0208..632f47e 100644 --- a/Modules/sre.h +++ b/Modules/sre.h @@ -13,8 +13,9 @@ #include "sre_constants.h" -/* size of a code word (must be unsigned short or larger) */ -#ifdef USE_UCS4_STORAGE +/* size of a code word (must be unsigned short or larger, and + large enough to hold a Py_UNICODE character) */ +#ifdef Py_UNICODE_WIDE #define SRE_CODE unsigned long #else #define SRE_CODE unsigned short diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c index 13fc612..c1b5a0d 100644 --- a/Objects/unicodectype.c +++ b/Objects/unicodectype.c @@ -68,7 +68,7 @@ Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch) else ch += ctype->upper; -#ifdef USE_UCS4_STORAGE +#ifdef Py_UNICODE_WIDE /* The database assumes that the values wrap around at 0x10000. 
*/ if (ch > 0x10000) ch -= 0x10000; @@ -360,7 +360,7 @@ Py_UNICODE _PyUnicode_ToUppercase(register Py_UNICODE ch) const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); ch += ctype->upper; -#ifdef USE_UCS4_STORAGE +#ifdef Py_UNICODE_WIDE /* The database assumes that the values wrap around at 0x10000. */ if (ch > 0x10000) ch -= 0x10000; @@ -376,7 +376,7 @@ Py_UNICODE _PyUnicode_ToLowercase(register Py_UNICODE ch) const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); ch += ctype->lower; -#ifdef USE_UCS4_STORAGE +#ifdef Py_UNICODE_WIDE /* The database assumes that the values wrap around at 0x10000. */ if (ch > 0x10000) ch -= 0x10000; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2f66c3c..08e8089 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -106,7 +106,7 @@ static char unicode_default_encoding[100]; Py_UNICODE PyUnicode_GetMax() { -#ifdef USE_UCS4_STORAGE +#ifdef Py_UNICODE_WIDE return 0x10FFFF; #else /* This is actually an illegal character, so it should @@ -791,7 +791,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s, errmsg = "illegal encoding"; goto utf8Error; } -#if Py_UNICODE_SIZE == 4 +#ifdef Py_UNICODE_WIDE *p++ = (Py_UNICODE)ch; #else /* compute and append the two surrogates: */ @@ -1080,7 +1080,7 @@ PyObject *PyUnicode_DecodeUTF16(const char *s, ch2 = (ch2 >> 8) | (ch2 << 8); #endif if (0xDC00 <= ch2 && ch2 <= 0xDFFF) { -#if Py_UNICODE_SIZE == 2 +#ifndef Py_UNICODE_WIDE /* This is valid data (a UTF-16 surrogate pair), but we are not able to store this information since our Py_UNICODE type only has 16 bits... this might @@ -1326,7 +1326,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, *p++ = (Py_UNICODE) chr; else if (chr <= 0x10ffff) { /* UCS-4 character. Either store directly, or as surrogate pair. 
*/ -#if Py_UNICODE_SIZE == 4 +#ifdef Py_UNICODE_WIDE *p++ = chr; #else chr -= 0x10000L; diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 8917f45..11d6f4c 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -325,7 +325,7 @@ builtin_unichr(PyObject *self, PyObject *args) return PyUnicode_FromUnicode(s, 1); } else { -#if Py_UNICODE_SIZE == 2 +#ifndef Py_UNICODE_WIDE /* UCS-4 character. store as two surrogate characters */ x -= 0x10000L; s[0] = 0xD800 + (Py_UNICODE) (x >> 10);