diff options
| author | Martin v. Löwis <martin@v.loewis.de> | 2001-06-27 06:28:56 (GMT) |
|---|---|---|
| committer | Martin v. Löwis <martin@v.loewis.de> | 2001-06-27 06:28:56 (GMT) |
| commit | ce9b5a55e164f1128756478b6a2bb548abec1980 (patch) | |
| tree | 0b616e0fae5ec7204f723235d196ae2b7c124d78 /Objects/unicodeobject.c | |
| parent | 236d8b79748fec890d57ad0dd99ea3f1c3ba57df (diff) | |
| download | cpython-ce9b5a55e164f1128756478b6a2bb548abec1980.zip cpython-ce9b5a55e164f1128756478b6a2bb548abec1980.tar.gz cpython-ce9b5a55e164f1128756478b6a2bb548abec1980.tar.bz2 | |
Encode surrogates in UTF-8 even for a wide Py_UNICODE.
Implement sys.maxunicode.
Explicitly wrap around upper/lower computations for wide Py_UNICODE.
When decoding large characters with UTF-8, represent expected test
results using the \U notation.
Diffstat (limited to 'Objects/unicodeobject.c')
| -rw-r--r-- | Objects/unicodeobject.c | 19 |
1 file changed, 12 insertions, 7 deletions
```diff
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ffac371..2f66c3c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -103,6 +103,18 @@ static PyUnicodeObject *unicode_latin1[256];
 */
 static char unicode_default_encoding[100];
 
+Py_UNICODE
+PyUnicode_GetMax()
+{
+#ifdef USE_UCS4_STORAGE
+    return 0x10FFFF;
+#else
+    /* This is actually an illegal character, so it should
+       not be passed to unichr. */
+    return 0xFFFF;
+#endif
+}
+
 /* --- Unicode Object ----------------------------------------------------- */
 
 static
@@ -884,12 +896,6 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
             cbWritten += 2;
         }
         else if (ch < 0x10000) {
-#if Py_UNICODE_SIZE == 4
-            *p++ = 0xe0 | (ch>>12);
-            *p++ = 0x80 | ((ch>>6) & 0x3f);
-            *p++ = 0x80 | (ch & 0x3f);
-            cbWritten += 3;
-#else
             /* Check for high surrogate */
             if (0xD800 <= ch && ch <= 0xDBFF) {
                 if (i != size) {
@@ -920,7 +926,6 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
             }
             *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
             *p++ = (char)(0x80 | (ch & 0x3f));
-#endif
         } else {
             *p++ = 0xf0 | (ch>>18);
             *p++ = 0x80 | ((ch>>12) & 0x3f);
```
