summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de> 2001-06-27 06:28:56 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2001-06-27 06:28:56 (GMT)
commit: ce9b5a55e164f1128756478b6a2bb548abec1980 (patch)
tree: 0b616e0fae5ec7204f723235d196ae2b7c124d78 /Objects/unicodeobject.c
parent: 236d8b79748fec890d57ad0dd99ea3f1c3ba57df (diff)
download: cpython-ce9b5a55e164f1128756478b6a2bb548abec1980.zip
cpython-ce9b5a55e164f1128756478b6a2bb548abec1980.tar.gz
cpython-ce9b5a55e164f1128756478b6a2bb548abec1980.tar.bz2
Encode surrogates in UTF-8 even for a wide Py_UNICODE.
Implement sys.maxunicode. Explicitly wrap around upper/lower computations for wide Py_UNICODE. When decoding large characters with UTF-8, represent expected test results using the \U notation.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 19
1 files changed, 12 insertions, 7 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ffac371..2f66c3c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -103,6 +103,18 @@ static PyUnicodeObject *unicode_latin1[256];
*/
static char unicode_default_encoding[100];
+Py_UNICODE
+PyUnicode_GetMax()
+{ /* Returns a compile-time constant chosen by USE_UCS4_STORAGE; presumably the value behind sys.maxunicode (per the commit message) -- confirm against the caller. */
+#ifdef USE_UCS4_STORAGE
+ return 0x10FFFF; /* returned when USE_UCS4_STORAGE is defined */
+#else
+ /* This is actually an illegal character, so it should
+ not be passed to unichr. */
+ return 0xFFFF; /* returned when USE_UCS4_STORAGE is not defined */
+#endif
+}
+
/* --- Unicode Object ----------------------------------------------------- */
static
@@ -884,12 +896,6 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
cbWritten += 2;
}
else if (ch < 0x10000) {
-#if Py_UNICODE_SIZE == 4
- *p++ = 0xe0 | (ch>>12);
- *p++ = 0x80 | ((ch>>6) & 0x3f);
- *p++ = 0x80 | (ch & 0x3f);
- cbWritten += 3;
-#else
/* Check for high surrogate */
if (0xD800 <= ch && ch <= 0xDBFF) {
if (i != size) {
@@ -920,7 +926,6 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
}
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*p++ = (char)(0x80 | (ch & 0x3f));
-#endif
} else {
*p++ = 0xf0 | (ch>>18);
*p++ = 0x80 | ((ch>>12) & 0x3f);