diff options
author | Marc-André Lemburg <mal@egenix.com> | 2001-07-25 16:05:59 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2001-07-25 16:05:59 (GMT) |
commit | 80d1dd5f3b83c96c5c8e4a51417f1c748318de94 (patch) | |
tree | 2a7a8fb732e2ed0a3e0ee62057a170d526b2b89a | |
parent | 784d3df09ff34cb7b301be246f701eb0c17901cb (diff) | |
download | cpython-80d1dd5f3b83c96c5c8e4a51417f1c748318de94.zip cpython-80d1dd5f3b83c96c5c8e4a51417f1c748318de94.tar.gz cpython-80d1dd5f3b83c96c5c8e4a51417f1c748318de94.tar.bz2 |
Fix for bug #444493: u'\U00010001' segfaults with current CVS on
wide builds.
-rw-r--r-- | Lib/test/test_unicode.py | 3 |
-rw-r--r-- | Objects/unicodeobject.c | 27 |
2 files changed, 23 insertions, 7 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index eb74854..dde16ef 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -455,7 +455,8 @@ for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', u = u'\U00010001\U00020002\U00030003\U00040004\U00050005' for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', - 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): + #'raw_unicode_escape', + 'unicode_escape', 'unicode_internal'): verify(unicode(u.encode(encoding),encoding) == u) u = u''.join(map(unichr, range(256))) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 172c61c..08ba065 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1415,7 +1415,6 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, { PyObject *repr; char *p; - char *q; static const char *hexdigit = "0123456789abcdef"; @@ -1423,7 +1422,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, if (repr == NULL) return NULL; - p = q = PyString_AS_STRING(repr); + p = PyString_AS_STRING(repr); if (quotes) { *p++ = 'u'; @@ -1432,14 +1431,26 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, } while (size-- > 0) { Py_UNICODE ch = *s++; + /* Escape quotes */ - if (quotes && (ch == (Py_UNICODE) q[1] || ch == '\\')) { + if (quotes && + (ch == (Py_UNICODE) PyString_AS_STRING(repr)[1] || ch == '\\')) { *p++ = '\\'; *p++ = (char) ch; } + #ifdef Py_UNICODE_WIDE /* Map 21-bit characters to '\U00xxxxxx' */ else if (ch >= 0x10000) { + int offset = p - PyString_AS_STRING(repr); + + /* Resize the string if necessary */ + if (offset + 12 > PyString_GET_SIZE(repr)) { + if (_PyString_Resize(&repr, PyString_GET_SIZE(repr) + 100)) + goto onError; + p = PyString_AS_STRING(repr) + offset; + } + *p++ = '\\'; *p++ = 'U'; *p++ = hexdigit[(ch >> 28) & 0x0000000F]; @@ -1449,7 +1460,8 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, *p++ = hexdigit[(ch >> 12) & 0x0000000F]; *p++ = hexdigit[(ch >> 8) & 0x0000000F]; *p++ = 
hexdigit[(ch >> 4) & 0x0000000F]; - *p++ = hexdigit[ch & 15]; + *p++ = hexdigit[ch & 0x0000000F]; + continue; } #endif /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */ @@ -1487,6 +1499,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, *p++ = hexdigit[(ch >> 4) & 0x000F]; *p++ = hexdigit[ch & 0x000F]; } + /* Map special whitespace to '\t', \n', '\r' */ else if (ch == '\t') { *p++ = '\\'; @@ -1500,6 +1513,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, *p++ = '\\'; *p++ = 'r'; } + /* Map non-printable US ASCII to '\xhh' */ else if (ch < ' ' || ch >= 128) { *p++ = '\\'; @@ -1507,15 +1521,16 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, *p++ = hexdigit[(ch >> 4) & 0x000F]; *p++ = hexdigit[ch & 0x000F]; } + /* Copy everything else as-is */ else *p++ = (char) ch; } if (quotes) - *p++ = q[1]; + *p++ = PyString_AS_STRING(repr)[1]; *p = '\0'; - if (_PyString_Resize(&repr, p - q)) + if (_PyString_Resize(&repr, p - PyString_AS_STRING(repr))) goto onError; return repr; |