summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Marc-André Lemburg <mal@egenix.com>, 2001-07-25 16:05:59 (GMT)
committer: Marc-André Lemburg <mal@egenix.com>, 2001-07-25 16:05:59 (GMT)
commit: 80d1dd5f3b83c96c5c8e4a51417f1c748318de94 (patch)
tree: 2a7a8fb732e2ed0a3e0ee62057a170d526b2b89a
parent: 784d3df09ff34cb7b301be246f701eb0c17901cb (diff)
download: cpython-80d1dd5f3b83c96c5c8e4a51417f1c748318de94.zip
cpython-80d1dd5f3b83c96c5c8e4a51417f1c748318de94.tar.gz
cpython-80d1dd5f3b83c96c5c8e4a51417f1c748318de94.tar.bz2
Fix for bug #444493: u'\U00010001' segfaults with current CVS on
wide builds.
-rw-r--r-- Lib/test/test_unicode.py | 3
-rw-r--r-- Objects/unicodeobject.c | 27
2 files changed, 23 insertions, 7 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index eb74854..dde16ef 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -455,7 +455,8 @@ for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
for encoding in ('utf-8',
'utf-16', 'utf-16-le', 'utf-16-be',
- 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
+ #'raw_unicode_escape',
+ 'unicode_escape', 'unicode_internal'):
verify(unicode(u.encode(encoding),encoding) == u)
u = u''.join(map(unichr, range(256)))
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 172c61c..08ba065 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1415,7 +1415,6 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
{
PyObject *repr;
char *p;
- char *q;
static const char *hexdigit = "0123456789abcdef";
@@ -1423,7 +1422,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
if (repr == NULL)
return NULL;
- p = q = PyString_AS_STRING(repr);
+ p = PyString_AS_STRING(repr);
if (quotes) {
*p++ = 'u';
@@ -1432,14 +1431,26 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
}
while (size-- > 0) {
Py_UNICODE ch = *s++;
+
/* Escape quotes */
- if (quotes && (ch == (Py_UNICODE) q[1] || ch == '\\')) {
+ if (quotes &&
+ (ch == (Py_UNICODE) PyString_AS_STRING(repr)[1] || ch == '\\')) {
*p++ = '\\';
*p++ = (char) ch;
}
+
#ifdef Py_UNICODE_WIDE
/* Map 21-bit characters to '\U00xxxxxx' */
else if (ch >= 0x10000) {
+ int offset = p - PyString_AS_STRING(repr);
+
+ /* Resize the string if necessary */
+ if (offset + 12 > PyString_GET_SIZE(repr)) {
+ if (_PyString_Resize(&repr, PyString_GET_SIZE(repr) + 100))
+ goto onError;
+ p = PyString_AS_STRING(repr) + offset;
+ }
+
*p++ = '\\';
*p++ = 'U';
*p++ = hexdigit[(ch >> 28) & 0x0000000F];
@@ -1449,7 +1460,8 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
*p++ = hexdigit[(ch >> 12) & 0x0000000F];
*p++ = hexdigit[(ch >> 8) & 0x0000000F];
*p++ = hexdigit[(ch >> 4) & 0x0000000F];
- *p++ = hexdigit[ch & 15];
+ *p++ = hexdigit[ch & 0x0000000F];
+ continue;
}
#endif
/* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */
@@ -1487,6 +1499,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
*p++ = hexdigit[(ch >> 4) & 0x000F];
*p++ = hexdigit[ch & 0x000F];
}
+
/* Map special whitespace to '\t', \n', '\r' */
else if (ch == '\t') {
*p++ = '\\';
@@ -1500,6 +1513,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
*p++ = '\\';
*p++ = 'r';
}
+
/* Map non-printable US ASCII to '\xhh' */
else if (ch < ' ' || ch >= 128) {
*p++ = '\\';
@@ -1507,15 +1521,16 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
*p++ = hexdigit[(ch >> 4) & 0x000F];
*p++ = hexdigit[ch & 0x000F];
}
+
/* Copy everything else as-is */
else
*p++ = (char) ch;
}
if (quotes)
- *p++ = q[1];
+ *p++ = PyString_AS_STRING(repr)[1];
*p = '\0';
- if (_PyString_Resize(&repr, p - q))
+ if (_PyString_Resize(&repr, p - PyString_AS_STRING(repr)))
goto onError;
return repr;