diff options
author | Neal Norwitz <nnorwitz@gmail.com> | 2006-08-21 22:21:19 (GMT) |
---|---|---|
committer | Neal Norwitz <nnorwitz@gmail.com> | 2006-08-21 22:21:19 (GMT) |
commit | 17753ecbfabc224080ef3e3e0801b7514ef3b455 (patch) | |
tree | 5b82040f4a72260e43278a3fe6eebc78f03a40d6 /Objects | |
parent | 0c6ae5bad473c57b3d5b2a3c71b26792e63df14c (diff) | |
download | cpython-17753ecbfabc224080ef3e3e0801b7514ef3b455.zip cpython-17753ecbfabc224080ef3e3e0801b7514ef3b455.tar.gz cpython-17753ecbfabc224080ef3e3e0801b7514ef3b455.tar.bz2 |
Patch #1541585: fix buffer overrun when performing repr() on
a unicode string in a build with wide unicode (UCS-4) support.
This code could be improved, so add an XXX comment.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 41 |
1 files changed, 29 insertions, 12 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d93f780..20daf66 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2040,7 +2040,32 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, static const char *hexdigit = "0123456789abcdef"; - repr = PyString_FromStringAndSize(NULL, 2 + 6*size + 1); + /* XXX(nnorwitz): rather than over-allocating, it would be + better to choose a different scheme. Perhaps scan the + first N-chars of the string and allocate based on that size. + */ + /* Initial allocation is based on the longest-possible unichr + escape. + + In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source + unichr, so in this case it's the longest unichr escape. In + narrow (UTF-16) builds this is five chars per source unichr + since there are two unichrs in the surrogate pair, so in narrow + (UTF-16) builds it's not the longest unichr escape. + + In wide or narrow builds '\uxxxx' is 6 chars per source unichr, + so in the narrow (UTF-16) build case it's the longest unichr + escape. + */ + + repr = PyString_FromStringAndSize(NULL, + 2 +#ifdef Py_UNICODE_WIDE + + 10*size +#else + + 6*size +#endif + + 1); if (repr == NULL) return NULL; @@ -2065,15 +2090,6 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, #ifdef Py_UNICODE_WIDE /* Map 21-bit characters to '\U00xxxxxx' */ else if (ch >= 0x10000) { - Py_ssize_t offset = p - PyString_AS_STRING(repr); - - /* Resize the string if necessary */ - if (offset + 12 > PyString_GET_SIZE(repr)) { - if (_PyString_Resize(&repr, PyString_GET_SIZE(repr) + 100)) - return NULL; - p = PyString_AS_STRING(repr) + offset; - } - *p++ = '\\'; *p++ = 'U'; *p++ = hexdigit[(ch >> 28) & 0x0000000F]; @@ -2086,8 +2102,8 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, *p++ = hexdigit[ch & 0x0000000F]; continue; } -#endif - /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */ +#else + /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */ else if (ch >= 0xD800 && ch < 0xDC00) { Py_UNICODE ch2; Py_UCS4 ucs; @@ -2112,6 +2128,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, s--; size++; } +#endif /* Map 16-bit characters to '\uxxxx' */ if (ch >= 256) { |