summary | refs | log | tree | commit | diff | stats
path: root/Python/ast.c
diff options
context:
space:
mode:
author: Benjamin Peterson <benjamin@python.org> 2009-10-29 01:22:38 (GMT)
committer: Benjamin Peterson <benjamin@python.org> 2009-10-29 01:22:38 (GMT)
commit: 7dc5ac5ec690db232359620d28cf102c3b6a5165 (patch)
tree: b3a9f727da23adcd733325dd8296c5ba0b061303 /Python/ast.c
parent: 1531f528b3d89096817824d5c597ca873f59e822 (diff)
download: cpython-7dc5ac5ec690db232359620d28cf102c3b6a5165.zip
          cpython-7dc5ac5ec690db232359620d28cf102c3b6a5165.tar.gz
          cpython-7dc5ac5ec690db232359620d28cf102c3b6a5165.tar.bz2
Merged revisions 75928 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r75928 | benjamin.peterson | 2009-10-28 16:59:39 -0500 (Wed, 28 Oct 2009) | 5 lines

  In wide builds, avoid storing high unicode characters from source code with
  surrogates.

  This is accomplished by decoding with utf-32 instead of utf-16 on all builds.

  The patch is by Adam Olsen.
........
Diffstat (limited to 'Python/ast.c')
-rw-r--r-- Python/ast.c 23
1 files changed, 14 insertions, 9 deletions
diff --git a/Python/ast.c b/Python/ast.c
index b0684c5..2a806af 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -3217,10 +3217,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
u = NULL;
} else {
/* check for integer overflow */
- if (len > PY_SIZE_MAX / 4)
+ if (len > PY_SIZE_MAX / 6)
return NULL;
- /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
- u = PyBytes_FromStringAndSize((char *)NULL, len * 4);
+ /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
+ "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
+ u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
if (u == NULL)
return NULL;
p = buf = PyBytes_AsString(u);
@@ -3237,20 +3238,24 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
PyObject *w;
char *r;
Py_ssize_t rn, i;
- w = decode_utf8(c, &s, end, "utf-16-be");
+ w = decode_utf8(c, &s, end, "utf-32-be");
if (w == NULL) {
Py_DECREF(u);
return NULL;
}
r = PyBytes_AS_STRING(w);
rn = Py_SIZE(w);
- assert(rn % 2 == 0);
- for (i = 0; i < rn; i += 2) {
- sprintf(p, "\\u%02x%02x",
+ assert(rn % 4 == 0);
+ for (i = 0; i < rn; i += 4) {
+ sprintf(p, "\\U%02x%02x%02x%02x",
r[i + 0] & 0xFF,
- r[i + 1] & 0xFF);
- p += 6;
+ r[i + 1] & 0xFF,
+ r[i + 2] & 0xFF,
+ r[i + 3] & 0xFF);
+ p += 10;
}
+ /* Should be impossible to overflow */
+ assert(p - buf <= Py_SIZE(u));
Py_DECREF(w);
} else {
*p++ = *s++;