diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2011-09-28 05:41:54 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2011-09-28 05:41:54 (GMT) |
commit | d63a3b8beb4a0841cb59fb3515347ccaab34b733 (patch) | |
tree | 3b4e3cc63151c5a5a910c3550a190aefaea96ad4 /Python/ast.c | |
parent | 48d49497c50e79d14e9df9527d766ca3a0a38be5 (diff) | |
download | cpython-d63a3b8beb4a0841cb59fb3515347ccaab34b733.zip cpython-d63a3b8beb4a0841cb59fb3515347ccaab34b733.tar.gz cpython-d63a3b8beb4a0841cb59fb3515347ccaab34b733.tar.bz2 |
Implement PEP 393.
Diffstat (limited to 'Python/ast.c')
-rw-r--r-- | Python/ast.c | 61 |
1 files changed, 24 insertions, 37 deletions
diff --git a/Python/ast.c b/Python/ast.c
index 14500ee..a52fd09 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -528,26 +528,21 @@ static identifier
 new_identifier(const char* n, PyArena *arena)
 {
     PyObject* id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
-    Py_UNICODE *u;
-    if (!id)
+    if (!id || PyUnicode_READY(id) == -1)
         return NULL;
-    u = PyUnicode_AS_UNICODE(id);
     /* Check whether there are non-ASCII characters in the
        identifier; if so, normalize to NFKC. */
-    for (; *u; u++) {
-        if (*u >= 128) {
-            PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
-            PyObject *id2;
-            if (!m)
-                return NULL;
-            id2 = PyObject_CallMethod(m, "normalize", "sO", "NFKC", id);
-            Py_DECREF(m);
-            if (!id2)
-                return NULL;
-            Py_DECREF(id);
-            id = id2;
-            break;
-        }
+    if (PyUnicode_MAX_CHAR_VALUE((PyUnicodeObject *)id) >= 128) {
+        PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
+        PyObject *id2;
+        if (!m)
+            return NULL;
+        id2 = PyObject_CallMethod(m, "normalize", "sO", "NFKC", id);
+        Py_DECREF(m);
+        if (!id2)
+            return NULL;
+        Py_DECREF(id);
+        id = id2;
     }
     PyUnicode_InternInPlace(&id);
     PyArena_AddPyObject(arena, id);
@@ -3660,20 +3655,14 @@ parsenumber(struct compiling *c, const char *s)
 }
 
 static PyObject *
-decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding)
+decode_utf8(struct compiling *c, const char **sPtr, const char *end)
 {
-    PyObject *u, *v;
     char *s, *t;
     t = s = (char *)*sPtr;
     /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
     while (s < end && (*s & 0x80)) s++;
     *sPtr = s;
-    u = PyUnicode_DecodeUTF8(t, s - t, NULL);
-    if (u == NULL)
-        return NULL;
-    v = PyUnicode_AsEncodedString(u, encoding, NULL);
-    Py_DECREF(u);
-    return v;
+    return PyUnicode_DecodeUTF8(t, s - t, NULL);
 }
 
 static PyObject *
@@ -3707,22 +3696,20 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
             }
             if (*s & 0x80) { /* XXX inefficient */
                 PyObject *w;
-                char *r;
-                Py_ssize_t rn, i;
-                w = decode_utf8(c, &s, end, "utf-32-be");
+                int kind;
+                void *data;
+                Py_ssize_t len, i;
+                w = decode_utf8(c, &s, end);
                 if (w == NULL) {
                     Py_DECREF(u);
                     return NULL;
                 }
-                r = PyBytes_AS_STRING(w);
-                rn = Py_SIZE(w);
-                assert(rn % 4 == 0);
-                for (i = 0; i < rn; i += 4) {
-                    sprintf(p, "\\U%02x%02x%02x%02x",
-                            r[i + 0] & 0xFF,
-                            r[i + 1] & 0xFF,
-                            r[i + 2] & 0xFF,
-                            r[i + 3] & 0xFF);
+                kind = PyUnicode_KIND(w);
+                data = PyUnicode_DATA(w);
+                len = PyUnicode_GET_LENGTH(w);
+                for (i = 0; i < len; i++) {
+                    Py_UCS4 chr = PyUnicode_READ(kind, data, i);
+                    sprintf(p, "\\U%08x", chr);
                     p += 10;
                 }
                 /* Should be impossible to overflow */