diff options
author | Benjamin Peterson <benjamin@python.org> | 2011-08-13 03:18:19 (GMT) |
---|---|---|
committer | Benjamin Peterson <benjamin@python.org> | 2011-08-13 03:18:19 (GMT) |
commit | f8e7543df95d5cc12f7478d2eed690621919d68a (patch) | |
tree | 2f47f682ca67384427ceff0be4363b72322105ab /Objects | |
parent | 98d95a50c90bdc4a64fb7db9da74d46a3bf129ba (diff) | |
parent | f413b80806bb7d077a1611610273dab6d916908d (diff) | |
download | cpython-f8e7543df95d5cc12f7478d2eed690621919d68a.zip cpython-f8e7543df95d5cc12f7478d2eed690621919d68a.tar.gz cpython-f8e7543df95d5cc12f7478d2eed690621919d68a.tar.bz2 |
merge 3.2 (#12732)
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 31 |
1 files changed, 23 insertions, 8 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 0918671..61b253d 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8044,14 +8044,30 @@ unicode_isnumeric(PyUnicodeObject *self)
     return PyBool_FromLong(1);
 }
 
+static Py_UCS4
+decode_ucs4(const Py_UNICODE *s, Py_ssize_t *i, Py_ssize_t size)
+{
+    Py_UCS4 ch;
+    assert(*i < size);
+    ch = s[(*i)++];
+#ifndef Py_UNICODE_WIDE
+    if ((ch & 0xfffffc00) == 0xd800 &&
+        *i < size
+        && (s[*i] & 0xFFFFFC00) == 0xDC00)
+        ch = ((Py_UCS4)ch << 10UL) + (Py_UCS4)(s[(*i)++]) - 0x35fdc00;
+#endif
+    return ch;
+}
+
 int
 PyUnicode_IsIdentifier(PyObject *self)
 {
-    register const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
-    register const Py_UNICODE *e;
+    Py_ssize_t i = 0, size = PyUnicode_GET_SIZE(self);
+    Py_UCS4 first;
+    const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
 
     /* Special case for empty strings */
-    if (PyUnicode_GET_SIZE(self) == 0)
+    if (!size)
         return 0;
 
     /* PEP 3131 says that the first character must be in
@@ -8062,14 +8078,13 @@ PyUnicode_IsIdentifier(PyObject *self)
        definition of XID_Start and XID_Continue, it is sufficient
        to check just for these, except that _ must be allowed
        as starting an identifier.  */
-    if (!_PyUnicode_IsXidStart(*p) && *p != 0x5F /* LOW LINE */)
+    first = decode_ucs4(p, &i, size);
+    if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */)
         return 0;
 
-    e = p + PyUnicode_GET_SIZE(self);
-    for (p++; p < e; p++) {
-        if (!_PyUnicode_IsXidContinue(*p))
+    while (i < size)
+        if (!_PyUnicode_IsXidContinue(decode_ucs4(p, &i, size)))
             return 0;
-    }
     return 1;
 }
 