diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2007-08-15 07:32:56 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2007-08-15 07:32:56 (GMT) |
commit | 47383403a0a11259acb640406a8efc38981d2255 (patch) | |
tree | ad461e275dc3f2607bab86bb596366d71489b453 /Objects | |
parent | 32c4ac014387d3bffea5461339b8ad3044d0dafb (diff) | |
download | cpython-47383403a0a11259acb640406a8efc38981d2255.zip cpython-47383403a0a11259acb640406a8efc38981d2255.tar.gz cpython-47383403a0a11259acb640406a8efc38981d2255.tar.bz2 |
Implement PEP 3131. Add isidentifier to str.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 45 |
1 files changed, 44 insertions, 1 deletions
```diff
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 47109a5..e9f97df 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -227,7 +227,8 @@ int unicode_resize(register PyUnicodeObject *unicode,
     }
 
     /* We allocate one more byte to make sure the string is
-       Ux0000 terminated -- XXX is this needed ?
+       Ux0000 terminated; some code (e.g. new_identifier)
+       relies on that.
 
        XXX This allocator could further be enhanced by assuring that the
        free list never reduces its size below 1.
@@ -6679,6 +6680,47 @@ unicode_isnumeric(PyUnicodeObject *self)
     return PyBool_FromLong(1);
 }
 
+int
+PyUnicode_IsIdentifier(PyObject *self)
+{
+    register const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
+    register const Py_UNICODE *e;
+
+    /* Special case for empty strings */
+    if (PyUnicode_GET_SIZE(self) == 0)
+        return 0;
+
+    /* PEP 3131 says that the first character must be in
+       XID_Start and subsequent characters in XID_Continue,
+       and for the ASCII range, the 2.x rules apply (i.e
+       start with letters and underscore, continue with
+       letters, digits, underscore). However, given the current
+       definition of XID_Start and XID_Continue, it is sufficient
+       to check just for these, except that _ must be allowed
+       as starting an identifier. */
+    if (!_PyUnicode_IsXidStart(*p) && *p != 0x5F /* LOW LINE */)
+        return 0;
+
+    e = p + PyUnicode_GET_SIZE(self);
+    for (p++; p < e; p++) {
+        if (!_PyUnicode_IsXidContinue(*p))
+            return 0;
+    }
+    return 1;
+}
+
+PyDoc_STRVAR(isidentifier__doc__,
+"S.isidentifier() -> bool\n\
+\n\
+Return True if S is a valid identifier according\n\
+to the language definition.");
+
+static PyObject*
+unicode_isidentifier(PyObject *self)
+{
+    return PyBool_FromLong(PyUnicode_IsIdentifier(self));
+}
+
 PyDoc_STRVAR(join__doc__,
 "S.join(sequence) -> unicode\n\
 \n\
@@ -7714,6 +7756,7 @@ static PyMethodDef unicode_methods[] = {
     {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__},
     {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
     {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
+    {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__},
     {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
 #if 0
     {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
```