summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@python.org> 2020-02-11 13:29:33 (GMT)
committer: GitHub <noreply@github.com> 2020-02-11 13:29:33 (GMT)
commit: f3e7ea5b8c220cd63101e419d529c8563f9c6115 (patch)
tree: 8df6f5eac4ef584b0a511e070e41848cf678a7f2
parent: 1ea45ae257971ee7b648e3b031603a31fc059f81 (diff)
download: cpython-f3e7ea5b8c220cd63101e419d529c8563f9c6115.zip
cpython-f3e7ea5b8c220cd63101e419d529c8563f9c6115.tar.gz
cpython-f3e7ea5b8c220cd63101e419d529c8563f9c6115.tar.bz2
bpo-39500: Document PyUnicode_IsIdentifier() function (GH-18397)
PyUnicode_IsIdentifier() does not call Py_FatalError() anymore if the string is not ready.
-rw-r--r-- Doc/c-api/unicode.rst 10
-rw-r--r-- Misc/NEWS.d/next/C API/2020-02-07-09-35-43.bpo-39500.xRAEgX.rst 2
-rw-r--r-- Objects/unicodeobject.c 47
-rw-r--r-- Parser/tokenizer.c 3
4 files changed, 47 insertions, 15 deletions
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 96d77c4..b1787ed 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -240,6 +240,16 @@ access internal read-only data of Unicode objects:
:c:func:`PyUnicode_nBYTE_DATA` family of macros.
+.. c:function:: int PyUnicode_IsIdentifier(PyObject *o)
+
+ Return ``1`` if the string is a valid identifier according to the language
+ definition, section :ref:`identifiers`. Return ``0`` otherwise.
+
+ .. versionchanged:: 3.9
+ The function does not call :c:func:`Py_FatalError` anymore if the string
+ is not ready.
+
+
Unicode Character Properties
""""""""""""""""""""""""""""
diff --git a/Misc/NEWS.d/next/C API/2020-02-07-09-35-43.bpo-39500.xRAEgX.rst b/Misc/NEWS.d/next/C API/2020-02-07-09-35-43.bpo-39500.xRAEgX.rst
new file mode 100644
index 0000000..2ca359f
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2020-02-07-09-35-43.bpo-39500.xRAEgX.rst
@@ -0,0 +1,2 @@
+:c:func:`PyUnicode_IsIdentifier` does not call :c:func:`Py_FatalError`
+anymore if the string is not ready.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index fd08ddb..aa874f2 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12198,22 +12198,33 @@ unicode_isnumeric_impl(PyObject *self)
int
PyUnicode_IsIdentifier(PyObject *self)
{
- int kind;
- void *data;
Py_ssize_t i;
- Py_UCS4 first;
+ int ready = PyUnicode_IS_READY(self);
- if (PyUnicode_READY(self) == -1) {
- Py_FatalError("identifier not ready");
+ Py_ssize_t len = ready ? PyUnicode_GET_LENGTH(self) : PyUnicode_GET_SIZE(self);
+ if (len == 0) {
+ /* an empty string is not a valid identifier */
return 0;
}
- /* Special case for empty strings */
- if (PyUnicode_GET_LENGTH(self) == 0)
- return 0;
- kind = PyUnicode_KIND(self);
- data = PyUnicode_DATA(self);
+ int kind;
+ void *data;
+ wchar_t *wstr;
+ if (ready) {
+ kind = PyUnicode_KIND(self);
+ data = PyUnicode_DATA(self);
+ }
+ else {
+ wstr = _PyUnicode_WSTR(self);
+ }
+ Py_UCS4 ch;
+ if (ready) {
+ ch = PyUnicode_READ(kind, data, 0);
+ }
+ else {
+ ch = wstr[0];
+ }
/* PEP 3131 says that the first character must be in
XID_Start and subsequent characters in XID_Continue,
and for the ASCII range, the 2.x rules apply (i.e
@@ -12222,13 +12233,21 @@ PyUnicode_IsIdentifier(PyObject *self)
definition of XID_Start and XID_Continue, it is sufficient
to check just for these, except that _ must be allowed
as starting an identifier. */
- first = PyUnicode_READ(kind, data, 0);
- if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */)
+ if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
return 0;
+ }
- for (i = 1; i < PyUnicode_GET_LENGTH(self); i++)
- if (!_PyUnicode_IsXidContinue(PyUnicode_READ(kind, data, i)))
+ for (i = 1; i < len; i++) {
+ if (ready) {
+ ch = PyUnicode_READ(kind, data, i);
+ }
+ else {
+ ch = wstr[i];
+ }
+ if (!_PyUnicode_IsXidContinue(ch)) {
return 0;
+ }
+ }
return 1;
}
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index f73c326..c37cd92 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1079,8 +1079,9 @@ verify_identifier(struct tok_state *tok)
}
result = PyUnicode_IsIdentifier(s);
Py_DECREF(s);
- if (result == 0)
+ if (result == 0) {
tok->done = E_IDENTIFIER;
+ }
return result;
}