diff options
author | Victor Stinner <vstinner@python.org> | 2023-10-20 15:59:29 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-20 15:59:29 (GMT) |
commit | d731579bfb9a497cfb0076cb6b221058a20088fe (patch) | |
tree | 63eb8f9b8dfb7af9a5de6d4499ab375b80f208d4 /Objects | |
parent | 59ea0f523e155ac1a471cd292b41a76241fccd36 (diff) | |
download | cpython-d731579bfb9a497cfb0076cb6b221058a20088fe.zip cpython-d731579bfb9a497cfb0076cb6b221058a20088fe.tar.gz cpython-d731579bfb9a497cfb0076cb6b221058a20088fe.tar.bz2 |
gh-111089: PyUnicode_AsUTF8() now raises on embedded NUL (#111091)
* PyUnicode_AsUTF8() now raises an exception if the string contains
  embedded null characters.
* Update related C API tests (test_capi.test_unicode).
* type_new_set_doc() uses PyUnicode_AsUTF8AndSize() to silently
  truncate doc containing null bytes.
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/typeobject.c | 5 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 8 |
2 files changed, 10 insertions, 3 deletions
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 3261a14..2508569 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -3499,13 +3499,14 @@ type_new_set_doc(PyTypeObject *type)
         return 0;
     }
 
-    const char *doc_str = PyUnicode_AsUTF8(doc);
+    Py_ssize_t doc_size;
+    const char *doc_str = PyUnicode_AsUTF8AndSize(doc, &doc_size);
     if (doc_str == NULL) {
         return -1;
     }
 
     // Silently truncate the docstring if it contains a null byte
-    Py_ssize_t size = strlen(doc_str) + 1;
+    Py_ssize_t size = doc_size + 1;
     char *tp_doc = (char *)PyObject_Malloc(size);
     if (tp_doc == NULL) {
         PyErr_NoMemory();
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 33cbc98..07d1b6e 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3837,7 +3837,13 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
 const char *
 PyUnicode_AsUTF8(PyObject *unicode)
 {
-    return PyUnicode_AsUTF8AndSize(unicode, NULL);
+    Py_ssize_t size;
+    const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, &size);
+    if (utf8 != NULL && strlen(utf8) != (size_t)size) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        return NULL;
+    }
+    return utf8;
 }
 
 /*