summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@python.org> 2023-10-20 15:59:29 (GMT)
committer: GitHub <noreply@github.com> 2023-10-20 15:59:29 (GMT)
commit: d731579bfb9a497cfb0076cb6b221058a20088fe (patch)
tree: 63eb8f9b8dfb7af9a5de6d4499ab375b80f208d4 /Objects
parent: 59ea0f523e155ac1a471cd292b41a76241fccd36 (diff)
download: cpython-d731579bfb9a497cfb0076cb6b221058a20088fe.zip
cpython-d731579bfb9a497cfb0076cb6b221058a20088fe.tar.gz
cpython-d731579bfb9a497cfb0076cb6b221058a20088fe.tar.bz2
gh-111089: PyUnicode_AsUTF8() now raises on embedded NUL (#111091)
* PyUnicode_AsUTF8() now raises an exception if the string contains embedded null characters.
* Update related C API tests (test_capi.test_unicode).
* type_new_set_doc() uses PyUnicode_AsUTF8AndSize() to silently truncate doc containing null bytes.

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/typeobject.c 5
-rw-r--r-- Objects/unicodeobject.c 8
2 files changed, 10 insertions, 3 deletions
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 3261a14..2508569 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -3499,13 +3499,14 @@ type_new_set_doc(PyTypeObject *type)
return 0;
}
- const char *doc_str = PyUnicode_AsUTF8(doc);
+ Py_ssize_t doc_size;
+ const char *doc_str = PyUnicode_AsUTF8AndSize(doc, &doc_size);
if (doc_str == NULL) {
return -1;
}
// Silently truncate the docstring if it contains a null byte
- Py_ssize_t size = strlen(doc_str) + 1;
+ Py_ssize_t size = doc_size + 1;
char *tp_doc = (char *)PyObject_Malloc(size);
if (tp_doc == NULL) {
PyErr_NoMemory();
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 33cbc98..07d1b6e 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3837,7 +3837,13 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
const char *
PyUnicode_AsUTF8(PyObject *unicode)
{
- return PyUnicode_AsUTF8AndSize(unicode, NULL);
+ Py_ssize_t size;
+ const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, &size);
+ if (utf8 != NULL && strlen(utf8) != (size_t)size) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ return NULL;
+ }
+ return utf8;
}
/*