summary | refs | log | tree | commit | diff | stats
path: root/Include
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@python.org> 2023-10-20 15:59:29 (GMT)
committer: GitHub <noreply@github.com> 2023-10-20 15:59:29 (GMT)
commit: d731579bfb9a497cfb0076cb6b221058a20088fe (patch)
tree: 63eb8f9b8dfb7af9a5de6d4499ab375b80f208d4 /Include
parent: 59ea0f523e155ac1a471cd292b41a76241fccd36 (diff)
download: cpython-d731579bfb9a497cfb0076cb6b221058a20088fe.zip
cpython-d731579bfb9a497cfb0076cb6b221058a20088fe.tar.gz
cpython-d731579bfb9a497cfb0076cb6b221058a20088fe.tar.bz2
gh-111089: PyUnicode_AsUTF8() now raises on embedded NUL (#111091)
* PyUnicode_AsUTF8() now raises an exception if the string contains
  embedded null characters.
* Update related C API tests (test_capi.test_unicode).
* type_new_set_doc() uses PyUnicode_AsUTF8AndSize() to silently truncate
  doc containing null bytes.

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Include')
-rw-r--r-- Include/cpython/unicodeobject.h 20
-rw-r--r-- Include/unicodeobject.h 20
2 files changed, 19 insertions, 21 deletions
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index 859ab71..d67553c 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -442,18 +442,18 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
/* --- Manage the default encoding ---------------------------------------- */
-/* Returns a pointer to the default encoding (UTF-8) of the
- Unicode object unicode.
-
- Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
- in the unicodeobject.
-
- Use of this API is DEPRECATED since no size information can be
- extracted from the returned data.
-*/
-
+// Returns a pointer to the default encoding (UTF-8) of the
+// Unicode object unicode.
+//
+// Raise an exception if the string contains embedded null characters.
+// Use PyUnicode_AsUTF8AndSize() to accept embedded null characters.
+//
+// This function caches the UTF-8 encoded string in the Unicode object
+// and subsequent calls will return the same string. The memory is released
+// when the Unicode object is deallocated.
PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
+
/* === Characters Type APIs =============================================== */
/* These should not be used directly. Use the Py_UNICODE_IS* and
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index dee0071..1e5753d 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -443,17 +443,15 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
PyObject *unicode /* Unicode object */
);
-/* Returns a pointer to the default encoding (UTF-8) of the
- Unicode object unicode and the size of the encoded representation
- in bytes stored in *size.
-
- In case of an error, no *size is set.
-
- This function caches the UTF-8 encoded string in the unicodeobject
- and subsequent calls will return the same string. The memory is released
- when the unicodeobject is deallocated.
-*/
-
+// Returns a pointer to the default encoding (UTF-8) of the
+// Unicode object unicode and the size of the encoded representation
+// in bytes stored in `*size` (if size is not NULL).
+//
+// On error, `*size` is set to 0 (if size is not NULL).
+//
+// This function caches the UTF-8 encoded string in the Unicode object
+// and subsequent calls will return the same string. The memory is released
+// when the Unicode object is deallocated.
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000
PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
PyObject *unicode,