diff options
author | Victor Stinner <vstinner@python.org> | 2023-10-20 15:59:29 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-20 15:59:29 (GMT) |
commit | d731579bfb9a497cfb0076cb6b221058a20088fe (patch) | |
tree | 63eb8f9b8dfb7af9a5de6d4499ab375b80f208d4 /Include | |
parent | 59ea0f523e155ac1a471cd292b41a76241fccd36 (diff) | |
download | cpython-d731579bfb9a497cfb0076cb6b221058a20088fe.zip cpython-d731579bfb9a497cfb0076cb6b221058a20088fe.tar.gz cpython-d731579bfb9a497cfb0076cb6b221058a20088fe.tar.bz2 |
gh-111089: PyUnicode_AsUTF8() now raises on embedded NUL (#111091)
* PyUnicode_AsUTF8() now raises an exception if the string contains
embedded null characters.
* Update related C API tests (test_capi.test_unicode).
* type_new_set_doc() now uses PyUnicode_AsUTF8AndSize() so that a
docstring containing null bytes is still silently truncated at the
first null byte instead of raising an exception.
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Include')
-rw-r--r-- | Include/cpython/unicodeobject.h | 20 | ||||
-rw-r--r-- | Include/unicodeobject.h | 20 |
2 files changed, 19 insertions, 21 deletions
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index 859ab71..d67553c 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -442,18 +442,18 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
 
 /* --- Manage the default encoding ---------------------------------------- */
 
-/* Returns a pointer to the default encoding (UTF-8) of the
-   Unicode object unicode.
-
-   Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
-   in the unicodeobject.
-
-   Use of this API is DEPRECATED since no size information can be
-   extracted from the returned data.
-*/
-
+// Returns a pointer to the default encoding (UTF-8) of the
+// Unicode object unicode.
+//
+// Raise an exception if the string contains embedded null characters.
+// Use PyUnicode_AsUTF8AndSize() to accept embedded null characters.
+//
+// This function caches the UTF-8 encoded string in the Unicode object
+// and subsequent calls will return the same string. The memory is released
+// when the Unicode object is deallocated.
 PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
 
+
 /* === Characters Type APIs =============================================== */
 
 /* These should not be used directly. Use the Py_UNICODE_IS* and
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index dee0071..1e5753d 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -443,17 +443,15 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
     PyObject *unicode /* Unicode object */
     );
 
-/* Returns a pointer to the default encoding (UTF-8) of the
-   Unicode object unicode and the size of the encoded representation
-   in bytes stored in *size.
-
-   In case of an error, no *size is set.
-
-   This function caches the UTF-8 encoded string in the unicodeobject
-   and subsequent calls will return the same string. The memory is released
-   when the unicodeobject is deallocated.
-*/
-
+// Returns a pointer to the default encoding (UTF-8) of the
+// Unicode object unicode and the size of the encoded representation
+// in bytes stored in `*size` (if size is not NULL).
+//
+// On error, `*size` is set to 0 (if size is not NULL).
+//
+// This function caches the UTF-8 encoded string in the Unicode object
+// and subsequent calls will return the same string. The memory is released
+// when the Unicode object is deallocated.
 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000
 PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
     PyObject *unicode,