diff options
author | Victor Stinner <vstinner@python.org> | 2023-11-07 22:36:13 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-07 22:36:13 (GMT) |
commit | 11e83488c5a4a6e75a4f363a2e1a45574fd53573 (patch) | |
tree | 4d3ad20c063f098a2b142aace0baade00e465ac9 /Objects | |
parent | ea970fb116a114f2c47cc8f21df00166d43ab78b (diff) | |
download | cpython-11e83488c5a4a6e75a4f363a2e1a45574fd53573.zip cpython-11e83488c5a4a6e75a4f363a2e1a45574fd53573.tar.gz cpython-11e83488c5a4a6e75a4f363a2e1a45574fd53573.tar.bz2 |
gh-111089: Revert PyUnicode_AsUTF8() changes (#111833)
* Revert "gh-111089: Use PyUnicode_AsUTF8() in Argument Clinic (#111585)"
This reverts commit d9b606b3d04fc56fb0bcc479d7d6c14562edb5e2.
* Revert "gh-111089: Use PyUnicode_AsUTF8() in getargs.c (#111620)"
This reverts commit cde1071b2a72e8261ca66053ef61431b7f3a81fd.
* Revert "gh-111089: PyUnicode_AsUTF8() now raises on embedded NUL (#111091)"
This reverts commit d731579bfb9a497cfb0076cb6b221058a20088fe.
* Revert "gh-111089: Add PyUnicode_AsUTF8() to the limited C API (#111121)"
This reverts commit d8f32be5b6a736dc2fc9dca3f1bf176c82fc9b44.
* Revert "gh-111089: Use PyUnicode_AsUTF8() in sqlite3 (#111122)"
This reverts commit 37e4e20eaa8f27ada926d49e5971fecf0477ad26.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/clinic/bytearrayobject.c.h | 30 |
-rw-r--r-- | Objects/clinic/bytesobject.c.h | 30 |
-rw-r--r-- | Objects/clinic/floatobject.c.h | 9 |
-rw-r--r-- | Objects/clinic/memoryobject.c.h | 9 |
-rw-r--r-- | Objects/clinic/unicodeobject.c.h | 30 |
-rw-r--r-- | Objects/typeobject.c | 5 |
-rw-r--r-- | Objects/unicodeobject.c | 8 |
7 files changed, 92 insertions, 29 deletions
diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index 355ae49..d952450 100644 --- a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -68,10 +68,15 @@ bytearray___init__(PyObject *self, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("bytearray", "argument 'encoding'", "str", fastargs[1]); goto exit; } - encoding = PyUnicode_AsUTF8(fastargs[1]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -80,10 +85,15 @@ bytearray___init__(PyObject *self, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("bytearray", "argument 'errors'", "str", fastargs[2]); goto exit; } - errors = PyUnicode_AsUTF8(fastargs[2]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = bytearray___init___impl((PyByteArrayObject *)self, arg, encoding, errors); @@ -950,10 +960,15 @@ bytearray_decode(PyByteArrayObject *self, PyObject *const *args, Py_ssize_t narg _PyArg_BadArgument("decode", "argument 'encoding'", "str", args[0]); goto exit; } - encoding = PyUnicode_AsUTF8(args[0]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[0], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -962,10 +977,15 @@ bytearray_decode(PyByteArrayObject *self, PyObject *const *args, Py_ssize_t narg _PyArg_BadArgument("decode", "argument 
'errors'", "str", args[1]); goto exit; } - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = bytearray_decode_impl(self, encoding, errors); @@ -1241,4 +1261,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl(self); } -/*[clinic end generated code: output=5a7de6295a7ce6cc input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0797a5e03cda2a16 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h index 042d0bf..1e45be3 100644 --- a/Objects/clinic/bytesobject.c.h +++ b/Objects/clinic/bytesobject.c.h @@ -720,10 +720,15 @@ bytes_decode(PyBytesObject *self, PyObject *const *args, Py_ssize_t nargs, PyObj _PyArg_BadArgument("decode", "argument 'encoding'", "str", args[0]); goto exit; } - encoding = PyUnicode_AsUTF8(args[0]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[0], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -732,10 +737,15 @@ bytes_decode(PyBytesObject *self, PyObject *const *args, Py_ssize_t nargs, PyObj _PyArg_BadArgument("decode", "argument 'errors'", "str", args[1]); goto exit; } - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = bytes_decode_impl(self, encoding, errors); @@ -987,10 +997,15 
@@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("bytes", "argument 'encoding'", "str", fastargs[1]); goto exit; } - encoding = PyUnicode_AsUTF8(fastargs[1]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -999,14 +1014,19 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("bytes", "argument 'errors'", "str", fastargs[2]); goto exit; } - errors = PyUnicode_AsUTF8(fastargs[2]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = bytes_new_impl(type, x, encoding, errors); exit: return return_value; } -/*[clinic end generated code: output=97aab3f6ae398664 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8a49dbbd78914a6f input=a9049054013a1b77]*/ diff --git a/Objects/clinic/floatobject.c.h b/Objects/clinic/floatobject.c.h index 8c33119..10f6149 100644 --- a/Objects/clinic/floatobject.c.h +++ b/Objects/clinic/floatobject.c.h @@ -275,10 +275,15 @@ float___getformat__(PyTypeObject *type, PyObject *arg) _PyArg_BadArgument("__getformat__", "argument", "str", arg); goto exit; } - typestr = PyUnicode_AsUTF8(arg); + Py_ssize_t typestr_length; + typestr = PyUnicode_AsUTF8AndSize(arg, &typestr_length); if (typestr == NULL) { goto exit; } + if (strlen(typestr) != (size_t)typestr_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = float___getformat___impl(type, typestr); exit: @@ -313,4 +318,4 @@ float___format__(PyObject *self, PyObject 
*arg) exit: return return_value; } -/*[clinic end generated code: output=01f6fbd082eefead input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c79743c8551c30d9 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/memoryobject.c.h b/Objects/clinic/memoryobject.c.h index ebc1e06..f199434 100644 --- a/Objects/clinic/memoryobject.c.h +++ b/Objects/clinic/memoryobject.c.h @@ -305,10 +305,15 @@ memoryview_tobytes(PyMemoryViewObject *self, PyObject *const *args, Py_ssize_t n order = NULL; } else if (PyUnicode_Check(args[0])) { - order = PyUnicode_AsUTF8(args[0]); + Py_ssize_t order_length; + order = PyUnicode_AsUTF8AndSize(args[0], &order_length); if (order == NULL) { goto exit; } + if (strlen(order) != (size_t)order_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("tobytes", "argument 'order'", "str or None", args[0]); @@ -408,4 +413,4 @@ skip_optional_pos: exit: return return_value; } -/*[clinic end generated code: output=abd8c0ce804d8992 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7e76a09106921ba2 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 83e3bf2..7711434 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -203,10 +203,15 @@ unicode_encode(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject _PyArg_BadArgument("encode", "argument 'encoding'", "str", args[0]); goto exit; } - encoding = PyUnicode_AsUTF8(args[0]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[0], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -215,10 +220,15 @@ unicode_encode(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject _PyArg_BadArgument("encode", 
"argument 'errors'", "str", args[1]); goto exit; } - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = unicode_encode_impl(self, encoding, errors); @@ -1463,10 +1473,15 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("str", "argument 'encoding'", "str", fastargs[1]); goto exit; } - encoding = PyUnicode_AsUTF8(fastargs[1]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -1475,14 +1490,19 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("str", "argument 'errors'", "str", fastargs[2]); goto exit; } - errors = PyUnicode_AsUTF8(fastargs[2]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = unicode_new_impl(type, x, encoding, errors); exit: return return_value; } -/*[clinic end generated code: output=20313d6339272ddc input=a9049054013a1b77]*/ +/*[clinic end generated code: output=873d8b3d09af3095 input=a9049054013a1b77]*/ diff --git a/Objects/typeobject.c b/Objects/typeobject.c index f44e30c..557464c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3501,14 +3501,13 @@ type_new_set_doc(PyTypeObject *type) return 0; } - Py_ssize_t doc_size; - const char *doc_str = PyUnicode_AsUTF8AndSize(doc, &doc_size); + 
const char *doc_str = PyUnicode_AsUTF8(doc); if (doc_str == NULL) { return -1; } // Silently truncate the docstring if it contains a null byte - Py_ssize_t size = doc_size + 1; + Py_ssize_t size = strlen(doc_str) + 1; char *tp_doc = (char *)PyObject_Malloc(size); if (tp_doc == NULL) { PyErr_NoMemory(); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 87636ef..53e1e56 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3844,13 +3844,7 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize) const char * PyUnicode_AsUTF8(PyObject *unicode) { - Py_ssize_t size; - const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, &size); - if (utf8 != NULL && strlen(utf8) != (size_t)size) { - PyErr_SetString(PyExc_ValueError, "embedded null character"); - return NULL; - } - return utf8; + return PyUnicode_AsUTF8AndSize(unicode, NULL); } /* |