summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@python.org> 2023-11-07 22:36:13 (GMT)
committer: GitHub <noreply@github.com> 2023-11-07 22:36:13 (GMT)
commit: 11e83488c5a4a6e75a4f363a2e1a45574fd53573 (patch)
tree: 4d3ad20c063f098a2b142aace0baade00e465ac9 /Objects
parent: ea970fb116a114f2c47cc8f21df00166d43ab78b (diff)
download: cpython-11e83488c5a4a6e75a4f363a2e1a45574fd53573.zip
download: cpython-11e83488c5a4a6e75a4f363a2e1a45574fd53573.tar.gz
download: cpython-11e83488c5a4a6e75a4f363a2e1a45574fd53573.tar.bz2
gh-111089: Revert PyUnicode_AsUTF8() changes (#111833)
* Revert "gh-111089: Use PyUnicode_AsUTF8() in Argument Clinic (#111585)"
  This reverts commit d9b606b3d04fc56fb0bcc479d7d6c14562edb5e2.
* Revert "gh-111089: Use PyUnicode_AsUTF8() in getargs.c (#111620)"
  This reverts commit cde1071b2a72e8261ca66053ef61431b7f3a81fd.
* Revert "gh-111089: PyUnicode_AsUTF8() now raises on embedded NUL (#111091)"
  This reverts commit d731579bfb9a497cfb0076cb6b221058a20088fe.
* Revert "gh-111089: Add PyUnicode_AsUTF8() to the limited C API (#111121)"
  This reverts commit d8f32be5b6a736dc2fc9dca3f1bf176c82fc9b44.
* Revert "gh-111089: Use PyUnicode_AsUTF8() in sqlite3 (#111122)"
  This reverts commit 37e4e20eaa8f27ada926d49e5971fecf0477ad26.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/clinic/bytearrayobject.c.h | 30
-rw-r--r-- Objects/clinic/bytesobject.c.h | 30
-rw-r--r-- Objects/clinic/floatobject.c.h | 9
-rw-r--r-- Objects/clinic/memoryobject.c.h | 9
-rw-r--r-- Objects/clinic/unicodeobject.c.h | 30
-rw-r--r-- Objects/typeobject.c | 5
-rw-r--r-- Objects/unicodeobject.c | 8
7 files changed, 92 insertions, 29 deletions
diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h
index 355ae49..d952450 100644
--- a/Objects/clinic/bytearrayobject.c.h
+++ b/Objects/clinic/bytearrayobject.c.h
@@ -68,10 +68,15 @@ bytearray___init__(PyObject *self, PyObject *args, PyObject *kwargs)
_PyArg_BadArgument("bytearray", "argument 'encoding'", "str", fastargs[1]);
goto exit;
}
- encoding = PyUnicode_AsUTF8(fastargs[1]);
+ Py_ssize_t encoding_length;
+ encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length);
if (encoding == NULL) {
goto exit;
}
+ if (strlen(encoding) != (size_t)encoding_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
if (!--noptargs) {
goto skip_optional_pos;
}
@@ -80,10 +85,15 @@ bytearray___init__(PyObject *self, PyObject *args, PyObject *kwargs)
_PyArg_BadArgument("bytearray", "argument 'errors'", "str", fastargs[2]);
goto exit;
}
- errors = PyUnicode_AsUTF8(fastargs[2]);
+ Py_ssize_t errors_length;
+ errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length);
if (errors == NULL) {
goto exit;
}
+ if (strlen(errors) != (size_t)errors_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
skip_optional_pos:
return_value = bytearray___init___impl((PyByteArrayObject *)self, arg, encoding, errors);
@@ -950,10 +960,15 @@ bytearray_decode(PyByteArrayObject *self, PyObject *const *args, Py_ssize_t narg
_PyArg_BadArgument("decode", "argument 'encoding'", "str", args[0]);
goto exit;
}
- encoding = PyUnicode_AsUTF8(args[0]);
+ Py_ssize_t encoding_length;
+ encoding = PyUnicode_AsUTF8AndSize(args[0], &encoding_length);
if (encoding == NULL) {
goto exit;
}
+ if (strlen(encoding) != (size_t)encoding_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
if (!--noptargs) {
goto skip_optional_pos;
}
@@ -962,10 +977,15 @@ bytearray_decode(PyByteArrayObject *self, PyObject *const *args, Py_ssize_t narg
_PyArg_BadArgument("decode", "argument 'errors'", "str", args[1]);
goto exit;
}
- errors = PyUnicode_AsUTF8(args[1]);
+ Py_ssize_t errors_length;
+ errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length);
if (errors == NULL) {
goto exit;
}
+ if (strlen(errors) != (size_t)errors_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
skip_optional_pos:
return_value = bytearray_decode_impl(self, encoding, errors);
@@ -1241,4 +1261,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored))
{
return bytearray_sizeof_impl(self);
}
-/*[clinic end generated code: output=5a7de6295a7ce6cc input=a9049054013a1b77]*/
+/*[clinic end generated code: output=0797a5e03cda2a16 input=a9049054013a1b77]*/
diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h
index 042d0bf..1e45be3 100644
--- a/Objects/clinic/bytesobject.c.h
+++ b/Objects/clinic/bytesobject.c.h
@@ -720,10 +720,15 @@ bytes_decode(PyBytesObject *self, PyObject *const *args, Py_ssize_t nargs, PyObj
_PyArg_BadArgument("decode", "argument 'encoding'", "str", args[0]);
goto exit;
}
- encoding = PyUnicode_AsUTF8(args[0]);
+ Py_ssize_t encoding_length;
+ encoding = PyUnicode_AsUTF8AndSize(args[0], &encoding_length);
if (encoding == NULL) {
goto exit;
}
+ if (strlen(encoding) != (size_t)encoding_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
if (!--noptargs) {
goto skip_optional_pos;
}
@@ -732,10 +737,15 @@ bytes_decode(PyBytesObject *self, PyObject *const *args, Py_ssize_t nargs, PyObj
_PyArg_BadArgument("decode", "argument 'errors'", "str", args[1]);
goto exit;
}
- errors = PyUnicode_AsUTF8(args[1]);
+ Py_ssize_t errors_length;
+ errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length);
if (errors == NULL) {
goto exit;
}
+ if (strlen(errors) != (size_t)errors_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
skip_optional_pos:
return_value = bytes_decode_impl(self, encoding, errors);
@@ -987,10 +997,15 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
_PyArg_BadArgument("bytes", "argument 'encoding'", "str", fastargs[1]);
goto exit;
}
- encoding = PyUnicode_AsUTF8(fastargs[1]);
+ Py_ssize_t encoding_length;
+ encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length);
if (encoding == NULL) {
goto exit;
}
+ if (strlen(encoding) != (size_t)encoding_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
if (!--noptargs) {
goto skip_optional_pos;
}
@@ -999,14 +1014,19 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
_PyArg_BadArgument("bytes", "argument 'errors'", "str", fastargs[2]);
goto exit;
}
- errors = PyUnicode_AsUTF8(fastargs[2]);
+ Py_ssize_t errors_length;
+ errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length);
if (errors == NULL) {
goto exit;
}
+ if (strlen(errors) != (size_t)errors_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
skip_optional_pos:
return_value = bytes_new_impl(type, x, encoding, errors);
exit:
return return_value;
}
-/*[clinic end generated code: output=97aab3f6ae398664 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=8a49dbbd78914a6f input=a9049054013a1b77]*/
diff --git a/Objects/clinic/floatobject.c.h b/Objects/clinic/floatobject.c.h
index 8c33119..10f6149 100644
--- a/Objects/clinic/floatobject.c.h
+++ b/Objects/clinic/floatobject.c.h
@@ -275,10 +275,15 @@ float___getformat__(PyTypeObject *type, PyObject *arg)
_PyArg_BadArgument("__getformat__", "argument", "str", arg);
goto exit;
}
- typestr = PyUnicode_AsUTF8(arg);
+ Py_ssize_t typestr_length;
+ typestr = PyUnicode_AsUTF8AndSize(arg, &typestr_length);
if (typestr == NULL) {
goto exit;
}
+ if (strlen(typestr) != (size_t)typestr_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
return_value = float___getformat___impl(type, typestr);
exit:
@@ -313,4 +318,4 @@ float___format__(PyObject *self, PyObject *arg)
exit:
return return_value;
}
-/*[clinic end generated code: output=01f6fbd082eefead input=a9049054013a1b77]*/
+/*[clinic end generated code: output=c79743c8551c30d9 input=a9049054013a1b77]*/
diff --git a/Objects/clinic/memoryobject.c.h b/Objects/clinic/memoryobject.c.h
index ebc1e06..f199434 100644
--- a/Objects/clinic/memoryobject.c.h
+++ b/Objects/clinic/memoryobject.c.h
@@ -305,10 +305,15 @@ memoryview_tobytes(PyMemoryViewObject *self, PyObject *const *args, Py_ssize_t n
order = NULL;
}
else if (PyUnicode_Check(args[0])) {
- order = PyUnicode_AsUTF8(args[0]);
+ Py_ssize_t order_length;
+ order = PyUnicode_AsUTF8AndSize(args[0], &order_length);
if (order == NULL) {
goto exit;
}
+ if (strlen(order) != (size_t)order_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
}
else {
_PyArg_BadArgument("tobytes", "argument 'order'", "str or None", args[0]);
@@ -408,4 +413,4 @@ skip_optional_pos:
exit:
return return_value;
}
-/*[clinic end generated code: output=abd8c0ce804d8992 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=7e76a09106921ba2 input=a9049054013a1b77]*/
diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h
index 83e3bf2..7711434 100644
--- a/Objects/clinic/unicodeobject.c.h
+++ b/Objects/clinic/unicodeobject.c.h
@@ -203,10 +203,15 @@ unicode_encode(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject
_PyArg_BadArgument("encode", "argument 'encoding'", "str", args[0]);
goto exit;
}
- encoding = PyUnicode_AsUTF8(args[0]);
+ Py_ssize_t encoding_length;
+ encoding = PyUnicode_AsUTF8AndSize(args[0], &encoding_length);
if (encoding == NULL) {
goto exit;
}
+ if (strlen(encoding) != (size_t)encoding_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
if (!--noptargs) {
goto skip_optional_pos;
}
@@ -215,10 +220,15 @@ unicode_encode(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject
_PyArg_BadArgument("encode", "argument 'errors'", "str", args[1]);
goto exit;
}
- errors = PyUnicode_AsUTF8(args[1]);
+ Py_ssize_t errors_length;
+ errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length);
if (errors == NULL) {
goto exit;
}
+ if (strlen(errors) != (size_t)errors_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
skip_optional_pos:
return_value = unicode_encode_impl(self, encoding, errors);
@@ -1463,10 +1473,15 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
_PyArg_BadArgument("str", "argument 'encoding'", "str", fastargs[1]);
goto exit;
}
- encoding = PyUnicode_AsUTF8(fastargs[1]);
+ Py_ssize_t encoding_length;
+ encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length);
if (encoding == NULL) {
goto exit;
}
+ if (strlen(encoding) != (size_t)encoding_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
if (!--noptargs) {
goto skip_optional_pos;
}
@@ -1475,14 +1490,19 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
_PyArg_BadArgument("str", "argument 'errors'", "str", fastargs[2]);
goto exit;
}
- errors = PyUnicode_AsUTF8(fastargs[2]);
+ Py_ssize_t errors_length;
+ errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length);
if (errors == NULL) {
goto exit;
}
+ if (strlen(errors) != (size_t)errors_length) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ goto exit;
+ }
skip_optional_pos:
return_value = unicode_new_impl(type, x, encoding, errors);
exit:
return return_value;
}
-/*[clinic end generated code: output=20313d6339272ddc input=a9049054013a1b77]*/
+/*[clinic end generated code: output=873d8b3d09af3095 input=a9049054013a1b77]*/
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index f44e30c..557464c 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -3501,14 +3501,13 @@ type_new_set_doc(PyTypeObject *type)
return 0;
}
- Py_ssize_t doc_size;
- const char *doc_str = PyUnicode_AsUTF8AndSize(doc, &doc_size);
+ const char *doc_str = PyUnicode_AsUTF8(doc);
if (doc_str == NULL) {
return -1;
}
// Silently truncate the docstring if it contains a null byte
- Py_ssize_t size = doc_size + 1;
+ Py_ssize_t size = strlen(doc_str) + 1;
char *tp_doc = (char *)PyObject_Malloc(size);
if (tp_doc == NULL) {
PyErr_NoMemory();
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 87636ef..53e1e56 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3844,13 +3844,7 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
const char *
PyUnicode_AsUTF8(PyObject *unicode)
{
- Py_ssize_t size;
- const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, &size);
- if (utf8 != NULL && strlen(utf8) != (size_t)size) {
- PyErr_SetString(PyExc_ValueError, "embedded null character");
- return NULL;
- }
- return utf8;
+ return PyUnicode_AsUTF8AndSize(unicode, NULL);
}
/*