summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBénédikt Tran <10796600+picnixz@users.noreply.github.com>2024-12-13 16:16:22 (GMT)
committerGitHub <noreply@github.com>2024-12-13 16:16:22 (GMT)
commit8bc18182a7c28f86265c9d82bd0338137480921c (patch)
treef5cd49588b391d442e53d47caa06446d7ad345fc
parent6446408d426814bf2bc9d3911a91741f04d4bc4e (diff)
downloadcpython-8bc18182a7c28f86265c9d82bd0338137480921c.zip
cpython-8bc18182a7c28f86265c9d82bd0338137480921c.tar.gz
cpython-8bc18182a7c28f86265c9d82bd0338137480921c.tar.bz2
gh-127691: add type checks when using `PyUnicodeError` objects (GH-127694)
-rw-r--r--Doc/whatsnew/3.14.rst6
-rw-r--r--Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst3
-rw-r--r--Objects/exceptions.c216
3 files changed, 157 insertions, 68 deletions
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 5ce398a..0959492 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -1045,6 +1045,12 @@ New features
* Add :c:func:`PyUnstable_Object_EnableDeferredRefcount` for enabling
deferred reference counting, as outlined in :pep:`703`.
+* The :ref:`Unicode Exception Objects <unicodeexceptions>` C API
+ now raises a :exc:`TypeError` if its exception argument is not
+ a :exc:`UnicodeError` object.
+ (Contributed by Bénédikt Tran in :gh:`127691`.)
+
+
Porting to Python 3.14
----------------------
diff --git a/Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst b/Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst
new file mode 100644
index 0000000..c942ff3
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst
@@ -0,0 +1,3 @@
+The :ref:`Unicode Exception Objects <unicodeexceptions>` C API
+now raises a :exc:`TypeError` if its exception argument is not
+a :exc:`UnicodeError` object. Patch by Bénédikt Tran.
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 287cbc2..6880c24 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2668,7 +2668,7 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError,
"Unicode related error.");
static PyObject *
-get_string(PyObject *attr, const char *name)
+get_bytes(PyObject *attr, const char *name)
{
if (!attr) {
PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
@@ -2748,40 +2748,74 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
return end;
}
+#define _PyUnicodeError_CAST(PTR) ((PyUnicodeErrorObject *)(PTR))
+#define PyUnicodeError_Check(PTR) \
+ PyObject_TypeCheck((PTR), (PyTypeObject *)PyExc_UnicodeError)
+#define PyUnicodeError_CAST(PTR) \
+ (assert(PyUnicodeError_Check(PTR)), _PyUnicodeError_CAST(PTR))
+
+
+static inline int
+check_unicode_error_type(PyObject *self, const char *expect_type)
+{
+ if (!PyUnicodeError_Check(self)) {
+ PyErr_Format(PyExc_TypeError,
+ "expecting a %s object, got %T", expect_type, self);
+ return -1;
+ }
+ return 0;
+}
+
+
+static inline PyUnicodeErrorObject *
+as_unicode_error(PyObject *self, const char *expect_type)
+{
+ int rc = check_unicode_error_type(self, expect_type);
+ return rc < 0 ? NULL : _PyUnicodeError_CAST(self);
+}
+
PyObject *
-PyUnicodeEncodeError_GetEncoding(PyObject *exc)
+PyUnicodeEncodeError_GetEncoding(PyObject *self)
{
- return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding");
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
+ return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding");
}
PyObject *
-PyUnicodeDecodeError_GetEncoding(PyObject *exc)
+PyUnicodeDecodeError_GetEncoding(PyObject *self)
{
- return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding");
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
+ return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding");
}
PyObject *
-PyUnicodeEncodeError_GetObject(PyObject *exc)
+PyUnicodeEncodeError_GetObject(PyObject *self)
{
- return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object");
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
+ return exc == NULL ? NULL : get_unicode(exc->object, "object");
}
PyObject *
-PyUnicodeDecodeError_GetObject(PyObject *exc)
+PyUnicodeDecodeError_GetObject(PyObject *self)
{
- return get_string(((PyUnicodeErrorObject *)exc)->object, "object");
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
+ return exc == NULL ? NULL : get_bytes(exc->object, "object");
}
PyObject *
-PyUnicodeTranslateError_GetObject(PyObject *exc)
+PyUnicodeTranslateError_GetObject(PyObject *self)
{
- return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object");
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
+ return exc == NULL ? NULL : get_unicode(exc->object, "object");
}
int
PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start)
{
- PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
+ if (exc == NULL) {
+ return -1;
+ }
PyObject *obj = get_unicode(exc->object, "object");
if (obj == NULL) {
return -1;
@@ -2796,8 +2830,11 @@ PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start)
int
PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start)
{
- PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
- PyObject *obj = get_string(exc->object, "object");
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
+ if (exc == NULL) {
+ return -1;
+ }
+ PyObject *obj = get_bytes(exc->object, "object");
if (obj == NULL) {
return -1;
}
@@ -2809,45 +2846,63 @@ PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start)
int
-PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start)
+PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start)
{
- return PyUnicodeEncodeError_GetStart(exc, start);
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
+ if (exc == NULL) {
+ return -1;
+ }
+ PyObject *obj = get_unicode(exc->object, "object");
+ if (obj == NULL) {
+ return -1;
+ }
+ Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
+ Py_DECREF(obj);
+ *start = unicode_error_adjust_start(exc->start, size);
+ return 0;
}
static inline int
unicode_error_set_start_impl(PyObject *self, Py_ssize_t start)
{
- ((PyUnicodeErrorObject *)self)->start = start;
+ PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self);
+ exc->start = start;
return 0;
}
int
-PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start)
+PyUnicodeEncodeError_SetStart(PyObject *self, Py_ssize_t start)
{
- return unicode_error_set_start_impl(exc, start);
+ int rc = check_unicode_error_type(self, "UnicodeEncodeError");
+ return rc < 0 ? -1 : unicode_error_set_start_impl(self, start);
}
int
-PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start)
+PyUnicodeDecodeError_SetStart(PyObject *self, Py_ssize_t start)
{
- return unicode_error_set_start_impl(exc, start);
+ int rc = check_unicode_error_type(self, "UnicodeDecodeError");
+ return rc < 0 ? -1 : unicode_error_set_start_impl(self, start);
}
int
-PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start)
+PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start)
{
- return unicode_error_set_start_impl(exc, start);
+ int rc = check_unicode_error_type(self, "UnicodeTranslateError");
+ return rc < 0 ? -1 : unicode_error_set_start_impl(self, start);
}
int
PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end)
{
- PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
+ if (exc == NULL) {
+ return -1;
+ }
PyObject *obj = get_unicode(exc->object, "object");
if (obj == NULL) {
return -1;
@@ -2862,8 +2917,11 @@ PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end)
int
PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end)
{
- PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
- PyObject *obj = get_string(exc->object, "object");
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
+ if (exc == NULL) {
+ return -1;
+ }
+ PyObject *obj = get_bytes(exc->object, "object");
if (obj == NULL) {
return -1;
}
@@ -2875,108 +2933,130 @@ PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end)
int
-PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end)
+PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end)
{
- return PyUnicodeEncodeError_GetEnd(exc, end);
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
+ if (exc == NULL) {
+ return -1;
+ }
+ PyObject *obj = get_unicode(exc->object, "object");
+ if (obj == NULL) {
+ return -1;
+ }
+ Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
+ Py_DECREF(obj);
+ *end = unicode_error_adjust_end(exc->end, size);
+ return 0;
}
static inline int
-unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end)
+unicode_error_set_end_impl(PyObject *self, Py_ssize_t end)
{
- ((PyUnicodeErrorObject *)exc)->end = end;
+ PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self);
+ exc->end = end;
return 0;
}
int
-PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end)
+PyUnicodeEncodeError_SetEnd(PyObject *self, Py_ssize_t end)
{
- return unicode_error_set_end_impl(exc, end);
+ int rc = check_unicode_error_type(self, "UnicodeEncodeError");
+ return rc < 0 ? -1 : unicode_error_set_end_impl(self, end);
}
int
-PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end)
+PyUnicodeDecodeError_SetEnd(PyObject *self, Py_ssize_t end)
{
- return unicode_error_set_end_impl(exc, end);
+ int rc = check_unicode_error_type(self, "UnicodeDecodeError");
+ return rc < 0 ? -1 : unicode_error_set_end_impl(self, end);
}
int
-PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end)
+PyUnicodeTranslateError_SetEnd(PyObject *self, Py_ssize_t end)
{
- return unicode_error_set_end_impl(exc, end);
+ int rc = check_unicode_error_type(self, "UnicodeTranslateError");
+ return rc < 0 ? -1 : unicode_error_set_end_impl(self, end);
}
+
PyObject *
-PyUnicodeEncodeError_GetReason(PyObject *exc)
+PyUnicodeEncodeError_GetReason(PyObject *self)
{
- return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason");
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
+ return exc == NULL ? NULL : get_unicode(exc->reason, "reason");
}
PyObject *
-PyUnicodeDecodeError_GetReason(PyObject *exc)
+PyUnicodeDecodeError_GetReason(PyObject *self)
{
- return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason");
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
+ return exc == NULL ? NULL : get_unicode(exc->reason, "reason");
}
PyObject *
-PyUnicodeTranslateError_GetReason(PyObject *exc)
+PyUnicodeTranslateError_GetReason(PyObject *self)
{
- return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason");
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
+ return exc == NULL ? NULL : get_unicode(exc->reason, "reason");
}
int
-PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason)
+PyUnicodeEncodeError_SetReason(PyObject *self, const char *reason)
{
- return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason,
- reason);
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
+ return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason);
}
int
-PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason)
+PyUnicodeDecodeError_SetReason(PyObject *self, const char *reason)
{
- return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason,
- reason);
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
+ return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason);
}
int
-PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason)
+PyUnicodeTranslateError_SetReason(PyObject *self, const char *reason)
{
- return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason,
- reason);
+ PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
+ return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason);
}
static int
-UnicodeError_clear(PyUnicodeErrorObject *self)
+UnicodeError_clear(PyObject *self)
{
- Py_CLEAR(self->encoding);
- Py_CLEAR(self->object);
- Py_CLEAR(self->reason);
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
+ Py_CLEAR(exc->encoding);
+ Py_CLEAR(exc->object);
+ Py_CLEAR(exc->reason);
return BaseException_clear((PyBaseExceptionObject *)self);
}
static void
-UnicodeError_dealloc(PyUnicodeErrorObject *self)
+UnicodeError_dealloc(PyObject *self)
{
+ PyTypeObject *type = Py_TYPE(self);
_PyObject_GC_UNTRACK(self);
- UnicodeError_clear(self);
- Py_TYPE(self)->tp_free((PyObject *)self);
+ (void)UnicodeError_clear(self);
+ type->tp_free(self);
}
static int
-UnicodeError_traverse(PyUnicodeErrorObject *self, visitproc visit, void *arg)
+UnicodeError_traverse(PyObject *self, visitproc visit, void *arg)
{
- Py_VISIT(self->encoding);
- Py_VISIT(self->object);
- Py_VISIT(self->reason);
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
+ Py_VISIT(exc->encoding);
+ Py_VISIT(exc->object);
+ Py_VISIT(exc->reason);
return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg);
}
@@ -3015,7 +3095,7 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
return -1;
}
- PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
Py_XSETREF(exc->encoding, Py_NewRef(encoding));
Py_XSETREF(exc->object, Py_NewRef(object));
exc->start = start;
@@ -3027,7 +3107,7 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject *
UnicodeEncodeError_str(PyObject *self)
{
- PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
PyObject *result = NULL;
PyObject *reason_str = NULL;
PyObject *encoding_str = NULL;
@@ -3135,7 +3215,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
}
}
- PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
Py_XSETREF(exc->encoding, Py_NewRef(encoding));
Py_XSETREF(exc->object, object /* already a strong reference */);
exc->start = start;
@@ -3147,7 +3227,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject *
UnicodeDecodeError_str(PyObject *self)
{
- PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
PyObject *result = NULL;
PyObject *reason_str = NULL;
PyObject *encoding_str = NULL;
@@ -3236,7 +3316,7 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds)
return -1;
}
- PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
Py_XSETREF(exc->object, Py_NewRef(object));
exc->start = start;
exc->end = end;
@@ -3248,7 +3328,7 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject *
UnicodeTranslateError_str(PyObject *self)
{
- PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
PyObject *result = NULL;
PyObject *reason_str = NULL;