summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bénédikt Tran <10796600+picnixz@users.noreply.github.com> 2024-10-08 11:37:59 (GMT)
committer GitHub <noreply@github.com> 2024-10-08 11:37:59 (GMT)
commit ba14dfafd97d1fd03938ac8ddec4ca5b2f12985d (patch)
tree fe2735d7b7d1dbcec3f0bd4e255b26e9a7b0bf00
parent 19984fe024bfd90649f1c36b78c9abf3ed72b27d (diff)
download cpython-ba14dfafd97d1fd03938ac8ddec4ca5b2f12985d.zip
cpython-ba14dfafd97d1fd03938ac8ddec4ca5b2f12985d.tar.gz
cpython-ba14dfafd97d1fd03938ac8ddec4ca5b2f12985d.tar.bz2
gh-123378: fix a crash in `UnicodeError.__str__` (#124935)
-rw-r--r-- Lib/test/test_exceptions.py 24
-rw-r--r-- Misc/NEWS.d/next/Core_and_Builtins/2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst 3
-rw-r--r-- Objects/exceptions.c 111
3 files changed, 93 insertions, 45 deletions
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index ba858c4..b3c21cd 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -8,6 +8,7 @@ import pickle
import weakref
import errno
from codecs import BOM_UTF8
+from itertools import product
from textwrap import dedent
from test.support import (captured_stderr, check_impl_detail,
@@ -1336,6 +1337,29 @@ class ExceptionTests(unittest.TestCase):
for klass in klasses:
self.assertEqual(str(klass.__new__(klass)), "")
+ def test_unicode_error_str_does_not_crash(self):
+ # Test that str(UnicodeError(...)) does not crash.
+ # See https://github.com/python/cpython/issues/123378.
+
+ for start, end, objlen in product(
+ range(-5, 5),
+ range(-5, 5),
+ range(7),
+ ):
+ obj = 'a' * objlen
+ with self.subTest('encode', objlen=objlen, start=start, end=end):
+ exc = UnicodeEncodeError('utf-8', obj, start, end, '')
+ self.assertIsInstance(str(exc), str)
+
+ with self.subTest('translate', objlen=objlen, start=start, end=end):
+ exc = UnicodeTranslateError(obj, start, end, '')
+ self.assertIsInstance(str(exc), str)
+
+ encoded = obj.encode()
+ with self.subTest('decode', objlen=objlen, start=start, end=end):
+ exc = UnicodeDecodeError('utf-8', encoded, start, end, '')
+ self.assertIsInstance(str(exc), str)
+
@no_tracing
def test_badisinstance(self):
# Bug #2542: if issubclass(e, MyException) raises an exception,
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst
new file mode 100644
index 0000000..5cd3453
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst
@@ -0,0 +1,3 @@
+Fix a crash in the :meth:`~object.__str__` method of :exc:`UnicodeError`
+objects when the :attr:`UnicodeError.start` and :attr:`UnicodeError.end`
+values are invalid or out-of-range. Patch by Bénédikt Tran.
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index b391085..c685481 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2994,46 +2994,55 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject *
UnicodeEncodeError_str(PyObject *self)
{
- PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
+ PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
PyObject *result = NULL;
PyObject *reason_str = NULL;
PyObject *encoding_str = NULL;
- if (!uself->object)
+ if (exc->object == NULL) {
/* Not properly initialized. */
return PyUnicode_FromString("");
+ }
/* Get reason and encoding as strings, which they might not be if
they've been modified after we were constructed. */
- reason_str = PyObject_Str(uself->reason);
- if (reason_str == NULL)
+ reason_str = PyObject_Str(exc->reason);
+ if (reason_str == NULL) {
goto done;
- encoding_str = PyObject_Str(uself->encoding);
- if (encoding_str == NULL)
+ }
+ encoding_str = PyObject_Str(exc->encoding);
+ if (encoding_str == NULL) {
goto done;
+ }
+
+ Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
+ Py_ssize_t start = exc->start, end = exc->end;
- if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) {
- Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start);
+ if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
+ Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start);
const char *fmt;
- if (badchar <= 0xff)
+ if (badchar <= 0xff) {
fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U";
- else if (badchar <= 0xffff)
+ }
+ else if (badchar <= 0xffff) {
fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U";
- else
+ }
+ else {
fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U";
+ }
result = PyUnicode_FromFormat(
fmt,
encoding_str,
(int)badchar,
- uself->start,
+ start,
reason_str);
}
else {
result = PyUnicode_FromFormat(
"'%U' codec can't encode characters in position %zd-%zd: %U",
encoding_str,
- uself->start,
- uself->end-1,
+ start,
+ end - 1,
reason_str);
}
done:
@@ -3107,41 +3116,46 @@ error:
static PyObject *
UnicodeDecodeError_str(PyObject *self)
{
- PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
+ PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
PyObject *result = NULL;
PyObject *reason_str = NULL;
PyObject *encoding_str = NULL;
- if (!uself->object)
+ if (exc->object == NULL) {
/* Not properly initialized. */
return PyUnicode_FromString("");
+ }
/* Get reason and encoding as strings, which they might not be if
they've been modified after we were constructed. */
- reason_str = PyObject_Str(uself->reason);
- if (reason_str == NULL)
+ reason_str = PyObject_Str(exc->reason);
+ if (reason_str == NULL) {
goto done;
- encoding_str = PyObject_Str(uself->encoding);
- if (encoding_str == NULL)
+ }
+ encoding_str = PyObject_Str(exc->encoding);
+ if (encoding_str == NULL) {
goto done;
+ }
+
+ Py_ssize_t len = PyBytes_GET_SIZE(exc->object);
+ Py_ssize_t start = exc->start, end = exc->end;
- if (uself->start < PyBytes_GET_SIZE(uself->object) && uself->end == uself->start+1) {
- int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff);
+ if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
+ int badbyte = (int)(PyBytes_AS_STRING(exc->object)[start] & 0xff);
result = PyUnicode_FromFormat(
"'%U' codec can't decode byte 0x%02x in position %zd: %U",
encoding_str,
- byte,
- uself->start,
+ badbyte,
+ start,
reason_str);
}
else {
result = PyUnicode_FromFormat(
"'%U' codec can't decode bytes in position %zd-%zd: %U",
encoding_str,
- uself->start,
- uself->end-1,
- reason_str
- );
+ start,
+ end - 1,
+ reason_str);
}
done:
Py_XDECREF(reason_str);
@@ -3204,42 +3218,49 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
static PyObject *
UnicodeTranslateError_str(PyObject *self)
{
- PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
+ PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
PyObject *result = NULL;
PyObject *reason_str = NULL;
- if (!uself->object)
+ if (exc->object == NULL) {
/* Not properly initialized. */
return PyUnicode_FromString("");
+ }
/* Get reason as a string, which it might not be if it's been
modified after we were constructed. */
- reason_str = PyObject_Str(uself->reason);
- if (reason_str == NULL)
+ reason_str = PyObject_Str(exc->reason);
+ if (reason_str == NULL) {
goto done;
+ }
+
+ Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
+ Py_ssize_t start = exc->start, end = exc->end;
- if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) {
- Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start);
+ if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
+ Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start);
const char *fmt;
- if (badchar <= 0xff)
+ if (badchar <= 0xff) {
fmt = "can't translate character '\\x%02x' in position %zd: %U";
- else if (badchar <= 0xffff)
+ }
+ else if (badchar <= 0xffff) {
fmt = "can't translate character '\\u%04x' in position %zd: %U";
- else
+ }
+ else {
fmt = "can't translate character '\\U%08x' in position %zd: %U";
+ }
result = PyUnicode_FromFormat(
fmt,
(int)badchar,
- uself->start,
- reason_str
- );
- } else {
+ start,
+ reason_str);
+ }
+ else {
result = PyUnicode_FromFormat(
"can't translate characters in position %zd-%zd: %U",
- uself->start,
- uself->end-1,
- reason_str
- );
+ start,
+ end - 1,
+ reason_str);
}
done:
Py_XDECREF(reason_str);