From bc0f2e945993747c8b1a6dd66cbe902fddd5758b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:13:52 +0100 Subject: gh-123378: Ensure results of `PyUnicode*Error_Get{Start,End}` are clamped (GH-123380) Co-authored-by: Sergey B Kirpichev --- Doc/c-api/exceptions.rst | 20 +- Doc/library/exceptions.rst | 6 + Lib/test/test_capi/test_exceptions.py | 150 +++++++++++++ .../2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 6 + .../2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst | 6 + Modules/_testcapi/exceptions.c | 167 ++++++++++++++ Objects/exceptions.c | 248 ++++++++++++--------- 7 files changed, 492 insertions(+), 111 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst create mode 100644 Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index fc2336d..c1f0bd7 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -853,12 +853,23 @@ The following functions are used to create and modify Unicode exceptions from C. *\*start*. *start* must not be ``NULL``. Return ``0`` on success, ``-1`` on failure. + If the :attr:`UnicodeError.object` is an empty sequence, the resulting + *start* is ``0``. Otherwise, it is clipped to ``[0, len(object) - 1]``. + + .. seealso:: :attr:`UnicodeError.start` + .. c:function:: int PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) int PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) - Set the *start* attribute of the given exception object to *start*. Return - ``0`` on success, ``-1`` on failure. + Set the *start* attribute of the given exception object to *start*. + Return ``0`` on success, ``-1`` on failure. + + .. 
note:: + + While passing a negative *start* does not raise an exception, + the corresponding getters will not consider it as a relative + offset. .. c:function:: int PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) int PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end) @@ -868,6 +879,9 @@ The following functions are used to create and modify Unicode exceptions from C. *\*end*. *end* must not be ``NULL``. Return ``0`` on success, ``-1`` on failure. + If the :attr:`UnicodeError.object` is an empty sequence, the resulting + *end* is ``0``. Otherwise, it is clipped to ``[1, len(object)]``. + .. c:function:: int PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) int PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) int PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) @@ -875,6 +889,8 @@ The following functions are used to create and modify Unicode exceptions from C. Set the *end* attribute of the given exception object to *end*. Return ``0`` on success, ``-1`` on failure. + .. seealso:: :attr:`UnicodeError.end` + .. c:function:: PyObject* PyUnicodeDecodeError_GetReason(PyObject *exc) PyObject* PyUnicodeEncodeError_GetReason(PyObject *exc) PyObject* PyUnicodeTranslateError_GetReason(PyObject *exc) diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index b5ba86f..f72b11e 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -644,10 +644,16 @@ The following exceptions are the exceptions that are usually raised. The first index of invalid data in :attr:`object`. + This value should not be negative as it is interpreted as an + absolute offset but this constraint is not enforced at runtime. + .. attribute:: end The index after the last invalid data in :attr:`object`. + This value should not be negative as it is interpreted as an + absolute offset but this constraint is not enforced at runtime. + .. 
exception:: UnicodeEncodeError diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index b22ddd8..666e2f2 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -415,6 +415,156 @@ class Test_ErrSetAndRestore(unittest.TestCase): # CRASHES formatunraisable(NULL, NULL) +class TestUnicodeTranslateError(UnicodeTranslateError): + # UnicodeTranslateError takes 4 arguments instead of 5, + # so we just make a UnicodeTranslateError class that is + # compatible with the UnicodeError.__init__. + def __init__(self, encoding, *args, **kwargs): + super().__init__(*args, **kwargs) + + +class TestUnicodeError(unittest.TestCase): + + def _check_no_crash(self, exc): + # ensure that the __str__() method does not crash + _ = str(exc) + + def test_unicode_encode_error_get_start(self): + get_start = _testcapi.unicode_encode_get_start + self._test_unicode_error_get_start('x', UnicodeEncodeError, get_start) + + def test_unicode_decode_error_get_start(self): + get_start = _testcapi.unicode_decode_get_start + self._test_unicode_error_get_start(b'x', UnicodeDecodeError, get_start) + + def test_unicode_translate_error_get_start(self): + get_start = _testcapi.unicode_translate_get_start + self._test_unicode_error_get_start('x', TestUnicodeTranslateError, get_start) + + def _test_unicode_error_get_start(self, literal, exc_type, get_start): + for obj_len, start, c_start in [ + # normal cases + (5, 0, 0), + (5, 1, 1), + (5, 2, 2), + # out of range start is clamped to max(0, obj_len - 1) + (0, 0, 0), + (0, 1, 0), + (0, 10, 0), + (5, 5, 4), + (5, 10, 4), + # negative values are allowed but clipped in the getter + (0, -1, 0), + (1, -1, 0), + (2, -1, 0), + (2, -2, 0), + ]: + obj = literal * obj_len + with self.subTest(obj, exc_type=exc_type, start=start): + exc = exc_type('utf-8', obj, start, obj_len, 'reason') + self.assertEqual(get_start(exc), c_start) + self._check_no_crash(exc) + + def 
test_unicode_encode_error_set_start(self): + set_start = _testcapi.unicode_encode_set_start + self._test_unicode_error_set_start('x', UnicodeEncodeError, set_start) + + def test_unicode_decode_error_set_start(self): + set_start = _testcapi.unicode_decode_set_start + self._test_unicode_error_set_start(b'x', UnicodeDecodeError, set_start) + + def test_unicode_translate_error_set_start(self): + set_start = _testcapi.unicode_translate_set_start + self._test_unicode_error_set_start('x', TestUnicodeTranslateError, set_start) + + def _test_unicode_error_set_start(self, literal, exc_type, set_start): + obj_len = 5 + obj = literal * obj_len + for new_start in range(-2 * obj_len, 2 * obj_len): + with self.subTest('C-API', obj=obj, exc_type=exc_type, new_start=new_start): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the C API setter + set_start(exc, new_start) + self.assertEqual(exc.start, new_start) + self._check_no_crash(exc) + + with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_start=new_start): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the attribute setter + exc.start = new_start + self.assertEqual(exc.start, new_start) + self._check_no_crash(exc) + + def test_unicode_encode_error_get_end(self): + get_end = _testcapi.unicode_encode_get_end + self._test_unicode_error_get_end('x', UnicodeEncodeError, get_end) + + def test_unicode_decode_error_get_end(self): + get_end = _testcapi.unicode_decode_get_end + self._test_unicode_error_get_end(b'x', UnicodeDecodeError, get_end) + + def test_unicode_translate_error_get_end(self): + get_end = _testcapi.unicode_translate_get_end + self._test_unicode_error_get_end('x', TestUnicodeTranslateError, get_end) + + def _test_unicode_error_get_end(self, literal, exc_type, get_end): + for obj_len, end, c_end in [ + # normal cases + (5, 0, 1), + (5, 1, 1), + (5, 2, 2), + # out-of-range clipped in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] + (0, 0, 
0), + (0, 1, 0), + (0, 10, 0), + (1, 1, 1), + (1, 2, 1), + (5, 5, 5), + (5, 5, 5), + (5, 10, 5), + # negative values are allowed but clipped in the getter + (0, -1, 0), + (1, -1, 1), + (2, -1, 1), + (2, -2, 1), + ]: + obj = literal * obj_len + with self.subTest(obj, exc_type=exc_type, end=end): + exc = exc_type('utf-8', obj, 0, end, 'reason') + self.assertEqual(get_end(exc), c_end) + self._check_no_crash(exc) + + def test_unicode_encode_error_set_end(self): + set_end = _testcapi.unicode_encode_set_end + self._test_unicode_error_set_end('x', UnicodeEncodeError, set_end) + + def test_unicode_decode_error_set_end(self): + set_end = _testcapi.unicode_decode_set_end + self._test_unicode_error_set_end(b'x', UnicodeDecodeError, set_end) + + def test_unicode_translate_error_set_end(self): + set_end = _testcapi.unicode_translate_set_end + self._test_unicode_error_set_end('x', TestUnicodeTranslateError, set_end) + + def _test_unicode_error_set_end(self, literal, exc_type, set_end): + obj_len = 5 + obj = literal * obj_len + for new_end in range(-2 * obj_len, 2 * obj_len): + with self.subTest('C-API', obj=obj, exc_type=exc_type, new_end=new_end): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the C API setter + set_end(exc, new_end) + self.assertEqual(exc.end, new_end) + self._check_no_crash(exc) + + with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_end=new_end): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the attribute setter + exc.end = new_end + self.assertEqual(exc.end, new_end) + self._check_no_crash(exc) + + class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase): def setUp(self): diff --git a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst new file mode 100644 index 0000000..2cfb8b8 --- /dev/null +++ 
b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst @@ -0,0 +1,6 @@ +Ensure that the value of :attr:`UnicodeEncodeError.start <UnicodeError.start>` +retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lies in +``[0, max(0, objlen - 1)]`` where *objlen* is the length of +:attr:`UnicodeEncodeError.object <UnicodeError.object>`. Similar +arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` +and their corresponding C interface. Patch by Bénédikt Tran. diff --git a/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst new file mode 100644 index 0000000..1077515 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst @@ -0,0 +1,6 @@ +Ensure that the value of :attr:`UnicodeEncodeError.end <UnicodeError.end>` +retrieved by :c:func:`PyUnicodeEncodeError_GetEnd` lies in ``[min(1, objlen), +max(min(1, objlen), objlen)]`` where *objlen* is the length of +:attr:`UnicodeEncodeError.object <UnicodeError.object>`. Similar arguments +apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their +corresponding C interface. Patch by Bénédikt Tran. 
diff --git a/Modules/_testcapi/exceptions.c b/Modules/_testcapi/exceptions.c index 316ef0e..e92d967 100644 --- a/Modules/_testcapi/exceptions.c +++ b/Modules/_testcapi/exceptions.c @@ -359,6 +359,161 @@ _testcapi_unstable_exc_prep_reraise_star_impl(PyObject *module, return PyUnstable_Exc_PrepReraiseStar(orig, excs); } +/* Test PyUnicodeEncodeError_GetStart */ +static PyObject * +unicode_encode_get_start(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t start; + if (PyUnicodeEncodeError_GetStart(arg, &start) < 0) { + return NULL; + } + RETURN_SIZE(start); +} + +/* Test PyUnicodeDecodeError_GetStart */ +static PyObject * +unicode_decode_get_start(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t start; + if (PyUnicodeDecodeError_GetStart(arg, &start) < 0) { + return NULL; + } + RETURN_SIZE(start); +} + +/* Test PyUnicodeTranslateError_GetStart */ +static PyObject * +unicode_translate_get_start(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t start; + if (PyUnicodeTranslateError_GetStart(arg, &start) < 0) { + return NULL; + } + RETURN_SIZE(start); +} + +/* Test PyUnicodeEncodeError_SetStart; called with (exc, start) */ +static PyObject * +unicode_encode_set_start(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t start; + if (!PyArg_ParseTuple(args, "On", &exc, &start)) { + return NULL; + } + if (PyUnicodeEncodeError_SetStart(exc, start) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeDecodeError_SetStart; called with (exc, start) */ +static PyObject * +unicode_decode_set_start(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t start; + if (!PyArg_ParseTuple(args, "On", &exc, &start)) { + return NULL; + } + if (PyUnicodeDecodeError_SetStart(exc, start) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeTranslateError_SetStart; called with (exc, start) */ +static PyObject * +unicode_translate_set_start(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t start; + if (!PyArg_ParseTuple(args, "On", 
&exc, &start)) { + return NULL; + } + if (PyUnicodeTranslateError_SetStart(exc, start) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeEncodeError_GetEnd */ +static PyObject * +unicode_encode_get_end(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t end; + if (PyUnicodeEncodeError_GetEnd(arg, &end) < 0) { + return NULL; + } + RETURN_SIZE(end); +} + +/* Test PyUnicodeDecodeError_GetEnd */ +static PyObject * +unicode_decode_get_end(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t end; + if (PyUnicodeDecodeError_GetEnd(arg, &end) < 0) { + return NULL; + } + RETURN_SIZE(end); +} + +/* Test PyUnicodeTranslateError_GetEnd */ +static PyObject * +unicode_translate_get_end(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t end; + if (PyUnicodeTranslateError_GetEnd(arg, &end) < 0) { + return NULL; + } + RETURN_SIZE(end); +} + +/* Test PyUnicodeEncodeError_SetEnd; called with (exc, end) */ +static PyObject * +unicode_encode_set_end(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t end; + if (!PyArg_ParseTuple(args, "On", &exc, &end)) { + return NULL; + } + if (PyUnicodeEncodeError_SetEnd(exc, end) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeDecodeError_SetEnd; called with (exc, end) */ +static PyObject * +unicode_decode_set_end(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t end; + if (!PyArg_ParseTuple(args, "On", &exc, &end)) { + return NULL; + } + if (PyUnicodeDecodeError_SetEnd(exc, end) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeTranslateError_SetEnd; called with (exc, end) */ +static PyObject * +unicode_translate_set_end(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t end; + if (!PyArg_ParseTuple(args, "On", &exc, &end)) { + return NULL; + } + if (PyUnicodeTranslateError_SetEnd(exc, end) < 0) { + return NULL; + } + Py_RETURN_NONE; +} /* * Define the PyRecurdingInfinitelyError_Type @@ -403,6 +558,18 @@ static PyMethodDef test_methods[] = { 
_TESTCAPI_SET_EXCEPTION_METHODDEF _TESTCAPI_TRACEBACK_PRINT_METHODDEF _TESTCAPI_UNSTABLE_EXC_PREP_RERAISE_STAR_METHODDEF + {"unicode_encode_get_start", unicode_encode_get_start, METH_O}, + {"unicode_decode_get_start", unicode_decode_get_start, METH_O}, + {"unicode_translate_get_start", unicode_translate_get_start, METH_O}, + {"unicode_encode_set_start", unicode_encode_set_start, METH_VARARGS}, + {"unicode_decode_set_start", unicode_decode_set_start, METH_VARARGS}, + {"unicode_translate_set_start", unicode_translate_set_start, METH_VARARGS}, + {"unicode_encode_get_end", unicode_encode_get_end, METH_O}, + {"unicode_decode_get_end", unicode_decode_get_end, METH_O}, + {"unicode_translate_get_end", unicode_translate_get_end, METH_O}, + {"unicode_encode_set_end", unicode_encode_set_end, METH_VARARGS}, + {"unicode_decode_set_end", unicode_decode_set_end, METH_VARARGS}, + {"unicode_translate_set_end", unicode_translate_set_end, METH_VARARGS}, {NULL}, }; diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 6fbe0f1..124b591 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2708,6 +2708,46 @@ set_unicodefromstring(PyObject **attr, const char *value) return 0; } +/* + * Adjust the (inclusive) 'start' value of a UnicodeError object. + * + * The 'start' can be negative or not, but when adjusting the value, + * we clip it in [0, max(0, objlen - 1)] but do not interpret it as + * a relative offset. + */ +static inline Py_ssize_t +unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) +{ + assert(objlen >= 0); + if (start < 0) { + start = 0; + } + if (start >= objlen) { + start = objlen == 0 ? 0 : objlen - 1; + } + return start; +} + +/* + * Adjust the (exclusive) 'end' value of a UnicodeError object. + * + * The 'end' can be negative or not, but when adjusting the value, + * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] but + * do not interpret it as a relative offset. 
+ */ +static inline Py_ssize_t +unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) +{ + assert(objlen >= 0); + if (end < 1) { + end = 1; + } + if (end > objlen) { + end = objlen; + } + return end; +} + PyObject * PyUnicodeEncodeError_GetEncoding(PyObject *exc) { @@ -2739,38 +2779,31 @@ PyUnicodeTranslateError_GetObject(PyObject *exc) } int -PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start) +PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - Py_ssize_t size; - PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, - "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { return -1; - *start = ((PyUnicodeErrorObject *)exc)->start; - size = PyUnicode_GET_LENGTH(obj); - if (*start<0) - *start = 0; /*XXX check for values <0*/ - if (*start>=size) - *start = size-1; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); Py_DECREF(obj); + *start = unicode_error_adjust_start(exc->start, size); return 0; } int -PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start) +PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - Py_ssize_t size; - PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_string(exc->object, "object"); + if (obj == NULL) { return -1; - size = PyBytes_GET_SIZE(obj); - *start = ((PyUnicodeErrorObject *)exc)->start; - if (*start<0) - *start = 0; - if (*start>=size) - *start = size-1; + } + Py_ssize_t size = PyBytes_GET_SIZE(obj); Py_DECREF(obj); + *start = unicode_error_adjust_start(exc->start, size); return 0; } @@ -2782,63 +2815,61 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) } +static inline int +unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) +{ + ((PyUnicodeErrorObject *)self)->start = start; + return 0; +} + + int 
PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) { - ((PyUnicodeErrorObject *)exc)->start = start; - return 0; + return unicode_error_set_start_impl(exc, start); } int PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) { - ((PyUnicodeErrorObject *)exc)->start = start; - return 0; + return unicode_error_set_start_impl(exc, start); } int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) { - ((PyUnicodeErrorObject *)exc)->start = start; - return 0; + return unicode_error_set_start_impl(exc, start); } int -PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end) +PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - Py_ssize_t size; - PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, - "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { return -1; - *end = ((PyUnicodeErrorObject *)exc)->end; - size = PyUnicode_GET_LENGTH(obj); - if (*end<1) - *end = 1; - if (*end>size) - *end = size; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); Py_DECREF(obj); + *end = unicode_error_adjust_end(exc->end, size); return 0; } int -PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) +PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - Py_ssize_t size; - PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_string(exc->object, "object"); + if (obj == NULL) { return -1; - size = PyBytes_GET_SIZE(obj); - *end = ((PyUnicodeErrorObject *)exc)->end; - if (*end<1) - *end = 1; - if (*end>size) - *end = size; + } + Py_ssize_t size = PyBytes_GET_SIZE(obj); Py_DECREF(obj); + *end = unicode_error_adjust_end(exc->end, size); return 0; } @@ -2850,8 +2881,8 @@ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end) } -int -PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) 
+static inline int +unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end) { ((PyUnicodeErrorObject *)exc)->end = end; return 0; @@ -2859,18 +2890,23 @@ PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) int +PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) +{ + return unicode_error_set_end_impl(exc, end); +} + + +int PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) { - ((PyUnicodeErrorObject *)exc)->end = end; - return 0; + return unicode_error_set_end_impl(exc, end); } int PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) { - ((PyUnicodeErrorObject *)exc)->end = end; - return 0; + return unicode_error_set_end_impl(exc, end); } PyObject * @@ -2966,28 +3002,25 @@ static PyMemberDef UnicodeError_members[] = { static int UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) { - PyUnicodeErrorObject *err; - - if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) { return -1; + } - err = (PyUnicodeErrorObject *)self; - - Py_CLEAR(err->encoding); - Py_CLEAR(err->object); - Py_CLEAR(err->reason); + PyObject *encoding = NULL, *object = NULL, *reason = NULL; // borrowed + Py_ssize_t start = -1, end = -1; if (!PyArg_ParseTuple(args, "UUnnU", - &err->encoding, &err->object, - &err->start, &err->end, &err->reason)) { - err->encoding = err->object = err->reason = NULL; + &encoding, &object, &start, &end, &reason)) + { return -1; } - Py_INCREF(err->encoding); - Py_INCREF(err->object); - Py_INCREF(err->reason); - + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(exc->encoding, Py_NewRef(encoding)); + Py_XSETREF(exc->object, Py_NewRef(object)); + exc->start = start; + exc->end = end; + Py_XSETREF(exc->reason, Py_NewRef(reason)); return 0; } @@ -3073,44 +3106,42 @@ PyObject *PyExc_UnicodeEncodeError = (PyObject *)&_PyExc_UnicodeEncodeError; static int UnicodeDecodeError_init(PyObject *self, PyObject *args, 
PyObject *kwds) { - PyUnicodeErrorObject *ude; - - if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) { return -1; + } - ude = (PyUnicodeErrorObject *)self; - - Py_CLEAR(ude->encoding); - Py_CLEAR(ude->object); - Py_CLEAR(ude->reason); + PyObject *encoding = NULL, *object = NULL, *reason = NULL; // borrowed + Py_ssize_t start = -1, end = -1; if (!PyArg_ParseTuple(args, "UOnnU", - &ude->encoding, &ude->object, - &ude->start, &ude->end, &ude->reason)) { - ude->encoding = ude->object = ude->reason = NULL; - return -1; + &encoding, &object, &start, &end, &reason)) + { + return -1; } - Py_INCREF(ude->encoding); - Py_INCREF(ude->object); - Py_INCREF(ude->reason); - - if (!PyBytes_Check(ude->object)) { + if (PyBytes_Check(object)) { + Py_INCREF(object); // make 'object' a strong reference + } + else { Py_buffer view; - if (PyObject_GetBuffer(ude->object, &view, PyBUF_SIMPLE) != 0) - goto error; - Py_XSETREF(ude->object, PyBytes_FromStringAndSize(view.buf, view.len)); + if (PyObject_GetBuffer(object, &view, PyBUF_SIMPLE) != 0) { + return -1; + } + // 'object' is borrowed, so we can re-use the variable + object = PyBytes_FromStringAndSize(view.buf, view.len); PyBuffer_Release(&view); - if (!ude->object) - goto error; + if (object == NULL) { + return -1; + } } - return 0; -error: - Py_CLEAR(ude->encoding); - Py_CLEAR(ude->object); - Py_CLEAR(ude->reason); - return -1; + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(exc->encoding, Py_NewRef(encoding)); + Py_XSETREF(exc->object, object /* already a strong reference */); + exc->start = start; + exc->end = end; + Py_XSETREF(exc->reason, Py_NewRef(reason)); + return 0; } static PyObject * @@ -3192,25 +3223,24 @@ PyUnicodeDecodeError_Create( */ static int -UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args, - PyObject *kwds) +UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject 
*kwds) { - if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) { return -1; + } - Py_CLEAR(self->object); - Py_CLEAR(self->reason); + PyObject *object = NULL, *reason = NULL; // borrowed + Py_ssize_t start = -1, end = -1; - if (!PyArg_ParseTuple(args, "UnnU", - &self->object, - &self->start, &self->end, &self->reason)) { - self->object = self->reason = NULL; + if (!PyArg_ParseTuple(args, "UnnU", &object, &start, &end, &reason)) { return -1; } - Py_INCREF(self->object); - Py_INCREF(self->reason); - + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(exc->object, Py_NewRef(object)); + exc->start = start; + exc->end = end; + Py_XSETREF(exc->reason, Py_NewRef(reason)); return 0; } -- cgit v0.12