From bc0f2e945993747c8b1a6dd66cbe902fddd5758b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 4 Dec 2024 14:13:52 +0100
Subject: gh-123378: Ensure results of `PyUnicode*Error_Get{Start,End}` are
 clamped (GH-123380)

Co-authored-by: Sergey B Kirpichev <skirpichev@gmail.com>
---
 Doc/c-api/exceptions.rst                           |  20 +-
 Doc/library/exceptions.rst                         |   6 +
 Lib/test/test_capi/test_exceptions.py              | 150 +++++++++++++
 .../2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst |   6 +
 .../2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst |   6 +
 Modules/_testcapi/exceptions.c                     | 167 ++++++++++++++
 Objects/exceptions.c                               | 248 ++++++++++++---------
 7 files changed, 492 insertions(+), 111 deletions(-)
 create mode 100644 Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst
 create mode 100644 Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst

diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst
index fc2336d..c1f0bd7 100644
--- a/Doc/c-api/exceptions.rst
+++ b/Doc/c-api/exceptions.rst
@@ -853,12 +853,23 @@ The following functions are used to create and modify Unicode exceptions from C.
    *\*start*.  *start* must not be ``NULL``.  Return ``0`` on success, ``-1`` on
    failure.
 
+   If the :attr:`UnicodeError.object` is an empty sequence, the resulting
+   *start* is ``0``. Otherwise, it is clipped to ``[0, len(object) - 1]``.
+
+   .. seealso:: :attr:`UnicodeError.start`
+
 .. c:function:: int PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start)
                 int PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start)
                 int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start)
 
-   Set the *start* attribute of the given exception object to *start*.  Return
-   ``0`` on success, ``-1`` on failure.
+   Set the *start* attribute of the given exception object to *start*.
+   Return ``0`` on success, ``-1`` on failure.
+
+   .. note::
+
+      While passing a negative *start* does not raise an exception,
+      the corresponding getters will not consider it as a relative
+      offset.
 
 .. c:function:: int PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
                 int PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
@@ -868,6 +879,9 @@ The following functions are used to create and modify Unicode exceptions from C.
    *\*end*.  *end* must not be ``NULL``.  Return ``0`` on success, ``-1`` on
    failure.
 
+   If the :attr:`UnicodeError.object` is an empty sequence, the resulting
+   *end* is ``0``. Otherwise, it is clipped to ``[1, len(object)]``.
+
 .. c:function:: int PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end)
                 int PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end)
                 int PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end)
@@ -875,6 +889,8 @@ The following functions are used to create and modify Unicode exceptions from C.
    Set the *end* attribute of the given exception object to *end*.  Return ``0``
    on success, ``-1`` on failure.
 
+   .. seealso:: :attr:`UnicodeError.end`
+
 .. c:function:: PyObject* PyUnicodeDecodeError_GetReason(PyObject *exc)
                 PyObject* PyUnicodeEncodeError_GetReason(PyObject *exc)
                 PyObject* PyUnicodeTranslateError_GetReason(PyObject *exc)
diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst
index b5ba86f..f72b11e 100644
--- a/Doc/library/exceptions.rst
+++ b/Doc/library/exceptions.rst
@@ -644,10 +644,16 @@ The following exceptions are the exceptions that are usually raised.
 
        The first index of invalid data in :attr:`object`.
 
+       This value should not be negative as it is interpreted as an
+       absolute offset, but this constraint is not enforced at runtime.
+
    .. attribute:: end
 
        The index after the last invalid data in :attr:`object`.
 
+       This value should not be negative as it is interpreted as an
+       absolute offset, but this constraint is not enforced at runtime.
+
 
 .. exception:: UnicodeEncodeError
 
diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py
index b22ddd8..666e2f2 100644
--- a/Lib/test/test_capi/test_exceptions.py
+++ b/Lib/test/test_capi/test_exceptions.py
@@ -415,6 +415,156 @@ class Test_ErrSetAndRestore(unittest.TestCase):
         # CRASHES formatunraisable(NULL, NULL)
 
 
+class TestUnicodeTranslateError(UnicodeTranslateError):
+    # UnicodeTranslateError takes 4 arguments instead of 5,
+    # so we just make a UnicodeTranslateError class that is
+    # compatible with the UnicodeError.__init__.
+    def __init__(self, encoding, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+
+class TestUnicodeError(unittest.TestCase):
+
+    def _check_no_crash(self, exc):
+        # ensure that the __str__() method does not crash
+        _ = str(exc)
+
+    def test_unicode_encode_error_get_start(self):
+        get_start = _testcapi.unicode_encode_get_start
+        self._test_unicode_error_get_start('x', UnicodeEncodeError, get_start)
+
+    def test_unicode_decode_error_get_start(self):
+        get_start = _testcapi.unicode_decode_get_start
+        self._test_unicode_error_get_start(b'x', UnicodeDecodeError, get_start)
+
+    def test_unicode_translate_error_get_start(self):
+        get_start = _testcapi.unicode_translate_get_start
+        self._test_unicode_error_get_start('x', TestUnicodeTranslateError, get_start)
+
+    def _test_unicode_error_get_start(self, literal, exc_type, get_start):
+        for obj_len, start, c_start in [
+            # normal cases
+            (5, 0, 0),
+            (5, 1, 1),
+            (5, 2, 2),
+            # out of range start is clamped to max(0, obj_len - 1)
+            (0, 0, 0),
+            (0, 1, 0),
+            (0, 10, 0),
+            (5, 5, 4),
+            (5, 10, 4),
+            # negative values are allowed but clipped in the getter
+            (0, -1, 0),
+            (1, -1, 0),
+            (2, -1, 0),
+            (2, -2, 0),
+        ]:
+            obj = literal * obj_len
+            with self.subTest(obj, exc_type=exc_type, start=start):
+                exc = exc_type('utf-8', obj, start, obj_len, 'reason')
+                self.assertEqual(get_start(exc), c_start)
+                self._check_no_crash(exc)
+
+    def test_unicode_encode_error_set_start(self):
+        set_start = _testcapi.unicode_encode_set_start
+        self._test_unicode_error_set_start('x', UnicodeEncodeError, set_start)
+
+    def test_unicode_decode_error_set_start(self):
+        set_start = _testcapi.unicode_decode_set_start
+        self._test_unicode_error_set_start(b'x', UnicodeDecodeError, set_start)
+
+    def test_unicode_translate_error_set_start(self):
+        set_start = _testcapi.unicode_translate_set_start
+        self._test_unicode_error_set_start('x', TestUnicodeTranslateError, set_start)
+
+    def _test_unicode_error_set_start(self, literal, exc_type, set_start):
+        obj_len = 5
+        obj = literal * obj_len
+        for new_start in range(-2 * obj_len, 2 * obj_len):
+            with self.subTest('C-API', obj=obj, exc_type=exc_type, new_start=new_start):
+                exc = exc_type('utf-8', obj, 0, obj_len, 'reason')
+                # arbitrary value is allowed in the C API setter
+                set_start(exc, new_start)
+                self.assertEqual(exc.start, new_start)
+                self._check_no_crash(exc)
+
+            with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_start=new_start):
+                exc = exc_type('utf-8', obj, 0, obj_len, 'reason')
+                # arbitrary value is allowed in the attribute setter
+                exc.start = new_start
+                self.assertEqual(exc.start, new_start)
+                self._check_no_crash(exc)
+
+    def test_unicode_encode_error_get_end(self):
+        get_end = _testcapi.unicode_encode_get_end
+        self._test_unicode_error_get_end('x', UnicodeEncodeError, get_end)
+
+    def test_unicode_decode_error_get_end(self):
+        get_end = _testcapi.unicode_decode_get_end
+        self._test_unicode_error_get_end(b'x', UnicodeDecodeError, get_end)
+
+    def test_unicode_translate_error_get_end(self):
+        get_end = _testcapi.unicode_translate_get_end
+        self._test_unicode_error_get_end('x', TestUnicodeTranslateError, get_end)
+
+    def _test_unicode_error_get_end(self, literal, exc_type, get_end):
+        for obj_len, end, c_end in [
+            # normal cases
+            (5, 0, 1),
+            (5, 1, 1),
+            (5, 2, 2),
+            # out-of-range clipped in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)]
+            (0, 0, 0),
+            (0, 1, 0),
+            (0, 10, 0),
+            (1, 1, 1),
+            (1, 2, 1),
+            (5, 5, 5),
+            (5, 5, 5),
+            (5, 10, 5),
+            # negative values are allowed but clipped in the getter
+            (0, -1, 0),
+            (1, -1, 1),
+            (2, -1, 1),
+            (2, -2, 1),
+        ]:
+            obj = literal * obj_len
+            with self.subTest(obj, exc_type=exc_type, end=end):
+                exc = exc_type('utf-8', obj, 0, end, 'reason')
+                self.assertEqual(get_end(exc), c_end)
+                self._check_no_crash(exc)
+
+    def test_unicode_encode_error_set_end(self):
+        set_end = _testcapi.unicode_encode_set_end
+        self._test_unicode_error_set_end('x', UnicodeEncodeError, set_end)
+
+    def test_unicode_decode_error_set_end(self):
+        set_end = _testcapi.unicode_decode_set_end
+        self._test_unicode_error_set_end(b'x', UnicodeDecodeError, set_end)
+
+    def test_unicode_translate_error_set_end(self):
+        set_end = _testcapi.unicode_translate_set_end
+        self._test_unicode_error_set_end('x', TestUnicodeTranslateError, set_end)
+
+    def _test_unicode_error_set_end(self, literal, exc_type, set_end):
+        obj_len = 5
+        obj = literal * obj_len
+        for new_end in range(-2 * obj_len, 2 * obj_len):
+            with self.subTest('C-API', obj=obj, exc_type=exc_type, new_end=new_end):
+                exc = exc_type('utf-8', obj, 0, obj_len, 'reason')
+                # arbitrary value is allowed in the C API setter
+                set_end(exc, new_end)
+                self.assertEqual(exc.end, new_end)
+                self._check_no_crash(exc)
+
+            with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_end=new_end):
+                exc = exc_type('utf-8', obj, 0, obj_len, 'reason')
+                # arbitrary value is allowed in the attribute setter
+                exc.end = new_end
+                self.assertEqual(exc.end, new_end)
+                self._check_no_crash(exc)
+
+
 class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase):
 
     def setUp(self):
diff --git a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst
new file mode 100644
index 0000000..2cfb8b8
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst
@@ -0,0 +1,6 @@
+Ensure that the value of :attr:`UnicodeEncodeError.start <UnicodeError.start>`
+retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lies in
+``[0, max(0, objlen - 1)]`` where *objlen* is the length of
+:attr:`UnicodeEncodeError.object <UnicodeError.object>`. Similar
+arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError`
+and their corresponding C interface. Patch by Bénédikt Tran.
diff --git a/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst
new file mode 100644
index 0000000..1077515
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst
@@ -0,0 +1,6 @@
+Ensure that the value of :attr:`UnicodeEncodeError.end <UnicodeError.end>`
+retrieved by :c:func:`PyUnicodeEncodeError_GetEnd` lies in ``[min(1, objlen),
+max(min(1, objlen), objlen)]`` where *objlen* is the length of
+:attr:`UnicodeEncodeError.object <UnicodeError.object>`. Similar arguments
+apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their
+corresponding C interface. Patch by Bénédikt Tran.
diff --git a/Modules/_testcapi/exceptions.c b/Modules/_testcapi/exceptions.c
index 316ef0e..e92d967 100644
--- a/Modules/_testcapi/exceptions.c
+++ b/Modules/_testcapi/exceptions.c
@@ -359,6 +359,161 @@ _testcapi_unstable_exc_prep_reraise_star_impl(PyObject *module,
     return PyUnstable_Exc_PrepReraiseStar(orig, excs);
 }
 
+/* Test PyUnicodeEncodeError_GetStart */
+static PyObject *
+unicode_encode_get_start(PyObject *Py_UNUSED(module), PyObject *arg)
+{
+    Py_ssize_t start;
+    if (PyUnicodeEncodeError_GetStart(arg, &start) < 0) {
+        return NULL;
+    }
+    RETURN_SIZE(start);
+}
+
+/* Test PyUnicodeDecodeError_GetStart */
+static PyObject *
+unicode_decode_get_start(PyObject *Py_UNUSED(module), PyObject *arg)
+{
+    Py_ssize_t start;
+    if (PyUnicodeDecodeError_GetStart(arg, &start) < 0) {
+        return NULL;
+    }
+    RETURN_SIZE(start);
+}
+
+/* Test PyUnicodeTranslateError_GetStart */
+static PyObject *
+unicode_translate_get_start(PyObject *Py_UNUSED(module), PyObject *arg)
+{
+    Py_ssize_t start;
+    if (PyUnicodeTranslateError_GetStart(arg, &start) < 0) {
+        return NULL;
+    }
+    RETURN_SIZE(start);
+}
+
+/* Test PyUnicodeEncodeError_SetStart; 'args' is the tuple (exc, start) */
+static PyObject *
+unicode_encode_set_start(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *exc;
+    Py_ssize_t start;
+    if (!PyArg_ParseTuple(args, "On", &exc, &start)) {  // 0 means failure
+        return NULL;
+    }
+    if (PyUnicodeEncodeError_SetStart(exc, start) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+/* Test PyUnicodeDecodeError_SetStart; 'args' is the tuple (exc, start) */
+static PyObject *
+unicode_decode_set_start(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *exc;
+    Py_ssize_t start;
+    if (!PyArg_ParseTuple(args, "On", &exc, &start)) {  // 0 means failure
+        return NULL;
+    }
+    if (PyUnicodeDecodeError_SetStart(exc, start) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+/* Test PyUnicodeTranslateError_SetStart; 'args' is the tuple (exc, start) */
+static PyObject *
+unicode_translate_set_start(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *exc;
+    Py_ssize_t start;
+    if (!PyArg_ParseTuple(args, "On", &exc, &start)) {  // 0 means failure
+        return NULL;
+    }
+    if (PyUnicodeTranslateError_SetStart(exc, start) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+/* Test PyUnicodeEncodeError_GetEnd */
+static PyObject *
+unicode_encode_get_end(PyObject *Py_UNUSED(module), PyObject *arg)
+{
+    Py_ssize_t end;
+    if (PyUnicodeEncodeError_GetEnd(arg, &end) < 0) {
+        return NULL;
+    }
+    RETURN_SIZE(end);
+}
+
+/* Test PyUnicodeDecodeError_GetEnd */
+static PyObject *
+unicode_decode_get_end(PyObject *Py_UNUSED(module), PyObject *arg)
+{
+    Py_ssize_t end;
+    if (PyUnicodeDecodeError_GetEnd(arg, &end) < 0) {
+        return NULL;
+    }
+    RETURN_SIZE(end);
+}
+
+/* Test PyUnicodeTranslateError_GetEnd */
+static PyObject *
+unicode_translate_get_end(PyObject *Py_UNUSED(module), PyObject *arg)
+{
+    Py_ssize_t end;
+    if (PyUnicodeTranslateError_GetEnd(arg, &end) < 0) {
+        return NULL;
+    }
+    RETURN_SIZE(end);
+}
+
+/* Test PyUnicodeEncodeError_SetEnd; 'args' is the tuple (exc, end) */
+static PyObject *
+unicode_encode_set_end(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *exc;
+    Py_ssize_t end;
+    if (!PyArg_ParseTuple(args, "On", &exc, &end)) {  // 0 means failure
+        return NULL;
+    }
+    if (PyUnicodeEncodeError_SetEnd(exc, end) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+/* Test PyUnicodeDecodeError_SetEnd; 'args' is the tuple (exc, end) */
+static PyObject *
+unicode_decode_set_end(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *exc;
+    Py_ssize_t end;
+    if (!PyArg_ParseTuple(args, "On", &exc, &end)) {  // 0 means failure
+        return NULL;
+    }
+    if (PyUnicodeDecodeError_SetEnd(exc, end) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+/* Test PyUnicodeTranslateError_SetEnd; 'args' is the tuple (exc, end) */
+static PyObject *
+unicode_translate_set_end(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *exc;
+    Py_ssize_t end;
+    if (!PyArg_ParseTuple(args, "On", &exc, &end)) {  // 0 means failure
+        return NULL;
+    }
+    if (PyUnicodeTranslateError_SetEnd(exc, end) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
 
 /*
  * Define the PyRecurdingInfinitelyError_Type
@@ -403,6 +558,18 @@ static PyMethodDef test_methods[] = {
     _TESTCAPI_SET_EXCEPTION_METHODDEF
     _TESTCAPI_TRACEBACK_PRINT_METHODDEF
     _TESTCAPI_UNSTABLE_EXC_PREP_RERAISE_STAR_METHODDEF
+    {"unicode_encode_get_start", unicode_encode_get_start,       METH_O},
+    {"unicode_decode_get_start", unicode_decode_get_start,       METH_O},
+    {"unicode_translate_get_start", unicode_translate_get_start, METH_O},
+    {"unicode_encode_set_start", unicode_encode_set_start,       METH_VARARGS},
+    {"unicode_decode_set_start", unicode_decode_set_start,       METH_VARARGS},
+    {"unicode_translate_set_start", unicode_translate_set_start, METH_VARARGS},
+    {"unicode_encode_get_end", unicode_encode_get_end,           METH_O},
+    {"unicode_decode_get_end", unicode_decode_get_end,           METH_O},
+    {"unicode_translate_get_end", unicode_translate_get_end,     METH_O},
+    {"unicode_encode_set_end", unicode_encode_set_end,           METH_VARARGS},
+    {"unicode_decode_set_end", unicode_decode_set_end,           METH_VARARGS},
+    {"unicode_translate_set_end", unicode_translate_set_end,     METH_VARARGS},
     {NULL},
 };
 
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 6fbe0f1..124b591 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2708,6 +2708,46 @@ set_unicodefromstring(PyObject **attr, const char *value)
     return 0;
 }
 
+/*
+ * Adjust the (inclusive) 'start' value of a UnicodeError object.
+ *
+ * The 'start' can be negative or not, but when adjusting the value,
+ * we clip it in [0, max(0, objlen - 1)] but do not interpret it as
+ * a relative offset.
+ */
+static inline Py_ssize_t
+unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
+{
+    assert(objlen >= 0);
+    if (start < 0) {
+        start = 0;
+    }
+    if (start >= objlen) {
+        start = objlen == 0 ? 0 : objlen - 1;
+    }
+    return start;
+}
+
+/*
+ * Adjust the (exclusive) 'end' value of a UnicodeError object.
+ *
+ * The 'end' can be negative or not, but when adjusting the value,
+ * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] but
+ * do not interpret it as a relative offset.
+ */
+static inline Py_ssize_t
+unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
+{
+    assert(objlen >= 0);
+    if (end < 1) {
+        end = 1;
+    }
+    if (end > objlen) {
+        end = objlen;
+    }
+    return end;
+}
+
 PyObject *
 PyUnicodeEncodeError_GetEncoding(PyObject *exc)
 {
@@ -2739,38 +2779,31 @@ PyUnicodeTranslateError_GetObject(PyObject *exc)
 }
 
 int
-PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
+PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start)
 {
-    Py_ssize_t size;
-    PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object,
-                                "object");
-    if (!obj)
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    PyObject *obj = get_unicode(exc->object, "object");
+    if (obj == NULL) {
         return -1;
-    *start = ((PyUnicodeErrorObject *)exc)->start;
-    size = PyUnicode_GET_LENGTH(obj);
-    if (*start<0)
-        *start = 0; /*XXX check for values <0*/
-    if (*start>=size)
-        *start = size-1;
+    }
+    Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
     Py_DECREF(obj);
+    *start = unicode_error_adjust_start(exc->start, size);
     return 0;
 }
 
 
 int
-PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
+PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start)
 {
-    Py_ssize_t size;
-    PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object");
-    if (!obj)
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    PyObject *obj = get_string(exc->object, "object");
+    if (obj == NULL) {
         return -1;
-    size = PyBytes_GET_SIZE(obj);
-    *start = ((PyUnicodeErrorObject *)exc)->start;
-    if (*start<0)
-        *start = 0;
-    if (*start>=size)
-        *start = size-1;
+    }
+    Py_ssize_t size = PyBytes_GET_SIZE(obj);
     Py_DECREF(obj);
+    *start = unicode_error_adjust_start(exc->start, size);
     return 0;
 }
 
@@ -2782,63 +2815,61 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start)
 }
 
 
+static inline int
+unicode_error_set_start_impl(PyObject *self, Py_ssize_t start)
+{
+    ((PyUnicodeErrorObject *)self)->start = start;
+    return 0;
+}
+
+
 int
 PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start)
 {
-    ((PyUnicodeErrorObject *)exc)->start = start;
-    return 0;
+    return unicode_error_set_start_impl(exc, start);
 }
 
 
 int
 PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start)
 {
-    ((PyUnicodeErrorObject *)exc)->start = start;
-    return 0;
+    return unicode_error_set_start_impl(exc, start);
 }
 
 
 int
 PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start)
 {
-    ((PyUnicodeErrorObject *)exc)->start = start;
-    return 0;
+    return unicode_error_set_start_impl(exc, start);
 }
 
 
 int
-PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
+PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end)
 {
-    Py_ssize_t size;
-    PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object,
-                                "object");
-    if (!obj)
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    PyObject *obj = get_unicode(exc->object, "object");
+    if (obj == NULL) {
         return -1;
-    *end = ((PyUnicodeErrorObject *)exc)->end;
-    size = PyUnicode_GET_LENGTH(obj);
-    if (*end<1)
-        *end = 1;
-    if (*end>size)
-        *end = size;
+    }
+    Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
     Py_DECREF(obj);
+    *end = unicode_error_adjust_end(exc->end, size);
     return 0;
 }
 
 
 int
-PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
+PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end)
 {
-    Py_ssize_t size;
-    PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object");
-    if (!obj)
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    PyObject *obj = get_string(exc->object, "object");
+    if (obj == NULL) {
         return -1;
-    size = PyBytes_GET_SIZE(obj);
-    *end = ((PyUnicodeErrorObject *)exc)->end;
-    if (*end<1)
-        *end = 1;
-    if (*end>size)
-        *end = size;
+    }
+    Py_ssize_t size = PyBytes_GET_SIZE(obj);
     Py_DECREF(obj);
+    *end = unicode_error_adjust_end(exc->end, size);
     return 0;
 }
 
@@ -2850,8 +2881,8 @@ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end)
 }
 
 
-int
-PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end)
+static inline int
+unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end)
 {
     ((PyUnicodeErrorObject *)exc)->end = end;
     return 0;
@@ -2859,18 +2890,23 @@ PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end)
 
 
 int
+PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end)
+{
+    return unicode_error_set_end_impl(exc, end);
+}
+
+
+int
 PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end)
 {
-    ((PyUnicodeErrorObject *)exc)->end = end;
-    return 0;
+    return unicode_error_set_end_impl(exc, end);
 }
 
 
 int
 PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end)
 {
-    ((PyUnicodeErrorObject *)exc)->end = end;
-    return 0;
+    return unicode_error_set_end_impl(exc, end);
 }
 
 PyObject *
@@ -2966,28 +3002,25 @@ static PyMemberDef UnicodeError_members[] = {
 static int
 UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    PyUnicodeErrorObject *err;
-
-    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
+    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) {
         return -1;
+    }
 
-    err = (PyUnicodeErrorObject *)self;
-
-    Py_CLEAR(err->encoding);
-    Py_CLEAR(err->object);
-    Py_CLEAR(err->reason);
+    PyObject *encoding = NULL, *object = NULL, *reason = NULL;  // borrowed
+    Py_ssize_t start = -1, end = -1;
 
     if (!PyArg_ParseTuple(args, "UUnnU",
-                          &err->encoding, &err->object,
-                          &err->start, &err->end, &err->reason)) {
-        err->encoding = err->object = err->reason = NULL;
+                          &encoding, &object, &start, &end, &reason))
+    {
         return -1;
     }
 
-    Py_INCREF(err->encoding);
-    Py_INCREF(err->object);
-    Py_INCREF(err->reason);
-
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    Py_XSETREF(exc->encoding, Py_NewRef(encoding));
+    Py_XSETREF(exc->object, Py_NewRef(object));
+    exc->start = start;
+    exc->end = end;
+    Py_XSETREF(exc->reason, Py_NewRef(reason));
     return 0;
 }
 
@@ -3073,44 +3106,42 @@ PyObject *PyExc_UnicodeEncodeError = (PyObject *)&_PyExc_UnicodeEncodeError;
 static int
 UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    PyUnicodeErrorObject *ude;
-
-    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
+    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) {
         return -1;
+    }
 
-    ude = (PyUnicodeErrorObject *)self;
-
-    Py_CLEAR(ude->encoding);
-    Py_CLEAR(ude->object);
-    Py_CLEAR(ude->reason);
+    PyObject *encoding = NULL, *object = NULL, *reason = NULL;  // borrowed
+    Py_ssize_t start = -1, end = -1;
 
     if (!PyArg_ParseTuple(args, "UOnnU",
-                          &ude->encoding, &ude->object,
-                          &ude->start, &ude->end, &ude->reason)) {
-             ude->encoding = ude->object = ude->reason = NULL;
-             return -1;
+                          &encoding, &object, &start, &end, &reason))
+    {
+        return -1;
     }
 
-    Py_INCREF(ude->encoding);
-    Py_INCREF(ude->object);
-    Py_INCREF(ude->reason);
-
-    if (!PyBytes_Check(ude->object)) {
+    if (PyBytes_Check(object)) {
+        Py_INCREF(object);  // make 'object' a strong reference
+    }
+    else {
         Py_buffer view;
-        if (PyObject_GetBuffer(ude->object, &view, PyBUF_SIMPLE) != 0)
-            goto error;
-        Py_XSETREF(ude->object, PyBytes_FromStringAndSize(view.buf, view.len));
+        if (PyObject_GetBuffer(object, &view, PyBUF_SIMPLE) != 0) {
+            return -1;
+        }
+        // 'object' is borrowed, so we can re-use the variable
+        object = PyBytes_FromStringAndSize(view.buf, view.len);
         PyBuffer_Release(&view);
-        if (!ude->object)
-            goto error;
+        if (object == NULL) {
+            return -1;
+        }
     }
-    return 0;
 
-error:
-    Py_CLEAR(ude->encoding);
-    Py_CLEAR(ude->object);
-    Py_CLEAR(ude->reason);
-    return -1;
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    Py_XSETREF(exc->encoding, Py_NewRef(encoding));
+    Py_XSETREF(exc->object, object /* already a strong reference */);
+    exc->start = start;
+    exc->end = end;
+    Py_XSETREF(exc->reason, Py_NewRef(reason));
+    return 0;
 }
 
 static PyObject *
@@ -3192,25 +3223,24 @@ PyUnicodeDecodeError_Create(
  */
 
 static int
-UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
-                           PyObject *kwds)
+UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
+    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) {
         return -1;
+    }
 
-    Py_CLEAR(self->object);
-    Py_CLEAR(self->reason);
+    PyObject *object = NULL, *reason = NULL;  // borrowed
+    Py_ssize_t start = -1, end = -1;
 
-    if (!PyArg_ParseTuple(args, "UnnU",
-                          &self->object,
-                          &self->start, &self->end, &self->reason)) {
-        self->object = self->reason = NULL;
+    if (!PyArg_ParseTuple(args, "UnnU", &object, &start, &end, &reason)) {
         return -1;
     }
 
-    Py_INCREF(self->object);
-    Py_INCREF(self->reason);
-
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    Py_XSETREF(exc->object, Py_NewRef(object));
+    exc->start = start;
+    exc->end = end;
+    Py_XSETREF(exc->reason, Py_NewRef(reason));
     return 0;
 }
 
-- 
cgit v0.12