Fix the code page decoder

* unicode_decode_call_errorhandler() now supports the PyUnicode_WCHAR_KIND kind
* unicode_decode_call_errorhandler() calls copy_characters() instead of PyUnicode_CopyCharacters()
author: Victor Stinner <victor.stinner@haypocalc.com> 2011-11-08 23:02:18 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2011-11-08 23:02:18 (GMT)
commit: 596a6c4ffcf35de733959dd0e96f0176bf4f4c30 (patch)
tree: c0c049c389cf8db8c80289502e9fd0c161da5197 /Objects
parent: 257a14c2a524ed029b499f5676c456d1c6c0f3a0 (diff)
download: cpython-596a6c4ffcf35de733959dd0e96f0176bf4f4c30.zip
cpython-596a6c4ffcf35de733959dd0e96f0176bf4f4c30.tar.gz
cpython-596a6c4ffcf35de733959dd0e96f0176bf4f4c30.tar.bz2
1 files changed, 44 insertions, 17 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 4ae8766..477827a0 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3622,14 +3622,18 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
 
     PyObject *restuple = NULL;
     PyObject *repunicode = NULL;
-    Py_ssize_t outsize = PyUnicode_GET_LENGTH(*output);
+    Py_ssize_t outsize;
     Py_ssize_t insize;
     Py_ssize_t requiredsize;
     Py_ssize_t newpos;
     PyObject *inputobj = NULL;
-    Py_ssize_t replen;
     int res = -1;
 
+    if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND)
+        outsize = PyUnicode_GET_LENGTH(*output);
+    else
+        outsize = _PyUnicode_WSTR_LENGTH(*output);
+
     if (*errorHandler == NULL) {
         *errorHandler = PyCodec_LookupError(errors);
         if (*errorHandler == NULL)
@@ -3678,24 +3682,46 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
         goto onError;
     }
 
-    /* need more space? (at least enough for what we
-       have+the replacement+the rest of the string (starting
-       at the new input position), so we won't have to check space
-       when there are no errors in the rest of the string) */
-    replen = PyUnicode_GET_LENGTH(repunicode);
-    requiredsize = *outpos + replen + insize-newpos;
-    if (requiredsize > outsize) {
-        if (requiredsize<2*outsize)
-            requiredsize = 2*outsize;
-        if (unicode_resize(output, requiredsize) < 0)
+    if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) {
+        /* need more space? (at least enough for what we
+           have+the replacement+the rest of the string (starting
+           at the new input position), so we won't have to check space
+           when there are no errors in the rest of the string) */
+        Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode);
+        requiredsize = *outpos + replen + insize-newpos;
+        if (requiredsize > outsize) {
+            if (requiredsize<2*outsize)
+                requiredsize = 2*outsize;
+            if (unicode_resize(output, requiredsize) < 0)
+                goto onError;
+        }
+        if (unicode_widen(output, PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
             goto onError;
+        copy_characters(*output, *outpos, repunicode, 0, replen);
+        *outpos += replen;
+    }
+    else {
+        wchar_t *repwstr;
+        Py_ssize_t repwlen;
+        repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
+        if (repwstr == NULL)
+            goto onError;
+        /* need more space? (at least enough for what we
+           have+the replacement+the rest of the string (starting
+           at the new input position), so we won't have to check space
+           when there are no errors in the rest of the string) */
+        requiredsize = *outpos + repwlen + insize-newpos;
+        if (requiredsize > outsize) {
+            if (requiredsize < 2*outsize)
+                requiredsize = 2*outsize;
+            if (unicode_resize(output, requiredsize) < 0)
+                goto onError;
+        }
+        wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
+        *outpos += repwlen;
     }
-    if (unicode_widen(output, PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
-        goto onError;
     *endinpos = newpos;
     *inptr = *input + newpos;
-    PyUnicode_CopyCharacters(*output, *outpos, repunicode, 0, replen);
-    *outpos += replen;
 
     /* we made it! */
     res = 0;
@@ -6976,10 +7002,11 @@ decode_code_page_errors(UINT code_page,
                     errors, &errorHandler,
                     encoding, reason,
                     &startin, &endin, &startinpos, &endinpos, &exc, &in,
-                    v, &outpos, &out))
+                    v, &outpos))
             {
                 goto error;
             }
+            out = PyUnicode_AS_UNICODE(*v) + outpos;
         }
         else {
             in += insize;
author	Victor Stinner <victor.stinner@haypocalc.com>	2011-11-08 23:02:18 (GMT)
committer	Victor Stinner <victor.stinner@haypocalc.com>	2011-11-08 23:02:18 (GMT)
commit	596a6c4ffcf35de733959dd0e96f0176bf4f4c30 (patch)
tree	c0c049c389cf8db8c80289502e9fd0c161da5197 /Objects
parent	257a14c2a524ed029b499f5676c456d1c6c0f3a0 (diff)
download	cpython-596a6c4ffcf35de733959dd0e96f0176bf4f4c30.zip cpython-596a6c4ffcf35de733959dd0e96f0176bf4f4c30.tar.gz cpython-596a6c4ffcf35de733959dd0e96f0176bf4f4c30.tar.bz2