diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-11-08 23:02:18 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-11-08 23:02:18 (GMT) |
commit | 596a6c4ffcf35de733959dd0e96f0176bf4f4c30 (patch) | |
tree | c0c049c389cf8db8c80289502e9fd0c161da5197 /Objects | |
parent | 257a14c2a524ed029b499f5676c456d1c6c0f3a0 (diff) | |
download | cpython-596a6c4ffcf35de733959dd0e96f0176bf4f4c30.zip cpython-596a6c4ffcf35de733959dd0e96f0176bf4f4c30.tar.gz cpython-596a6c4ffcf35de733959dd0e96f0176bf4f4c30.tar.bz2 |
Fix the code page decoder
* unicode_decode_call_errorhandler() now supports the PyUnicode_WCHAR_KIND
kind
* unicode_decode_call_errorhandler() calls copy_characters() instead of
PyUnicode_CopyCharacters()
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 61 |
1 files changed, 44 insertions, 17 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4ae8766..477827a0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3622,14 +3622,18 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, PyObject *restuple = NULL; PyObject *repunicode = NULL; - Py_ssize_t outsize = PyUnicode_GET_LENGTH(*output); + Py_ssize_t outsize; Py_ssize_t insize; Py_ssize_t requiredsize; Py_ssize_t newpos; PyObject *inputobj = NULL; - Py_ssize_t replen; int res = -1; + if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) + outsize = PyUnicode_GET_LENGTH(*output); + else + outsize = _PyUnicode_WSTR_LENGTH(*output); + if (*errorHandler == NULL) { *errorHandler = PyCodec_LookupError(errors); if (*errorHandler == NULL) @@ -3678,24 +3682,46 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, goto onError; } - /* need more space? (at least enough for what we - have+the replacement+the rest of the string (starting - at the new input position), so we won't have to check space - when there are no errors in the rest of the string) */ - replen = PyUnicode_GET_LENGTH(repunicode); - requiredsize = *outpos + replen + insize-newpos; - if (requiredsize > outsize) { - if (requiredsize<2*outsize) - requiredsize = 2*outsize; - if (unicode_resize(output, requiredsize) < 0) + if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) { + /* need more space? (at least enough for what we + have+the replacement+the rest of the string (starting + at the new input position), so we won't have to check space + when there are no errors in the rest of the string) */ + Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode); + requiredsize = *outpos + replen + insize-newpos; + if (requiredsize > outsize) { + if (requiredsize<2*outsize) + requiredsize = 2*outsize; + if (unicode_resize(output, requiredsize) < 0) + goto onError; + } + if (unicode_widen(output, PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0) goto onError; + copy_characters(*output, *outpos, repunicode, 0, replen); + *outpos += replen; + } + else { + wchar_t *repwstr; + Py_ssize_t repwlen; + repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen); + if (repwstr == NULL) + goto onError; + /* need more space? (at least enough for what we + have+the replacement+the rest of the string (starting + at the new input position), so we won't have to check space + when there are no errors in the rest of the string) */ + requiredsize = *outpos + repwlen + insize-newpos; + if (requiredsize > outsize) { + if (requiredsize < 2*outsize) + requiredsize = 2*outsize; + if (unicode_resize(output, requiredsize) < 0) + goto onError; + } + wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen); + *outpos += repwlen; } - if (unicode_widen(output, PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0) - goto onError; *endinpos = newpos; *inptr = *input + newpos; - PyUnicode_CopyCharacters(*output, *outpos, repunicode, 0, replen); - *outpos += replen; /* we made it! */ res = 0; @@ -6976,10 +7002,11 @@ decode_code_page_errors(UINT code_page, errors, &errorHandler, encoding, reason, &startin, &endin, &startinpos, &endinpos, &exc, &in, - v, &outpos, &out)) + v, &outpos)) { goto error; } + out = PyUnicode_AS_UNICODE(*v) + outpos; } else { in += insize; |