summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2011-11-08 23:02:18 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2011-11-08 23:02:18 (GMT)
commit: 596a6c4ffcf35de733959dd0e96f0176bf4f4c30 (patch)
tree: c0c049c389cf8db8c80289502e9fd0c161da5197 /Objects
parent: 257a14c2a524ed029b499f5676c456d1c6c0f3a0 (diff)
download: cpython-596a6c4ffcf35de733959dd0e96f0176bf4f4c30.zip
cpython-596a6c4ffcf35de733959dd0e96f0176bf4f4c30.tar.gz
cpython-596a6c4ffcf35de733959dd0e96f0176bf4f4c30.tar.bz2
Fix the code page decoder
* unicode_decode_call_errorhandler() now supports the PyUnicode_WCHAR_KIND kind
* unicode_decode_call_errorhandler() calls copy_characters() instead of PyUnicode_CopyCharacters()
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/unicodeobject.c 61
1 file changed, 44 insertions, 17 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 4ae8766..477827a0 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3622,14 +3622,18 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
PyObject *restuple = NULL;
PyObject *repunicode = NULL;
- Py_ssize_t outsize = PyUnicode_GET_LENGTH(*output);
+ Py_ssize_t outsize;
Py_ssize_t insize;
Py_ssize_t requiredsize;
Py_ssize_t newpos;
PyObject *inputobj = NULL;
- Py_ssize_t replen;
int res = -1;
+ if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND)
+ outsize = PyUnicode_GET_LENGTH(*output);
+ else
+ outsize = _PyUnicode_WSTR_LENGTH(*output);
+
if (*errorHandler == NULL) {
*errorHandler = PyCodec_LookupError(errors);
if (*errorHandler == NULL)
@@ -3678,24 +3682,46 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
goto onError;
}
- /* need more space? (at least enough for what we
- have+the replacement+the rest of the string (starting
- at the new input position), so we won't have to check space
- when there are no errors in the rest of the string) */
- replen = PyUnicode_GET_LENGTH(repunicode);
- requiredsize = *outpos + replen + insize-newpos;
- if (requiredsize > outsize) {
- if (requiredsize<2*outsize)
- requiredsize = 2*outsize;
- if (unicode_resize(output, requiredsize) < 0)
+ if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) {
+ /* need more space? (at least enough for what we
+ have+the replacement+the rest of the string (starting
+ at the new input position), so we won't have to check space
+ when there are no errors in the rest of the string) */
+ Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode);
+ requiredsize = *outpos + replen + insize-newpos;
+ if (requiredsize > outsize) {
+ if (requiredsize<2*outsize)
+ requiredsize = 2*outsize;
+ if (unicode_resize(output, requiredsize) < 0)
+ goto onError;
+ }
+ if (unicode_widen(output, PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
goto onError;
+ copy_characters(*output, *outpos, repunicode, 0, replen);
+ *outpos += replen;
+ }
+ else {
+ wchar_t *repwstr;
+ Py_ssize_t repwlen;
+ repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
+ if (repwstr == NULL)
+ goto onError;
+ /* need more space? (at least enough for what we
+ have+the replacement+the rest of the string (starting
+ at the new input position), so we won't have to check space
+ when there are no errors in the rest of the string) */
+ requiredsize = *outpos + repwlen + insize-newpos;
+ if (requiredsize > outsize) {
+ if (requiredsize < 2*outsize)
+ requiredsize = 2*outsize;
+ if (unicode_resize(output, requiredsize) < 0)
+ goto onError;
+ }
+ wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
+ *outpos += repwlen;
}
- if (unicode_widen(output, PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
- goto onError;
*endinpos = newpos;
*inptr = *input + newpos;
- PyUnicode_CopyCharacters(*output, *outpos, repunicode, 0, replen);
- *outpos += replen;
/* we made it! */
res = 0;
@@ -6976,10 +7002,11 @@ decode_code_page_errors(UINT code_page,
errors, &errorHandler,
encoding, reason,
&startin, &endin, &startinpos, &endinpos, &exc, &in,
- v, &outpos, &out))
+ v, &outpos))
{
goto error;
}
+ out = PyUnicode_AS_UNICODE(*v) + outpos;
}
else {
in += insize;