diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2018-12-04 08:25:50 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-12-04 08:25:50 (GMT) |
commit | eeb719eac6347f5b6e85389aa13a386024766806 (patch) | |
tree | ea6fc00032d349d50bb2139e0834f05ee1479000 /Objects | |
parent | 7fc633f5a56d9e672cd24133e2e1376347abac6c (diff) | |
download | cpython-eeb719eac6347f5b6e85389aa13a386024766806.zip cpython-eeb719eac6347f5b6e85389aa13a386024766806.tar.gz cpython-eeb719eac6347f5b6e85389aa13a386024766806.tar.bz2 |
bpo-35365: Use a wchar_t* buffer in the code page decoder. (GH-10837)
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 112 |
1 file changed, 52 insertions, 60 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1351eec..d0f0358 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4059,6 +4059,21 @@ onError: } #ifdef MS_WINDOWS +static int +widechar_resize(wchar_t **buf, Py_ssize_t *size, Py_ssize_t newsize) +{ + if (newsize > *size) { + wchar_t *newbuf = *buf; + if (PyMem_Resize(newbuf, wchar_t, newsize) == NULL) { + PyErr_NoMemory(); + return -1; + } + *buf = newbuf; + } + *size = newsize; + return 0; +} + /* error handling callback helper: build arguments, call the callback and check the arguments, if no exception occurred, copy the replacement to the output @@ -4072,7 +4087,7 @@ unicode_decode_call_errorhandler_wchar( const char *encoding, const char *reason, const char **input, const char **inend, Py_ssize_t *startinpos, Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, - PyObject **output, Py_ssize_t *outpos) + wchar_t **buf, Py_ssize_t *bufsize, Py_ssize_t *outpos) { static const char *argparse = "Un;decoding error handler must return (str, int) tuple"; @@ -4086,9 +4101,6 @@ unicode_decode_call_errorhandler_wchar( wchar_t *repwstr; Py_ssize_t repwlen; - assert (_PyUnicode_KIND(*output) == PyUnicode_WCHAR_KIND); - outsize = _PyUnicode_WSTR_LENGTH(*output); - if (*errorHandler == NULL) { *errorHandler = PyCodec_LookupError(errors); if (*errorHandler == NULL) @@ -4146,13 +4158,15 @@ unicode_decode_call_errorhandler_wchar( if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) goto overflow; requiredsize += insize - newpos; + outsize = *bufsize; if (requiredsize > outsize) { if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) requiredsize = 2*outsize; - if (unicode_resize(output, requiredsize) < 0) + if (widechar_resize(buf, bufsize, requiredsize) < 0) { goto onError; + } } - wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen); + wcsncpy(*buf + *outpos, repwstr, repwlen); *outpos += repwlen; *endinpos = newpos; *inptr = *input + newpos; @@ 
-7146,7 +7160,8 @@ decode_code_page_flags(UINT code_page) */ static int decode_code_page_strict(UINT code_page, - PyObject **v, + wchar_t **buf, + Py_ssize_t *bufsize, const char *in, int insize) { @@ -7160,21 +7175,12 @@ decode_code_page_strict(UINT code_page, if (outsize <= 0) goto error; - if (*v == NULL) { - /* Create unicode object */ - /* FIXME: don't use _PyUnicode_New(), but allocate a wchar_t* buffer */ - *v = (PyObject*)_PyUnicode_New(outsize); - if (*v == NULL) - return -1; - out = PyUnicode_AS_UNICODE(*v); - } - else { - /* Extend unicode object */ - Py_ssize_t n = PyUnicode_GET_SIZE(*v); - if (unicode_resize(v, n + outsize) < 0) - return -1; - out = PyUnicode_AS_UNICODE(*v) + n; + /* Extend a wchar_t* buffer */ + Py_ssize_t n = *bufsize; /* Get the current length */ + if (widechar_resize(buf, bufsize, n + outsize) < 0) { + return -1; } + out = *buf + n; /* Do the conversion */ outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize); @@ -7198,7 +7204,8 @@ error: */ static int decode_code_page_errors(UINT code_page, - PyObject **v, + wchar_t **buf, + Py_ssize_t *bufsize, const char *in, const int size, const char *errors, int final) { @@ -7238,29 +7245,16 @@ decode_code_page_errors(UINT code_page, goto error; } - if (*v == NULL) { - /* Create unicode object */ - if (size > PY_SSIZE_T_MAX / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { - PyErr_NoMemory(); - goto error; - } - /* FIXME: don't use _PyUnicode_New(), but allocate a wchar_t* buffer */ - *v = (PyObject*)_PyUnicode_New(size * Py_ARRAY_LENGTH(buffer)); - if (*v == NULL) - goto error; - out = PyUnicode_AS_UNICODE(*v); + /* Extend a wchar_t* buffer */ + Py_ssize_t n = *bufsize; /* Get the current length */ + if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { + PyErr_NoMemory(); + goto error; } - else { - /* Extend unicode object */ - Py_ssize_t n = PyUnicode_GET_SIZE(*v); - if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { - PyErr_NoMemory(); - 
goto error; - } - if (unicode_resize(v, n + size * Py_ARRAY_LENGTH(buffer)) < 0) - goto error; - out = PyUnicode_AS_UNICODE(*v) + n; + if (widechar_resize(buf, bufsize, n + size * Py_ARRAY_LENGTH(buffer)) < 0) { + goto error; } + out = *buf + n; /* Decode the byte string character per character */ while (in < endin) @@ -7295,16 +7289,16 @@ decode_code_page_errors(UINT code_page, startinpos = in - startin; endinpos = startinpos + 1; - outpos = out - PyUnicode_AS_UNICODE(*v); + outpos = out - *buf; if (unicode_decode_call_errorhandler_wchar( errors, &errorHandler, encoding, reason, &startin, &endin, &startinpos, &endinpos, &exc, &in, - v, &outpos)) + buf, bufsize, &outpos)) { goto error; } - out = PyUnicode_AS_UNICODE(*v) + outpos; + out = *buf + outpos; } else { in += insize; @@ -7313,14 +7307,9 @@ decode_code_page_errors(UINT code_page, } } - /* write a NUL character at the end */ - *out = 0; - - /* Extend unicode object */ - outsize = out - PyUnicode_AS_UNICODE(*v); - assert(outsize <= PyUnicode_WSTR_LENGTH(*v)); - if (unicode_resize(v, outsize) < 0) - goto error; + /* Shrink the buffer */ + assert(out - *buf <= *bufsize); + *bufsize = out - *buf; /* (in - startin) <= size and size is an int */ ret = Py_SAFE_DOWNCAST(in - startin, Py_ssize_t, int); @@ -7336,7 +7325,8 @@ decode_code_page_stateful(int code_page, const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed) { - PyObject *v = NULL; + wchar_t *buf = NULL; + Py_ssize_t bufsize = 0; int chunk_size, final, converted, done; if (code_page < 0) { @@ -7368,21 +7358,21 @@ decode_code_page_stateful(int code_page, } if (chunk_size == 0 && done) { - if (v != NULL) + if (buf != NULL) break; _Py_RETURN_UNICODE_EMPTY(); } - converted = decode_code_page_strict(code_page, &v, + converted = decode_code_page_strict(code_page, &buf, &bufsize, s, chunk_size); if (converted == -2) - converted = decode_code_page_errors(code_page, &v, + converted = decode_code_page_errors(code_page, &buf, &bufsize, s, chunk_size, 
errors, final); assert(converted != 0 || done); if (converted < 0) { - Py_XDECREF(v); + PyMem_Free(buf); return NULL; } @@ -7393,7 +7383,9 @@ decode_code_page_stateful(int code_page, size -= converted; } while (!done); - return unicode_result(v); + PyObject *v = PyUnicode_FromWideChar(buf, bufsize); + PyMem_Free(buf); + return v; } PyObject * |