From e613e6add5f07ff6aad5802924596b631b707d2a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 27 Jun 2017 16:03:14 +0300 Subject: bpo-30708: Check for null characters in PyUnicode_AsWideCharString(). (#2285) Raise a ValueError if the second argument is NULL and the wchar_t\* string contains null characters. --- Doc/c-api/unicode.rst | 16 +++++++++----- Doc/whatsnew/3.7.rst | 4 ++++ Lib/ctypes/test/test_slicing.py | 4 ++-- Misc/NEWS | 4 ++++ Modules/_io/winconsoleio.c | 9 +------- Objects/unicodeobject.c | 49 ++++++++++++++++++----------------------- 6 files changed, 44 insertions(+), 42 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 84059e4..45aff1b 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -934,16 +934,22 @@ wchar_t Support Convert the Unicode object to a wide character string. The output string always ends with a null character. If *size* is not *NULL*, write the number of wide characters (excluding the trailing null termination character) into - *\*size*. + *\*size*. Note that the resulting :c:type:`wchar_t` string might contain + null characters, which would cause the string to be truncated when used with + most C functions. If *size* is *NULL* and the :c:type:`wchar_t*` string + contains null characters, a :exc:`ValueError` is raised. Returns a buffer allocated by :c:func:`PyMem_Alloc` (use - :c:func:`PyMem_Free` to free it) on success. On error, returns *NULL*, - *\*size* is undefined and raises a :exc:`MemoryError`. Note that the - resulting :c:type:`wchar_t` string might contain null characters, which - would cause the string to be truncated when used with most C functions. + :c:func:`PyMem_Free` to free it) on success. On error, returns *NULL* + and *\*size* is undefined. Raises a :exc:`MemoryError` if memory allocation + fails. .. versionadded:: 3.2 + .. versionchanged:: 3.7 + Raises a :exc:`ValueError` if *size* is *NULL* and the :c:type:`wchar_t*` + string contains null characters. 
+ .. _builtincodecs: diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 927ae5b..e34268e 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -360,6 +360,10 @@ Changes in the C API :c:type:`unsigned long`. (Contributed by Serhiy Storchaka in :issue:`6532`.) +- :c:func:`PyUnicode_AsWideCharString` now raises a :exc:`ValueError` if the + second argument is *NULL* and the :c:type:`wchar_t*` string contains null + characters. (Contributed by Serhiy Storchaka in :issue:`30708`.) + Removed ======= diff --git a/Lib/ctypes/test/test_slicing.py b/Lib/ctypes/test/test_slicing.py index 240dc0c..a3932f1 100644 --- a/Lib/ctypes/test/test_slicing.py +++ b/Lib/ctypes/test/test_slicing.py @@ -134,7 +134,7 @@ class SlicesTestCase(unittest.TestCase): dll.my_wcsdup.restype = POINTER(c_wchar) dll.my_wcsdup.argtypes = POINTER(c_wchar), dll.my_free.restype = None - res = dll.my_wcsdup(s) + res = dll.my_wcsdup(s[:-1]) self.assertEqual(res[:len(s)], s) self.assertEqual(res[:len(s):], s) self.assertEqual(res[len(s)-1:-1:-1], s[::-1]) @@ -153,7 +153,7 @@ class SlicesTestCase(unittest.TestCase): dll.my_wcsdup.restype = POINTER(c_long) else: self.skipTest('Pointers to c_wchar are not supported') - res = dll.my_wcsdup(s) + res = dll.my_wcsdup(s[:-1]) tmpl = list(range(ord("a"), ord("z")+1)) self.assertEqual(res[:len(s)-1], tmpl) self.assertEqual(res[:len(s)-1:], tmpl) diff --git a/Misc/NEWS b/Misc/NEWS index 83eb530..232fb754 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -1245,6 +1245,10 @@ Windows C API ----- +- bpo-30708: PyUnicode_AsWideCharString() now raises a ValueError if the + second argument is NULL and the wchar_t\* string contains null + characters. + - bpo-16500: Deprecate PyOS_AfterFork() and add PyOS_BeforeFork(), PyOS_AfterFork_Parent() and PyOS_AfterFork_Child(). 
diff --git a/Modules/_io/winconsoleio.c b/Modules/_io/winconsoleio.c index 781518a..6600fde 100644 --- a/Modules/_io/winconsoleio.c +++ b/Modules/_io/winconsoleio.c @@ -304,18 +304,11 @@ _io__WindowsConsoleIO___init___impl(winconsoleio *self, PyObject *nameobj, if (!d) return -1; - Py_ssize_t length; - name = PyUnicode_AsWideCharString(decodedname, &length); + name = PyUnicode_AsWideCharString(decodedname, NULL); console_type = _PyIO_get_console_type(decodedname); Py_CLEAR(decodedname); if (name == NULL) return -1; - - if (wcslen(name) != length) { - PyMem_Free(name); - PyErr_SetString(PyExc_ValueError, "embedded null character"); - return -1; - } } s = mode; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b680421..646de0e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2953,8 +2953,7 @@ PyUnicode_FromFormat(const char *format, ...) #ifdef HAVE_WCHAR_H -/* Helper function for PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(): - convert a Unicode object to a wide character string. +/* Convert a Unicode object to a wide character string. - If w is NULL: return the number of wide characters (including the null character) required to convert the unicode object. Ignore size argument. @@ -2962,14 +2961,18 @@ PyUnicode_FromFormat(const char *format, ...) - Otherwise: return the number of wide characters (excluding the null character) written into w. Write at most size wide characters (including the null character). 
*/ -static Py_ssize_t -unicode_aswidechar(PyObject *unicode, - wchar_t *w, - Py_ssize_t size) +Py_ssize_t +PyUnicode_AsWideChar(PyObject *unicode, + wchar_t *w, + Py_ssize_t size) { Py_ssize_t res; const wchar_t *wstr; + if (unicode == NULL) { + PyErr_BadInternalCall(); + return -1; + } wstr = PyUnicode_AsUnicodeAndSize(unicode, &res); if (wstr == NULL) return -1; @@ -2986,23 +2989,12 @@ unicode_aswidechar(PyObject *unicode, return res + 1; } -Py_ssize_t -PyUnicode_AsWideChar(PyObject *unicode, - wchar_t *w, - Py_ssize_t size) -{ - if (unicode == NULL) { - PyErr_BadInternalCall(); - return -1; - } - return unicode_aswidechar(unicode, w, size); -} - wchar_t* PyUnicode_AsWideCharString(PyObject *unicode, Py_ssize_t *size) { - wchar_t* buffer; + const wchar_t *wstr; + wchar_t *buffer; Py_ssize_t buflen; if (unicode == NULL) { @@ -3010,19 +3002,22 @@ PyUnicode_AsWideCharString(PyObject *unicode, return NULL; } - buflen = unicode_aswidechar(unicode, NULL, 0); - if (buflen == -1) + wstr = PyUnicode_AsUnicodeAndSize(unicode, &buflen); + if (wstr == NULL) { return NULL; - buffer = PyMem_NEW(wchar_t, buflen); - if (buffer == NULL) { - PyErr_NoMemory(); + } + if (size == NULL && wcslen(wstr) != (size_t)buflen) { + PyErr_SetString(PyExc_ValueError, + "embedded null character"); return NULL; } - buflen = unicode_aswidechar(unicode, buffer, buflen); - if (buflen == -1) { - PyMem_FREE(buffer); + + buffer = PyMem_NEW(wchar_t, buflen + 1); + if (buffer == NULL) { + PyErr_NoMemory(); return NULL; } + memcpy(buffer, wstr, (buflen + 1) * sizeof(wchar_t)); if (size != NULL) *size = buflen; return buffer; -- cgit v0.12