diff options
author | Marc-André Lemburg <mal@egenix.com> | 2004-11-22 13:02:31 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2004-11-22 13:02:31 (GMT) |
commit | a9cadcd41b27fd045626c4e3b98315aaa257ca75 (patch) | |
tree | de54d0594b72a0b5fbdd6eaecd28a47837597d02 | |
parent | 6d60c0962444bb8f6d13208489095144e7752924 (diff) | |
download | cpython-a9cadcd41b27fd045626c4e3b98315aaa257ca75.zip cpython-a9cadcd41b27fd045626c4e3b98315aaa257ca75.tar.gz cpython-a9cadcd41b27fd045626c4e3b98315aaa257ca75.tar.bz2 |
Correct the handling of 0-termination of PyUnicode_AsWideChar()
and its usage in PyLocale_strcoll().
Clarify the documentation on this.
Thanks to Andreas Degert for pointing this out.
-rw-r--r-- | Doc/api/concrete.tex | 10 |
-rw-r--r-- | Include/unicodeobject.h | 10 |
-rw-r--r-- | Modules/_localemodule.c | 4 |
-rw-r--r-- | Objects/unicodeobject.c | 8 |
4 files changed, 25 insertions, 7 deletions
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex index af026ff..001d0ad 100644 --- a/Doc/api/concrete.tex +++ b/Doc/api/concrete.tex @@ -995,9 +995,13 @@ following functions. Support is optimized if Python's own wchar_t *w, int size} Copies the Unicode object contents into the \ctype{wchar_t} buffer - \var{w}. At most \var{size} \ctype{wchar_t} characters are copied. - Returns the number of \ctype{wchar_t} characters copied or -1 in - case of an error. + \var{w}. At most \var{size} \ctype{wchar_t} characters are copied + (excluding a possibly trailing 0-termination character). Returns + the number of \ctype{wchar_t} characters copied or -1 in case of an + error. Note that the resulting \ctype{wchar_t} string may or may + not be 0-terminated. It is the responsibility of the caller to make + sure that the \ctype{wchar_t} string is 0-terminated in case this is + required by the application. \end{cfuncdesc} diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 0a82caf..6738cbd 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -512,10 +512,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( int size /* size of buffer */ ); -/* Copies the Unicode Object contents into the whcar_t buffer w. At +/* Copies the Unicode Object contents into the wchar_t buffer w. At most size wchar_t characters are copied. - Returns the number of wchar_t characters copied or -1 in case of an + Note that the resulting wchar_t string may or may not be + 0-terminated. It is the responsibility of the caller to make sure + that the wchar_t string is 0-terminated in case this is required by + the application. + + Returns the number of wchar_t characters copied (excluding a + possibly trailing 0-termination character) or -1 in case of an error. 
*/ PyAPI_FUNC(int) PyUnicode_AsWideChar( diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 3e3df22..de470e0 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -305,7 +305,6 @@ PyLocale_strcoll(PyObject* self, PyObject* args) } /* Convert the unicode strings to wchar[]. */ len1 = PyUnicode_GET_SIZE(os1) + 1; - len2 = PyUnicode_GET_SIZE(os2) + 1; ws1 = PyMem_MALLOC(len1 * sizeof(wchar_t)); if (!ws1) { PyErr_NoMemory(); @@ -313,6 +312,8 @@ PyLocale_strcoll(PyObject* self, PyObject* args) } if (PyUnicode_AsWideChar((PyUnicodeObject*)os1, ws1, len1) == -1) goto done; + ws1[len1 - 1] = 0; + len2 = PyUnicode_GET_SIZE(os2) + 1; ws2 = PyMem_MALLOC(len2 * sizeof(wchar_t)); if (!ws2) { PyErr_NoMemory(); @@ -320,6 +321,7 @@ PyLocale_strcoll(PyObject* self, PyObject* args) } if (PyUnicode_AsWideChar((PyUnicodeObject*)os2, ws2, len2) == -1) goto done; + ws2[len2 - 1] = 0; /* Collate the strings. */ result = PyInt_FromLong(wcscoll(ws1, ws2)); done: diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 48821bd..5e5dac5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -384,8 +384,11 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode, PyErr_BadInternalCall(); return -1; } + + /* If possible, try to copy the 0-termination as well */ if (size > PyUnicode_GET_SIZE(unicode)) - size = PyUnicode_GET_SIZE(unicode); + size = PyUnicode_GET_SIZE(unicode) + 1; + #ifdef HAVE_USABLE_WCHAR_T memcpy(w, unicode->str, size * sizeof(wchar_t)); #else @@ -398,6 +401,9 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode, } #endif + if (size > PyUnicode_GET_SIZE(unicode)) + return PyUnicode_GET_SIZE(unicode); + else return size; } |