Correct the handling of 0-termination in PyUnicode_AsWideChar() and its usage in PyLocale_strcoll().

Clarify the documentation on this. Thanks to Andreas Degert for pointing this out.
author: Marc-André Lemburg <mal@egenix.com> 2004-11-22 13:02:31 (GMT)
committer: Marc-André Lemburg <mal@egenix.com> 2004-11-22 13:02:31 (GMT)
commit: a9cadcd41b27fd045626c4e3b98315aaa257ca75 (patch)
tree: de54d0594b72a0b5fbdd6eaecd28a47837597d02
parent: 6d60c0962444bb8f6d13208489095144e7752924 (diff)
download: cpython-a9cadcd41b27fd045626c4e3b98315aaa257ca75.zip
cpython-a9cadcd41b27fd045626c4e3b98315aaa257ca75.tar.gz
cpython-a9cadcd41b27fd045626c4e3b98315aaa257ca75.tar.bz2
4 files changed, 25 insertions, 7 deletions
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex
index af026ff..001d0ad 100644
--- a/Doc/api/concrete.tex
+++ b/Doc/api/concrete.tex
@@ -995,9 +995,13 @@ following functions. Support is optimized if Python's own
                                              wchar_t *w,
                                              int size}
   Copies the Unicode object contents into the \ctype{wchar_t} buffer
-  \var{w}.  At most \var{size} \ctype{wchar_t} characters are copied.
-  Returns the number of \ctype{wchar_t} characters copied or -1 in
-  case of an error.
+  \var{w}.  At most \var{size} \ctype{wchar_t} characters are copied
+  (excluding a possibly trailing 0-termination character).  Returns
+  the number of \ctype{wchar_t} characters copied or -1 in case of an
+  error.  Note that the resulting \ctype{wchar_t} string may or may
+  not be 0-terminated.  It is the responsibility of the caller to make
+  sure that the \ctype{wchar_t} string is 0-terminated in case this is
+  required by the application.
 \end{cfuncdesc}
 
 
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 0a82caf..6738cbd 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -512,10 +512,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
     int size                    /* size of buffer */
     );
 
-/* Copies the Unicode Object contents into the whcar_t buffer w.  At
+/* Copies the Unicode Object contents into the wchar_t buffer w.  At
    most size wchar_t characters are copied.
 
-   Returns the number of wchar_t characters copied or -1 in case of an
+   Note that the resulting wchar_t string may or may not be
+   0-terminated.  It is the responsibility of the caller to make sure
+   that the wchar_t string is 0-terminated in case this is required by
+   the application.
+
+   Returns the number of wchar_t characters copied (excluding a
+   possibly trailing 0-termination character) or -1 in case of an
    error. */
 
 PyAPI_FUNC(int) PyUnicode_AsWideChar(
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 3e3df22..de470e0 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -305,7 +305,6 @@ PyLocale_strcoll(PyObject* self, PyObject* args)
     }
     /* Convert the unicode strings to wchar[]. */
     len1 = PyUnicode_GET_SIZE(os1) + 1;
-    len2 = PyUnicode_GET_SIZE(os2) + 1;
     ws1 = PyMem_MALLOC(len1 * sizeof(wchar_t));
     if (!ws1) {
         PyErr_NoMemory();
@@ -313,6 +312,8 @@ PyLocale_strcoll(PyObject* self, PyObject* args)
     }
     if (PyUnicode_AsWideChar((PyUnicodeObject*)os1, ws1, len1) == -1)
         goto done;
+    ws1[len1 - 1] = 0;
+    len2 = PyUnicode_GET_SIZE(os2) + 1;
     ws2 = PyMem_MALLOC(len2 * sizeof(wchar_t));
     if (!ws2) {
         PyErr_NoMemory();
@@ -320,6 +321,7 @@ PyLocale_strcoll(PyObject* self, PyObject* args)
     }
     if (PyUnicode_AsWideChar((PyUnicodeObject*)os2, ws2, len2) == -1)
         goto done;
+    ws2[len2 - 1] = 0;
     /* Collate the strings. */
     result = PyInt_FromLong(wcscoll(ws1, ws2));
   done:
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 48821bd..5e5dac5 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -384,8 +384,11 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode,
 	PyErr_BadInternalCall();
 	return -1;
     }
+
+    /* If possible, try to copy the 0-termination as well */
     if (size > PyUnicode_GET_SIZE(unicode))
-	size = PyUnicode_GET_SIZE(unicode);
+	size = PyUnicode_GET_SIZE(unicode) + 1;
+
 #ifdef HAVE_USABLE_WCHAR_T
     memcpy(w, unicode->str, size * sizeof(wchar_t));
 #else
@@ -398,6 +401,9 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode,
     }
 #endif
 
+    if (size > PyUnicode_GET_SIZE(unicode))
+        return PyUnicode_GET_SIZE(unicode);
+    else
     return size;
 }
author	Marc-André Lemburg <mal@egenix.com>	2004-11-22 13:02:31 (GMT)
committer	Marc-André Lemburg <mal@egenix.com>	2004-11-22 13:02:31 (GMT)
commit	a9cadcd41b27fd045626c4e3b98315aaa257ca75 (patch)
tree	de54d0594b72a0b5fbdd6eaecd28a47837597d02
parent	6d60c0962444bb8f6d13208489095144e7752924 (diff)
download	cpython-a9cadcd41b27fd045626c4e3b98315aaa257ca75.zip cpython-a9cadcd41b27fd045626c4e3b98315aaa257ca75.tar.gz cpython-a9cadcd41b27fd045626c4e3b98315aaa257ca75.tar.bz2