summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Marc-André Lemburg <mal@egenix.com> 2004-11-22 13:02:31 (GMT)
committer Marc-André Lemburg <mal@egenix.com> 2004-11-22 13:02:31 (GMT)
commit a9cadcd41b27fd045626c4e3b98315aaa257ca75 (patch)
tree de54d0594b72a0b5fbdd6eaecd28a47837597d02
parent 6d60c0962444bb8f6d13208489095144e7752924 (diff)
download cpython-a9cadcd41b27fd045626c4e3b98315aaa257ca75.zip
cpython-a9cadcd41b27fd045626c4e3b98315aaa257ca75.tar.gz
cpython-a9cadcd41b27fd045626c4e3b98315aaa257ca75.tar.bz2
Correct the handling of 0-termination of PyUnicode_AsWideChar()
and its usage in PyLocale_strcoll(). Clarify the documentation on this. Thanks to Andreas Degert for pointing this out.
-rw-r--r-- Doc/api/concrete.tex 10
-rw-r--r-- Include/unicodeobject.h 10
-rw-r--r-- Modules/_localemodule.c 4
-rw-r--r-- Objects/unicodeobject.c 8
4 files changed, 25 insertions, 7 deletions
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex
index af026ff..001d0ad 100644
--- a/Doc/api/concrete.tex
+++ b/Doc/api/concrete.tex
@@ -995,9 +995,13 @@ following functions. Support is optimized if Python's own
wchar_t *w,
int size}
Copies the Unicode object contents into the \ctype{wchar_t} buffer
- \var{w}. At most \var{size} \ctype{wchar_t} characters are copied.
- Returns the number of \ctype{wchar_t} characters copied or -1 in
- case of an error.
+ \var{w}. At most \var{size} \ctype{wchar_t} characters are copied
+ (excluding a possibly trailing 0-termination character). Returns
+ the number of \ctype{wchar_t} characters copied or -1 in case of an
+ error. Note that the resulting \ctype{wchar_t} string may or may
+ not be 0-terminated. It is the responsibility of the caller to make
+ sure that the \ctype{wchar_t} string is 0-terminated in case this is
+ required by the application.
\end{cfuncdesc}
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 0a82caf..6738cbd 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -512,10 +512,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
int size /* size of buffer */
);
-/* Copies the Unicode Object contents into the whcar_t buffer w. At
+/* Copies the Unicode Object contents into the wchar_t buffer w. At
most size wchar_t characters are copied.
- Returns the number of wchar_t characters copied or -1 in case of an
+ Note that the resulting wchar_t string may or may not be
+ 0-terminated. It is the responsibility of the caller to make sure
+ that the wchar_t string is 0-terminated in case this is required by
+ the application.
+
+ Returns the number of wchar_t characters copied (excluding a
+ possibly trailing 0-termination character) or -1 in case of an
error. */
PyAPI_FUNC(int) PyUnicode_AsWideChar(
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 3e3df22..de470e0 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -305,7 +305,6 @@ PyLocale_strcoll(PyObject* self, PyObject* args)
}
/* Convert the unicode strings to wchar[]. */
len1 = PyUnicode_GET_SIZE(os1) + 1;
- len2 = PyUnicode_GET_SIZE(os2) + 1;
ws1 = PyMem_MALLOC(len1 * sizeof(wchar_t));
if (!ws1) {
PyErr_NoMemory();
@@ -313,6 +312,8 @@ PyLocale_strcoll(PyObject* self, PyObject* args)
}
if (PyUnicode_AsWideChar((PyUnicodeObject*)os1, ws1, len1) == -1)
goto done;
+ ws1[len1 - 1] = 0;
+ len2 = PyUnicode_GET_SIZE(os2) + 1;
ws2 = PyMem_MALLOC(len2 * sizeof(wchar_t));
if (!ws2) {
PyErr_NoMemory();
@@ -320,6 +321,7 @@ PyLocale_strcoll(PyObject* self, PyObject* args)
}
if (PyUnicode_AsWideChar((PyUnicodeObject*)os2, ws2, len2) == -1)
goto done;
+ ws2[len2 - 1] = 0;
/* Collate the strings. */
result = PyInt_FromLong(wcscoll(ws1, ws2));
done:
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 48821bd..5e5dac5 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -384,8 +384,11 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode,
PyErr_BadInternalCall();
return -1;
}
+
+ /* If possible, try to copy the 0-termination as well */
if (size > PyUnicode_GET_SIZE(unicode))
- size = PyUnicode_GET_SIZE(unicode);
+ size = PyUnicode_GET_SIZE(unicode) + 1;
+
#ifdef HAVE_USABLE_WCHAR_T
memcpy(w, unicode->str, size * sizeof(wchar_t));
#else
@@ -398,6 +401,9 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode,
}
#endif
+ if (size > PyUnicode_GET_SIZE(unicode))
+ return PyUnicode_GET_SIZE(unicode);
+ else
return size;
}