From 168e117e0a8825bc3ae0c08f0b08a33ac351a14f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 16 Oct 2010 23:16:16 +0000 Subject: Add an optional size argument to _Py_char2wchar() _Py_char2wchar() callers usually need the result size in characters. Since it's trivial to compute it in _Py_char2wchar() (O(1) whereas wcslen() is O(n)), add an option to get it. --- Include/fileutils.h | 3 ++- Modules/main.c | 6 ++++-- Modules/python.c | 2 +- Objects/unicodeobject.c | 5 +++-- Python/fileutils.c | 27 ++++++++++++++++----------- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/Include/fileutils.h b/Include/fileutils.h index cb15936..11ebebf 100644 --- a/Include/fileutils.h +++ b/Include/fileutils.h @@ -6,7 +6,8 @@ extern "C" { #endif PyAPI_FUNC(wchar_t *) _Py_char2wchar( - const char *arg); + const char *arg, + size_t *size); PyAPI_FUNC(char*) _Py_wchar2char( const wchar_t *text); diff --git a/Modules/main.c b/Modules/main.c index bbf695f..0c38fac 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -486,10 +486,12 @@ Py_Main(int argc, wchar_t **argv) /* Use utf-8 on Mac OS X */ unicode = PyUnicode_FromString(p); #else - wchar_t *wchar = _Py_char2wchar(p); + wchar_t *wchar; + size_t len; + wchar = _Py_char2wchar(p, &len); if (wchar == NULL) continue; - unicode = PyUnicode_FromWideChar(wchar, wcslen(wchar)); + unicode = PyUnicode_FromWideChar(wchar, len); PyMem_Free(wchar); #endif if (unicode == NULL) diff --git a/Modules/python.c b/Modules/python.c index 9a71cd0..47685a4 100644 --- a/Modules/python.c +++ b/Modules/python.c @@ -41,7 +41,7 @@ main(int argc, char **argv) oldloc = strdup(setlocale(LC_ALL, NULL)); setlocale(LC_ALL, ""); for (i = 0; i < argc; i++) { - argv_copy[i] = _Py_char2wchar(argv[i]); + argv_copy[i] = _Py_char2wchar(argv[i], NULL); if (!argv_copy[i]) return 1; argv_copy2[i] = argv_copy[i]; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 98427e3..9fe9c42 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1783,17 +1783,18 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) /* locale encoding with surrogateescape */ wchar_t *wchar; PyObject *unicode; + size_t len; if (s[size] != '\0' || size != strlen(s)) { PyErr_SetString(PyExc_TypeError, "embedded NUL character"); return NULL; } - wchar = _Py_char2wchar(s); + wchar = _Py_char2wchar(s, &len); if (wchar == NULL) return NULL; - unicode = PyUnicode_FromWideChar(wchar, -1); + unicode = PyUnicode_FromWideChar(wchar, len); PyMem_Free(wchar); return unicode; } diff --git a/Python/fileutils.c b/Python/fileutils.c index b8910b7..03fc0cb 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -13,11 +13,12 @@ Use _Py_wchar2char() to encode the character string back to a byte string. - Return a pointer to a newly allocated (wide) character string (use - PyMem_Free() to free the memory), or NULL on error (conversion error or - memory error). */ + Return a pointer to a newly allocated wide character string (use + PyMem_Free() to free the memory) and write the number of written wide + characters excluding the null character into *size if size is not NULL, or + NULL on error (conversion error or memory error). */ wchar_t* -_Py_char2wchar(const char* arg) +_Py_char2wchar(const char* arg, size_t *size) { wchar_t *res; #ifdef HAVE_BROKEN_MBSTOWCS @@ -47,8 +48,11 @@ _Py_char2wchar(const char* arg) for (tmp = res; *tmp != 0 && (*tmp < 0xd800 || *tmp > 0xdfff); tmp++) ; - if (*tmp == 0) + if (*tmp == 0) { + if (size != NULL) + *size = count; return res; + } } PyMem_Free(res); } @@ -113,6 +117,8 @@ _Py_char2wchar(const char* arg) *out++ = 0xdc00 + *in++; *out = 0; #endif + if (size != NULL) + *size = out - res; return res; oom: fprintf(stderr, "out of memory\n"); @@ -325,12 +331,11 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz) return -1; } cbuf[res] = '\0'; /* buf will be null terminated */ - wbuf = _Py_char2wchar(cbuf); + wbuf = _Py_char2wchar(cbuf, &r1); if (wbuf == NULL) { errno = EINVAL; return -1; } - r1 = wcslen(wbuf); if (bufsiz <= r1) { PyMem_Free(wbuf); errno = EINVAL; @@ -366,12 +371,11 @@ _Py_wrealpath(const wchar_t *path, if (res == NULL) return NULL; - wresolved_path = _Py_char2wchar(cresolved_path); + wresolved_path = _Py_char2wchar(cresolved_path, &r); if (wresolved_path == NULL) { errno = EINVAL; return NULL; } - r = wcslen(wresolved_path); if (resolved_path_size <= r) { PyMem_Free(wresolved_path); errno = EINVAL; @@ -394,13 +398,14 @@ _Py_wgetcwd(wchar_t *buf, size_t size) #else char fname[PATH_MAX]; wchar_t *wname; + size_t len; if (getcwd(fname, PATH_MAX) == NULL) return NULL; - wname = _Py_char2wchar(fname); + wname = _Py_char2wchar(fname, &len); if (wname == NULL) return NULL; - if (size <= wcslen(wname)) { + if (size <= len) { PyMem_Free(wname); return NULL; } -- cgit v0.12