From f2e08b34f1fa50e99f8cab0a21721be2d1bb38b8 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 13 Aug 2010 23:29:08 +0000 Subject: Create _Py_wchar2char() function, reverse of _Py_char2wchar() * Use _Py_wchar2char() in _wstat() and _Py_wfopen() * Document _Py_char2wchar() --- Include/Python.h | 3 ++ Modules/getpath.c | 11 ++++--- Modules/main.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 97 insertions(+), 11 deletions(-) diff --git a/Include/Python.h b/Include/Python.h index 1def75b..d5ac13e 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -126,12 +126,15 @@ #ifdef __cplusplus extern "C" { #endif + /* _Py_Mangle is defined in compile.c */ PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name); /* These functions live in main.c */ PyAPI_FUNC(wchar_t *) _Py_char2wchar(char *); +PyAPI_FUNC(char*) _Py_wchar2char(const wchar_t *text); PyAPI_FUNC(FILE *) _Py_wfopen(const wchar_t *path, const wchar_t *mode); + #ifdef __cplusplus } #endif diff --git a/Modules/getpath.c b/Modules/getpath.c index fff502e..faf8b56 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -139,13 +139,16 @@ static wchar_t *lib_python = L"lib/python" VERSION; static int _wstat(const wchar_t* path, struct stat *buf) { - char fname[PATH_MAX]; - size_t res = wcstombs(fname, path, sizeof(fname)); - if (res == (size_t)-1) { + int err; + char *fname; + fname = _Py_wchar2char(path); + if (fname == NULL) { errno = EINVAL; return -1; } - return stat(fname, buf); + err = stat(fname, buf); + PyMem_Free(fname); + return err; } #endif diff --git a/Modules/main.c b/Modules/main.c index d605bab..f9d0c48 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -105,20 +105,21 @@ FILE * _Py_wfopen(const wchar_t *path, const wchar_t *mode) { #ifndef MS_WINDOWS - char cpath[PATH_MAX]; + FILE *f; + char *cpath; char cmode[10]; size_t r; - r = wcstombs(cpath, path, PATH_MAX); - if (r == (size_t)-1 || r >= PATH_MAX) { - errno = EINVAL; - return NULL; - } r = wcstombs(cmode, mode, 10); if (r == (size_t)-1 || r >= 10) { errno = EINVAL; return NULL; } - return fopen(cpath, cmode); + cpath = _Py_wchar2char(path); + if (cpath == NULL) + return NULL; + f = fopen(cpath, cmode); + PyMem_Free(cpath); + return f; #else return _wfopen(path, mode); #endif @@ -734,6 +735,85 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv) } +/* Encode a (wide) character string to the locale encoding with the + surrogateescape error handler (characters in range U+DC80..U+DCFF are + converted to bytes 0x80..0xFF). + + This function is the reverse of _Py_char2wchar(). + + Return a pointer to a newly allocated byte string (use PyMem_Free() to free + the memory), or NULL on error (conversion error or memory error). */ +char* +_Py_wchar2char(const wchar_t *text) +{ + const size_t len = wcslen(text); + char *result = NULL, *bytes = NULL; + size_t i, size, converted; + wchar_t c, buf[2]; + + /* The function works in two steps: + 1. compute the length of the output buffer in bytes (size) + 2. outputs the bytes */ + size = 0; + buf[1] = 0; + while (1) { + for (i=0; i < len; i++) { + c = text[i]; + if (c >= 0xdc80 && c <= 0xdcff) { + /* UTF-8b surrogate */ + if (bytes != NULL) { + *bytes++ = c - 0xdc00; + size--; + } + else + size++; + continue; + } + else { + buf[0] = c; + if (bytes != NULL) + converted = wcstombs(bytes, buf, size); + else + converted = wcstombs(NULL, buf, 0); + if (converted == (size_t)-1) { + if (result != NULL) + PyMem_Free(result); + return NULL; + } + if (bytes != NULL) { + bytes += converted; + size -= converted; + } + else + size += converted; + } + } + if (result != NULL) { + *bytes = 0; + break; + } + + size += 1; /* nul byte at the end */ + result = PyMem_Malloc(size); + if (result == NULL) + return NULL; + bytes = result; + } + return result; +} + + +/* Decode a byte string from the locale encoding with the + surrogateescape error handler (undecodable bytes are decoded as characters + in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate + character, escape the bytes using the surrogateescape error handler instead + of decoding them. + + Use _Py_wchar2char() to encode the character string back to a byte string. + + Return a pointer to a newly allocated (wide) character string (use + PyMem_Free() to free the memory), or NULL on error (conversion error or + memory error). */ wchar_t* _Py_char2wchar(char* arg) { -- cgit v0.12