summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Victor Stinner <victor.stinner@haypocalc.com> 2010-08-13 23:29:08 (GMT)
committer Victor Stinner <victor.stinner@haypocalc.com> 2010-08-13 23:29:08 (GMT)
commit f2e08b34f1fa50e99f8cab0a21721be2d1bb38b8 (patch)
tree 01336b2c925f2c2ae2b7dd9df20010c4c25c49cd
parent e9b428f9977f8733e6b0d2c321c093779f95080f (diff)
download cpython-f2e08b34f1fa50e99f8cab0a21721be2d1bb38b8.zip
cpython-f2e08b34f1fa50e99f8cab0a21721be2d1bb38b8.tar.gz
cpython-f2e08b34f1fa50e99f8cab0a21721be2d1bb38b8.tar.bz2
Create _Py_wchar2char() function, reverse of _Py_char2wchar()
* Use _Py_wchar2char() in _wstat() and _Py_wfopen()
* Document _Py_char2wchar()
-rw-r--r-- Include/Python.h 3
-rw-r--r-- Modules/getpath.c 11
-rw-r--r-- Modules/main.c 94
3 files changed, 97 insertions, 11 deletions
diff --git a/Include/Python.h b/Include/Python.h
index 1def75b..d5ac13e 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -126,12 +126,15 @@
#ifdef __cplusplus
extern "C" {
#endif
+
/* _Py_Mangle is defined in compile.c */
PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);
/* These functions live in main.c */
PyAPI_FUNC(wchar_t *) _Py_char2wchar(char *);
+PyAPI_FUNC(char*) _Py_wchar2char(const wchar_t *text);
PyAPI_FUNC(FILE *) _Py_wfopen(const wchar_t *path, const wchar_t *mode);
+
#ifdef __cplusplus
}
#endif
diff --git a/Modules/getpath.c b/Modules/getpath.c
index fff502e..faf8b56 100644
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@@ -139,13 +139,16 @@ static wchar_t *lib_python = L"lib/python" VERSION;
/* stat() taking a wide character path. In the added ("+") version the path
   is first converted to a byte string with _Py_wchar2char(); if that fails,
   errno is set to EINVAL and -1 is returned. Otherwise the result of stat()
   is returned. */
static int
_wstat(const wchar_t* path, struct stat *buf)
{
- char fname[PATH_MAX];
- size_t res = wcstombs(fname, path, sizeof(fname));
- if (res == (size_t)-1) {
+ int err;
+ char *fname;
+ fname = _Py_wchar2char(path); /* newly allocated; released below */
+ if (fname == NULL) { /* conversion error or memory error */
errno = EINVAL;
return -1;
}
- return stat(fname, buf);
+ err = stat(fname, buf); /* keep the result so fname can be freed first */
+ PyMem_Free(fname);
+ return err;
}
#endif
diff --git a/Modules/main.c b/Modules/main.c
index d605bab..f9d0c48 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -105,20 +105,21 @@ FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifndef MS_WINDOWS
- char cpath[PATH_MAX];
+ FILE *f;
+ char *cpath;
char cmode[10];
size_t r;
- r = wcstombs(cpath, path, PATH_MAX);
- if (r == (size_t)-1 || r >= PATH_MAX) {
- errno = EINVAL;
- return NULL;
- }
r = wcstombs(cmode, mode, 10);
if (r == (size_t)-1 || r >= 10) {
errno = EINVAL;
return NULL;
}
- return fopen(cpath, cmode);
+ cpath = _Py_wchar2char(path);
+ if (cpath == NULL)
+ return NULL;
+ f = fopen(cpath, cmode);
+ PyMem_Free(cpath);
+ return f;
#else
return _wfopen(path, mode);
#endif
@@ -734,6 +735,85 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv)
}
+/* Encode a (wide) character string to the locale encoding with the
+ surrogateescape error handler (characters in range U+DC80..U+DCFF are
+ converted to bytes 0x80..0xFF).
+
+ This function is the reverse of _Py_char2wchar().
+
+ text must be a nul-terminated wide string: its length is taken with
+ wcslen(). Characters outside U+DC80..U+DCFF are converted one at a time
+ with wcstombs().
+
+ Return a pointer to a newly allocated byte string (use PyMem_Free() to free
+ the memory), or NULL on error (conversion error or memory error). The
+ returned string is nul-terminated. */
+char*
+_Py_wchar2char(const wchar_t *text)
+{
+ const size_t len = wcslen(text);
+ char *result = NULL, *bytes = NULL;
+ size_t i, size, converted;
+ wchar_t c, buf[2];
+
+ /* The function works in two steps:
+ 1. compute the length of the output buffer in bytes (size)
+ 2. outputs the bytes
+
+ Both steps run the same for loop below, so the enclosing while loop
+ runs at most twice. bytes == NULL identifies step 1, where size is
+ accumulated. After the allocation, bytes is the write cursor and size
+ counts the bytes still free in the buffer. */
+ size = 0;
+ buf[1] = 0; /* buf holds one character plus a nul, as input for wcstombs() */
+ while (1) {
+ for (i=0; i < len; i++) {
+ c = text[i];
+ if (c >= 0xdc80 && c <= 0xdcff) {
+ /* UTF-8b surrogate: stands for a single raw byte, which is
+ written directly without calling wcstombs() */
+ if (bytes != NULL) {
+ *bytes++ = c - 0xdc00; /* U+DC80..U+DCFF -> 0x80..0xFF */
+ size--;
+ }
+ else
+ size++;
+ continue;
+ }
+ else {
+ buf[0] = c;
+ if (bytes != NULL)
+ converted = wcstombs(bytes, buf, size);
+ else
+ converted = wcstombs(NULL, buf, 0); /* step 1: only the byte count is used */
+ if (converted == (size_t)-1) { /* wcstombs() reported an error */
+ if (result != NULL)
+ PyMem_Free(result);
+ return NULL;
+ }
+ if (bytes != NULL) {
+ bytes += converted;
+ size -= converted;
+ }
+ else
+ size += converted;
+ }
+ }
+ if (result != NULL) {
+ *bytes = 0; /* step 2 is done: terminate the output string */
+ break;
+ }
+
+ size += 1; /* nul byte at the end */
+ result = PyMem_Malloc(size);
+ if (result == NULL)
+ return NULL;
+ bytes = result; /* switch to step 2 for the next pass of the while loop */
+ }
+ return result;
+}
+
+
+/* Decode a byte string from the locale encoding with the
+ surrogateescape error handler (undecodable bytes are decoded as characters
+ in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
+ character, escape the bytes using the surrogateescape error handler instead
+ of decoding them.
+
+ Use _Py_wchar2char() to encode the character string back to a byte string.
+
+ Return a pointer to a newly allocated (wide) character string (use
+ PyMem_Free() to free the memory), or NULL on error (conversion error or
+ memory error). */
wchar_t*
_Py_char2wchar(char* arg)
{