summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Include/Python.h12
-rw-r--r--Include/fileutils.h55
-rw-r--r--Makefile.pre.in8
-rw-r--r--Modules/getpath.c81
-rw-r--r--Modules/main.c208
-rw-r--r--PCbuild/pythoncore.vcproj8
-rw-r--r--Python/fileutils.c758
-rw-r--r--Python/import.c65
-rw-r--r--Python/sysmodule.c29
9 files changed, 832 insertions, 392 deletions
diff --git a/Include/Python.h b/Include/Python.h
index 5afde02..6ca7a58 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -122,6 +122,7 @@
#include "pystrtod.h"
#include "pystrcmp.h"
#include "dtoa.h"
+#include "fileutils.h"
#ifdef __cplusplus
extern "C" {
@@ -130,17 +131,6 @@ extern "C" {
/* _Py_Mangle is defined in compile.c */
PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);
-/* These functions live in main.c */
-PyAPI_FUNC(wchar_t *) _Py_char2wchar(char *);
-PyAPI_FUNC(char*) _Py_wchar2char(const wchar_t *text);
-PyAPI_FUNC(FILE *) _Py_wfopen(const wchar_t *path, const wchar_t *mode);
-
-/* These functions live in import.c */
-PyAPI_FUNC(FILE*) _Py_fopen(PyObject *unicode, const char *mode);
-#ifdef HAVE_STAT
-int _Py_stat(PyObject *unicode, struct stat *statbuf);
-#endif
-
#ifdef __cplusplus
}
#endif
diff --git a/Include/fileutils.h b/Include/fileutils.h
new file mode 100644
index 0000000..2971d9d
--- /dev/null
+++ b/Include/fileutils.h
@@ -0,0 +1,55 @@
+#ifndef Py_FILEUTILS_H
+#define Py_FILEUTILS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+PyAPI_FUNC(wchar_t *) _Py_char2wchar(
+ char *arg);
+
+PyAPI_FUNC(char*) _Py_wchar2char(
+ const wchar_t *text);
+
+#if defined(MS_WINDOWS) || defined(HAVE_STAT)
+PyAPI_FUNC(int) _Py_wstat(
+ const wchar_t* path,
+ struct stat *buf);
+#endif
+
+#ifdef HAVE_STAT
+PyAPI_FUNC(int) _Py_stat(
+ PyObject *unicode,
+ struct stat *statbuf);
+#endif
+
+PyAPI_FUNC(FILE *) _Py_wfopen(
+ const wchar_t *path,
+ const wchar_t *mode);
+
+PyAPI_FUNC(FILE*) _Py_fopen(
+ PyObject *unicode,
+ const char *mode);
+
+#ifdef HAVE_READLINK
+PyAPI_FUNC(int) _Py_wreadlink(
+ const wchar_t *path,
+ wchar_t *buf,
+ size_t bufsiz);
+#endif
+
+#ifdef HAVE_REALPATH
+PyAPI_FUNC(wchar_t*) _Py_wrealpath(
+ const wchar_t *path,
+ wchar_t *resolved_path);
+#endif
+
+PyAPI_FUNC(wchar_t*) _Py_wgetcwd(
+ wchar_t *buf,
+ size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !Py_FILEUTILS_H */
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 294a03c..8e8727d 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -325,6 +325,7 @@ PYTHON_OBJS= \
Python/pystrtod.o \
Python/dtoa.o \
Python/formatter_unicode.o \
+ Python/fileutils.o \
Python/$(DYNLOADFILE) \
$(LIBOBJS) \
$(MACHDEP_OBJS) \
@@ -454,7 +455,7 @@ libpython$(VERSION).so: $(LIBRARY_OBJS)
libpython$(VERSION).dylib: $(LIBRARY_OBJS)
$(CC) -dynamiclib -Wl,-single_module $(PY_LDFLAGS) -undefined dynamic_lookup -Wl,-install_name,$(prefix)/lib/libpython$(VERSION).dylib -Wl,-compatibility_version,$(VERSION) -Wl,-current_version,$(VERSION) -o $@ $(LIBRARY_OBJS) $(SHLIBS) $(LIBC) $(LIBM) $(LDLAST); \
-
+
libpython$(VERSION).sl: $(LIBRARY_OBJS)
$(LDSHARED) $(PY_LDFLAGS) -o $@ $(LIBRARY_OBJS) $(MODLIBS) $(SHLIBS) $(LIBC) $(LIBM) $(LDLAST)
@@ -620,7 +621,7 @@ BYTESTR_DEPS = \
Objects/bytesobject.o: $(srcdir)/Objects/bytesobject.c $(BYTESTR_DEPS)
-Objects/bytearrayobject.o: $(srcdir)/Objects/bytearrayobject.c $(BYTESTR_DEPS)
+Objects/bytearrayobject.o: $(srcdir)/Objects/bytearrayobject.c $(BYTESTR_DEPS)
Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c \
$(BYTESTR_DEPS)
@@ -665,6 +666,7 @@ PYTHON_HEADERS= \
Include/errcode.h \
Include/eval.h \
Include/fileobject.h \
+ Include/fileutils.h \
Include/floatobject.h \
Include/frameobject.h \
Include/funcobject.h \
@@ -1283,7 +1285,7 @@ Python/thread.o: @THREADHEADERS@
.PHONY: maninstall libinstall inclinstall libainstall sharedinstall
.PHONY: frameworkinstall frameworkinstallframework frameworkinstallstructure
.PHONY: frameworkinstallmaclib frameworkinstallapps frameworkinstallunixtools
-.PHONY: frameworkaltinstallunixtools recheck autoconf clean clobber distclean
+.PHONY: frameworkaltinstallunixtools recheck autoconf clean clobber distclean
.PHONY: smelly funny patchcheck
.PHONY: gdbhooks
diff --git a/Modules/getpath.c b/Modules/getpath.c
index 06d0cae..139d753 100644
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@@ -136,78 +136,6 @@ static wchar_t progpath[MAXPATHLEN+1];
static wchar_t *module_search_path = NULL;
static wchar_t *lib_python = L"lib/python" VERSION;
-/* In principle, this should use HAVE__WSTAT, and _wstat
- should be detected by autoconf. However, no current
- POSIX system provides that function, so testing for
- it is pointless.
- Not sure whether the MS_WINDOWS guards are necessary:
- perhaps for cygwin/mingw builds?
-*/
-#ifndef MS_WINDOWS
-static int
-_wstat(const wchar_t* path, struct stat *buf)
-{
- int err;
- char *fname;
- fname = _Py_wchar2char(path);
- if (fname == NULL) {
- errno = EINVAL;
- return -1;
- }
- err = stat(fname, buf);
- PyMem_Free(fname);
- return err;
-}
-#endif
-
-#ifndef MS_WINDOWS
-static wchar_t*
-_wgetcwd(wchar_t *buf, size_t size)
-{
- char fname[PATH_MAX];
- if (getcwd(fname, PATH_MAX) == NULL)
- return NULL;
- if (mbstowcs(buf, fname, size) >= size) {
- errno = ERANGE;
- return NULL;
- }
- return buf;
-}
-#endif
-
-#ifdef HAVE_READLINK
-int
-_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
-{
- char *cpath;
- char cbuf[PATH_MAX];
- int res;
- size_t r1;
-
- cpath = _Py_wchar2char(path);
- if (cpath == NULL) {
- errno = EINVAL;
- return -1;
- }
- res = (int)readlink(cpath, cbuf, PATH_MAX);
- PyMem_Free(cpath);
- if (res == -1)
- return -1;
- if (res == PATH_MAX) {
- errno = EINVAL;
- return -1;
- }
- cbuf[res] = '\0'; /* buf will be null terminated */
- r1 = mbstowcs(buf, cbuf, bufsiz);
- if (r1 == -1) {
- errno = EINVAL;
- return -1;
- }
- return (int)r1;
-
-}
-#endif
-
static void
reduce(wchar_t *dir)
{
@@ -217,12 +145,11 @@ reduce(wchar_t *dir)
dir[i] = '\0';
}
-
static int
isfile(wchar_t *filename) /* Is file, not directory */
{
struct stat buf;
- if (_wstat(filename, &buf) != 0)
+ if (_Py_wstat(filename, &buf) != 0)
return 0;
if (!S_ISREG(buf.st_mode))
return 0;
@@ -250,7 +177,7 @@ static int
isxfile(wchar_t *filename) /* Is executable file */
{
struct stat buf;
- if (_wstat(filename, &buf) != 0)
+ if (_Py_wstat(filename, &buf) != 0)
return 0;
if (!S_ISREG(buf.st_mode))
return 0;
@@ -264,7 +191,7 @@ static int
isdir(wchar_t *filename) /* Is directory */
{
struct stat buf;
- if (_wstat(filename, &buf) != 0)
+ if (_Py_wstat(filename, &buf) != 0)
return 0;
if (!S_ISDIR(buf.st_mode))
return 0;
@@ -309,7 +236,7 @@ copy_absolute(wchar_t *path, wchar_t *p)
if (p[0] == SEP)
wcscpy(path, p);
else {
- _wgetcwd(path, MAXPATHLEN);
+ _Py_wgetcwd(path, MAXPATHLEN);
if (p[0] == '.' && p[1] == SEP)
p += 2;
joinpath(path, p);
diff --git a/Modules/main.c b/Modules/main.c
index 5e9f82a..4a10d93 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -104,31 +104,6 @@ static char *usage_5 =
#endif
;
-FILE *
-_Py_wfopen(const wchar_t *path, const wchar_t *mode)
-{
-#ifndef MS_WINDOWS
- FILE *f;
- char *cpath;
- char cmode[10];
- size_t r;
- r = wcstombs(cmode, mode, 10);
- if (r == (size_t)-1 || r >= 10) {
- errno = EINVAL;
- return NULL;
- }
- cpath = _Py_wchar2char(path);
- if (cpath == NULL)
- return NULL;
- f = fopen(cpath, cmode);
- PyMem_Free(cpath);
- return f;
-#else
- return _wfopen(path, mode);
-#endif
-}
-
-
static int
usage(int exitcode, wchar_t* program)
{
@@ -756,189 +731,6 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv)
*argv = orig_argv;
}
-
-/* Encode a (wide) character string to the locale encoding with the
- surrogateescape error handler (characters in range U+DC80..U+DCFF are
- converted to bytes 0x80..0xFF).
-
- This function is the reverse of _Py_char2wchar().
-
- Return a pointer to a newly allocated byte string (use PyMem_Free() to free
- the memory), or NULL on error (conversion error or memory error). */
-char*
-_Py_wchar2char(const wchar_t *text)
-{
- const size_t len = wcslen(text);
- char *result = NULL, *bytes = NULL;
- size_t i, size, converted;
- wchar_t c, buf[2];
-
- /* The function works in two steps:
- 1. compute the length of the output buffer in bytes (size)
- 2. outputs the bytes */
- size = 0;
- buf[1] = 0;
- while (1) {
- for (i=0; i < len; i++) {
- c = text[i];
- if (c >= 0xdc80 && c <= 0xdcff) {
- /* UTF-8b surrogate */
- if (bytes != NULL) {
- *bytes++ = c - 0xdc00;
- size--;
- }
- else
- size++;
- continue;
- }
- else {
- buf[0] = c;
- if (bytes != NULL)
- converted = wcstombs(bytes, buf, size);
- else
- converted = wcstombs(NULL, buf, 0);
- if (converted == (size_t)-1) {
- if (result != NULL)
- PyMem_Free(result);
- return NULL;
- }
- if (bytes != NULL) {
- bytes += converted;
- size -= converted;
- }
- else
- size += converted;
- }
- }
- if (result != NULL) {
- *bytes = 0;
- break;
- }
-
- size += 1; /* nul byte at the end */
- result = PyMem_Malloc(size);
- if (result == NULL)
- return NULL;
- bytes = result;
- }
- return result;
-}
-
-
-/* Decode a byte string from the locale encoding with the
- surrogateescape error handler (undecodable bytes are decoded as characters
- in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
- character, escape the bytes using the surrogateescape error handler instead
- of decoding them.
-
- Use _Py_wchar2char() to encode the character string back to a byte string.
-
- Return a pointer to a newly allocated (wide) character string (use
- PyMem_Free() to free the memory), or NULL on error (conversion error or
- memory error). */
-wchar_t*
-_Py_char2wchar(char* arg)
-{
- wchar_t *res;
-#ifdef HAVE_BROKEN_MBSTOWCS
- /* Some platforms have a broken implementation of
- * mbstowcs which does not count the characters that
- * would result from conversion. Use an upper bound.
- */
- size_t argsize = strlen(arg);
-#else
- size_t argsize = mbstowcs(NULL, arg, 0);
-#endif
- size_t count;
- unsigned char *in;
- wchar_t *out;
-#ifdef HAVE_MBRTOWC
- mbstate_t mbs;
-#endif
- if (argsize != (size_t)-1) {
- res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
- if (!res)
- goto oom;
- count = mbstowcs(res, arg, argsize+1);
- if (count != (size_t)-1) {
- wchar_t *tmp;
- /* Only use the result if it contains no
- surrogate characters. */
- for (tmp = res; *tmp != 0 &&
- (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
- ;
- if (*tmp == 0)
- return res;
- }
- PyMem_Free(res);
- }
- /* Conversion failed. Fall back to escaping with surrogateescape. */
-#ifdef HAVE_MBRTOWC
- /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
-
- /* Overallocate; as multi-byte characters are in the argument, the
- actual output could use less memory. */
- argsize = strlen(arg) + 1;
- res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
- if (!res) goto oom;
- in = (unsigned char*)arg;
- out = res;
- memset(&mbs, 0, sizeof mbs);
- while (argsize) {
- size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
- if (converted == 0)
- /* Reached end of string; null char stored. */
- break;
- if (converted == (size_t)-2) {
- /* Incomplete character. This should never happen,
- since we provide everything that we have -
- unless there is a bug in the C library, or I
- misunderstood how mbrtowc works. */
- fprintf(stderr, "unexpected mbrtowc result -2\n");
- return NULL;
- }
- if (converted == (size_t)-1) {
- /* Conversion error. Escape as UTF-8b, and start over
- in the initial shift state. */
- *out++ = 0xdc00 + *in++;
- argsize--;
- memset(&mbs, 0, sizeof mbs);
- continue;
- }
- if (*out >= 0xd800 && *out <= 0xdfff) {
- /* Surrogate character. Escape the original
- byte sequence with surrogateescape. */
- argsize -= converted;
- while (converted--)
- *out++ = 0xdc00 + *in++;
- continue;
- }
- /* successfully converted some bytes */
- in += converted;
- argsize -= converted;
- out++;
- }
-#else
- /* Cannot use C locale for escaping; manually escape as if charset
- is ASCII (i.e. escape all bytes > 128. This will still roundtrip
- correctly in the locale's charset, which must be an ASCII superset. */
- res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
- if (!res) goto oom;
- in = (unsigned char*)arg;
- out = res;
- while(*in)
- if(*in < 128)
- *out++ = *in++;
- else
- *out++ = 0xdc00 + *in++;
- *out = 0;
-#endif
- return res;
-oom:
- fprintf(stderr, "out of memory\n");
- return NULL;
-}
-
#ifdef __cplusplus
}
#endif
diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj
index 2ba4722..70747f9 100644
--- a/PCbuild/pythoncore.vcproj
+++ b/PCbuild/pythoncore.vcproj
@@ -723,6 +723,10 @@
>
</File>
<File
+ RelativePath="..\Include\fileutils.h"
+ >
+ </File>
+ <File
RelativePath="..\Include\floatobject.h"
>
</File>
@@ -1687,6 +1691,10 @@
>
</File>
<File
+ RelativePath="..\Python\fileutils.c"
+ >
+ </File>
+ <File
RelativePath="..\Python\formatter_unicode.c"
>
</File>
diff --git a/Python/fileutils.c b/Python/fileutils.c
new file mode 100644
index 0000000..ad8b840
--- /dev/null
+++ b/Python/fileutils.c
@@ -0,0 +1,758 @@
+#include "Python.h"
+
+#ifdef HAVE_STAT
+
+/* Decode a byte string from the locale encoding with the
+ surrogateescape error handler (undecodable bytes are decoded as characters
+ in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
+ character, escape the bytes using the surrogateescape error handler instead
+ of decoding them.
+
+ Use _Py_wchar2char() to encode the character string back to a byte string.
+
+ Return a pointer to a newly allocated (wide) character string (use
+ PyMem_Free() to free the memory), or NULL on error (conversion error or
+ memory error). */
+wchar_t*
+_Py_char2wchar(char* arg)
+{
+ wchar_t *res;
+#ifdef HAVE_BROKEN_MBSTOWCS
+ /* Some platforms have a broken implementation of
+ * mbstowcs which does not count the characters that
+ * would result from conversion. Use an upper bound.
+ */
+ size_t argsize = strlen(arg);
+#else
+ size_t argsize = mbstowcs(NULL, arg, 0);
+#endif
+ size_t count;
+ unsigned char *in;
+ wchar_t *out;
+#ifdef HAVE_MBRTOWC
+ mbstate_t mbs;
+#endif
+ if (argsize != (size_t)-1) {
+ res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
+ if (!res)
+ goto oom;
+ count = mbstowcs(res, arg, argsize+1);
+ if (count != (size_t)-1) {
+ wchar_t *tmp;
+ /* Only use the result if it contains no
+ surrogate characters. */
+ for (tmp = res; *tmp != 0 &&
+ (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
+ ;
+ if (*tmp == 0)
+ return res;
+ }
+ PyMem_Free(res);
+ }
+ /* Conversion failed. Fall back to escaping with surrogateescape. */
+#ifdef HAVE_MBRTOWC
+ /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
+
+ /* Overallocate; as multi-byte characters are in the argument, the
+ actual output could use less memory. */
+ argsize = strlen(arg) + 1;
+ res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
+ if (!res) goto oom;
+ in = (unsigned char*)arg;
+ out = res;
+ memset(&mbs, 0, sizeof mbs);
+ while (argsize) {
+ size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
+ if (converted == 0)
+ /* Reached end of string; null char stored. */
+ break;
+ if (converted == (size_t)-2) {
+ /* Incomplete character. This should never happen,
+ since we provide everything that we have -
+ unless there is a bug in the C library, or I
+ misunderstood how mbrtowc works. */
+ fprintf(stderr, "unexpected mbrtowc result -2\n");
+ return NULL;
+ }
+ if (converted == (size_t)-1) {
+ /* Conversion error. Escape as UTF-8b, and start over
+ in the initial shift state. */
+ *out++ = 0xdc00 + *in++;
+ argsize--;
+ memset(&mbs, 0, sizeof mbs);
+ continue;
+ }
+ if (*out >= 0xd800 && *out <= 0xdfff) {
+ /* Surrogate character. Escape the original
+ byte sequence with surrogateescape. */
+ argsize -= converted;
+ while (converted--)
+ *out++ = 0xdc00 + *in++;
+ continue;
+ }
+ /* successfully converted some bytes */
+ in += converted;
+ argsize -= converted;
+ out++;
+ }
+#else
+ /* Cannot use C locale for escaping; manually escape as if charset
+ is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
+ correctly in the locale's charset, which must be an ASCII superset. */
+ res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
+ if (!res) goto oom;
+ in = (unsigned char*)arg;
+ out = res;
+ while(*in)
+ if(*in < 128)
+ *out++ = *in++;
+ else
+ *out++ = 0xdc00 + *in++;
+ *out = 0;
+#endif
+ return res;
+oom:
+ fprintf(stderr, "out of memory\n");
+ return NULL;
+}
+
+/* Encode a (wide) character string to the locale encoding with the
+ surrogateescape error handler (characters in range U+DC80..U+DCFF are
+ converted to bytes 0x80..0xFF).
+
+ This function is the reverse of _Py_char2wchar().
+
+ Return a pointer to a newly allocated byte string (use PyMem_Free() to free
+ the memory), or NULL on error (conversion error or memory error). */
+char*
+_Py_wchar2char(const wchar_t *text)
+{
+ const size_t len = wcslen(text);
+ char *result = NULL, *bytes = NULL;
+ size_t i, size, converted;
+ wchar_t c, buf[2];
+
+ /* The function works in two steps:
+ 1. compute the length of the output buffer in bytes (size)
+ 2. outputs the bytes */
+ size = 0;
+ buf[1] = 0;
+ while (1) {
+ for (i=0; i < len; i++) {
+ c = text[i];
+ if (c >= 0xdc80 && c <= 0xdcff) {
+ /* UTF-8b surrogate */
+ if (bytes != NULL) {
+ *bytes++ = c - 0xdc00;
+ size--;
+ }
+ else
+ size++;
+ continue;
+ }
+ else {
+ buf[0] = c;
+ if (bytes != NULL)
+ converted = wcstombs(bytes, buf, size);
+ else
+ converted = wcstombs(NULL, buf, 0);
+ if (converted == (size_t)-1) {
+ if (result != NULL)
+ PyMem_Free(result);
+ return NULL;
+ }
+ if (bytes != NULL) {
+ bytes += converted;
+ size -= converted;
+ }
+ else
+ size += converted;
+ }
+ }
+ if (result != NULL) {
+ *bytes = 0;
+ break;
+ }
+
+ size += 1; /* nul byte at the end */
+ result = PyMem_Malloc(size);
+ if (result == NULL)
+ return NULL;
+ bytes = result;
+ }
+ return result;
+}
+
+#if defined(MS_WINDOWS) || defined(HAVE_STAT)
+int
+_Py_wstat(const wchar_t* path, struct stat *buf)
+{
+/* In principle, this should use HAVE__WSTAT, and _wstat
+ should be detected by autoconf. However, no current
+ POSIX system provides that function, so testing for
+ it is pointless.
+ Not sure whether the MS_WINDOWS guards are necessary:
+ perhaps for cygwin/mingw builds?
+*/
+#ifdef MS_WINDOWS
+ return _wstat(path, buf);
+#else
+ int err;
+ char *fname;
+ fname = _Py_wchar2char(path);
+ if (fname == NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+ err = stat(fname, buf);
+ PyMem_Free(fname);
+ return err;
+#endif
+}
+#endif
+
+/* Call _wstat() on Windows, or stat() otherwise. Only fill st_mode
+ attribute on Windows. Return 0 on success, -1 on stat error or (if
+ PyErr_Occurred()) unicode error. */
+
+int
+_Py_stat(PyObject *unicode, struct stat *statbuf)
+{
+#ifdef MS_WINDOWS
+ wchar_t *path;
+ int err;
+ struct _stat wstatbuf;
+
+ path = PyUnicode_AsWideCharString(unicode, NULL);
+ if (path == NULL)
+ return -1;
+ err = _wstat(path, &wstatbuf);
+ PyMem_Free(path);
+ if (!err)
+ statbuf->st_mode = wstatbuf.st_mode;
+ return err;
+#else
+ int ret;
+ PyObject *bytes = PyUnicode_EncodeFSDefault(unicode);
+ if (bytes == NULL)
+ return -1;
+ ret = stat(PyBytes_AS_STRING(bytes), statbuf);
+ Py_DECREF(bytes);
+ return ret;
+#endif
+}
+
+FILE *
+_Py_wfopen(const wchar_t *path, const wchar_t *mode)
+{
+#ifndef MS_WINDOWS
+ FILE *f;
+ char *cpath;
+ char cmode[10];
+ size_t r;
+ r = wcstombs(cmode, mode, 10);
+ if (r == (size_t)-1 || r >= 10) {
+ errno = EINVAL;
+ return NULL;
+ }
+ cpath = _Py_wchar2char(path);
+ if (cpath == NULL)
+ return NULL;
+ f = fopen(cpath, cmode);
+ PyMem_Free(cpath);
+ return f;
+#else
+ return _wfopen(path, mode);
+#endif
+}
+
+/* Call _wfopen() on Windows, or fopen() otherwise. Return the new file
+ object on success, or NULL if the file cannot be opened or (if
+ PyErr_Occurred()) on unicode error */
+
+FILE*
+_Py_fopen(PyObject *unicode, const char *mode)
+{
+#ifdef MS_WINDOWS
+ wchar_t *path;
+ wchar_t wmode[10];
+ int usize;
+ FILE *f;
+
+ usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
+ if (usize == 0)
+ return NULL;
+
+ path = PyUnicode_AsWideCharString(unicode, NULL);
+ if (path == NULL)
+ return NULL;
+ f = _wfopen(path, wmode);
+ PyMem_Free(path);
+ return f;
+#else
+ FILE *f;
+ PyObject *bytes = PyUnicode_EncodeFSDefault(unicode);
+ if (bytes == NULL)
+ return NULL;
+ f = fopen(PyBytes_AS_STRING(bytes), mode);
+ Py_DECREF(bytes);
+ return f;
+#endif
+}
+
+#ifdef HAVE_READLINK
+int
+_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
+{
+ char *cpath;
+ char cbuf[PATH_MAX];
+ int res;
+ size_t r1;
+
+ cpath = _Py_wchar2char(path);
+ if (cpath == NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+ res = (int)readlink(cpath, cbuf, PATH_MAX);
+ PyMem_Free(cpath);
+ if (res == -1)
+ return -1;
+ if (res == PATH_MAX) {
+ errno = EINVAL;
+ return -1;
+ }
+ cbuf[res] = '\0'; /* buf will be null terminated */
+ r1 = mbstowcs(buf, cbuf, bufsiz);
+ if (r1 == -1) {
+ errno = EINVAL;
+ return -1;
+ }
+ return (int)r1;
+}
+#endif
+
+#ifdef HAVE_REALPATH
+wchar_t*
+_Py_wrealpath(const wchar_t *path, wchar_t *resolved_path)
+{
+ char *cpath;
+ char cresolved_path[PATH_MAX];
+ char *res;
+ size_t r;
+ cpath = _Py_wchar2char(path);
+ if (cpath == NULL) {
+ errno = EINVAL;
+ return NULL;
+ }
+ res = realpath(cpath, cresolved_path);
+ PyMem_Free(cpath);
+ if (res == NULL)
+ return NULL;
+ r = mbstowcs(resolved_path, cresolved_path, PATH_MAX);
+ if (r == (size_t)-1 || r >= PATH_MAX) {
+ errno = EINVAL;
+ return NULL;
+ }
+ return resolved_path;
+}
+#endif
+
+wchar_t*
+_Py_wgetcwd(wchar_t *buf, size_t size)
+{
+#ifdef MS_WINDOWS
+ return _wgetcwd(buf, size);
+#else
+ char fname[PATH_MAX];
+ if (getcwd(fname, PATH_MAX) == NULL)
+ return NULL;
+ if (mbstowcs(buf, fname, size) >= size) {
+ errno = ERANGE;
+ return NULL;
+ }
+ return buf;
+#endif
+}
+
+#endif
+
+#include "Python.h"
+
+#ifdef HAVE_STAT
+
+/* Decode a byte string from the locale encoding with the
+ surrogateescape error handler (undecodable bytes are decoded as characters
+ in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
+ character, escape the bytes using the surrogateescape error handler instead
+ of decoding them.
+
+ Use _Py_wchar2char() to encode the character string back to a byte string.
+
+ Return a pointer to a newly allocated (wide) character string (use
+ PyMem_Free() to free the memory), or NULL on error (conversion error or
+ memory error). */
+wchar_t*
+_Py_char2wchar(char* arg)
+{
+ wchar_t *res;
+#ifdef HAVE_BROKEN_MBSTOWCS
+ /* Some platforms have a broken implementation of
+ * mbstowcs which does not count the characters that
+ * would result from conversion. Use an upper bound.
+ */
+ size_t argsize = strlen(arg);
+#else
+ size_t argsize = mbstowcs(NULL, arg, 0);
+#endif
+ size_t count;
+ unsigned char *in;
+ wchar_t *out;
+#ifdef HAVE_MBRTOWC
+ mbstate_t mbs;
+#endif
+ if (argsize != (size_t)-1) {
+ res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
+ if (!res)
+ goto oom;
+ count = mbstowcs(res, arg, argsize+1);
+ if (count != (size_t)-1) {
+ wchar_t *tmp;
+ /* Only use the result if it contains no
+ surrogate characters. */
+ for (tmp = res; *tmp != 0 &&
+ (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
+ ;
+ if (*tmp == 0)
+ return res;
+ }
+ PyMem_Free(res);
+ }
+ /* Conversion failed. Fall back to escaping with surrogateescape. */
+#ifdef HAVE_MBRTOWC
+ /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
+
+ /* Overallocate; as multi-byte characters are in the argument, the
+ actual output could use less memory. */
+ argsize = strlen(arg) + 1;
+ res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
+ if (!res) goto oom;
+ in = (unsigned char*)arg;
+ out = res;
+ memset(&mbs, 0, sizeof mbs);
+ while (argsize) {
+ size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
+ if (converted == 0)
+ /* Reached end of string; null char stored. */
+ break;
+ if (converted == (size_t)-2) {
+ /* Incomplete character. This should never happen,
+ since we provide everything that we have -
+ unless there is a bug in the C library, or I
+ misunderstood how mbrtowc works. */
+ fprintf(stderr, "unexpected mbrtowc result -2\n");
+ return NULL;
+ }
+ if (converted == (size_t)-1) {
+ /* Conversion error. Escape as UTF-8b, and start over
+ in the initial shift state. */
+ *out++ = 0xdc00 + *in++;
+ argsize--;
+ memset(&mbs, 0, sizeof mbs);
+ continue;
+ }
+ if (*out >= 0xd800 && *out <= 0xdfff) {
+ /* Surrogate character. Escape the original
+ byte sequence with surrogateescape. */
+ argsize -= converted;
+ while (converted--)
+ *out++ = 0xdc00 + *in++;
+ continue;
+ }
+ /* successfully converted some bytes */
+ in += converted;
+ argsize -= converted;
+ out++;
+ }
+#else
+ /* Cannot use C locale for escaping; manually escape as if charset
+ is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
+ correctly in the locale's charset, which must be an ASCII superset. */
+ res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
+ if (!res) goto oom;
+ in = (unsigned char*)arg;
+ out = res;
+ while(*in)
+ if(*in < 128)
+ *out++ = *in++;
+ else
+ *out++ = 0xdc00 + *in++;
+ *out = 0;
+#endif
+ return res;
+oom:
+ fprintf(stderr, "out of memory\n");
+ return NULL;
+}
+
+/* Encode a (wide) character string to the locale encoding with the
+ surrogateescape error handler (characters in range U+DC80..U+DCFF are
+ converted to bytes 0x80..0xFF).
+
+ This function is the reverse of _Py_char2wchar().
+
+ Return a pointer to a newly allocated byte string (use PyMem_Free() to free
+ the memory), or NULL on error (conversion error or memory error). */
+char*
+_Py_wchar2char(const wchar_t *text)
+{
+ const size_t len = wcslen(text);
+ char *result = NULL, *bytes = NULL;
+ size_t i, size, converted;
+ wchar_t c, buf[2];
+
+ /* The function works in two steps:
+ 1. compute the length of the output buffer in bytes (size)
+ 2. outputs the bytes */
+ size = 0;
+ buf[1] = 0;
+ while (1) {
+ for (i=0; i < len; i++) {
+ c = text[i];
+ if (c >= 0xdc80 && c <= 0xdcff) {
+ /* UTF-8b surrogate */
+ if (bytes != NULL) {
+ *bytes++ = c - 0xdc00;
+ size--;
+ }
+ else
+ size++;
+ continue;
+ }
+ else {
+ buf[0] = c;
+ if (bytes != NULL)
+ converted = wcstombs(bytes, buf, size);
+ else
+ converted = wcstombs(NULL, buf, 0);
+ if (converted == (size_t)-1) {
+ if (result != NULL)
+ PyMem_Free(result);
+ return NULL;
+ }
+ if (bytes != NULL) {
+ bytes += converted;
+ size -= converted;
+ }
+ else
+ size += converted;
+ }
+ }
+ if (result != NULL) {
+ *bytes = 0;
+ break;
+ }
+
+ size += 1; /* nul byte at the end */
+ result = PyMem_Malloc(size);
+ if (result == NULL)
+ return NULL;
+ bytes = result;
+ }
+ return result;
+}
+
+#if defined(MS_WINDOWS) || defined(HAVE_STAT)
+int
+_Py_wstat(const wchar_t* path, struct stat *buf)
+{
+/* In principle, this should use HAVE__WSTAT, and _wstat
+ should be detected by autoconf. However, no current
+ POSIX system provides that function, so testing for
+ it is pointless.
+ Not sure whether the MS_WINDOWS guards are necessary:
+ perhaps for cygwin/mingw builds?
+*/
+#ifdef MS_WINDOWS
+ return _wstat(path, buf);
+#else
+ int err;
+ char *fname;
+ fname = _Py_wchar2char(path);
+ if (fname == NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+ err = stat(fname, buf);
+ PyMem_Free(fname);
+ return err;
+#endif
+}
+#endif
+
+/* Call _wstat() on Windows, or stat() otherwise. Only fill st_mode
+ attribute on Windows. Return 0 on success, -1 on stat error or (if
+ PyErr_Occurred()) unicode error. */
+
+int
+_Py_stat(PyObject *unicode, struct stat *statbuf)
+{
+#ifdef MS_WINDOWS
+ wchar_t *path;
+ int err;
+ struct _stat wstatbuf;
+
+ path = PyUnicode_AsWideCharString(unicode, NULL);
+ if (path == NULL)
+ return -1;
+ err = _wstat(path, &wstatbuf);
+ PyMem_Free(path);
+ if (!err)
+ statbuf->st_mode = wstatbuf.st_mode;
+ return err;
+#else
+ int ret;
+ PyObject *bytes = PyUnicode_EncodeFSDefault(unicode);
+ if (bytes == NULL)
+ return -1;
+ ret = stat(PyBytes_AS_STRING(bytes), statbuf);
+ Py_DECREF(bytes);
+ return ret;
+#endif
+}
+
+FILE *
+_Py_wfopen(const wchar_t *path, const wchar_t *mode)
+{
+#ifndef MS_WINDOWS
+ FILE *f;
+ char *cpath;
+ char cmode[10];
+ size_t r;
+ r = wcstombs(cmode, mode, 10);
+ if (r == (size_t)-1 || r >= 10) {
+ errno = EINVAL;
+ return NULL;
+ }
+ cpath = _Py_wchar2char(path);
+ if (cpath == NULL)
+ return NULL;
+ f = fopen(cpath, cmode);
+ PyMem_Free(cpath);
+ return f;
+#else
+ return _wfopen(path, mode);
+#endif
+}
+
+/* Call _wfopen() on Windows, or fopen() otherwise. Return the new file
+ object on success, or NULL if the file cannot be opened or (if
+ PyErr_Occurred()) on unicode error */
+
+FILE*
+_Py_fopen(PyObject *unicode, const char *mode)
+{
+#ifdef MS_WINDOWS
+ wchar_t *path;
+ wchar_t wmode[10];
+ int usize;
+ FILE *f;
+
+ usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
+ if (usize == 0)
+ return NULL;
+
+ path = PyUnicode_AsWideCharString(unicode, NULL);
+ if (path == NULL)
+ return NULL;
+ f = _wfopen(path, wmode);
+ PyMem_Free(path);
+ return f;
+#else
+ FILE *f;
+ PyObject *bytes = PyUnicode_EncodeFSDefault(unicode);
+ if (bytes == NULL)
+ return NULL;
+ f = fopen(PyBytes_AS_STRING(bytes), mode);
+ Py_DECREF(bytes);
+ return f;
+#endif
+}
+
+#ifdef HAVE_READLINK
+int
+_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
+{
+ char *cpath;
+ char cbuf[PATH_MAX];
+ int res;
+ size_t r1;
+
+ cpath = _Py_wchar2char(path);
+ if (cpath == NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+ res = (int)readlink(cpath, cbuf, PATH_MAX);
+ PyMem_Free(cpath);
+ if (res == -1)
+ return -1;
+ if (res == PATH_MAX) {
+ errno = EINVAL;
+ return -1;
+ }
+ cbuf[res] = '\0'; /* buf will be null terminated */
+ r1 = mbstowcs(buf, cbuf, bufsiz);
+ if (r1 == -1) {
+ errno = EINVAL;
+ return -1;
+ }
+ return (int)r1;
+}
+#endif
+
+#ifdef HAVE_REALPATH
+wchar_t*
+_Py_wrealpath(const wchar_t *path, wchar_t *resolved_path)
+{
+ char *cpath;
+ char cresolved_path[PATH_MAX];
+ char *res;
+ size_t r;
+ cpath = _Py_wchar2char(path);
+ if (cpath == NULL) {
+ errno = EINVAL;
+ return NULL;
+ }
+ res = realpath(cpath, cresolved_path);
+ PyMem_Free(cpath);
+ if (res == NULL)
+ return NULL;
+ r = mbstowcs(resolved_path, cresolved_path, PATH_MAX);
+ if (r == (size_t)-1 || r >= PATH_MAX) {
+ errno = EINVAL;
+ return NULL;
+ }
+ return resolved_path;
+}
+#endif
+
+wchar_t*
+_Py_wgetcwd(wchar_t *buf, size_t size)
+{
+#ifdef MS_WINDOWS
+ return _wgetcwd(buf, size);
+#else
+ char fname[PATH_MAX];
+ if (getcwd(fname, PATH_MAX) == NULL)
+ return NULL;
+ if (mbstowcs(buf, fname, size) >= size) {
+ errno = ERANGE;
+ return NULL;
+ }
+ return buf;
+#endif
+}
+
+#endif
+
diff --git a/Python/import.c b/Python/import.c
index ab1615c..48fd205 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -1953,73 +1953,8 @@ case_ok(char *buf, Py_ssize_t len, Py_ssize_t namelen, char *name)
#endif
}
-/* Call _wfopen() on Windows, or fopen() otherwise. Return the new file
- object on success, or NULL if the file cannot be open or (if
- PyErr_Occurred()) on unicode error */
-
-FILE*
-_Py_fopen(PyObject *unicode, const char *mode)
-{
-#ifdef MS_WINDOWS
- wchar_t *path;
- wchar_t wmode[10];
- int usize;
- FILE *f;
-
- usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
- if (usize == 0)
- return NULL;
-
- path = PyUnicode_AsWideCharString(unicode, NULL);
- if (path == NULL)
- return NULL;
- f = _wfopen(path, wmode);
- PyMem_Free(path);
- return f;
-#else
- FILE *f;
- PyObject *bytes = PyUnicode_EncodeFSDefault(unicode);
- if (bytes == NULL)
- return NULL;
- f = fopen(PyBytes_AS_STRING(bytes), mode);
- Py_DECREF(bytes);
- return f;
-#endif
-}
-
#ifdef HAVE_STAT
-/* Call _wstat() on Windows, or stat() otherwise. Only fill st_mode
- attribute on Windows. Return 0 on success, -1 on stat error or (if
- PyErr_Occurred()) unicode error. */
-
-int
-_Py_stat(PyObject *unicode, struct stat *statbuf)
-{
-#ifdef MS_WINDOWS
- wchar_t *path;
- int err;
- struct _stat wstatbuf;
-
- path = PyUnicode_AsWideCharString(unicode, NULL);
- if (path == NULL)
- return -1;
- err = _wstat(path, &wstatbuf);
- PyMem_Free(path);
- if (!err)
- statbuf->st_mode = wstatbuf.st_mode;
- return err;
-#else
- int ret;
- PyObject *bytes = PyUnicode_EncodeFSDefault(unicode);
- if (bytes == NULL)
- return -1;
- ret = stat(PyBytes_AS_STRING(bytes), statbuf);
- Py_DECREF(bytes);
- return ret;
-#endif
-}
-
/* Helper to look for __init__.py or __init__.py[co] in potential package */
static int
find_init_module(char *buf)
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index e95a91f..1eba28e 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -1657,32 +1657,6 @@ makeargvobject(int argc, wchar_t **argv)
return av;
}
-#ifdef HAVE_REALPATH
-static wchar_t*
-_wrealpath(const wchar_t *path, wchar_t *resolved_path)
-{
- char *cpath;
- char cresolved_path[PATH_MAX];
- char *res;
- size_t r;
- cpath = _Py_wchar2char(path);
- if (cpath == NULL) {
- errno = EINVAL;
- return NULL;
- }
- res = realpath(cpath, cresolved_path);
- PyMem_Free(cpath);
- if (res == NULL)
- return NULL;
- r = mbstowcs(resolved_path, cresolved_path, PATH_MAX);
- if (r == (size_t)-1 || r >= PATH_MAX) {
- errno = EINVAL;
- return NULL;
- }
- return resolved_path;
-}
-#endif
-
#define _HAVE_SCRIPT_ARGUMENT(argc, argv) \
(argc > 0 && argv0 != NULL && \
wcscmp(argv0, L"-c") != 0 && wcscmp(argv0, L"-m") != 0)
@@ -1696,7 +1670,6 @@ sys_update_path(int argc, wchar_t **argv)
PyObject *a;
PyObject *path;
#ifdef HAVE_READLINK
- extern int _Py_wreadlink(const wchar_t *, wchar_t *, size_t);
wchar_t link[MAXPATHLEN+1];
wchar_t argv0copy[2*MAXPATHLEN+1];
int nr = 0;
@@ -1769,7 +1742,7 @@ sys_update_path(int argc, wchar_t **argv)
#else /* All other filename syntaxes */
if (_HAVE_SCRIPT_ARGUMENT(argc, argv)) {
#if defined(HAVE_REALPATH)
- if (_wrealpath(argv0, fullpath)) {
+ if (_Py_wrealpath(argv0, fullpath)) {
argv0 = fullpath;
}
#endif