From ef8b654bbea15dc55767a7095e01dff7a3ca86cb Mon Sep 17 00:00:00 2001 From: Mark Hammond Date: Sun, 13 May 2001 08:04:26 +0000 Subject: Add support for Windows using "mbcs" as the default Unicode encoding when dealing with the file system. As discussed on python-dev and in patch 410465. --- Lib/ntpath.py | 17 ++--- Lib/test/output/test_unicode_file | 2 + Lib/test/test_support.py | 4 ++ Lib/test/test_unicode_file.py | 81 ++++++++++++++++++++++ Modules/posixmodule.c | 139 +++++++++++++++++++++++++++----------- Python/bltinmodule.c | 8 ++- Python/getargs.c | 2 +- 7 files changed, 197 insertions(+), 56 deletions(-) create mode 100644 Lib/test/output/test_unicode_file create mode 100644 Lib/test/test_unicode_file.py diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 63860ce7..47c1acf 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -404,21 +404,12 @@ def normpath(path): # Return an absolute path. def abspath(path): """Return the absolute version of a path""" - try: - import win32api - except ImportError: - global abspath - def _abspath(path): - if not isabs(path): - path = join(os.getcwd(), path) - return normpath(path) - abspath = _abspath - return _abspath(path) if path: # Empty path must return current working directory. + from nt import _getfullpathname try: - path = win32api.GetFullPathName(path) - except win32api.error: - pass # Bad path - return unchanged. + path = _getfullpathname(path) + except WindowsError: + pass # Bad path - return unchanged. else: path = os.getcwd() return normpath(path) diff --git a/Lib/test/output/test_unicode_file b/Lib/test/output/test_unicode_file new file mode 100644 index 0000000..6e984d8 --- /dev/null +++ b/Lib/test/output/test_unicode_file @@ -0,0 +1,2 @@ +test_unicode_file +All the Unicode tests appeared to work diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py index 330b9c8..88a3c5e 100644 --- a/Lib/test/test_support.py +++ b/Lib/test/test_support.py @@ -63,6 +63,10 @@ if os.name == 'java': TESTFN = '$test' elif os.name != 'riscos': TESTFN = '@test' + # Unicode name only used if TEST_FN_ENCODING exists for the platform. + TESTFN_UNICODE=u"@test-\xe0\xf2" # 2 latin characters. + if os.name=="nt": + TESTFN_ENCODING="mbcs" else: TESTFN = 'test' del os diff --git a/Lib/test/test_unicode_file.py b/Lib/test/test_unicode_file.py new file mode 100644 index 0000000..7078197 --- /dev/null +++ b/Lib/test/test_unicode_file.py @@ -0,0 +1,81 @@ +# Test some Unicode file name semantics +# We dont test many operations on files other than +# that their names can be used with Unicode characters. +import os + +from test_support import verify, TestSkipped, TESTFN_UNICODE +try: + from test_support import TESTFN_ENCODING +except ImportError: + raise TestSkipped("No Unicode filesystem semantics on this platform.") + +TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING) + +# Check with creation as Unicode string. +f = open(TESTFN_UNICODE, 'wb') +if not os.path.isfile(TESTFN_UNICODE): + print "File doesn't exist after creating it" + +if not os.path.isfile(TESTFN_ENCODED): + print "File doesn't exist (encoded string) after creating it" + +f.close() + +# Test stat and chmod +if os.stat(TESTFN_ENCODED) != os.stat(TESTFN_UNICODE): + print "os.stat() did not agree on the 2 filenames" +os.chmod(TESTFN_ENCODED, 0777) +os.chmod(TESTFN_UNICODE, 0777) + +# Test rename +os.rename(TESTFN_ENCODED, TESTFN_ENCODED + ".new") +os.rename(TESTFN_UNICODE+".new", TESTFN_ENCODED) + +os.unlink(TESTFN_ENCODED) +if os.path.isfile(TESTFN_ENCODED) or \ + os.path.isfile(TESTFN_UNICODE): + print "File exists after deleting it" + +# Check with creation as encoded string. +f = open(TESTFN_ENCODED, 'wb') +if not os.path.isfile(TESTFN_UNICODE) or \ + not os.path.isfile(TESTFN_ENCODED): + print "File doesn't exist after creating it" + +path, base = os.path.split(os.path.abspath(TESTFN_ENCODED)) +if base not in os.listdir(path): + print "Filename did not appear in os.listdir()" + +f.close() +os.unlink(TESTFN_UNICODE) +if os.path.isfile(TESTFN_ENCODED) or \ + os.path.isfile(TESTFN_UNICODE): + print "File exists after deleting it" + +# test os.open +f = os.open(TESTFN_ENCODED, os.O_CREAT) +if not os.path.isfile(TESTFN_UNICODE) or \ + not os.path.isfile(TESTFN_ENCODED): + print "File doesn't exist after creating it" +os.close(f) +os.unlink(TESTFN_UNICODE) + +# Test directories etc +cwd = os.getcwd() +abs_encoded = os.path.abspath(TESTFN_ENCODED) + ".dir" +abs_unicode = os.path.abspath(TESTFN_UNICODE) + ".dir" +os.mkdir(abs_encoded) +try: + os.chdir(abs_encoded) + os.chdir(abs_unicode) +finally: + os.chdir(cwd) + os.rmdir(abs_unicode) +os.mkdir(abs_unicode) +try: + os.chdir(abs_encoded) + os.chdir(abs_unicode) +finally: + os.chdir(cwd) + os.rmdir(abs_encoded) +print "All the Unicode tests appeared to work" diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index cb8a1d1..87d584e 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -233,6 +233,16 @@ extern int lstat(const char *, struct stat *); #endif /* MS_WIN32 */ #endif /* _MSC_VER */ +/* The default encoding used by the platform file system APIs + If non-NULL, this is almost certainly different than the default + encoding for strings (otherwise it can remain NULL!) +*/ +#ifdef MS_WIN32 +const char *Py_FileSystemDefaultEncoding = "mbcs"; +#else +const char *Py_FileSystemDefaultEncoding = NULL; /* use default */ +#endif + #if defined(PYCC_VACPP) && defined(PYOS_OS2) #include #endif /* OS2 */ @@ -354,6 +364,14 @@ posix_error_with_filename(char* name) return PyErr_SetFromErrnoWithFilename(PyExc_OSError, name); } +static PyObject * +posix_error_with_allocated_filename(char* name) +{ + PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError, name); + PyMem_Free(name); + return rc; +} + #ifdef MS_WIN32 static PyObject * win32_error(char* function, char* filename) @@ -468,15 +486,17 @@ posix_int(PyObject *args, char *format, int (*func)(int)) static PyObject * posix_1str(PyObject *args, char *format, int (*func)(const char*)) { - char *path1; + char *path1 = NULL; int res; - if (!PyArg_ParseTuple(args, format, &path1)) + if (!PyArg_ParseTuple(args, format, + Py_FileSystemDefaultEncoding, &path1)) return NULL; Py_BEGIN_ALLOW_THREADS res = (*func)(path1); Py_END_ALLOW_THREADS if (res < 0) - return posix_error_with_filename(path1); + return posix_error_with_allocated_filename(path1); + PyMem_Free(path1); Py_INCREF(Py_None); return Py_None; } @@ -485,13 +505,17 @@ static PyObject * posix_2str(PyObject *args, char *format, int (*func)(const char *, const char *)) { - char *path1, *path2; + char *path1 = NULL, *path2 = NULL; int res; - if (!PyArg_ParseTuple(args, format, &path1, &path2)) + if (!PyArg_ParseTuple(args, format, + Py_FileSystemDefaultEncoding, &path1, + Py_FileSystemDefaultEncoding, &path2)) return NULL; Py_BEGIN_ALLOW_THREADS res = (*func)(path1, path2); Py_END_ALLOW_THREADS + PyMem_Free(path1); + PyMem_Free(path2); if (res != 0) /* XXX how to report both path1 and path2??? */ return posix_error(); @@ -551,7 +575,7 @@ posix_do_stat(PyObject *self, PyObject *args, char *format, int (*statfunc)(const char *, STRUCT_STAT *)) { STRUCT_STAT st; - char *path; + char *path = NULL; int res; #ifdef MS_WIN32 @@ -559,13 +583,15 @@ posix_do_stat(PyObject *self, PyObject *args, char *format, char pathcopy[MAX_PATH]; #endif /* MS_WIN32 */ - if (!PyArg_ParseTuple(args, format, &path)) + if (!PyArg_ParseTuple(args, format, + Py_FileSystemDefaultEncoding, &path)) return NULL; #ifdef MS_WIN32 pathlen = strlen(path); /* the library call can blow up if the file name is too long! */ if (pathlen > MAX_PATH) { + PyMem_Free(path); errno = ENAMETOOLONG; return posix_error(); } @@ -588,8 +614,9 @@ posix_do_stat(PyObject *self, PyObject *args, char *format, res = (*statfunc)(path, &st); Py_END_ALLOW_THREADS if (res != 0) - return posix_error_with_filename(path); + return posix_error_with_allocated_filename(path); + PyMem_Free(path); return _pystat_fromstructstat(st); } @@ -681,7 +708,7 @@ Change the current working directory to the specified path."; static PyObject * posix_chdir(PyObject *self, PyObject *args) { - return posix_1str(args, "s:chdir", chdir); + return posix_1str(args, "et:chdir", chdir); } @@ -692,16 +719,18 @@ Change the access permissions of a file."; static PyObject * posix_chmod(PyObject *self, PyObject *args) { - char *path; + char *path = NULL; int i; int res; - if (!PyArg_ParseTuple(args, "si", &path, &i)) + if (!PyArg_ParseTuple(args, "eti", Py_FileSystemDefaultEncoding, + &path, &i)) return NULL; Py_BEGIN_ALLOW_THREADS res = chmod(path, i); Py_END_ALLOW_THREADS if (res < 0) - return posix_error_with_filename(path); + return posix_error_with_allocated_filename(path); + PyMem_Free(path); Py_INCREF(Py_None); return Py_None; } @@ -746,16 +775,19 @@ Change the owner and group id of path to the numeric uid and gid."; static PyObject * posix_chown(PyObject *self, PyObject *args) { - char *path; + char *path = NULL; int uid, gid; int res; - if (!PyArg_ParseTuple(args, "sii:chown", &path, &uid, &gid)) + if (!PyArg_ParseTuple(args, "etii:chown", + Py_FileSystemDefaultEncoding, &path, + &uid, &gid)) return NULL; Py_BEGIN_ALLOW_THREADS res = chown(path, (uid_t) uid, (gid_t) gid); Py_END_ALLOW_THREADS if (res < 0) - return posix_error_with_filename(path); + return posix_error_with_allocated_filename(path); + PyMem_Free(path); Py_INCREF(Py_None); return Py_None; } @@ -792,7 +824,7 @@ Create a hard link to a file."; static PyObject * posix_link(PyObject *self, PyObject *args) { - return posix_2str(args, "ss:link", link); + return posix_2str(args, "etet:link", link); } #endif /* HAVE_LINK */ @@ -813,21 +845,18 @@ posix_listdir(PyObject *self, PyObject *args) in separate files instead of having them all here... */ #if defined(MS_WIN32) && !defined(HAVE_OPENDIR) - char *name; - int len; PyObject *d, *v; HANDLE hFindFile; WIN32_FIND_DATA FileData; - char namebuf[MAX_PATH+5]; + /* MAX_PATH characters could mean a bigger encoded string */ + char namebuf[MAX_PATH*2+5]; + char *bufptr = namebuf; + int len = sizeof(namebuf)/sizeof(namebuf[0]); char ch; - if (!PyArg_ParseTuple(args, "t#:listdir", &name, &len)) - return NULL; - if (len >= MAX_PATH) { - PyErr_SetString(PyExc_ValueError, "path too long"); + if (!PyArg_ParseTuple(args, "et#:listdir", + Py_FileSystemDefaultEncoding, &bufptr, &len)) return NULL; - } - strcpy(namebuf, name); ch = namebuf[len-1]; if (ch != '/' && ch != '\\' && ch != ':') namebuf[len++] = '/'; @@ -841,7 +870,7 @@ posix_listdir(PyObject *self, PyObject *args) errno = GetLastError(); if (errno == ERROR_FILE_NOT_FOUND) return PyList_New(0); - return win32_error("FindFirstFile", name); + return win32_error("FindFirstFile", namebuf); } do { if (FileData.cFileName[0] == '.' && @@ -865,7 +894,7 @@ posix_listdir(PyObject *self, PyObject *args) } while (FindNextFile(hFindFile, &FileData) == TRUE); if (FindClose(hFindFile) == FALSE) - return win32_error("FindClose", name); + return win32_error("FindClose", namebuf); return d; @@ -1042,6 +1071,28 @@ posix_listdir(PyObject *self, PyObject *args) #endif /* which OS */ } /* end of posix_listdir */ +#ifdef MS_WIN32 +/* A helper function for abspath on win32 */ +static PyObject * +posix__getfullpathname(PyObject *self, PyObject *args) +{ + /* assume encoded strings wont more than double no of chars */ + char inbuf[MAX_PATH*2]; + char *inbufp = inbuf; + int insize = sizeof(inbuf)/sizeof(inbuf[0]); + char outbuf[MAX_PATH*2]; + char *temp; + if (!PyArg_ParseTuple (args, "et#:_getfullpathname", + Py_FileSystemDefaultEncoding, &inbufp, + &insize)) + return NULL; + if (!GetFullPathName(inbuf, sizeof(outbuf)/sizeof(outbuf[0]), + outbuf, &temp)) + return win32_error("GetFullPathName", inbuf); + return PyString_FromString(outbuf); +} /* end of posix__getfullpathname */ +#endif /* MS_WIN32 */ + static char posix_mkdir__doc__[] = "mkdir(path [, mode=0777]) -> None\n\ Create a directory."; @@ -1050,9 +1101,10 @@ static PyObject * posix_mkdir(PyObject *self, PyObject *args) { int res; - char *path; + char *path = NULL; int mode = 0777; - if (!PyArg_ParseTuple(args, "s|i:mkdir", &path, &mode)) + if (!PyArg_ParseTuple(args, "et|i:mkdir", + Py_FileSystemDefaultEncoding, &path, &mode)) return NULL; Py_BEGIN_ALLOW_THREADS #if ( defined(__WATCOMC__) || defined(_MSC_VER) || defined(PYCC_VACPP) ) && !defined(__QNX__) @@ -1062,7 +1114,8 @@ posix_mkdir(PyObject *self, PyObject *args) #endif Py_END_ALLOW_THREADS if (res < 0) - return posix_error_with_filename(path); + return posix_error_with_allocated_filename(path); + PyMem_Free(path); Py_INCREF(Py_None); return Py_None; } @@ -1095,7 +1148,7 @@ Rename a file or directory."; static PyObject * posix_rename(PyObject *self, PyObject *args) { - return posix_2str(args, "ss:rename", rename); + return posix_2str(args, "etet:rename", rename); } @@ -1106,7 +1159,7 @@ Remove a directory."; static PyObject * posix_rmdir(PyObject *self, PyObject *args) { - return posix_1str(args, "s:rmdir", rmdir); + return posix_1str(args, "et:rmdir", rmdir); } @@ -1117,7 +1170,7 @@ Perform a stat system call on the given path."; static PyObject * posix_stat(PyObject *self, PyObject *args) { - return posix_do_stat(self, args, "s:stat", STAT); + return posix_do_stat(self, args, "et:stat", STAT); } @@ -1169,7 +1222,7 @@ Remove a file (same as unlink(path))."; static PyObject * posix_unlink(PyObject *self, PyObject *args) { - return posix_1str(args, "s:remove", unlink); + return posix_1str(args, "et:remove", unlink); } @@ -3113,9 +3166,9 @@ static PyObject * posix_lstat(PyObject *self, PyObject *args) { #ifdef HAVE_LSTAT - return posix_do_stat(self, args, "s:lstat", lstat); + return posix_do_stat(self, args, "et:lstat", lstat); #else /* !HAVE_LSTAT */ - return posix_do_stat(self, args, "s:lstat", STAT); + return posix_do_stat(self, args, "et:lstat", STAT); #endif /* !HAVE_LSTAT */ } @@ -3151,7 +3204,7 @@ Create a symbolic link."; static PyObject * posix_symlink(PyObject *self, PyObject *args) { - return posix_2str(args, "ss:symlink", symlink); + return posix_2str(args, "etet:symlink", symlink); } #endif /* HAVE_SYMLINK */ @@ -3328,18 +3381,21 @@ Open a file (for low level IO)."; static PyObject * posix_open(PyObject *self, PyObject *args) { - char *file; + char *file = NULL; int flag; int mode = 0777; int fd; - if (!PyArg_ParseTuple(args, "si|i", &file, &flag, &mode)) + if (!PyArg_ParseTuple(args, "eti|i", + Py_FileSystemDefaultEncoding, &file, + &flag, &mode)) return NULL; Py_BEGIN_ALLOW_THREADS fd = open(file, flag, mode); Py_END_ALLOW_THREADS if (fd < 0) - return posix_error_with_filename(file); + return posix_error_with_allocated_filename(file); + PyMem_Free(file); return PyInt_FromLong((long)fd); } @@ -5458,6 +5514,9 @@ static PyMethodDef posix_methods[] = { {"pathconf", posix_pathconf, METH_VARARGS, posix_pathconf__doc__}, #endif {"abort", posix_abort, METH_VARARGS, posix_abort__doc__}, +#ifdef MS_WIN32 + {"_getfullpathname", posix__getfullpathname, METH_VARARGS, NULL}, +#endif {NULL, NULL} /* Sentinel */ }; diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index cc1bc95..5ffecb3 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -13,6 +13,8 @@ #include #endif +extern const char *Py_FileSystemDefaultEncoding; + /* Forward */ static PyObject *filterstring(PyObject *, PyObject *); static PyObject *filtertuple (PyObject *, PyObject *); @@ -1530,14 +1532,16 @@ Return the octal representation of an integer or long integer."; static PyObject * builtin_open(PyObject *self, PyObject *args) { - char *name; + char *name = NULL; char *mode = "r"; int bufsize = -1; PyObject *f; - if (!PyArg_ParseTuple(args, "s|si:open", &name, &mode, &bufsize)) + if (!PyArg_ParseTuple(args, "et|si:open", Py_FileSystemDefaultEncoding, + &name, &mode, &bufsize)) return NULL; f = PyFile_FromString(name, mode); + PyMem_Free(name); /* free the encoded string */ if (f != NULL) PyFile_SetBufSize(f, bufsize); return f; diff --git a/Python/getargs.c b/Python/getargs.c index 6eabd75..d78faa7 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -698,7 +698,7 @@ convertsimple1(PyObject *arg, char **p_format, va_list *p_va) 's' (recode all objects via Unicode) or 't' (only recode non-string objects) */ - if (*format != 's') + if (*format == 's') recode_strings = 1; else if (*format == 't') recode_strings = 0; -- cgit v0.12