summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Hammond <mhammond@skippinet.com.au>2001-05-13 08:04:26 (GMT)
committerMark Hammond <mhammond@skippinet.com.au>2001-05-13 08:04:26 (GMT)
commitef8b654bbea15dc55767a7095e01dff7a3ca86cb (patch)
tree778653b95245ae2d31e5a5ed94c0c491e1687b15
parent342c65e19ac0cc47bf2b21026c76e63440b23748 (diff)
downloadcpython-ef8b654bbea15dc55767a7095e01dff7a3ca86cb.zip
cpython-ef8b654bbea15dc55767a7095e01dff7a3ca86cb.tar.gz
cpython-ef8b654bbea15dc55767a7095e01dff7a3ca86cb.tar.bz2
Add support for Windows using "mbcs" as the default Unicode encoding when dealing with the file system. As discussed on python-dev and in patch 410465.
-rw-r--r--Lib/ntpath.py17
-rw-r--r--Lib/test/output/test_unicode_file2
-rw-r--r--Lib/test/test_support.py4
-rw-r--r--Lib/test/test_unicode_file.py81
-rw-r--r--Modules/posixmodule.c139
-rw-r--r--Python/bltinmodule.c8
-rw-r--r--Python/getargs.c2
7 files changed, 197 insertions, 56 deletions
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index 63860ce7..47c1acf 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -404,21 +404,12 @@ def normpath(path):
# Return an absolute path.
def abspath(path):
"""Return the absolute version of a path"""
- try:
- import win32api
- except ImportError:
- global abspath
- def _abspath(path):
- if not isabs(path):
- path = join(os.getcwd(), path)
- return normpath(path)
- abspath = _abspath
- return _abspath(path)
if path: # Empty path must return current working directory.
+ from nt import _getfullpathname
try:
- path = win32api.GetFullPathName(path)
- except win32api.error:
- pass # Bad path - return unchanged.
+ path = _getfullpathname(path)
+ except WindowsError:
+ pass # Bad path - return unchanged.
else:
path = os.getcwd()
return normpath(path)
diff --git a/Lib/test/output/test_unicode_file b/Lib/test/output/test_unicode_file
new file mode 100644
index 0000000..6e984d8
--- /dev/null
+++ b/Lib/test/output/test_unicode_file
@@ -0,0 +1,2 @@
+test_unicode_file
+All the Unicode tests appeared to work
diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py
index 330b9c8..88a3c5e 100644
--- a/Lib/test/test_support.py
+++ b/Lib/test/test_support.py
@@ -63,6 +63,10 @@ if os.name == 'java':
TESTFN = '$test'
elif os.name != 'riscos':
TESTFN = '@test'
+ # Unicode name only used if TEST_FN_ENCODING exists for the platform.
+ TESTFN_UNICODE=u"@test-\xe0\xf2" # 2 latin characters.
+ if os.name=="nt":
+ TESTFN_ENCODING="mbcs"
else:
TESTFN = 'test'
del os
diff --git a/Lib/test/test_unicode_file.py b/Lib/test/test_unicode_file.py
new file mode 100644
index 0000000..7078197
--- /dev/null
+++ b/Lib/test/test_unicode_file.py
@@ -0,0 +1,81 @@
+# Test some Unicode file name semantics
+# We dont test many operations on files other than
+# that their names can be used with Unicode characters.
+import os
+
+from test_support import verify, TestSkipped, TESTFN_UNICODE
+try:
+ from test_support import TESTFN_ENCODING
+except ImportError:
+ raise TestSkipped("No Unicode filesystem semantics on this platform.")
+
+TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING)
+
+# Check with creation as Unicode string.
+f = open(TESTFN_UNICODE, 'wb')
+if not os.path.isfile(TESTFN_UNICODE):
+ print "File doesn't exist after creating it"
+
+if not os.path.isfile(TESTFN_ENCODED):
+ print "File doesn't exist (encoded string) after creating it"
+
+f.close()
+
+# Test stat and chmod
+if os.stat(TESTFN_ENCODED) != os.stat(TESTFN_UNICODE):
+ print "os.stat() did not agree on the 2 filenames"
+os.chmod(TESTFN_ENCODED, 0777)
+os.chmod(TESTFN_UNICODE, 0777)
+
+# Test rename
+os.rename(TESTFN_ENCODED, TESTFN_ENCODED + ".new")
+os.rename(TESTFN_UNICODE+".new", TESTFN_ENCODED)
+
+os.unlink(TESTFN_ENCODED)
+if os.path.isfile(TESTFN_ENCODED) or \
+ os.path.isfile(TESTFN_UNICODE):
+ print "File exists after deleting it"
+
+# Check with creation as encoded string.
+f = open(TESTFN_ENCODED, 'wb')
+if not os.path.isfile(TESTFN_UNICODE) or \
+ not os.path.isfile(TESTFN_ENCODED):
+ print "File doesn't exist after creating it"
+
+path, base = os.path.split(os.path.abspath(TESTFN_ENCODED))
+if base not in os.listdir(path):
+ print "Filename did not appear in os.listdir()"
+
+f.close()
+os.unlink(TESTFN_UNICODE)
+if os.path.isfile(TESTFN_ENCODED) or \
+ os.path.isfile(TESTFN_UNICODE):
+ print "File exists after deleting it"
+
+# test os.open
+f = os.open(TESTFN_ENCODED, os.O_CREAT)
+if not os.path.isfile(TESTFN_UNICODE) or \
+ not os.path.isfile(TESTFN_ENCODED):
+ print "File doesn't exist after creating it"
+os.close(f)
+os.unlink(TESTFN_UNICODE)
+
+# Test directories etc
+cwd = os.getcwd()
+abs_encoded = os.path.abspath(TESTFN_ENCODED) + ".dir"
+abs_unicode = os.path.abspath(TESTFN_UNICODE) + ".dir"
+os.mkdir(abs_encoded)
+try:
+ os.chdir(abs_encoded)
+ os.chdir(abs_unicode)
+finally:
+ os.chdir(cwd)
+ os.rmdir(abs_unicode)
+os.mkdir(abs_unicode)
+try:
+ os.chdir(abs_encoded)
+ os.chdir(abs_unicode)
+finally:
+ os.chdir(cwd)
+ os.rmdir(abs_encoded)
+print "All the Unicode tests appeared to work"
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index cb8a1d1..87d584e 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -233,6 +233,16 @@ extern int lstat(const char *, struct stat *);
#endif /* MS_WIN32 */
#endif /* _MSC_VER */
+/* The default encoding used by the platform file system APIs
+ If non-NULL, this is almost certainly different than the default
+ encoding for strings (otherwise it can remain NULL!)
+*/
+#ifdef MS_WIN32
+const char *Py_FileSystemDefaultEncoding = "mbcs";
+#else
+const char *Py_FileSystemDefaultEncoding = NULL; /* use default */
+#endif
+
#if defined(PYCC_VACPP) && defined(PYOS_OS2)
#include <io.h>
#endif /* OS2 */
@@ -354,6 +364,14 @@ posix_error_with_filename(char* name)
return PyErr_SetFromErrnoWithFilename(PyExc_OSError, name);
}
+static PyObject *
+posix_error_with_allocated_filename(char* name)
+{
+ PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError, name);
+ PyMem_Free(name);
+ return rc;
+}
+
#ifdef MS_WIN32
static PyObject *
win32_error(char* function, char* filename)
@@ -468,15 +486,17 @@ posix_int(PyObject *args, char *format, int (*func)(int))
static PyObject *
posix_1str(PyObject *args, char *format, int (*func)(const char*))
{
- char *path1;
+ char *path1 = NULL;
int res;
- if (!PyArg_ParseTuple(args, format, &path1))
+ if (!PyArg_ParseTuple(args, format,
+ Py_FileSystemDefaultEncoding, &path1))
return NULL;
Py_BEGIN_ALLOW_THREADS
res = (*func)(path1);
Py_END_ALLOW_THREADS
if (res < 0)
- return posix_error_with_filename(path1);
+ return posix_error_with_allocated_filename(path1);
+ PyMem_Free(path1);
Py_INCREF(Py_None);
return Py_None;
}
@@ -485,13 +505,17 @@ static PyObject *
posix_2str(PyObject *args, char *format,
int (*func)(const char *, const char *))
{
- char *path1, *path2;
+ char *path1 = NULL, *path2 = NULL;
int res;
- if (!PyArg_ParseTuple(args, format, &path1, &path2))
+ if (!PyArg_ParseTuple(args, format,
+ Py_FileSystemDefaultEncoding, &path1,
+ Py_FileSystemDefaultEncoding, &path2))
return NULL;
Py_BEGIN_ALLOW_THREADS
res = (*func)(path1, path2);
Py_END_ALLOW_THREADS
+ PyMem_Free(path1);
+ PyMem_Free(path2);
if (res != 0)
/* XXX how to report both path1 and path2??? */
return posix_error();
@@ -551,7 +575,7 @@ posix_do_stat(PyObject *self, PyObject *args, char *format,
int (*statfunc)(const char *, STRUCT_STAT *))
{
STRUCT_STAT st;
- char *path;
+ char *path = NULL;
int res;
#ifdef MS_WIN32
@@ -559,13 +583,15 @@ posix_do_stat(PyObject *self, PyObject *args, char *format,
char pathcopy[MAX_PATH];
#endif /* MS_WIN32 */
- if (!PyArg_ParseTuple(args, format, &path))
+ if (!PyArg_ParseTuple(args, format,
+ Py_FileSystemDefaultEncoding, &path))
return NULL;
#ifdef MS_WIN32
pathlen = strlen(path);
/* the library call can blow up if the file name is too long! */
if (pathlen > MAX_PATH) {
+ PyMem_Free(path);
errno = ENAMETOOLONG;
return posix_error();
}
@@ -588,8 +614,9 @@ posix_do_stat(PyObject *self, PyObject *args, char *format,
res = (*statfunc)(path, &st);
Py_END_ALLOW_THREADS
if (res != 0)
- return posix_error_with_filename(path);
+ return posix_error_with_allocated_filename(path);
+ PyMem_Free(path);
return _pystat_fromstructstat(st);
}
@@ -681,7 +708,7 @@ Change the current working directory to the specified path.";
static PyObject *
posix_chdir(PyObject *self, PyObject *args)
{
- return posix_1str(args, "s:chdir", chdir);
+ return posix_1str(args, "et:chdir", chdir);
}
@@ -692,16 +719,18 @@ Change the access permissions of a file.";
static PyObject *
posix_chmod(PyObject *self, PyObject *args)
{
- char *path;
+ char *path = NULL;
int i;
int res;
- if (!PyArg_ParseTuple(args, "si", &path, &i))
+ if (!PyArg_ParseTuple(args, "eti", Py_FileSystemDefaultEncoding,
+ &path, &i))
return NULL;
Py_BEGIN_ALLOW_THREADS
res = chmod(path, i);
Py_END_ALLOW_THREADS
if (res < 0)
- return posix_error_with_filename(path);
+ return posix_error_with_allocated_filename(path);
+ PyMem_Free(path);
Py_INCREF(Py_None);
return Py_None;
}
@@ -746,16 +775,19 @@ Change the owner and group id of path to the numeric uid and gid.";
static PyObject *
posix_chown(PyObject *self, PyObject *args)
{
- char *path;
+ char *path = NULL;
int uid, gid;
int res;
- if (!PyArg_ParseTuple(args, "sii:chown", &path, &uid, &gid))
+ if (!PyArg_ParseTuple(args, "etii:chown",
+ Py_FileSystemDefaultEncoding, &path,
+ &uid, &gid))
return NULL;
Py_BEGIN_ALLOW_THREADS
res = chown(path, (uid_t) uid, (gid_t) gid);
Py_END_ALLOW_THREADS
if (res < 0)
- return posix_error_with_filename(path);
+ return posix_error_with_allocated_filename(path);
+ PyMem_Free(path);
Py_INCREF(Py_None);
return Py_None;
}
@@ -792,7 +824,7 @@ Create a hard link to a file.";
static PyObject *
posix_link(PyObject *self, PyObject *args)
{
- return posix_2str(args, "ss:link", link);
+ return posix_2str(args, "etet:link", link);
}
#endif /* HAVE_LINK */
@@ -813,21 +845,18 @@ posix_listdir(PyObject *self, PyObject *args)
in separate files instead of having them all here... */
#if defined(MS_WIN32) && !defined(HAVE_OPENDIR)
- char *name;
- int len;
PyObject *d, *v;
HANDLE hFindFile;
WIN32_FIND_DATA FileData;
- char namebuf[MAX_PATH+5];
+ /* MAX_PATH characters could mean a bigger encoded string */
+ char namebuf[MAX_PATH*2+5];
+ char *bufptr = namebuf;
+ int len = sizeof(namebuf)/sizeof(namebuf[0]);
char ch;
- if (!PyArg_ParseTuple(args, "t#:listdir", &name, &len))
- return NULL;
- if (len >= MAX_PATH) {
- PyErr_SetString(PyExc_ValueError, "path too long");
+ if (!PyArg_ParseTuple(args, "et#:listdir",
+ Py_FileSystemDefaultEncoding, &bufptr, &len))
return NULL;
- }
- strcpy(namebuf, name);
ch = namebuf[len-1];
if (ch != '/' && ch != '\\' && ch != ':')
namebuf[len++] = '/';
@@ -841,7 +870,7 @@ posix_listdir(PyObject *self, PyObject *args)
errno = GetLastError();
if (errno == ERROR_FILE_NOT_FOUND)
return PyList_New(0);
- return win32_error("FindFirstFile", name);
+ return win32_error("FindFirstFile", namebuf);
}
do {
if (FileData.cFileName[0] == '.' &&
@@ -865,7 +894,7 @@ posix_listdir(PyObject *self, PyObject *args)
} while (FindNextFile(hFindFile, &FileData) == TRUE);
if (FindClose(hFindFile) == FALSE)
- return win32_error("FindClose", name);
+ return win32_error("FindClose", namebuf);
return d;
@@ -1042,6 +1071,28 @@ posix_listdir(PyObject *self, PyObject *args)
#endif /* which OS */
} /* end of posix_listdir */
+#ifdef MS_WIN32
+/* A helper function for abspath on win32 */
+static PyObject *
+posix__getfullpathname(PyObject *self, PyObject *args)
+{
+ /* assume encoded strings wont more than double no of chars */
+ char inbuf[MAX_PATH*2];
+ char *inbufp = inbuf;
+ int insize = sizeof(inbuf)/sizeof(inbuf[0]);
+ char outbuf[MAX_PATH*2];
+ char *temp;
+ if (!PyArg_ParseTuple (args, "et#:_getfullpathname",
+ Py_FileSystemDefaultEncoding, &inbufp,
+ &insize))
+ return NULL;
+ if (!GetFullPathName(inbuf, sizeof(outbuf)/sizeof(outbuf[0]),
+ outbuf, &temp))
+ return win32_error("GetFullPathName", inbuf);
+ return PyString_FromString(outbuf);
+} /* end of posix__getfullpathname */
+#endif /* MS_WIN32 */
+
static char posix_mkdir__doc__[] =
"mkdir(path [, mode=0777]) -> None\n\
Create a directory.";
@@ -1050,9 +1101,10 @@ static PyObject *
posix_mkdir(PyObject *self, PyObject *args)
{
int res;
- char *path;
+ char *path = NULL;
int mode = 0777;
- if (!PyArg_ParseTuple(args, "s|i:mkdir", &path, &mode))
+ if (!PyArg_ParseTuple(args, "et|i:mkdir",
+ Py_FileSystemDefaultEncoding, &path, &mode))
return NULL;
Py_BEGIN_ALLOW_THREADS
#if ( defined(__WATCOMC__) || defined(_MSC_VER) || defined(PYCC_VACPP) ) && !defined(__QNX__)
@@ -1062,7 +1114,8 @@ posix_mkdir(PyObject *self, PyObject *args)
#endif
Py_END_ALLOW_THREADS
if (res < 0)
- return posix_error_with_filename(path);
+ return posix_error_with_allocated_filename(path);
+ PyMem_Free(path);
Py_INCREF(Py_None);
return Py_None;
}
@@ -1095,7 +1148,7 @@ Rename a file or directory.";
static PyObject *
posix_rename(PyObject *self, PyObject *args)
{
- return posix_2str(args, "ss:rename", rename);
+ return posix_2str(args, "etet:rename", rename);
}
@@ -1106,7 +1159,7 @@ Remove a directory.";
static PyObject *
posix_rmdir(PyObject *self, PyObject *args)
{
- return posix_1str(args, "s:rmdir", rmdir);
+ return posix_1str(args, "et:rmdir", rmdir);
}
@@ -1117,7 +1170,7 @@ Perform a stat system call on the given path.";
static PyObject *
posix_stat(PyObject *self, PyObject *args)
{
- return posix_do_stat(self, args, "s:stat", STAT);
+ return posix_do_stat(self, args, "et:stat", STAT);
}
@@ -1169,7 +1222,7 @@ Remove a file (same as unlink(path)).";
static PyObject *
posix_unlink(PyObject *self, PyObject *args)
{
- return posix_1str(args, "s:remove", unlink);
+ return posix_1str(args, "et:remove", unlink);
}
@@ -3113,9 +3166,9 @@ static PyObject *
posix_lstat(PyObject *self, PyObject *args)
{
#ifdef HAVE_LSTAT
- return posix_do_stat(self, args, "s:lstat", lstat);
+ return posix_do_stat(self, args, "et:lstat", lstat);
#else /* !HAVE_LSTAT */
- return posix_do_stat(self, args, "s:lstat", STAT);
+ return posix_do_stat(self, args, "et:lstat", STAT);
#endif /* !HAVE_LSTAT */
}
@@ -3151,7 +3204,7 @@ Create a symbolic link.";
static PyObject *
posix_symlink(PyObject *self, PyObject *args)
{
- return posix_2str(args, "ss:symlink", symlink);
+ return posix_2str(args, "etet:symlink", symlink);
}
#endif /* HAVE_SYMLINK */
@@ -3328,18 +3381,21 @@ Open a file (for low level IO).";
static PyObject *
posix_open(PyObject *self, PyObject *args)
{
- char *file;
+ char *file = NULL;
int flag;
int mode = 0777;
int fd;
- if (!PyArg_ParseTuple(args, "si|i", &file, &flag, &mode))
+ if (!PyArg_ParseTuple(args, "eti|i",
+ Py_FileSystemDefaultEncoding, &file,
+ &flag, &mode))
return NULL;
Py_BEGIN_ALLOW_THREADS
fd = open(file, flag, mode);
Py_END_ALLOW_THREADS
if (fd < 0)
- return posix_error_with_filename(file);
+ return posix_error_with_allocated_filename(file);
+ PyMem_Free(file);
return PyInt_FromLong((long)fd);
}
@@ -5458,6 +5514,9 @@ static PyMethodDef posix_methods[] = {
{"pathconf", posix_pathconf, METH_VARARGS, posix_pathconf__doc__},
#endif
{"abort", posix_abort, METH_VARARGS, posix_abort__doc__},
+#ifdef MS_WIN32
+ {"_getfullpathname", posix__getfullpathname, METH_VARARGS, NULL},
+#endif
{NULL, NULL} /* Sentinel */
};
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index cc1bc95..5ffecb3 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -13,6 +13,8 @@
#include <unistd.h>
#endif
+extern const char *Py_FileSystemDefaultEncoding;
+
/* Forward */
static PyObject *filterstring(PyObject *, PyObject *);
static PyObject *filtertuple (PyObject *, PyObject *);
@@ -1530,14 +1532,16 @@ Return the octal representation of an integer or long integer.";
static PyObject *
builtin_open(PyObject *self, PyObject *args)
{
- char *name;
+ char *name = NULL;
char *mode = "r";
int bufsize = -1;
PyObject *f;
- if (!PyArg_ParseTuple(args, "s|si:open", &name, &mode, &bufsize))
+ if (!PyArg_ParseTuple(args, "et|si:open", Py_FileSystemDefaultEncoding,
+ &name, &mode, &bufsize))
return NULL;
f = PyFile_FromString(name, mode);
+ PyMem_Free(name); /* free the encoded string */
if (f != NULL)
PyFile_SetBufSize(f, bufsize);
return f;
diff --git a/Python/getargs.c b/Python/getargs.c
index 6eabd75..d78faa7 100644
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -698,7 +698,7 @@ convertsimple1(PyObject *arg, char **p_format, va_list *p_va)
's' (recode all objects via Unicode) or
't' (only recode non-string objects)
*/
- if (*format != 's')
+ if (*format == 's')
recode_strings = 1;
else if (*format == 't')
recode_strings = 0;