summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2009-05-05 04:43:17 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2009-05-05 04:43:17 (GMT)
commit011e8420339245f9b55d41082ec6036f2f83a182 (patch)
tree6e278775c41c1d50c62e3a42b960797813d245ef
parent93f65a177b36396dddd1e2938cc037288a7eb400 (diff)
downloadcpython-011e8420339245f9b55d41082ec6036f2f83a182.zip
cpython-011e8420339245f9b55d41082ec6036f2f83a182.tar.gz
cpython-011e8420339245f9b55d41082ec6036f2f83a182.tar.bz2
Issue #5915: Implement PEP 383, Non-decodable Bytes in
System Character Interfaces.
-rw-r--r--Doc/library/codecs.rst4
-rw-r--r--Doc/library/os.rst38
-rw-r--r--Include/unicodeobject.h48
-rw-r--r--Lib/test/test_codecs.py29
-rw-r--r--Lib/test/test_os.py39
-rw-r--r--Misc/NEWS2
-rw-r--r--Modules/_io/fileio.c2
-rw-r--r--Modules/posixmodule.c520
-rw-r--r--Modules/python.c113
-rw-r--r--Objects/unicodeobject.c89
-rw-r--r--Python/codecs.c89
-rw-r--r--Python/pythonrun.c32
-rwxr-xr-xconfigure5
-rw-r--r--configure.in2
-rw-r--r--pyconfig.h.in3
15 files changed, 726 insertions, 289 deletions
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index ab578ea..3f1a5fe 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -322,6 +322,8 @@ and implemented by all standard Python codecs:
| ``'backslashreplace'`` | Replace with backslashed escape sequences |
| | (only for encoding). |
+-------------------------+-----------------------------------------------+
+| ``'utf8b'`` | Replace byte with surrogate U+DCxx. |
++-------------------------+-----------------------------------------------+
In addition, the following error handlers are specific to a single codec:
@@ -333,7 +335,7 @@ In addition, the following error handlers are specific to a single codec:
+------------------+---------+--------------------------------------------+
.. versionadded:: 3.1
- The ``'surrogates'`` error handler.
+ The ``'utf8b'`` and ``'surrogates'`` error handlers.
The set of allowed values can be extended via :meth:`register_error`.
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index c686baf..83f5ee9 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -51,6 +51,30 @@ the :mod:`os` module, but using them is of course a threat to portability!
``'ce'``, ``'java'``.
+.. _os-filenames:
+
+File Names, Command Line Arguments, and Environment Variables
+-------------------------------------------------------------
+
+In Python, file names, command line arguments, and environment
+variables are represented using the string type. On some systems,
+converting these strings to and from bytes is necessary before passing
+them to the operating system. Python uses the file system encoding to
+perform this conversion (see :func:`sys.getfilesystemencoding`).
+
+.. versionchanged:: 3.1
+ On some systems, conversion using the file system encoding may
+ fail. In this case, Python uses the ``utf8b`` encoding error
+ handler, which means that undecodable bytes are replaced by a
+ Unicode character U+DCxx on decoding, and these are again
+ translated to the original byte on encoding.
+
+
+The file system encoding must guarantee to successfully decode all
+bytes below 128. If the file system encoding fails to provide this
+guarantee, API functions may raise UnicodeErrors.
+
+
.. _os-procinfo:
Process Parameters
@@ -688,12 +712,8 @@ Files and Directories
.. function:: getcwd()
- Return a string representing the current working directory. On Unix
- platforms, this function may raise :exc:`UnicodeDecodeError` if the name of
- the current directory is not decodable in the file system encoding. Use
- :func:`getcwdb` if you need the call to never fail. Availability: Unix,
- Windows.
-
+ Return a string representing the current working directory.
+ Availability: Unix, Windows.
.. function:: getcwdb()
@@ -800,10 +820,8 @@ Files and Directories
entries ``'.'`` and ``'..'`` even if they are present in the directory.
Availability: Unix, Windows.
- This function can be called with a bytes or string argument. In the bytes
- case, all filenames will be listed as returned by the underlying API. In the
- string case, filenames will be decoded using the file system encoding, and
- skipped if a decoding error occurs.
+ This function can be called with a bytes or string argument, and returns
+ filenames of the same datatype.
.. function:: lstat(path)
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 9c11873..08b518a 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -198,6 +198,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
+# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
@@ -296,6 +297,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
+# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
@@ -693,25 +695,6 @@ PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
PyObject *unicode,
const char *errors);
-/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
-
- If the encoding is supported by one of the built-in codecs (i.e., UTF-8,
- UTF-16, UTF-32, Latin-1 or MBCS), otherwise fallback to UTF-8 and replace
- invalid characters with '?'.
-
- The function is intended to be used for paths and file names only
- during bootstrapping process where the codecs are not set up.
-*/
-
-PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
- const char *s /* encoded string */
- );
-
-PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
- const char *s, /* encoded string */
- Py_ssize_t size /* size */
- );
-
/* Returns a pointer to the default encoding (normally, UTF-8) of the
Unicode object unicode and the size of the encoded representation
in bytes stored in *size.
@@ -1252,6 +1235,33 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
const char *errors /* error handling */
);
+/* --- File system encoding ---------------------------------------------- */
+
+/* ParseTuple converter which converts a Unicode object into the file
+ system encoding, using the PEP 383 error handler; bytes objects are
+ output as-is. */
+
+PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
+
+/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
+
+ Use that encoding if it is supported by one of the built-in codecs (i.e.,
+ UTF-8, UTF-16, UTF-32, Latin-1 or MBCS); otherwise fall back to UTF-8 and
+ replace invalid characters with '?'.
+
+ The function is intended to be used for paths and file names only
+ during bootstrapping process where the codecs are not set up.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
+ const char *s /* encoded string */
+ );
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
+ const char *s, /* encoded string */
+ Py_ssize_t size /* size */
+ );
+
/* --- Methods & Slots ----------------------------------------------------
These are capable of handling Unicode objects and strings on input
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 6706507..5a3834d 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1516,6 +1516,34 @@ class TypesTest(unittest.TestCase):
self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
+class Utf8bTest(unittest.TestCase):
+
+ def test_utf8(self):
+ # Bad byte
+ self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"),
+ "foo\udc80bar")
+ self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"),
+ b"foo\x80bar")
+ # bad-utf-8 encoded surrogate
+ self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"),
+ "\udced\udcb0\udc80")
+ self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"),
+ b"\xed\xb0\x80")
+
+ def test_ascii(self):
+ # bad byte
+ self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"),
+ "foo\udc80bar")
+ self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"),
+ b"foo\x80bar")
+
+ def test_charmap(self):
+ # bad byte: \xa5 is unmapped in iso-8859-3
+ self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"),
+ "foo\udca5bar")
+ self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"),
+ b"foo\xa5bar")
+
def test_main():
support.run_unittest(
@@ -1543,6 +1571,7 @@ def test_main():
CharmapTest,
WithStmtTest,
TypesTest,
+ Utf8bTest,
)
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index 91e0432..a380505 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -7,6 +7,7 @@ import errno
import unittest
import warnings
import sys
+import shutil
from test import support
# Tests creating TESTFN
@@ -698,9 +699,44 @@ if sys.platform != 'win32':
self.assertRaises(os.error, os.setregid, 0, 0)
self.assertRaises(OverflowError, os.setregid, 1<<32, 0)
self.assertRaises(OverflowError, os.setregid, 0, 1<<32)
+
+ class Pep383Tests(unittest.TestCase):
+ filenames = [b'foo\xf6bar', 'foo\xf6bar'.encode("utf-8")]
+
+ def setUp(self):
+ self.fsencoding = sys.getfilesystemencoding()
+ sys.setfilesystemencoding("utf-8")
+ self.dir = support.TESTFN
+ self.bdir = self.dir.encode("utf-8", "utf8b")
+ os.mkdir(self.dir)
+ self.unicodefn = []
+ for fn in self.filenames:
+ f = open(os.path.join(self.bdir, fn), "w")
+ f.close()
+ self.unicodefn.append(fn.decode("utf-8", "utf8b"))
+
+ def tearDown(self):
+ shutil.rmtree(self.dir)
+ sys.setfilesystemencoding(self.fsencoding)
+
+ def test_listdir(self):
+ expected = set(self.unicodefn)
+ found = set(os.listdir(support.TESTFN))
+ self.assertEquals(found, expected)
+
+ def test_open(self):
+ for fn in self.unicodefn:
+ f = open(os.path.join(self.dir, fn))
+ f.close()
+
+ def test_stat(self):
+ for fn in self.unicodefn:
+ os.stat(os.path.join(self.dir, fn))
else:
class PosixUidGidTests(unittest.TestCase):
pass
+ class Pep383Tests(unittest.TestCase):
+ pass
def test_main():
support.run_unittest(
@@ -714,7 +750,8 @@ def test_main():
ExecTests,
Win32ErrorTests,
TestInvalidFD,
- PosixUidGidTests
+ PosixUidGidTests,
+ Pep383Tests
)
if __name__ == "__main__":
diff --git a/Misc/NEWS b/Misc/NEWS
index a384c41..2e4c6bd 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@ What's New in Python 3.1 beta 1?
Core and Builtins
-----------------
+- Implement PEP 383, Non-decodable Bytes in System Character Interfaces.
+
- Issue #5890: in subclasses of 'property' the __doc__ attribute was
shadowed by classtype's, even if it was None. property now
inserts the __doc__ into the subclass instance __dict__.
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index 4499ee2..164f7e4 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -245,7 +245,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
return -1;
stringobj = PyUnicode_AsEncodedString(
- u, Py_FileSystemDefaultEncoding, NULL);
+ u, Py_FileSystemDefaultEncoding, "utf8b");
Py_DECREF(u);
if (stringobj == NULL)
return -1;
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index 0575be2..d38a4db 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -493,12 +493,14 @@ convertenviron(void)
char *p = strchr(*e, '=');
if (p == NULL)
continue;
- k = PyUnicode_FromStringAndSize(*e, (int)(p-*e));
+ k = PyUnicode_Decode(*e, (int)(p-*e),
+ Py_FileSystemDefaultEncoding, "utf8b");
if (k == NULL) {
PyErr_Clear();
continue;
}
- v = PyUnicode_FromString(p+1);
+ v = PyUnicode_Decode(p+1, strlen(p+1),
+ Py_FileSystemDefaultEncoding, "utf8b");
if (v == NULL) {
PyErr_Clear();
Py_DECREF(k);
@@ -534,6 +536,37 @@ convertenviron(void)
return d;
}
+/* Return the char* buffer of a bytes or bytearray object. If lock is
+ nonzero and o is a bytearray, a buffer export is first requested with
+ PyObject_GetBuffer() (NULL view); release_bytes() is the counterpart
+ that releases it. If that request fails an error is set, but the
+ pointer is still returned. Any other object type is a fatal error. */
+
+static char*
+bytes2str(PyObject* o, int lock)
+{
+ if(PyBytes_Check(o))
+ return PyBytes_AsString(o);
+ else if(PyByteArray_Check(o)) {
+ if (lock && PyObject_GetBuffer(o, NULL, 0) < 0)
+ /* On a bytearray, this should not fail. */
+ PyErr_BadInternalCall();
+ return PyByteArray_AsString(o);
+ } else {
+ /* The FS converter should have verified that this
+ is either bytes or bytearray. */
+ Py_FatalError("bad object passed to bytes2str");
+ /* not reached. */
+ return "";
+ }
+}
+
+/* Release the lock taken by bytes2str(o, 1) on a bytearray, then drop the
+ reference to o. The bf_releasebuffer slot receives the exporting object
+ as its first argument, so o itself must be passed (not NULL). */
+static void
+release_bytes(PyObject* o)
+{
+ if (PyByteArray_Check(o))
+ o->ob_type->tp_as_buffer->bf_releasebuffer(o, 0);
+ Py_DECREF(o);
+}
+
/* Set a POSIX-specific error from errno, and return NULL */
@@ -558,10 +591,11 @@ posix_error_with_unicode_filename(Py_UNICODE* name)
static PyObject *
-posix_error_with_allocated_filename(char* name)
+posix_error_with_allocated_filename(PyObject* name)
{
- PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError, name);
- PyMem_Free(name);
+ PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError,
+ bytes2str(name, 0));
+ release_bytes(name);
return rc;
}
@@ -728,17 +762,19 @@ unicode_file_names(void)
static PyObject *
posix_1str(PyObject *args, char *format, int (*func)(const char*))
{
- char *path1 = NULL;
+ PyObject *opath1 = NULL;
+ char *path1;
int res;
if (!PyArg_ParseTuple(args, format,
- Py_FileSystemDefaultEncoding, &path1))
+ PyUnicode_FSConverter, &opath1))
return NULL;
+ path1 = bytes2str(opath1, 1);
Py_BEGIN_ALLOW_THREADS
res = (*func)(path1);
Py_END_ALLOW_THREADS
if (res < 0)
- return posix_error_with_allocated_filename(path1);
- PyMem_Free(path1);
+ return posix_error_with_allocated_filename(opath1);
+ release_bytes(opath1);
Py_INCREF(Py_None);
return Py_None;
}
@@ -748,17 +784,20 @@ posix_2str(PyObject *args,
char *format,
int (*func)(const char *, const char *))
{
- char *path1 = NULL, *path2 = NULL;
+ PyObject *opath1, *opath2;
+ char *path1, *path2;
int res;
if (!PyArg_ParseTuple(args, format,
- Py_FileSystemDefaultEncoding, &path1,
- Py_FileSystemDefaultEncoding, &path2))
+ PyUnicode_FSConverter, &opath1,
+ PyUnicode_FSConverter, &opath2))
return NULL;
+ path1 = bytes2str(opath1, 1);
+ path2 = bytes2str(opath2, 1);
Py_BEGIN_ALLOW_THREADS
res = (*func)(path1, path2);
Py_END_ALLOW_THREADS
- PyMem_Free(path1);
- PyMem_Free(path2);
+ release_bytes(opath1);
+ release_bytes(opath2);
if (res != 0)
/* XXX how to report both path1 and path2??? */
return posix_error();
@@ -1560,8 +1599,8 @@ posix_do_stat(PyObject *self, PyObject *args,
int (*wstatfunc)(const Py_UNICODE *, STRUCT_STAT *))
{
STRUCT_STAT st;
- char *path = NULL; /* pass this to stat; do not free() it */
- char *pathfree = NULL; /* this memory must be free'd */
+ PyObject *opath;
+ char *path;
int res;
PyObject *result;
@@ -1590,25 +1629,24 @@ posix_do_stat(PyObject *self, PyObject *args,
#endif
if (!PyArg_ParseTuple(args, format,
- Py_FileSystemDefaultEncoding, &path))
+ PyUnicode_FSConverter, &opath))
return NULL;
- pathfree = path;
-
+ path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = (*statfunc)(path, &st);
Py_END_ALLOW_THREADS
if (res != 0) {
#ifdef MS_WINDOWS
- result = win32_error("stat", pathfree);
+ result = win32_error("stat", path);
#else
- result = posix_error_with_filename(pathfree);
+ result = posix_error_with_filename(path);
#endif
}
else
result = _pystat_fromstructstat(&st);
- PyMem_Free(pathfree);
+ release_bytes(opath);
return result;
}
@@ -1625,6 +1663,7 @@ existence, or the inclusive-OR of R_OK, W_OK, and X_OK.");
static PyObject *
posix_access(PyObject *self, PyObject *args)
{
+ PyObject *opath;
char *path;
int mode;
@@ -1644,13 +1683,14 @@ posix_access(PyObject *self, PyObject *args)
are also valid. */
PyErr_Clear();
}
- if (!PyArg_ParseTuple(args, "eti:access",
- Py_FileSystemDefaultEncoding, &path, &mode))
+ if (!PyArg_ParseTuple(args, "O&i:access",
+ PyUnicode_FSConverter, &opath, &mode))
return 0;
+ path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
attr = GetFileAttributesA(path);
Py_END_ALLOW_THREADS
- PyMem_Free(path);
+ release_bytes(opath);
finish:
if (attr == 0xFFFFFFFF)
/* File does not exist, or cannot read attributes */
@@ -1663,13 +1703,14 @@ finish:
|| (attr & FILE_ATTRIBUTE_DIRECTORY));
#else
int res;
- if (!PyArg_ParseTuple(args, "eti:access",
- Py_FileSystemDefaultEncoding, &path, &mode))
+ if (!PyArg_ParseTuple(args, "O&i:access",
+ PyUnicode_FSConverter, &opath, &mode))
return NULL;
+ path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = access(path, mode);
Py_END_ALLOW_THREADS
- PyMem_Free(path);
+ release_bytes(opath);
return PyBool_FromLong(res == 0);
#endif
}
@@ -1750,11 +1791,11 @@ posix_chdir(PyObject *self, PyObject *args)
#ifdef MS_WINDOWS
return win32_1str(args, "chdir", "y:chdir", win32_chdir, "U:chdir", win32_wchdir);
#elif defined(PYOS_OS2) && defined(PYCC_GCC)
- return posix_1str(args, "et:chdir", _chdir2);
+ return posix_1str(args, "O&:chdir", _chdir2);
#elif defined(__VMS)
- return posix_1str(args, "et:chdir", (int (*)(const char *))chdir);
+ return posix_1str(args, "O&:chdir", (int (*)(const char *))chdir);
#else
- return posix_1str(args, "et:chdir", chdir);
+ return posix_1str(args, "O&:chdir", chdir);
#endif
}
@@ -1779,6 +1820,7 @@ Change the access permissions of a file.");
static PyObject *
posix_chmod(PyObject *self, PyObject *args)
{
+ PyObject *opath = NULL;
char *path = NULL;
int i;
int res;
@@ -1809,9 +1851,10 @@ posix_chmod(PyObject *self, PyObject *args)
are also valid. */
PyErr_Clear();
}
- if (!PyArg_ParseTuple(args, "eti:chmod", Py_FileSystemDefaultEncoding,
- &path, &i))
+ if (!PyArg_ParseTuple(args, "O&i:chmod", PyUnicode_FSConverter,
+ &opath, &i))
return NULL;
+ path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
attr = GetFileAttributesA(path);
if (attr != 0xFFFFFFFF) {
@@ -1826,22 +1869,23 @@ posix_chmod(PyObject *self, PyObject *args)
Py_END_ALLOW_THREADS
if (!res) {
win32_error("chmod", path);
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
- PyMem_Free(path);
+ release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
#else /* Py_WIN_WIDE_FILENAMES */
- if (!PyArg_ParseTuple(args, "eti:chmod", Py_FileSystemDefaultEncoding,
- &path, &i))
+ if (!PyArg_ParseTuple(args, "O&i:chmod", PyUnicode_FSConverter,
+ &opath, &i))
return NULL;
+ path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = chmod(path, i);
Py_END_ALLOW_THREADS
if (res < 0)
- return posix_error_with_allocated_filename(path);
- PyMem_Free(path);
+ return posix_error_with_allocated_filename(opath);
+ release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
#endif
@@ -1877,18 +1921,20 @@ affects the link itself rather than the target.");
static PyObject *
posix_lchmod(PyObject *self, PyObject *args)
{
- char *path = NULL;
+ PyObject *opath;
+ char *path;
int i;
int res;
- if (!PyArg_ParseTuple(args, "eti:lchmod", Py_FileSystemDefaultEncoding,
- &path, &i))
+ if (!PyArg_ParseTuple(args, "O&i:lchmod", PyUnicode_FSConverter,
+ &opath, &i))
return NULL;
+ /* opath is released on both exit paths below (directly, or through
+ posix_error_with_allocated_filename). */
+ path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = lchmod(path, i);
Py_END_ALLOW_THREADS
if (res < 0)
- return posix_error_with_allocated_filename(path);
- PyMem_Free(path);
+ return posix_error_with_allocated_filename(opath);
+ release_bytes(opath);
Py_RETURN_NONE;
}
#endif /* HAVE_LCHMOD */
@@ -1902,18 +1948,20 @@ Set file flags.");
static PyObject *
posix_chflags(PyObject *self, PyObject *args)
{
+ PyObject *opath;
char *path;
unsigned long flags;
int res;
- if (!PyArg_ParseTuple(args, "etk:chflags",
- Py_FileSystemDefaultEncoding, &path, &flags))
+ if (!PyArg_ParseTuple(args, "O&k:chflags",
+ PyUnicode_FSConverter, &opath, &flags))
return NULL;
+ path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = chflags(path, flags);
Py_END_ALLOW_THREADS
if (res < 0)
- return posix_error_with_allocated_filename(path);
- PyMem_Free(path);
+ return posix_error_with_allocated_filename(opath);
+ release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
}
@@ -1928,18 +1976,20 @@ This function will not follow symbolic links.");
static PyObject *
posix_lchflags(PyObject *self, PyObject *args)
{
+ PyObject *opath;
char *path;
unsigned long flags;
int res;
- if (!PyArg_ParseTuple(args, "etk:lchflags",
- Py_FileSystemDefaultEncoding, &path, &flags))
+ /* The O& converter stores a PyObject*, so it must be given &opath;
+ path is derived from opath afterwards. */
+ if (!PyArg_ParseTuple(args, "O&k:lchflags",
+ PyUnicode_FSConverter, &opath, &flags))
return NULL;
+ path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = lchflags(path, flags);
Py_END_ALLOW_THREADS
if (res < 0)
- return posix_error_with_allocated_filename(path);
- PyMem_Free(path);
+ return posix_error_with_allocated_filename(opath);
+ release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
}
@@ -1953,7 +2003,7 @@ Change root directory to path.");
static PyObject *
posix_chroot(PyObject *self, PyObject *args)
{
- return posix_1str(args, "et:chroot", chroot);
+ return posix_1str(args, "O&:chroot", chroot);
}
#endif
@@ -1996,19 +2046,21 @@ Change the owner and group id of path to the numeric uid and gid.");
static PyObject *
posix_chown(PyObject *self, PyObject *args)
{
- char *path = NULL;
+ PyObject *opath;
+ char *path;
long uid, gid;
int res;
- if (!PyArg_ParseTuple(args, "etll:chown",
- Py_FileSystemDefaultEncoding, &path,
+ if (!PyArg_ParseTuple(args, "O&ll:chown",
+ PyUnicode_FSConverter, &opath,
&uid, &gid))
return NULL;
+ path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = chown(path, (uid_t) uid, (gid_t) gid);
Py_END_ALLOW_THREADS
if (res < 0)
- return posix_error_with_allocated_filename(path);
- PyMem_Free(path);
+ return posix_error_with_allocated_filename(opath);
+ release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
}
@@ -2045,19 +2097,21 @@ This function will not follow symbolic links.");
static PyObject *
posix_lchown(PyObject *self, PyObject *args)
{
- char *path = NULL;
+ PyObject *opath;
+ char *path;
int uid, gid;
int res;
- if (!PyArg_ParseTuple(args, "etii:lchown",
- Py_FileSystemDefaultEncoding, &path,
+ if (!PyArg_ParseTuple(args, "O&ii:lchown",
+ PyUnicode_FSConverter, &opath,
&uid, &gid))
return NULL;
+ path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
res = lchown(path, (uid_t) uid, (gid_t) gid);
Py_END_ALLOW_THREADS
if (res < 0)
- return posix_error_with_allocated_filename(path);
- PyMem_Free(path);
+ return posix_error_with_allocated_filename(opath);
+ release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
}
@@ -2113,7 +2167,7 @@ posix_getcwd(int use_bytes)
return posix_error();
if (use_bytes)
return PyBytes_FromStringAndSize(buf, strlen(buf));
- return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"strict");
+ return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"utf8b");
}
PyDoc_STRVAR(posix_getcwd__doc__,
@@ -2146,7 +2200,7 @@ Create a hard link to a file.");
static PyObject *
posix_link(PyObject *self, PyObject *args)
{
- return posix_2str(args, "etet:link", link);
+ return posix_2str(args, "O&O&:link", link);
}
#endif /* HAVE_LINK */
@@ -2171,6 +2225,7 @@ posix_listdir(PyObject *self, PyObject *args)
HANDLE hFindFile;
BOOL result;
WIN32_FIND_DATA FileData;
+ PyObject *opath;
char namebuf[MAX_PATH+5]; /* Overallocate for \\*.*\0 */
char *bufptr = namebuf;
Py_ssize_t len = sizeof(namebuf)-5; /* only claim to have space for MAX_PATH */
@@ -2260,9 +2315,16 @@ posix_listdir(PyObject *self, PyObject *args)
}
#endif
- if (!PyArg_ParseTuple(args, "et#:listdir",
- Py_FileSystemDefaultEncoding, &bufptr, &len))
+ if (!PyArg_ParseTuple(args, "O&:listdir",
+ PyUnicode_FSConverter, &opath))
+ return NULL;
+ if (PyObject_Size(opath)+1 > MAX_PATH) {
+ PyErr_SetString(PyExc_ValueError, "path too long");
+ Py_DECREF(opath);
return NULL;
+ }
+ strcpy(namebuf, bytes2str(opath, 0));
+ len = PyObject_Size(opath);
if (len > 0) {
char ch = namebuf[len-1];
if (ch != SEP && ch != ALTSEP && ch != ':')
@@ -2324,6 +2386,7 @@ posix_listdir(PyObject *self, PyObject *args)
#ifndef MAX_PATH
#define MAX_PATH CCHMAXPATH
#endif
+ PyObject *oname;
char *name, *pt;
Py_ssize_t len;
PyObject *d, *v;
@@ -2333,11 +2396,13 @@ posix_listdir(PyObject *self, PyObject *args)
FILEFINDBUF3 ep;
APIRET rc;
- if (!PyArg_ParseTuple(args, "et#:listdir",
- Py_FileSystemDefaultEncoding, &name, &len))
+ if (!PyArg_ParseTuple(args, "O&:listdir",
+ PyUnicode_FSConverter, &oname))
return NULL;
+ /* bytes2str takes (object, lock); lock with 1 because every exit path
+ of this branch calls release_bytes(oname). */
+ name = bytes2str(oname, 1);
+ len = PyObject_Size(oname);
if (len >= MAX_PATH) {
- PyMem_Free(name);
+ release_bytes(oname);
PyErr_SetString(PyExc_ValueError, "path too long");
return NULL;
}
@@ -2350,7 +2415,7 @@ posix_listdir(PyObject *self, PyObject *args)
strcpy(namebuf + len, "*.*");
if ((d = PyList_New(0)) == NULL) {
- PyMem_Free(name);
+ release_bytes(oname);
return NULL;
}
@@ -2363,7 +2428,7 @@ posix_listdir(PyObject *self, PyObject *args)
if (rc != NO_ERROR) {
errno = ENOENT;
- return posix_error_with_allocated_filename(name);
+ return posix_error_with_allocated_filename(oname);
}
if (srchcnt > 0) { /* If Directory is NOT Totally Empty, */
@@ -2393,11 +2458,11 @@ posix_listdir(PyObject *self, PyObject *args)
} while (DosFindNext(hdir, &ep, sizeof(ep), &srchcnt) == NO_ERROR && srchcnt > 0);
}
- PyMem_Free(name);
+ release_bytes(oname);
return d;
#else
-
- char *name = NULL;
+ PyObject *oname;
+ char *name;
PyObject *d, *v;
DIR *dirp;
struct dirent *ep;
@@ -2408,14 +2473,15 @@ posix_listdir(PyObject *self, PyObject *args)
arg_is_unicode = 0;
PyErr_Clear();
}
- if (!PyArg_ParseTuple(args, "et:listdir", Py_FileSystemDefaultEncoding, &name))
+ if (!PyArg_ParseTuple(args, "O&:listdir", PyUnicode_FSConverter, &oname))
return NULL;
+ name = bytes2str(oname, 1);
if ((dirp = opendir(name)) == NULL) {
- return posix_error_with_allocated_filename(name);
+ return posix_error_with_allocated_filename(oname);
}
if ((d = PyList_New(0)) == NULL) {
closedir(dirp);
- PyMem_Free(name);
+ release_bytes(oname);
return NULL;
}
for (;;) {
@@ -2429,7 +2495,7 @@ posix_listdir(PyObject *self, PyObject *args)
} else {
closedir(dirp);
Py_DECREF(d);
- return posix_error_with_allocated_filename(name);
+ return posix_error_with_allocated_filename(oname);
}
}
if (ep->d_name[0] == '.' &&
@@ -2447,18 +2513,16 @@ posix_listdir(PyObject *self, PyObject *args)
w = PyUnicode_FromEncodedObject(v,
Py_FileSystemDefaultEncoding,
- "strict");
- if (w != NULL) {
- Py_DECREF(v);
+ "utf8b");
+ Py_DECREF(v);
+ if (w != NULL)
v = w;
- }
else {
- /* Ignore undecodable filenames, as discussed
- * in issue 3187. To include these,
- * use getcwdb(). */
- PyErr_Clear();
- Py_DECREF(v);
- continue;
+ /* Encoding failed to decode ASCII bytes.
+ Raise exception. */
+ Py_DECREF(d);
+ d = NULL;
+ break;
}
}
if (PyList_Append(d, v) != 0) {
@@ -2470,7 +2534,7 @@ posix_listdir(PyObject *self, PyObject *args)
Py_DECREF(v);
}
closedir(dirp);
- PyMem_Free(name);
+ release_bytes(oname);
return d;
@@ -2482,10 +2546,8 @@ posix_listdir(PyObject *self, PyObject *args)
static PyObject *
posix__getfullpathname(PyObject *self, PyObject *args)
{
- /* assume encoded strings won't more than double no of chars */
- char inbuf[MAX_PATH*2];
- char *inbufp = inbuf;
- Py_ssize_t insize = sizeof(inbuf);
+ PyObject *opath;
+ char *path;
char outbuf[MAX_PATH*2];
char *temp;
#ifdef Py_WIN_WIDE_FILENAMES
@@ -2519,13 +2581,17 @@ posix__getfullpathname(PyObject *self, PyObject *args)
PyErr_Clear();
}
#endif
- if (!PyArg_ParseTuple (args, "et#:_getfullpathname",
- Py_FileSystemDefaultEncoding, &inbufp,
- &insize))
+ if (!PyArg_ParseTuple (args, "O&:_getfullpathname",
+ PyUnicode_FSConverter, &opath))
return NULL;
- if (!GetFullPathName(inbuf, sizeof(outbuf)/sizeof(outbuf[0]),
- outbuf, &temp))
- return win32_error("GetFullPathName", inbuf);
+ path = bytes2str(opath, 1);
+ if (!GetFullPathName(path, sizeof(outbuf)/sizeof(outbuf[0]),
+ outbuf, &temp)) {
+ win32_error("GetFullPathName", path);
+ release_bytes(opath);
+ return NULL;
+ }
+ release_bytes(opath);
if (PyUnicode_Check(PyTuple_GetItem(args, 0))) {
return PyUnicode_Decode(outbuf, strlen(outbuf),
Py_FileSystemDefaultEncoding, NULL);
@@ -2542,7 +2608,8 @@ static PyObject *
posix_mkdir(PyObject *self, PyObject *args)
{
int res;
- char *path = NULL;
+ PyObject *opath;
+ char *path;
int mode = 0777;
#ifdef Py_WIN_WIDE_FILENAMES
@@ -2563,9 +2630,10 @@ posix_mkdir(PyObject *self, PyObject *args)
are also valid. */
PyErr_Clear();
}
- if (!PyArg_ParseTuple(args, "et|i:mkdir",
- Py_FileSystemDefaultEncoding, &path, &mode))
+ if (!PyArg_ParseTuple(args, "O&|i:mkdir",
+ PyUnicode_FSConverter, &opath, &mode))
return NULL;
+ path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
/* PyUnicode_AS_UNICODE OK without thread lock as
it is a simple dereference. */
@@ -2573,17 +2641,18 @@ posix_mkdir(PyObject *self, PyObject *args)
Py_END_ALLOW_THREADS
if (!res) {
win32_error("mkdir", path);
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
- PyMem_Free(path);
+ release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
#else
- if (!PyArg_ParseTuple(args, "et|i:mkdir",
- Py_FileSystemDefaultEncoding, &path, &mode))
+ if (!PyArg_ParseTuple(args, "O&|i:mkdir",
+ PyUnicode_FSConverter, &opath, &mode))
return NULL;
+ path = bytes2str(opath, 1);
Py_BEGIN_ALLOW_THREADS
#if ( defined(__WATCOMC__) || defined(PYCC_VACPP) ) && !defined(__QNX__)
res = mkdir(path);
@@ -2592,8 +2661,8 @@ posix_mkdir(PyObject *self, PyObject *args)
#endif
Py_END_ALLOW_THREADS
if (res < 0)
- return posix_error_with_allocated_filename(path);
- PyMem_Free(path);
+ return posix_error_with_allocated_filename(opath);
+ release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
#endif
@@ -2685,7 +2754,7 @@ error:
Py_INCREF(Py_None);
return Py_None;
#else
- return posix_2str(args, "etet:rename", rename);
+ return posix_2str(args, "O&O&:rename", rename);
#endif
}
@@ -2700,7 +2769,7 @@ posix_rmdir(PyObject *self, PyObject *args)
#ifdef MS_WINDOWS
return win32_1str(args, "rmdir", "y:rmdir", RemoveDirectoryA, "U:rmdir", RemoveDirectoryW);
#else
- return posix_1str(args, "et:rmdir", rmdir);
+ return posix_1str(args, "O&:rmdir", rmdir);
#endif
}
@@ -2713,9 +2782,9 @@ static PyObject *
posix_stat(PyObject *self, PyObject *args)
{
#ifdef MS_WINDOWS
- return posix_do_stat(self, args, "et:stat", STAT, "U:stat", win32_wstat);
+ return posix_do_stat(self, args, "O&:stat", STAT, "U:stat", win32_wstat);
#else
- return posix_do_stat(self, args, "et:stat", STAT, NULL, NULL);
+ return posix_do_stat(self, args, "O&:stat", STAT, NULL, NULL);
#endif
}
@@ -2781,7 +2850,7 @@ posix_unlink(PyObject *self, PyObject *args)
#ifdef MS_WINDOWS
return win32_1str(args, "remove", "y:remove", DeleteFileA, "U:remove", DeleteFileW);
#else
- return posix_1str(args, "et:remove", unlink);
+ return posix_1str(args, "O&:remove", unlink);
#endif
}
@@ -2853,7 +2922,8 @@ posix_utime(PyObject *self, PyObject *args)
PyObject *arg;
PyUnicodeObject *obwpath;
wchar_t *wpath = NULL;
- char *apath = NULL;
+ PyObject *oapath;
+ char *apath;
HANDLE hFile;
long atimesec, mtimesec, ausec, musec;
FILETIME atime, mtime;
@@ -2875,9 +2945,10 @@ posix_utime(PyObject *self, PyObject *args)
PyErr_Clear();
}
if (!wpath) {
- if (!PyArg_ParseTuple(args, "etO:utime",
- Py_FileSystemDefaultEncoding, &apath, &arg))
+ if (!PyArg_ParseTuple(args, "O&O:utime",
+ PyUnicode_FSConverter, &oapath, &arg))
return NULL;
+ apath = bytes2str(oapath, 1);
Py_BEGIN_ALLOW_THREADS
hFile = CreateFileA(apath, FILE_WRITE_ATTRIBUTES, 0,
NULL, OPEN_EXISTING,
@@ -2885,10 +2956,10 @@ posix_utime(PyObject *self, PyObject *args)
Py_END_ALLOW_THREADS
if (hFile == INVALID_HANDLE_VALUE) {
win32_error("utime", apath);
- PyMem_Free(apath);
+ release_bytes(oapath);
return NULL;
}
- PyMem_Free(apath);
+ release_bytes(oapath);
}
if (arg == Py_None) {
@@ -2929,7 +3000,8 @@ done:
return result;
#else /* Py_WIN_WIDE_FILENAMES */
- char *path = NULL;
+ PyObject *opath;
+ char *path;
long atime, mtime, ausec, musec;
int res;
PyObject* arg;
@@ -2952,9 +3024,10 @@ done:
#endif /* HAVE_UTIMES */
- if (!PyArg_ParseTuple(args, "etO:utime",
- Py_FileSystemDefaultEncoding, &path, &arg))
+ if (!PyArg_ParseTuple(args, "O&O:utime",
+ PyUnicode_FSConverter, &opath, &arg))
return NULL;
+ path = bytes2str(opath, 1);
if (arg == Py_None) {
/* optional time values not given */
Py_BEGIN_ALLOW_THREADS
@@ -2964,18 +3037,18 @@ done:
else if (!PyTuple_Check(arg) || PyTuple_Size(arg) != 2) {
PyErr_SetString(PyExc_TypeError,
"utime() arg 2 must be a tuple (atime, mtime)");
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
else {
if (extract_time(PyTuple_GET_ITEM(arg, 0),
&atime, &ausec) == -1) {
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
if (extract_time(PyTuple_GET_ITEM(arg, 1),
&mtime, &musec) == -1) {
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
ATIME = atime;
@@ -2993,9 +3066,9 @@ done:
#endif /* HAVE_UTIMES */
}
if (res < 0) {
- return posix_error_with_allocated_filename(path);
+ return posix_error_with_allocated_filename(opath);
}
- PyMem_Free(path);
+ release_bytes(opath);
Py_INCREF(Py_None);
return Py_None;
#undef UTIME_ARG
@@ -3030,6 +3103,22 @@ free_string_array(char **array, Py_ssize_t count)
PyMem_Free(array[i]);
PyMem_DEL(array);
}
+
+/* Convert o with PyUnicode_FSConverter and store a NUL-terminated copy
+   of the resulting bytes, allocated with PyMem_Malloc, in *out.
+   Returns 1 on success and 0 on failure.  If the allocation fails, the
+   temporary bytes object is released and MemoryError is set, so callers
+   never return NULL without an exception.  The copy is meant to be
+   released with PyMem_Free (free_string_array does this). */
+int fsconvert_strdup(PyObject *o, char**out)
+{
+    PyObject *bytes;
+    Py_ssize_t size;
+    if (!PyUnicode_FSConverter(o, &bytes))
+        return 0;
+    size = PyObject_Size(bytes);
+    *out = PyMem_Malloc(size+1);
+    if (!*out) {
+        /* Drop the reference obtained from the converter and report
+           the allocation failure. */
+        Py_DECREF(bytes);
+        PyErr_NoMemory();
+        return 0;
+    }
+    /* Don't lock bytes, as we hold the GIL */
+    memcpy(*out, bytes2str(bytes, 0), size+1);
+    Py_DECREF(bytes);
+    return 1;
+}
#endif
@@ -3044,6 +3133,7 @@ Execute an executable path with arguments, replacing current process.\n\
static PyObject *
posix_execv(PyObject *self, PyObject *args)
{
+ PyObject *opath;
char *path;
PyObject *argv;
char **argvlist;
@@ -3053,10 +3143,11 @@ posix_execv(PyObject *self, PyObject *args)
/* execv has two arguments: (path, argv), where
argv is a list or tuple of strings. */
- if (!PyArg_ParseTuple(args, "etO:execv",
- Py_FileSystemDefaultEncoding,
- &path, &argv))
+ if (!PyArg_ParseTuple(args, "O&O:execv",
+ PyUnicode_FSConverter,
+ &opath, &argv))
return NULL;
+ path = bytes2str(opath, 1);
if (PyList_Check(argv)) {
argc = PyList_Size(argv);
getitem = PyList_GetItem;
@@ -3067,28 +3158,27 @@ posix_execv(PyObject *self, PyObject *args)
}
else {
PyErr_SetString(PyExc_TypeError, "execv() arg 2 must be a tuple or list");
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
if (argc < 1) {
PyErr_SetString(PyExc_ValueError, "execv() arg 2 must not be empty");
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
argvlist = PyMem_NEW(char *, argc+1);
if (argvlist == NULL) {
- PyMem_Free(path);
+ release_bytes(opath);
return PyErr_NoMemory();
}
for (i = 0; i < argc; i++) {
- if (!PyArg_Parse((*getitem)(argv, i), "et",
- Py_FileSystemDefaultEncoding,
- &argvlist[i])) {
+ if (!fsconvert_strdup((*getitem)(argv, i),
+ &argvlist[i])) {
free_string_array(argvlist, i);
PyErr_SetString(PyExc_TypeError,
"execv() arg 2 must contain only strings");
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
@@ -3100,7 +3190,7 @@ posix_execv(PyObject *self, PyObject *args)
/* If we get here it's definitely an error */
free_string_array(argvlist, argc);
- PyMem_Free(path);
+ release_bytes(opath);
return posix_error();
}
@@ -3116,6 +3206,7 @@ Execute a path with arguments and environment, replacing current process.\n\
static PyObject *
posix_execve(PyObject *self, PyObject *args)
{
+ PyObject *opath;
char *path;
PyObject *argv, *env;
char **argvlist;
@@ -3129,10 +3220,11 @@ posix_execve(PyObject *self, PyObject *args)
argv is a list or tuple of strings and env is a dictionary
like posix.environ. */
- if (!PyArg_ParseTuple(args, "etOO:execve",
- Py_FileSystemDefaultEncoding,
- &path, &argv, &env))
+ if (!PyArg_ParseTuple(args, "O&OO:execve",
+ PyUnicode_FSConverter,
+ &opath, &argv, &env))
return NULL;
+ path = bytes2str(opath, 1);
if (PyList_Check(argv)) {
argc = PyList_Size(argv);
getitem = PyList_GetItem;
@@ -3158,10 +3250,8 @@ posix_execve(PyObject *self, PyObject *args)
goto fail_0;
}
for (i = 0; i < argc; i++) {
- if (!PyArg_Parse((*getitem)(argv, i),
- "et;execve() arg 2 must contain only strings",
- Py_FileSystemDefaultEncoding,
- &argvlist[i]))
+ if (!fsconvert_strdup((*getitem)(argv, i),
+ &argvlist[i]))
{
lastarg = i;
goto fail_1;
@@ -3243,7 +3333,7 @@ posix_execve(PyObject *self, PyObject *args)
Py_XDECREF(vals);
Py_XDECREF(keys);
fail_0:
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
#endif /* HAVE_EXECV */
@@ -3261,6 +3351,7 @@ Execute the program 'path' in a new process.\n\
static PyObject *
posix_spawnv(PyObject *self, PyObject *args)
{
+ PyObject *opath;
char *path;
PyObject *argv;
char **argvlist;
@@ -3272,10 +3363,11 @@ posix_spawnv(PyObject *self, PyObject *args)
/* spawnv has three arguments: (mode, path, argv), where
argv is a list or tuple of strings. */
- if (!PyArg_ParseTuple(args, "ietO:spawnv", &mode,
- Py_FileSystemDefaultEncoding,
- &path, &argv))
+ if (!PyArg_ParseTuple(args, "iO&O:spawnv", &mode,
+ PyUnicode_FSConverter,
+ &opath, &argv))
return NULL;
+ path = bytes2str(opath, 1);
if (PyList_Check(argv)) {
argc = PyList_Size(argv);
getitem = PyList_GetItem;
@@ -3287,24 +3379,23 @@ posix_spawnv(PyObject *self, PyObject *args)
else {
PyErr_SetString(PyExc_TypeError,
"spawnv() arg 2 must be a tuple or list");
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
argvlist = PyMem_NEW(char *, argc+1);
if (argvlist == NULL) {
- PyMem_Free(path);
+ release_bytes(opath);
return PyErr_NoMemory();
}
for (i = 0; i < argc; i++) {
- if (!PyArg_Parse((*getitem)(argv, i), "et",
- Py_FileSystemDefaultEncoding,
- &argvlist[i])) {
+ if (!fsconvert_strdup((*getitem)(argv, i),
+ &argvlist[i])) {
free_string_array(argvlist, i);
PyErr_SetString(
PyExc_TypeError,
"spawnv() arg 2 must contain only strings");
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
}
@@ -3324,7 +3415,7 @@ posix_spawnv(PyObject *self, PyObject *args)
#endif
free_string_array(argvlist, argc);
- PyMem_Free(path);
+ release_bytes(opath);
if (spawnval == -1)
return posix_error();
@@ -3349,6 +3440,7 @@ Execute the program 'path' in a new process.\n\
static PyObject *
posix_spawnve(PyObject *self, PyObject *args)
{
+ PyObject *opath;
char *path;
PyObject *argv, *env;
char **argvlist;
@@ -3364,10 +3456,11 @@ posix_spawnve(PyObject *self, PyObject *args)
argv is a list or tuple of strings and env is a dictionary
like posix.environ. */
- if (!PyArg_ParseTuple(args, "ietOO:spawnve", &mode,
- Py_FileSystemDefaultEncoding,
- &path, &argv, &env))
+ if (!PyArg_ParseTuple(args, "iO&OO:spawnve", &mode,
+ PyUnicode_FSConverter,
+ &opath, &argv, &env))
return NULL;
+ path = bytes2str(opath, 1);
if (PyList_Check(argv)) {
argc = PyList_Size(argv);
getitem = PyList_GetItem;
@@ -3393,10 +3486,8 @@ posix_spawnve(PyObject *self, PyObject *args)
goto fail_0;
}
for (i = 0; i < argc; i++) {
- if (!PyArg_Parse((*getitem)(argv, i),
- "et;spawnve() arg 2 must contain only strings",
- Py_FileSystemDefaultEncoding,
- &argvlist[i]))
+ if (!fsconvert_strdup((*getitem)(argv, i),
+ &argvlist[i]))
{
lastarg = i;
goto fail_1;
@@ -3486,7 +3577,7 @@ posix_spawnve(PyObject *self, PyObject *args)
Py_XDECREF(vals);
Py_XDECREF(keys);
fail_0:
- PyMem_Free(path);
+ release_bytes(opath);
return res;
}
@@ -3504,6 +3595,7 @@ search path to find the file.\n\
static PyObject *
posix_spawnvp(PyObject *self, PyObject *args)
{
+ PyObject *opath;
char *path;
PyObject *argv;
char **argvlist;
@@ -3514,10 +3606,11 @@ posix_spawnvp(PyObject *self, PyObject *args)
/* spawnvp has three arguments: (mode, path, argv), where
argv is a list or tuple of strings. */
- if (!PyArg_ParseTuple(args, "ietO:spawnvp", &mode,
- Py_FileSystemDefaultEncoding,
- &path, &argv))
+ if (!PyArg_ParseTuple(args, "iO&O:spawnvp", &mode,
+ PyUnicode_FSConverter,
+ &opath, &argv))
return NULL;
+ path = bytes2str(opath, 1); /* two-argument form, as in posix_spawnv */
if (PyList_Check(argv)) {
argc = PyList_Size(argv);
getitem = PyList_GetItem;
@@ -3529,24 +3622,23 @@ posix_spawnvp(PyObject *self, PyObject *args)
else {
PyErr_SetString(PyExc_TypeError,
"spawnvp() arg 2 must be a tuple or list");
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
argvlist = PyMem_NEW(char *, argc+1);
if (argvlist == NULL) {
- PyMem_Free(path);
+ release_bytes(opath);
return PyErr_NoMemory();
}
for (i = 0; i < argc; i++) {
- if (!PyArg_Parse((*getitem)(argv, i), "et",
- Py_FileSystemDefaultEncoding,
- &argvlist[i])) {
+ if (!fsconvert_strdup((*getitem)(argv, i),
+ &argvlist[i])) {
free_string_array(argvlist, i);
PyErr_SetString(
PyExc_TypeError,
"spawnvp() arg 2 must contain only strings");
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
}
@@ -3561,7 +3653,7 @@ posix_spawnvp(PyObject *self, PyObject *args)
Py_END_ALLOW_THREADS
free_string_array(argvlist, argc);
- PyMem_Free(path);
+ release_bytes(opath);
if (spawnval == -1)
return posix_error();
@@ -3583,6 +3675,7 @@ search path to find the file.\n\
static PyObject *
posix_spawnvpe(PyObject *self, PyObject *args)
{
+ PyObject *opath; /* bytes object produced by PyUnicode_FSConverter */
char *path;
PyObject *argv, *env;
char **argvlist;
@@ -3598,9 +3691,10 @@ posix_spawnvpe(PyObject *self, PyObject *args)
like posix.environ. */
- if (!PyArg_ParseTuple(args, "ietOO:spawnvpe", &mode,
- Py_FileSystemDefaultEncoding,
- &path, &argv, &env))
+ if (!PyArg_ParseTuple(args, "iO&OO:spawnvpe", &mode,
+ PyUnicode_FSConverter,
+ &opath, &argv, &env))
return NULL;
+ path = bytes2str(opath, 1); /* two-argument form, as in posix_spawnve */
if (PyList_Check(argv)) {
argc = PyList_Size(argv);
getitem = PyList_GetItem;
@@ -3626,10 +3720,8 @@ posix_spawnvpe(PyObject *self, PyObject *args)
goto fail_0;
}
for (i = 0; i < argc; i++) {
- if (!PyArg_Parse((*getitem)(argv, i),
- "et;spawnvpe() arg 2 must contain only strings",
- Py_FileSystemDefaultEncoding,
- &argvlist[i]))
+ if (!fsconvert_strdup((*getitem)(argv, i),
+ &argvlist[i]))
{
lastarg = i;
goto fail_1;
@@ -3710,7 +3802,7 @@ posix_spawnvpe(PyObject *self, PyObject *args)
Py_XDECREF(vals);
Py_XDECREF(keys);
fail_0:
- PyMem_Free(path);
+ release_bytes(opath);
return res;
}
#endif /* PYOS_OS2 */
@@ -4549,12 +4641,12 @@ static PyObject *
posix_lstat(PyObject *self, PyObject *args)
{
#ifdef HAVE_LSTAT
- return posix_do_stat(self, args, "et:lstat", lstat, NULL, NULL);
+ return posix_do_stat(self, args, "O&:lstat", lstat, NULL, NULL);
#else /* !HAVE_LSTAT */
#ifdef MS_WINDOWS
- return posix_do_stat(self, args, "et:lstat", STAT, "U:lstat", win32_wstat);
+ return posix_do_stat(self, args, "O&:lstat", STAT, "U:lstat", win32_wstat);
#else
- return posix_do_stat(self, args, "et:lstat", STAT, NULL, NULL);
+ return posix_do_stat(self, args, "O&:lstat", STAT, NULL, NULL);
#endif
#endif /* !HAVE_LSTAT */
}
@@ -4570,16 +4662,18 @@ posix_readlink(PyObject *self, PyObject *args)
{
PyObject* v;
char buf[MAXPATHLEN];
+ PyObject *opath;
char *path;
int n;
int arg_is_unicode = 0;
- if (!PyArg_ParseTuple(args, "et:readlink",
- Py_FileSystemDefaultEncoding, &path))
+ if (!PyArg_ParseTuple(args, "O&:readlink",
+ PyUnicode_FSConverter, &opath))
return NULL;
+ path = bytes2str(opath, 1);
v = PySequence_GetItem(args, 0);
if (v == NULL) {
- PyMem_Free(path);
+ release_bytes(opath);
return NULL;
}
@@ -4592,16 +4686,16 @@ posix_readlink(PyObject *self, PyObject *args)
n = readlink(path, buf, (int) sizeof buf);
Py_END_ALLOW_THREADS
if (n < 0)
- return posix_error_with_allocated_filename(path);
+ return posix_error_with_allocated_filename(opath);
- PyMem_Free(path);
+ release_bytes(opath);
v = PyBytes_FromStringAndSize(buf, n);
if (arg_is_unicode) {
PyObject *w;
w = PyUnicode_FromEncodedObject(v,
Py_FileSystemDefaultEncoding,
- "strict");
+ "utf8b");
if (w != NULL) {
Py_DECREF(v);
v = w;
@@ -4623,7 +4717,7 @@ Create a symbolic link pointing to src named dst.");
static PyObject *
posix_symlink(PyObject *self, PyObject *args)
{
- return posix_2str(args, "etet:symlink", symlink);
+ return posix_2str(args, "O&O&:symlink", symlink);
}
#endif /* HAVE_SYMLINK */
@@ -4811,7 +4905,8 @@ Open a file (for low level IO).");
static PyObject *
posix_open(PyObject *self, PyObject *args)
{
- char *file = NULL;
+ PyObject *ofile;
+ char *file;
int flag;
int mode = 0777;
int fd;
@@ -4835,17 +4930,17 @@ posix_open(PyObject *self, PyObject *args)
}
#endif
- if (!PyArg_ParseTuple(args, "eti|i",
- Py_FileSystemDefaultEncoding, &file,
+ if (!PyArg_ParseTuple(args, "O&i|i",
+ PyUnicode_FSConverter, &ofile,
&flag, &mode))
return NULL;
-
+ file = bytes2str(ofile, 1);
Py_BEGIN_ALLOW_THREADS
fd = open(file, flag, mode);
Py_END_ALLOW_THREADS
if (fd < 0)
- return posix_error_with_allocated_filename(file);
- PyMem_Free(file);
+ return posix_error_with_allocated_filename(ofile);
+ release_bytes(ofile);
return PyLong_FromLong((long)fd);
}
@@ -5289,20 +5384,27 @@ posix_putenv(PyObject *self, PyObject *args)
wchar_t *s1, *s2;
wchar_t *newenv;
#else
+ PyObject *os1, *os2;
char *s1, *s2;
char *newenv;
#endif
PyObject *newstr;
size_t len;
- if (!PyArg_ParseTuple(args,
#ifdef MS_WINDOWS
+ if (!PyArg_ParseTuple(args,
"uu:putenv",
-#else
- "ss:putenv",
-#endif
&s1, &s2))
return NULL;
+#else
+ if (!PyArg_ParseTuple(args,
+ "O&O&:putenv",
+ PyUnicode_FSConverter, &os1,
+ PyUnicode_FSConverter, &os2))
+ return NULL;
+ s1 = bytes2str(os1, 1);
+ s2 = bytes2str(os2, 1);
+#endif
#if defined(PYOS_OS2)
if (stricmp(s1, "BEGINLIBPATH") == 0) {
@@ -5345,6 +5447,8 @@ posix_putenv(PyObject *self, PyObject *args)
PyOS_snprintf(newenv, len, "%s=%s", s1, s2);
if (putenv(newenv)) {
Py_DECREF(newstr);
+ release_bytes(os1);
+ release_bytes(os2);
posix_error();
return NULL;
}
@@ -5365,6 +5469,10 @@ posix_putenv(PyObject *self, PyObject *args)
#if defined(PYOS_OS2)
}
#endif
+#ifndef MS_WINDOWS
+ release_bytes(os1);
+ release_bytes(os2);
+#endif
Py_INCREF(Py_None);
return Py_None;
}
@@ -6688,6 +6796,7 @@ the underlying Win32 ShellExecute function doesn't work if it is.");
static PyObject *
win32_startfile(PyObject *self, PyObject *args)
{
+ PyObject *ofilepath;
char *filepath;
char *operation = NULL;
HINSTANCE rc;
@@ -6729,20 +6838,21 @@ win32_startfile(PyObject *self, PyObject *args)
#endif
normal:
- if (!PyArg_ParseTuple(args, "et|s:startfile",
- Py_FileSystemDefaultEncoding, &filepath,
+ if (!PyArg_ParseTuple(args, "O&|s:startfile",
+ PyUnicode_FSConverter, &ofilepath,
&operation))
return NULL;
+ filepath = bytes2str(ofilepath, 1);
Py_BEGIN_ALLOW_THREADS
rc = ShellExecute((HWND)0, operation, filepath,
NULL, NULL, SW_SHOWNORMAL);
Py_END_ALLOW_THREADS
if (rc <= (HINSTANCE)32) {
PyObject *errval = win32_error("startfile", filepath);
- PyMem_Free(filepath);
+ release_bytes(ofilepath);
return errval;
}
- PyMem_Free(filepath);
+ release_bytes(ofilepath);
Py_INCREF(Py_None);
return Py_None;
}
diff --git a/Modules/python.c b/Modules/python.c
index f6da86f..4c0a55b 100644
--- a/Modules/python.c
+++ b/Modules/python.c
@@ -14,6 +14,93 @@ wmain(int argc, wchar_t **argv)
return Py_Main(argc, argv);
}
#else
+/* Convert the locale-encoded string arg to a wide string allocated with
+   PyMem_Malloc.  mbstowcs is tried first; if it fails, bytes that cannot
+   be decoded are escaped as 0xdc00 + byte (UTF-8b).  On failure a
+   message is written to stderr and NULL is returned. */
+static wchar_t*
+char2wchar(char* arg)
+{
+    wchar_t *res;
+#ifdef HAVE_BROKEN_MBSTOWCS
+    /* Some platforms have a broken implementation of
+     * mbstowcs which does not count the characters that
+     * would result from conversion.  Use an upper bound.
+     */
+    size_t argsize = strlen(arg);
+#else
+    size_t argsize = mbstowcs(NULL, arg, 0);
+#endif
+    size_t count;
+    unsigned char *in;
+    wchar_t *out;
+#ifdef HAVE_MBRTOWC
+    mbstate_t mbs;
+#endif
+    if (argsize != (size_t)-1) {
+        res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
+        if (!res)
+            goto oom;
+        count = mbstowcs(res, arg, argsize+1);
+        if (count != (size_t)-1)
+            return res;
+        PyMem_Free(res);
+    }
+    /* Conversion failed. Fall back to escaping with utf8b. */
+#ifdef HAVE_MBRTOWC
+    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
+
+    /* Overallocate; as multi-byte characters are in the argument, the
+       actual output could use less memory. */
+    argsize = strlen(arg) + 1;
+    res = PyMem_Malloc(argsize*sizeof(wchar_t));
+    if (!res) goto oom;
+    in = (unsigned char*)arg;
+    out = res;
+    memset(&mbs, 0, sizeof mbs);
+    while (argsize) {
+        size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
+        if (converted == 0)
+            /* Reached end of string; null char stored. */
+            break;
+        if (converted == (size_t)-2) {
+            /* Incomplete character. This should never happen,
+               since we provide everything that we have -
+               unless there is a bug in the C library, or I
+               misunderstood how mbrtowc works. */
+            fprintf(stderr, "unexpected mbrtowc result -2\n");
+            /* Release the partially filled buffer before bailing out. */
+            PyMem_Free(res);
+            return NULL;
+        }
+        if (converted == (size_t)-1) {
+            /* Conversion error. Escape as UTF-8b, and start over
+               in the initial shift state. */
+            *out++ = 0xdc00 + *in++;
+            argsize--;
+            memset(&mbs, 0, sizeof mbs);
+            continue;
+        }
+        /* successfully converted some bytes */
+        in += converted;
+        argsize -= converted;
+        out++;
+    }
+#else
+    /* Cannot use C locale for escaping; manually escape as if charset
+       is ASCII (i.e. escape all bytes >= 128).  This will still roundtrip
+       correctly in the locale's charset, which must be an ASCII superset. */
+    res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
+    if (!res) goto oom;
+    in = (unsigned char*)arg;
+    out = res;
+    while(*in)
+        if(*in < 128)
+            *out++ = *in++;
+        else
+            *out++ = 0xdc00 + *in++;
+    *out = 0;
+#endif
+    return res;
+oom:
+    fprintf(stderr, "out of memory\n");
+    return NULL;
+}
+
int
main(int argc, char **argv)
{
@@ -40,31 +127,9 @@ main(int argc, char **argv)
oldloc = strdup(setlocale(LC_ALL, NULL));
setlocale(LC_ALL, "");
for (i = 0; i < argc; i++) {
-#ifdef HAVE_BROKEN_MBSTOWCS
- /* Some platforms have a broken implementation of
- * mbstowcs which does not count the characters that
- * would result from conversion. Use an upper bound.
- */
- size_t argsize = strlen(argv[i]);
-#else
- size_t argsize = mbstowcs(NULL, argv[i], 0);
-#endif
- size_t count;
- if (argsize == (size_t)-1) {
- fprintf(stderr, "Could not convert argument %d to string\n", i);
+ argv_copy2[i] = argv_copy[i] = char2wchar(argv[i]);
+ if (!argv_copy[i])
return 1;
- }
- argv_copy[i] = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
- argv_copy2[i] = argv_copy[i];
- if (!argv_copy[i]) {
- fprintf(stderr, "out of memory\n");
- return 1;
- }
- count = mbstowcs(argv_copy[i], argv[i], argsize+1);
- if (count == (size_t)-1) {
- fprintf(stderr, "Could not convert argument %d to string\n", i);
- return 1;
- }
}
setlocale(LC_ALL, oldloc);
free(oldloc);
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 18b6fa2..218e70b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1530,6 +1530,53 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
}
}
+/* Convert the argument to a bytes object, according to the file
+ system encoding.
+ Written for use as an "O&" converter: arg is the object to convert and
+ addr points to a PyObject* that receives the result.
+ bytes and bytearray arguments are passed through with a new reference;
+ anything else is converted with PyUnicode_FromObject and encoded with
+ Py_FileSystemDefaultEncoding using the "utf8b" error handler.
+ Returns 1 on success.  Returns 0 on failure; the two checks below set
+ TypeError themselves (non-bytes encoder result, embedded NUL). */
+
+int
+PyUnicode_FSConverter(PyObject* arg, void* addr)
+{
+ PyObject *output = NULL;
+ Py_ssize_t size;
+ void *data;
+ if (PyBytes_Check(arg) || PyByteArray_Check(arg)) {
+ output = arg; /* already bytes-like: use as is */
+ Py_INCREF(output);
+ }
+ else {
+ arg = PyUnicode_FromObject(arg); /* new reference, released below */
+ if (!arg)
+ return 0;
+ output = PyUnicode_AsEncodedObject(arg,
+ Py_FileSystemDefaultEncoding,
+ "utf8b");
+ Py_DECREF(arg);
+ if (!output)
+ return 0;
+ if (!PyBytes_Check(output)) { /* the codec returned something other than bytes */
+ Py_DECREF(output);
+ PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes");
+ return 0;
+ }
+ }
+ if (PyBytes_Check(output)) {
+ size = PyBytes_GET_SIZE(output);
+ data = PyBytes_AS_STRING(output);
+ }
+ else { /* only a bytearray passed through above can reach this branch */
+ size = PyByteArray_GET_SIZE(output);
+ data = PyByteArray_AS_STRING(output);
+ }
+ if (size != strlen(data)) { /* strlen stops early when a NUL byte lies inside the data */
+ PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+ Py_DECREF(output);
+ return 0;
+ }
+ *(PyObject**)addr = output; /* hand the reference over to the caller */
+ return 1;
+}
+
+
char*
_PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
{
@@ -4154,11 +4201,22 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
collstart-startp, collend-startp, &newpos);
if (repunicode == NULL)
goto onError;
- if (!PyUnicode_Check(repunicode)) {
- /* Implementation limitation: byte results not supported yet. */
- PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
+ if (PyBytes_Check(repunicode)) {
+ /* Directly copy bytes result to output. */
+ repsize = PyBytes_Size(repunicode);
+ if (repsize > 1) {
+ /* Make room for all additional bytes. */
+ if (_PyBytes_Resize(&res, ressize+repsize-1)) {
+ Py_DECREF(repunicode);
+ goto onError;
+ }
+ ressize += repsize-1;
+ }
+ memcpy(str, PyBytes_AsString(repunicode), repsize);
+ str += repsize;
+ p = startp + newpos;
Py_DECREF(repunicode);
- goto onError;
+ break;
}
/* need more space? (at least enough for what we
have+the replacement+the rest of the string, so
@@ -5123,11 +5181,24 @@ int charmap_encoding_error(
collstartpos, collendpos, &newpos);
if (repunicode == NULL)
return -1;
- if (!PyUnicode_Check(repunicode)) {
- /* Implementation limitation: byte results not supported yet. */
- PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
+ if (PyBytes_Check(repunicode)) {
+ /* Directly copy bytes result to output. */
+ Py_ssize_t outsize = PyBytes_Size(*res);
+ Py_ssize_t requiredsize;
+ repsize = PyBytes_Size(repunicode);
+ requiredsize = *respos + repsize;
+ if (requiredsize > outsize)
+ /* Make room for all additional bytes. */
+ if (charmapencode_resize(res, respos, requiredsize)) {
+ Py_DECREF(repunicode);
+ return -1;
+ }
+ memcpy(PyBytes_AsString(*res) + *respos,
+ PyBytes_AsString(repunicode), repsize);
+ *respos += repsize;
+ *inpos = newpos;
Py_DECREF(repunicode);
- return -1;
+ break;
}
/* generate replacement */
repsize = PyUnicode_GET_SIZE(repunicode);
@@ -5691,7 +5762,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
if (repunicode == NULL)
goto onError;
if (!PyUnicode_Check(repunicode)) {
- /* Implementation limitation: byte results not supported yet. */
+ /* Byte results not supported, since they have no decimal property. */
PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
Py_DECREF(repunicode);
goto onError;
diff --git a/Python/codecs.c b/Python/codecs.c
index 633a24c..7e3ff8a 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -829,6 +829,82 @@ PyCodec_SurrogateErrors(PyObject *exc)
}
}
+static PyObject *
+PyCodec_UTF8bErrors(PyObject *exc)
+{ /* Error callback behind the "utf8b" handler.
+ Encoding: turns each code point in 0xdc80..0xdcff of the failing range
+ back into the byte (code point - 0xdc00) and returns (bytes, end).
+ Decoding: turns up to four non-ASCII bytes at the failing position
+ into 0xdc00 + byte and returns (str, position after them).
+ In both directions the original exception is re-raised when the
+ input cannot be handled; other exception types are rejected. */
+ PyObject *restuple;
+ PyObject *object;
+ Py_ssize_t start;
+ Py_ssize_t end;
+ PyObject *res;
+ if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+ Py_UNICODE *p;
+ Py_UNICODE *startp;
+ char *outp;
+ if (PyUnicodeEncodeError_GetStart(exc, &start))
+ return NULL;
+ if (PyUnicodeEncodeError_GetEnd(exc, &end))
+ return NULL;
+ if (!(object = PyUnicodeEncodeError_GetObject(exc)))
+ return NULL;
+ startp = PyUnicode_AS_UNICODE(object);
+ res = PyBytes_FromStringAndSize(NULL, end-start); /* one output byte per input code point */
+ if (!res) {
+ Py_DECREF(object);
+ return NULL;
+ }
+ outp = PyBytes_AsString(res);
+ for (p = startp+start; p < startp+end; p++) {
+ Py_UNICODE ch = *p;
+ if (ch < 0xdc80 || ch > 0xdcff) {
+ /* Not a UTF-8b surrogate, fail with original exception */
+ PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+ Py_DECREF(res);
+ Py_DECREF(object);
+ return NULL;
+ }
+ *outp++ = ch - 0xdc00; /* yields a byte in 0x80..0xff */
+ }
+ restuple = Py_BuildValue("(On)", res, end); /* (replacement bytes, resume position) */
+ Py_DECREF(res);
+ Py_DECREF(object);
+ return restuple;
+ }
+ else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+ unsigned char *p;
+ Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */
+ int consumed = 0;
+ if (PyUnicodeDecodeError_GetStart(exc, &start))
+ return NULL;
+ if (PyUnicodeDecodeError_GetEnd(exc, &end))
+ return NULL;
+ if (!(object = PyUnicodeDecodeError_GetObject(exc)))
+ return NULL;
+ if (!(p = (unsigned char*)PyBytes_AsString(object))) {
+ Py_DECREF(object);
+ return NULL;
+ }
+ while (consumed < 4 && consumed < end-start) {
+ /* Refuse to escape ASCII bytes. */
+ if (p[start+consumed] < 128)
+ break;
+ ch[consumed] = 0xdc00 + p[start+consumed];
+ consumed++;
+ }
+ Py_DECREF(object);
+ if (!consumed) {
+ /* codec complained about ASCII byte. */
+ PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+ return NULL;
+ }
+ return Py_BuildValue("(u#n)", ch, consumed, start+consumed); /* (escaped text, resume position) */
+ }
+ else {
+ wrong_exception_type(exc);
+ return NULL;
+ }
+}
+
static PyObject *strict_errors(PyObject *self, PyObject *exc)
{
@@ -864,6 +940,11 @@ static PyObject *surrogates_errors(PyObject *self, PyObject *exc)
return PyCodec_SurrogateErrors(exc);
}
+static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
+{ /* Thin wrapper: forwards exc to PyCodec_UTF8bErrors; self is unused. */
+ return PyCodec_UTF8bErrors(exc);
+}
+
static int _PyCodecRegistry_Init(void)
{
static struct {
@@ -918,6 +999,14 @@ static int _PyCodecRegistry_Init(void)
surrogates_errors,
METH_O
}
+ },
+ {
+ "utf8b",
+ {
+ "utf8b",
+ utf8b_errors,
+ METH_O
+ }
}
};
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index f93403b..c75f55f 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -262,6 +262,22 @@ Py_InitializeEx(int install_sigs)
_PyImportHooks_Init();
+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+ /* On Unix, set the file system encoding according to the
+ user's preference, if the CODESET names a well-known
+ Python codec, and Py_FileSystemDefaultEncoding isn't
+ initialized by other means. Also set the encoding of
+ stdin and stdout if these are terminals. */
+
+ codeset = get_codeset();
+ if (codeset) {
+ if (!Py_FileSystemDefaultEncoding)
+ Py_FileSystemDefaultEncoding = codeset;
+ else
+ free(codeset);
+ }
+#endif
+
if (install_sigs)
initsigs(); /* Signal handling stuff, including initintr() */
@@ -285,22 +301,6 @@ Py_InitializeEx(int install_sigs)
#ifdef WITH_THREAD
_PyGILState_Init(interp, tstate);
#endif /* WITH_THREAD */
-
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
- /* On Unix, set the file system encoding according to the
- user's preference, if the CODESET names a well-known
- Python codec, and Py_FileSystemDefaultEncoding isn't
- initialized by other means. Also set the encoding of
- stdin and stdout if these are terminals. */
-
- codeset = get_codeset();
- if (codeset) {
- if (!Py_FileSystemDefaultEncoding)
- Py_FileSystemDefaultEncoding = codeset;
- else
- free(codeset);
- }
-#endif
}
void
diff --git a/configure b/configure
index d1da285..cdc9515 100755
--- a/configure
+++ b/configure
@@ -1,5 +1,5 @@
#! /bin/sh
-# From configure.in Revision: 71731 .
+# From configure.in Revision: 72144 .
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.61 for python 3.1.
#
@@ -16299,11 +16299,12 @@ echo "${ECHO_T}MACHDEP_OBJS" >&6; }
+
for ac_func in alarm setitimer getitimer bind_textdomain_codeset chown \
clock confstr ctermid execv fchmod fchown fork fpathconf ftime ftruncate \
gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \
getpriority getpwent getspnam getspent getsid getwd \
- kill killpg lchmod lchown lstat mkfifo mknod mktime \
+ kill killpg lchmod lchown lstat mbrtowc mkfifo mknod mktime \
mremap nice pathconf pause plock poll pthread_init \
putenv readlink realpath \
select sem_open sem_timedwait sem_getvalue sem_unlink setegid seteuid \
diff --git a/configure.in b/configure.in
index 6a1e231..ba43b21 100644
--- a/configure.in
+++ b/configure.in
@@ -2403,7 +2403,7 @@ AC_CHECK_FUNCS(alarm setitimer getitimer bind_textdomain_codeset chown \
clock confstr ctermid execv fchmod fchown fork fpathconf ftime ftruncate \
gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \
getpriority getpwent getspnam getspent getsid getwd \
- kill killpg lchmod lchown lstat mkfifo mknod mktime \
+ kill killpg lchmod lchown lstat mbrtowc mkfifo mknod mktime \
mremap nice pathconf pause plock poll pthread_init \
putenv readlink realpath \
select sem_open sem_timedwait sem_getvalue sem_unlink setegid seteuid \
diff --git a/pyconfig.h.in b/pyconfig.h.in
index 01bc235..4c77900 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -419,6 +419,9 @@
/* Define this if you have the makedev macro. */
#undef HAVE_MAKEDEV
+/* Define to 1 if you have the `mbrtowc' function. */
+#undef HAVE_MBRTOWC
+
/* Define to 1 if you have the `memmove' function. */
#undef HAVE_MEMMOVE