From cc16be85c0b7119854c00fb5c666825deef641cf Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 8 Sep 2016 10:35:16 -0700 Subject: Issue #27781: Change file system encoding on Windows to UTF-8 (PEP 529) --- Doc/c-api/unicode.rst | 30 +- Doc/library/sys.rst | 51 ++- Doc/using/cmdline.rst | 14 + Doc/whatsnew/3.6.rst | 29 ++ Include/fileobject.h | 1 + Include/unicodeobject.h | 8 +- Lib/os.py | 5 +- Lib/test/test_os.py | 113 +---- Misc/NEWS | 6 +- Modules/_codecsmodule.c | 8 +- Modules/clinic/_codecsmodule.c.h | 26 +- Modules/clinic/posixmodule.c.h | 96 +++- Modules/overlapped.c | 10 +- Modules/posixmodule.c | 925 ++++++++++++--------------------------- Objects/unicodeobject.c | 46 +- Python/bltinmodule.c | 8 +- Python/pylifecycle.c | 20 + Python/sysmodule.c | 50 ++- 18 files changed, 614 insertions(+), 832 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 44e9259..0835477 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -802,10 +802,11 @@ File System Encoding """""""""""""""""""" To encode and decode file names and other environment strings, -:c:data:`Py_FileSystemEncoding` should be used as the encoding, and -``"surrogateescape"`` should be used as the error handler (:pep:`383`). To -encode file names during argument parsing, the ``"O&"`` converter should be -used, passing :c:func:`PyUnicode_FSConverter` as the conversion function: +:c:data:`Py_FileSystemDefaultEncoding` should be used as the encoding, and +:c:data:`Py_FileSystemDefaultEncodeErrors` should be used as the error handler +(:pep:`383` and :pep:`529`). To encode file names to :class:`bytes` during +argument parsing, the ``"O&"`` converter should be used, passing +:c:func:`PyUnicode_FSConverter` as the conversion function: .. c:function:: int PyUnicode_FSConverter(PyObject* obj, void* result) @@ -820,8 +821,9 @@ used, passing :c:func:`PyUnicode_FSConverter` as the conversion function: .. versionchanged:: 3.6 Accepts a :term:`path-like object`. 
-To decode file names during argument parsing, the ``"O&"`` converter should be -used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function: +To decode file names to :class:`str` during argument parsing, the ``"O&"`` +converter should be used, passing :c:func:`PyUnicode_FSDecoder` as the +conversion function: .. c:function:: int PyUnicode_FSDecoder(PyObject* obj, void* result) @@ -840,7 +842,7 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function: .. c:function:: PyObject* PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) Decode a string using :c:data:`Py_FileSystemDefaultEncoding` and the - ``"surrogateescape"`` error handler, or ``"strict"`` on Windows. + :c:data:`Py_FileSystemDefaultEncodeErrors` error handler. If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the locale encoding. @@ -854,28 +856,28 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function: The :c:func:`Py_DecodeLocale` function. - .. versionchanged:: 3.2 - Use ``"strict"`` error handler on Windows. + .. versionchanged:: 3.6 + Use :c:data:`Py_FileSystemDefaultEncodeErrors` error handler. .. c:function:: PyObject* PyUnicode_DecodeFSDefault(const char *s) Decode a null-terminated string using :c:data:`Py_FileSystemDefaultEncoding` - and the ``"surrogateescape"`` error handler, or ``"strict"`` on Windows. + and the :c:data:`Py_FileSystemDefaultEncodeErrors` error handler. If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the locale encoding. Use :c:func:`PyUnicode_DecodeFSDefaultAndSize` if you know the string length. - .. versionchanged:: 3.2 - Use ``"strict"`` error handler on Windows. + .. versionchanged:: 3.6 + Use :c:data:`Py_FileSystemDefaultEncodeErrors` error handler. .. 
c:function:: PyObject* PyUnicode_EncodeFSDefault(PyObject *unicode) Encode a Unicode object to :c:data:`Py_FileSystemDefaultEncoding` with the - ``"surrogateescape"`` error handler, or ``"strict"`` on Windows, and return + :c:data:`Py_FileSystemDefaultEncodeErrors` error handler, and return :class:`bytes`. Note that the resulting :class:`bytes` object may contain null bytes. @@ -892,6 +894,8 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function: .. versionadded:: 3.2 + .. versionchanged:: 3.6 + Use :c:data:`Py_FileSystemDefaultEncodeErrors` error handler. wchar_t Support """"""""""""""" diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 8c9ca2a..9460b84 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -428,25 +428,42 @@ always available. .. function:: getfilesystemencoding() - Return the name of the encoding used to convert Unicode filenames into - system file names. The result value depends on the operating system: + Return the name of the encoding used to convert between Unicode + filenames and bytes filenames. For best compatibility, str should be + used for filenames in all cases, although representing filenames as bytes + is also supported. Functions accepting or returning filenames should support + either str or bytes and internally convert to the system's preferred + representation. - * On Mac OS X, the encoding is ``'utf-8'``. + This encoding is always ASCII-compatible. + + :func:`os.fsencode` and :func:`os.fsdecode` should be used to ensure that + the correct encoding and errors mode are used. - * On Unix, the encoding is the user's preference according to the result of - nl_langinfo(CODESET). + * On Mac OS X, the encoding is ``'utf-8'``. - * On Windows NT+, file names are Unicode natively, so no conversion is - performed. 
:func:`getfilesystemencoding` still returns ``'mbcs'``, as - this is the encoding that applications should use when they explicitly - want to convert Unicode strings to byte strings that are equivalent when - used as file names. + * On Unix, the encoding is the locale encoding. - * On Windows 9x, the encoding is ``'mbcs'``. + * On Windows, the encoding may be ``'utf-8'`` or ``'mbcs'``, depending + on user configuration. .. versionchanged:: 3.2 :func:`getfilesystemencoding` result cannot be ``None`` anymore. + .. versionchanged:: 3.6 + Windows is no longer guaranteed to return ``'mbcs'``. See :pep:`529` + and :func:`_enablelegacywindowsfsencoding` for more information. + +.. function:: getfilesystemencodeerrors() + + Return the name of the error mode used to convert between Unicode filenames + and bytes filenames. The encoding name is returned from + :func:`getfilesystemencoding`. + + :func:`os.fsencode` and :func:`os.fsdecode` should be used to ensure that + the correct encoding and errors mode are used. + + .. versionadded:: 3.6 .. function:: getrefcount(object) @@ -1138,6 +1155,18 @@ always available. This function has been added on a provisional basis (see :pep:`411` for details.) Use it only for debugging purposes. +.. function:: _enablelegacywindowsfsencoding() + + Changes the default filesystem encoding and errors mode to 'mbcs' and + 'replace' respectively, for consistency with versions of Python prior to 3.6. + + This is equivalent to defining the :envvar:`PYTHONLEGACYWINDOWSFSENCODING` + environment variable before launching Python. + + Availability: Windows + + .. versionadded:: 3.6 + See :pep:`529` for more details. .. data:: stdin stdout diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 37a9e14..2a83bd1 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -672,6 +672,20 @@ conflict. It now has no effect if set to an empty string. +.. 
envvar:: PYTHONLEGACYWINDOWSFSENCODING + + If set to a non-empty string, the default filesystem encoding and errors mode + will revert to their pre-3.6 values of 'mbcs' and 'replace', respectively. + Otherwise, the new defaults 'utf-8' and 'surrogatepass' are used. + + This may also be enabled at runtime with + :func:`sys._enablelegacywindowsfsencoding()`. + + Availability: Windows + + .. versionadded:: 3.6 + See :pep:`529` for more details. + Debug-mode variables ~~~~~~~~~~~~~~~~~~~~ diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst index f2b53fb..ce1c44e 100644 --- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -76,6 +76,8 @@ Security improvements: Windows improvements: +* PEP 529: :ref:`Change Windows filesystem encoding to UTF-8 <pep-529>` + * The ``py.exe`` launcher, when used interactively, no longer prefers Python 2 over Python 3 when the user doesn't specify a version (via command line arguments or a config file). Handling of shebang lines @@ -218,6 +220,33 @@ evaluated at run time, and then formatted using the :func:`format` protocol. See :pep:`498` and the main documentation at :ref:`f-strings`. +.. _pep-529: + +PEP 529: Change Windows filesystem encoding to UTF-8 +---------------------------------------------------- + +Representing filesystem paths is best performed with str (Unicode) rather than +bytes. However, there are some situations where using bytes is sufficient and +correct. + +Prior to Python 3.6, data loss could result when using bytes paths on Windows. +With this change, using bytes to represent paths is now supported on Windows, +provided those bytes are encoded with the encoding returned by +:func:`sys.getfilesystemencoding()`, which now defaults to ``'utf-8'``. + +Applications that do not use str to represent paths should use +:func:`os.fsencode()` and :func:`os.fsdecode()` to ensure their bytes are +correctly encoded.
To revert to the previous behaviour, set +:envvar:`PYTHONLEGACYWINDOWSFSENCODING` or call +:func:`sys._enablelegacywindowsfsencoding`. + +See :pep:`529` for more information and discussion of code modifications that +may be required. + +.. note:: + + This change is considered experimental for 3.6.0 beta releases. The default + encoding may change before the final release. PEP 487: Simpler customization of class creation ------------------------------------------------ diff --git a/Include/fileobject.h b/Include/fileobject.h index 03155d3..03984ba 100644 --- a/Include/fileobject.h +++ b/Include/fileobject.h @@ -23,6 +23,7 @@ PyAPI_FUNC(char *) Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *); If non-NULL, this is different than the default encoding for strings */ PyAPI_DATA(const char *) Py_FileSystemDefaultEncoding; +PyAPI_DATA(const char *) Py_FileSystemDefaultEncodeErrors; PyAPI_DATA(int) Py_HasFileSystemDefaultEncoding; /* Internal API diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index d38721f..1933ad1 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -103,10 +103,6 @@ typedef wchar_t Py_UNICODE; # endif #endif -#if defined(MS_WINDOWS) -# define HAVE_MBCS -#endif - #ifdef HAVE_WCHAR_H /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */ # ifdef _HAVE_BSDI @@ -1657,7 +1653,7 @@ PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( ); #endif -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS /* --- MBCS codecs for Windows -------------------------------------------- */ @@ -1700,7 +1696,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( const char *errors /* error handling */ ); -#endif /* HAVE_MBCS */ +#endif /* MS_WINDOWS */ /* --- Decimal Encoder ---------------------------------------------------- */ diff --git a/Lib/os.py b/Lib/os.py index 10d70ad..7379dad 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -851,10 +851,7 @@ if supports_bytes_environ: def _fscodec(): encoding = sys.getfilesystemencoding() - if 
encoding == 'mbcs': - errors = 'strict' - else: - errors = 'surrogateescape' + errors = sys.getfilesystemencodeerrors() def fsencode(filename): """Encode filename (an os.PathLike, bytes, or str) to the filesystem diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 2de94c6..aee31ed 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -90,16 +90,6 @@ def ignore_deprecation_warnings(msg_regex, quiet=False): yield -@contextlib.contextmanager -def bytes_filename_warn(expected): - msg = 'The Windows bytes API has been deprecated' - if os.name == 'nt': - with ignore_deprecation_warnings(msg, quiet=not expected): - yield - else: - yield - - class _PathLike(os.PathLike): def __init__(self, path=""): @@ -342,8 +332,7 @@ class StatAttributeTests(unittest.TestCase): fname = self.fname.encode(sys.getfilesystemencoding()) except UnicodeEncodeError: self.skipTest("cannot encode %a for the filesystem" % self.fname) - with bytes_filename_warn(True): - self.check_stat_attributes(fname) + self.check_stat_attributes(fname) def test_stat_result_pickle(self): result = os.stat(self.fname) @@ -1032,8 +1021,6 @@ class BytesWalkTests(WalkTests): def setUp(self): super().setUp() self.stack = contextlib.ExitStack() - if os.name == 'nt': - self.stack.enter_context(bytes_filename_warn(False)) def tearDown(self): self.stack.close() @@ -1640,8 +1627,7 @@ class LinkTests(unittest.TestCase): def _test_link(self, file1, file2): create_file(file1) - with bytes_filename_warn(False): - os.link(file1, file2) + os.link(file1, file2) with open(file1, "r") as f1, open(file2, "r") as f2: self.assertTrue(os.path.sameopenfile(f1.fileno(), f2.fileno())) @@ -1934,10 +1920,9 @@ class Win32ListdirTests(unittest.TestCase): self.created_paths) # bytes - with bytes_filename_warn(False): - self.assertEqual( - sorted(os.listdir(os.fsencode(support.TESTFN))), - [os.fsencode(path) for path in self.created_paths]) + self.assertEqual( + sorted(os.listdir(os.fsencode(support.TESTFN))), + 
[os.fsencode(path) for path in self.created_paths]) def test_listdir_extended_path(self): """Test when the path starts with '\\\\?\\'.""" @@ -1949,11 +1934,10 @@ class Win32ListdirTests(unittest.TestCase): self.created_paths) # bytes - with bytes_filename_warn(False): - path = b'\\\\?\\' + os.fsencode(os.path.abspath(support.TESTFN)) - self.assertEqual( - sorted(os.listdir(path)), - [os.fsencode(path) for path in self.created_paths]) + path = b'\\\\?\\' + os.fsencode(os.path.abspath(support.TESTFN)) + self.assertEqual( + sorted(os.listdir(path)), + [os.fsencode(path) for path in self.created_paths]) @unittest.skipUnless(sys.platform == "win32", "Win32 specific tests") @@ -2028,10 +2012,8 @@ class Win32SymlinkTests(unittest.TestCase): self.assertNotEqual(os.lstat(link), os.stat(link)) bytes_link = os.fsencode(link) - with bytes_filename_warn(True): - self.assertEqual(os.stat(bytes_link), os.stat(target)) - with bytes_filename_warn(True): - self.assertNotEqual(os.lstat(bytes_link), os.stat(bytes_link)) + self.assertEqual(os.stat(bytes_link), os.stat(target)) + self.assertNotEqual(os.lstat(bytes_link), os.stat(bytes_link)) def test_12084(self): level1 = os.path.abspath(support.TESTFN) @@ -2589,46 +2571,6 @@ class ExtendedAttributeTests(unittest.TestCase): self._check_xattrs(getxattr, setxattr, removexattr, listxattr) -@unittest.skipUnless(sys.platform == "win32", "Win32 specific tests") -class Win32DeprecatedBytesAPI(unittest.TestCase): - def test_deprecated(self): - import nt - filename = os.fsencode(support.TESTFN) - for func, *args in ( - (nt._getfullpathname, filename), - (nt._isdir, filename), - (os.access, filename, os.R_OK), - (os.chdir, filename), - (os.chmod, filename, 0o777), - (os.getcwdb,), - (os.link, filename, filename), - (os.listdir, filename), - (os.lstat, filename), - (os.mkdir, filename), - (os.open, filename, os.O_RDONLY), - (os.rename, filename, filename), - (os.rmdir, filename), - (os.startfile, filename), - (os.stat, filename), - (os.unlink, 
filename), - (os.utime, filename), - ): - with bytes_filename_warn(True): - try: - func(*args) - except OSError: - # ignore OSError, we only care about DeprecationWarning - pass - - @support.skip_unless_symlink - def test_symlink(self): - self.addCleanup(support.unlink, support.TESTFN) - - filename = os.fsencode(support.TESTFN) - with bytes_filename_warn(True): - os.symlink(filename, filename) - - @unittest.skipUnless(hasattr(os, 'get_terminal_size'), "requires os.get_terminal_size") class TermsizeTests(unittest.TestCase): def test_does_not_crash(self): @@ -2712,16 +2654,7 @@ class OSErrorTests(unittest.TestCase): (self.bytes_filenames, os.replace, b"dst"), (self.unicode_filenames, os.rename, "dst"), (self.unicode_filenames, os.replace, "dst"), - # Issue #16414: Don't test undecodable names with listdir() - # because of a Windows bug. - # - # With the ANSI code page 932, os.listdir(b'\xe7') return an - # empty list (instead of failing), whereas os.listdir(b'\xff') - # raises a FileNotFoundError. It looks like a Windows bug: - # b'\xe7' directory does not exist, FindFirstFileA(b'\xe7') - # fails with ERROR_FILE_NOT_FOUND (2), instead of - # ERROR_PATH_NOT_FOUND (3). 
- (self.unicode_filenames, os.listdir,), + (self.unicode_filenames, os.listdir, ), )) else: funcs.extend(( @@ -2762,19 +2695,24 @@ class OSErrorTests(unittest.TestCase): else: funcs.append((self.filenames, os.readlink,)) + for filenames, func, *func_args in funcs: for name in filenames: try: - if isinstance(name, str): + if isinstance(name, (str, bytes)): func(name, *func_args) - elif isinstance(name, bytes): - with bytes_filename_warn(False): - func(name, *func_args) else: with self.assertWarnsRegex(DeprecationWarning, 'should be'): func(name, *func_args) except OSError as err: - self.assertIs(err.filename, name) + self.assertIs(err.filename, name, str(func)) + except RuntimeError as err: + if sys.platform != 'win32': + raise + + # issue27781: undecodable bytes currently raise RuntimeError + # by 3.6.0b4 this will become UnicodeDecodeError or nothing + self.assertIsInstance(err.__context__, UnicodeDecodeError) else: self.fail("No exception thrown by {}".format(func)) @@ -3086,7 +3024,6 @@ class TestScandir(unittest.TestCase): entry = self.create_file_entry() self.assertEqual(os.fspath(entry), os.path.join(self.path, 'file.txt')) - @unittest.skipIf(os.name == "nt", "test requires bytes path support") def test_fspath_protocol_bytes(self): bytes_filename = os.fsencode('bytesfile.txt') bytes_entry = self.create_file_entry(name=bytes_filename) @@ -3158,12 +3095,6 @@ class TestScandir(unittest.TestCase): entry.stat(follow_symlinks=False) def test_bytes(self): - if os.name == "nt": - # On Windows, os.scandir(bytes) must raise an exception - with bytes_filename_warn(True): - self.assertRaises(TypeError, os.scandir, b'.') - return - self.create_file("file.txt") path_bytes = os.fsencode(self.path) diff --git a/Misc/NEWS b/Misc/NEWS index fa8c307..933a5c1 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -286,6 +286,8 @@ Build Windows ------- +- Issue #27781: Change file system encoding on Windows to UTF-8 (PEP 529) + - Issue #27731: Opt-out of MAX_PATH on Windows 10 - Issue #6135: 
Adds encoding and errors parameters to subprocess. @@ -2632,7 +2634,7 @@ Library - Issue #24774: Fix docstring in http.server.test. Patch from Chiu-Hsiang Hsu. - Issue #21159: Improve message in configparser.InterpolationMissingOptionError. - Patch from Łukasz Langa. + Patch from Łukasz Langa. - Issue #20362: Honour TestCase.longMessage correctly in assertRegex. Patch from Ilia Kurenkov. @@ -4560,7 +4562,7 @@ Library Based on patch by Martin Panter. - Issue #17293: uuid.getnode() now determines MAC address on AIX using netstat. - Based on patch by Aivars Kalvāns. + Based on patch by Aivars Kalvāns. - Issue #22769: Fixed ttk.Treeview.tag_has() when called without arguments. diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 4d25a53..586b73a 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -604,7 +604,7 @@ _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data, return codec_tuple(decoded, data->len); } -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS /*[clinic input] _codecs.mbcs_decode @@ -666,7 +666,7 @@ _codecs_code_page_decode_impl(PyObject *module, int codepage, return codec_tuple(decoded, consumed); } -#endif /* HAVE_MBCS */ +#endif /* MS_WINDOWS */ /* --- Encoder ------------------------------------------------------------ */ @@ -972,7 +972,7 @@ _codecs_charmap_build_impl(PyObject *module, PyObject *map) return PyUnicode_BuildEncodingMap(map); } -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS /*[clinic input] _codecs.mbcs_encode @@ -1021,7 +1021,7 @@ _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str, PyUnicode_GET_LENGTH(str)); } -#endif /* HAVE_MBCS */ +#endif /* MS_WINDOWS */ /* --- Error handler registry --------------------------------------------- */ diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h index 6a63cec..c7fd66f 100644 --- a/Modules/clinic/_codecsmodule.c.h +++ b/Modules/clinic/_codecsmodule.c.h @@ -764,7 +764,7 @@ exit: return return_value; } -#if
defined(HAVE_MBCS) +#if defined(MS_WINDOWS) PyDoc_STRVAR(_codecs_mbcs_decode__doc__, "mbcs_decode($module, data, errors=None, final=False, /)\n" @@ -801,9 +801,9 @@ exit: return return_value; } -#endif /* defined(HAVE_MBCS) */ +#endif /* defined(MS_WINDOWS) */ -#if defined(HAVE_MBCS) +#if defined(MS_WINDOWS) PyDoc_STRVAR(_codecs_oem_decode__doc__, "oem_decode($module, data, errors=None, final=False, /)\n" @@ -840,9 +840,9 @@ exit: return return_value; } -#endif /* defined(HAVE_MBCS) */ +#endif /* defined(MS_WINDOWS) */ -#if defined(HAVE_MBCS) +#if defined(MS_WINDOWS) PyDoc_STRVAR(_codecs_code_page_decode__doc__, "code_page_decode($module, codepage, data, errors=None, final=False, /)\n" @@ -880,7 +880,7 @@ exit: return return_value; } -#endif /* defined(HAVE_MBCS) */ +#endif /* defined(MS_WINDOWS) */ PyDoc_STRVAR(_codecs_readbuffer_encode__doc__, "readbuffer_encode($module, data, errors=None, /)\n" @@ -1351,7 +1351,7 @@ exit: return return_value; } -#if defined(HAVE_MBCS) +#if defined(MS_WINDOWS) PyDoc_STRVAR(_codecs_mbcs_encode__doc__, "mbcs_encode($module, str, errors=None, /)\n" @@ -1381,9 +1381,9 @@ exit: return return_value; } -#endif /* defined(HAVE_MBCS) */ +#endif /* defined(MS_WINDOWS) */ -#if defined(HAVE_MBCS) +#if defined(MS_WINDOWS) PyDoc_STRVAR(_codecs_oem_encode__doc__, "oem_encode($module, str, errors=None, /)\n" @@ -1413,9 +1413,9 @@ exit: return return_value; } -#endif /* defined(HAVE_MBCS) */ +#endif /* defined(MS_WINDOWS) */ -#if defined(HAVE_MBCS) +#if defined(MS_WINDOWS) PyDoc_STRVAR(_codecs_code_page_encode__doc__, "code_page_encode($module, code_page, str, errors=None, /)\n" @@ -1447,7 +1447,7 @@ exit: return return_value; } -#endif /* defined(HAVE_MBCS) */ +#endif /* defined(MS_WINDOWS) */ PyDoc_STRVAR(_codecs_register_error__doc__, "register_error($module, errors, handler, /)\n" @@ -1536,4 +1536,4 @@ exit: #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF #define _CODECS_CODE_PAGE_ENCODE_METHODDEF #endif /* 
!defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */ -/*[clinic end generated code: output=7874e2d559d49368 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ebe313ab417b17bb input=a9049054013a1b77]*/ diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index e543db4..6088eec 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -1649,24 +1649,24 @@ PyDoc_STRVAR(os_execv__doc__, {"execv", (PyCFunction)os_execv, METH_VARARGS, os_execv__doc__}, static PyObject * -os_execv_impl(PyObject *module, PyObject *path, PyObject *argv); +os_execv_impl(PyObject *module, path_t *path, PyObject *argv); static PyObject * os_execv(PyObject *module, PyObject *args) { PyObject *return_value = NULL; - PyObject *path = NULL; + path_t path = PATH_T_INITIALIZE("execv", "path", 0, 0); PyObject *argv; if (!PyArg_ParseTuple(args, "O&O:execv", - PyUnicode_FSConverter, &path, &argv)) { + path_converter, &path, &argv)) { goto exit; } - return_value = os_execv_impl(module, path, argv); + return_value = os_execv_impl(module, &path, argv); exit: /* Cleanup for path */ - Py_XDECREF(path); + path_cleanup(&path); return return_value; } @@ -1719,7 +1719,7 @@ exit: #endif /* defined(HAVE_EXECV) */ -#if defined(HAVE_SPAWNV) +#if (defined(HAVE_SPAWNV) || defined(HAVE_WSPAWNV)) PyDoc_STRVAR(os_spawnv__doc__, "spawnv($module, mode, path, argv, /)\n" @@ -1738,32 +1738,32 @@ PyDoc_STRVAR(os_spawnv__doc__, {"spawnv", (PyCFunction)os_spawnv, METH_VARARGS, os_spawnv__doc__}, static PyObject * -os_spawnv_impl(PyObject *module, int mode, PyObject *path, PyObject *argv); +os_spawnv_impl(PyObject *module, int mode, path_t *path, PyObject *argv); static PyObject * os_spawnv(PyObject *module, PyObject *args) { PyObject *return_value = NULL; int mode; - PyObject *path = NULL; + path_t path = PATH_T_INITIALIZE("spawnv", "path", 0, 0); PyObject *argv; if (!PyArg_ParseTuple(args, "iO&O:spawnv", - &mode, PyUnicode_FSConverter, &path, &argv)) { + &mode, 
path_converter, &path, &argv)) { goto exit; } - return_value = os_spawnv_impl(module, mode, path, argv); + return_value = os_spawnv_impl(module, mode, &path, argv); exit: /* Cleanup for path */ - Py_XDECREF(path); + path_cleanup(&path); return return_value; } -#endif /* defined(HAVE_SPAWNV) */ +#endif /* (defined(HAVE_SPAWNV) || defined(HAVE_WSPAWNV)) */ -#if defined(HAVE_SPAWNV) +#if (defined(HAVE_SPAWNV) || defined(HAVE_WSPAWNV)) PyDoc_STRVAR(os_spawnve__doc__, "spawnve($module, mode, path, argv, env, /)\n" @@ -1784,7 +1784,7 @@ PyDoc_STRVAR(os_spawnve__doc__, {"spawnve", (PyCFunction)os_spawnve, METH_VARARGS, os_spawnve__doc__}, static PyObject * -os_spawnve_impl(PyObject *module, int mode, PyObject *path, PyObject *argv, +os_spawnve_impl(PyObject *module, int mode, path_t *path, PyObject *argv, PyObject *env); static PyObject * @@ -1792,24 +1792,24 @@ os_spawnve(PyObject *module, PyObject *args) { PyObject *return_value = NULL; int mode; - PyObject *path = NULL; + path_t path = PATH_T_INITIALIZE("spawnve", "path", 0, 0); PyObject *argv; PyObject *env; if (!PyArg_ParseTuple(args, "iO&OO:spawnve", - &mode, PyUnicode_FSConverter, &path, &argv, &env)) { + &mode, path_converter, &path, &argv, &env)) { goto exit; } - return_value = os_spawnve_impl(module, mode, path, argv, env); + return_value = os_spawnve_impl(module, mode, &path, argv, env); exit: /* Cleanup for path */ - Py_XDECREF(path); + path_cleanup(&path); return return_value; } -#endif /* defined(HAVE_SPAWNV) */ +#endif /* (defined(HAVE_SPAWNV) || defined(HAVE_WSPAWNV)) */ #if defined(HAVE_FORK1) @@ -4994,6 +4994,60 @@ os_abort(PyObject *module, PyObject *Py_UNUSED(ignored)) return os_abort_impl(module); } +#if defined(MS_WINDOWS) + +PyDoc_STRVAR(os_startfile__doc__, +"startfile($module, /, filepath, operation=None)\n" +"--\n" +"\n" +"startfile(filepath [, operation])\n" +"\n" +"Start a file with its associated application.\n" +"\n" +"When \"operation\" is not specified or \"open\", this acts like\n" 
+"double-clicking the file in Explorer, or giving the file name as an\n" +"argument to the DOS \"start\" command: the file is opened with whatever\n" +"application (if any) its extension is associated.\n" +"When another \"operation\" is given, it specifies what should be done with\n" +"the file. A typical operation is \"print\".\n" +"\n" +"startfile returns as soon as the associated application is launched.\n" +"There is no option to wait for the application to close, and no way\n" +"to retrieve the application\'s exit status.\n" +"\n" +"The filepath is relative to the current directory. If you want to use\n" +"an absolute path, make sure the first character is not a slash (\"/\");\n" +"the underlying Win32 ShellExecute function doesn\'t work if it is."); + +#define OS_STARTFILE_METHODDEF \ + {"startfile", (PyCFunction)os_startfile, METH_VARARGS|METH_KEYWORDS, os_startfile__doc__}, + +static PyObject * +os_startfile_impl(PyObject *module, path_t *filepath, Py_UNICODE *operation); + +static PyObject * +os_startfile(PyObject *module, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static char *_keywords[] = {"filepath", "operation", NULL}; + path_t filepath = PATH_T_INITIALIZE("startfile", "filepath", 0, 0); + Py_UNICODE *operation = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|u:startfile", _keywords, + path_converter, &filepath, &operation)) { + goto exit; + } + return_value = os_startfile_impl(module, &filepath, operation); + +exit: + /* Cleanup for filepath */ + path_cleanup(&filepath); + + return return_value; +} + +#endif /* defined(MS_WINDOWS) */ + #if defined(HAVE_GETLOADAVG) PyDoc_STRVAR(os_getloadavg__doc__, @@ -6034,6 +6088,10 @@ exit: #define OS_SYSCONF_METHODDEF #endif /* !defined(OS_SYSCONF_METHODDEF) */ +#ifndef OS_STARTFILE_METHODDEF + #define OS_STARTFILE_METHODDEF +#endif /* !defined(OS_STARTFILE_METHODDEF) */ + #ifndef OS_GETLOADAVG_METHODDEF #define OS_GETLOADAVG_METHODDEF #endif /* 
!defined(OS_GETLOADAVG_METHODDEF) */ diff --git a/Modules/overlapped.c b/Modules/overlapped.c index f85e5bc..0aa8657 100644 --- a/Modules/overlapped.c +++ b/Modules/overlapped.c @@ -973,28 +973,28 @@ Overlapped_AcceptEx(OverlappedObject *self, PyObject *args) static int parse_address(PyObject *obj, SOCKADDR *Address, int Length) { - char *Host; + Py_UNICODE *Host; unsigned short Port; unsigned long FlowInfo; unsigned long ScopeId; memset(Address, 0, Length); - if (PyArg_ParseTuple(obj, "sH", &Host, &Port)) + if (PyArg_ParseTuple(obj, "uH", &Host, &Port)) { Address->sa_family = AF_INET; - if (WSAStringToAddressA(Host, AF_INET, NULL, Address, &Length) < 0) { + if (WSAStringToAddressW(Host, AF_INET, NULL, Address, &Length) < 0) { SetFromWindowsErr(WSAGetLastError()); return -1; } ((SOCKADDR_IN*)Address)->sin_port = htons(Port); return Length; } - else if (PyArg_ParseTuple(obj, "sHkk", &Host, &Port, &FlowInfo, &ScopeId)) + else if (PyArg_ParseTuple(obj, "uHkk", &Host, &Port, &FlowInfo, &ScopeId)) { PyErr_Clear(); Address->sa_family = AF_INET6; - if (WSAStringToAddressA(Host, AF_INET6, NULL, Address, &Length) < 0) { + if (WSAStringToAddressW(Host, AF_INET6, NULL, Address, &Length) < 0) { SetFromWindowsErr(WSAGetLastError()); return -1; } diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 161704f..a39ea65 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -164,6 +164,8 @@ corresponding Unix manual entries for more information on calls."); #define HAVE_GETLOGIN 1 #define HAVE_SPAWNV 1 #define HAVE_EXECV 1 +#define HAVE_WSPAWNV 1 +#define HAVE_WEXECV 1 #define HAVE_PIPE 1 #define HAVE_SYSTEM 1 #define HAVE_CWAIT 1 @@ -735,7 +737,7 @@ dir_fd_converter(PyObject *o, void *p) * * * On Windows, if we get a (Unicode) string we * extract the wchar_t * and return it; if we get - * bytes we extract the char * and return that. + * bytes we decode to wchar_t * and return that. 
* * * On all other platforms, strings are encoded * to bytes using PyUnicode_FSConverter, then we @@ -767,7 +769,9 @@ dir_fd_converter(PyObject *o, void *p) * and was not encoded. (Only used on Windows.) * path.narrow * Points to the path if it was expressed as bytes, - * or it was Unicode and was encoded to bytes. + * or it was Unicode and was encoded to bytes. (On Windows, + * is a non-zero integer if the path was expressed as bytes. + * The type is deliberately incompatible to prevent misuse.) * path.fd * Contains a file descriptor if path.accept_fd was true * and the caller provided a signed integer instead of any @@ -812,15 +816,24 @@ typedef struct { int nullable; int allow_fd; const wchar_t *wide; +#ifdef MS_WINDOWS + BOOL narrow; +#else const char *narrow; +#endif int fd; Py_ssize_t length; PyObject *object; PyObject *cleanup; } path_t; +#ifdef MS_WINDOWS +#define PATH_T_INITIALIZE(function_name, argument_name, nullable, allow_fd) \ + {function_name, argument_name, nullable, allow_fd, NULL, FALSE, -1, 0, NULL, NULL} +#else #define PATH_T_INITIALIZE(function_name, argument_name, nullable, allow_fd) \ {function_name, argument_name, nullable, allow_fd, NULL, NULL, -1, 0, NULL, NULL} +#endif static void path_cleanup(path_t *path) { @@ -839,6 +852,10 @@ path_converter(PyObject *o, void *p) /* Default to failure, forcing explicit signaling of succcess.
*/ int ret = 0; const char *narrow; +#ifdef MS_WINDOWS + PyObject *wo; + const wchar_t *wide; +#endif #define FORMAT_EXCEPTION(exc, fmt) \ PyErr_Format(exc, "%s%s" fmt, \ @@ -857,7 +874,11 @@ path_converter(PyObject *o, void *p) if ((o == Py_None) && path->nullable) { path->wide = NULL; +#ifdef MS_WINDOWS + path->narrow = FALSE; +#else path->narrow = NULL; +#endif path->length = 0; path->object = o; path->fd = -1; @@ -899,9 +920,7 @@ path_converter(PyObject *o, void *p) if (is_unicode) { #ifdef MS_WINDOWS - const wchar_t *wide; - - wide = PyUnicode_AsUnicodeAndSize(o, &length); + wide = PyUnicode_AsWideCharString(o, &length); if (!wide) { goto exit; } @@ -915,7 +934,6 @@ path_converter(PyObject *o, void *p) } path->wide = wide; - path->narrow = NULL; path->length = length; path->object = o; path->fd = -1; @@ -928,11 +946,6 @@ path_converter(PyObject *o, void *p) #endif } else if (is_bytes) { -#ifdef MS_WINDOWS - if (win32_warn_bytes_api()) { - goto exit; - } -#endif bytes = o; Py_INCREF(bytes); } @@ -950,22 +963,21 @@ path_converter(PyObject *o, void *p) Py_TYPE(o)->tp_name)) { goto exit; } -#ifdef MS_WINDOWS - if (win32_warn_bytes_api()) { - goto exit; - } -#endif bytes = PyBytes_FromObject(o); if (!bytes) { goto exit; } } - else if (path->allow_fd && PyIndex_Check(o)) { + else if (is_index) { if (!_fd_converter(o, &path->fd)) { goto exit; } path->wide = NULL; +#ifdef MS_WINDOWS + path->narrow = FALSE; +#else path->narrow = NULL; +#endif path->length = 0; path->object = o; ret = 1; @@ -987,14 +999,6 @@ path_converter(PyObject *o, void *p) } length = PyBytes_GET_SIZE(bytes); -#ifdef MS_WINDOWS - if (length > MAX_PATH-1) { - FORMAT_EXCEPTION(PyExc_ValueError, "%s too long for Windows"); - Py_DECREF(bytes); - goto exit; - } -#endif - narrow = PyBytes_AS_STRING(bytes); if ((size_t)length != strlen(narrow)) { FORMAT_EXCEPTION(PyExc_ValueError, "embedded null character in %s"); @@ -1002,8 +1006,35 @@ path_converter(PyObject *o, void *p) goto exit; } +#ifdef MS_WINDOWS + 
wo = PyUnicode_DecodeFSDefaultAndSize( + narrow, + length + ); + if (!wo) { + goto exit; + } + + wide = PyUnicode_AsWideCharString(wo, &length); + Py_DECREF(wo); + + if (!wide) { + goto exit; + } + if (length > 32767) { + FORMAT_EXCEPTION(PyExc_ValueError, "%s too long for Windows"); + goto exit; + } + if (wcslen(wide) != length) { + FORMAT_EXCEPTION(PyExc_ValueError, "embedded null character in %s"); + goto exit; + } + path->wide = wide; + path->narrow = TRUE; +#else path->wide = NULL; path->narrow = narrow; +#endif path->length = length; path->object = o; path->fd = -1; @@ -1067,7 +1098,11 @@ follow_symlinks_specified(const char *function_name, int follow_symlinks) static int path_and_dir_fd_invalid(const char *function_name, path_t *path, int dir_fd) { - if (!path->narrow && !path->wide && (dir_fd != DEFAULT_DIR_FD)) { + if (!path->wide && (dir_fd != DEFAULT_DIR_FD) +#ifndef MS_WINDOWS + && !path->narrow +#endif + ) { PyErr_Format(PyExc_ValueError, "%s: can't specify dir_fd without matching path", function_name); @@ -1397,31 +1432,6 @@ posix_fildes_fd(int fd, int (*func)(int)) it also needs to set "magic" environment variables indicating the per-drive current directory, which are of the form =: */ static BOOL __stdcall -win32_chdir(LPCSTR path) -{ - char new_path[MAX_PATH]; - int result; - char env[4] = "=x:"; - - if(!SetCurrentDirectoryA(path)) - return FALSE; - result = GetCurrentDirectoryA(Py_ARRAY_LENGTH(new_path), new_path); - if (!result) - return FALSE; - /* In the ANSI API, there should not be any paths longer - than MAX_PATH-1 (not including the final null character). */ - assert(result < Py_ARRAY_LENGTH(new_path)); - if (strncmp(new_path, "\\\\", 2) == 0 || - strncmp(new_path, "//", 2) == 0) - /* UNC path, nothing to do. 
*/ - return TRUE; - env[1] = new_path[0]; - return SetEnvironmentVariableA(env, new_path); -} - -/* The Unicode version differs from the ANSI version - since the current directory might exceed MAX_PATH characters */ -static BOOL __stdcall win32_wchdir(LPCWSTR path) { wchar_t path_buf[MAX_PATH], *new_path = path_buf; @@ -1467,33 +1477,10 @@ win32_wchdir(LPCWSTR path) #define HAVE_STAT_NSEC 1 #define HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES 1 -static BOOL -attributes_from_dir(LPCSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *reparse_tag) -{ - HANDLE hFindFile; - WIN32_FIND_DATAA FileData; - hFindFile = FindFirstFileA(pszFile, &FileData); - if (hFindFile == INVALID_HANDLE_VALUE) - return FALSE; - FindClose(hFindFile); - memset(info, 0, sizeof(*info)); - *reparse_tag = 0; - info->dwFileAttributes = FileData.dwFileAttributes; - info->ftCreationTime = FileData.ftCreationTime; - info->ftLastAccessTime = FileData.ftLastAccessTime; - info->ftLastWriteTime = FileData.ftLastWriteTime; - info->nFileSizeHigh = FileData.nFileSizeHigh; - info->nFileSizeLow = FileData.nFileSizeLow; -/* info->nNumberOfLinks = 1; */ - if (FileData.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) - *reparse_tag = FileData.dwReserved0; - return TRUE; -} - static void -find_data_to_file_info_w(WIN32_FIND_DATAW *pFileData, - BY_HANDLE_FILE_INFORMATION *info, - ULONG *reparse_tag) +find_data_to_file_info(WIN32_FIND_DATAW *pFileData, + BY_HANDLE_FILE_INFORMATION *info, + ULONG *reparse_tag) { memset(info, 0, sizeof(*info)); info->dwFileAttributes = pFileData->dwFileAttributes; @@ -1510,7 +1497,7 @@ find_data_to_file_info_w(WIN32_FIND_DATAW *pFileData, } static BOOL -attributes_from_dir_w(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *reparse_tag) +attributes_from_dir(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *reparse_tag) { HANDLE hFindFile; WIN32_FIND_DATAW FileData; @@ -1518,7 +1505,7 @@ attributes_from_dir_w(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG * if 
(hFindFile == INVALID_HANDLE_VALUE) return FALSE; FindClose(hFindFile); - find_data_to_file_info_w(&FileData, info, reparse_tag); + find_data_to_file_info(&FileData, info, reparse_tag); return TRUE; } @@ -1561,10 +1548,7 @@ get_target_path(HANDLE hdl, wchar_t **target_path) } static int -win32_xstat_impl_w(const wchar_t *path, struct _Py_stat_struct *result, - BOOL traverse); -static int -win32_xstat_impl(const char *path, struct _Py_stat_struct *result, +win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) { int code; @@ -1572,96 +1556,6 @@ win32_xstat_impl(const char *path, struct _Py_stat_struct *result, BY_HANDLE_FILE_INFORMATION info; ULONG reparse_tag = 0; wchar_t *target_path; - const char *dot; - - hFile = CreateFileA( - path, - FILE_READ_ATTRIBUTES, /* desired access */ - 0, /* share mode */ - NULL, /* security attributes */ - OPEN_EXISTING, - /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */ - /* FILE_FLAG_OPEN_REPARSE_POINT does not follow the symlink. - Because of this, calls like GetFinalPathNameByHandle will return - the symlink path again and not the actual final path. */ - FILE_ATTRIBUTE_NORMAL|FILE_FLAG_BACKUP_SEMANTICS| - FILE_FLAG_OPEN_REPARSE_POINT, - NULL); - - if (hFile == INVALID_HANDLE_VALUE) { - /* Either the target doesn't exist, or we don't have access to - get a handle to it. If the former, we need to return an error. - If the latter, we can use attributes_from_dir. */ - if (GetLastError() != ERROR_SHARING_VIOLATION) - return -1; - /* Could not get attributes on open file. Fall back to - reading the directory. */ - if (!attributes_from_dir(path, &info, &reparse_tag)) - /* Very strange. 
This should not fail now */ - return -1; - if (info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { - if (traverse) { - /* Should traverse, but could not open reparse point handle */ - SetLastError(ERROR_SHARING_VIOLATION); - return -1; - } - } - } else { - if (!GetFileInformationByHandle(hFile, &info)) { - CloseHandle(hFile); - return -1; - } - if (info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { - if (!win32_get_reparse_tag(hFile, &reparse_tag)) - return -1; - - /* Close the outer open file handle now that we're about to - reopen it with different flags. */ - if (!CloseHandle(hFile)) - return -1; - - if (traverse) { - /* In order to call GetFinalPathNameByHandle we need to open - the file without the reparse handling flag set. */ - hFile2 = CreateFileA( - path, FILE_READ_ATTRIBUTES, FILE_SHARE_READ, - NULL, OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL|FILE_FLAG_BACKUP_SEMANTICS, - NULL); - if (hFile2 == INVALID_HANDLE_VALUE) - return -1; - - if (!get_target_path(hFile2, &target_path)) - return -1; - - code = win32_xstat_impl_w(target_path, result, FALSE); - PyMem_RawFree(target_path); - return code; - } - } else - CloseHandle(hFile); - } - _Py_attribute_data_to_stat(&info, reparse_tag, result); - - /* Set S_IEXEC if it is an .exe, .bat, ... */ - dot = strrchr(path, '.'); - if (dot) { - if (stricmp(dot, ".bat") == 0 || stricmp(dot, ".cmd") == 0 || - stricmp(dot, ".exe") == 0 || stricmp(dot, ".com") == 0) - result->st_mode |= 0111; - } - return 0; -} - -static int -win32_xstat_impl_w(const wchar_t *path, struct _Py_stat_struct *result, - BOOL traverse) -{ - int code; - HANDLE hFile, hFile2; - BY_HANDLE_FILE_INFORMATION info; - ULONG reparse_tag = 0; - wchar_t *target_path; const wchar_t *dot; hFile = CreateFileW( @@ -1686,7 +1580,7 @@ win32_xstat_impl_w(const wchar_t *path, struct _Py_stat_struct *result, return -1; /* Could not get attributes on open file. Fall back to reading the directory. 
*/ - if (!attributes_from_dir_w(path, &info, &reparse_tag)) + if (!attributes_from_dir(path, &info, &reparse_tag)) /* Very strange. This should not fail now */ return -1; if (info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { @@ -1724,7 +1618,7 @@ win32_xstat_impl_w(const wchar_t *path, struct _Py_stat_struct *result, if (!get_target_path(hFile2, &target_path)) return -1; - code = win32_xstat_impl_w(target_path, result, FALSE); + code = win32_xstat_impl(target_path, result, FALSE); PyMem_RawFree(target_path); return code; } @@ -1744,7 +1638,7 @@ win32_xstat_impl_w(const wchar_t *path, struct _Py_stat_struct *result, } static int -win32_xstat(const char *path, struct _Py_stat_struct *result, BOOL traverse) +win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) { /* Protocol violation: we explicitly clear errno, instead of setting it to a POSIX error. Callers should use GetLastError. */ @@ -1752,16 +1646,6 @@ win32_xstat(const char *path, struct _Py_stat_struct *result, BOOL traverse) errno = 0; return code; } - -static int -win32_xstat_w(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) -{ - /* Protocol violation: we explicitly clear errno, instead of - setting it to a POSIX error. Callers should use GetLastError. */ - int code = win32_xstat_impl_w(path, result, traverse); - errno = 0; - return code; -} /* About the following functions: win32_lstat_w, win32_stat, win32_stat_w In Posix, stat automatically traverses symlinks and returns the stat @@ -1771,34 +1655,20 @@ win32_xstat_w(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse Therefore, win32_lstat will get the attributes traditionally, and win32_stat will first explicitly resolve the symlink target and then will - call win32_lstat on that result. - - The _w represent Unicode equivalents of the aforementioned ANSI functions. */ + call win32_lstat on that result. 
*/ static int -win32_lstat(const char* path, struct _Py_stat_struct *result) +win32_lstat(const wchar_t* path, struct _Py_stat_struct *result) { return win32_xstat(path, result, FALSE); } static int -win32_lstat_w(const wchar_t* path, struct _Py_stat_struct *result) -{ - return win32_xstat_w(path, result, FALSE); -} - -static int -win32_stat(const char* path, struct _Py_stat_struct *result) +win32_stat(const wchar_t* path, struct _Py_stat_struct *result) { return win32_xstat(path, result, TRUE); } -static int -win32_stat_w(const wchar_t* path, struct _Py_stat_struct *result) -{ - return win32_xstat_w(path, result, TRUE); -} - #endif /* MS_WINDOWS */ PyDoc_STRVAR(stat_result__doc__, @@ -2200,26 +2070,25 @@ posix_do_stat(const char *function_name, path_t *path, result = FSTAT(path->fd, &st); else #ifdef MS_WINDOWS - if (path->wide) { - if (follow_symlinks) - result = win32_stat_w(path->wide, &st); - else - result = win32_lstat_w(path->wide, &st); - } + if (follow_symlinks) + result = win32_stat(path->wide, &st); else -#endif -#if defined(HAVE_LSTAT) || defined(MS_WINDOWS) + result = win32_lstat(path->wide, &st); +#else + else +#if defined(HAVE_LSTAT) if ((!follow_symlinks) && (dir_fd == DEFAULT_DIR_FD)) result = LSTAT(path->narrow, &st); else -#endif +#endif /* HAVE_LSTAT */ #ifdef HAVE_FSTATAT if ((dir_fd != DEFAULT_DIR_FD) || !follow_symlinks) result = fstatat(dir_fd, path->narrow, &st, follow_symlinks ? 
0 : AT_SYMLINK_NOFOLLOW); else -#endif +#endif /* HAVE_FSTATAT */ result = STAT(path->narrow, &st); +#endif /* MS_WINDOWS */ Py_END_ALLOW_THREADS if (result != 0) { @@ -2655,10 +2524,7 @@ os_access_impl(PyObject *module, path_t *path, int mode, int dir_fd, #ifdef MS_WINDOWS Py_BEGIN_ALLOW_THREADS - if (path->wide != NULL) - attr = GetFileAttributesW(path->wide); - else - attr = GetFileAttributesA(path->narrow); + attr = GetFileAttributesW(path->wide); Py_END_ALLOW_THREADS /* @@ -2782,11 +2648,8 @@ os_chdir_impl(PyObject *module, path_t *path) Py_BEGIN_ALLOW_THREADS #ifdef MS_WINDOWS - if (path->wide) - result = win32_wchdir(path->wide); - else - result = win32_chdir(path->narrow); - result = !result; /* on unix, success = 0, on windows, success = !0 */ + /* on unix, success = 0, on windows, success = !0 */ + result = !win32_wchdir(path->wide); #else #ifdef HAVE_FCHDIR if (path->fd != -1) @@ -2881,10 +2744,7 @@ os_chmod_impl(PyObject *module, path_t *path, int mode, int dir_fd, #ifdef MS_WINDOWS Py_BEGIN_ALLOW_THREADS - if (path->wide) - attr = GetFileAttributesW(path->wide); - else - attr = GetFileAttributesA(path->narrow); + attr = GetFileAttributesW(path->wide); if (attr == INVALID_FILE_ATTRIBUTES) result = 0; else { @@ -2892,10 +2752,7 @@ os_chmod_impl(PyObject *module, path_t *path, int mode, int dir_fd, attr &= ~FILE_ATTRIBUTE_READONLY; else attr |= FILE_ATTRIBUTE_READONLY; - if (path->wide) - result = SetFileAttributesW(path->wide, attr); - else - result = SetFileAttributesA(path->narrow, attr); + result = SetFileAttributesW(path->wide, attr); } Py_END_ALLOW_THREADS @@ -3488,7 +3345,7 @@ os_link_impl(PyObject *module, path_t *src, path_t *dst, int src_dir_fd, /*[clinic end generated code: output=7f00f6007fd5269a input=b0095ebbcbaa7e04]*/ { #ifdef MS_WINDOWS - BOOL result; + BOOL result = FALSE; #else int result; #endif @@ -3500,18 +3357,18 @@ os_link_impl(PyObject *module, path_t *src, path_t *dst, int src_dir_fd, } #endif +#ifndef MS_WINDOWS if ((src->narrow 
&& dst->wide) || (src->wide && dst->narrow)) { PyErr_SetString(PyExc_NotImplementedError, "link: src and dst must be the same type"); return NULL; } +#endif #ifdef MS_WINDOWS Py_BEGIN_ALLOW_THREADS if (src->wide) result = CreateHardLinkW(dst->wide, src->wide, NULL); - else - result = CreateHardLinkA(dst->narrow, src->narrow, NULL); Py_END_ALLOW_THREADS if (!result) @@ -3526,13 +3383,13 @@ os_link_impl(PyObject *module, path_t *src, path_t *dst, int src_dir_fd, dst_dir_fd, dst->narrow, follow_symlinks ? AT_SYMLINK_FOLLOW : 0); else -#endif +#endif /* HAVE_LINKAT */ result = link(src->narrow, dst->narrow); Py_END_ALLOW_THREADS if (result) return path_error2(src, dst); -#endif +#endif /* MS_WINDOWS */ Py_RETURN_NONE; } @@ -3546,97 +3403,39 @@ _listdir_windows_no_opendir(path_t *path, PyObject *list) PyObject *v; HANDLE hFindFile = INVALID_HANDLE_VALUE; BOOL result; - WIN32_FIND_DATA FileData; - char namebuf[MAX_PATH+4]; /* Overallocate for "\*.*" */ + wchar_t namebuf[MAX_PATH+4]; /* Overallocate for "\*.*" */ /* only claim to have space for MAX_PATH */ Py_ssize_t len = Py_ARRAY_LENGTH(namebuf)-4; wchar_t *wnamebuf = NULL; - if (!path->narrow) { - WIN32_FIND_DATAW wFileData; - const wchar_t *po_wchars; - - if (!path->wide) { /* Default arg: "." 
*/ - po_wchars = L"."; - len = 1; - } else { - po_wchars = path->wide; - len = wcslen(path->wide); - } - /* The +5 is so we can append "\\*.*\0" */ - wnamebuf = PyMem_New(wchar_t, len + 5); - if (!wnamebuf) { - PyErr_NoMemory(); - goto exit; - } - wcscpy(wnamebuf, po_wchars); - if (len > 0) { - wchar_t wch = wnamebuf[len-1]; - if (wch != SEP && wch != ALTSEP && wch != L':') - wnamebuf[len++] = SEP; - wcscpy(wnamebuf + len, L"*.*"); - } - if ((list = PyList_New(0)) == NULL) { - goto exit; - } - Py_BEGIN_ALLOW_THREADS - hFindFile = FindFirstFileW(wnamebuf, &wFileData); - Py_END_ALLOW_THREADS - if (hFindFile == INVALID_HANDLE_VALUE) { - int error = GetLastError(); - if (error == ERROR_FILE_NOT_FOUND) - goto exit; - Py_DECREF(list); - list = path_error(path); - goto exit; - } - do { - /* Skip over . and .. */ - if (wcscmp(wFileData.cFileName, L".") != 0 && - wcscmp(wFileData.cFileName, L"..") != 0) { - v = PyUnicode_FromWideChar(wFileData.cFileName, - wcslen(wFileData.cFileName)); - if (v == NULL) { - Py_DECREF(list); - list = NULL; - break; - } - if (PyList_Append(list, v) != 0) { - Py_DECREF(v); - Py_DECREF(list); - list = NULL; - break; - } - Py_DECREF(v); - } - Py_BEGIN_ALLOW_THREADS - result = FindNextFileW(hFindFile, &wFileData); - Py_END_ALLOW_THREADS - /* FindNextFile sets error to ERROR_NO_MORE_FILES if - it got to the end of the directory. */ - if (!result && GetLastError() != ERROR_NO_MORE_FILES) { - Py_DECREF(list); - list = path_error(path); - goto exit; - } - } while (result == TRUE); + WIN32_FIND_DATAW wFileData; + const wchar_t *po_wchars; + if (!path->wide) { /* Default arg: "." 
*/ + po_wchars = L"."; + len = 1; + } else { + po_wchars = path->wide; + len = wcslen(path->wide); + } + /* The +5 is so we can append "\\*.*\0" */ + wnamebuf = PyMem_New(wchar_t, len + 5); + if (!wnamebuf) { + PyErr_NoMemory(); goto exit; } - strcpy(namebuf, path->narrow); - len = path->length; + wcscpy(wnamebuf, po_wchars); if (len > 0) { - char ch = namebuf[len-1]; - if (ch != '\\' && ch != '/' && ch != ':') - namebuf[len++] = '\\'; - strcpy(namebuf + len, "*.*"); + wchar_t wch = wnamebuf[len-1]; + if (wch != SEP && wch != ALTSEP && wch != L':') + wnamebuf[len++] = SEP; + wcscpy(wnamebuf + len, L"*.*"); + } + if ((list = PyList_New(0)) == NULL) { + goto exit; } - - if ((list = PyList_New(0)) == NULL) - return NULL; - Py_BEGIN_ALLOW_THREADS - hFindFile = FindFirstFile(namebuf, &FileData); + hFindFile = FindFirstFileW(wnamebuf, &wFileData); Py_END_ALLOW_THREADS if (hFindFile == INVALID_HANDLE_VALUE) { int error = GetLastError(); @@ -3648,9 +3447,13 @@ _listdir_windows_no_opendir(path_t *path, PyObject *list) } do { /* Skip over . and .. */ - if (strcmp(FileData.cFileName, ".") != 0 && - strcmp(FileData.cFileName, "..") != 0) { - v = PyBytes_FromString(FileData.cFileName); + if (wcscmp(wFileData.cFileName, L".") != 0 && + wcscmp(wFileData.cFileName, L"..") != 0) { + v = PyUnicode_FromWideChar(wFileData.cFileName, + wcslen(wFileData.cFileName)); + if (path->narrow && v) { + Py_SETREF(v, PyUnicode_EncodeFSDefault(v)); + } if (v == NULL) { Py_DECREF(list); list = NULL; @@ -3665,7 +3468,7 @@ _listdir_windows_no_opendir(path_t *path, PyObject *list) Py_DECREF(v); } Py_BEGIN_ALLOW_THREADS - result = FindNextFile(hFindFile, &FileData); + result = FindNextFileW(hFindFile, &wFileData); Py_END_ALLOW_THREADS /* FindNextFile sets error to ERROR_NO_MORE_FILES if it got to the end of the directory. 
*/ @@ -3846,41 +3649,29 @@ static PyObject * os__getfullpathname_impl(PyObject *module, path_t *path) /*[clinic end generated code: output=bb8679d56845bc9b input=332ed537c29d0a3e]*/ { - if (!path->narrow) - { - wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf; - wchar_t *wtemp; - DWORD result; - PyObject *v; - - result = GetFullPathNameW(path->wide, - Py_ARRAY_LENGTH(woutbuf), - woutbuf, &wtemp); - if (result > Py_ARRAY_LENGTH(woutbuf)) { - woutbufp = PyMem_New(wchar_t, result); - if (!woutbufp) - return PyErr_NoMemory(); - result = GetFullPathNameW(path->wide, result, woutbufp, &wtemp); - } - if (result) - v = PyUnicode_FromWideChar(woutbufp, wcslen(woutbufp)); - else - v = win32_error_object("GetFullPathNameW", path->object); - if (woutbufp != woutbuf) - PyMem_Free(woutbufp); - return v; - } - else { - char outbuf[MAX_PATH]; - char *temp; + wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf; + wchar_t *wtemp; + DWORD result; + PyObject *v; - if (!GetFullPathName(path->narrow, Py_ARRAY_LENGTH(outbuf), - outbuf, &temp)) { - win32_error_object("GetFullPathName", path->object); - return NULL; - } - return PyBytes_FromString(outbuf); + result = GetFullPathNameW(path->wide, + Py_ARRAY_LENGTH(woutbuf), + woutbuf, &wtemp); + if (result > Py_ARRAY_LENGTH(woutbuf)) { + woutbufp = PyMem_New(wchar_t, result); + if (!woutbufp) + return PyErr_NoMemory(); + result = GetFullPathNameW(path->wide, result, woutbufp, &wtemp); } + if (result) { + v = PyUnicode_FromWideChar(woutbufp, wcslen(woutbufp)); + if (path->narrow) + Py_SETREF(v, PyUnicode_EncodeFSDefault(v)); + } else + v = win32_error_object("GetFullPathNameW", path->object); + if (woutbufp != woutbuf) + PyMem_Free(woutbufp); + return v; } @@ -3964,10 +3755,7 @@ os__isdir_impl(PyObject *module, path_t *path) DWORD attributes; Py_BEGIN_ALLOW_THREADS - if (!path->narrow) - attributes = GetFileAttributesW(path->wide); - else - attributes = GetFileAttributesA(path->narrow); + attributes = GetFileAttributesW(path->wide); 
Py_END_ALLOW_THREADS if (attributes == INVALID_FILE_ATTRIBUTES) @@ -4065,10 +3853,7 @@ os_mkdir_impl(PyObject *module, path_t *path, int mode, int dir_fd) #ifdef MS_WINDOWS Py_BEGIN_ALLOW_THREADS - if (path->wide) - result = CreateDirectoryW(path->wide, NULL); - else - result = CreateDirectoryA(path->narrow, NULL); + result = CreateDirectoryW(path->wide, NULL); Py_END_ALLOW_THREADS if (!result) @@ -4088,7 +3873,7 @@ os_mkdir_impl(PyObject *module, path_t *path, int mode, int dir_fd) Py_END_ALLOW_THREADS if (result < 0) return path_error(path); -#endif +#endif /* MS_WINDOWS */ Py_RETURN_NONE; } @@ -4211,31 +3996,28 @@ internal_rename(path_t *src, path_t *dst, int src_dir_fd, int dst_dir_fd, int is } #endif - if ((src->narrow && dst->wide) || (src->wide && dst->narrow)) { - PyErr_Format(PyExc_ValueError, - "%s: src and dst must be the same type", function_name); - return NULL; - } - #ifdef MS_WINDOWS Py_BEGIN_ALLOW_THREADS - if (src->wide) - result = MoveFileExW(src->wide, dst->wide, flags); - else - result = MoveFileExA(src->narrow, dst->narrow, flags); + result = MoveFileExW(src->wide, dst->wide, flags); Py_END_ALLOW_THREADS if (!result) return path_error2(src, dst); #else + if ((src->narrow && dst->wide) || (src->wide && dst->narrow)) { + PyErr_Format(PyExc_ValueError, + "%s: src and dst must be the same type", function_name); + return NULL; + } + Py_BEGIN_ALLOW_THREADS #ifdef HAVE_RENAMEAT if (dir_fd_specified) result = renameat(src_dir_fd, src->narrow, dst_dir_fd, dst->narrow); else #endif - result = rename(src->narrow, dst->narrow); + result = rename(src->narrow, dst->narrow); Py_END_ALLOW_THREADS if (result) @@ -4316,11 +4098,8 @@ os_rmdir_impl(PyObject *module, path_t *path, int dir_fd) Py_BEGIN_ALLOW_THREADS #ifdef MS_WINDOWS - if (path->wide) - result = RemoveDirectoryW(path->wide); - else - result = RemoveDirectoryA(path->narrow); - result = !result; /* Windows, success=1, UNIX, success=0 */ + /* Windows, success=1, UNIX, success=0 */ + result = 
!RemoveDirectoryW(path->wide); #else #ifdef HAVE_UNLINKAT if (dir_fd != DEFAULT_DIR_FD) @@ -4466,11 +4245,8 @@ os_unlink_impl(PyObject *module, path_t *path, int dir_fd) Py_BEGIN_ALLOW_THREADS _Py_BEGIN_SUPPRESS_IPH #ifdef MS_WINDOWS - if (path->wide) - result = Py_DeleteFileW(path->wide); - else - result = DeleteFileA(path->narrow); - result = !result; /* Windows, success=1, UNIX, success=0 */ + /* Windows, success=1, UNIX, success=0 */ + result = !Py_DeleteFileW(path->wide); #else #ifdef HAVE_UNLINKAT if (dir_fd != DEFAULT_DIR_FD) @@ -4881,14 +4657,9 @@ os_utime_impl(PyObject *module, path_t *path, PyObject *times, PyObject *ns, #ifdef MS_WINDOWS Py_BEGIN_ALLOW_THREADS - if (path->wide) - hFile = CreateFileW(path->wide, FILE_WRITE_ATTRIBUTES, 0, - NULL, OPEN_EXISTING, - FILE_FLAG_BACKUP_SEMANTICS, NULL); - else - hFile = CreateFileA(path->narrow, FILE_WRITE_ATTRIBUTES, 0, - NULL, OPEN_EXISTING, - FILE_FLAG_BACKUP_SEMANTICS, NULL); + hFile = CreateFileW(path->wide, FILE_WRITE_ATTRIBUTES, 0, + NULL, OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS, NULL); Py_END_ALLOW_THREADS if (hFile == INVALID_HANDLE_VALUE) { path_error(path); @@ -4974,9 +4745,15 @@ os__exit_impl(PyObject *module, int status) return NULL; /* Make gcc -Wall happy */ } +#if defined(HAVE_WEXECV) || defined(HAVE_WSPAWNV) +#define EXECV_CHAR wchar_t +#else +#define EXECV_CHAR char +#endif + #if defined(HAVE_EXECV) || defined(HAVE_SPAWNV) static void -free_string_array(char **array, Py_ssize_t count) +free_string_array(EXECV_CHAR **array, Py_ssize_t count) { Py_ssize_t i; for (i = 0; i < count; i++) @@ -4985,10 +4762,15 @@ free_string_array(char **array, Py_ssize_t count) } static -int fsconvert_strdup(PyObject *o, char**out) +int fsconvert_strdup(PyObject *o, EXECV_CHAR**out) { - PyObject *bytes; Py_ssize_t size; +#if defined(HAVE_WEXECV) || defined(HAVE_WSPAWNV) + *out = PyUnicode_AsWideCharString(o, &size); + if (!*out) + return 0; +#else + PyObject *bytes; if (!PyUnicode_FSConverter(o, &bytes)) return 
0; size = PyBytes_GET_SIZE(bytes); @@ -4999,26 +4781,24 @@ int fsconvert_strdup(PyObject *o, char**out) } memcpy(*out, PyBytes_AsString(bytes), size+1); Py_DECREF(bytes); +#endif return 1; } #endif #if defined(HAVE_EXECV) || defined (HAVE_FEXECVE) -static char** +static EXECV_CHAR** parse_envlist(PyObject* env, Py_ssize_t *envc_ptr) { - char **envlist; Py_ssize_t i, pos, envc; PyObject *keys=NULL, *vals=NULL; - PyObject *key, *val, *key2, *val2; - char *p; - const char *k, *v; - size_t len; + PyObject *key, *val, *keyval; + EXECV_CHAR **envlist; i = PyMapping_Size(env); if (i < 0) return NULL; - envlist = PyMem_NEW(char *, i + 1); + envlist = PyMem_NEW(EXECV_CHAR *, i + 1); if (envlist == NULL) { PyErr_NoMemory(); return NULL; @@ -5042,28 +4822,16 @@ parse_envlist(PyObject* env, Py_ssize_t *envc_ptr) if (!key || !val) goto error; - if (PyUnicode_FSConverter(key, &key2) == 0) - goto error; - if (PyUnicode_FSConverter(val, &val2) == 0) { - Py_DECREF(key2); + keyval = PyUnicode_FromFormat("%U=%U", key, val); + if (!keyval) goto error; - } - k = PyBytes_AsString(key2); - v = PyBytes_AsString(val2); - len = PyBytes_GET_SIZE(key2) + PyBytes_GET_SIZE(val2) + 2; - - p = PyMem_NEW(char, len); - if (p == NULL) { - PyErr_NoMemory(); - Py_DECREF(key2); - Py_DECREF(val2); + if (!fsconvert_strdup(keyval, &envlist[envc++])) { + Py_DECREF(keyval); goto error; } - PyOS_snprintf(p, len, "%s=%s", k, v); - envlist[envc++] = p; - Py_DECREF(key2); - Py_DECREF(val2); + + Py_DECREF(keyval); } Py_DECREF(vals); Py_DECREF(keys); @@ -5075,17 +4843,15 @@ parse_envlist(PyObject* env, Py_ssize_t *envc_ptr) error: Py_XDECREF(keys); Py_XDECREF(vals); - while (--envc >= 0) - PyMem_DEL(envlist[envc]); - PyMem_DEL(envlist); + free_string_array(envlist, envc); return NULL; } -static char** +static EXECV_CHAR** parse_arglist(PyObject* argv, Py_ssize_t *argc) { int i; - char **argvlist = PyMem_NEW(char *, *argc+1); + EXECV_CHAR **argvlist = PyMem_NEW(EXECV_CHAR *, *argc+1); if (argvlist == NULL) { 
PyErr_NoMemory(); return NULL; @@ -5107,6 +4873,7 @@ fail: free_string_array(argvlist, *argc); return NULL; } + #endif @@ -5114,7 +4881,7 @@ fail: /*[clinic input] os.execv - path: FSConverter + path: path_t Path of executable file. argv: object Tuple or list of strings. @@ -5124,17 +4891,15 @@ Execute an executable path with arguments, replacing current process. [clinic start generated code]*/ static PyObject * -os_execv_impl(PyObject *module, PyObject *path, PyObject *argv) -/*[clinic end generated code: output=b21dc34deeb5b004 input=96041559925e5229]*/ +os_execv_impl(PyObject *module, path_t *path, PyObject *argv) +/*[clinic end generated code: output=3b52fec34cd0dafd input=9bac31efae07dac7]*/ { - const char *path_char; - char **argvlist; + EXECV_CHAR **argvlist; Py_ssize_t argc; /* execv has two arguments: (path, argv), where argv is a list or tuple of strings. */ - path_char = PyBytes_AsString(path); if (!PyList_Check(argv) && !PyTuple_Check(argv)) { PyErr_SetString(PyExc_TypeError, "execv() arg 2 must be a tuple or list"); @@ -5151,7 +4916,11 @@ os_execv_impl(PyObject *module, PyObject *path, PyObject *argv) return NULL; } - execv(path_char, argvlist); +#ifdef HAVE_WEXECV + _wexecv(path->wide, argvlist); +#else + execv(path->narrow, argvlist); +#endif /* If we get here it's definitely an error */ @@ -5177,8 +4946,8 @@ static PyObject * os_execve_impl(PyObject *module, path_t *path, PyObject *argv, PyObject *env) /*[clinic end generated code: output=ff9fa8e4da8bde58 input=626804fa092606d9]*/ { - char **argvlist = NULL; - char **envlist; + EXECV_CHAR **argvlist = NULL; + EXECV_CHAR **envlist; Py_ssize_t argc, envc; /* execve has three arguments: (path, argv, env), where @@ -5211,30 +4980,33 @@ os_execve_impl(PyObject *module, path_t *path, PyObject *argv, PyObject *env) fexecve(path->fd, argvlist, envlist); else #endif +#ifdef HAVE_WEXECV + _wexecve(path->wide, argvlist, envlist); +#else execve(path->narrow, argvlist, envlist); +#endif /* If we get here it's 
definitely an error */ path_error(path); - while (--envc >= 0) - PyMem_DEL(envlist[envc]); - PyMem_DEL(envlist); + free_string_array(envlist, envc); fail: if (argvlist) free_string_array(argvlist, argc); return NULL; } + #endif /* HAVE_EXECV */ -#ifdef HAVE_SPAWNV +#if defined(HAVE_SPAWNV) || defined(HAVE_WSPAWNV) /*[clinic input] os.spawnv mode: int Mode of process creation. - path: FSConverter + path: path_t Path of executable file. argv: object Tuple or list of strings. @@ -5244,11 +5016,10 @@ Execute the program specified by path in a new process. [clinic start generated code]*/ static PyObject * -os_spawnv_impl(PyObject *module, int mode, PyObject *path, PyObject *argv) -/*[clinic end generated code: output=c427c0ce40f10638 input=042c91dfc1e6debc]*/ +os_spawnv_impl(PyObject *module, int mode, path_t *path, PyObject *argv) +/*[clinic end generated code: output=71cd037a9d96b816 input=43224242303291be]*/ { - const char *path_char; - char **argvlist; + EXECV_CHAR **argvlist; int i; Py_ssize_t argc; intptr_t spawnval; @@ -5257,7 +5028,6 @@ os_spawnv_impl(PyObject *module, int mode, PyObject *path, PyObject *argv) /* spawnv has three arguments: (mode, path, argv), where argv is a list or tuple of strings. 
*/ - path_char = PyBytes_AsString(path); if (PyList_Check(argv)) { argc = PyList_Size(argv); getitem = PyList_GetItem; @@ -5272,7 +5042,7 @@ os_spawnv_impl(PyObject *module, int mode, PyObject *path, PyObject *argv) return NULL; } - argvlist = PyMem_NEW(char *, argc+1); + argvlist = PyMem_NEW(EXECV_CHAR *, argc+1); if (argvlist == NULL) { return PyErr_NoMemory(); } @@ -5292,7 +5062,11 @@ os_spawnv_impl(PyObject *module, int mode, PyObject *path, PyObject *argv) mode = _P_OVERLAY; Py_BEGIN_ALLOW_THREADS - spawnval = _spawnv(mode, path_char, argvlist); +#ifdef HAVE_WSPAWNV + spawnval = _wspawnv(mode, path->wide, argvlist); +#else + spawnval = _spawnv(mode, path->narrow, argvlist); +#endif Py_END_ALLOW_THREADS free_string_array(argvlist, argc); @@ -5309,7 +5083,7 @@ os.spawnve mode: int Mode of process creation. - path: FSConverter + path: path_t Path of executable file. argv: object Tuple or list of strings. @@ -5321,13 +5095,12 @@ Execute the program specified by path in a new process. [clinic start generated code]*/ static PyObject * -os_spawnve_impl(PyObject *module, int mode, PyObject *path, PyObject *argv, +os_spawnve_impl(PyObject *module, int mode, path_t *path, PyObject *argv, PyObject *env) -/*[clinic end generated code: output=ebcfa5f7ba2f4219 input=02362fd937963f8f]*/ +/*[clinic end generated code: output=30fe85be56fe37ad input=3e40803ee7c4c586]*/ { - const char *path_char; - char **argvlist; - char **envlist; + EXECV_CHAR **argvlist; + EXECV_CHAR **envlist; PyObject *res = NULL; Py_ssize_t argc, i, envc; intptr_t spawnval; @@ -5338,7 +5111,6 @@ os_spawnve_impl(PyObject *module, int mode, PyObject *path, PyObject *argv, argv is a list or tuple of strings and env is a dictionary like posix.environ. 
*/ - path_char = PyBytes_AsString(path); if (PyList_Check(argv)) { argc = PyList_Size(argv); getitem = PyList_GetItem; @@ -5358,7 +5130,7 @@ os_spawnve_impl(PyObject *module, int mode, PyObject *path, PyObject *argv, goto fail_0; } - argvlist = PyMem_NEW(char *, argc+1); + argvlist = PyMem_NEW(EXECV_CHAR *, argc+1); if (argvlist == NULL) { PyErr_NoMemory(); goto fail_0; @@ -5382,7 +5154,11 @@ os_spawnve_impl(PyObject *module, int mode, PyObject *path, PyObject *argv, mode = _P_OVERLAY; Py_BEGIN_ALLOW_THREADS - spawnval = _spawnve(mode, path_char, argvlist, envlist); +#ifdef HAVE_WSPAWNV + spawnval = _wspawnve(mode, path->wide, argvlist, envlist); +#else + spawnval = _spawnve(mode, path->narrow, argvlist, envlist); +#endif Py_END_ALLOW_THREADS if (spawnval == -1) @@ -7290,21 +7066,18 @@ win_readlink(PyObject *self, PyObject *args, PyObject *kwargs) /* Grab CreateSymbolicLinkW dynamically from kernel32 */ static DWORD (CALLBACK *Py_CreateSymbolicLinkW)(LPCWSTR, LPCWSTR, DWORD) = NULL; -static DWORD (CALLBACK *Py_CreateSymbolicLinkA)(LPCSTR, LPCSTR, DWORD) = NULL; static int check_CreateSymbolicLink(void) { HINSTANCE hKernel32; /* only recheck */ - if (Py_CreateSymbolicLinkW && Py_CreateSymbolicLinkA) + if (Py_CreateSymbolicLinkW) return 1; hKernel32 = GetModuleHandleW(L"KERNEL32"); *(FARPROC*)&Py_CreateSymbolicLinkW = GetProcAddress(hKernel32, "CreateSymbolicLinkW"); - *(FARPROC*)&Py_CreateSymbolicLinkA = GetProcAddress(hKernel32, - "CreateSymbolicLinkA"); - return (Py_CreateSymbolicLinkW && Py_CreateSymbolicLinkA); + return Py_CreateSymbolicLinkW != NULL; } /* Remove the last portion of the path */ @@ -7321,20 +7094,6 @@ _dirnameW(WCHAR *path) *ptr = 0; } -/* Remove the last portion of the path */ -static void -_dirnameA(char *path) -{ - char *ptr; - - /* walk the path from the end until a backslash is encountered */ - for(ptr = path + strlen(path); ptr != path; ptr--) { - if (*ptr == '\\' || *ptr == '/') - break; - } - *ptr = 0; -} - /* Is this path absolute? 
*/ static int _is_absW(const WCHAR *path) @@ -7343,14 +7102,6 @@ _is_absW(const WCHAR *path) } -/* Is this path absolute? */ -static int -_is_absA(const char *path) -{ - return path[0] == '\\' || path[0] == '/' || path[1] == ':'; - -} - /* join root and rest with a backslash */ static void _joinW(WCHAR *dest_path, const WCHAR *root, const WCHAR *rest) @@ -7372,27 +7123,6 @@ _joinW(WCHAR *dest_path, const WCHAR *root, const WCHAR *rest) wcscpy(dest_path+root_len, rest); } -/* join root and rest with a backslash */ -static void -_joinA(char *dest_path, const char *root, const char *rest) -{ - size_t root_len; - - if (_is_absA(rest)) { - strcpy(dest_path, rest); - return; - } - - root_len = strlen(root); - - strcpy(dest_path, root); - if(root_len) { - dest_path[root_len] = '\\'; - root_len++; - } - strcpy(dest_path+root_len, rest); -} - /* Return True if the path at src relative to dest is a directory */ static int _check_dirW(LPCWSTR src, LPCWSTR dest) @@ -7411,25 +7141,6 @@ _check_dirW(LPCWSTR src, LPCWSTR dest) && src_info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ); } - -/* Return True if the path at src relative to dest is a directory */ -static int -_check_dirA(LPCSTR src, LPCSTR dest) -{ - WIN32_FILE_ATTRIBUTE_DATA src_info; - char dest_parent[MAX_PATH]; - char src_resolved[MAX_PATH] = ""; - - /* dest_parent = os.path.dirname(dest) */ - strcpy(dest_parent, dest); - _dirnameA(dest_parent); - /* src_resolved = os.path.join(dest_parent, src) */ - _joinA(src_resolved, dest_parent, src); - return ( - GetFileAttributesExA(src_resolved, GetFileExInfoStandard, &src_info) - && src_info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY - ); -} #endif @@ -7489,18 +7200,10 @@ os_symlink_impl(PyObject *module, path_t *src, path_t *dst, #ifdef MS_WINDOWS Py_BEGIN_ALLOW_THREADS - if (dst->wide) { - /* if src is a directory, ensure target_is_directory==1 */ - target_is_directory |= _check_dirW(src->wide, dst->wide); - result = Py_CreateSymbolicLinkW(dst->wide, src->wide, - 
target_is_directory); - } - else { - /* if src is a directory, ensure target_is_directory==1 */ - target_is_directory |= _check_dirA(src->narrow, dst->narrow); - result = Py_CreateSymbolicLinkA(dst->narrow, src->narrow, - target_is_directory); - } + /* if src is a directory, ensure target_is_directory==1 */ + target_is_directory |= _check_dirW(src->wide, dst->wide); + result = Py_CreateSymbolicLinkW(dst->wide, src->wide, + target_is_directory); Py_END_ALLOW_THREADS if (!result) @@ -7805,16 +7508,14 @@ os_open_impl(PyObject *module, path_t *path, int flags, int mode, int dir_fd) do { Py_BEGIN_ALLOW_THREADS #ifdef MS_WINDOWS - if (path->wide) - fd = _wopen(path->wide, flags, mode); - else + fd = _wopen(path->wide, flags, mode); #endif #ifdef HAVE_OPENAT if (dir_fd != DEFAULT_DIR_FD) fd = openat(dir_fd, path->narrow, flags, mode); else -#endif fd = open(path->narrow, flags, mode); +#endif Py_END_ALLOW_THREADS } while (fd < 0 && errno == EINTR && !(async_err = PyErr_CheckSignals())); _Py_END_SUPPRESS_IPH @@ -8955,10 +8656,7 @@ os_truncate_impl(PyObject *module, path_t *path, Py_off_t length) Py_BEGIN_ALLOW_THREADS _Py_BEGIN_SUPPRESS_IPH #ifdef MS_WINDOWS - if (path->wide) - fd = _wopen(path->wide, _O_WRONLY | _O_BINARY | _O_NOINHERIT); - else - fd = _open(path->narrow, _O_WRONLY | _O_BINARY | _O_NOINHERIT); + fd = _wopen(path->wide, _O_WRONLY | _O_BINARY | _O_NOINHERIT); if (fd < 0) result = -1; else { @@ -10612,31 +10310,8 @@ os_abort_impl(PyObject *module) } #ifdef MS_WINDOWS -/* AC 3.5: change to path_t? 
but that might change exceptions */ -PyDoc_STRVAR(win32_startfile__doc__, -"startfile(filepath [, operation])\n\ -\n\ -Start a file with its associated application.\n\ -\n\ -When \"operation\" is not specified or \"open\", this acts like\n\ -double-clicking the file in Explorer, or giving the file name as an\n\ -argument to the DOS \"start\" command: the file is opened with whatever\n\ -application (if any) its extension is associated.\n\ -When another \"operation\" is given, it specifies what should be done with\n\ -the file. A typical operation is \"print\".\n\ -\n\ -startfile returns as soon as the associated application is launched.\n\ -There is no option to wait for the application to close, and no way\n\ -to retrieve the application's exit status.\n\ -\n\ -The filepath is relative to the current directory. If you want to use\n\ -an absolute path, make sure the first character is not a slash (\"/\");\n\ -the underlying Win32 ShellExecute function doesn't work if it is."); - /* Grab ShellExecute dynamically from shell32 */ static int has_ShellExecute = -1; -static HINSTANCE (CALLBACK *Py_ShellExecuteA)(HWND, LPCSTR, LPCSTR, LPCSTR, - LPCSTR, INT); static HINSTANCE (CALLBACK *Py_ShellExecuteW)(HWND, LPCWSTR, LPCWSTR, LPCWSTR, LPCWSTR, INT); static int @@ -10650,12 +10325,9 @@ check_ShellExecute() hShell32 = LoadLibraryW(L"SHELL32"); Py_END_ALLOW_THREADS if (hShell32) { - *(FARPROC*)&Py_ShellExecuteA = GetProcAddress(hShell32, - "ShellExecuteA"); *(FARPROC*)&Py_ShellExecuteW = GetProcAddress(hShell32, "ShellExecuteW"); - has_ShellExecute = Py_ShellExecuteA && - Py_ShellExecuteW; + has_ShellExecute = Py_ShellExecuteW != NULL; } else { has_ShellExecute = 0; } @@ -10664,17 +10336,37 @@ check_ShellExecute() } +/*[clinic input] +os.startfile + filepath: path_t + operation: Py_UNICODE = NULL + +startfile(filepath [, operation]) + +Start a file with its associated application. 
+ +When "operation" is not specified or "open", this acts like +double-clicking the file in Explorer, or giving the file name as an +argument to the DOS "start" command: the file is opened with whatever +application (if any) its extension is associated. +When another "operation" is given, it specifies what should be done with +the file. A typical operation is "print". + +startfile returns as soon as the associated application is launched. +There is no option to wait for the application to close, and no way +to retrieve the application's exit status. + +The filepath is relative to the current directory. If you want to use +an absolute path, make sure the first character is not a slash ("/"); +the underlying Win32 ShellExecute function doesn't work if it is. +[clinic start generated code]*/ + static PyObject * -win32_startfile(PyObject *self, PyObject *args) +os_startfile_impl(PyObject *module, path_t *filepath, Py_UNICODE *operation) +/*[clinic end generated code: output=912ceba79acfa1c9 input=63950bf2986380d0]*/ { - PyObject *ofilepath; - const char *filepath; - const char *operation = NULL; - const wchar_t *wpath, *woperation; HINSTANCE rc; - PyObject *unipath, *uoperation = NULL; - if(!check_ShellExecute()) { /* If the OS doesn't have ShellExecute, return a NotImplementedError. 
*/ @@ -10682,68 +10374,16 @@ win32_startfile(PyObject *self, PyObject *args) "startfile not available on this platform"); } - if (!PyArg_ParseTuple(args, "U|s:startfile", - &unipath, &operation)) { - PyErr_Clear(); - goto normal; - } - - if (operation) { - uoperation = PyUnicode_DecodeASCII(operation, - strlen(operation), NULL); - if (!uoperation) { - PyErr_Clear(); - operation = NULL; - goto normal; - } - } - - wpath = PyUnicode_AsUnicode(unipath); - if (wpath == NULL) - goto normal; - if (uoperation) { - woperation = PyUnicode_AsUnicode(uoperation); - if (woperation == NULL) - goto normal; - } - else - woperation = NULL; - Py_BEGIN_ALLOW_THREADS - rc = Py_ShellExecuteW((HWND)0, woperation, wpath, + rc = Py_ShellExecuteW((HWND)0, operation, filepath->wide, NULL, NULL, SW_SHOWNORMAL); Py_END_ALLOW_THREADS - Py_XDECREF(uoperation); if (rc <= (HINSTANCE)32) { - win32_error_object("startfile", unipath); - return NULL; - } - Py_INCREF(Py_None); - return Py_None; - -normal: - if (!PyArg_ParseTuple(args, "O&|s:startfile", - PyUnicode_FSConverter, &ofilepath, - &operation)) - return NULL; - if (win32_warn_bytes_api()) { - Py_DECREF(ofilepath); + win32_error_object("startfile", filepath->object); return NULL; } - filepath = PyBytes_AsString(ofilepath); - Py_BEGIN_ALLOW_THREADS - rc = Py_ShellExecuteA((HWND)0, operation, filepath, - NULL, NULL, SW_SHOWNORMAL); - Py_END_ALLOW_THREADS - if (rc <= (HINSTANCE)32) { - PyObject *errval = win32_error("startfile", filepath); - Py_DECREF(ofilepath); - return errval; - } - Py_DECREF(ofilepath); - Py_INCREF(Py_None); - return Py_None; + Py_RETURN_NONE; } #endif /* MS_WINDOWS */ @@ -11560,9 +11200,9 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) return NULL; if (follow_symlinks) - result = win32_stat_w(path, &st); + result = win32_stat(path, &st); else - result = win32_lstat_w(path, &st); + result = win32_lstat(path, &st); if (result != 0) { return PyErr_SetExcFromWindowsErrWithFilenameObject(PyExc_OSError, @@ -11761,7 
+11401,7 @@ DirEntry_inode(DirEntry *self) if (!path) return NULL; - if (win32_lstat_w(path, &stat) != 0) { + if (win32_lstat(path, &stat) != 0) { return PyErr_SetExcFromWindowsErrWithFilenameObject(PyExc_OSError, 0, self->path); } @@ -11910,6 +11550,11 @@ DirEntry_from_find_data(path_t *path, WIN32_FIND_DATAW *dataW) entry->name = PyUnicode_FromWideChar(dataW->cFileName, -1); if (!entry->name) goto error; + if (path->narrow) { + Py_SETREF(entry->name, PyUnicode_EncodeFSDefault(entry->name)); + if (!entry->name) + goto error; + } joined_path = join_path_filenameW(path->wide, dataW->cFileName); if (!joined_path) @@ -11919,8 +11564,13 @@ DirEntry_from_find_data(path_t *path, WIN32_FIND_DATAW *dataW) PyMem_Free(joined_path); if (!entry->path) goto error; + if (path->narrow) { + Py_SETREF(entry->path, PyUnicode_EncodeFSDefault(entry->path)); + if (!entry->path) + goto error; + } - find_data_to_file_info_w(dataW, &file_info, &reparse_tag); + find_data_to_file_info(dataW, &file_info, &reparse_tag); _Py_attribute_data_to_stat(&file_info, reparse_tag, &entry->win32_lstat); return (PyObject *)entry; @@ -12316,11 +11966,6 @@ posix_scandir(PyObject *self, PyObject *args, PyObject *kwargs) Py_XINCREF(iterator->path.object); #ifdef MS_WINDOWS - if (iterator->path.narrow) { - PyErr_SetString(PyExc_TypeError, - "os.scandir() doesn't support bytes path on Windows, use Unicode instead"); - goto error; - } iterator->first_time = 1; path_strW = join_path_filenameW(iterator->path.wide, L"*.*"); @@ -12570,7 +12215,7 @@ static PyMethodDef posix_methods[] = { OS_KILLPG_METHODDEF OS_PLOCK_METHODDEF #ifdef MS_WINDOWS - {"startfile", win32_startfile, METH_VARARGS, win32_startfile__doc__}, + OS_STARTFILE_METHODDEF #endif OS_SETUID_METHODDEF OS_SETEUID_METHODDEF diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 88f68ef..7979eec 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3185,7 +3185,7 @@ PyUnicode_Decode(const char *s, || strcmp(lower, 
"us_ascii") == 0) { return PyUnicode_DecodeASCII(s, size, errors); } - #ifdef HAVE_MBCS + #ifdef MS_WINDOWS else if (strcmp(lower, "mbcs") == 0) { return PyUnicode_DecodeMBCS(s, size, errors); } @@ -3507,10 +3507,8 @@ encode_error: PyObject * PyUnicode_EncodeFSDefault(PyObject *unicode) { -#ifdef HAVE_MBCS - return PyUnicode_EncodeCodePage(CP_ACP, unicode, NULL); -#elif defined(__APPLE__) - return _PyUnicode_AsUTF8String(unicode, "surrogateescape"); +#if defined(__APPLE__) + return _PyUnicode_AsUTF8String(unicode, Py_FileSystemDefaultEncodeErrors); #else PyInterpreterState *interp = PyThreadState_GET()->interp; /* Bootstrap check: if the filesystem codec is implemented in Python, we @@ -3525,10 +3523,10 @@ PyUnicode_EncodeFSDefault(PyObject *unicode) if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) { return PyUnicode_AsEncodedString(unicode, Py_FileSystemDefaultEncoding, - "surrogateescape"); + Py_FileSystemDefaultEncodeErrors); } else { - return PyUnicode_EncodeLocale(unicode, "surrogateescape"); + return PyUnicode_EncodeLocale(unicode, Py_FileSystemDefaultEncodeErrors); } #endif } @@ -3577,7 +3575,7 @@ PyUnicode_AsEncodedString(PyObject *unicode, || strcmp(lower, "us_ascii") == 0) { return _PyUnicode_AsASCIIString(unicode, errors); } -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS else if (strcmp(lower, "mbcs") == 0) { return PyUnicode_EncodeCodePage(CP_ACP, unicode, errors); } @@ -3813,10 +3811,8 @@ PyUnicode_DecodeFSDefault(const char *s) { PyObject* PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) { -#ifdef HAVE_MBCS - return PyUnicode_DecodeMBCS(s, size, NULL); -#elif defined(__APPLE__) - return PyUnicode_DecodeUTF8Stateful(s, size, "surrogateescape", NULL); +#if defined(__APPLE__) + return PyUnicode_DecodeUTF8Stateful(s, size, Py_FileSystemDefaultEncodeErrors, NULL); #else PyInterpreterState *interp = PyThreadState_GET()->interp; /* Bootstrap check: if the filesystem codec is implemented in Python, we @@ -3829,12 +3825,24 @@ 
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) cannot only rely on it: check also interp->fscodec_initialized for subinterpreters. */ if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) { - return PyUnicode_Decode(s, size, + PyObject *res = PyUnicode_Decode(s, size, Py_FileSystemDefaultEncoding, - "surrogateescape"); + Py_FileSystemDefaultEncodeErrors); +#ifdef MS_WINDOWS + if (!res && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + PyObject *exc, *val, *tb; + PyErr_Fetch(&exc, &val, &tb); + PyErr_Format(PyExc_RuntimeError, + "filesystem path bytes were not correctly encoded with '%s'. " \ + "Please report this at http://bugs.python.org/issue27781", + Py_FileSystemDefaultEncoding); + _PyErr_ChainExceptions(exc, val, tb); + } +#endif + return res; } else { - return PyUnicode_DecodeLocaleAndSize(s, size, "surrogateescape"); + return PyUnicode_DecodeLocaleAndSize(s, size, Py_FileSystemDefaultEncodeErrors); } #endif } @@ -4218,7 +4226,7 @@ onError: Py_CLEAR(*exceptionObject); } -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS /* error handling callback helper: build arguments, call the callback and check the arguments, if no exception occurred, copy the replacement to the output @@ -4332,7 +4340,7 @@ unicode_decode_call_errorhandler_wchar( Py_XDECREF(restuple); return -1; } -#endif /* HAVE_MBCS */ +#endif /* MS_WINDOWS */ static int unicode_decode_call_errorhandler_writer( @@ -7022,7 +7030,7 @@ PyUnicode_AsASCIIString(PyObject *unicode) return _PyUnicode_AsASCIIString(unicode, NULL); } -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS /* --- MBCS codecs for Windows -------------------------------------------- */ @@ -7741,7 +7749,7 @@ PyUnicode_AsMBCSString(PyObject *unicode) #undef NEED_RETRY -#endif /* HAVE_MBCS */ +#endif /* MS_WINDOWS */ /* --- Character Mapping Codec -------------------------------------------- */ diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index be14560..252c0a7 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c 
@@ -21,16 +21,18 @@ Don't forget to modify PyUnicode_DecodeFSDefault() if you touch any of the values for Py_FileSystemDefaultEncoding! */ -#ifdef HAVE_MBCS -const char *Py_FileSystemDefaultEncoding = "mbcs"; +#if defined(__APPLE__) +const char *Py_FileSystemDefaultEncoding = "utf-8"; int Py_HasFileSystemDefaultEncoding = 1; -#elif defined(__APPLE__) +#elif defined(MS_WINDOWS) +/* may be changed by initfsencoding(), but should never be free()d */ const char *Py_FileSystemDefaultEncoding = "utf-8"; int Py_HasFileSystemDefaultEncoding = 1; #else const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */ int Py_HasFileSystemDefaultEncoding = 0; #endif +const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape"; _Py_IDENTIFIER(__builtins__); _Py_IDENTIFIER(__dict__); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 1896888..3f3b614 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -90,6 +90,9 @@ int Py_NoUserSiteDirectory = 0; /* for -s and site.py */ int Py_UnbufferedStdioFlag = 0; /* Unbuffered binary std{in,out,err} */ int Py_HashRandomizationFlag = 0; /* for -R and PYTHONHASHSEED */ int Py_IsolatedFlag = 0; /* for -I, isolate from user's env */ +#ifdef MS_WINDOWS +int Py_LegacyWindowsFSEncodingFlag = 0; /* Uses mbcs instead of utf-8 */ +#endif PyThreadState *_Py_Finalizing = NULL; @@ -321,6 +324,10 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib) check its value further. 
*/ if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0') Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p); +#ifdef MS_WINDOWS + if ((p = Py_GETENV("PYTHONLEGACYWINDOWSFSENCODING")) && *p != '\0') + Py_LegacyWindowsFSEncodingFlag = add_flag(Py_LegacyWindowsFSEncodingFlag, p); +#endif _PyRandom_Init(); @@ -958,6 +965,18 @@ initfsencoding(PyInterpreterState *interp) { PyObject *codec; +#ifdef MS_WINDOWS + if (Py_LegacyWindowsFSEncodingFlag) + { + Py_FileSystemDefaultEncoding = "mbcs"; + Py_FileSystemDefaultEncodeErrors = "replace"; + } + else + { + Py_FileSystemDefaultEncoding = "utf-8"; + Py_FileSystemDefaultEncodeErrors = "surrogatepass"; + } +#else if (Py_FileSystemDefaultEncoding == NULL) { Py_FileSystemDefaultEncoding = get_locale_encoding(); @@ -968,6 +987,7 @@ initfsencoding(PyInterpreterState *interp) interp->fscodec_initialized = 1; return 0; } +#endif /* the encoding is mbcs, utf-8 or ascii */ codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index a54f266..0fe76b7 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -311,6 +311,23 @@ operating system filenames." ); static PyObject * +sys_getfilesystemencodeerrors(PyObject *self) +{ + if (Py_FileSystemDefaultEncodeErrors) + return PyUnicode_FromString(Py_FileSystemDefaultEncodeErrors); + PyErr_SetString(PyExc_RuntimeError, + "filesystem encoding is not initialized"); + return NULL; +} + +PyDoc_STRVAR(getfilesystemencodeerrors_doc, + "getfilesystemencodeerrors() -> string\n\ +\n\ +Return the error mode used to convert Unicode filenames in\n\ +operating system filenames." +); + +static PyObject * sys_intern(PyObject *self, PyObject *args) { PyObject *s; @@ -866,6 +883,24 @@ sys_getwindowsversion(PyObject *self) #pragma warning(pop) +PyDoc_STRVAR(enablelegacywindowsfsencoding_doc, +"_enablelegacywindowsfsencoding()\n\ +\n\ +Changes the default filesystem encoding to mbcs:replace for consistency\n\ +with earlier versions of Python. 
See PEP 529 for more information.\n\ +\n\ +This is equivalent to defining the PYTHONLEGACYWINDOWSFSENCODING \n\ +environment variable before launching Python." +); + +static PyObject * +sys_enablelegacywindowsfsencoding(PyObject *self) +{ + Py_FileSystemDefaultEncoding = "mbcs"; + Py_FileSystemDefaultEncodeErrors = "replace"; + Py_RETURN_NONE; +} + #endif /* MS_WINDOWS */ #ifdef HAVE_DLOPEN @@ -1225,6 +1260,8 @@ static PyMethodDef sys_methods[] = { #endif {"getfilesystemencoding", (PyCFunction)sys_getfilesystemencoding, METH_NOARGS, getfilesystemencoding_doc}, + { "getfilesystemencodeerrors", (PyCFunction)sys_getfilesystemencodeerrors, + METH_NOARGS, getfilesystemencodeerrors_doc }, #ifdef Py_TRACE_REFS {"getobjects", _Py_GetObjects, METH_VARARGS}, #endif @@ -1240,6 +1277,8 @@ static PyMethodDef sys_methods[] = { #ifdef MS_WINDOWS {"getwindowsversion", (PyCFunction)sys_getwindowsversion, METH_NOARGS, getwindowsversion_doc}, + {"_enablelegacywindowsfsencoding", (PyCFunction)sys_enablelegacywindowsfsencoding, + METH_NOARGS, enablelegacywindowsfsencoding_doc }, #endif /* MS_WINDOWS */ {"intern", sys_intern, METH_VARARGS, intern_doc}, {"is_finalizing", sys_is_finalizing, METH_NOARGS, is_finalizing_doc}, @@ -1456,14 +1495,21 @@ version -- the version of this interpreter as a string\n\ version_info -- version information as a named tuple\n\ " ) -#ifdef MS_WINDOWS +#ifdef MS_COREDLL /* concatenating string here */ PyDoc_STR( "dllhandle -- [Windows only] integer handle of the Python DLL\n\ winver -- [Windows only] version number of the Python DLL\n\ " ) -#endif /* MS_WINDOWS */ +#endif /* MS_COREDLL */ +#ifdef MS_WINDOWS +/* concatenating string here */ +PyDoc_STR( +"_enablelegacywindowsfsencoding -- [Windows only] \n\ +" +) +#endif PyDoc_STR( "__stdin__ -- the original stdin; don't touch!\n\ __stdout__ -- the original stdout; don't touch!\n\ -- cgit v0.12