From df2d4a6f3d5da2839c4fc11d31511c8e028daf2c Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Wed, 21 Aug 2019 15:27:33 -0700 Subject: bpo-37834: Normalise handling of reparse points on Windows (GH-15231) bpo-37834: Normalise handling of reparse points on Windows * ntpath.realpath() and nt.stat() will traverse all supported reparse points (previously was mixed) * nt.lstat() will let the OS traverse reparse points that are not name surrogates (previously would not traverse any reparse point) * nt.[l]stat() will only set S_IFLNK for symlinks (previous behaviour) * nt.readlink() will read destinations for symlinks and junction points only bpo-1311: os.path.exists('nul') now returns True on Windows * nt.stat('nul').st_mode is now S_IFCHR (previously was an error) --- Doc/library/os.rst | 53 ++- Doc/library/shutil.rst | 4 + Doc/library/stat.rst | 10 + Doc/whatsnew/3.8.rst | 21 ++ Include/fileutils.h | 1 + Lib/shutil.py | 48 ++- Lib/test/test_os.py | 61 +++- Lib/test/test_shutil.py | 69 +++- Lib/test/test_tools/test_lll.py | 6 +- Lib/test/test_venv.py | 6 +- .../2019-08-12-12-00-24.bpo-37834.VB2QVj.rst | 2 + .../2019-08-21-12-58-18.bpo-1311.BoW1wU.rst | 2 + Modules/_stat.c | 7 + Modules/clinic/posixmodule.c.h | 19 +- Modules/posixmodule.c | 387 +++++++++++---------- Python/fileutils.c | 7 +- 16 files changed, 470 insertions(+), 233 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2019-08-12-12-00-24.bpo-37834.VB2QVj.rst create mode 100644 Misc/NEWS.d/next/Windows/2019-08-21-12-58-18.bpo-1311.BoW1wU.rst diff --git a/Doc/library/os.rst b/Doc/library/os.rst index c74d687..6cbfb74 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1858,6 +1858,12 @@ features: .. versionchanged:: 3.6 Accepts a :term:`path-like object` for *src* and *dst*. + .. versionchanged:: 3.8 + On Windows, now opens reparse points that represent another path + (name surrogates), including symbolic links and directory junctions. + Other kinds of reparse points are resolved by the operating system as + for :func:`~os.stat`. + .. function:: mkdir(path, mode=0o777, *, dir_fd=None) @@ -2039,6 +2045,10 @@ features: This function can also support :ref:`paths relative to directory descriptors `. + When trying to resolve a path that may contain links, use + :func:`~os.path.realpath` to properly handle recursion and platform + differences. + .. availability:: Unix, Windows. .. versionchanged:: 3.2 @@ -2053,6 +2063,11 @@ features: .. versionchanged:: 3.8 Accepts a :term:`path-like object` and a bytes object on Windows. + .. versionchanged:: 3.8 + Added support for directory junctions, and changed to return the + substitution path (which typically includes ``\\?\`` prefix) rather + than the optional "print name" field that was previously returned. + .. function:: remove(path, *, dir_fd=None) Remove (delete) the file *path*. If *path* is a directory, an @@ -2366,7 +2381,8 @@ features: On Unix, this method always requires a system call. On Windows, it only requires a system call if *follow_symlinks* is ``True`` and the - entry is a symbolic link. + entry is a reparse point (for example, a symbolic link or directory + junction). On Windows, the ``st_ino``, ``st_dev`` and ``st_nlink`` attributes of the :class:`stat_result` are always set to zero. Call :func:`os.stat` to @@ -2403,6 +2419,17 @@ features: This function can support :ref:`specifying a file descriptor ` and :ref:`not following symlinks `. + On Windows, passing ``follow_symlinks=False`` will disable following all + name-surrogate reparse points, which includes symlinks and directory + junctions. Other types of reparse points that do not resemble links or that + the operating system is unable to follow will be opened directly. When + following a chain of multiple links, this may result in the original link + being returned instead of the non-link that prevented full traversal. To + obtain stat results for the final path in this case, use the + :func:`os.path.realpath` function to resolve the path name as far as + possible and call :func:`lstat` on the result. This does not apply to + dangling symlinks or junction points, which will raise the usual exceptions. + .. index:: module: stat Example:: @@ -2427,6 +2454,14 @@ features: .. versionchanged:: 3.6 Accepts a :term:`path-like object`. + .. versionchanged:: 3.8 + On Windows, all reparse points that can be resolved by the operating + system are now followed, and passing ``follow_symlinks=False`` + disables following all name surrogate reparse points. If the operating + system reaches a reparse point that it is not able to follow, *stat* now + returns the information for the original path as if + ``follow_symlinks=False`` had been specified instead of raising an error. + .. class:: stat_result @@ -2578,7 +2613,7 @@ features: File type. - On Windows systems, the following attribute is also available: + On Windows systems, the following attributes are also available: .. attribute:: st_file_attributes @@ -2587,6 +2622,12 @@ features: :c:func:`GetFileInformationByHandle`. See the ``FILE_ATTRIBUTE_*`` constants in the :mod:`stat` module. + .. attribute:: st_reparse_tag + + When :attr:`st_file_attributes` has the ``FILE_ATTRIBUTE_REPARSE_POINT`` + set, this field contains the tag identifying the type of reparse point. + See the ``IO_REPARSE_TAG_*`` constants in the :mod:`stat` module. + The standard module :mod:`stat` defines functions and constants that are useful for extracting information from a :c:type:`stat` structure. (On Windows, some items are filled with dummy values.) @@ -2614,6 +2655,14 @@ features: .. versionadded:: 3.7 Added the :attr:`st_fstype` member to Solaris/derivatives. + .. versionadded:: 3.8 + Added the :attr:`st_reparse_tag` member on Windows. + + .. versionchanged:: 3.8 + On Windows, the :attr:`st_mode` member now identifies special + files as :const:`S_IFCHR`, :const:`S_IFIFO` or :const:`S_IFBLK` + as appropriate. + .. function:: statvfs(path) Perform a :c:func:`statvfs` system call on the given path. The return value is diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index eaeee8d..a51e068 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -304,6 +304,10 @@ Directory and files operations Added a symlink attack resistant version that is used automatically if platform supports fd-based functions. + .. versionchanged:: 3.8 + On Windows, will no longer delete the contents of a directory junction + before removing the junction. + .. attribute:: rmtree.avoids_symlink_attacks Indicates whether the current platform and implementation provides a diff --git a/Doc/library/stat.rst b/Doc/library/stat.rst index c8f6904..f48a0a9 100644 --- a/Doc/library/stat.rst +++ b/Doc/library/stat.rst @@ -425,3 +425,13 @@ for more detail on the meaning of these constants. FILE_ATTRIBUTE_VIRTUAL .. versionadded:: 3.5 + +On Windows, the following constants are available for comparing against the +``st_reparse_tag`` member returned by :func:`os.lstat`. These are well-known +constants, but are not an exhaustive list. + +.. data:: IO_REPARSE_TAG_SYMLINK + IO_REPARSE_TAG_MOUNT_POINT + IO_REPARSE_TAG_APPEXECLINK + + .. versionadded:: 3.8 diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 93758e9..0294e9a 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -808,6 +808,21 @@ A new :func:`os.memfd_create` function was added to wrap the ``memfd_create()`` syscall. (Contributed by Zackery Spytz and Christian Heimes in :issue:`26836`.) +On Windows, much of the manual logic for handling reparse points (including +symlinks and directory junctions) has been delegated to the operating system. +Specifically, :func:`os.stat` will now traverse anything supported by the +operating system, while :func:`os.lstat` will only open reparse points that +identify as "name surrogates" while others are opened as for :func:`os.stat`. +In all cases, :attr:`stat_result.st_mode` will only have ``S_IFLNK`` set for +symbolic links and not other kinds of reparse points. To identify other kinds +of reparse point, check the new :attr:`stat_result.st_reparse_tag` attribute. + +On Windows, :func:`os.readlink` is now able to read directory junctions. Note +that :func:`~os.path.islink` will return ``False`` for directory junctions, +and so code that checks ``islink`` first will continue to treat junctions as +directories, while code that handles errors from :func:`os.readlink` may now +treat junctions as links. + os.path ------- @@ -824,6 +839,9 @@ characters or bytes unrepresentable at the OS level. environment variable and does not use :envvar:`HOME`, which is not normally set for regular user accounts. +:func:`~os.path.isdir` on Windows no longer returns true for a link to a +non-existent directory. + :func:`~os.path.realpath` on Windows now resolves reparse points, including symlinks and directory junctions. @@ -912,6 +930,9 @@ format for new archives to improve portability and standards conformance, inherited from the corresponding change to the :mod:`tarfile` module. (Contributed by C.A.M. Gerlach in :issue:`30661`.) +:func:`shutil.rmtree` on Windows now removes directory junctions without +recursively removing their contents first. + ssl --- diff --git a/Include/fileutils.h b/Include/fileutils.h index f081779..a9655bb 100644 --- a/Include/fileutils.h +++ b/Include/fileutils.h @@ -84,6 +84,7 @@ struct _Py_stat_struct { time_t st_ctime; int st_ctime_nsec; unsigned long st_file_attributes; + unsigned long st_reparse_tag; }; #else # define _Py_stat_struct stat diff --git a/Lib/shutil.py b/Lib/shutil.py index ab1a7d6..39f793b 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -452,7 +452,14 @@ def _copytree(entries, src, dst, symlinks, ignore, copy_function, dstname = os.path.join(dst, srcentry.name) srcobj = srcentry if use_srcentry else srcname try: - if srcentry.is_symlink(): + is_symlink = srcentry.is_symlink() + if is_symlink and os.name == 'nt': + # Special check for directory junctions, which appear as + # symlinks but we want to recurse. + lstat = srcentry.stat(follow_symlinks=False) + if lstat.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT: + is_symlink = False + if is_symlink: linkto = os.readlink(srcname) if symlinks: # We can't just leave it to `copy_function` because legacy @@ -537,6 +544,37 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=ignore_dangling_symlinks, dirs_exist_ok=dirs_exist_ok) +if hasattr(stat, 'FILE_ATTRIBUTE_REPARSE_POINT'): + # Special handling for directory junctions to make them behave like + # symlinks for shutil.rmtree, since in general they do not appear as + # regular links. + def _rmtree_isdir(entry): + try: + st = entry.stat(follow_symlinks=False) + return (stat.S_ISDIR(st.st_mode) and not + (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT + and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) + except OSError: + return False + + def _rmtree_islink(path): + try: + st = os.lstat(path) + return (stat.S_ISLNK(st.st_mode) or + (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT + and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) + except OSError: + return False +else: + def _rmtree_isdir(entry): + try: + return entry.is_dir(follow_symlinks=False) + except OSError: + return False + + def _rmtree_islink(path): + return os.path.islink(path) + # version vulnerable to race conditions def _rmtree_unsafe(path, onerror): try: @@ -547,11 +585,7 @@ def _rmtree_unsafe(path, onerror): entries = [] for entry in entries: fullname = entry.path - try: - is_dir = entry.is_dir(follow_symlinks=False) - except OSError: - is_dir = False - if is_dir: + if _rmtree_isdir(entry): try: if entry.is_symlink(): # This can only happen if someone replaces @@ -681,7 +715,7 @@ def rmtree(path, ignore_errors=False, onerror=None): os.close(fd) else: try: - if os.path.islink(path): + if _rmtree_islink(path): # symlinks to directories are forbidden, see bug #1669 raise OSError("Cannot call rmtree on a symbolic link") except OSError: diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 15fd65b..ba9f5c3 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -8,6 +8,7 @@ import codecs import contextlib import decimal import errno +import fnmatch import fractions import itertools import locale @@ -2253,6 +2254,20 @@ class ReadlinkTests(unittest.TestCase): filelinkb = os.fsencode(filelink) filelinkb_target = os.fsencode(filelink_target) + def assertPathEqual(self, left, right): + left = os.path.normcase(left) + right = os.path.normcase(right) + if sys.platform == 'win32': + # Bad practice to blindly strip the prefix as it may be required to + # correctly refer to the file, but we're only comparing paths here. + has_prefix = lambda p: p.startswith( + b'\\\\?\\' if isinstance(p, bytes) else '\\\\?\\') + if has_prefix(left): + left = left[4:] + if has_prefix(right): + right = right[4:] + self.assertEqual(left, right) + def setUp(self): self.assertTrue(os.path.exists(self.filelink_target)) self.assertTrue(os.path.exists(self.filelinkb_target)) @@ -2274,14 +2289,14 @@ class ReadlinkTests(unittest.TestCase): os.symlink(self.filelink_target, self.filelink) self.addCleanup(support.unlink, self.filelink) filelink = FakePath(self.filelink) - self.assertEqual(os.readlink(filelink), self.filelink_target) + self.assertPathEqual(os.readlink(filelink), self.filelink_target) @support.skip_unless_symlink def test_pathlike_bytes(self): os.symlink(self.filelinkb_target, self.filelinkb) self.addCleanup(support.unlink, self.filelinkb) path = os.readlink(FakePath(self.filelinkb)) - self.assertEqual(path, self.filelinkb_target) + self.assertPathEqual(path, self.filelinkb_target) self.assertIsInstance(path, bytes) @support.skip_unless_symlink @@ -2289,7 +2304,7 @@ class ReadlinkTests(unittest.TestCase): os.symlink(self.filelinkb_target, self.filelinkb) self.addCleanup(support.unlink, self.filelinkb) path = os.readlink(self.filelinkb) - self.assertEqual(path, self.filelinkb_target) + self.assertPathEqual(path, self.filelinkb_target) self.assertIsInstance(path, bytes) @@ -2348,16 +2363,12 @@ class Win32SymlinkTests(unittest.TestCase): # was created with target_is_dir==True. os.remove(self.missing_link) - @unittest.skip("currently fails; consider for improvement") def test_isdir_on_directory_link_to_missing_target(self): self._create_missing_dir_link() - # consider having isdir return true for directory links - self.assertTrue(os.path.isdir(self.missing_link)) + self.assertFalse(os.path.isdir(self.missing_link)) - @unittest.skip("currently fails; consider for improvement") def test_rmdir_on_directory_link_to_missing_target(self): self._create_missing_dir_link() - # consider allowing rmdir to remove directory links os.rmdir(self.missing_link) def check_stat(self, link, target): @@ -2453,6 +2464,24 @@ class Win32SymlinkTests(unittest.TestCase): except OSError: pass + def test_appexeclink(self): + root = os.path.expandvars(r'%LOCALAPPDATA%\Microsoft\WindowsApps') + aliases = [os.path.join(root, a) + for a in fnmatch.filter(os.listdir(root), '*.exe')] + + for alias in aliases: + if support.verbose: + print() + print("Testing with", alias) + st = os.lstat(alias) + self.assertEqual(st, os.stat(alias)) + self.assertFalse(stat.S_ISLNK(st.st_mode)) + self.assertEqual(st.st_reparse_tag, stat.IO_REPARSE_TAG_APPEXECLINK) + # testing the first one we see is sufficient + break + else: + self.skipTest("test requires an app execution alias") + @unittest.skipUnless(sys.platform == "win32", "Win32 specific tests") class Win32JunctionTests(unittest.TestCase): junction = 'junctiontest' @@ -2460,25 +2489,29 @@ class Win32JunctionTests(unittest.TestCase): def setUp(self): assert os.path.exists(self.junction_target) - assert not os.path.exists(self.junction) + assert not os.path.lexists(self.junction) def tearDown(self): - if os.path.exists(self.junction): - # os.rmdir delegates to Windows' RemoveDirectoryW, - # which removes junction points safely. - os.rmdir(self.junction) + if os.path.lexists(self.junction): + os.unlink(self.junction) def test_create_junction(self): _winapi.CreateJunction(self.junction_target, self.junction) + self.assertTrue(os.path.lexists(self.junction)) self.assertTrue(os.path.exists(self.junction)) self.assertTrue(os.path.isdir(self.junction)) + self.assertNotEqual(os.stat(self.junction), os.lstat(self.junction)) + self.assertEqual(os.stat(self.junction), os.stat(self.junction_target)) - # Junctions are not recognized as links. + # bpo-37834: Junctions are not recognized as links. self.assertFalse(os.path.islink(self.junction)) + self.assertEqual(os.path.normcase("\\\\?\\" + self.junction_target), + os.path.normcase(os.readlink(self.junction))) def test_unlink_removes_junction(self): _winapi.CreateJunction(self.junction_target, self.junction) self.assertTrue(os.path.exists(self.junction)) + self.assertTrue(os.path.lexists(self.junction)) os.unlink(self.junction) self.assertFalse(os.path.exists(self.junction)) diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 88dc4d9..636e3bd 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -42,6 +42,11 @@ try: except ImportError: UID_GID_SUPPORT = False +try: + import _winapi +except ImportError: + _winapi = None + def _fake_rename(*args, **kwargs): # Pretend the destination path is on a different filesystem. raise OSError(getattr(errno, 'EXDEV', 18), "Invalid cross-device link") @@ -226,6 +231,47 @@ class TestShutil(unittest.TestCase): self.assertTrue(os.path.exists(dir3)) self.assertTrue(os.path.exists(file1)) + @unittest.skipUnless(_winapi, 'only relevant on Windows') + def test_rmtree_fails_on_junctions(self): + tmp = self.mkdtemp() + dir_ = os.path.join(tmp, 'dir') + os.mkdir(dir_) + link = os.path.join(tmp, 'link') + _winapi.CreateJunction(dir_, link) + self.assertRaises(OSError, shutil.rmtree, link) + self.assertTrue(os.path.exists(dir_)) + self.assertTrue(os.path.lexists(link)) + errors = [] + def onerror(*args): + errors.append(args) + shutil.rmtree(link, onerror=onerror) + self.assertEqual(len(errors), 1) + self.assertIs(errors[0][0], os.path.islink) + self.assertEqual(errors[0][1], link) + self.assertIsInstance(errors[0][2][1], OSError) + + @unittest.skipUnless(_winapi, 'only relevant on Windows') + def test_rmtree_works_on_junctions(self): + tmp = self.mkdtemp() + dir1 = os.path.join(tmp, 'dir1') + dir2 = os.path.join(dir1, 'dir2') + dir3 = os.path.join(tmp, 'dir3') + for d in dir1, dir2, dir3: + os.mkdir(d) + file1 = os.path.join(tmp, 'file1') + write_file(file1, 'foo') + link1 = os.path.join(dir1, 'link1') + _winapi.CreateJunction(dir2, link1) + link2 = os.path.join(dir1, 'link2') + _winapi.CreateJunction(dir3, link2) + link3 = os.path.join(dir1, 'link3') + _winapi.CreateJunction(file1, link3) + # make sure junctions are removed but not followed + shutil.rmtree(dir1) + self.assertFalse(os.path.exists(dir1)) + self.assertTrue(os.path.exists(dir3)) + self.assertTrue(os.path.exists(file1)) + def test_rmtree_errors(self): # filename is guaranteed not to exist filename = tempfile.mktemp() @@ -754,8 +800,12 @@ class TestShutil(unittest.TestCase): src_stat = os.lstat(src_link) shutil.copytree(src_dir, dst_dir, symlinks=True) self.assertTrue(os.path.islink(os.path.join(dst_dir, 'sub', 'link'))) - self.assertEqual(os.readlink(os.path.join(dst_dir, 'sub', 'link')), - os.path.join(src_dir, 'file.txt')) + actual = os.readlink(os.path.join(dst_dir, 'sub', 'link')) + # Bad practice to blindly strip the prefix as it may be required to + # correctly refer to the file, but we're only comparing paths here. + if os.name == 'nt' and actual.startswith('\\\\?\\'): + actual = actual[4:] + self.assertEqual(actual, os.path.join(src_dir, 'file.txt')) dst_stat = os.lstat(dst_link) if hasattr(os, 'lchmod'): self.assertEqual(dst_stat.st_mode, src_stat.st_mode) @@ -886,7 +936,6 @@ class TestShutil(unittest.TestCase): shutil.copytree(src, dst, copy_function=custom_cpfun) self.assertEqual(len(flag), 1) - @unittest.skipIf(os.name == 'nt', 'temporarily disabled on Windows') @unittest.skipUnless(hasattr(os, 'link'), 'requires os.link') def test_dont_copy_file_onto_link_to_itself(self): # bug 851123. @@ -941,6 +990,20 @@ class TestShutil(unittest.TestCase): finally: shutil.rmtree(TESTFN, ignore_errors=True) + @unittest.skipUnless(_winapi, 'only relevant on Windows') + def test_rmtree_on_junction(self): + os.mkdir(TESTFN) + try: + src = os.path.join(TESTFN, 'cheese') + dst = os.path.join(TESTFN, 'shop') + os.mkdir(src) + open(os.path.join(src, 'spam'), 'wb').close() + _winapi.CreateJunction(src, dst) + self.assertRaises(OSError, shutil.rmtree, dst) + shutil.rmtree(dst, ignore_errors=True) + finally: + shutil.rmtree(TESTFN, ignore_errors=True) + # Issue #3002: copyfile and copytree block indefinitely on named pipes @unittest.skipUnless(hasattr(os, "mkfifo"), 'requires os.mkfifo()') def test_copyfile_named_pipe(self): diff --git a/Lib/test/test_tools/test_lll.py b/Lib/test/test_tools/test_lll.py index f3fbe961..b01e218 100644 --- a/Lib/test/test_tools/test_lll.py +++ b/Lib/test/test_tools/test_lll.py @@ -1,6 +1,7 @@ """Tests for the lll script in the Tools/script directory.""" import os +import sys import tempfile from test import support from test.test_tools import skip_if_missing, import_tool @@ -26,12 +27,13 @@ class lllTests(unittest.TestCase): with support.captured_stdout() as output: self.lll.main([dir1, dir2]) + prefix = '\\\\?\\' if os.name == 'nt' else '' self.assertEqual(output.getvalue(), f'{dir1}:\n' - f'symlink -> {fn1}\n' + f'symlink -> {prefix}{fn1}\n' f'\n' f'{dir2}:\n' - f'symlink -> {fn2}\n' + f'symlink -> {prefix}{fn2}\n' ) diff --git a/Lib/test/test_venv.py b/Lib/test/test_venv.py index 9724d9e..de93d95 100644 --- a/Lib/test/test_venv.py +++ b/Lib/test/test_venv.py @@ -394,11 +394,7 @@ class EnsurePipTest(BaseTest): with open(os.devnull, "rb") as f: self.assertEqual(f.read(), b"") - # Issue #20541: os.path.exists('nul') is False on Windows - if os.devnull.lower() == 'nul': - self.assertFalse(os.path.exists(os.devnull)) - else: - self.assertTrue(os.path.exists(os.devnull)) + self.assertTrue(os.path.exists(os.devnull)) def do_test_with_pip(self, system_site_packages): rmtree(self.env_dir) diff --git a/Misc/NEWS.d/next/Windows/2019-08-12-12-00-24.bpo-37834.VB2QVj.rst b/Misc/NEWS.d/next/Windows/2019-08-12-12-00-24.bpo-37834.VB2QVj.rst new file mode 100644 index 0000000..f2a654c --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2019-08-12-12-00-24.bpo-37834.VB2QVj.rst @@ -0,0 +1,2 @@ +Treat all name surrogate reparse points on Windows in :func:`os.lstat` and +other reparse points as regular files in :func:`os.stat`. diff --git a/Misc/NEWS.d/next/Windows/2019-08-21-12-58-18.bpo-1311.BoW1wU.rst b/Misc/NEWS.d/next/Windows/2019-08-21-12-58-18.bpo-1311.BoW1wU.rst new file mode 100644 index 0000000..2ee98e4 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2019-08-21-12-58-18.bpo-1311.BoW1wU.rst @@ -0,0 +1,2 @@ +The ``nul`` file on Windows now returns True from :func:`~os.path.exists` +and a valid result from :func:`os.stat` with ``S_IFCHR`` set. diff --git a/Modules/_stat.c b/Modules/_stat.c index f6cb303..6a3020a 100644 --- a/Modules/_stat.c +++ b/Modules/_stat.c @@ -589,6 +589,13 @@ PyInit__stat(void) if (PyModule_AddIntMacro(m, FILE_ATTRIBUTE_SYSTEM)) return NULL; if (PyModule_AddIntMacro(m, FILE_ATTRIBUTE_TEMPORARY)) return NULL; if (PyModule_AddIntMacro(m, FILE_ATTRIBUTE_VIRTUAL)) return NULL; + + if (PyModule_AddObject(m, "IO_REPARSE_TAG_SYMLINK", + PyLong_FromUnsignedLong(IO_REPARSE_TAG_SYMLINK))) return NULL; + if (PyModule_AddObject(m, "IO_REPARSE_TAG_MOUNT_POINT", + PyLong_FromUnsignedLong(IO_REPARSE_TAG_MOUNT_POINT))) return NULL; + if (PyModule_AddObject(m, "IO_REPARSE_TAG_APPEXECLINK", + PyLong_FromUnsignedLong(IO_REPARSE_TAG_APPEXECLINK))) return NULL; #endif return m; diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index aa1ab79..c4ebe39 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -1272,19 +1272,6 @@ exit: #if defined(MS_WINDOWS) -PyDoc_STRVAR(os__isdir__doc__, -"_isdir($module, path, /)\n" -"--\n" -"\n" -"Return true if the pathname refers to an existing directory."); - -#define OS__ISDIR_METHODDEF \ - {"_isdir", (PyCFunction)os__isdir, METH_O, os__isdir__doc__}, - -#endif /* defined(MS_WINDOWS) */ - -#if defined(MS_WINDOWS) - PyDoc_STRVAR(os__getvolumepathname__doc__, "_getvolumepathname($module, /, path)\n" "--\n" @@ -8274,10 +8261,6 @@ exit: #define OS__GETFINALPATHNAME_METHODDEF #endif /* !defined(OS__GETFINALPATHNAME_METHODDEF) */ -#ifndef OS__ISDIR_METHODDEF - #define OS__ISDIR_METHODDEF -#endif /* !defined(OS__ISDIR_METHODDEF) */ - #ifndef OS__GETVOLUMEPATHNAME_METHODDEF #define OS__GETVOLUMEPATHNAME_METHODDEF #endif /* !defined(OS__GETVOLUMEPATHNAME_METHODDEF) */ @@ -8741,4 +8724,4 @@ exit: #ifndef OS__REMOVE_DLL_DIRECTORY_METHODDEF #define OS__REMOVE_DLL_DIRECTORY_METHODDEF #endif /* !defined(OS__REMOVE_DLL_DIRECTORY_METHODDEF) */ -/*[clinic end generated code: output=1e001c855e011720 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b71eff00b91f5e43 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 4f8c074..2302678 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1625,6 +1625,7 @@ win32_wchdir(LPCWSTR path) */ #define HAVE_STAT_NSEC 1 #define HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES 1 +#define HAVE_STRUCT_STAT_ST_REPARSE_TAG 1 static void find_data_to_file_info(WIN32_FIND_DATAW *pFileData, @@ -1658,136 +1659,178 @@ attributes_from_dir(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *re return TRUE; } -static BOOL -get_target_path(HANDLE hdl, wchar_t **target_path) -{ - int buf_size, result_length; - wchar_t *buf; - - /* We have a good handle to the target, use it to determine - the target path name (then we'll call lstat on it). */ - buf_size = GetFinalPathNameByHandleW(hdl, 0, 0, - VOLUME_NAME_DOS); - if(!buf_size) - return FALSE; - - buf = (wchar_t *)PyMem_RawMalloc((buf_size + 1) * sizeof(wchar_t)); - if (!buf) { - SetLastError(ERROR_OUTOFMEMORY); - return FALSE; - } - - result_length = GetFinalPathNameByHandleW(hdl, - buf, buf_size, VOLUME_NAME_DOS); - - if(!result_length) { - PyMem_RawFree(buf); - return FALSE; - } - - buf[result_length] = 0; - - *target_path = buf; - return TRUE; -} - static int win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) { - int code; - HANDLE hFile, hFile2; - BY_HANDLE_FILE_INFORMATION info; - ULONG reparse_tag = 0; - wchar_t *target_path; - const wchar_t *dot; + HANDLE hFile; + BY_HANDLE_FILE_INFORMATION fileInfo; + FILE_ATTRIBUTE_TAG_INFO tagInfo = { 0 }; + DWORD fileType, error; + BOOL isUnhandledTag = FALSE; + int retval = 0; - hFile = CreateFileW( - path, - FILE_READ_ATTRIBUTES, /* desired access */ - 0, /* share mode */ - NULL, /* security attributes */ - OPEN_EXISTING, - /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */ - /* FILE_FLAG_OPEN_REPARSE_POINT does not follow the symlink. - Because of this, calls like GetFinalPathNameByHandle will return - the symlink path again and not the actual final path. */ - FILE_ATTRIBUTE_NORMAL|FILE_FLAG_BACKUP_SEMANTICS| - FILE_FLAG_OPEN_REPARSE_POINT, - NULL); + DWORD access = FILE_READ_ATTRIBUTES; + DWORD flags = FILE_FLAG_BACKUP_SEMANTICS; /* Allow opening directories. */ + if (!traverse) { + flags |= FILE_FLAG_OPEN_REPARSE_POINT; + } + hFile = CreateFileW(path, access, 0, NULL, OPEN_EXISTING, flags, NULL); if (hFile == INVALID_HANDLE_VALUE) { - /* Either the target doesn't exist, or we don't have access to - get a handle to it. If the former, we need to return an error. - If the latter, we can use attributes_from_dir. */ - DWORD lastError = GetLastError(); - if (lastError != ERROR_ACCESS_DENIED && - lastError != ERROR_SHARING_VIOLATION) - return -1; - /* Could not get attributes on open file. Fall back to - reading the directory. */ - if (!attributes_from_dir(path, &info, &reparse_tag)) - /* Very strange. This should not fail now */ - return -1; - if (info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { - if (traverse) { - /* Should traverse, but could not open reparse point handle */ - SetLastError(lastError); + /* Either the path doesn't exist, or the caller lacks access. */ + error = GetLastError(); + switch (error) { + case ERROR_ACCESS_DENIED: /* Cannot sync or read attributes. */ + case ERROR_SHARING_VIOLATION: /* It's a paging file. */ + /* Try reading the parent directory. */ + if (!attributes_from_dir(path, &fileInfo, &tagInfo.ReparseTag)) { + /* Cannot read the parent directory. */ + SetLastError(error); return -1; } - } - } else { - if (!GetFileInformationByHandle(hFile, &info)) { - CloseHandle(hFile); - return -1; - } - if (info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { - if (!win32_get_reparse_tag(hFile, &reparse_tag)) { - CloseHandle(hFile); - return -1; + if (fileInfo.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { + if (traverse || + !IsReparseTagNameSurrogate(tagInfo.ReparseTag)) { + /* The stat call has to traverse but cannot, so fail. */ + SetLastError(error); + return -1; + } } - /* Close the outer open file handle now that we're about to - reopen it with different flags. */ - if (!CloseHandle(hFile)) + break; + + case ERROR_INVALID_PARAMETER: + /* \\.\con requires read or write access. */ + hFile = CreateFileW(path, access | GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, + OPEN_EXISTING, flags, NULL); + if (hFile == INVALID_HANDLE_VALUE) { + SetLastError(error); return -1; + } + break; + case ERROR_CANT_ACCESS_FILE: + /* bpo37834: open unhandled reparse points if traverse fails. */ if (traverse) { - /* In order to call GetFinalPathNameByHandle we need to open - the file without the reparse handling flag set. */ - hFile2 = CreateFileW( - path, FILE_READ_ATTRIBUTES, FILE_SHARE_READ, - NULL, OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL|FILE_FLAG_BACKUP_SEMANTICS, - NULL); - if (hFile2 == INVALID_HANDLE_VALUE) - return -1; + traverse = FALSE; + isUnhandledTag = TRUE; + hFile = CreateFileW(path, access, 0, NULL, OPEN_EXISTING, + flags | FILE_FLAG_OPEN_REPARSE_POINT, NULL); + } + if (hFile == INVALID_HANDLE_VALUE) { + SetLastError(error); + return -1; + } + break; - if (!get_target_path(hFile2, &target_path)) { - CloseHandle(hFile2); - return -1; - } + default: + return -1; + } + } - if (!CloseHandle(hFile2)) { - return -1; + if (hFile != INVALID_HANDLE_VALUE) { + /* Handle types other than files on disk. */ + fileType = GetFileType(hFile); + if (fileType != FILE_TYPE_DISK) { + if (fileType == FILE_TYPE_UNKNOWN && GetLastError() != 0) { + retval = -1; + goto cleanup; + } + DWORD fileAttributes = GetFileAttributesW(path); + memset(result, 0, sizeof(*result)); + if (fileAttributes != INVALID_FILE_ATTRIBUTES && + fileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + /* \\.\pipe\ or \\.\mailslot\ */ + result->st_mode = _S_IFDIR; + } else if (fileType == FILE_TYPE_CHAR) { + /* \\.\nul */ + result->st_mode = _S_IFCHR; + } else if (fileType == FILE_TYPE_PIPE) { + /* \\.\pipe\spam */ + result->st_mode = _S_IFIFO; + } + /* FILE_TYPE_UNKNOWN, e.g. \\.\mailslot\waitfor.exe\spam */ + goto cleanup; + } + + /* Query the reparse tag, and traverse a non-link. */ + if (!traverse) { + if (!GetFileInformationByHandleEx(hFile, FileAttributeTagInfo, + &tagInfo, sizeof(tagInfo))) { + /* Allow devices that do not support FileAttributeTagInfo. */ + switch (GetLastError()) { + case ERROR_INVALID_PARAMETER: + case ERROR_INVALID_FUNCTION: + case ERROR_NOT_SUPPORTED: + tagInfo.FileAttributes = FILE_ATTRIBUTE_NORMAL; + tagInfo.ReparseTag = 0; + break; + default: + retval = -1; + goto cleanup; } + } else if (tagInfo.FileAttributes & + FILE_ATTRIBUTE_REPARSE_POINT) { + if (IsReparseTagNameSurrogate(tagInfo.ReparseTag)) { + if (isUnhandledTag) { + /* Traversing previously failed for either this link + or its target. */ + SetLastError(ERROR_CANT_ACCESS_FILE); + retval = -1; + goto cleanup; + } + /* Traverse a non-link, but not if traversing already failed + for an unhandled tag. */ + } else if (!isUnhandledTag) { + CloseHandle(hFile); + return win32_xstat_impl(path, result, TRUE); + } + } + } - code = win32_xstat_impl(target_path, result, FALSE); - PyMem_RawFree(target_path); - return code; + if (!GetFileInformationByHandle(hFile, &fileInfo)) { + switch (GetLastError()) { + case ERROR_INVALID_PARAMETER: + case ERROR_INVALID_FUNCTION: + case ERROR_NOT_SUPPORTED: + retval = -1; + goto cleanup; } - } else - CloseHandle(hFile); + /* Volumes and physical disks are block devices, e.g. + \\.\C: and \\.\PhysicalDrive0. */ + memset(result, 0, sizeof(*result)); + result->st_mode = 0x6000; /* S_IFBLK */ + goto cleanup; + } } - _Py_attribute_data_to_stat(&info, reparse_tag, result); - /* Set S_IEXEC if it is an .exe, .bat, ... */ - dot = wcsrchr(path, '.'); - if (dot) { - if (_wcsicmp(dot, L".bat") == 0 || _wcsicmp(dot, L".cmd") == 0 || - _wcsicmp(dot, L".exe") == 0 || _wcsicmp(dot, L".com") == 0) - result->st_mode |= 0111; + _Py_attribute_data_to_stat(&fileInfo, tagInfo.ReparseTag, result); + + if (!(fileInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { + /* Fix the file execute permissions. This hack sets S_IEXEC if + the filename has an extension that is commonly used by files + that CreateProcessW can execute. A real implementation calls + GetSecurityInfo, OpenThreadToken/OpenProcessToken, and + AccessCheck to check for generic read, write, and execute + access. */ + const wchar_t *fileExtension = wcsrchr(path, '.'); + if (fileExtension) { + if (_wcsicmp(fileExtension, L".exe") == 0 || + _wcsicmp(fileExtension, L".bat") == 0 || + _wcsicmp(fileExtension, L".cmd") == 0 || + _wcsicmp(fileExtension, L".com") == 0) { + result->st_mode |= 0111; + } + } } - return 0; + +cleanup: + if (hFile != INVALID_HANDLE_VALUE) { + CloseHandle(hFile); + } + + return retval; } static int @@ -1806,9 +1849,8 @@ win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) default does not traverse symlinks and instead returns attributes for the symlink. - Therefore, win32_lstat will get the attributes traditionally, and - win32_stat will first explicitly resolve the symlink target and then will - call win32_lstat on that result. */ + Instead, we will open the file (which *does* traverse symlinks by default) + and GetFileInformationByHandle(). */ static int win32_lstat(const wchar_t* path, struct _Py_stat_struct *result) @@ -1877,6 +1919,9 @@ static PyStructSequence_Field stat_result_fields[] = { #ifdef HAVE_STRUCT_STAT_ST_FSTYPE {"st_fstype", "Type of filesystem"}, #endif +#ifdef HAVE_STRUCT_STAT_ST_REPARSE_TAG + {"st_reparse_tag", "Windows reparse tag"}, +#endif {0} }; @@ -1928,6 +1973,12 @@ static PyStructSequence_Field stat_result_fields[] = { #define ST_FSTYPE_IDX ST_FILE_ATTRIBUTES_IDX #endif +#ifdef HAVE_STRUCT_STAT_ST_REPARSE_TAG +#define ST_REPARSE_TAG_IDX (ST_FSTYPE_IDX+1) +#else +#define ST_REPARSE_TAG_IDX ST_FSTYPE_IDX +#endif + static PyStructSequence_Desc stat_result_desc = { "stat_result", /* name */ stat_result__doc__, /* doc */ @@ -2155,6 +2206,10 @@ _pystat_fromstructstat(STRUCT_STAT *st) PyStructSequence_SET_ITEM(v, ST_FSTYPE_IDX, PyUnicode_FromString(st->st_fstype)); #endif +#ifdef HAVE_STRUCT_STAT_ST_REPARSE_TAG + PyStructSequence_SET_ITEM(v, ST_REPARSE_TAG_IDX, + PyLong_FromUnsignedLong(st->st_reparse_tag)); +#endif if (PyErr_Occurred()) { Py_DECREF(v); @@ -3877,8 +3932,9 @@ os__getfinalpathname_impl(PyObject *module, path_t *path) } result = PyUnicode_FromWideChar(target_path, result_length); - if (path->narrow) + if (result && path->narrow) { Py_SETREF(result, PyUnicode_EncodeFSDefault(result)); + } cleanup: if (target_path != buf) { @@ -3888,44 +3944,6 @@ cleanup: return result; } -/*[clinic input] -os._isdir - - path as arg: object - / - -Return true if the pathname refers to an existing directory. -[clinic start generated code]*/ - -static PyObject * -os__isdir(PyObject *module, PyObject *arg) -/*[clinic end generated code: output=404f334d85d4bf25 input=36cb6785874d479e]*/ -{ - DWORD attributes; - path_t path = PATH_T_INITIALIZE("_isdir", "path", 0, 0); - - if (!path_converter(arg, &path)) { - if (PyErr_ExceptionMatches(PyExc_ValueError)) { - PyErr_Clear(); - Py_RETURN_FALSE; - } - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - attributes = GetFileAttributesW(path.wide); - Py_END_ALLOW_THREADS - - path_cleanup(&path); - if (attributes == INVALID_FILE_ATTRIBUTES) - Py_RETURN_FALSE; - - if (attributes & FILE_ATTRIBUTE_DIRECTORY) - Py_RETURN_TRUE; - else - Py_RETURN_FALSE; -} - /*[clinic input] os._getvolumepathname @@ -7796,11 +7814,10 @@ os_readlink_impl(PyObject *module, path_t *path, int dir_fd) return PyBytes_FromStringAndSize(buffer, length); #elif defined(MS_WINDOWS) DWORD n_bytes_returned; - DWORD io_result; + DWORD io_result = 0; HANDLE reparse_point_handle; char target_buffer[_Py_MAXIMUM_REPARSE_DATA_BUFFER_SIZE]; _Py_REPARSE_DATA_BUFFER *rdb = (_Py_REPARSE_DATA_BUFFER *)target_buffer; - const wchar_t *print_name; PyObject *result; /* First get a handle to the reparse point */ @@ -7813,42 +7830,51 @@ os_readlink_impl(PyObject *module, path_t *path, int dir_fd) OPEN_EXISTING, FILE_FLAG_OPEN_REPARSE_POINT|FILE_FLAG_BACKUP_SEMANTICS, 0); - Py_END_ALLOW_THREADS - - if (reparse_point_handle == INVALID_HANDLE_VALUE) { - return path_error(path); + if (reparse_point_handle != INVALID_HANDLE_VALUE) { + /* New call DeviceIoControl to read the reparse point */ + io_result = DeviceIoControl( + reparse_point_handle, + FSCTL_GET_REPARSE_POINT, + 0, 0, /* in buffer */ + target_buffer, sizeof(target_buffer), + &n_bytes_returned, + 0 /* we're not using OVERLAPPED_IO */ + ); + CloseHandle(reparse_point_handle); } - - Py_BEGIN_ALLOW_THREADS - /* New call DeviceIoControl to read the reparse point */ - io_result = DeviceIoControl( - reparse_point_handle, - FSCTL_GET_REPARSE_POINT, - 0, 0, /* in buffer */ - target_buffer, sizeof(target_buffer), - &n_bytes_returned, - 0 /* we're not using OVERLAPPED_IO */ - ); - CloseHandle(reparse_point_handle); Py_END_ALLOW_THREADS if (io_result == 0) { return path_error(path); } - if (rdb->ReparseTag != IO_REPARSE_TAG_SYMLINK) + wchar_t *name = NULL; + Py_ssize_t nameLen = 0; + if (rdb->ReparseTag == IO_REPARSE_TAG_SYMLINK) { - PyErr_SetString(PyExc_ValueError, - "not a symbolic link"); - return NULL; + name = (wchar_t *)((char*)rdb->SymbolicLinkReparseBuffer.PathBuffer + + rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset); + nameLen = rdb->SymbolicLinkReparseBuffer.SubstituteNameLength / sizeof(wchar_t); } - print_name = (wchar_t *)((char*)rdb->SymbolicLinkReparseBuffer.PathBuffer + - rdb->SymbolicLinkReparseBuffer.PrintNameOffset); - - result = PyUnicode_FromWideChar(print_name, - rdb->SymbolicLinkReparseBuffer.PrintNameLength / sizeof(wchar_t)); - if (path->narrow) { - Py_SETREF(result, PyUnicode_EncodeFSDefault(result)); + else if (rdb->ReparseTag == IO_REPARSE_TAG_MOUNT_POINT) + { + name = (wchar_t *)((char*)rdb->MountPointReparseBuffer.PathBuffer + + rdb->MountPointReparseBuffer.SubstituteNameOffset); + nameLen = rdb->MountPointReparseBuffer.SubstituteNameLength / sizeof(wchar_t); + } + else + { + PyErr_SetString(PyExc_ValueError, "not a symbolic link"); + } + if (name) { + if (nameLen > 4 && wcsncmp(name, L"\\??\\", 4) == 0) { + /* Our buffer is mutable, so this is okay */ + name[1] = L'\\'; + } + result = PyUnicode_FromWideChar(name, nameLen); + if (path->narrow) { + Py_SETREF(result, PyUnicode_EncodeFSDefault(result)); + } } return result; #endif @@ -13647,7 +13673,6 @@ static PyMethodDef posix_methods[] = { OS_PATHCONF_METHODDEF OS_ABORT_METHODDEF OS__GETFULLPATHNAME_METHODDEF - OS__ISDIR_METHODDEF OS__GETDISKUSAGE_METHODDEF OS__GETFINALPATHNAME_METHODDEF OS__GETVOLUMEPATHNAME_METHODDEF diff --git a/Python/fileutils.c b/Python/fileutils.c index 55bc194..36a3c99 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -878,7 +878,12 @@ _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec); result->st_nlink = info->nNumberOfLinks; result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow; - if (reparse_tag == IO_REPARSE_TAG_SYMLINK) { + /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will + open other name surrogate reparse points without traversing them. To + detect/handle these, check st_file_attributes and st_reparse_tag. */ + result->st_reparse_tag = reparse_tag; + if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT && + reparse_tag == IO_REPARSE_TAG_SYMLINK) { /* first clear the S_IFMT bits */ result->st_mode ^= (result->st_mode & S_IFMT); /* now set the bits that make this a symlink */ -- cgit v0.12