From a74f117dab369e6c54156c7b2256769fed0c23d0 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 14 Apr 2024 00:08:03 +0100 Subject: GH-115060: Speed up `pathlib.Path.glob()` by omitting initial `stat()` (#117831) Since 6258844c, paths that might not exist can be fed into pathlib's globbing implementation, which will call `os.scandir()` / `os.lstat()` only when strictly necessary. This allows us to drop an initial `self.is_dir()` call, which saves a `stat()`. Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com> --- Doc/library/pathlib.rst | 9 +++++---- Lib/pathlib/__init__.py | 4 +--- Lib/pathlib/_abc.py | 4 +--- Lib/test/test_pathlib/test_pathlib.py | 7 +++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 3 +++ .../next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst | 3 +++ 6 files changed, 20 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index f4ed479..2e18e41 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1004,10 +1004,6 @@ call fails (for example because the path doesn't exist). .. seealso:: :ref:`pathlib-pattern-language` documentation. - This method calls :meth:`Path.is_dir` on the top-level directory and - propagates any :exc:`OSError` exception that is raised. Subsequent - :exc:`OSError` exceptions from scanning directories are suppressed. - By default, or when the *case_sensitive* keyword-only argument is set to ``None``, this method matches paths using platform-specific casing rules: typically, case-sensitive on POSIX, and case-insensitive on Windows. @@ -1028,6 +1024,11 @@ call fails (for example because the path doesn't exist). .. versionchanged:: 3.13 The *pattern* parameter accepts a :term:`path-like object`. + .. versionchanged:: 3.13 + Any :exc:`OSError` exceptions raised from scanning the filesystem are + suppressed. In previous versions, such exceptions are suppressed in many + cases, but not all. + .. method:: Path.rglob(pattern, *, case_sensitive=None, recurse_symlinks=False) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 66eb08a..a4721fb 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -607,11 +607,9 @@ class Path(_abc.PathBase, PurePath): if raw[-1] in (self.parser.sep, self.parser.altsep): # GH-65238: pathlib doesn't preserve trailing slash. Add it back. parts.append('') - if not self.is_dir(): - return iter([]) select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks) root = str(self) - paths = select(root, exists=True) + paths = select(root) # Normalize results if root == '.': diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index b51ad6f..05698d5 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -705,10 +705,8 @@ class PathBase(PurePathBase): anchor, parts = pattern._stack if anchor: raise NotImplementedError("Non-relative patterns are unsupported") - if not self.is_dir(): - return iter([]) select = self._glob_selector(parts, case_sensitive, recurse_symlinks) - return select(self, exists=True) + return select(self) def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True): """Recursively yield all existing files (of any kind, including diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 651d666..5fd1a41 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1263,6 +1263,13 @@ class PathTest(test_pathlib_abc.DummyPathTest, PurePathTest): self.assertEqual( set(P('.').glob('**/*/*')), {P("dirD/fileD")}) + def test_glob_inaccessible(self): + P = self.cls + p = P(self.base, "mydir1", "mydir2") + p.mkdir(parents=True) + p.parent.chmod(0) + self.assertEqual(set(p.glob('*')), set()) + def test_rglob_pathlike(self): P = self.cls p = P(self.base, "dirC") diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 6656b03..aadecbc 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -8,6 +8,7 @@ import unittest from pathlib._abc import UnsupportedOperation, ParserBase, PurePathBase, PathBase import posixpath +from test.support import is_wasi from test.support.os_helper import TESTFN @@ -1920,6 +1921,8 @@ class DummyPathTest(DummyPurePathTest): } self.assertEqual(given, {p / x for x in expect}) + # See https://github.com/WebAssembly/wasi-filesystem/issues/26 + @unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX") def test_glob_dotdot(self): # ".." is not special in globs. P = self.cls diff --git a/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst b/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst new file mode 100644 index 0000000..50b374a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst @@ -0,0 +1,3 @@ +Speed up :meth:`pathlib.Path.glob` by omitting an initial +:meth:`~pathlib.Path.is_dir` call. As a result of this change, +:meth:`~pathlib.Path.glob` can no longer raise :exc:`OSError`. -- cgit v0.12