summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Doc/library/pathlib.rst 29
-rw-r--r-- Doc/whatsnew/3.14.rst 6
-rw-r--r-- Lib/pathlib/_abc.py 12
-rw-r--r-- Lib/pathlib/_local.py 8
-rw-r--r-- Lib/test/test_pathlib/test_pathlib_abc.py 67
-rw-r--r-- Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst 3
6 files changed, 114 insertions, 11 deletions
diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst
index 4380122..b6fb365 100644
--- a/Doc/library/pathlib.rst
+++ b/Doc/library/pathlib.rst
@@ -1289,6 +1289,35 @@ Reading directories
raised.
+.. method:: Path.scandir()
+
+ When the path points to a directory, return an iterator of
+ :class:`os.DirEntry` objects corresponding to entries in the directory. The
+ returned iterator supports the :term:`context manager` protocol. It is
+ implemented using :func:`os.scandir` and gives the same guarantees.
+
+ Using :meth:`~Path.scandir` instead of :meth:`~Path.iterdir` can
+ significantly increase the performance of code that also needs file type or
+ file attribute information, because :class:`os.DirEntry` objects expose
+ this information if the operating system provides it when scanning a
+ directory.
+
+ The following example displays the names of subdirectories. The
+ ``entry.is_dir()`` check will generally not make an additional system call::
+
+ >>> p = Path('docs')
+ >>> with p.scandir() as entries:
+ ... for entry in entries:
+ ... if entry.is_dir():
+ ... entry.name
+ ...
+ '_templates'
+ '_build'
+ '_static'
+
+ .. versionadded:: 3.14
+
+
.. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False)
Glob the given relative *pattern* in the directory represented by this path,
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 7f9e310..48314f9 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -380,6 +380,12 @@ pathlib
(Contributed by Barney Gale in :gh:`73991`.)
+* Add :meth:`pathlib.Path.scandir` to scan a directory and return an iterator
+ of :class:`os.DirEntry` objects. This is exactly equivalent to calling
+ :func:`os.scandir` on a path object.
+
+ (Contributed by Barney Gale in :gh:`125413`.)
+
pdb
---
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py
index 11c8018..dfff8b4 100644
--- a/Lib/pathlib/_abc.py
+++ b/Lib/pathlib/_abc.py
@@ -639,13 +639,23 @@ class PathBase(PurePathBase):
with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f:
return f.write(data)
+ def scandir(self):
+ """Yield os.DirEntry objects of the directory contents.
+
+ The children are yielded in arbitrary order, and the
+ special entries '.' and '..' are not included.
+ """
+ raise UnsupportedOperation(self._unsupported_msg('scandir()'))
+
def iterdir(self):
"""Yield path objects of the directory contents.
The children are yielded in arbitrary order, and the
special entries '.' and '..' are not included.
"""
- raise UnsupportedOperation(self._unsupported_msg('iterdir()'))
+ with self.scandir() as entries:
+ names = [entry.name for entry in entries]
+ return map(self.joinpath, names)
def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
if case_sensitive is None:
diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py
index a789971..ef072b8 100644
--- a/Lib/pathlib/_local.py
+++ b/Lib/pathlib/_local.py
@@ -615,6 +615,14 @@ class Path(PathBase, PurePath):
path_str = path_str[:-1]
yield path_str
+ def scandir(self):
+ """Yield os.DirEntry objects of the directory contents.
+
+ The children are yielded in arbitrary order, and the
+ special entries '.' and '..' are not included.
+ """
+ return os.scandir(self)
+
def iterdir(self):
"""Yield path objects of the directory contents.
diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py
index 08355a7..11e34f5 100644
--- a/Lib/test/test_pathlib/test_pathlib_abc.py
+++ b/Lib/test/test_pathlib/test_pathlib_abc.py
@@ -1,4 +1,5 @@
import collections
+import contextlib
import io
import os
import errno
@@ -1424,6 +1425,24 @@ DummyPathStatResult = collections.namedtuple(
'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime')
+class DummyDirEntry:
+ """
+ Minimal os.DirEntry-like object. Returned from DummyPath.scandir().
+ """
+ __slots__ = ('name', '_is_symlink', '_is_dir')
+
+ def __init__(self, name, is_symlink, is_dir):
+ self.name = name
+ self._is_symlink = is_symlink
+ self._is_dir = is_dir
+
+ def is_symlink(self):
+ return self._is_symlink
+
+ def is_dir(self, *, follow_symlinks=True):
+ return self._is_dir and (follow_symlinks or not self._is_symlink)
+
+
class DummyPath(PathBase):
"""
Simple implementation of PathBase that keeps files and directories in
@@ -1491,14 +1510,25 @@ class DummyPath(PathBase):
stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline)
return stream
- def iterdir(self):
- path = str(self.resolve())
- if path in self._files:
- raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path)
- elif path in self._directories:
- return iter([self / name for name in self._directories[path]])
+ @contextlib.contextmanager
+ def scandir(self):
+ path = self.resolve()
+ path_str = str(path)
+ if path_str in self._files:
+ raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path_str)
+ elif path_str in self._directories:
+ yield iter([path.joinpath(name)._dir_entry for name in self._directories[path_str]])
else:
- raise FileNotFoundError(errno.ENOENT, "File not found", path)
+ raise FileNotFoundError(errno.ENOENT, "File not found", path_str)
+
+ @property
+ def _dir_entry(self):
+ path_str = str(self)
+ is_symlink = path_str in self._symlinks
+ is_directory = (path_str in self._directories
+ if not is_symlink
+ else self._symlinks[path_str][1])
+ return DummyDirEntry(self.name, is_symlink, is_directory)
def mkdir(self, mode=0o777, parents=False, exist_ok=False):
path = str(self.parent.resolve() / self.name)
@@ -1602,7 +1632,7 @@ class DummyPathTest(DummyPurePathTest):
if self.can_symlink:
p.joinpath('linkA').symlink_to('fileA')
p.joinpath('brokenLink').symlink_to('non-existing')
- p.joinpath('linkB').symlink_to('dirB')
+ p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')
@@ -2187,6 +2217,23 @@ class DummyPathTest(DummyPurePathTest):
self.assertIn(cm.exception.errno, (errno.ENOTDIR,
errno.ENOENT, errno.EINVAL))
+ def test_scandir(self):
+ p = self.cls(self.base)
+ with p.scandir() as entries:
+ self.assertTrue(list(entries))
+ with p.scandir() as entries:
+ for entry in entries:
+ child = p / entry.name
+ self.assertIsNotNone(entry)
+ self.assertEqual(entry.name, child.name)
+ self.assertEqual(entry.is_symlink(),
+ child.is_symlink())
+ self.assertEqual(entry.is_dir(follow_symlinks=False),
+ child.is_dir(follow_symlinks=False))
+ if entry.name != 'brokenLinkLoop':
+ self.assertEqual(entry.is_dir(), child.is_dir())
+
+
def test_glob_common(self):
def _check(glob, expected):
self.assertEqual(set(glob), { P(self.base, q) for q in expected })
@@ -3038,7 +3085,7 @@ class DummyPathWithSymlinks(DummyPath):
def readlink(self):
path = str(self.parent.resolve() / self.name)
if path in self._symlinks:
- return self.with_segments(self._symlinks[path])
+ return self.with_segments(self._symlinks[path][0])
elif path in self._files or path in self._directories:
raise OSError(errno.EINVAL, "Not a symlink", path)
else:
@@ -3050,7 +3097,7 @@ class DummyPathWithSymlinks(DummyPath):
if path in self._symlinks:
raise FileExistsError(errno.EEXIST, "File exists", path)
self._directories[parent].add(self.name)
- self._symlinks[path] = str(target)
+ self._symlinks[path] = str(target), target_is_directory
class DummyPathWithSymlinksTest(DummyPathTest):
diff --git a/Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst b/Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst
new file mode 100644
index 0000000..ddf1f97
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst
@@ -0,0 +1,3 @@
+Add :meth:`pathlib.Path.scandir` method to efficiently fetch directory
+children and their file attributes. This is a trivial wrapper of
+:func:`os.scandir`.