summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2015-03-08 00:58:04 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2015-03-08 00:58:04 (GMT)
commit6036e4431dbc45952550c2b730fc0d1c82e30883 (patch)
tree88e63f7cf46f6ff1b85662e088e738f3963515ef
parentadb351fcea72291b606ef03013644fd080dc4f77 (diff)
downloadcpython-6036e4431dbc45952550c2b730fc0d1c82e30883.zip
cpython-6036e4431dbc45952550c2b730fc0d1c82e30883.tar.gz
cpython-6036e4431dbc45952550c2b730fc0d1c82e30883.tar.bz2
Issue #22524: New os.scandir() function, part of the PEP 471: "os.scandir()
function -- a better and faster directory iterator". Patch written by Ben Hoyt.
-rw-r--r--Doc/library/os.rst177
-rw-r--r--Doc/whatsnew/3.5.rst19
-rw-r--r--Lib/test/test_os.py224
-rw-r--r--Misc/NEWS4
-rw-r--r--Modules/posixmodule.c818
5 files changed, 1222 insertions, 20 deletions
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index 2467c60..0014b6c 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -1601,6 +1601,11 @@ features:
Availability: Unix, Windows.
+ .. seealso::
+
+ The :func:`scandir` function returns the directory entries with more
+ information than just the name.
+
.. versionchanged:: 3.2
The *path* parameter became optional.
@@ -1893,6 +1898,178 @@ features:
The *dir_fd* parameter.
+.. function:: scandir(path='.')
+
+ Return an iterator of :class:`DirEntry` objects corresponding to the entries
+ in the directory given by *path*. The entries are yielded in arbitrary
+ order, and the special entries ``'.'`` and ``'..'`` are not included.
+
+ On Windows, *path* must be of type :class:`str`. On POSIX, *path* can be of
+ type :class:`str` or :class:`bytes`. If *path* is of type :class:`bytes`,
+ the :attr:`~DirEntry.name` and :attr:`~DirEntry.path` attributes of
+ :class:`DirEntry` are also of type ``bytes``. Use :func:`~os.fsencode` and
+ :func:`~os.fsdecode` to encode and decode paths.
+
+ The :func:`scandir` function is recommended, instead of :func:`listdir`,
+ when the file type of entries is used. In most cases, the file type of a
+ :class:`DirEntry` is retrieved directly by :func:`scandir`, no system call
+ is required. If only the name of entries is used, :func:`listdir` can
+ be more efficient than :func:`scandir`.
+
+ The following example shows a simple use of :func:`scandir` to display all
+ the files excluding directories in the given *path* that don't start with
+ ``'.'``::
+
+ for entry in os.scandir(path):
+ if not entry.name.startswith('.') and entry.is_file():
+ print(entry.name)
+
+ .. note::
+
+ On Unix-based systems, :func:`scandir` uses the system's
+ `opendir() <http://pubs.opengroup.org/onlinepubs/009695399/functions/opendir.html>`_
+ and
+ `readdir() <http://pubs.opengroup.org/onlinepubs/009695399/functions/readdir_r.html>`_
+ functions. On Windows, it uses the Win32
+ `FindFirstFileW <http://msdn.microsoft.com/en-us/library/windows/desktop/aa364418(v=vs.85).aspx>`_
+ and
+ `FindNextFileW <http://msdn.microsoft.com/en-us/library/windows/desktop/aa364428(v=vs.85).aspx>`_
+ functions.
+
+ .. seealso::
+
+ The :func:`listdir` function returns the names of the directory entries.
+
+ .. versionadded:: 3.5
+
+
+.. class:: DirEntry
+
+ Object yielded by :func:`scandir` to expose the file path and other file
+ attributes of a directory entry.
+
+ :func:`scandir` will provide as much of this information as possible without
+ making additional system calls. When a ``stat()`` or ``lstat()`` system call
+ is made, the ``DirEntry`` object caches the result.
+
+ ``DirEntry`` instances are not intended to be stored in long-lived data
+ structures; if you know the file metadata has changed or if a long time has
+ elapsed since calling :func:`scandir`, call ``os.stat(entry.path)`` to fetch
+ up-to-date information.
+
+ Because the ``DirEntry`` methods can make operating system calls, they may
+ also raise :exc:`OSError`. For example, if a file is deleted between calling
+ :func:`scandir` and calling :func:`DirEntry.stat`, a
+ :exc:`FileNotFoundError` exception can be raised. Unfortunately, the
+ behaviour on errors depends on the platform. If you need very fine-grained
+ control over errors, you can catch :exc:`OSError` when calling one of the
+ ``DirEntry`` methods and handle as appropriate.
+
+ Attributes and methods on a ``DirEntry`` instance are as follows:
+
+ .. attribute:: name
+
+ The entry's base filename, relative to the :func:`scandir` *path*
+ argument.
+
+ The :attr:`name` type is :class:`str`. On POSIX, it can be of type
+ :class:`bytes` if the type of the :func:`scandir` *path* argument is also
+ :class:`bytes`. Use :func:`~os.fsdecode` to decode the name.
+
+ .. attribute:: path
+
+ The entry's full path name: equivalent to ``os.path.join(scandir_path,
+ entry.name)`` where *scandir_path* is the :func:`scandir` *path*
+ argument. The path is only absolute if the :func:`scandir` *path*
+ argument is absolute.
+
+ The :attr:`path` type is :class:`str`. On POSIX, it can be of type
+ :class:`bytes` if the type of the :func:`scandir` *path* argument is also
+ :class:`bytes`. Use :func:`~os.fsdecode` to decode the path.
+
+ .. method:: inode()
+
+ Return the inode number of the entry.
+
+ The result is cached in the object, use ``os.stat(entry.path,
+ follow_symlinks=False).st_ino`` to fetch up-to-date information.
+
+ On POSIX, no system call is required.
+
+ .. method:: is_dir(\*, follow_symlinks=True)
+
+ If *follow_symlinks* is ``True`` (the default), return ``True`` if the
+ entry is a directory or a symbolic link pointing to a directory,
+ return ``False`` if it points to another kind of file, if it doesn't
+ exist anymore or if it is a broken symbolic link.
+
+ If *follow_symlinks* is ``False``, return ``True`` only if this entry
+ is a directory, return ``False`` if it points to a symbolic link or
+ another kind of file, if the entry doesn't exist anymore or if it is a
+ broken symbolic link.
+
+ The result is cached in the object. Call :func:`stat.S_ISDIR` with
+ :func:`os.stat` to fetch up-to-date information.
+
+ The method can raise :exc:`OSError`, such as :exc:`PermissionError`,
+ but :exc:`FileNotFoundError` is caught.
+
+ In most cases, no system call is required.
+
+ .. method:: is_file(\*, follow_symlinks=True)
+
+ If *follow_symlinks* is ``True`` (the default), return ``True`` if the
+ entry is a regular file or a symbolic link pointing to a regular file,
+ return ``False`` if it points to another kind of file, if it doesn't
+ exist anymore or if it is a broken symbolic link.
+
+ If *follow_symlinks* is ``False``, return ``True`` only if this entry
+ is a regular file, return ``False`` if it points to a symbolic link or
+ another kind of file, if it doesn't exist anymore or if it is a broken
+ symbolic link.
+
+ The result is cached in the object. Call :func:`stat.S_ISREG` with
+ :func:`os.stat` to fetch up-to-date information.
+
+ The method can raise :exc:`OSError`, such as :exc:`PermissionError`,
+ but :exc:`FileNotFoundError` is caught.
+
+ In most cases, no system call is required.
+
+ .. method:: is_symlink()
+
+ Return ``True`` if this entry is a symbolic link or a broken symbolic
+ link, return ``False`` if it points to another kind of file or if the
+ entry doesn't exist anymore.
+
+ The result is cached in the object. Call :func:`os.path.islink` to fetch
+ up-to-date information.
+
+ The method can raise :exc:`OSError`, such as :exc:`PermissionError`,
+ but :exc:`FileNotFoundError` is caught.
+
+ In most cases, no system call is required.
+
+ .. method:: stat(\*, follow_symlinks=True)
+
+ Return a :class:`stat_result` object for this entry. This function
+ normally follows symbolic links; to stat a symbolic link add the
+ argument ``follow_symlinks=False``.
+
+ On Windows, the ``st_ino``, ``st_dev`` and ``st_nlink`` attributes of the
+ :class:`stat_result` are always set to zero. Call :func:`os.stat` to
+ get these attributes.
+
+ The result is cached in the object. Call :func:`os.stat` to fetch
+ up-to-date information.
+
+ On Windows, ``DirEntry.stat(follow_symlinks=False)`` doesn't require a
+ system call. ``DirEntry.stat()`` requires a system call if the entry is a
+ symbolic link.
+
+ .. versionadded:: 3.5
+
+
.. function:: stat(path, \*, dir_fd=None, follow_symlinks=True)
Get the status of a file or a file descriptor. Perform the equivalent of a
diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
index 8027fa3..6932fcb 100644
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -111,6 +111,19 @@ Please read on for a comprehensive list of user-facing changes.
PEP written by Carl Meyer
+
+PEP 471 - os.scandir() function -- a better and faster directory iterator
+-------------------------------------------------------------------------
+
+:pep:`471` includes a new directory iteration function, :func:`os.scandir`,
+in the standard library.
+
+.. seealso::
+
+ :pep:`471` -- os.scandir() function -- a better and faster directory
+ iterator.
+
+
PEP 475: Retry system calls failing with EINTR
----------------------------------------------
@@ -118,6 +131,8 @@ PEP 475: Retry system calls failing with EINTR
this means that user code doesn't have to deal with EINTR or InterruptedError
manually, and should make it more robust against asynchronous signal reception.
+PEP and implementation written by Ben Hoyt with the help of Victor Stinner.
+
.. seealso::
:pep:`475` -- Retry system calls failing with EINTR
@@ -279,6 +294,10 @@ json
os
--
+* New :func:`os.scandir` function: Return an iterator of :class:`os.DirEntry`
+ objects corresponding to the entries in the directory given by *path*.
+ (Implementation written by Ben Hoyt with the help of Victor Stinner.)
+
* :class:`os.stat_result` now has a :attr:`~os.stat_result.st_file_attributes`
attribute on Windows. (Contributed by Ben Hoyt in :issue:`21719`.)
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index afa7a4a..e5996c4 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -2698,5 +2698,229 @@ class ExportsTests(unittest.TestCase):
self.assertIn('walk', os.__all__)
+class TestScandir(unittest.TestCase):
+ def setUp(self):
+ self.path = os.path.realpath(support.TESTFN)
+ self.addCleanup(support.rmtree, self.path)
+ os.mkdir(self.path)
+
+ def create_file(self, name="file.txt"):
+ filename = os.path.join(self.path, name)
+ with open(filename, "wb") as fp:
+ fp.write(b'python')
+ return filename
+
+ def get_entries(self, names):
+ entries = dict((entry.name, entry)
+ for entry in os.scandir(self.path))
+ self.assertEqual(sorted(entries.keys()), names)
+ return entries
+
+ def assert_stat_equal(self, stat1, stat2, skip_fields):
+ if skip_fields:
+ for attr in dir(stat1):
+ if not attr.startswith("st_"):
+ continue
+ if attr in ("st_dev", "st_ino", "st_nlink"):
+ continue
+ self.assertEqual(getattr(stat1, attr),
+ getattr(stat2, attr),
+ (stat1, stat2, attr))
+ else:
+ self.assertEqual(stat1, stat2)
+
+ def check_entry(self, entry, name, is_dir, is_file, is_symlink):
+ self.assertEqual(entry.name, name)
+ self.assertEqual(entry.path, os.path.join(self.path, name))
+ self.assertEqual(entry.inode(),
+ os.stat(entry.path, follow_symlinks=False).st_ino)
+
+ entry_stat = os.stat(entry.path)
+ self.assertEqual(entry.is_dir(),
+ stat.S_ISDIR(entry_stat.st_mode))
+ self.assertEqual(entry.is_file(),
+ stat.S_ISREG(entry_stat.st_mode))
+ self.assertEqual(entry.is_symlink(),
+ os.path.islink(entry.path))
+
+ entry_lstat = os.stat(entry.path, follow_symlinks=False)
+ self.assertEqual(entry.is_dir(follow_symlinks=False),
+ stat.S_ISDIR(entry_lstat.st_mode))
+ self.assertEqual(entry.is_file(follow_symlinks=False),
+ stat.S_ISREG(entry_lstat.st_mode))
+
+ self.assert_stat_equal(entry.stat(),
+ entry_stat,
+ os.name == 'nt' and not is_symlink)
+ self.assert_stat_equal(entry.stat(follow_symlinks=False),
+ entry_lstat,
+ os.name == 'nt')
+
+ def test_attributes(self):
+ link = hasattr(os, 'link')
+ symlink = support.can_symlink()
+
+ dirname = os.path.join(self.path, "dir")
+ os.mkdir(dirname)
+ filename = self.create_file("file.txt")
+ if link:
+ os.link(filename, os.path.join(self.path, "link_file.txt"))
+ if symlink:
+ os.symlink(dirname, os.path.join(self.path, "symlink_dir"),
+ target_is_directory=True)
+ os.symlink(filename, os.path.join(self.path, "symlink_file.txt"))
+
+ names = ['dir', 'file.txt']
+ if link:
+ names.append('link_file.txt')
+ if symlink:
+ names.extend(('symlink_dir', 'symlink_file.txt'))
+ entries = self.get_entries(names)
+
+ entry = entries['dir']
+ self.check_entry(entry, 'dir', True, False, False)
+
+ entry = entries['file.txt']
+ self.check_entry(entry, 'file.txt', False, True, False)
+
+ if link:
+ entry = entries['link_file.txt']
+ self.check_entry(entry, 'link_file.txt', False, True, False)
+
+ if symlink:
+ entry = entries['symlink_dir']
+ self.check_entry(entry, 'symlink_dir', True, False, True)
+
+ entry = entries['symlink_file.txt']
+ self.check_entry(entry, 'symlink_file.txt', False, True, True)
+
+ def get_entry(self, name):
+ entries = list(os.scandir(self.path))
+ self.assertEqual(len(entries), 1)
+
+ entry = entries[0]
+ self.assertEqual(entry.name, name)
+ return entry
+
+ def create_file_entry(self):
+ filename = self.create_file()
+ return self.get_entry(os.path.basename(filename))
+
+ def test_current_directory(self):
+ filename = self.create_file()
+ old_dir = os.getcwd()
+ try:
+ os.chdir(self.path)
+
+ # call scandir() without parameter: it must list the content
+ # of the current directory
+ entries = dict((entry.name, entry) for entry in os.scandir())
+ self.assertEqual(sorted(entries.keys()),
+ [os.path.basename(filename)])
+ finally:
+ os.chdir(old_dir)
+
+ def test_repr(self):
+ entry = self.create_file_entry()
+ self.assertEqual(repr(entry), "<DirEntry 'file.txt'>")
+
+ def test_removed_dir(self):
+ path = os.path.join(self.path, 'dir')
+
+ os.mkdir(path)
+ entry = self.get_entry('dir')
+ os.rmdir(path)
+
+ # On POSIX, is_dir() result depends if scandir() filled d_type or not
+ if os.name == 'nt':
+ self.assertTrue(entry.is_dir())
+ self.assertFalse(entry.is_file())
+ self.assertFalse(entry.is_symlink())
+ if os.name == 'nt':
+ self.assertRaises(FileNotFoundError, entry.inode)
+ # don't fail
+ entry.stat()
+ entry.stat(follow_symlinks=False)
+ else:
+ self.assertGreater(entry.inode(), 0)
+ self.assertRaises(FileNotFoundError, entry.stat)
+ self.assertRaises(FileNotFoundError, entry.stat, follow_symlinks=False)
+
+ def test_removed_file(self):
+ entry = self.create_file_entry()
+ os.unlink(entry.path)
+
+ self.assertFalse(entry.is_dir())
+ # On POSIX, is_file() result depends if scandir() filled d_type or not
+ if os.name == 'nt':
+ self.assertTrue(entry.is_file())
+ self.assertFalse(entry.is_symlink())
+ if os.name == 'nt':
+ self.assertRaises(FileNotFoundError, entry.inode)
+ # don't fail
+ entry.stat()
+ entry.stat(follow_symlinks=False)
+ else:
+ self.assertGreater(entry.inode(), 0)
+ self.assertRaises(FileNotFoundError, entry.stat)
+ self.assertRaises(FileNotFoundError, entry.stat, follow_symlinks=False)
+
+ def test_broken_symlink(self):
+ if not support.can_symlink():
+ return self.skipTest('cannot create symbolic link')
+
+ filename = self.create_file("file.txt")
+ os.symlink(filename,
+ os.path.join(self.path, "symlink.txt"))
+ entries = self.get_entries(['file.txt', 'symlink.txt'])
+ entry = entries['symlink.txt']
+ os.unlink(filename)
+
+ self.assertGreater(entry.inode(), 0)
+ self.assertFalse(entry.is_dir())
+ self.assertFalse(entry.is_file()) # broken symlink returns False
+ self.assertFalse(entry.is_dir(follow_symlinks=False))
+ self.assertFalse(entry.is_file(follow_symlinks=False))
+ self.assertTrue(entry.is_symlink())
+ self.assertRaises(FileNotFoundError, entry.stat)
+ # don't fail
+ entry.stat(follow_symlinks=False)
+
+ def test_bytes(self):
+ if os.name == "nt":
+ # On Windows, os.scandir(bytes) must raise an exception
+ self.assertRaises(TypeError, os.scandir, b'.')
+ return
+
+ self.create_file("file.txt")
+
+ path_bytes = os.fsencode(self.path)
+ entries = list(os.scandir(path_bytes))
+ self.assertEqual(len(entries), 1, entries)
+ entry = entries[0]
+
+ self.assertEqual(entry.name, b'file.txt')
+ self.assertEqual(entry.path,
+ os.fsencode(os.path.join(self.path, 'file.txt')))
+
+ def test_empty_path(self):
+ self.assertRaises(FileNotFoundError, os.scandir, '')
+
+ def test_consume_iterator_twice(self):
+ self.create_file("file.txt")
+ iterator = os.scandir(self.path)
+
+ entries = list(iterator)
+ self.assertEqual(len(entries), 1, entries)
+
+ # check that consuming the iterator twice doesn't raise exception
+ entries2 = list(iterator)
+ self.assertEqual(len(entries2), 0, entries2)
+
+ def test_bad_path_type(self):
+ for obj in [1234, 1.234, {}, []]:
+ self.assertRaises(TypeError, os.scandir, obj)
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/Misc/NEWS b/Misc/NEWS
index 4e9b2f0..343cf8f 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -17,6 +17,10 @@ Core and Builtins
Library
-------
+- Issue #22524: New os.scandir() function, part of the PEP 471: "os.scandir()
+ function -- a better and faster directory iterator". Patch written by Ben
+ Hoyt.
+
- Issue #23103: Reduced the memory consumption of IPv4Address and IPv6Address.
- Issue #21793: BaseHTTPRequestHandler again logs response code as numeric,
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index 679fc8f..c00113e 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -25,6 +25,7 @@
#define PY_SSIZE_T_CLEAN
#include "Python.h"
+#include "structmember.h"
#ifndef MS_WINDOWS
#include "posixmodule.h"
#else
@@ -373,6 +374,14 @@ static int win32_can_symlink = 0;
#define DWORD_MAX 4294967295U
#ifdef MS_WINDOWS
+#define INITFUNC PyInit_nt
+#define MODNAME "nt"
+#else
+#define INITFUNC PyInit_posix
+#define MODNAME "posix"
+#endif
+
+#ifdef MS_WINDOWS
/* defined in fileutils.c */
PyAPI_FUNC(void) _Py_time_t_to_FILE_TIME(time_t, int, FILETIME *);
PyAPI_FUNC(void) _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *,
@@ -1396,6 +1405,25 @@ attributes_from_dir(LPCSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *rep
return TRUE;
}
+/* Convert the WIN32_FIND_DATAW in *pFileData into *info and *reparse_tag.
+
+   *info is zeroed first; only the attributes, the three timestamps and the
+   two file-size words are copied, so every other field stays 0.
+   *reparse_tag is set to dwReserved0 when FILE_ATTRIBUTE_REPARSE_POINT is
+   set on the entry, and to 0 otherwise. */
+static void
+find_data_to_file_info_w(WIN32_FIND_DATAW *pFileData,
+ BY_HANDLE_FILE_INFORMATION *info,
+ ULONG *reparse_tag)
+{
+ memset(info, 0, sizeof(*info));
+ info->dwFileAttributes = pFileData->dwFileAttributes;
+ info->ftCreationTime = pFileData->ftCreationTime;
+ info->ftLastAccessTime = pFileData->ftLastAccessTime;
+ info->ftLastWriteTime = pFileData->ftLastWriteTime;
+ info->nFileSizeHigh = pFileData->nFileSizeHigh;
+ info->nFileSizeLow = pFileData->nFileSizeLow;
+/* info->nNumberOfLinks = 1; */
+ if (pFileData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
+ *reparse_tag = pFileData->dwReserved0;
+ else
+ *reparse_tag = 0;
+}
+
static BOOL
attributes_from_dir_w(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *reparse_tag)
{
@@ -1405,17 +1433,7 @@ attributes_from_dir_w(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *
if (hFindFile == INVALID_HANDLE_VALUE)
return FALSE;
FindClose(hFindFile);
- memset(info, 0, sizeof(*info));
- *reparse_tag = 0;
- info->dwFileAttributes = FileData.dwFileAttributes;
- info->ftCreationTime = FileData.ftCreationTime;
- info->ftLastAccessTime = FileData.ftLastAccessTime;
- info->ftLastWriteTime = FileData.ftLastWriteTime;
- info->nFileSizeHigh = FileData.nFileSizeHigh;
- info->nFileSizeLow = FileData.nFileSizeLow;
-/* info->nNumberOfLinks = 1; */
- if (FileData.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
- *reparse_tag = FileData.dwReserved0;
+ find_data_to_file_info_w(&FileData, info, reparse_tag);
return TRUE;
}
@@ -16330,6 +16348,766 @@ posix_set_blocking(PyObject *self, PyObject *args)
#endif /* !MS_WINDOWS */
+PyDoc_STRVAR(posix_scandir__doc__,
+"scandir(path='.') -> iterator of DirEntry objects for given path");
+
+static char *follow_symlinks_keywords[] = {"follow_symlinks", NULL};
+
+/* One directory entry, plus the data needed to answer
+   is_dir()/is_file()/is_symlink()/stat()/inode() for it. */
+typedef struct {
+ PyObject_HEAD
+ /* Exposed read-only as the "name" and "path" attributes */
+ PyObject *name;
+ PyObject *path;
+ /* Caches filled by DirEntry_get_stat() and DirEntry_get_lstat();
+    NULL until first requested */
+ PyObject *stat;
+ PyObject *lstat;
+#ifdef MS_WINDOWS
+ /* Filled by DirEntry_from_find_data() from the WIN32_FIND_DATAW */
+ struct _Py_stat_struct win32_lstat;
+ /* st_ino cached by DirEntry_inode(); valid only once got_file_index != 0 */
+ __int64 win32_file_index;
+ int got_file_index;
+#else /* POSIX */
+ /* Values handed to DirEntry_from_posix_info() */
+ unsigned char d_type;
+ ino_t d_ino;
+#endif
+} DirEntry;
+
+/* tp_dealloc: drop the four owned references (any of them may still be
+   NULL) and free the object through its type's tp_free. */
+static void
+DirEntry_dealloc(DirEntry *entry)
+{
+ Py_XDECREF(entry->name);
+ Py_XDECREF(entry->path);
+ Py_XDECREF(entry->stat);
+ Py_XDECREF(entry->lstat);
+ Py_TYPE(entry)->tp_free((PyObject *)entry);
+}
+
+/* Forward reference */
+static int
+DirEntry_test_mode(DirEntry *self, int follow_symlinks, unsigned short mode_bits);
+
+/* Set exception and return -1 on error, 0 for False, 1 for True */
+/* Windows: decided from the st_mode stored in win32_lstat.
+   POSIX: decided from d_type when it is known; for DT_UNKNOWN, fall back to
+   DirEntry_test_mode() without following symlinks. */
+static int
+DirEntry_is_symlink(DirEntry *self)
+{
+#ifdef MS_WINDOWS
+ return (self->win32_lstat.st_mode & S_IFMT) == S_IFLNK;
+#else /* POSIX */
+ if (self->d_type != DT_UNKNOWN)
+ return self->d_type == DT_LNK;
+ else
+ return DirEntry_test_mode(self, 0, S_IFLNK);
+#endif
+}
+
+/* DirEntry.is_symlink() method: wrap DirEntry_is_symlink() and return a
+   Python bool, or NULL when it reported an error (-1). */
+static PyObject *
+DirEntry_py_is_symlink(DirEntry *self)
+{
+ int result;
+
+ result = DirEntry_is_symlink(self);
+ if (result == -1)
+ return NULL;
+ return PyBool_FromLong(result);
+}
+
+/* Run stat (follow_symlinks != 0) or lstat (follow_symlinks == 0) on
+   self->path and build the result object with _pystat_fromstructstat().
+   On failure, set OSError with self->path as the filename and return NULL.
+   Nothing is cached here; caching is done by the callers
+   DirEntry_get_stat() and DirEntry_get_lstat(). */
+static PyObject *
+DirEntry_fetch_stat(DirEntry *self, int follow_symlinks)
+{
+ int result;
+ struct _Py_stat_struct st;
+
+#ifdef MS_WINDOWS
+ wchar_t *path;
+
+ path = PyUnicode_AsUnicode(self->path);
+ if (!path)
+ return NULL;
+
+ if (follow_symlinks)
+ result = win32_stat_w(path, &st);
+ else
+ result = win32_lstat_w(path, &st);
+
+ if (result != 0) {
+ return PyErr_SetExcFromWindowsErrWithFilenameObject(PyExc_OSError,
+ 0, self->path);
+ }
+#else /* POSIX */
+ PyObject *bytes;
+ char *path;
+
+ /* self->path is str or bytes; get a bytes object for the C call */
+ if (!PyUnicode_FSConverter(self->path, &bytes))
+ return NULL;
+ path = PyBytes_AS_STRING(bytes);
+
+ if (follow_symlinks)
+ result = STAT(path, &st);
+ else
+ result = LSTAT(path, &st);
+ /* path points into bytes, so release it only after the call.
+    NOTE(review): errno is read below, after this Py_DECREF -- confirm the
+    deallocation cannot modify errno. */
+ Py_DECREF(bytes);
+
+ if (result != 0)
+ return PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, self->path);
+#endif
+
+ return _pystat_fromstructstat(&st);
+}
+
+/* Return a new reference to the cached lstat result, computing it on first
+   use.  Windows builds it from the stored win32_lstat; POSIX calls
+   DirEntry_fetch_stat() without following symlinks.
+   If the computation fails, self->lstat stays NULL and NULL is returned. */
+static PyObject *
+DirEntry_get_lstat(DirEntry *self)
+{
+ if (!self->lstat) {
+#ifdef MS_WINDOWS
+ self->lstat = _pystat_fromstructstat(&self->win32_lstat);
+#else /* POSIX */
+ self->lstat = DirEntry_fetch_stat(self, 0);
+#endif
+ }
+ Py_XINCREF(self->lstat);
+ return self->lstat;
+}
+
+/* Return a new reference to the stat result for the entry, or NULL on error.
+
+   follow_symlinks == 0: same as DirEntry_get_lstat().
+   Otherwise the result is cached in self->stat.  A stat call that follows
+   the link is made only when the entry is a symlink; for any other entry
+   the lstat result is reused. */
+static PyObject *
+DirEntry_get_stat(DirEntry *self, int follow_symlinks)
+{
+ if (!follow_symlinks)
+ return DirEntry_get_lstat(self);
+
+ if (!self->stat) {
+ int result = DirEntry_is_symlink(self);
+ if (result == -1)
+ return NULL;
+ else if (result)
+ self->stat = DirEntry_fetch_stat(self, 1);
+ else
+ /* The reference returned by DirEntry_get_lstat() is kept by
+    self->stat */
+ self->stat = DirEntry_get_lstat(self);
+ }
+
+ /* No-op when the fetch above failed and self->stat is still NULL */
+ Py_XINCREF(self->stat);
+ return self->stat;
+}
+
+/* DirEntry.stat(*, follow_symlinks=True) method: parse the optional
+   keyword-only flag and delegate to DirEntry_get_stat(). */
+static PyObject *
+DirEntry_stat(DirEntry *self, PyObject *args, PyObject *kwargs)
+{
+ int follow_symlinks = 1;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|$p:DirEntry.stat",
+ follow_symlinks_keywords, &follow_symlinks))
+ return NULL;
+
+ return DirEntry_get_stat(self, follow_symlinks);
+}
+
+/* Set exception and return -1 on error, 0 for False, 1 for True */
+/* Test whether the file type of the entry equals mode_bits: S_IFDIR,
+   S_IFREG, or S_IFLNK for the DT_UNKNOWN fallback of DirEntry_is_symlink().
+
+   A stat result is requested only when the type cannot be decided from the
+   data stored in the entry: d_type is DT_UNKNOWN (POSIX), or the entry is a
+   symlink and follow_symlinks is set.  A FileNotFoundError raised by that
+   stat is cleared and reported as 0. */
+static int
+DirEntry_test_mode(DirEntry *self, int follow_symlinks, unsigned short mode_bits)
+{
+ PyObject *stat = NULL;
+ PyObject *st_mode = NULL;
+ long mode;
+ int result;
+ int is_symlink;
+ int need_stat;
+ _Py_IDENTIFIER(st_mode);
+#ifdef MS_WINDOWS
+ unsigned long dir_bits;
+#endif
+
+#ifdef MS_WINDOWS
+ is_symlink = (self->win32_lstat.st_mode & S_IFMT) == S_IFLNK;
+ need_stat = follow_symlinks && is_symlink;
+#else /* POSIX */
+ is_symlink = self->d_type == DT_LNK;
+ need_stat = self->d_type == DT_UNKNOWN || (follow_symlinks && is_symlink);
+#endif
+
+ if (need_stat) {
+ stat = DirEntry_get_stat(self, follow_symlinks);
+ if (!stat) {
+ if (PyErr_ExceptionMatches(PyExc_FileNotFoundError)) {
+ /* If file doesn't exist (anymore), then return False
+ (i.e., say it's not a file/directory) */
+ PyErr_Clear();
+ return 0;
+ }
+ goto error;
+ }
+ st_mode = _PyObject_GetAttrId(stat, &PyId_st_mode);
+ if (!st_mode)
+ goto error;
+
+ mode = PyLong_AsLong(st_mode);
+ if (mode == -1 && PyErr_Occurred())
+ goto error;
+ Py_CLEAR(st_mode);
+ Py_CLEAR(stat);
+ result = (mode & S_IFMT) == mode_bits;
+ }
+ else if (is_symlink) {
+ /* Symlink that is not followed: neither a directory nor a regular
+    file */
+ assert(mode_bits != S_IFLNK);
+ result = 0;
+ }
+ else {
+ /* Type is known from the data stored in the entry */
+ assert(mode_bits == S_IFDIR || mode_bits == S_IFREG);
+#ifdef MS_WINDOWS
+ dir_bits = self->win32_lstat.st_file_attributes & FILE_ATTRIBUTE_DIRECTORY;
+ if (mode_bits == S_IFDIR)
+ result = dir_bits != 0;
+ else
+ result = dir_bits == 0;
+#else /* POSIX */
+ if (mode_bits == S_IFDIR)
+ result = self->d_type == DT_DIR;
+ else
+ result = self->d_type == DT_REG;
+#endif
+ }
+
+ return result;
+
+error:
+ Py_XDECREF(st_mode);
+ Py_XDECREF(stat);
+ return -1;
+}
+
+/* Wrap DirEntry_test_mode(): return a Python bool, or NULL when it
+   reported an error (-1). */
+static PyObject *
+DirEntry_py_test_mode(DirEntry *self, int follow_symlinks, unsigned short mode_bits)
+{
+ int result;
+
+ result = DirEntry_test_mode(self, follow_symlinks, mode_bits);
+ if (result == -1)
+ return NULL;
+ return PyBool_FromLong(result);
+}
+
+/* DirEntry.is_dir(*, follow_symlinks=True) method: test the entry against
+   S_IFDIR. */
+static PyObject *
+DirEntry_is_dir(DirEntry *self, PyObject *args, PyObject *kwargs)
+{
+ int follow_symlinks = 1;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|$p:DirEntry.is_dir",
+ follow_symlinks_keywords, &follow_symlinks))
+ return NULL;
+
+ return DirEntry_py_test_mode(self, follow_symlinks, S_IFDIR);
+}
+
+/* DirEntry.is_file(*, follow_symlinks=True) method: test the entry against
+   S_IFREG. */
+static PyObject *
+DirEntry_is_file(DirEntry *self, PyObject *args, PyObject *kwargs)
+{
+ int follow_symlinks = 1;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|$p:DirEntry.is_file",
+ follow_symlinks_keywords, &follow_symlinks))
+ return NULL;
+
+ return DirEntry_py_test_mode(self, follow_symlinks, S_IFREG);
+}
+
+/* DirEntry.inode() method.
+   Windows: the value is obtained lazily; the first call runs
+   win32_lstat_w() on self->path and caches st_ino, later calls reuse it.
+   A failing lstat sets OSError and returns NULL without caching.
+   POSIX: return the stored d_ino as a Python int. */
+static PyObject *
+DirEntry_inode(DirEntry *self)
+{
+#ifdef MS_WINDOWS
+ if (!self->got_file_index) {
+ wchar_t *path;
+ struct _Py_stat_struct stat;
+
+ path = PyUnicode_AsUnicode(self->path);
+ if (!path)
+ return NULL;
+
+ if (win32_lstat_w(path, &stat) != 0) {
+ return PyErr_SetExcFromWindowsErrWithFilenameObject(PyExc_OSError,
+ 0, self->path);
+ }
+
+ self->win32_file_index = stat.st_ino;
+ self->got_file_index = 1;
+ }
+ return PyLong_FromLongLong((PY_LONG_LONG)self->win32_file_index);
+#else /* POSIX */
+#ifdef HAVE_LARGEFILE_SUPPORT
+ return PyLong_FromLongLong((PY_LONG_LONG)self->d_ino);
+#else
+ return PyLong_FromLong((long)self->d_ino);
+#endif
+#endif
+}
+
+/* tp_repr: "<DirEntry NAME>", where NAME is the repr() of self->name. */
+static PyObject *
+DirEntry_repr(DirEntry *self)
+{
+ return PyUnicode_FromFormat("<DirEntry %R>", self->name);
+}
+
+/* Read-only attributes of DirEntry, mapped directly onto struct fields. */
+static PyMemberDef DirEntry_members[] = {
+ {"name", T_OBJECT_EX, offsetof(DirEntry, name), READONLY,
+ "the entry's base filename, relative to scandir() \"path\" argument"},
+ {"path", T_OBJECT_EX, offsetof(DirEntry, path), READONLY,
+ "the entry's full path name; equivalent to os.path.join(scandir_path, entry.name)"},
+ {NULL}
+};
+
+/* Method table of DirEntry.  is_dir(), is_file() and stat() take the
+   keyword-only follow_symlinks argument; is_symlink() and inode() take no
+   arguments. */
+static PyMethodDef DirEntry_methods[] = {
+ {"is_dir", (PyCFunction)DirEntry_is_dir, METH_VARARGS | METH_KEYWORDS,
+ "return True if the entry is a directory; cached per entry"
+ },
+ {"is_file", (PyCFunction)DirEntry_is_file, METH_VARARGS | METH_KEYWORDS,
+ "return True if the entry is a file; cached per entry"
+ },
+ {"is_symlink", (PyCFunction)DirEntry_py_is_symlink, METH_NOARGS,
+ "return True if the entry is a symbolic link; cached per entry"
+ },
+ {"stat", (PyCFunction)DirEntry_stat, METH_VARARGS | METH_KEYWORDS,
+ "return stat_result object for the entry; cached per entry"
+ },
+ {"inode", (PyCFunction)DirEntry_inode, METH_NOARGS,
+ "return inode of the entry; cached per entry",
+ },
+ {NULL}
+};
+
+/* Type object for DirEntry.  Only tp_dealloc, tp_repr, tp_methods and
+   tp_members are filled in; the slots after tp_members are left
+   zero-initialized.  The instances built in this section are created from C
+   with PyObject_New(). */
+PyTypeObject DirEntryType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ MODNAME ".DirEntry", /* tp_name */
+ sizeof(DirEntry), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)DirEntry_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ (reprfunc)DirEntry_repr, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ 0, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ DirEntry_methods, /* tp_methods */
+ DirEntry_members, /* tp_members */
+};
+
+#ifdef MS_WINDOWS
+
+/* Windows: return a newly allocated wide string made of path_wide, a
+   separator when one is needed, and filename.  The caller releases it with
+   PyMem_Free().  Return NULL with MemoryError set if allocation fails.
+
+   path_wide == NULL stands for ".".  SEP is inserted unless path_wide is
+   empty or already ends with SEP, ALTSEP or ':'.  The file name is always
+   appended, including for an empty path_wide, which matches the POSIX
+   helper join_path_filename(). */
+static wchar_t *
+join_path_filenameW(wchar_t *path_wide, wchar_t* filename)
+{
+    Py_ssize_t path_len;
+    Py_ssize_t size;
+    wchar_t *result;
+    wchar_t ch;
+
+    if (!path_wide) { /* Default arg: "." */
+        path_wide = L".";
+        path_len = 1;
+    }
+    else {
+        path_len = wcslen(path_wide);
+    }
+
+    /* The +1's are for the path separator and the NUL */
+    size = path_len + 1 + wcslen(filename) + 1;
+    result = PyMem_New(wchar_t, size);
+    if (!result) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    wcscpy(result, path_wide);
+    if (path_len > 0) {
+        ch = result[path_len - 1];
+        if (ch != SEP && ch != ALTSEP && ch != L':')
+            result[path_len++] = SEP;
+    }
+    /* Append outside the check above: an empty path must still yield the
+       file name instead of an empty string */
+    wcscpy(result + path_len, filename);
+    return result;
+}
+
+/* Windows: build a DirEntry for the entry described by *dataW, found in the
+   directory path->wide.  name and path are str objects; win32_lstat is
+   filled from the find data.  Return NULL with an exception set on
+   failure. */
+static PyObject *
+DirEntry_from_find_data(path_t *path, WIN32_FIND_DATAW *dataW)
+{
+ DirEntry *entry;
+ BY_HANDLE_FILE_INFORMATION file_info;
+ ULONG reparse_tag;
+ wchar_t *joined_path;
+
+ entry = PyObject_New(DirEntry, &DirEntryType);
+ if (!entry)
+ return NULL;
+ /* Start from NULL fields so that the error path below can release a
+    partially built entry */
+ entry->name = NULL;
+ entry->path = NULL;
+ entry->stat = NULL;
+ entry->lstat = NULL;
+ entry->got_file_index = 0;
+
+ entry->name = PyUnicode_FromWideChar(dataW->cFileName, -1);
+ if (!entry->name)
+ goto error;
+
+ joined_path = join_path_filenameW(path->wide, dataW->cFileName);
+ if (!joined_path)
+ goto error;
+
+ entry->path = PyUnicode_FromWideChar(joined_path, -1);
+ PyMem_Free(joined_path);
+ if (!entry->path)
+ goto error;
+
+ find_data_to_file_info_w(dataW, &file_info, &reparse_tag);
+ _Py_attribute_data_to_stat(&file_info, reparse_tag, &entry->win32_lstat);
+
+ return (PyObject *)entry;
+
+error:
+ Py_DECREF(entry);
+ return NULL;
+}
+
+#else /* POSIX */
+
+/* POSIX: return a newly allocated string made of path_narrow, a '/' when
+   one is needed, and filename.  The caller releases it with PyMem_Free().
+   Return NULL with MemoryError set if allocation fails.
+
+   path_narrow == NULL stands for ".".  filename_len == -1 means "compute
+   the length with strlen()".  No '/' is inserted when path_narrow is empty
+   or already ends with '/'. */
+static char *
+join_path_filename(char *path_narrow, char* filename, Py_ssize_t filename_len)
+{
+ Py_ssize_t path_len;
+ Py_ssize_t size;
+ char *result;
+
+ if (!path_narrow) { /* Default arg: "." */
+ path_narrow = ".";
+ path_len = 1;
+ }
+ else {
+ path_len = strlen(path_narrow);
+ }
+
+ if (filename_len == -1)
+ filename_len = strlen(filename);
+
+ /* The +1's are for the path separator and the NUL */
+ size = path_len + 1 + filename_len + 1;
+ result = PyMem_New(char, size);
+ if (!result) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ strcpy(result, path_narrow);
+ if (path_len > 0 && result[path_len - 1] != '/')
+ result[path_len++] = '/';
+ /* filename is copied up to its NUL terminator; filename_len is only used
+    for sizing the buffer */
+ strcpy(result + path_len, filename);
+ return result;
+}
+
+/* POSIX: build a DirEntry for the entry `name` (name_len bytes) found in
+   the directory path->narrow, storing d_type and d_ino as given.
+   name and path are bytes objects when path->narrow is set and path->object
+   is a bytes object; otherwise they are str objects decoded with the
+   filesystem encoding.  Return NULL with an exception set on failure. */
+static PyObject *
+DirEntry_from_posix_info(path_t *path, char *name, Py_ssize_t name_len,
+ unsigned char d_type, ino_t d_ino)
+{
+ DirEntry *entry;
+ char *joined_path;
+
+ entry = PyObject_New(DirEntry, &DirEntryType);
+ if (!entry)
+ return NULL;
+ /* Start from NULL fields so that the error path below can release a
+    partially built entry */
+ entry->name = NULL;
+ entry->path = NULL;
+ entry->stat = NULL;
+ entry->lstat = NULL;
+
+ joined_path = join_path_filename(path->narrow, name, name_len);
+ if (!joined_path)
+ goto error;
+
+ if (!path->narrow || !PyBytes_Check(path->object)) {
+ entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len);
+ entry->path = PyUnicode_DecodeFSDefault(joined_path);
+ }
+ else {
+ entry->name = PyBytes_FromStringAndSize(name, name_len);
+ entry->path = PyBytes_FromString(joined_path);
+ }
+ /* Both conversions are checked together, after the buffer is freed */
+ PyMem_Free(joined_path);
+ if (!entry->name || !entry->path)
+ goto error;
+
+ entry->d_type = d_type;
+ entry->d_ino = d_ino;
+
+ return (PyObject *)entry;
+
+error:
+ Py_XDECREF(entry);
+ return NULL;
+}
+
+#endif
+
+
+/* State of the iterator that yields DirEntry objects. */
+typedef struct {
+ PyObject_HEAD
+ /* Directory being scanned; used to build entry paths and error messages */
+ path_t path;
+#ifdef MS_WINDOWS
+ /* Find handle; INVALID_HANDLE_VALUE once the iterator is closed */
+ HANDLE handle;
+ /* Data of the current entry */
+ WIN32_FIND_DATAW file_data;
+ /* Non-zero while file_data holds an entry that was not yet examined, so
+    the next step must not call FindNextFileW() first.
+    NOTE(review): presumably set together with the initial find call when
+    the iterator is created -- not visible here, confirm. */
+ int first_time;
+#else /* POSIX */
+ /* Directory stream; NULL once the iterator is closed */
+ DIR *dirp;
+#endif
+} ScandirIterator;
+
+#ifdef MS_WINDOWS
+
+/* Windows: close the find handle, releasing the GIL around FindClose(),
+   and mark the iterator as closed.  Does nothing if it is already closed. */
+static void
+ScandirIterator_close(ScandirIterator *iterator)
+{
+ if (iterator->handle == INVALID_HANDLE_VALUE)
+ return;
+
+ Py_BEGIN_ALLOW_THREADS
+ FindClose(iterator->handle);
+ Py_END_ALLOW_THREADS
+ iterator->handle = INVALID_HANDLE_VALUE;
+}
+
+/* tp_iternext for the Windows iterator.
+
+   Returns a new DirEntry for the next entry that is neither "." nor "..".
+   When the directory is exhausted the find handle is closed and
+   StopIteration is raised.  Any other FindNextFileW() failure is reported
+   through path_error() and the handle is left open. */
+static PyObject *
+ScandirIterator_iternext(ScandirIterator *iterator)
+{
+    WIN32_FIND_DATAW *found = &iterator->file_data;
+    BOOL ok;
+
+    /* Already closed: a previous pass ran to completion. */
+    if (iterator->handle == INVALID_HANDLE_VALUE) {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    for (;;) {
+        if (iterator->first_time) {
+            /* file_data still holds the entry that FindFirstFileW()
+               produced in posix_scandir(); use it before advancing. */
+            iterator->first_time = 0;
+        }
+        else {
+            Py_BEGIN_ALLOW_THREADS
+            ok = FindNextFileW(iterator->handle, found);
+            Py_END_ALLOW_THREADS
+
+            if (!ok) {
+                if (GetLastError() == ERROR_NO_MORE_FILES)
+                    break;      /* normal end of directory */
+                return path_error(&iterator->path);
+            }
+        }
+
+        /* The special entries are never reported; fetch another one. */
+        if (wcscmp(found->cFileName, L".") == 0 ||
+            wcscmp(found->cFileName, L"..") == 0)
+            continue;
+
+        return DirEntry_from_find_data(&iterator->path, found);
+    }
+
+    ScandirIterator_close(iterator);
+
+    PyErr_SetNone(PyExc_StopIteration);
+    return NULL;
+}
+
+#else /* POSIX */
+
+/* Close the POSIX directory stream, if one is still open.  Safe to call
+   repeatedly: dirp is reset to NULL afterwards. */
+static void
+ScandirIterator_close(ScandirIterator *iterator)
+{
+    if (iterator->dirp != NULL) {
+        /* closedir() is run with the GIL released. */
+        Py_BEGIN_ALLOW_THREADS
+        closedir(iterator->dirp);
+        Py_END_ALLOW_THREADS
+
+        iterator->dirp = NULL;
+    }
+}
+
+/* tp_iternext for the POSIX iterator.
+
+   Returns a new DirEntry for the next entry that is neither "." nor "..".
+   When readdir() reports the end of the directory the stream is closed
+   and StopIteration is raised.  A readdir() failure (NULL result with
+   errno set) is reported through path_error() and the stream is left
+   open. */
+static PyObject *
+ScandirIterator_iternext(ScandirIterator *iterator)
+{
+    struct dirent *ent;
+    Py_ssize_t len;
+    unsigned char type;
+
+    /* Already closed: a previous pass ran to completion. */
+    if (iterator->dirp == NULL) {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    for (;;) {
+        /* readdir() returns NULL both at end-of-directory and on error;
+           errno is cleared first so the two can be told apart. */
+        errno = 0;
+        Py_BEGIN_ALLOW_THREADS
+        ent = readdir(iterator->dirp);
+        Py_END_ALLOW_THREADS
+
+        if (ent == NULL) {
+            if (errno == 0)
+                break;          /* normal end of directory */
+            return path_error(&iterator->path);
+        }
+
+        /* The special entries "." and ".." are never reported. */
+        len = NAMLEN(ent);
+        if (ent->d_name[0] == '.') {
+            if (len == 1)
+                continue;
+            if (ent->d_name[1] == '.' && len == 2)
+                continue;
+        }
+
+#if defined(__GLIBC__) && !defined(_DIRENT_HAVE_D_TYPE)
+        type = DT_UNKNOWN;      /* System doesn't support d_type */
+#else
+        type = ent->d_type;
+#endif
+        return DirEntry_from_posix_info(&iterator->path, ent->d_name,
+                                        len, type, ent->d_ino);
+    }
+
+    ScandirIterator_close(iterator);
+
+    PyErr_SetNone(PyExc_StopIteration);
+    return NULL;
+}
+
+#endif
+
+/* tp_dealloc: close the directory enumeration if it is still open, drop
+   the path state, and free the iterator object itself. */
+static void
+ScandirIterator_dealloc(ScandirIterator *iterator)
+{
+ /* No-op when the iterator was already exhausted or never opened. */
+ ScandirIterator_close(iterator);
+ /* Balances the Py_XINCREF done in posix_scandir().
+ NOTE(review): posix_scandir() also reaches this through its error
+ path before that Py_XINCREF has run (argument parsing failure); this
+ is only balanced if path.object is still NULL in that case -- confirm
+ against path_converter. */
+ Py_XDECREF(iterator->path.object);
+ path_cleanup(&iterator->path);
+ Py_TYPE(iterator)->tp_free((PyObject *)iterator);
+}
+
+/* Type object for the iterator returned by scandir().  Only deallocation
+   and the iterator protocol are provided: tp_iter returns the object
+   itself and tp_iternext yields the entries.  Slots after tp_iternext are
+   not listed and are therefore zero-initialized.  The type is finalized
+   with PyType_Ready() in the module init function.
+   NOTE(review): the "tp_compare" label below looks like the Python 2 slot
+   name -- confirm the current name of that slot in object.h. */
+PyTypeObject ScandirIteratorType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ MODNAME ".ScandirIterator", /* tp_name */
+ sizeof(ScandirIterator), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)ScandirIterator_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ 0, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)ScandirIterator_iternext, /* tp_iternext */
+};
+
+/* Implementation of scandir(path='.').
+
+   Parses the optional "path" argument with path_converter, opens the
+   directory (FindFirstFileW() on Windows, opendir() on POSIX) and returns
+   a new ScandirIterator positioned before the first entry.  On failure
+   the partially built iterator is released and NULL is returned with an
+   exception set: whatever argument parsing raised, TypeError for a bytes
+   path on Windows, or the error produced by path_error(). */
+static PyObject *
+posix_scandir(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    static char *kwlist[] = {"path", NULL};
+    ScandirIterator *iterator;
+#ifdef MS_WINDOWS
+    wchar_t *pattern;
+#else
+    char *dir_name;
+#endif
+
+    iterator = PyObject_New(ScandirIterator, &ScandirIteratorType);
+    if (iterator == NULL)
+        return NULL;
+
+    /* Put every field the deallocator looks at into a known "nothing
+       open" state before the first possible failure. */
+    memset(&iterator->path, 0, sizeof(path_t));
+    iterator->path.function_name = "scandir";
+    iterator->path.nullable = 1;
+#ifdef MS_WINDOWS
+    iterator->handle = INVALID_HANDLE_VALUE;
+#else
+    iterator->dirp = NULL;
+#endif
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O&:scandir", kwlist,
+                                     path_converter, &iterator->path))
+        goto fail;
+
+    /* path_converter doesn't keep path.object around, so take our own
+       reference for the lifetime of the iterator; it is given back in
+       ScandirIterator_dealloc. */
+    Py_XINCREF(iterator->path.object);
+
+#ifdef MS_WINDOWS
+    if (iterator->path.narrow) {
+        PyErr_SetString(PyExc_TypeError,
+                        "os.scandir() doesn't support bytes path on Windows, use Unicode instead");
+        goto fail;
+    }
+    iterator->first_time = 1;
+
+    /* Enumerate everything in the directory via the "*.*" pattern. */
+    pattern = join_path_filenameW(iterator->path.wide, L"*.*");
+    if (pattern == NULL)
+        goto fail;
+
+    Py_BEGIN_ALLOW_THREADS
+    iterator->handle = FindFirstFileW(pattern, &iterator->file_data);
+    Py_END_ALLOW_THREADS
+
+    PyMem_Free(pattern);
+
+    if (iterator->handle == INVALID_HANDLE_VALUE) {
+        path_error(&iterator->path);
+        goto fail;
+    }
+#else /* POSIX */
+    /* No narrow path was produced (argument omitted): scan the current
+       directory. */
+    dir_name = iterator->path.narrow ? iterator->path.narrow : ".";
+
+    errno = 0;
+    Py_BEGIN_ALLOW_THREADS
+    iterator->dirp = opendir(dir_name);
+    Py_END_ALLOW_THREADS
+
+    if (iterator->dirp == NULL) {
+        path_error(&iterator->path);
+        goto fail;
+    }
+#endif
+
+    return (PyObject *)iterator;
+
+fail:
+    /* Dropping the last reference runs ScandirIterator_dealloc. */
+    Py_DECREF(iterator);
+    return NULL;
+}
+
+
/*[clinic input]
dump buffer
[clinic start generated code]*/
@@ -17002,6 +17780,9 @@ static PyMethodDef posix_methods[] = {
{"get_blocking", posix_get_blocking, METH_VARARGS, get_blocking__doc__},
{"set_blocking", posix_set_blocking, METH_VARARGS, set_blocking__doc__},
#endif
+ {"scandir", (PyCFunction)posix_scandir,
+ METH_VARARGS | METH_KEYWORDS,
+ posix_scandir__doc__},
{NULL, NULL} /* Sentinel */
};
@@ -17444,15 +18225,6 @@ all_ins(PyObject *m)
}
-#ifdef MS_WINDOWS
-#define INITFUNC PyInit_nt
-#define MODNAME "nt"
-
-#else
-#define INITFUNC PyInit_posix
-#define MODNAME "posix"
-#endif
-
static struct PyModuleDef posixmodule = {
PyModuleDef_HEAD_INIT,
MODNAME,
@@ -17673,6 +18445,12 @@ INITFUNC(void)
if (PyStructSequence_InitType2(&TerminalSizeType,
&TerminalSize_desc) < 0)
return NULL;
+
+ /* initialize scandir types */
+ if (PyType_Ready(&ScandirIteratorType) < 0)
+ return NULL;
+ if (PyType_Ready(&DirEntryType) < 0)
+ return NULL;
}
#if defined(HAVE_WAITID) && !defined(__APPLE__)
Py_INCREF((PyObject*) &WaitidResultType);