diff options
-rw-r--r-- | Doc/library/glob.rst | 2 | ||||
-rw-r--r-- | Doc/whatsnew/3.6.rst | 4 | ||||
-rw-r--r-- | Lib/glob.py | 70 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
4 files changed, 49 insertions, 30 deletions
diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst index 328eef3..a8a5a50 100644 --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -14,7 +14,7 @@ The :mod:`glob` module finds all the pathnames matching a specified pattern according to the rules used by the Unix shell, although results are returned in arbitrary order. No tilde expansion is done, but ``*``, ``?``, and character ranges expressed with ``[]`` will be correctly matched. This is done by using -the :func:`os.listdir` and :func:`fnmatch.fnmatch` functions in concert, and +the :func:`os.scandir` and :func:`fnmatch.fnmatch` functions in concert, and not by actually invoking a subshell. Note that unlike :func:`fnmatch.fnmatch`, :mod:`glob` treats filenames beginning with a dot (``.``) as special cases. (For tilde and shell variable expansion, use :func:`os.path.expanduser` and diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst index 085bca3..7abfac9 100644 --- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -767,6 +767,10 @@ Optimizations Argument Clinic this overhead is significantly decreased. (Contributed by Serhiy Storchaka in :issue:`27574`). +* Optimized :func:`~glob.glob` and :func:`~glob.iglob` functions in the + :mod:`glob` module; they are now about 3--6 times faster. + (Contributed by Serhiy Storchaka in :issue:`25596`). + Build and C API Changes ======================= diff --git a/Lib/glob.py b/Lib/glob.py index 16330d8..002cd92 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -30,15 +30,16 @@ def iglob(pathname, *, recursive=False): If recursive is true, the pattern '**' will match any files and zero or more directories and subdirectories. """ - it = _iglob(pathname, recursive) + it = _iglob(pathname, recursive, False) if recursive and _isrecursive(pathname): s = next(it) # skip empty string assert not s return it -def _iglob(pathname, recursive): +def _iglob(pathname, recursive, dironly): dirname, basename = os.path.split(pathname) if not has_magic(pathname): + assert not dironly if basename: if os.path.lexists(pathname): yield pathname @@ -49,47 +50,39 @@ def _iglob(pathname, recursive): return if not dirname: if recursive and _isrecursive(basename): - yield from glob2(dirname, basename) + yield from _glob2(dirname, basename, dironly) else: - yield from glob1(dirname, basename) + yield from _glob1(dirname, basename, dironly) return # `os.path.split()` returns the argument itself as a dirname if it is a # drive or UNC path. Prevent an infinite recursion if a drive or UNC path # contains magic characters (i.e. r'\\?\C:'). if dirname != pathname and has_magic(dirname): - dirs = _iglob(dirname, recursive) + dirs = _iglob(dirname, recursive, True) else: dirs = [dirname] if has_magic(basename): if recursive and _isrecursive(basename): - glob_in_dir = glob2 + glob_in_dir = _glob2 else: - glob_in_dir = glob1 + glob_in_dir = _glob1 else: - glob_in_dir = glob0 + glob_in_dir = _glob0 for dirname in dirs: - for name in glob_in_dir(dirname, basename): + for name in glob_in_dir(dirname, basename, dironly): yield os.path.join(dirname, name) # These 2 helper functions non-recursively glob inside a literal directory. -# They return a list of basenames. `glob1` accepts a pattern while `glob0` +# They return a list of basenames. _glob1 accepts a pattern while _glob0 # takes a literal basename (so it only has to check for its existence). -def glob1(dirname, pattern): - if not dirname: - if isinstance(pattern, bytes): - dirname = bytes(os.curdir, 'ASCII') - else: - dirname = os.curdir - try: - names = os.listdir(dirname) - except OSError: - return [] +def _glob1(dirname, pattern, dironly): + names = list(_iterdir(dirname, dironly)) if not _ishidden(pattern): - names = [x for x in names if not _ishidden(x)] + names = (x for x in names if not _ishidden(x)) return fnmatch.filter(names, pattern) -def glob0(dirname, basename): +def _glob0(dirname, basename, dironly): if not basename: # `os.path.split()` returns an empty basename for paths ending with a # directory separator. 'q*x/' should match only directories. @@ -100,30 +93,49 @@ def glob0(dirname, basename): return [basename] return [] +# Following functions are not public but can be used by third-party code. + +def glob0(dirname, pattern): + return _glob0(dirname, pattern, False) + +def glob1(dirname, pattern): + return _glob1(dirname, pattern, False) + # This helper function recursively yields relative pathnames inside a literal # directory. -def glob2(dirname, pattern): +def _glob2(dirname, pattern, dironly): assert _isrecursive(pattern) yield pattern[:0] - yield from _rlistdir(dirname) + yield from _rlistdir(dirname, dironly) -# Recursively yields relative pathnames inside a literal directory. -def _rlistdir(dirname): +# If dironly is false, yields all file names inside a directory. +# If dironly is true, yields only directory names. +def _iterdir(dirname, dironly): if not dirname: if isinstance(dirname, bytes): dirname = bytes(os.curdir, 'ASCII') else: dirname = os.curdir try: - names = os.listdir(dirname) - except os.error: + with os.scandir(dirname) as it: + for entry in it: + try: + if not dironly or entry.is_dir(): + yield entry.name + except OSError: + pass + except OSError: return + +# Recursively yields relative pathnames inside a literal directory. +def _rlistdir(dirname, dironly): + names = list(_iterdir(dirname, dironly)) for x in names: if not _ishidden(x): yield x path = os.path.join(dirname, x) if dirname else x - for y in _rlistdir(path): + for y in _rlistdir(path, dironly): yield os.path.join(x, y) @@ -89,6 +89,9 @@ Core and Builtins Library ------- +- Issue #25596: Optimized glob() and iglob() functions in the + glob module; they are now about 3--6 times faster. + - Issue #27928: Add scrypt (password-based key derivation function) to hashlib module (requires OpenSSL 1.1.0). |