summaryrefslogtreecommitdiffstats
path: root/Lib/pathlib/_abc.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/pathlib/_abc.py')
-rw-r--r--Lib/pathlib/_abc.py200
1 files changed, 35 insertions, 165 deletions
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py
index ca38a51..553f797 100644
--- a/Lib/pathlib/_abc.py
+++ b/Lib/pathlib/_abc.py
@@ -12,6 +12,8 @@ resemble pathlib's PurePath and Path respectively.
"""
import functools
+import glob
+import operator
from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
@@ -40,109 +42,23 @@ def _ignore_error(exception):
def _is_case_sensitive(parser):
return parser.normcase('Aa') == 'Aa'
-#
-# Globbing helpers
-#
-
-re = glob = None
-
-
-@functools.lru_cache(maxsize=512)
-def _compile_pattern(pat, sep, case_sensitive, recursive=True):
- """Compile given glob pattern to a re.Pattern object (observing case
- sensitivity)."""
- global re, glob
- if re is None:
- import re, glob
-
- flags = re.NOFLAG if case_sensitive else re.IGNORECASE
- regex = glob.translate(pat, recursive=recursive, include_hidden=True, seps=sep)
- return re.compile(regex, flags=flags).match
+class Globber(glob._Globber):
+ lstat = operator.methodcaller('lstat')
+ scandir = operator.methodcaller('_scandir')
+ add_slash = operator.methodcaller('joinpath', '')
-def _select_special(paths, part):
- """Yield special literal children of the given paths."""
- for path in paths:
- yield path._make_child_relpath(part)
-
-
-def _select_children(parent_paths, dir_only, match):
- """Yield direct children of given paths, filtering by name and type."""
- for parent_path in parent_paths:
- try:
- # We must close the scandir() object before proceeding to
- # avoid exhausting file descriptors when globbing deep trees.
- with parent_path._scandir() as scandir_it:
- entries = list(scandir_it)
- except OSError:
- pass
- else:
- for entry in entries:
- if dir_only:
- try:
- if not entry.is_dir():
- continue
- except OSError:
- continue
- # Avoid cost of making a path object for non-matching paths by
- # matching against the os.DirEntry.name string.
- if match is None or match(entry.name):
- yield parent_path._make_child_direntry(entry)
-
+ @staticmethod
+ def concat_path(path, text):
+ """Appends text to the given path.
+ """
+ return path.with_segments(path._raw_path + text)
-def _select_recursive(parent_paths, dir_only, follow_symlinks, match):
- """Yield given paths and all their children, recursively, filtering by
- string and type.
- """
- for parent_path in parent_paths:
- if match is not None:
- # If we're filtering paths through a regex, record the length of
- # the parent path. We'll pass it to match(path, pos=...) later.
- parent_len = len(str(parent_path._make_child_relpath('_'))) - 1
- paths = [parent_path._make_child_relpath('')]
- while paths:
- path = paths.pop()
- if match is None or match(str(path), parent_len):
- # Yield *directory* path that matches pattern (if any).
- yield path
- try:
- # We must close the scandir() object before proceeding to
- # avoid exhausting file descriptors when globbing deep trees.
- with path._scandir() as scandir_it:
- entries = list(scandir_it)
- except OSError:
- pass
- else:
- for entry in entries:
- # Handle directory entry.
- try:
- if entry.is_dir(follow_symlinks=follow_symlinks):
- # Recurse into this directory.
- paths.append(path._make_child_direntry(entry))
- continue
- except OSError:
- pass
-
- # Handle file entry.
- if not dir_only:
- # Avoid cost of making a path object for non-matching
- # files by matching against the os.DirEntry object.
- if match is None or match(path._direntry_str(entry), parent_len):
- # Yield *file* path that matches pattern (if any).
- yield path._make_child_direntry(entry)
-
-
-def _select_unique(paths):
- """Yields the given paths, filtering out duplicates."""
- yielded = set()
- try:
- for path in paths:
- path_str = str(path)
- if path_str not in yielded:
- yield path
- yielded.add(path_str)
- finally:
- yielded.clear()
+ @staticmethod
+ def parse_entry(entry):
+ """Returns the path of an entry yielded from scandir().
+ """
+ return entry
class UnsupportedOperation(NotImplementedError):
@@ -218,6 +134,7 @@ class PurePathBase:
'_resolving',
)
parser = ParserBase()
+ _globber = Globber
def __init__(self, path, *paths):
self._raw_path = self.parser.join(path, *paths) if paths else path
@@ -455,14 +372,6 @@ class PurePathBase:
return self.parser.isabs(self._raw_path)
@property
- def _pattern_stack(self):
- """Stack of path components, to be used with patterns in glob()."""
- anchor, parts = self._stack
- if anchor:
- raise NotImplementedError("Non-relative patterns are unsupported")
- return parts
-
- @property
def _pattern_str(self):
"""The path expressed as a string, for use in pattern-matching."""
return str(self)
@@ -487,8 +396,9 @@ class PurePathBase:
return False
if len(path_parts) > len(pattern_parts) and path_pattern.anchor:
return False
+ globber = self._globber(sep, case_sensitive)
for path_part, pattern_part in zip(path_parts, pattern_parts):
- match = _compile_pattern(pattern_part, sep, case_sensitive, recursive=False)
+ match = globber.compile(pattern_part)
if match(path_part) is None:
return False
return True
@@ -502,7 +412,8 @@ class PurePathBase:
pattern = self.with_segments(pattern)
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self.parser)
- match = _compile_pattern(pattern._pattern_str, pattern.parser.sep, case_sensitive)
+ globber = self._globber(pattern.parser.sep, case_sensitive, recursive=True)
+ match = globber.compile(pattern._pattern_str)
return match(self._pattern_str) is not None
@@ -772,11 +683,6 @@ class PathBase(PurePathBase):
from contextlib import nullcontext
return nullcontext(self.iterdir())
- def _direntry_str(self, entry):
- # Transform an entry yielded from _scandir() into a path string.
- # PathBase._scandir() yields PathBase objects, so use str().
- return str(entry)
-
def _make_child_direntry(self, entry):
# Transform an entry yielded from _scandir() into a path object.
# PathBase._scandir() yields PathBase objects, so this is a no-op.
@@ -785,62 +691,26 @@ class PathBase(PurePathBase):
def _make_child_relpath(self, name):
return self.joinpath(name)
+ def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
+ if case_sensitive is None:
+ case_sensitive = _is_case_sensitive(self.parser)
+ recursive = True if recurse_symlinks else glob._no_recurse_symlinks
+ globber = self._globber(self.parser.sep, case_sensitive, recursive)
+ return globber.selector(parts)
+
def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
if not isinstance(pattern, PurePathBase):
pattern = self.with_segments(pattern)
- if case_sensitive is None:
- # TODO: evaluate case-sensitivity of each directory in _select_children().
- case_sensitive = _is_case_sensitive(self.parser)
-
- stack = pattern._pattern_stack
- specials = ('', '.', '..')
- deduplicate_paths = False
- sep = self.parser.sep
- paths = iter([self] if self.is_dir() else [])
- while stack:
- part = stack.pop()
- if part in specials:
- # Join special component (e.g. '..') onto paths.
- paths = _select_special(paths, part)
-
- elif part == '**':
- # Consume following '**' components, which have no effect.
- while stack and stack[-1] == '**':
- stack.pop()
-
- # Consume following non-special components, provided we're
- # treating symlinks consistently. Each component is joined
- # onto 'part', which is used to generate an re.Pattern object.
- if recurse_symlinks:
- while stack and stack[-1] not in specials:
- part += sep + stack.pop()
-
- # If the previous loop consumed pattern components, compile an
- # re.Pattern object based on those components.
- match = _compile_pattern(part, sep, case_sensitive) if part != '**' else None
-
- # Recursively walk directories, filtering by type and regex.
- paths = _select_recursive(paths, bool(stack), recurse_symlinks, match)
-
- # De-duplicate if we've already seen a '**' component.
- if deduplicate_paths:
- paths = _select_unique(paths)
- deduplicate_paths = True
-
- elif '**' in part:
- raise ValueError("Invalid pattern: '**' can only be an entire path component")
-
- else:
- # If the pattern component isn't '*', compile an re.Pattern
- # object based on the component.
- match = _compile_pattern(part, sep, case_sensitive) if part != '*' else None
-
- # Iterate over directories' children filtering by type and regex.
- paths = _select_children(paths, bool(stack), match)
- return paths
+ anchor, parts = pattern._stack
+ if anchor:
+ raise NotImplementedError("Non-relative patterns are unsupported")
+ if not self.is_dir():
+ return iter([])
+ select = self._glob_selector(parts, case_sensitive, recurse_symlinks)
+ return select(self, exists=True)
def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
"""Recursively yield all existing files (of any kind, including