diff options
author | Barney Gale <barney.gale@gmail.com> | 2023-12-08 17:39:04 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-08 17:39:04 (GMT) |
commit | 76929fdeebc5f89655a7a535c19fdcece9728a7d (patch) | |
tree | b4d9f023e4b7951d60caadb58ada10a5b21279f2 | |
parent | 5a0137ca34deb6e1e2e890a52cb4b22d645c166b (diff) | |
download | cpython-76929fdeebc5f89655a7a535c19fdcece9728a7d.zip cpython-76929fdeebc5f89655a7a535c19fdcece9728a7d.tar.gz cpython-76929fdeebc5f89655a7a535c19fdcece9728a7d.tar.bz2 |
GH-110109: Add `pathlib._PurePathBase` (#110670)
Add a private `pathlib._PurePathBase` class: a superclass of both `PurePath` and `_PathBase`. Unlike `PurePath`, it does not define any of these special methods: `__fspath__`, `__bytes__`, `__reduce__`, `__hash__`, `__eq__`, `__lt__`, `__le__`, `__gt__`, `__ge__`. Its initializer and path-joining methods accept only strings, not `os.PathLike` objects more broadly.
This is important for supporting *virtual paths*: user subclasses of `_PathBase` that provide access to archive files, FTP servers, etc. In these classes, the above methods should be implemented by users only as appropriate, with due consideration for the hash/equality of any backing objects, such as file objects or sockets.
-rw-r--r-- | Lib/pathlib.py | 83 | ||||
-rw-r--r-- | Lib/test/test_pathlib.py | 82 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2023-10-11-02-34-01.gh-issue-110109.RFCmHs.rst | 3 |
3 files changed, 115 insertions, 53 deletions
diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c48cff3..87d1f6b 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -198,14 +198,13 @@ class _PathParents(Sequence): return "<{}.parents>".format(type(self._path).__name__) -class PurePath: - """Base class for manipulating paths without I/O. +class _PurePathBase: + """Base class for pure path objects. - PurePath represents a filesystem path and offers operations which - don't imply any actual filesystem I/O. Depending on your system, - instantiating a PurePath will return either a PurePosixPath or a - PureWindowsPath object. You can also instantiate either of these classes - directly, regardless of your system. + This class *does not* provide several magic methods that are defined in + its subclass PurePath. They are: __fspath__, __bytes__, __reduce__, + __hash__, __eq__, __lt__, __le__, __gt__, __ge__. Its initializer and path + joining methods accept only strings, not os.PathLike objects more broadly. """ __slots__ = ( @@ -227,22 +226,6 @@ class PurePath: # for the first time. It's used to implement `_str_normcase` '_str', - # The `_str_normcase_cached` slot stores the string path with - # normalized case. It is set when the `_str_normcase` property is - # accessed for the first time. It's used to implement `__eq__()` - # `__hash__()`, and `_parts_normcase` - '_str_normcase_cached', - - # The `_parts_normcase_cached` slot stores the case-normalized - # string path after splitting on path separators. It's set when the - # `_parts_normcase` property is accessed for the first time. It's used - # to implement comparison methods like `__lt__()`. - '_parts_normcase_cached', - - # The `_hash` slot stores the hash of the case-normalized string - # path. It's set when `__hash__()` is called for the first time. - '_hash', - # The '_resolving' slot stores a boolean indicating whether the path # is being processed by `_PathBase.resolve()`. 
This prevents duplicate # work from occurring when `resolve()` calls `stat()` or `readlink()`. @@ -250,6 +233,10 @@ class PurePath: ) pathmod = os.path + def __init__(self, *paths): + self._raw_paths = paths + self._resolving = False + def with_segments(self, *pathsegments): """Construct a new path object from any number of path-like objects. Subclasses may override this method to customize how new path objects @@ -444,7 +431,7 @@ class PurePath: warnings._deprecated("pathlib.PurePath.relative_to(*args)", msg, remove=(3, 14)) other = self.with_segments(other, *_deprecated) - elif not isinstance(other, PurePath): + elif not isinstance(other, _PurePathBase): other = self.with_segments(other) for step, path in enumerate(chain([other], other.parents)): if path == self or path in self.parents: @@ -468,7 +455,7 @@ class PurePath: warnings._deprecated("pathlib.PurePath.is_relative_to(*args)", msg, remove=(3, 14)) other = self.with_segments(other, *_deprecated) - elif not isinstance(other, PurePath): + elif not isinstance(other, _PurePathBase): other = self.with_segments(other) return other == self or other in self.parents @@ -487,7 +474,7 @@ class PurePath: paths) or a totally different path (if one of the arguments is anchored). """ - return self.with_segments(self, *pathsegments) + return self.with_segments(*self._raw_paths, *pathsegments) def __truediv__(self, key): try: @@ -497,7 +484,7 @@ class PurePath: def __rtruediv__(self, key): try: - return self.with_segments(key, self) + return self.with_segments(key, *self._raw_paths) except TypeError: return NotImplemented @@ -555,7 +542,7 @@ class PurePath: """ Return True if this path matches the given pattern. 
""" - if not isinstance(path_pattern, PurePath): + if not isinstance(path_pattern, _PurePathBase): path_pattern = self.with_segments(path_pattern) if case_sensitive is None: case_sensitive = _is_case_sensitive(self.pathmod) @@ -570,6 +557,35 @@ class PurePath: match = _compile_pattern(pattern_str, sep, case_sensitive) return match(str(self)) is not None + +class PurePath(_PurePathBase): + """Base class for manipulating paths without I/O. + + PurePath represents a filesystem path and offers operations which + don't imply any actual filesystem I/O. Depending on your system, + instantiating a PurePath will return either a PurePosixPath or a + PureWindowsPath object. You can also instantiate either of these classes + directly, regardless of your system. + """ + + __slots__ = ( + # The `_str_normcase_cached` slot stores the string path with + # normalized case. It is set when the `_str_normcase` property is + # accessed for the first time. It's used to implement `__eq__()` + # `__hash__()`, and `_parts_normcase` + '_str_normcase_cached', + + # The `_parts_normcase_cached` slot stores the case-normalized + # string path after splitting on path separators. It's set when the + # `_parts_normcase` property is accessed for the first time. It's used + # to implement comparison methods like `__lt__()`. + '_parts_normcase_cached', + + # The `_hash` slot stores the hash of the case-normalized string + # path. It's set when `__hash__()` is called for the first time. + '_hash', + ) + def __new__(cls, *args, **kwargs): """Construct a PurePath from one or several strings and or existing PurePath objects. 
The strings and path objects are combined so as @@ -600,8 +616,7 @@ class PurePath: "object where __fspath__ returns a str, " f"not {type(path).__name__!r}") paths.append(path) - self._raw_paths = paths - self._resolving = False + super().__init__(*paths) def __reduce__(self): # Using the parts tuple helps share interned path parts @@ -719,7 +734,7 @@ class PureWindowsPath(PurePath): # Filesystem-accessing classes -class _PathBase(PurePath): +class _PathBase(_PurePathBase): """Base class for concrete path objects. This class provides dummy implementations for many methods that derived @@ -733,8 +748,6 @@ class _PathBase(PurePath): such as paths in archive files or on remote storage systems. """ __slots__ = () - __bytes__ = None - __fspath__ = None # virtual paths have no local file system representation @classmethod def _unsupported(cls, method_name): @@ -1341,7 +1354,7 @@ class _PathBase(PurePath): self._unsupported("as_uri") -class Path(_PathBase): +class Path(_PathBase, PurePath): """PurePath subclass that can make system calls. Path represents a filesystem path but unlike PurePath, also offers @@ -1351,8 +1364,6 @@ class Path(_PathBase): but cannot instantiate a WindowsPath on a POSIX system or vice versa. """ __slots__ = () - __bytes__ = PurePath.__bytes__ - __fspath__ = PurePath.__fspath__ as_uri = PurePath.as_uri def __init__(self, *args, **kwargs): diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index ea92214..d35516a 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -49,8 +49,35 @@ if hasattr(os, 'geteuid'): # Tests for the pure classes. 
# -class PurePathTest(unittest.TestCase): - cls = pathlib.PurePath + +class PurePathBaseTest(unittest.TestCase): + cls = pathlib._PurePathBase + + def test_magic_methods(self): + P = self.cls + self.assertFalse(hasattr(P, '__fspath__')) + self.assertFalse(hasattr(P, '__bytes__')) + self.assertIs(P.__reduce__, object.__reduce__) + self.assertIs(P.__hash__, object.__hash__) + self.assertIs(P.__eq__, object.__eq__) + self.assertIs(P.__lt__, object.__lt__) + self.assertIs(P.__le__, object.__le__) + self.assertIs(P.__gt__, object.__gt__) + self.assertIs(P.__ge__, object.__ge__) + + +class DummyPurePath(pathlib._PurePathBase): + def __eq__(self, other): + if not isinstance(other, DummyPurePath): + return NotImplemented + return str(self) == str(other) + + def __hash__(self): + return hash(str(self)) + + +class DummyPurePathTest(unittest.TestCase): + cls = DummyPurePath # Keys are canonical paths, values are list of tuples of arguments # supposed to produce equal paths. @@ -82,12 +109,6 @@ class PurePathTest(unittest.TestCase): P('/a', 'b', 'c') P('a/b/c') P('/a/b/c') - P(FakePath("a/b/c")) - self.assertEqual(P(P('a')), P('a')) - self.assertEqual(P(P('a'), 'b'), P('a/b')) - self.assertEqual(P(P('a'), P('b')), P('a/b')) - self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c"))) - self.assertEqual(P(P('./a:b')), P('./a:b')) def test_concrete_class(self): if self.cls is pathlib.PurePath: @@ -193,8 +214,6 @@ class PurePathTest(unittest.TestCase): self.assertIs(type(pp), type(p)) pp = p.joinpath('c', 'd') self.assertEqual(pp, P('a/b/c/d')) - pp = p.joinpath(P('c')) - self.assertEqual(pp, P('a/b/c')) pp = p.joinpath('/c') self.assertEqual(pp, P('/c')) @@ -211,8 +230,6 @@ class PurePathTest(unittest.TestCase): self.assertEqual(pp, P('a/b/c/d')) pp = 'c' / p / 'd' self.assertEqual(pp, P('c/a/b/d')) - pp = p / P('c') - self.assertEqual(pp, P('a/b/c')) pp = p/ '/c' self.assertEqual(pp, P('/c')) @@ -678,6 +695,29 @@ class PurePathTest(unittest.TestCase): 
self.assertFalse(p.is_relative_to('')) self.assertFalse(p.is_relative_to(P('a'))) + +class PurePathTest(DummyPurePathTest): + cls = pathlib.PurePath + + def test_constructor_nested(self): + P = self.cls + P(FakePath("a/b/c")) + self.assertEqual(P(P('a')), P('a')) + self.assertEqual(P(P('a'), 'b'), P('a/b')) + self.assertEqual(P(P('a'), P('b')), P('a/b')) + self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c"))) + self.assertEqual(P(P('./a:b')), P('./a:b')) + + def test_join_nested(self): + P = self.cls + p = P('a/b').joinpath(P('c')) + self.assertEqual(p, P('a/b/c')) + + def test_div_nested(self): + P = self.cls + p = P('a/b') / P('c') + self.assertEqual(p, P('a/b/c')) + def test_pickling_common(self): P = self.cls p = P('/a/b') @@ -1545,7 +1585,7 @@ class PurePathSubclassTest(PurePathTest): # Tests for the virtual classes. # -class PathBaseTest(PurePathTest): +class PathBaseTest(PurePathBaseTest): cls = pathlib._PathBase def test_unsupported_operation(self): @@ -1636,6 +1676,14 @@ class DummyPath(pathlib._PathBase): _directories = {} _symlinks = {} + def __eq__(self, other): + if not isinstance(other, DummyPath): + return NotImplemented + return str(self) == str(other) + + def __hash__(self): + return hash(str(self)) + def stat(self, *, follow_symlinks=True): if follow_symlinks: path = str(self.resolve()) @@ -1707,7 +1755,7 @@ class DummyPath(pathlib._PathBase): self.mkdir(mode, parents=False, exist_ok=exist_ok) -class DummyPathTest(unittest.TestCase): +class DummyPathTest(DummyPurePathTest): """Tests for PathBase methods that use stat(), open() and iterdir().""" cls = DummyPath @@ -2014,7 +2062,7 @@ class DummyPathTest(unittest.TestCase): def test_rglob_common(self): def _check(glob, expected): - self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected)) + self.assertEqual(set(glob), {P(BASE, q) for q in expected}) P = self.cls p = P(BASE) it = p.rglob("fileA") @@ -2198,7 +2246,7 @@ class DummyPathTest(unittest.TestCase): # directory_depth > 
recursion_limit directory_depth = recursion_limit + 10 base = self.cls(BASE, 'deep') - path = self.cls(base, *(['d'] * directory_depth)) + path = base.joinpath(*(['d'] * directory_depth)) path.mkdir(parents=True) with set_recursion_limit(recursion_limit): @@ -2741,7 +2789,7 @@ class DummyPathTest(unittest.TestCase): # directory_depth > recursion_limit directory_depth = recursion_limit + 10 base = self.cls(BASE, 'deep') - path = self.cls(base, *(['d'] * directory_depth)) + path = base.joinpath(*(['d'] * directory_depth)) path.mkdir(parents=True) with set_recursion_limit(recursion_limit): diff --git a/Misc/NEWS.d/next/Library/2023-10-11-02-34-01.gh-issue-110109.RFCmHs.rst b/Misc/NEWS.d/next/Library/2023-10-11-02-34-01.gh-issue-110109.RFCmHs.rst new file mode 100644 index 0000000..4f12d12 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-10-11-02-34-01.gh-issue-110109.RFCmHs.rst @@ -0,0 +1,3 @@ +Add private ``pathlib._PurePathBase`` class: a base class for +:class:`pathlib.PurePath` that omits certain magic methods. It may be made +public (along with ``_PathBase``) in future. |