summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Barney Gale <barney.gale@gmail.com> 2024-01-14 21:49:53 (GMT)
committer: GitHub <noreply@github.com> 2024-01-14 21:49:53 (GMT)
commit: ca6cf56330ae7751819b62748f33f23d98596703 (patch)
tree: 461b2a8b3651a1570736ff9cbed4832671d813bd
parent: c2808431b32fa7bc0d222d4549389f781f1a7333 (diff)
download: cpython-ca6cf56330ae7751819b62748f33f23d98596703.zip
cpython-ca6cf56330ae7751819b62748f33f23d98596703.tar.gz
cpython-ca6cf56330ae7751819b62748f33f23d98596703.tar.bz2
Add `pathlib._abc.PathModuleBase` (#113893)
Path modules provide a subset of the `os.path` API, specifically those functions needed to provide `PurePathBase` functionality. Each `PurePathBase` subclass references its path module via a `pathmod` class attribute. This commit adds a new `PathModuleBase` class, which provides abstract methods that unconditionally raise `UnsupportedOperation`. An instance of this class is assigned to `PurePathBase.pathmod`, replacing `posixpath`. As a result, `PurePathBase` is no longer POSIX-y by default, and all its methods raise `UnsupportedOperation` courtesy of `pathmod`. Users who subclass `PurePathBase` or `PathBase` should choose the path syntax by setting `pathmod` to `posixpath`, `ntpath`, `os.path`, or their own subclass of `PathModuleBase`, as circumstances demand.
-rw-r--r-- Lib/pathlib/__init__.py 60
-rw-r--r-- Lib/pathlib/_abc.py 124
-rw-r--r-- Lib/test/test_pathlib/test_pathlib.py 1
-rw-r--r-- Lib/test/test_pathlib/test_pathlib_abc.py 56
4 files changed, 182 insertions, 59 deletions
diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py
index e70cfe9..f14d35b 100644
--- a/Lib/pathlib/__init__.py
+++ b/Lib/pathlib/__init__.py
@@ -33,6 +33,15 @@ __all__ = [
]
+# Reference for Windows paths can be found at
+# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
+_WIN_RESERVED_NAMES = frozenset(
+ {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
+ {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} |
+ {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'}
+)
+
+
class _PathParents(Sequence):
"""This object provides sequence-like access to the logical ancestors
of a path. Don't try to construct it yourself."""
@@ -76,6 +85,10 @@ class PurePath(_abc.PurePathBase):
"""
__slots__ = (
+ # The `_raw_paths` slot stores unnormalized string paths. This is set
+ # in the `__init__()` method.
+ '_raw_paths',
+
# The `_drv`, `_root` and `_tail_cached` slots store parsed and
# normalized parts of the path. They are set when any of the `drive`,
# `root` or `_tail` properties are accessed for the first time. The
@@ -141,6 +154,26 @@ class PurePath(_abc.PurePathBase):
# Avoid calling super().__init__, as an optimisation
self._raw_paths = paths
+ def joinpath(self, *pathsegments):
+ """Combine this path with one or several arguments, and return a
+ new path representing either a subpath (if all arguments are relative
+ paths) or a totally different path (if one of the arguments is
+ anchored).
+ """
+ return self.with_segments(self, *pathsegments)
+
+ def __truediv__(self, key):
+ try:
+ return self.with_segments(self, key)
+ except TypeError:
+ return NotImplemented
+
+ def __rtruediv__(self, key):
+ try:
+ return self.with_segments(key, self)
+ except TypeError:
+ return NotImplemented
+
def __reduce__(self):
# Using the parts tuple helps share interned path parts
# when pickling related paths.
@@ -386,6 +419,33 @@ class PurePath(_abc.PurePathBase):
other = self.with_segments(other)
return other == self or other in self.parents
+ def is_absolute(self):
+ """True if the path is absolute (has both a root and, if applicable,
+ a drive)."""
+ if self.pathmod is posixpath:
+ # Optimization: work with raw paths on POSIX.
+ for path in self._raw_paths:
+ if path.startswith('/'):
+ return True
+ return False
+ return self.pathmod.isabs(self)
+
+ def is_reserved(self):
+ """Return True if the path contains one of the special names reserved
+ by the system, if any."""
+ if self.pathmod is not ntpath or not self.name:
+ return False
+
+ # NOTE: the rules for reserved names seem somewhat complicated
+ # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
+ # exist). We err on the side of caution and return True for paths
+ # which are not considered reserved by Windows.
+ if self.drive.startswith('\\\\'):
+ # UNC paths are never reserved.
+ return False
+ name = self.name.partition('.')[0].partition(':')[0].rstrip(' ')
+ return name.upper() in _WIN_RESERVED_NAMES
+
def as_uri(self):
"""Return the path as a URI."""
if not self.is_absolute():
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py
index a6956f2..1fdca00 100644
--- a/Lib/pathlib/_abc.py
+++ b/Lib/pathlib/_abc.py
@@ -12,7 +12,6 @@ resemble pathlib's PurePath and Path respectively.
"""
import functools
-import posixpath
from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
@@ -20,14 +19,6 @@ from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
# Internals
#
-# Reference for Windows paths can be found at
-# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
-_WIN_RESERVED_NAMES = frozenset(
- {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
- {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} |
- {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'}
-)
-
_WINERROR_NOT_READY = 21 # drive exists but is not accessible
_WINERROR_INVALID_NAME = 123 # fix for bpo-35306
_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself
@@ -144,6 +135,53 @@ class UnsupportedOperation(NotImplementedError):
pass
+class PathModuleBase:
+ """Base class for path modules, which do low-level path manipulation.
+
+ Path modules provide a subset of the os.path API, specifically those
+ functions needed to provide PurePathBase functionality. Each PurePathBase
+ subclass references its path module via a 'pathmod' class attribute.
+
+ Every method in this base class raises an UnsupportedOperation exception.
+ """
+
+ @classmethod
+ def _unsupported(cls, attr):
+ raise UnsupportedOperation(f"{cls.__name__}.{attr} is unsupported")
+
+ @property
+ def sep(self):
+ """The character used to separate path components."""
+ self._unsupported('sep')
+
+ def join(self, path, *paths):
+ """Join path segments."""
+ self._unsupported('join()')
+
+ def split(self, path):
+ """Split the path into a pair (head, tail), where *head* is everything
+ before the final path separator, and *tail* is everything after.
+ Either part may be empty.
+ """
+ self._unsupported('split()')
+
+ def splitroot(self, path):
+ """Split the pathname path into a 3-item tuple (drive, root, tail),
+ where *drive* is a device name or mount point, *root* is a string of
+ separators after the drive, and *tail* is everything after the root.
+ Any part may be empty."""
+ self._unsupported('splitroot()')
+
+ def normcase(self, path):
+ """Normalize the case of the path."""
+ self._unsupported('normcase()')
+
+ def isabs(self, path):
+ """Returns whether the path is absolute, i.e. unaffected by the
+ current directory or drive."""
+ self._unsupported('isabs()')
+
+
class PurePathBase:
"""Base class for pure path objects.
@@ -154,19 +192,19 @@ class PurePathBase:
"""
__slots__ = (
- # The `_raw_paths` slot stores unnormalized string paths. This is set
- # in the `__init__()` method.
- '_raw_paths',
+ # The `_raw_path` slot stores a joined string path. This is set in the
+ # `__init__()` method.
+ '_raw_path',
# The '_resolving' slot stores a boolean indicating whether the path
# is being processed by `PathBase.resolve()`. This prevents duplicate
# work from occurring when `resolve()` calls `stat()` or `readlink()`.
'_resolving',
)
- pathmod = posixpath
+ pathmod = PathModuleBase()
- def __init__(self, *paths):
- self._raw_paths = paths
+ def __init__(self, path, *paths):
+ self._raw_path = self.pathmod.join(path, *paths) if paths else path
self._resolving = False
def with_segments(self, *pathsegments):
@@ -176,11 +214,6 @@ class PurePathBase:
"""
return type(self)(*pathsegments)
- @property
- def _raw_path(self):
- """The joined but unnormalized path."""
- return self.pathmod.join(*self._raw_paths)
-
def __str__(self):
"""Return the string representation of the path, suitable for
passing to system calls."""
@@ -194,7 +227,7 @@ class PurePathBase:
@property
def drive(self):
"""The drive prefix (letter or UNC path), if any."""
- return self.pathmod.splitdrive(self._raw_path)[0]
+ return self.pathmod.splitroot(self._raw_path)[0]
@property
def root(self):
@@ -210,7 +243,7 @@ class PurePathBase:
@property
def name(self):
"""The final path component, if any."""
- return self.pathmod.basename(self._raw_path)
+ return self.pathmod.split(self._raw_path)[1]
@property
def suffix(self):
@@ -251,10 +284,10 @@ class PurePathBase:
def with_name(self, name):
"""Return a new path with the file name changed."""
- dirname = self.pathmod.dirname
- if dirname(name):
+ split = self.pathmod.split
+ if split(name)[0]:
raise ValueError(f"Invalid name {name!r}")
- return self.with_segments(dirname(self._raw_path), name)
+ return self.with_segments(split(self._raw_path)[0], name)
def with_stem(self, stem):
"""Return a new path with the stem changed."""
@@ -336,17 +369,17 @@ class PurePathBase:
paths) or a totally different path (if one of the arguments is
anchored).
"""
- return self.with_segments(*self._raw_paths, *pathsegments)
+ return self.with_segments(self._raw_path, *pathsegments)
def __truediv__(self, key):
try:
- return self.joinpath(key)
+ return self.with_segments(self._raw_path, key)
except TypeError:
return NotImplemented
def __rtruediv__(self, key):
try:
- return self.with_segments(key, *self._raw_paths)
+ return self.with_segments(key, self._raw_path)
except TypeError:
return NotImplemented
@@ -371,7 +404,7 @@ class PurePathBase:
def parent(self):
"""The logical parent of the path."""
path = self._raw_path
- parent = self.pathmod.dirname(path)
+ parent = self.pathmod.split(path)[0]
if path != parent:
parent = self.with_segments(parent)
parent._resolving = self._resolving
@@ -381,43 +414,20 @@ class PurePathBase:
@property
def parents(self):
"""A sequence of this path's logical parents."""
- dirname = self.pathmod.dirname
+ split = self.pathmod.split
path = self._raw_path
- parent = dirname(path)
+ parent = split(path)[0]
parents = []
while path != parent:
parents.append(self.with_segments(parent))
path = parent
- parent = dirname(path)
+ parent = split(path)[0]
return tuple(parents)
def is_absolute(self):
"""True if the path is absolute (has both a root and, if applicable,
a drive)."""
- if self.pathmod is posixpath:
- # Optimization: work with raw paths on POSIX.
- for path in self._raw_paths:
- if path.startswith('/'):
- return True
- return False
- else:
- return self.pathmod.isabs(self._raw_path)
-
- def is_reserved(self):
- """Return True if the path contains one of the special names reserved
- by the system, if any."""
- if self.pathmod is posixpath or not self.name:
- return False
-
- # NOTE: the rules for reserved names seem somewhat complicated
- # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
- # exist). We err on the side of caution and return True for paths
- # which are not considered reserved by Windows.
- if self.drive.startswith('\\\\'):
- # UNC paths are never reserved.
- return False
- name = self.name.partition('.')[0].partition(':')[0].rstrip(' ')
- return name.upper() in _WIN_RESERVED_NAMES
+ return self.pathmod.isabs(self._raw_path)
def match(self, path_pattern, *, case_sensitive=None):
"""
@@ -726,7 +736,7 @@ class PathBase(PurePathBase):
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
pattern_parts = list(path_pattern.parts)
- if not self.pathmod.basename(pattern):
+ if not self.pathmod.split(pattern)[1]:
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
pattern_parts.append('')
diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py
index 1b560ad..61d7939 100644
--- a/Lib/test/test_pathlib/test_pathlib.py
+++ b/Lib/test/test_pathlib/test_pathlib.py
@@ -1151,6 +1151,7 @@ class PathTest(test_pathlib_abc.DummyPathTest, PurePathTest):
def test_matches_pathbase_api(self):
our_names = {name for name in dir(self.cls) if name[0] != '_'}
+ our_names.remove('is_reserved') # only present in PurePath
path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'}
self.assertEqual(our_names, path_names)
for attr_name in our_names:
diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py
index 14df1e6..c3c568c 100644
--- a/Lib/test/test_pathlib/test_pathlib_abc.py
+++ b/Lib/test/test_pathlib/test_pathlib_abc.py
@@ -5,7 +5,7 @@ import errno
import stat
import unittest
-from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase
+from pathlib._abc import UnsupportedOperation, PathModuleBase, PurePathBase, PathBase
import posixpath
from test.support.os_helper import TESTFN
@@ -17,6 +17,20 @@ class UnsupportedOperationTest(unittest.TestCase):
self.assertTrue(isinstance(UnsupportedOperation(), NotImplementedError))
+class PathModuleBaseTest(unittest.TestCase):
+ cls = PathModuleBase
+
+ def test_unsupported_operation(self):
+ m = self.cls()
+ e = UnsupportedOperation
+ with self.assertRaises(e):
+ m.sep
+ self.assertRaises(e, m.join, 'foo')
+ self.assertRaises(e, m.split, 'foo')
+ self.assertRaises(e, m.splitroot, 'foo')
+ self.assertRaises(e, m.normcase, 'foo')
+ self.assertRaises(e, m.isabs, 'foo')
+
#
# Tests for the pure classes.
#
@@ -25,6 +39,42 @@ class UnsupportedOperationTest(unittest.TestCase):
class PurePathBaseTest(unittest.TestCase):
cls = PurePathBase
+ def test_unsupported_operation_pure(self):
+ p = self.cls('foo')
+ e = UnsupportedOperation
+ with self.assertRaises(e):
+ p.drive
+ with self.assertRaises(e):
+ p.root
+ with self.assertRaises(e):
+ p.anchor
+ with self.assertRaises(e):
+ p.parts
+ with self.assertRaises(e):
+ p.parent
+ with self.assertRaises(e):
+ p.parents
+ with self.assertRaises(e):
+ p.name
+ with self.assertRaises(e):
+ p.stem
+ with self.assertRaises(e):
+ p.suffix
+ with self.assertRaises(e):
+ p.suffixes
+ with self.assertRaises(e):
+ p / 'bar'
+ with self.assertRaises(e):
+ 'bar' / p
+ self.assertRaises(e, p.joinpath, 'bar')
+ self.assertRaises(e, p.with_name, 'bar')
+ self.assertRaises(e, p.with_stem, 'bar')
+ self.assertRaises(e, p.with_suffix, '.txt')
+ self.assertRaises(e, p.relative_to, '')
+ self.assertRaises(e, p.is_relative_to, '')
+ self.assertRaises(e, p.is_absolute)
+ self.assertRaises(e, p.match, '*')
+
def test_magic_methods(self):
P = self.cls
self.assertFalse(hasattr(P, '__fspath__'))
@@ -39,11 +89,12 @@ class PurePathBaseTest(unittest.TestCase):
self.assertIs(P.__ge__, object.__ge__)
def test_pathmod(self):
- self.assertIs(self.cls.pathmod, posixpath)
+ self.assertIsInstance(self.cls.pathmod, PathModuleBase)
class DummyPurePath(PurePathBase):
__slots__ = ()
+ pathmod = posixpath
def __eq__(self, other):
if not isinstance(other, DummyPurePath):
@@ -669,6 +720,7 @@ class DummyPath(PathBase):
memory.
"""
__slots__ = ()
+ pathmod = posixpath
_files = {}
_directories = {}