From 6716254e71eeb4666fd6d1a13857832caad7b19f Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 5 Mar 2023 23:50:21 +0000 Subject: GH-101362: Optimise PurePath(PurePath(...)) (GH-101667) The previous `_parse_args()` method pulled the `_parts` out of any supplied `PurePath` objects; these were subsequently joined in `_from_parts()` using `os.path.join()`. This is actually a slower form of joining than calling `fspath()` on the path object, because it doesn't take advantage of the fact that the contents of `_parts` is normalized! This reduces the time taken to run `PurePath("foo", "bar")` by ~20%, and the time taken to run `PurePath(p, "cheese")`, where `p = PurePath("/foo", "bar", "baz")`, by ~40%. Automerge-Triggered-By: GH:AlexWaygood --- Doc/library/pathlib.rst | 5 +-- Lib/pathlib.py | 36 +++++++--------------- Lib/test/test_pathlib.py | 27 ++++++++++++++++ .../2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst | 4 +++ 4 files changed, 45 insertions(+), 27 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index c8a734e..8e91936 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -105,8 +105,9 @@ we also call *flavours*: PurePosixPath('setup.py') Each element of *pathsegments* can be either a string representing a - path segment, an object implementing the :class:`os.PathLike` interface - which returns a string, or another path object:: + path segment, or an object implementing the :class:`os.PathLike` interface + where the :meth:`~os.PathLike.__fspath__` method returns a string, + such as another path object:: >>> PurePath('foo', 'some/path', 'bar') PurePosixPath('foo/some/path/bar') diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c37ff21..d375529 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -281,6 +281,14 @@ class PurePath(object): path = cls._flavour.join(*parts) sep = cls._flavour.sep altsep = cls._flavour.altsep + if isinstance(path, str): + # Force-cast str subclasses to str (issue #21127) + path = str(path) + else: + raise TypeError( + "argument should be a str or an os.PathLike " + "object where __fspath__ returns a str, " + f"not {type(path).__name__!r}") if altsep: path = path.replace(altsep, sep) drv, root, rel = cls._flavour.splitroot(path) @@ -292,31 +300,9 @@ class PurePath(object): return drv, root, parsed @classmethod - def _parse_args(cls, args): - # This is useful when you don't want to create an instance, just - # canonicalize some constructor arguments. - parts = [] - for a in args: - if isinstance(a, PurePath): - parts += a._parts - else: - a = os.fspath(a) - if isinstance(a, str): - # Force-cast str subclasses to str (issue #21127) - parts.append(str(a)) - else: - raise TypeError( - "argument should be a str object or an os.PathLike " - "object returning str, not %r" - % type(a)) - return cls._parse_parts(parts) - - @classmethod def _from_parts(cls, args): - # We need to call _parse_args on the instance, so as to get the - # right flavour. self = object.__new__(cls) - drv, root, parts = self._parse_args(args) + drv, root, parts = self._parse_parts(args) self._drv = drv self._root = root self._parts = parts @@ -575,7 +561,7 @@ class PurePath(object): anchored). """ drv1, root1, parts1 = self._drv, self._root, self._parts - drv2, root2, parts2 = self._parse_args(args) + drv2, root2, parts2 = self._parse_parts(args) if root2: if not drv2 and drv1: return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:]) @@ -662,7 +648,7 @@ class PurePath(object): return True # Can't subclass os.PathLike from PurePath and keep the constructor -# optimizations in PurePath._parse_args(). +# optimizations in PurePath.__slots__. os.PathLike.register(PurePath) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 4de91d5..df9c1f6 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -166,6 +166,33 @@ class _BasePurePathTest(object): self.assertEqual(P(P('a'), P('b')), P('a/b')) self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c"))) + def test_bytes(self): + P = self.cls + message = (r"argument should be a str or an os\.PathLike object " + r"where __fspath__ returns a str, not 'bytes'") + with self.assertRaisesRegex(TypeError, message): + P(b'a') + with self.assertRaises(TypeError): + P(b'a', 'b') + with self.assertRaises(TypeError): + P('a', b'b') + with self.assertRaises(TypeError): + P('a').joinpath(b'b') + with self.assertRaises(TypeError): + P('a') / b'b' + with self.assertRaises(TypeError): + b'a' / P('b') + with self.assertRaises(TypeError): + P('a').match(b'b') + with self.assertRaises(TypeError): + P('a').relative_to(b'b') + with self.assertRaises(TypeError): + P('a').with_name(b'b') + with self.assertRaises(TypeError): + P('a').with_stem(b'b') + with self.assertRaises(TypeError): + P('a').with_suffix(b'b') + def _check_str_subclass(self, *args): # Issue #21127: it should be possible to construct a PurePath object # from a str subclass instance, and it then gets converted to diff --git a/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst b/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst new file mode 100644 index 0000000..c05f92a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst @@ -0,0 +1,4 @@ +Speed up :class:`pathlib.PurePath` construction by handling arguments more +uniformly. When a :class:`pathlib.Path` argument is supplied, +we use its string representation rather than joining its parts +with :func:`os.path.join`. -- cgit v0.12