summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Barney Gale <barney.gale@gmail.com>, 2023-06-07 22:27:06 (GMT)
committer: GitHub <noreply@github.com>, 2023-06-07 22:27:06 (GMT)
commit: ffeaec7e60c88d585deacb10264ba7a96e5e52df (patch)
tree: 323ca7d80fc65f72e3b8bf9263d145e69c7a579a
parent: f5df347fcf5fe029edbe6bf274da0f4880401852 (diff)
download: cpython-ffeaec7e60c88d585deacb10264ba7a96e5e52df.zip
cpython-ffeaec7e60c88d585deacb10264ba7a96e5e52df.tar.gz
cpython-ffeaec7e60c88d585deacb10264ba7a96e5e52df.tar.bz2
GH-104996: Defer joining of `pathlib.PurePath()` arguments. (GH-104999)
Joining of arguments is moved to `_load_parts`, which is called when a normalized path is needed.
-rw-r--r-- Lib/pathlib.py 44
-rw-r--r-- Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst 2
2 files changed, 29 insertions, 17 deletions
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 89c7b1e..d8c597f 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -195,10 +195,10 @@ def _select_unique(paths):
yielded = set()
try:
for path in paths:
- raw_path = path._raw_path
- if raw_path not in yielded:
+ path_str = str(path)
+ if path_str not in yielded:
yield path
- yielded.add(raw_path)
+ yielded.add(path_str)
finally:
yielded.clear()
@@ -247,9 +247,9 @@ class PurePath:
"""
__slots__ = (
- # The `_raw_path` slot stores an unnormalized string path. This is set
+ # The `_raw_paths` slot stores unnormalized string paths. This is set
# in the `__init__()` method.
- '_raw_path',
+ '_raw_paths',
# The `_drv`, `_root` and `_tail_cached` slots store parsed and
# normalized parts of the path. They are set when any of the `drive`,
@@ -306,10 +306,11 @@ class PurePath:
paths = []
for arg in args:
if isinstance(arg, PurePath):
- path = arg._raw_path
if arg._flavour is ntpath and self._flavour is posixpath:
# GH-103631: Convert separators for backwards compatibility.
- path = path.replace('\\', '/')
+ paths.extend(path.replace('\\', '/') for path in arg._raw_paths)
+ else:
+ paths.extend(arg._raw_paths)
else:
try:
path = os.fspath(arg)
@@ -320,13 +321,8 @@ class PurePath:
"argument should be a str or an os.PathLike "
"object where __fspath__ returns a str, "
f"not {type(path).__name__!r}")
- paths.append(path)
- if len(paths) == 0:
- self._raw_path = ''
- elif len(paths) == 1:
- self._raw_path = paths[0]
- else:
- self._raw_path = self._flavour.join(*paths)
+ paths.append(path)
+ self._raw_paths = paths
def with_segments(self, *pathsegments):
"""Construct a new path object from any number of path-like objects.
@@ -356,7 +352,14 @@ class PurePath:
return drv, root, parsed
def _load_parts(self):
- drv, root, tail = self._parse_path(self._raw_path)
+ paths = self._raw_paths
+ if len(paths) == 0:
+ path = ''
+ elif len(paths) == 1:
+ path = paths[0]
+ else:
+ path = self._flavour.join(*paths)
+ drv, root, tail = self._parse_path(path)
self._drv = drv
self._root = root
self._tail_cached = tail
@@ -687,10 +690,17 @@ class PurePath:
def is_absolute(self):
"""True if the path is absolute (has both a root and, if applicable,
a drive)."""
- # ntpath.isabs() is defective - see GH-44626 .
if self._flavour is ntpath:
+ # ntpath.isabs() is defective - see GH-44626.
return bool(self.drive and self.root)
- return self._flavour.isabs(self._raw_path)
+ elif self._flavour is posixpath:
+ # Optimization: work with raw paths on POSIX.
+ for path in self._raw_paths:
+ if path.startswith('/'):
+ return True
+ return False
+ else:
+ return self._flavour.isabs(str(self))
def is_reserved(self):
"""Return True if the path contains one of the special names reserved
diff --git a/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst b/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst
new file mode 100644
index 0000000..8b81b68
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst
@@ -0,0 +1,2 @@
+Improve performance of :class:`pathlib.PurePath` initialisation by
+deferring joining of paths when multiple arguments are given.