From e5b08ddddf1099f04bf65e63017de840bd4b5980 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 27 Jan 2023 00:28:27 +0000 Subject: gh-101000: Add os.path.splitroot() (#101002) Co-authored-by: Eryk Sun Co-authored-by: Alex Waygood --- Doc/library/os.path.rst | 33 +++++ Doc/whatsnew/3.12.rst | 11 +- Lib/ntpath.py | 126 +++++++++++-------- Lib/pathlib.py | 24 ++-- Lib/posixpath.py | 43 +++++-- Lib/test/test_ntpath.py | 138 ++++++++++++++------- Lib/test/test_pathlib.py | 37 ------ Lib/test/test_posixpath.py | 29 +++++ .../2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst | 3 + 9 files changed, 279 insertions(+), 165 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 42bbe24..786c2fd 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -488,6 +488,39 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. +.. function:: splitroot(path) + + Split the pathname *path* into a 3-item tuple ``(drive, root, tail)`` where + *drive* is a device name or mount point, *root* is a string of separators + after the drive, and *tail* is everything after the root. Any of these + items may be the empty string. In all cases, ``drive + root + tail`` will + be the same as *path*. + + On POSIX systems, *drive* is always empty. The *root* may be empty (if *path* is + relative), a single forward slash (if *path* is absolute), or two forward slashes + (implementation-defined per `IEEE Std 1003.1-2017; 4.13 Pathname Resolution + `_.) + For example:: + + >>> splitroot('/home/sam') + ('', '/', 'home/sam') + >>> splitroot('//home/sam') + ('', '//', 'home/sam') + >>> splitroot('///home/sam') + ('', '/', '//home/sam') + + On Windows, *drive* may be empty, a drive-letter name, a UNC share, or a device + name. The *root* may be empty, a forward slash, or a backward slash. For + example:: + + >>> splitroot('C:/Users/Sam') + ('C:', '/', 'Users/Sam') + >>> splitroot('//Server/Share/Users/Sam') + ('//Server/Share', '/', 'Users/Sam') + + .. versionadded:: 3.12 + + .. function:: splitext(path) Split the pathname *path* into a pair ``(root, ext)`` such that ``root + ext == diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 2f9ca11..a071159 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -288,13 +288,18 @@ os for a process with :func:`os.pidfd_open` in non-blocking mode. (Contributed by Kumar Aditya in :gh:`93312`.) -* Add :func:`os.path.isjunction` to check if a given path is a junction. - (Contributed by Charles Machalow in :gh:`99547`.) - * :class:`os.DirEntry` now includes an :meth:`os.DirEntry.is_junction` method to check if the entry is a junction. (Contributed by Charles Machalow in :gh:`99547`.) +os.path +------- + +* Add :func:`os.path.isjunction` to check if a given path is a junction. + (Contributed by Charles Machalow in :gh:`99547`.) + +* Add :func:`os.path.splitroot` to split a path into a triad + ``(drive, root, tail)``. (Contributed by Barney Gale in :gh:`101000`.) shutil ------ diff --git a/Lib/ntpath.py b/Lib/ntpath.py index cd7fb58..f9ee8e0 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -24,7 +24,7 @@ import genericpath from genericpath import * -__all__ = ["normcase","isabs","join","splitdrive","split","splitext", +__all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", "getatime","getctime", "islink","exists","lexists","isdir","isfile", "ismount", "expanduser","expandvars","normpath","abspath", @@ -117,19 +117,21 @@ def join(path, *paths): try: if not paths: path[:0] + sep #23780: Ensure compatible data type even if p is null. - result_drive, result_path = splitdrive(path) + result_drive, result_root, result_path = splitroot(path) for p in map(os.fspath, paths): - p_drive, p_path = splitdrive(p) - if p_path and p_path[0] in seps: + p_drive, p_root, p_path = splitroot(p) + if p_root: # Second path is absolute if p_drive or not result_drive: result_drive = p_drive + result_root = p_root result_path = p_path continue elif p_drive and p_drive != result_drive: if p_drive.lower() != result_drive.lower(): # Different drives => ignore the first path entirely result_drive = p_drive + result_root = p_root result_path = p_path continue # Same drive in different case @@ -139,10 +141,10 @@ def join(path, *paths): result_path = result_path + sep result_path = result_path + p_path ## add separator between UNC and non-absolute path - if (result_path and result_path[0] not in seps and + if (result_path and not result_root and result_drive and result_drive[-1:] != colon): return result_drive + sep + result_path - return result_drive + result_path + return result_drive + result_root + result_path except (TypeError, AttributeError, BytesWarning): genericpath._check_arg_types('join', path, *paths) raise @@ -170,34 +172,60 @@ def splitdrive(p): Paths cannot contain both a drive letter and a UNC path. """ + drive, root, tail = splitroot(p) + return drive, root + tail + + +def splitroot(p): + """Split a pathname into drive, root and tail. The drive is defined + exactly as in splitdrive(). On Windows, the root may be a single path + separator or an empty string. The tail contains anything after the root. + For example: + + splitroot('//server/share/') == ('//server/share', '/', '') + splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') + splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') + splitroot('Windows/notepad') == ('', '', 'Windows/notepad') + """ p = os.fspath(p) - if len(p) >= 2: - if isinstance(p, bytes): - sep = b'\\' - altsep = b'/' - colon = b':' - unc_prefix = b'\\\\?\\UNC\\' - else: - sep = '\\' - altsep = '/' - colon = ':' - unc_prefix = '\\\\?\\UNC\\' - normp = p.replace(altsep, sep) - if normp[0:2] == sep * 2: + if isinstance(p, bytes): + sep = b'\\' + altsep = b'/' + colon = b':' + unc_prefix = b'\\\\?\\UNC\\' + empty = b'' + else: + sep = '\\' + altsep = '/' + colon = ':' + unc_prefix = '\\\\?\\UNC\\' + empty = '' + normp = p.replace(altsep, sep) + if normp[:1] == sep: + if normp[1:2] == sep: # UNC drives, e.g. \\server\share or \\?\UNC\server\share # Device drives, e.g. \\.\device or \\?\device start = 8 if normp[:8].upper() == unc_prefix else 2 index = normp.find(sep, start) if index == -1: - return p, p[:0] + return p, empty, empty index2 = normp.find(sep, index + 1) if index2 == -1: - return p, p[:0] - return p[:index2], p[index2:] - if normp[1:2] == colon: - # Drive-letter drives, e.g. X: - return p[:2], p[2:] - return p[:0], p + return p, empty, empty + return p[:index2], p[index2:index2 + 1], p[index2 + 1:] + else: + # Relative path with root, e.g. \Windows + return empty, p[:1], p[1:] + elif normp[1:2] == colon: + if normp[2:3] == sep: + # Absolute drive-letter path, e.g. X:\Windows + return p[:2], p[2:3], p[3:] + else: + # Relative path with drive, e.g. X:Windows + return p[:2], empty, p[2:] + else: + # Relative path, e.g. Windows + return empty, empty, p # Split a path in head (everything up to the last '/') and tail (the @@ -212,15 +240,13 @@ def split(p): Either part may be empty.""" p = os.fspath(p) seps = _get_bothseps(p) - d, p = splitdrive(p) + d, r, p = splitroot(p) # set i to index beyond p's last slash i = len(p) while i and p[i-1] not in seps: i -= 1 head, tail = p[:i], p[i:] # now tail has no slashes - # remove trailing slashes from head, unless it's all slashes - head = head.rstrip(seps) or head - return d + head, tail + return d + r + head.rstrip(seps), tail # Split a path in root and extension. @@ -311,10 +337,10 @@ def ismount(path): path = os.fspath(path) seps = _get_bothseps(path) path = abspath(path) - root, rest = splitdrive(path) - if root and root[0] in seps: - return (not rest) or (rest in seps) - if rest and rest in seps: + drive, root, rest = splitroot(path) + if drive and drive[0] in seps: + return not rest + if root and not rest: return True if _getvolumepathname: @@ -525,13 +551,8 @@ except ImportError: curdir = '.' pardir = '..' path = path.replace(altsep, sep) - prefix, path = splitdrive(path) - - # collapse initial backslashes - if path.startswith(sep): - prefix += sep - path = path.lstrip(sep) - + drive, root, path = splitroot(path) + prefix = drive + root comps = path.split(sep) i = 0 while i < len(comps): @@ -541,7 +562,7 @@ except ImportError: if i > 0 and comps[i-1] != pardir: del comps[i-1:i+1] i -= 1 - elif i == 0 and prefix.endswith(sep): + elif i == 0 and root: del comps[i] else: i += 1 @@ -765,8 +786,8 @@ def relpath(path, start=None): try: start_abs = abspath(normpath(start)) path_abs = abspath(normpath(path)) - start_drive, start_rest = splitdrive(start_abs) - path_drive, path_rest = splitdrive(path_abs) + start_drive, _, start_rest = splitroot(start_abs) + path_drive, _, path_rest = splitroot(path_abs) if normcase(start_drive) != normcase(path_drive): raise ValueError("path is on mount %r, start on mount %r" % ( path_drive, start_drive)) @@ -816,21 +837,19 @@ def commonpath(paths): curdir = '.' try: - drivesplits = [splitdrive(p.replace(altsep, sep).lower()) for p in paths] - split_paths = [p.split(sep) for d, p in drivesplits] + drivesplits = [splitroot(p.replace(altsep, sep).lower()) for p in paths] + split_paths = [p.split(sep) for d, r, p in drivesplits] - try: - isabs, = set(p[:1] == sep for d, p in drivesplits) - except ValueError: - raise ValueError("Can't mix absolute and relative paths") from None + if len({r for d, r, p in drivesplits}) != 1: + raise ValueError("Can't mix absolute and relative paths") # Check that all drive letters or UNC paths match. The check is made only # now otherwise type errors for mixing strings and bytes would not be # caught. - if len(set(d for d, p in drivesplits)) != 1: + if len({d for d, r, p in drivesplits}) != 1: raise ValueError("Paths don't have the same drive") - drive, path = splitdrive(paths[0].replace(altsep, sep)) + drive, root, path = splitroot(paths[0].replace(altsep, sep)) common = path.split(sep) common = [c for c in common if c and c != curdir] @@ -844,8 +863,7 @@ def commonpath(paths): else: common = common[:len(s1)] - prefix = drive + sep if isabs else drive - return prefix + sep.join(common) + return drive + root + sep.join(common) except (TypeError, AttributeError): genericpath._check_arg_types('commonpath', *paths) raise diff --git a/Lib/pathlib.py b/Lib/pathlib.py index ae7a62f..17659bc 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -272,19 +272,6 @@ class PurePath(object): return (self.__class__, tuple(self._parts)) @classmethod - def _split_root(cls, part): - sep = cls._flavour.sep - rel = cls._flavour.splitdrive(part)[1].lstrip(sep) - anchor = part.removesuffix(rel) - if anchor: - anchor = cls._flavour.normpath(anchor) - drv, root = cls._flavour.splitdrive(anchor) - if drv.startswith(sep): - # UNC paths always have a root. - root = sep - return drv, root, rel - - @classmethod def _parse_parts(cls, parts): if not parts: return '', '', [] @@ -293,7 +280,10 @@ class PurePath(object): path = cls._flavour.join(*parts) if altsep: path = path.replace(altsep, sep) - drv, root, rel = cls._split_root(path) + drv, root, rel = cls._flavour.splitroot(path) + if drv.startswith(sep): + # pathlib assumes that UNC paths always have a root. + root = sep unfiltered_parsed = [drv + root] + rel.split(sep) parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.'] return drv, root, parsed @@ -493,9 +483,9 @@ class PurePath(object): """Return a new path with the file name changed.""" if not self.name: raise ValueError("%r has an empty name" % (self,)) - drv, root, parts = self._parse_parts((name,)) - if (not name or name[-1] in [self._flavour.sep, self._flavour.altsep] - or drv or root or len(parts) != 1): + f = self._flavour + drv, root, tail = f.splitroot(name) + if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail): raise ValueError("Invalid name %r" % (name)) return self._from_parsed_parts(self._drv, self._root, self._parts[:-1] + [name]) diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 737f8a5..32b5d6e 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -28,7 +28,7 @@ import stat import genericpath from genericpath import * -__all__ = ["normcase","isabs","join","splitdrive","split","splitext", +__all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", "getatime","getctime","islink","exists","lexists","isdir","isfile", "ismount", "expanduser","expandvars","normpath","abspath", @@ -135,6 +135,35 @@ def splitdrive(p): return p[:0], p +def splitroot(p): + """Split a pathname into drive, root and tail. On Posix, drive is always + empty; the root may be empty, a single slash, or two slashes. The tail + contains anything after the root. For example: + + splitroot('foo/bar') == ('', '', 'foo/bar') + splitroot('/foo/bar') == ('', '/', 'foo/bar') + splitroot('//foo/bar') == ('', '//', 'foo/bar') + splitroot('///foo/bar') == ('', '/', '//foo/bar') + """ + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'/' + empty = b'' + else: + sep = '/' + empty = '' + if p[:1] != sep: + # Relative path, e.g.: 'foo' + return empty, empty, p + elif p[1:2] != sep or p[2:3] == sep: + # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. + return empty, sep, p[1:] + else: + # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see + # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 + return empty, p[:2], p[2:] + + # Return the tail (basename) part of a path, same as split(path)[1]. def basename(p): @@ -372,13 +401,7 @@ except ImportError: dotdot = '..' if path == empty: return dot - initial_slashes = path.startswith(sep) - # POSIX allows one or two initial slashes, but treats three or more - # as single slash. - # (see https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13) - if (initial_slashes and - path.startswith(sep*2) and not path.startswith(sep*3)): - initial_slashes = 2 + _, initial_slashes, path = splitroot(path) comps = path.split(sep) new_comps = [] for comp in comps: @@ -390,9 +413,7 @@ except ImportError: elif new_comps: new_comps.pop() comps = new_comps - path = sep.join(comps) - if initial_slashes: - path = sep*initial_slashes + path + path = initial_slashes + sep.join(comps) return path or dot else: diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index f56de0b..bce38a5 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -98,57 +98,106 @@ class TestNtpath(NtpathTestCase): tester('ntpath.splitext("c:a/b\\c.d")', ('c:a/b\\c', '.d')) def test_splitdrive(self): - tester('ntpath.splitdrive("c:\\foo\\bar")', - ('c:', '\\foo\\bar')) - tester('ntpath.splitdrive("c:/foo/bar")', - ('c:', '/foo/bar')) + tester("ntpath.splitdrive('')", ('', '')) + tester("ntpath.splitdrive('foo')", ('', 'foo')) + tester("ntpath.splitdrive('foo\\bar')", ('', 'foo\\bar')) + tester("ntpath.splitdrive('foo/bar')", ('', 'foo/bar')) + tester("ntpath.splitdrive('\\')", ('', '\\')) + tester("ntpath.splitdrive('/')", ('', '/')) + tester("ntpath.splitdrive('\\foo\\bar')", ('', '\\foo\\bar')) + tester("ntpath.splitdrive('/foo/bar')", ('', '/foo/bar')) + tester('ntpath.splitdrive("c:foo\\bar")', ('c:', 'foo\\bar')) + tester('ntpath.splitdrive("c:foo/bar")', ('c:', 'foo/bar')) + tester('ntpath.splitdrive("c:\\foo\\bar")', ('c:', '\\foo\\bar')) + tester('ntpath.splitdrive("c:/foo/bar")', ('c:', '/foo/bar')) + tester("ntpath.splitdrive('\\\\')", ('\\\\', '')) + tester("ntpath.splitdrive('//')", ('//', '')) tester('ntpath.splitdrive("\\\\conky\\mountpoint\\foo\\bar")', ('\\\\conky\\mountpoint', '\\foo\\bar')) tester('ntpath.splitdrive("//conky/mountpoint/foo/bar")', ('//conky/mountpoint', '/foo/bar')) - tester('ntpath.splitdrive("\\\\\\conky\\mountpoint\\foo\\bar")', - ('\\\\\\conky', '\\mountpoint\\foo\\bar')) - tester('ntpath.splitdrive("///conky/mountpoint/foo/bar")', - ('///conky', '/mountpoint/foo/bar')) - tester('ntpath.splitdrive("\\\\conky\\\\mountpoint\\foo\\bar")', - ('\\\\conky\\', '\\mountpoint\\foo\\bar')) - tester('ntpath.splitdrive("//conky//mountpoint/foo/bar")', - ('//conky/', '/mountpoint/foo/bar')) - # Issue #19911: UNC part containing U+0130 - self.assertEqual(ntpath.splitdrive('//conky/MOUNTPOİNT/foo/bar'), - ('//conky/MOUNTPOİNT', '/foo/bar')) - # gh-81790: support device namespace, including UNC drives. - tester('ntpath.splitdrive("//?/c:")', ("//?/c:", "")) - tester('ntpath.splitdrive("//?/c:/")', ("//?/c:", "/")) - tester('ntpath.splitdrive("//?/c:/dir")', ("//?/c:", "/dir")) - tester('ntpath.splitdrive("//?/UNC")', ("//?/UNC", "")) - tester('ntpath.splitdrive("//?/UNC/")', ("//?/UNC/", "")) - tester('ntpath.splitdrive("//?/UNC/server/")', ("//?/UNC/server/", "")) - tester('ntpath.splitdrive("//?/UNC/server/share")', ("//?/UNC/server/share", "")) - tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir")) - tester('ntpath.splitdrive("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")', - ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/spam')) - tester('ntpath.splitdrive("//?/BootPartition/")', ("//?/BootPartition", "/")) - - tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", "")) - tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\")) - tester('ntpath.splitdrive("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\dir")) - tester('ntpath.splitdrive("\\\\?\\UNC")', ("\\\\?\\UNC", "")) - tester('ntpath.splitdrive("\\\\?\\UNC\\")', ("\\\\?\\UNC\\", "")) - tester('ntpath.splitdrive("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", "")) - tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")', ("\\\\?\\UNC\\server\\share", "")) tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share\\dir")', ("\\\\?\\UNC\\server\\share", "\\dir")) - tester('ntpath.splitdrive("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")', - ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\spam')) - tester('ntpath.splitdrive("\\\\?\\BootPartition\\")', ("\\\\?\\BootPartition", "\\")) + tester('ntpath.splitdrive("//?/UNC/server/share/dir")', + ("//?/UNC/server/share", "/dir")) + + def test_splitroot(self): + tester("ntpath.splitroot('')", ('', '', '')) + tester("ntpath.splitroot('foo')", ('', '', 'foo')) + tester("ntpath.splitroot('foo\\bar')", ('', '', 'foo\\bar')) + tester("ntpath.splitroot('foo/bar')", ('', '', 'foo/bar')) + tester("ntpath.splitroot('\\')", ('', '\\', '')) + tester("ntpath.splitroot('/')", ('', '/', '')) + tester("ntpath.splitroot('\\foo\\bar')", ('', '\\', 'foo\\bar')) + tester("ntpath.splitroot('/foo/bar')", ('', '/', 'foo/bar')) + tester('ntpath.splitroot("c:foo\\bar")', ('c:', '', 'foo\\bar')) + tester('ntpath.splitroot("c:foo/bar")', ('c:', '', 'foo/bar')) + tester('ntpath.splitroot("c:\\foo\\bar")', ('c:', '\\', 'foo\\bar')) + tester('ntpath.splitroot("c:/foo/bar")', ('c:', '/', 'foo/bar')) + + # Redundant slashes are not included in the root. + tester("ntpath.splitroot('c:\\\\a')", ('c:', '\\', '\\a')) + tester("ntpath.splitroot('c:\\\\\\a/b')", ('c:', '\\', '\\\\a/b')) + + # Mixed path separators. + tester("ntpath.splitroot('c:/\\')", ('c:', '/', '\\')) + tester("ntpath.splitroot('c:\\/')", ('c:', '\\', '/')) + tester("ntpath.splitroot('/\\a/b\\/\\')", ('/\\a/b', '\\', '/\\')) + tester("ntpath.splitroot('\\/a\\b/\\/')", ('\\/a\\b', '/', '\\/')) + + # UNC paths. + tester("ntpath.splitroot('\\\\')", ('\\\\', '', '')) + tester("ntpath.splitroot('//')", ('//', '', '')) + tester('ntpath.splitroot("\\\\conky\\mountpoint\\foo\\bar")', + ('\\\\conky\\mountpoint', '\\', 'foo\\bar')) + tester('ntpath.splitroot("//conky/mountpoint/foo/bar")', + ('//conky/mountpoint', '/', 'foo/bar')) + tester('ntpath.splitroot("\\\\\\conky\\mountpoint\\foo\\bar")', + ('\\\\\\conky', '\\', 'mountpoint\\foo\\bar')) + tester('ntpath.splitroot("///conky/mountpoint/foo/bar")', + ('///conky', '/', 'mountpoint/foo/bar')) + tester('ntpath.splitroot("\\\\conky\\\\mountpoint\\foo\\bar")', + ('\\\\conky\\', '\\', 'mountpoint\\foo\\bar')) + tester('ntpath.splitroot("//conky//mountpoint/foo/bar")', + ('//conky/', '/', 'mountpoint/foo/bar')) + + # Issue #19911: UNC part containing U+0130 + self.assertEqual(ntpath.splitroot('//conky/MOUNTPOİNT/foo/bar'), + ('//conky/MOUNTPOİNT', '/', 'foo/bar')) + + # gh-81790: support device namespace, including UNC drives. + tester('ntpath.splitroot("//?/c:")', ("//?/c:", "", "")) + tester('ntpath.splitroot("//?/c:/")', ("//?/c:", "/", "")) + tester('ntpath.splitroot("//?/c:/dir")', ("//?/c:", "/", "dir")) + tester('ntpath.splitroot("//?/UNC")', ("//?/UNC", "", "")) + tester('ntpath.splitroot("//?/UNC/")', ("//?/UNC/", "", "")) + tester('ntpath.splitroot("//?/UNC/server/")', ("//?/UNC/server/", "", "")) + tester('ntpath.splitroot("//?/UNC/server/share")', ("//?/UNC/server/share", "", "")) + tester('ntpath.splitroot("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/", "dir")) + tester('ntpath.splitroot("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")', + ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/', 'spam')) + tester('ntpath.splitroot("//?/BootPartition/")', ("//?/BootPartition", "/", "")) + + tester('ntpath.splitroot("\\\\?\\c:")', ("\\\\?\\c:", "", "")) + tester('ntpath.splitroot("\\\\?\\c:\\")', ("\\\\?\\c:", "\\", "")) + tester('ntpath.splitroot("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\", "dir")) + tester('ntpath.splitroot("\\\\?\\UNC")', ("\\\\?\\UNC", "", "")) + tester('ntpath.splitroot("\\\\?\\UNC\\")', ("\\\\?\\UNC\\", "", "")) + tester('ntpath.splitroot("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", "", "")) + tester('ntpath.splitroot("\\\\?\\UNC\\server\\share")', + ("\\\\?\\UNC\\server\\share", "", "")) + tester('ntpath.splitroot("\\\\?\\UNC\\server\\share\\dir")', + ("\\\\?\\UNC\\server\\share", "\\", "dir")) + tester('ntpath.splitroot("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")', + ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\', 'spam')) + tester('ntpath.splitroot("\\\\?\\BootPartition\\")', ("\\\\?\\BootPartition", "\\", "")) # gh-96290: support partial/invalid UNC drives - tester('ntpath.splitdrive("//")', ("//", "")) # empty server & missing share - tester('ntpath.splitdrive("///")', ("///", "")) # empty server & empty share - tester('ntpath.splitdrive("///y")', ("///y", "")) # empty server & non-empty share - tester('ntpath.splitdrive("//x")', ("//x", "")) # non-empty server & missing share - tester('ntpath.splitdrive("//x/")', ("//x/", "")) # non-empty server & empty share + tester('ntpath.splitroot("//")', ("//", "", "")) # empty server & missing share + tester('ntpath.splitroot("///")', ("///", "", "")) # empty server & empty share + tester('ntpath.splitroot("///y")', ("///y", "", "")) # empty server & non-empty share + tester('ntpath.splitroot("//x")', ("//x", "", "")) # non-empty server & missing share + tester('ntpath.splitroot("//x/")', ("//x/", "", "")) # non-empty server & empty share def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar')) @@ -930,6 +979,9 @@ class PathLikeTests(NtpathTestCase): def test_path_splitdrive(self): self._check_function(self.path.splitdrive) + def test_path_splitroot(self): + self._check_function(self.path.splitroot) + def test_path_basename(self): self._check_function(self.path.basename) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 1fe242b..a596795 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -80,26 +80,6 @@ class PosixFlavourTest(_BaseFlavourTest, unittest.TestCase): check(['c:\\a'], ('', '', ['c:\\a'])) check(['\\a'], ('', '', ['\\a'])) - def test_splitroot(self): - f = self.cls._split_root - self.assertEqual(f(''), ('', '', '')) - self.assertEqual(f('a'), ('', '', 'a')) - self.assertEqual(f('a/b'), ('', '', 'a/b')) - self.assertEqual(f('a/b/'), ('', '', 'a/b/')) - self.assertEqual(f('/a'), ('', '/', 'a')) - self.assertEqual(f('/a/b'), ('', '/', 'a/b')) - self.assertEqual(f('/a/b/'), ('', '/', 'a/b/')) - # The root is collapsed when there are redundant slashes - # except when there are exactly two leading slashes, which - # is a special case in POSIX. - self.assertEqual(f('//a'), ('', '//', 'a')) - self.assertEqual(f('///a'), ('', '/', 'a')) - self.assertEqual(f('///a/b'), ('', '/', 'a/b')) - # Paths which look like NT paths aren't treated specially. - self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b')) - self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b')) - self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b')) - class NTFlavourTest(_BaseFlavourTest, unittest.TestCase): cls = pathlib.PureWindowsPath @@ -143,23 +123,6 @@ class NTFlavourTest(_BaseFlavourTest, unittest.TestCase): check(['c:/a/b', 'c:x/y'], ('c:', '\\', ['c:\\', 'a', 'b', 'x', 'y'])) check(['c:/a/b', 'c:/x/y'], ('c:', '\\', ['c:\\', 'x', 'y'])) - def test_splitroot(self): - f = self.cls._split_root - self.assertEqual(f(''), ('', '', '')) - self.assertEqual(f('a'), ('', '', 'a')) - self.assertEqual(f('a\\b'), ('', '', 'a\\b')) - self.assertEqual(f('\\a'), ('', '\\', 'a')) - self.assertEqual(f('\\a\\b'), ('', '\\', 'a\\b')) - self.assertEqual(f('c:a\\b'), ('c:', '', 'a\\b')) - self.assertEqual(f('c:\\a\\b'), ('c:', '\\', 'a\\b')) - # Redundant slashes in the root are collapsed. - self.assertEqual(f('c:\\\\a'), ('c:', '\\', 'a')) - self.assertEqual(f('c:\\\\\\a/b'), ('c:', '\\', 'a/b')) - # Valid UNC paths. - self.assertEqual(f('\\\\a\\b'), ('\\\\a\\b', '\\', '')) - self.assertEqual(f('\\\\a\\b\\'), ('\\\\a\\b', '\\', '')) - self.assertEqual(f('\\\\a\\b\\c\\d'), ('\\\\a\\b', '\\', 'c\\d')) - # # Tests for the pure classes. diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index 6c1c0f5..9be4640 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -115,6 +115,32 @@ class PosixPathTest(unittest.TestCase): self.splitextTest("........", "........", "") self.splitextTest("", "", "") + def test_splitroot(self): + f = posixpath.splitroot + self.assertEqual(f(''), ('', '', '')) + self.assertEqual(f('a'), ('', '', 'a')) + self.assertEqual(f('a/b'), ('', '', 'a/b')) + self.assertEqual(f('a/b/'), ('', '', 'a/b/')) + self.assertEqual(f('/a'), ('', '/', 'a')) + self.assertEqual(f('/a/b'), ('', '/', 'a/b')) + self.assertEqual(f('/a/b/'), ('', '/', 'a/b/')) + # The root is collapsed when there are redundant slashes + # except when there are exactly two leading slashes, which + # is a special case in POSIX. + self.assertEqual(f('//a'), ('', '//', 'a')) + self.assertEqual(f('///a'), ('', '/', '//a')) + self.assertEqual(f('///a/b'), ('', '/', '//a/b')) + # Paths which look like NT paths aren't treated specially. + self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b')) + self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b')) + self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b')) + # Byte paths are supported + self.assertEqual(f(b''), (b'', b'', b'')) + self.assertEqual(f(b'a'), (b'', b'', b'a')) + self.assertEqual(f(b'/a'), (b'', b'/', b'a')) + self.assertEqual(f(b'//a'), (b'', b'//', b'a')) + self.assertEqual(f(b'///a'), (b'', b'/', b'//a')) + def test_isabs(self): self.assertIs(posixpath.isabs(""), False) self.assertIs(posixpath.isabs("/"), True) @@ -752,6 +778,9 @@ class PathLikeTests(unittest.TestCase): def test_path_splitdrive(self): self.assertPathEqual(self.path.splitdrive) + def test_path_splitroot(self): + self.assertPathEqual(self.path.splitroot) + def test_path_basename(self): self.assertPathEqual(self.path.basename) diff --git a/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst b/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst new file mode 100644 index 0000000..2082361 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst @@ -0,0 +1,3 @@ +Add :func:`os.path.splitroot()`, which splits a path into a 3-item tuple +``(drive, root, tail)``. This new function is used by :mod:`pathlib` to +improve the performance of path construction by up to a third. -- cgit v0.12