From e418fc3a6e7bade68ab5dfe72f14ddba28e6acb5 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 25 May 2024 21:01:36 +0100 Subject: GH-82805: Fix handling of single-dot file extensions in pathlib (#118952) pathlib now treats "`.`" as a valid file extension (suffix). This brings it in line with `os.path.splitext()`. In the (private) pathlib ABCs, we add a new `ParserBase.splitext()` method that splits a path into a `(root, ext)` pair, like `os.path.splitext()`. This method is called by `PurePathBase.stem`, `suffix`, etc. In a future version of pathlib, we might make these base classes public, and so users will be able to define their own `splitext()` method to control file extension splitting. In `pathlib.PurePath` we add optimised `stem`, `suffix` and `suffixes` properties that don't use `splitext()`, which avoids computing the path base name twice. --- Doc/library/pathlib.rst | 13 ++++++ Lib/pathlib/_abc.py | 34 +++++++-------- Lib/pathlib/_local.py | 34 +++++++++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 50 ++++++++++++++-------- .../2024-05-11-20-23-45.gh-issue-82805.F9bz4J.rst | 5 +++ 5 files changed, 101 insertions(+), 35 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-11-20-23-45.gh-issue-82805.F9bz4J.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 71e2e54..c72d409 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -449,6 +449,10 @@ Pure paths provide the following methods and properties: This is commonly called the file extension. + .. versionchanged:: 3.14 + + A single dot ("``.``") is considered a valid suffix. + .. attribute:: PurePath.suffixes A list of the path's suffixes, often called file extensions:: @@ -460,6 +464,10 @@ Pure paths provide the following methods and properties: >>> PurePosixPath('my/library').suffixes [] + .. versionchanged:: 3.14 + + A single dot ("``.``") is considered a valid suffix. + .. attribute:: PurePath.stem @@ -713,6 +721,11 @@ Pure paths provide the following methods and properties: >>> p.with_suffix('') PureWindowsPath('README') + .. versionchanged:: 3.14 + + A single dot ("``.``") is considered a valid suffix. In previous + versions, :exc:`ValueError` is raised if a single dot is supplied. + .. method:: PurePath.with_segments(*pathsegments) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 3cdbb73..6b5d9fc 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -68,6 +68,12 @@ class ParserBase: drive. Either part may be empty.""" raise UnsupportedOperation(self._unsupported_msg('splitdrive()')) + def splitext(self, path): + """Split the path into a pair (root, ext), where *ext* is empty or + begins with a begins with a period and contains at most one period, + and *root* is everything before the extension.""" + raise UnsupportedOperation(self._unsupported_msg('splitext()')) + def normcase(self, path): """Normalize the case of the path.""" raise UnsupportedOperation(self._unsupported_msg('normcase()')) @@ -151,12 +157,7 @@ class PurePathBase: This includes the leading period. For example: '.txt' """ - name = self.name - i = name.rfind('.') - if 0 < i < len(name) - 1: - return name[i:] - else: - return '' + return self.parser.splitext(self.name)[1] @property def suffixes(self): @@ -165,21 +166,18 @@ class PurePathBase: These include the leading periods. For example: ['.tar', '.gz'] """ - name = self.name - if name.endswith('.'): - return [] - name = name.lstrip('.') - return ['.' + suffix for suffix in name.split('.')[1:]] + split = self.parser.splitext + stem, suffix = split(self.name) + suffixes = [] + while suffix: + suffixes.append(suffix) + stem, suffix = split(stem) + return suffixes[::-1] @property def stem(self): """The final path component, minus its last suffix.""" - name = self.name - i = name.rfind('.') - if 0 < i < len(name) - 1: - return name[:i] - else: - return name + return self.parser.splitext(self.name)[0] def with_name(self, name): """Return a new path with the file name changed.""" @@ -208,7 +206,7 @@ class PurePathBase: if not stem: # If the stem is empty, we can't make the suffix non-empty. raise ValueError(f"{self!r} has an empty name") - elif suffix and not (suffix.startswith('.') and len(suffix) > 1): + elif suffix and not suffix.startswith('.'): raise ValueError(f"Invalid suffix {suffix!r}") else: return self.with_name(stem + suffix) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index f2776b1..49d9f81 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -361,6 +361,40 @@ class PurePath(PurePathBase): tail[-1] = name return self._from_parsed_parts(self.drive, self.root, tail) + @property + def stem(self): + """The final path component, minus its last suffix.""" + name = self.name + i = name.rfind('.') + if i != -1: + stem = name[:i] + # Stem must contain at least one non-dot character. + if stem.lstrip('.'): + return stem + return name + + @property + def suffix(self): + """ + The final component's last suffix, if any. + + This includes the leading period. For example: '.txt' + """ + name = self.name.lstrip('.') + i = name.rfind('.') + if i != -1: + return name[i:] + return '' + + @property + def suffixes(self): + """ + A list of the final component's suffixes, if any. + + These include the leading periods. For example: ['.tar', '.gz'] + """ + return ['.' + ext for ext in self.name.lstrip('.').split('.')[1:]] + def relative_to(self, other, *, walk_up=False): """Return the relative path to another path identified by the passed arguments. If the operation is not possible (because this is not diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index d9e51c0..57cc161 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -50,6 +50,7 @@ class ParserBaseTest(unittest.TestCase): self.assertRaises(e, m.join, 'foo') self.assertRaises(e, m.split, 'foo') self.assertRaises(e, m.splitdrive, 'foo') + self.assertRaises(e, m.splitext, 'foo') self.assertRaises(e, m.normcase, 'foo') self.assertRaises(e, m.isabs, 'foo') @@ -789,8 +790,12 @@ class DummyPurePathTest(unittest.TestCase): self.assertEqual(P('/a/.hg.rc').suffix, '.rc') self.assertEqual(P('a/b.tar.gz').suffix, '.gz') self.assertEqual(P('/a/b.tar.gz').suffix, '.gz') - self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '') - self.assertEqual(P('/a/Some name. Ending with a dot.').suffix, '') + self.assertEqual(P('a/trailing.dot.').suffix, '.') + self.assertEqual(P('/a/trailing.dot.').suffix, '.') + self.assertEqual(P('a/..d.o.t..').suffix, '.') + self.assertEqual(P('a/inn.er..dots').suffix, '.dots') + self.assertEqual(P('photo').suffix, '') + self.assertEqual(P('photo.jpg').suffix, '.jpg') @needs_windows def test_suffix_windows(self): @@ -807,8 +812,8 @@ class DummyPurePathTest(unittest.TestCase): self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc') self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz') self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz') - self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '') - self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '') + self.assertEqual(P('c:a/trailing.dot.').suffix, '.') + self.assertEqual(P('c:/a/trailing.dot.').suffix, '.') self.assertEqual(P('//My.py/Share.php').suffix, '') self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') @@ -828,8 +833,12 @@ class DummyPurePathTest(unittest.TestCase): self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc']) self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz']) self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz']) - self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, []) - self.assertEqual(P('/a/Some name. Ending with a dot.').suffixes, []) + self.assertEqual(P('a/trailing.dot.').suffixes, ['.dot', '.']) + self.assertEqual(P('/a/trailing.dot.').suffixes, ['.dot', '.']) + self.assertEqual(P('a/..d.o.t..').suffixes, ['.o', '.t', '.', '.']) + self.assertEqual(P('a/inn.er..dots').suffixes, ['.er', '.', '.dots']) + self.assertEqual(P('photo').suffixes, []) + self.assertEqual(P('photo.jpg').suffixes, ['.jpg']) @needs_windows def test_suffixes_windows(self): @@ -848,8 +857,8 @@ class DummyPurePathTest(unittest.TestCase): self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz']) self.assertEqual(P('//My.py/Share.php').suffixes, []) self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) - self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) - self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, []) + self.assertEqual(P('c:a/trailing.dot.').suffixes, ['.dot', '.']) + self.assertEqual(P('c:/a/trailing.dot.').suffixes, ['.dot', '.']) def test_stem_empty(self): P = self.cls @@ -865,8 +874,11 @@ class DummyPurePathTest(unittest.TestCase): self.assertEqual(P('a/.hgrc').stem, '.hgrc') self.assertEqual(P('a/.hg.rc').stem, '.hg') self.assertEqual(P('a/b.tar.gz').stem, 'b.tar') - self.assertEqual(P('a/Some name. Ending with a dot.').stem, - 'Some name. Ending with a dot.') + self.assertEqual(P('a/trailing.dot.').stem, 'trailing.dot') + self.assertEqual(P('a/..d.o.t..').stem, '..d.o.t.') + self.assertEqual(P('a/inn.er..dots').stem, 'inn.er.') + self.assertEqual(P('photo').stem, 'photo') + self.assertEqual(P('photo.jpg').stem, 'photo') @needs_windows def test_stem_windows(self): @@ -880,8 +892,8 @@ class DummyPurePathTest(unittest.TestCase): self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') self.assertEqual(P('c:a/.hg.rc').stem, '.hg') self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') - self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, - 'Some name. Ending with a dot.') + self.assertEqual(P('c:a/trailing.dot.').stem, 'trailing.dot') + def test_with_name_common(self): P = self.cls self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml')) @@ -929,16 +941,16 @@ class DummyPurePathTest(unittest.TestCase): self.assertEqual(P('a/b.py').with_stem('d'), P('a/d.py')) self.assertEqual(P('/a/b.py').with_stem('d'), P('/a/d.py')) self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz')) - self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d')) - self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d')) + self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d.')) + self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d.')) @needs_windows def test_with_stem_windows(self): P = self.cls self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) - self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d')) - self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d')) + self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d.')) + self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d.')) self.assertRaises(ValueError, P('c:').with_stem, 'd') self.assertRaises(ValueError, P('c:/').with_stem, 'd') self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') @@ -974,6 +986,11 @@ class DummyPurePathTest(unittest.TestCase): # Stripping suffix. self.assertEqual(P('a/b.py').with_suffix(''), P('a/b')) self.assertEqual(P('/a/b').with_suffix(''), P('/a/b')) + # Single dot + self.assertEqual(P('a/b').with_suffix('.'), P('a/b.')) + self.assertEqual(P('/a/b').with_suffix('.'), P('/a/b.')) + self.assertEqual(P('a/b.py').with_suffix('.'), P('a/b.')) + self.assertEqual(P('/a/b.py').with_suffix('.'), P('/a/b.')) @needs_windows def test_with_suffix_windows(self): @@ -1012,7 +1029,6 @@ class DummyPurePathTest(unittest.TestCase): # Invalid suffix. self.assertRaises(ValueError, P('a/b').with_suffix, 'gz') self.assertRaises(ValueError, P('a/b').with_suffix, '/') - self.assertRaises(ValueError, P('a/b').with_suffix, '.') self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz') self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d') self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d') diff --git a/Misc/NEWS.d/next/Library/2024-05-11-20-23-45.gh-issue-82805.F9bz4J.rst b/Misc/NEWS.d/next/Library/2024-05-11-20-23-45.gh-issue-82805.F9bz4J.rst new file mode 100644 index 0000000..8715ded --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-11-20-23-45.gh-issue-82805.F9bz4J.rst @@ -0,0 +1,5 @@ +Support single-dot file extensions in :attr:`pathlib.PurePath.suffix` and +related attributes and methods. For example, the +:attr:`~pathlib.PurePath.suffixes` of ``PurePath('foo.bar.')`` are now +``['.bar', '.']`` rather than ``[]``. This brings file extension splitting +in line with :func:`os.path.splitext`. -- cgit v0.12