summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jason R. Coombs <jaraco@jaraco.com> 2024-09-04 20:41:39 (GMT)
committer: GitHub <noreply@github.com> 2024-09-04 20:41:39 (GMT)
commit: 0aa1ee22ab6e204e9d3d0e9dd63ea648ed691ef1 (patch)
tree: e0de3e226f46ccb96f40afbf963555b643100382
parent: d3f39cefe721db643957d48316ac98c5e279f0c3 (diff)
download: cpython-0aa1ee22ab6e204e9d3d0e9dd63ea648ed691ef1.zip
cpython-0aa1ee22ab6e204e9d3d0e9dd63ea648ed691ef1.tar.gz
cpython-0aa1ee22ab6e204e9d3d0e9dd63ea648ed691ef1.tar.bz2
[3.10] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) (#123426)
Applies changes from zipp 3.20.1 and jaraco/zipp#124

(cherry picked from commit 2231286d78d328c2f575e0b05b16fe447d1656d6)
(cherry picked from commit 17b77bb)

Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>
-rw-r--r-- Lib/test/test_zipfile.py 76
-rw-r--r-- Lib/zipfile.py 69
-rw-r--r-- Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst 3
3 files changed, 81 insertions, 67 deletions
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index a60dc11..33e5dfc 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -5,6 +5,7 @@ import io
import itertools
import os
import pathlib
+import platform
import posixpath
import string
import struct
@@ -3282,7 +3283,11 @@ with zipfile.ZipFile(io.BytesIO(), "w") as zf:
def test_malformed_paths(self):
"""
- Path should handle malformed paths.
+ Path should handle malformed paths gracefully.
+
+ Paths with leading slashes are not visible.
+
+ Paths with dots are treated like regular files.
"""
data = io.BytesIO()
zf = zipfile.ZipFile(data, "w")
@@ -3291,11 +3296,70 @@ with zipfile.ZipFile(io.BytesIO(), "w") as zf:
zf.writestr("../parent.txt", b"content")
zf.filename = ''
root = zipfile.Path(zf)
- assert list(map(str, root.iterdir())) == [
- 'one-slash.txt',
- 'two-slash.txt',
- 'parent.txt',
- ]
+ assert list(map(str, root.iterdir())) == ['../']
+ assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
+
+ @unittest.skipIf(platform.system() == "Windows", "GH-123693")
+ def test_unsupported_names(self):
+ """
+ Path segments with special characters are readable.
+
+ On some platforms or file systems, characters like
+ ``:`` and ``?`` are not allowed, but they are valid
+ in the zip file.
+ """
+ data = io.BytesIO()
+ zf = zipfile.ZipFile(data, "w")
+ zf.writestr("path?", b"content")
+ zf.writestr("V: NMS.flac", b"fLaC...")
+ zf.filename = ''
+ root = zipfile.Path(zf)
+ contents = root.iterdir()
+ assert next(contents).name == 'path?'
+ item = next(contents)
+ assert item.name == 'V: NMS.flac', item.name
+ assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
+
+ @unittest.skipIf(platform.system() == "Windows", "GH-123693")
+ def test_backslash_not_separator(self):
+ """
+ In a zip file, backslashes are not separators.
+ """
+ data = io.BytesIO()
+ zf = zipfile.ZipFile(data, "w")
+ zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
+ zf.filename = ''
+ root = zipfile.Path(zf)
+ (first,) = root.iterdir()
+ assert not first.is_dir()
+ assert first.name == 'foo\\bar', first.name
+
+
+class DirtyZipInfo(zipfile.ZipInfo):
+ """
+ Bypass name sanitization.
+ """
+
+ def __init__(self, filename, *args, **kwargs):
+ super().__init__(filename, *args, **kwargs)
+ self.filename = filename
+
+ @classmethod
+ def for_name(cls, name, archive):
+ """
+ Construct the same way that ZipFile.writestr does.
+
+ TODO: extract this functionality and re-use
+ """
+ self = cls(filename=name, date_time=time.localtime(time.time())[:6])
+ self.compress_type = archive.compression
+ self.compress_level = archive.compresslevel
+ if self.filename.endswith('/'): # pragma: no cover
+ self.external_attr = 0o40775 << 16 # drwxrwxr-x
+ self.external_attr |= 0x10 # MS-DOS directory flag
+ else:
+ self.external_attr = 0o600 << 16 # ?rw-------
+ return self
class StripExtraTests(unittest.TestCase):
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index cbac8d9..9b66a9f 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -2152,7 +2152,7 @@ def _parents(path):
def _ancestry(path):
"""
Given a path with elements separated by
- posixpath.sep, generate all elements of that path
+ posixpath.sep, generate all elements of that path.
>>> list(_ancestry('b/d'))
['b/d', 'b']
@@ -2164,9 +2164,14 @@ def _ancestry(path):
['b']
>>> list(_ancestry(''))
[]
+
+ Multiple separators are treated like a single.
+
+ >>> list(_ancestry('//b//d///f//'))
+ ['//b//d///f', '//b//d', '//b']
"""
path = path.rstrip(posixpath.sep)
- while path and path != posixpath.sep:
+ while path.rstrip(posixpath.sep):
yield path
path, tail = posixpath.split(path)
@@ -2183,65 +2188,7 @@ def _difference(minuend, subtrahend):
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
-class SanitizedNames:
- """
- ZipFile mix-in to ensure names are sanitized.
- """
-
- def namelist(self):
- return list(map(self._sanitize, super().namelist()))
-
- @staticmethod
- def _sanitize(name):
- r"""
- Ensure a relative path with posix separators and no dot names.
- Modeled after
- https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
- but provides consistent cross-platform behavior.
- >>> san = SanitizedNames._sanitize
- >>> san('/foo/bar')
- 'foo/bar'
- >>> san('//foo.txt')
- 'foo.txt'
- >>> san('foo/.././bar.txt')
- 'foo/bar.txt'
- >>> san('foo../.bar.txt')
- 'foo../.bar.txt'
- >>> san('\\foo\\bar.txt')
- 'foo/bar.txt'
- >>> san('D:\\foo.txt')
- 'D/foo.txt'
- >>> san('\\\\server\\share\\file.txt')
- 'server/share/file.txt'
- >>> san('\\\\?\\GLOBALROOT\\Volume3')
- '?/GLOBALROOT/Volume3'
- >>> san('\\\\.\\PhysicalDrive1\\root')
- 'PhysicalDrive1/root'
- Retain any trailing slash.
- >>> san('abc/')
- 'abc/'
- Raises a ValueError if the result is empty.
- >>> san('../..')
- Traceback (most recent call last):
- ...
- ValueError: Empty filename
- """
-
- def allowed(part):
- return part and part not in {'..', '.'}
-
- # Remove the drive letter.
- # Don't use ntpath.splitdrive, because that also strips UNC paths
- bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
- clean = bare.replace('\\', '/')
- parts = clean.split('/')
- joined = '/'.join(filter(allowed, parts))
- if not joined:
- raise ValueError("Empty filename")
- return joined + '/' * name.endswith('/')
-
-
-class CompleteDirs(SanitizedNames, ZipFile):
+class CompleteDirs(ZipFile):
"""
A ZipFile subclass that ensures that implied directories
are always included in the namelist.
diff --git a/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst b/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst
new file mode 100644
index 0000000..ee9fde6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst
@@ -0,0 +1,3 @@
+Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
+causing infinite loops (gh-122905) without breaking contents using
+legitimate characters.