summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason R. Coombs <jaraco@jaraco.com>2024-08-11 23:48:50 (GMT)
committerGitHub <noreply@github.com>2024-08-11 23:48:50 (GMT)
commit9cd03263100ddb1657826cc4a71470786cab3932 (patch)
tree1a1bbea5286dfe3e38be0efde432c7f044ad6ce0
parent4534068f22f07a8ab9871bc16abf03c478ee2532 (diff)
downloadcpython-9cd03263100ddb1657826cc4a71470786cab3932.zip
cpython-9cd03263100ddb1657826cc4a71470786cab3932.tar.gz
cpython-9cd03263100ddb1657826cc4a71470786cab3932.tar.bz2
gh-122905: Sanitize names in zipfile.Path. (#122906)
Ported from zipp 3.19.1; ref jaraco/zipp#119.
-rw-r--r--Lib/test/test_zipfile/_path/test_path.py17
-rw-r--r--Lib/zipfile/_path/__init__.py64
-rw-r--r--Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst1
3 files changed, 81 insertions, 1 deletions
diff --git a/Lib/test/test_zipfile/_path/test_path.py b/Lib/test/test_zipfile/_path/test_path.py
index 99842ff..4a31cae 100644
--- a/Lib/test/test_zipfile/_path/test_path.py
+++ b/Lib/test/test_zipfile/_path/test_path.py
@@ -577,3 +577,20 @@ class TestPath(unittest.TestCase):
zipfile.Path(alpharep)
with self.assertRaises(KeyError):
alpharep.getinfo('does-not-exist')
+
+ def test_malformed_paths(self):
+ """
+ Path should handle malformed paths.
+ """
+ data = io.BytesIO()
+ zf = zipfile.ZipFile(data, "w")
+ zf.writestr("/one-slash.txt", b"content")
+ zf.writestr("//two-slash.txt", b"content")
+ zf.writestr("../parent.txt", b"content")
+ zf.filename = ''
+ root = zipfile.Path(zf)
+ assert list(map(str, root.iterdir())) == [
+ 'one-slash.txt',
+ 'two-slash.txt',
+ 'parent.txt',
+ ]
diff --git a/Lib/zipfile/_path/__init__.py b/Lib/zipfile/_path/__init__.py
index f5ea18c..0f18147 100644
--- a/Lib/zipfile/_path/__init__.py
+++ b/Lib/zipfile/_path/__init__.py
@@ -85,7 +85,69 @@ class InitializedState:
super().__init__(*args, **kwargs)
-class CompleteDirs(InitializedState, zipfile.ZipFile):
+class SanitizedNames:
+ """
+ ZipFile mix-in to ensure names are sanitized.
+ """
+
+ def namelist(self):
+ return list(map(self._sanitize, super().namelist()))
+
+ @staticmethod
+ def _sanitize(name):
+ r"""
+ Ensure a relative path with posix separators and no dot names.
+
+ Modeled after
+ https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
+ but provides consistent cross-platform behavior.
+
+ >>> san = SanitizedNames._sanitize
+ >>> san('/foo/bar')
+ 'foo/bar'
+ >>> san('//foo.txt')
+ 'foo.txt'
+ >>> san('foo/.././bar.txt')
+ 'foo/bar.txt'
+ >>> san('foo../.bar.txt')
+ 'foo../.bar.txt'
+ >>> san('\\foo\\bar.txt')
+ 'foo/bar.txt'
+ >>> san('D:\\foo.txt')
+ 'D/foo.txt'
+ >>> san('\\\\server\\share\\file.txt')
+ 'server/share/file.txt'
+ >>> san('\\\\?\\GLOBALROOT\\Volume3')
+ '?/GLOBALROOT/Volume3'
+ >>> san('\\\\.\\PhysicalDrive1\\root')
+ 'PhysicalDrive1/root'
+
+ Retain any trailing slash.
+ >>> san('abc/')
+ 'abc/'
+
+ Raises a ValueError if the result is empty.
+ >>> san('../..')
+ Traceback (most recent call last):
+ ...
+ ValueError: Empty filename
+ """
+
+ def allowed(part):
+ return part and part not in {'..', '.'}
+
+ # Remove the drive letter.
+ # Don't use ntpath.splitdrive, because that also strips UNC paths
+ bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
+ clean = bare.replace('\\', '/')
+ parts = clean.split('/')
+ joined = '/'.join(filter(allowed, parts))
+ if not joined:
+ raise ValueError("Empty filename")
+ return joined + '/' * name.endswith('/')
+
+
+class CompleteDirs(InitializedState, SanitizedNames, zipfile.ZipFile):
"""
A ZipFile subclass that ensures that implied directories
are always included in the namelist.
diff --git a/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst b/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst
new file mode 100644
index 0000000..1be44c9
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst
@@ -0,0 +1 @@
+:class:`zipfile.Path` objects now sanitize names from the zipfile.