From 567ab3bd15398c8c7b791f3e376ae3e3c0bbe079 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka
Date: Fri, 22 Mar 2024 20:08:00 +0200
Subject: gh-117084: Fix ZIP file extraction for directory entry names with backslashes on Windows (GH-117129)

---
 Lib/test/archivetestdata/zipdir_backslash.zip      | Bin 0 -> 192 bytes
 Lib/test/test_zipfile/test_core.py                 | 16 ++++++++++++++++
 Lib/zipfile/__init__.py                            | 10 +++++++++-
 .../2024-03-21-17-07-38.gh-issue-117084.w1mTpT.rst |  2 ++
 4 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 Lib/test/archivetestdata/zipdir_backslash.zip
 create mode 100644 Misc/NEWS.d/next/Library/2024-03-21-17-07-38.gh-issue-117084.w1mTpT.rst

diff --git a/Lib/test/archivetestdata/zipdir_backslash.zip b/Lib/test/archivetestdata/zipdir_backslash.zip
new file mode 100644
index 0000000..979126e
Binary files /dev/null and b/Lib/test/archivetestdata/zipdir_backslash.zip differ
diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py
index 368e60a..a605aa1 100644
--- a/Lib/test/test_zipfile/test_core.py
+++ b/Lib/test/test_zipfile/test_core.py
@@ -2937,6 +2937,22 @@ class TestWithDirectory(unittest.TestCase):
         os.mkdir(os.path.join(TESTFN2, "a"))
         self.test_extract_dir()
 
+    def test_extract_dir_backslash(self):
+        zfname = findfile("zipdir_backslash.zip", subdir="archivetestdata")
+        with zipfile.ZipFile(zfname) as zipf:
+            zipf.extractall(TESTFN2)
+        if os.name == 'nt':
+            self.assertTrue(os.path.isdir(os.path.join(TESTFN2, "a")))
+            self.assertTrue(os.path.isdir(os.path.join(TESTFN2, "a", "b")))
+            self.assertTrue(os.path.isfile(os.path.join(TESTFN2, "a", "b", "c")))
+            self.assertTrue(os.path.isdir(os.path.join(TESTFN2, "d")))
+            self.assertTrue(os.path.isdir(os.path.join(TESTFN2, "d", "e")))
+        else:
+            self.assertTrue(os.path.isfile(os.path.join(TESTFN2, "a\\b\\c")))
+            self.assertTrue(os.path.isfile(os.path.join(TESTFN2, "d\\e\\")))
+            self.assertFalse(os.path.exists(os.path.join(TESTFN2, "a")))
+            self.assertFalse(os.path.exists(os.path.join(TESTFN2, "d")))
+
     def test_write_dir(self):
         dirpath = os.path.join(TESTFN2, "x")
         os.mkdir(dirpath)
diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py
index cc08f60..b330ece 100644
--- a/Lib/zipfile/__init__.py
+++ b/Lib/zipfile/__init__.py
@@ -605,7 +605,15 @@ class ZipInfo:
 
     def is_dir(self):
         """Return True if this archive member is a directory."""
-        return self.filename.endswith('/')
+        if self.filename.endswith('/'):
+            return True
+        # The ZIP format specification requires to use forward slashes
+        # as the directory separator, but in practice some ZIP files
+        # created on Windows can use backward slashes. For compatibility
+        # with the extraction code which already handles this:
+        if os.path.altsep:
+            return self.filename.endswith((os.path.sep, os.path.altsep))
+        return False
 
 
 # ZIP encryption uses the CRC32 one-byte primitive for scrambling some
diff --git a/Misc/NEWS.d/next/Library/2024-03-21-17-07-38.gh-issue-117084.w1mTpT.rst b/Misc/NEWS.d/next/Library/2024-03-21-17-07-38.gh-issue-117084.w1mTpT.rst
new file mode 100644
index 0000000..6e7790e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-03-21-17-07-38.gh-issue-117084.w1mTpT.rst
@@ -0,0 +1,2 @@
+Fix :mod:`zipfile` extraction for directory entries with the name containing
+backslashes on Windows.
-- 
cgit v0.12