summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/test/test_zipfile/test_core.py 27
-rw-r--r-- Lib/zipfile/__init__.py 50
-rw-r--r-- Misc/ACKS 1
-rw-r--r-- Misc/NEWS.d/next/Documentation/2023-03-10-04-59-35.gh-issue-86094.zOYdy8.rst 2
4 files changed, 67 insertions, 13 deletions
diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py
index e23f5c2..73c6b01 100644
--- a/Lib/test/test_zipfile/test_core.py
+++ b/Lib/test/test_zipfile/test_core.py
@@ -1616,6 +1616,33 @@ class OtherTests(unittest.TestCase):
self.assertEqual(zf.filelist[0].filename, "foo.txt")
self.assertEqual(zf.filelist[1].filename, "\xf6.txt")
+ @requires_zlib()
+ def test_read_zipfile_containing_unicode_path_extra_field(self):
+ with zipfile.ZipFile(TESTFN, mode='w') as zf:
+ # create a file with a non-ASCII name
+ filename = '이름.txt'
+ filename_encoded = filename.encode('utf-8')
+
+ # create a ZipInfo object with Unicode path extra field
+ zip_info = zipfile.ZipInfo(filename)
+
+ tag_for_unicode_path = b'\x75\x70'
+ version_of_unicode_path = b'\x01'
+
+ import zlib
+ filename_crc = struct.pack('<L', zlib.crc32(filename_encoded))
+
+ extra_data = version_of_unicode_path + filename_crc + filename_encoded
+ tsize = len(extra_data).to_bytes(2, 'little')
+
+ zip_info.extra = tag_for_unicode_path + tsize + extra_data
+
+ # add the file to the ZIP archive
+ zf.writestr(zip_info, b'Hello World!')
+
+ with zipfile.ZipFile(TESTFN, "r") as zf:
+ self.assertEqual(zf.filelist[0].filename, "이름.txt")
+
def test_read_after_write_unicode_filenames(self):
with zipfile.ZipFile(TESTFN2, 'w') as zipfp:
zipfp.writestr('приклад', b'sample')
diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py
index 6e6211d..95c0479 100644
--- a/Lib/zipfile/__init__.py
+++ b/Lib/zipfile/__init__.py
@@ -338,6 +338,22 @@ def _EndRecData(fpin):
# Unable to find a valid end of central directory structure
return None
+def _sanitize_filename(filename):
+ """Terminate the file name at the first null byte and
+ ensure paths always use forward slashes as the directory separator."""
+
+ # Terminate the file name at the first null byte. Null bytes in file
+ # names are used as tricks by viruses in archives.
+ null_byte = filename.find(chr(0))
+ if null_byte >= 0:
+ filename = filename[0:null_byte]
+ # This is used to ensure paths in generated ZIP files always use
+ # forward slashes as the directory separator, as required by the
+ # ZIP format specification.
+ if os.sep != "/" and os.sep in filename:
+ filename = filename.replace(os.sep, "/")
+ return filename
+
class ZipInfo (object):
"""Class with attributes describing each file in the ZIP archive."""
@@ -368,16 +384,9 @@ class ZipInfo (object):
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
self.orig_filename = filename # Original file name in archive
- # Terminate the file name at the first null byte. Null bytes in file
- # names are used as tricks by viruses in archives.
- null_byte = filename.find(chr(0))
- if null_byte >= 0:
- filename = filename[0:null_byte]
- # This is used to ensure paths in generated ZIP files always use
- # forward slashes as the directory separator, as required by the
- # ZIP format specification.
- if os.sep != "/" and os.sep in filename:
- filename = filename.replace(os.sep, "/")
+ # Terminate the file name at the first null byte and
+ # ensure paths always use forward slashes as the directory separator.
+ filename = _sanitize_filename(filename)
self.filename = filename # Normalized file name
self.date_time = date_time # year, month, day, hour, min, sec
@@ -482,7 +491,7 @@ class ZipInfo (object):
except UnicodeEncodeError:
return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME
- def _decodeExtra(self):
+ def _decodeExtra(self, filename_crc):
# Try to decode the extra field.
extra = self.extra
unpack = struct.unpack
@@ -508,6 +517,21 @@ class ZipInfo (object):
except struct.error:
raise BadZipFile(f"Corrupt zip64 extra field. "
f"{field} not found.") from None
+ elif tp == 0x7075:
+ data = extra[4:ln+4]
+ # Unicode Path Extra Field
+ try:
+ up_version, up_name_crc = unpack('<BL', data[:5])
+ if up_version == 1 and up_name_crc == filename_crc:
+ up_unicode_name = data[5:].decode('utf-8')
+ if up_unicode_name:
+ self.filename = _sanitize_filename(up_unicode_name)
+ else:
+ warnings.warn("Empty unicode path extra field (0x7075)", stacklevel=2)
+ except struct.error as e:
+ raise BadZipFile("Corrupt unicode path extra field (0x7075)") from e
+ except UnicodeDecodeError as e:
+ raise BadZipFile('Corrupt unicode path extra field (0x7075): invalid utf-8 bytes') from e
extra = extra[ln+4:]
@@ -1409,6 +1433,7 @@ class ZipFile:
if self.debug > 2:
print(centdir)
filename = fp.read(centdir[_CD_FILENAME_LENGTH])
+ orig_filename_crc = crc32(filename)
flags = centdir[_CD_FLAG_BITS]
if flags & _MASK_UTF_FILENAME:
# UTF-8 file names extension
@@ -1432,8 +1457,7 @@ class ZipFile:
x._raw_time = t
x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
-
- x._decodeExtra()
+ x._decodeExtra(orig_filename_crc)
x.header_offset = x.header_offset + concat
self.filelist.append(x)
self.NameToInfo[x.filename] = x
diff --git a/Misc/ACKS b/Misc/ACKS
index 929e06a..49f3692 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -627,6 +627,7 @@ Julian Gindi
Yannick Gingras
Neil Girdhar
Matt Giuca
+Andrea Giudiceandrea
Franz Glasner
Wim Glenn
Michael Goderbauer
diff --git a/Misc/NEWS.d/next/Documentation/2023-03-10-04-59-35.gh-issue-86094.zOYdy8.rst b/Misc/NEWS.d/next/Documentation/2023-03-10-04-59-35.gh-issue-86094.zOYdy8.rst
new file mode 100644
index 0000000..39461f3
--- /dev/null
+++ b/Misc/NEWS.d/next/Documentation/2023-03-10-04-59-35.gh-issue-86094.zOYdy8.rst
@@ -0,0 +1,2 @@
+Add support for Unicode Path Extra Field in ZipFile. Patch by Yeojin Kim
+and Andrea Giudiceandrea.