From ff1d2f4cc5a165b2d4dcc4b5abe6335cb3626e53 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 18 May 2011 13:43:23 +0200 Subject: Backport commit 33543b4e0e5d from Python 3.2: #10801: In zipfile, support different encodings for the header and the filenames. Patch by MvL, test by Eli Bendersky. --- Lib/test/test_zipfile.py | 29 ++++++++++++++++++++++++++--- Lib/zipfile.py | 8 +++++++- Misc/NEWS | 3 +++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 34d0fbc..825fba1 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -3,7 +3,13 @@ try: import zlib except ImportError: zlib = None -import zipfile, os, unittest, sys, shutil, struct, io +import io +import os +import shutil +import struct +import sys +import unittest +import zipfile from tempfile import TemporaryFile from random import randint, random @@ -14,6 +20,7 @@ from test.support import TESTFN, run_unittest, findfile TESTFN2 = TESTFN + "2" TESTFNDIR = TESTFN + "d" FIXEDTEST_SIZE = 1000 +DATAFILES_DIR = 'zipfile_datafiles' SMALL_TEST_DATA = [('_ziptest1', '1q2w3e4r5t'), ('ziptest2dir/_ziptest2', 'qawsedrftg'), @@ -387,9 +394,25 @@ class TestsWithSourceFile(unittest.TestCase): orig_zip.writestr(zinfo, data) orig_zip.close() + def test_unicode_filenames(self): + if __name__ == '__main__': + myfile = sys.argv[0] + else: + myfile = __file__ + + mydir = os.path.dirname(myfile) or os.curdir + fname = os.path.join(mydir, 'zip_cp437_header.zip') + + print(fname) + zipfp = zipfile.ZipFile(fname) + try: + zipfp.extractall() + finally: + zipfp.close() + def tearDown(self): - os.remove(TESTFN) - os.remove(TESTFN2) + support.unlink(TESTFN) + support.unlink(TESTFN2) class TestZip64InSmallFiles(unittest.TestCase): # These tests test the ZIP64 functionality without using large files, diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 2ec6306..a382383 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -928,7 +928,13 @@ class ZipFile: if fheader[_FH_EXTRA_FIELD_LENGTH]: zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) - if fname != zinfo.orig_filename.encode("utf-8"): + if zinfo.flag_bits & 0x800: + # UTF-8 filename + fname_str = fname.decode("utf-8") + else: + fname_str = fname.decode("cp437") + + if fname_str != zinfo.orig_filename: raise BadZipfile( 'File name in directory %r and header %r differ.' % (zinfo.orig_filename, fname)) diff --git a/Misc/NEWS b/Misc/NEWS index 8f994b9..40317f3 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -72,6 +72,9 @@ Core and Builtins Library ------- +- Issue #10801: In zipfile, support different encodings for the header and + the filenames. + - Issue #10154, #10090: change the normalization of UTF-8 to "UTF-8" instead of "UTF8" in the locale module as the latter is not supported MacOSX and OpenBSD. -- cgit v0.12