Fix GzipFile's handling of filenames given as bytes objects.

Add relevant tests for GzipFile, and also for BZ2File and LZMAFile.
author: Nadeem Vawda <nadeem.vawda@gmail.com> 2012-06-19 23:48:50 (GMT)
committer: Nadeem Vawda <nadeem.vawda@gmail.com> 2012-06-19 23:48:50 (GMT)
commit: 10c8791978203be95af2c4c1d7ce33496fac880c (patch)
tree: d30969af462c1c847aebb202b93e3b5a7cd43250
parent: e67f48ce5e7ad122b17e23b2705bf66cff76d42b (diff)
parent: 103e8113e4bb4ad3687d641f660481c72904d571 (diff)
download: cpython-10c8791978203be95af2c4c1d7ce33496fac880c.zip cpython-10c8791978203be95af2c4c1d7ce33496fac880c.tar.gz cpython-10c8791978203be95af2c4c1d7ce33496fac880c.tar.bz2
5 files changed, 59 insertions, 4 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 412bf05..f6f63bb 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -182,9 +182,8 @@ class GzipFile(io.BufferedIOBase):
         if fileobj is None:
             fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
         if filename is None:
-            if hasattr(fileobj, 'name') and isinstance(fileobj.name, str):
-                filename = fileobj.name
-            else:
+            filename = getattr(fileobj, 'name', '')
+            if not isinstance(filename, (str, bytes)):
                 filename = ''
         if mode is None:
             mode = getattr(fileobj, 'mode', 'rb')
@@ -258,7 +257,8 @@ class GzipFile(io.BufferedIOBase):
             # RFC 1952 requires the FNAME field to be Latin-1. Do not
             # include filenames that cannot be represented that way.
             fname = os.path.basename(self.name)
-            fname = fname.encode('latin-1')
+            if not isinstance(fname, bytes):
+                fname = fname.encode('latin-1')
             if fname.endswith(b'.gz'):
                 fname = fname[:-3]
         except UnicodeEncodeError:
diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py
index 6b1a93c..257b144 100644
--- a/Lib/test/test_bz2.py
+++ b/Lib/test/test_bz2.py
@@ -522,6 +522,21 @@ class BZ2FileTest(BaseTest):
         with BZ2File(self.filename) as bz2f:
             self.assertEqual(bz2f.read(), data1 + data2)
 
+    def testOpenBytesFilename(self):
+        str_filename = self.filename
+        try:
+            bytes_filename = str_filename.encode("ascii")
+        except UnicodeEncodeError:
+            self.skipTest("Temporary file name needs to be ASCII")
+        with BZ2File(bytes_filename, "wb") as f:
+            f.write(self.DATA)
+        with BZ2File(bytes_filename, "rb") as f:
+            self.assertEqual(f.read(), self.DATA)
+        # Sanity check that we are actually operating on the right file.
+        with BZ2File(str_filename, "rb") as f:
+            self.assertEqual(f.read(), self.DATA)
+
+
     # Tests for a BZ2File wrapping another file object:
 
     def testReadBytesIO(self):
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index bb97097..af73953 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -355,6 +355,20 @@ class TestGzip(BaseTest):
             with gzip.GzipFile(fileobj=f, mode="w") as g:
                 pass
 
+    def test_bytes_filename(self):
+        str_filename = self.filename
+        try:
+            bytes_filename = str_filename.encode("ascii")
+        except UnicodeEncodeError:
+            self.skipTest("Temporary file name needs to be ASCII")
+        with gzip.GzipFile(bytes_filename, "wb") as f:
+            f.write(data1 * 50)
+        with gzip.GzipFile(bytes_filename, "rb") as f:
+            self.assertEqual(f.read(), data1 * 50)
+        # Sanity check that we are actually operating on the right file.
+        with gzip.GzipFile(str_filename, "rb") as f:
+            self.assertEqual(f.read(), data1 * 50)
+
     # Testing compress/decompress shortcut functions
 
     def test_compress(self):
diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py
index 22f2f47..a086586 100644
--- a/Lib/test/test_lzma.py
+++ b/Lib/test/test_lzma.py
@@ -655,6 +655,16 @@ class FileTestCase(unittest.TestCase):
                 self.assertEqual(f.read(), INPUT)
                 self.assertEqual(f.read(), b"")
 
+    def test_read_from_file_with_bytes_filename(self):
+        try:
+            bytes_filename = TESTFN.encode("ascii")
+        except UnicodeEncodeError:
+            self.skipTest("Temporary file name needs to be ASCII")
+        with TempFile(TESTFN, COMPRESSED_XZ):
+            with LZMAFile(bytes_filename) as f:
+                self.assertEqual(f.read(), INPUT)
+                self.assertEqual(f.read(), b"")
+
     def test_read_incomplete(self):
         with LZMAFile(BytesIO(COMPRESSED_XZ[:128])) as f:
             self.assertRaises(EOFError, f.read)
@@ -814,6 +824,20 @@ class FileTestCase(unittest.TestCase):
         finally:
             unlink(TESTFN)
 
+    def test_write_to_file_with_bytes_filename(self):
+        try:
+            bytes_filename = TESTFN.encode("ascii")
+        except UnicodeEncodeError:
+            self.skipTest("Temporary file name needs to be ASCII")
+        try:
+            with LZMAFile(bytes_filename, "w") as f:
+                f.write(INPUT)
+            expected = lzma.compress(INPUT)
+            with open(TESTFN, "rb") as f:
+                self.assertEqual(f.read(), expected)
+        finally:
+            unlink(TESTFN)
+
     def test_write_append_to_file(self):
         part1 = INPUT[:1024]
         part2 = INPUT[1024:1536]
diff --git a/Misc/NEWS b/Misc/NEWS
index 814528a..bcf28bd 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -31,6 +31,8 @@ Core and Builtins
 Library
 -------
 
+- Fix GzipFile's handling of filenames given as bytes objects.
+
 - Issue #14772: Return destination values from some shutil functions.
 
 - Issue #15064: Implement context manager protocol for multiprocessing types
author	Nadeem Vawda <nadeem.vawda@gmail.com>	2012-06-19 23:48:50 (GMT)
committer	Nadeem Vawda <nadeem.vawda@gmail.com>	2012-06-19 23:48:50 (GMT)
commit	10c8791978203be95af2c4c1d7ce33496fac880c (patch)
tree	d30969af462c1c847aebb202b93e3b5a7cd43250
parent	e67f48ce5e7ad122b17e23b2705bf66cff76d42b (diff)
parent	103e8113e4bb4ad3687d641f660481c72904d571 (diff)
download	cpython-10c8791978203be95af2c4c1d7ce33496fac880c.zip cpython-10c8791978203be95af2c4c1d7ce33496fac880c.tar.gz cpython-10c8791978203be95af2c4c1d7ce33496fac880c.tar.bz2