[3.7] bpo-39389: gzip: fix compression level metadata (GH-18077) (GH-18101)

* bpo-39389: gzip: fix compression level metadata (GH-18077)

As described in RFC 1952, section 2.3.1, the XFL (eXtra FLags) byte of a gzip member header should indicate whether the DEFLATE algorithm was tuned for speed or compression ratio. Prior to this patch, archives emitted by the `gzip` module always indicated maximum compression.

(cherry picked from commit eab3b3f1c60afecfb4db3c3619109684cb04bd60)

Co-authored-by: William Chargin <wchargin@gmail.com>
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2020-03-04 07:06:19 (GMT)
committer: GitHub <noreply@github.com> 2020-03-04 07:06:19 (GMT)
commit: 12c45efe828a90a2f2f58a1f95c85d792a0d9c0a (patch)
tree: 54b7db6cfae96c80d144e0666cebfa5e6271bab2
parent: 3eff46fc7d2e3c80c4dedba4177782f1fc8ad89b (diff)
download: cpython-12c45efe828a90a2f2f58a1f95c85d792a0d9c0a.zip
cpython-12c45efe828a90a2f2f58a1f95c85d792a0d9c0a.tar.gz
cpython-12c45efe828a90a2f2f58a1f95c85d792a0d9c0a.tar.bz2
3 files changed, 36 insertions, 3 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py
index ddc7bda..e59b454 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -17,6 +17,11 @@ FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
 
 READ, WRITE = 1, 2
 
+_COMPRESS_LEVEL_FAST = 1
+_COMPRESS_LEVEL_TRADEOFF = 6
+_COMPRESS_LEVEL_BEST = 9
+
+
 def open(filename, mode="rb", compresslevel=9,
          encoding=None, errors=None, newline=None):
     """Open a gzip-compressed file in binary or text mode.
@@ -191,7 +196,7 @@ class GzipFile(_compression.BaseStream):
         self.fileobj = fileobj
 
         if self.mode == WRITE:
-            self._write_gzip_header()
+            self._write_gzip_header(compresslevel)
 
     @property
     def filename(self):
@@ -218,7 +223,7 @@ class GzipFile(_compression.BaseStream):
         self.bufsize = 0
         self.offset = 0  # Current file offset for seek(), tell(), etc
 
-    def _write_gzip_header(self):
+    def _write_gzip_header(self, compresslevel):
         self.fileobj.write(b'\037\213')             # magic header
         self.fileobj.write(b'\010')                 # compression method
         try:
@@ -239,7 +244,13 @@ class GzipFile(_compression.BaseStream):
         if mtime is None:
             mtime = time.time()
         write32u(self.fileobj, int(mtime))
-        self.fileobj.write(b'\002')
+        if compresslevel == _COMPRESS_LEVEL_BEST:
+            xfl = b'\002'
+        elif compresslevel == _COMPRESS_LEVEL_FAST:
+            xfl = b'\004'
+        else:
+            xfl = b'\000'
+        self.fileobj.write(xfl)
         self.fileobj.write(b'\377')
         if fname:
             self.fileobj.write(fname + b'\000')
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index 17ecda2..0251914 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -358,6 +358,26 @@ class TestGzip(BaseTest):
             isizeBytes = fRead.read(4)
             self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))
 
+    def test_compresslevel_metadata(self):
+        # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html
+        # specifically, discussion of XFL in section 2.3.1
+        cases = [
+            ('fast', 1, b'\x04'),
+            ('best', 9, b'\x02'),
+            ('tradeoff', 6, b'\x00'),
+        ]
+        xflOffset = 8
+
+        for (name, level, expectedXflByte) in cases:
+            with self.subTest(name):
+                fWrite = gzip.GzipFile(self.filename, 'w', compresslevel=level)
+                with fWrite:
+                    fWrite.write(data1)
+                with open(self.filename, 'rb') as fRead:
+                    fRead.seek(xflOffset)
+                    xflByte = fRead.read(1)
+                    self.assertEqual(xflByte, expectedXflByte)
+
     def test_with_open(self):
         # GzipFile supports the context management protocol
         with gzip.GzipFile(self.filename, "wb") as f:
diff --git a/Misc/NEWS.d/next/Library/2020-01-20-00-56-01.bpo-39389.fEirIS.rst b/Misc/NEWS.d/next/Library/2020-01-20-00-56-01.bpo-39389.fEirIS.rst
new file mode 100644
index 0000000..d4c8050
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-01-20-00-56-01.bpo-39389.fEirIS.rst
@@ -0,0 +1,2 @@
+Write accurate compression level metadata in :mod:`gzip` archives, rather
+than always signaling maximum compression.
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>	2020-03-04 07:06:19 (GMT)
committer	GitHub <noreply@github.com>	2020-03-04 07:06:19 (GMT)
commit	12c45efe828a90a2f2f58a1f95c85d792a0d9c0a (patch)
tree	54b7db6cfae96c80d144e0666cebfa5e6271bab2
parent	3eff46fc7d2e3c80c4dedba4177782f1fc8ad89b (diff)
download	cpython-12c45efe828a90a2f2f58a1f95c85d792a0d9c0a.zip cpython-12c45efe828a90a2f2f58a1f95c85d792a0d9c0a.tar.gz cpython-12c45efe828a90a2f2f58a1f95c85d792a0d9c0a.tar.bz2