diff options
author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2020-03-04 07:06:19 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-04 07:06:19 (GMT) |
commit | 12c45efe828a90a2f2f58a1f95c85d792a0d9c0a (patch) | |
tree | 54b7db6cfae96c80d144e0666cebfa5e6271bab2 | |
parent | 3eff46fc7d2e3c80c4dedba4177782f1fc8ad89b (diff) | |
download | cpython-12c45efe828a90a2f2f58a1f95c85d792a0d9c0a.zip cpython-12c45efe828a90a2f2f58a1f95c85d792a0d9c0a.tar.gz cpython-12c45efe828a90a2f2f58a1f95c85d792a0d9c0a.tar.bz2 |
[3.7] bpo-39389: gzip: fix compression level metadata (GH-18077) (GH-18101)
* bpo-39389: gzip: fix compression level metadata (GH-18077)
As described in RFC 1952, section 2.3.1, the XFL (eXtra FLags) byte of a
gzip member header should indicate whether the DEFLATE algorithm was
tuned for speed or compression ratio. Prior to this patch, archives
emitted by the `gzip` module always indicated maximum compression.
(cherry picked from commit eab3b3f1c60afecfb4db3c3619109684cb04bd60)
Co-authored-by: William Chargin <wchargin@gmail.com>
-rw-r--r-- | Lib/gzip.py | 17 | ||||
-rw-r--r-- | Lib/test/test_gzip.py | 20 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2020-01-20-00-56-01.bpo-39389.fEirIS.rst | 2 |
3 files changed, 36 insertions, 3 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py
index ddc7bda..e59b454 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -17,6 +17,11 @@ FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
 
 READ, WRITE = 1, 2
 
+_COMPRESS_LEVEL_FAST = 1
+_COMPRESS_LEVEL_TRADEOFF = 6
+_COMPRESS_LEVEL_BEST = 9
+
+
 def open(filename, mode="rb", compresslevel=9,
          encoding=None, errors=None, newline=None):
     """Open a gzip-compressed file in binary or text mode.
@@ -191,7 +196,7 @@ class GzipFile(_compression.BaseStream):
         self.fileobj = fileobj
 
         if self.mode == WRITE:
-            self._write_gzip_header()
+            self._write_gzip_header(compresslevel)
 
     @property
     def filename(self):
@@ -218,7 +223,7 @@ class GzipFile(_compression.BaseStream):
         self.bufsize = 0
         self.offset = 0 # Current file offset for seek(), tell(), etc
 
-    def _write_gzip_header(self):
+    def _write_gzip_header(self, compresslevel):
         self.fileobj.write(b'\037\213') # magic header
         self.fileobj.write(b'\010') # compression method
         try:
@@ -239,7 +244,13 @@ class GzipFile(_compression.BaseStream):
         if mtime is None:
             mtime = time.time()
         write32u(self.fileobj, int(mtime))
-        self.fileobj.write(b'\002')
+        if compresslevel == _COMPRESS_LEVEL_BEST:
+            xfl = b'\002'
+        elif compresslevel == _COMPRESS_LEVEL_FAST:
+            xfl = b'\004'
+        else:
+            xfl = b'\000'
+        self.fileobj.write(xfl)
         self.fileobj.write(b'\377')
         if fname:
             self.fileobj.write(fname + b'\000')
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index 17ecda2..0251914 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -358,6 +358,26 @@ class TestGzip(BaseTest):
             isizeBytes = fRead.read(4)
             self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))
 
+    def test_compresslevel_metadata(self):
+        # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html
+        # specifically, discussion of XFL in section 2.3.1
+        cases = [
+            ('fast', 1, b'\x04'),
+            ('best', 9, b'\x02'),
+            ('tradeoff', 6, b'\x00'),
+        ]
+        xflOffset = 8
+
+        for (name, level, expectedXflByte) in cases:
+            with self.subTest(name):
+                fWrite = gzip.GzipFile(self.filename, 'w', compresslevel=level)
+                with fWrite:
+                    fWrite.write(data1)
+                with open(self.filename, 'rb') as fRead:
+                    fRead.seek(xflOffset)
+                    xflByte = fRead.read(1)
+                    self.assertEqual(xflByte, expectedXflByte)
+
     def test_with_open(self):
         # GzipFile supports the context management protocol
         with gzip.GzipFile(self.filename, "wb") as f:
diff --git a/Misc/NEWS.d/next/Library/2020-01-20-00-56-01.bpo-39389.fEirIS.rst b/Misc/NEWS.d/next/Library/2020-01-20-00-56-01.bpo-39389.fEirIS.rst
new file mode 100644
index 0000000..d4c8050
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-01-20-00-56-01.bpo-39389.fEirIS.rst
@@ -0,0 +1,2 @@
+Write accurate compression level metadata in :mod:`gzip` archives, rather
+than always signaling maximum compression.