diff options
author | Ruben Vorderman <r.h.p.vorderman@lumc.nl> | 2024-06-15 18:46:39 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-15 18:46:39 (GMT) |
commit | 08d09cf5ba041c9c5c3860200b56bab66fd44a23 (patch) | |
tree | 8366fa5badaf1f9dbeecde4b79922abb06b19e1f | |
parent | 31d1d72d7e24e0427df70f7dd14b9baff28a4f89 (diff) | |
download | cpython-08d09cf5ba041c9c5c3860200b56bab66fd44a23.zip cpython-08d09cf5ba041c9c5c3860200b56bab66fd44a23.tar.gz cpython-08d09cf5ba041c9c5c3860200b56bab66fd44a23.tar.bz2 |
gh-112346: Always set OS byte to 255, simpler gzip.compress function. (GH-120486)
This matches the output behavior in 3.10 and earlier; the optimization in 3.11 allowed the zlib library's "os" value to be filled in instead when mtime was 0. This keeps things consistent.
-rw-r--r-- | Doc/library/gzip.rst | 8 | ||||
-rw-r--r-- | Lib/gzip.py | 38 | ||||
-rw-r--r-- | Lib/test/test_gzip.py | 12 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2024-06-12-10-00-31.gh-issue-90425.5CfkKG.rst | 2 |
4 files changed, 26 insertions, 34 deletions
diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index 965da59..152cba4 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -188,9 +188,7 @@ The module defines the following items: Compress the *data*, returning a :class:`bytes` object containing the compressed data. *compresslevel* and *mtime* have the same meaning as in - the :class:`GzipFile` constructor above. When *mtime* is set to ``0``, this - function is equivalent to :func:`zlib.compress` with *wbits* set to ``31``. - The zlib function is faster. + the :class:`GzipFile` constructor above. .. versionadded:: 3.2 .. versionchanged:: 3.8 @@ -200,6 +198,10 @@ The module defines the following items: streamed fashion. Calls with *mtime* set to ``0`` are delegated to :func:`zlib.compress` for better speed. + .. versionchanged:: 3.13 + The gzip header OS byte is guaranteed to be set to 255 when this function + is used as was the case in 3.10 and earlier. + .. function:: decompress(data) Decompress the *data*, returning a :class:`bytes` object containing the diff --git a/Lib/gzip.py b/Lib/gzip.py index 0d19c84..ba753ce 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -580,27 +580,6 @@ class _GzipReader(_compression.DecompressReader): self._new_member = True -def _create_simple_gzip_header(compresslevel: int, - mtime = None) -> bytes: - """ - Write a simple gzip header with no extra fields. - :param compresslevel: Compresslevel used to determine the xfl bytes. - :param mtime: The mtime (must support conversion to a 32-bit integer). - :return: A bytes object representing the gzip header. - """ - if mtime is None: - mtime = time.time() - if compresslevel == _COMPRESS_LEVEL_BEST: - xfl = 2 - elif compresslevel == _COMPRESS_LEVEL_FAST: - xfl = 4 - else: - xfl = 0 - # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra - # fields added to header), mtime, xfl and os (255 for unknown OS). 
- return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255) - - def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None): """Compress data in one shot and return the compressed string. @@ -608,15 +587,14 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None): mtime can be used to set the modification time. The modification time is set to the current time by default. """ - if mtime == 0: - # Use zlib as it creates the header with 0 mtime by default. - # This is faster and with less overhead. - return zlib.compress(data, level=compresslevel, wbits=31) - header = _create_simple_gzip_header(compresslevel, mtime) - trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff)) - # Wbits=-15 creates a raw deflate block. - return (header + zlib.compress(data, level=compresslevel, wbits=-15) + - trailer) + # Wbits=31 automatically includes a gzip header and trailer. + gzip_data = zlib.compress(data, level=compresslevel, wbits=31) + if mtime is None: + mtime = time.time() + # Reuse gzip header created by zlib, replace mtime and OS byte for + # consistency. + header = struct.pack("<4sLBB", gzip_data, int(mtime), gzip_data[8], 255) + return header + gzip_data[10:] def decompress(data): diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index cf80127..ae384c3 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -714,7 +714,6 @@ class TestGzip(BaseTest): self.assertEqual(f.mtime, mtime) def test_compress_correct_level(self): - # gzip.compress calls with mtime == 0 take a different code path. for mtime in (0, 42): with self.subTest(mtime=mtime): nocompress = gzip.compress(data1, compresslevel=0, mtime=mtime) @@ -722,6 +721,17 @@ class TestGzip(BaseTest): self.assertIn(data1, nocompress) self.assertNotIn(data1, yescompress) + def test_issue112346(self): + # The OS byte should be 255, this should not change between Python versions. 
+ for mtime in (0, 42): + with self.subTest(mtime=mtime): + compress = gzip.compress(data1, compresslevel=1, mtime=mtime) + self.assertEqual( + struct.unpack("<IxB", compress[4:10]), + (mtime, 255), + "Gzip header does not properly set either mtime or OS byte." + ) + def test_decompress(self): for data in (data1, data2): buf = io.BytesIO() diff --git a/Misc/NEWS.d/next/Library/2024-06-12-10-00-31.gh-issue-90425.5CfkKG.rst b/Misc/NEWS.d/next/Library/2024-06-12-10-00-31.gh-issue-90425.5CfkKG.rst new file mode 100644 index 0000000..d152af4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-12-10-00-31.gh-issue-90425.5CfkKG.rst @@ -0,0 +1,2 @@ +The OS byte in gzip headers is now always set to 255 when using +:func:`gzip.compress`. |