summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/gzip.rst11
-rw-r--r--Lib/gzip.py21
-rw-r--r--Lib/test/test_gzip.py63
-rw-r--r--Misc/NEWS5
4 files changed, 95 insertions, 5 deletions
diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst
index c6f9ef8..fa73bba 100644
--- a/Doc/library/gzip.rst
+++ b/Doc/library/gzip.rst
@@ -24,7 +24,7 @@ For other archive formats, see the :mod:`bz2`, :mod:`zipfile`, and
The module defines the following items:
-.. class:: GzipFile([filename[, mode[, compresslevel[, fileobj]]]])
+.. class:: GzipFile([filename[, mode[, compresslevel[, fileobj[, mtime]]]]])
Constructor for the :class:`GzipFile` class, which simulates most of the methods
of a file object, with the exception of the :meth:`readinto` and
@@ -52,6 +52,15 @@ The module defines the following items:
level of compression; ``1`` is fastest and produces the least compression, and
``9`` is slowest and produces the most compression. The default is ``9``.
+ The *mtime* argument is an optional numeric timestamp to be written to
+ the stream when compressing. All :program:`gzip` compressed streams are
+ required to contain a timestamp. If omitted or ``None``, the current
+ time is used. This module ignores the timestamp when decompressing;
+ however, some programs, such as :program:`gunzip`\ , make use of it.
+ The format of the timestamp is the same as that of the return value of
+ ``time.time()`` and of the ``st_mtime`` member of the object returned
+ by ``os.stat()``.
+
Calling a :class:`GzipFile` object's :meth:`close` method does not close
*fileobj*, since you might wish to append more material after the compressed
data. This also allows you to pass a :class:`StringIO` object opened for
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 11d5571..560a722 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -54,7 +54,7 @@ class GzipFile:
max_read_chunk = 10 * 1024 * 1024 # 10Mb
def __init__(self, filename=None, mode=None,
- compresslevel=9, fileobj=None):
+ compresslevel=9, fileobj=None, mtime=None):
"""Constructor for the GzipFile class.
At least one of fileobj and filename must be given a
@@ -81,6 +81,15 @@ class GzipFile:
level of compression; 1 is fastest and produces the least compression,
and 9 is slowest and produces the most compression. The default is 9.
+ The mtime argument is an optional numeric timestamp to be written
+ to the stream when compressing. All gzip compressed streams
+ are required to contain a timestamp. If omitted or None, the
+ current time is used. This module ignores the timestamp when
+ decompressing; however, some programs, such as gunzip, make use
+ of it. The format of the timestamp is the same as that of the
+ return value of time.time() and of the st_mtime member of the
+ object returned by os.stat().
+
"""
# guarantee the file is opened in binary mode on platforms
@@ -119,6 +128,7 @@ class GzipFile:
self.fileobj = fileobj
self.offset = 0
+ self.mtime = mtime
if self.mode == WRITE:
self._write_gzip_header()
@@ -157,7 +167,10 @@ class GzipFile:
if fname:
flags = FNAME
self.fileobj.write(chr(flags).encode('latin-1'))
- write32u(self.fileobj, int(time.time()))
+ mtime = self.mtime
+ if mtime is None:
+ mtime = time.time()
+ write32u(self.fileobj, int(mtime))
self.fileobj.write(b'\002')
self.fileobj.write(b'\377')
if fname:
@@ -175,10 +188,10 @@ class GzipFile:
if method != 8:
raise IOError('Unknown compression method')
flag = ord( self.fileobj.read(1) )
- # modtime = self.fileobj.read(4)
+ self.mtime = read32(self.fileobj)
# extraflag = self.fileobj.read(1)
# os = self.fileobj.read(1)
- self.fileobj.read(6)
+ self.fileobj.read(2)
if flag & FEXTRA:
# Read & discard the extra field, if present
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index d28c024..e758826 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -6,6 +6,7 @@ import unittest
from test import support
import os
import gzip
+import struct
data1 = b""" int length=DEFAULTALLOC, err = Z_OK;
@@ -160,6 +161,68 @@ class TestGzip(unittest.TestCase):
self.assertEqual(f.name, self.filename)
f.close()
+ def test_mtime(self):
+ mtime = 123456789
+ fWrite = gzip.GzipFile(self.filename, 'w', mtime = mtime)
+ fWrite.write(data1)
+ fWrite.close()
+
+ fRead = gzip.GzipFile(self.filename)
+ dataRead = fRead.read()
+ self.assertEqual(dataRead, data1)
+ self.assert_(hasattr(fRead, 'mtime'))
+ self.assertEqual(fRead.mtime, mtime)
+ fRead.close()
+
+ def test_metadata(self):
+ mtime = 123456789
+
+ fWrite = gzip.GzipFile(self.filename, 'w', mtime = mtime)
+ fWrite.write(data1)
+ fWrite.close()
+
+ fRead = open(self.filename, 'rb')
+
+ # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html
+
+ idBytes = fRead.read(2)
+ self.assertEqual(idBytes, b'\x1f\x8b') # gzip ID
+
+ cmByte = fRead.read(1)
+ self.assertEqual(cmByte, b'\x08') # deflate
+
+ flagsByte = fRead.read(1)
+ self.assertEqual(flagsByte, b'\x08') # only the FNAME flag is set
+
+ mtimeBytes = fRead.read(4)
+ self.assertEqual(mtimeBytes, struct.pack('<i', mtime)) # little-endian
+
+ xflByte = fRead.read(1)
+ self.assertEqual(xflByte, b'\x02') # maximum compression
+
+ osByte = fRead.read(1)
+ self.assertEqual(osByte, b'\xff') # OS "unknown" (OS-independent)
+
+ # Since the FNAME flag is set, the zero-terminated filename follows.
+ # RFC 1952 specifies that this is the name of the input file, if any.
+ # However, the gzip module defaults to storing the name of the output
+ # file in this field.
+ expected = self.filename.encode('Latin-1') + b'\x00'
+ nameBytes = fRead.read(len(expected))
+ self.assertEqual(nameBytes, expected)
+
+ # Since no other flags were set, the header ends here.
+ # Rather than process the compressed data, let's seek to the trailer.
+ fRead.seek(os.stat(self.filename).st_size - 8)
+
+ crc32Bytes = fRead.read(4) # CRC32 of uncompressed data [data1]
+ self.assertEqual(crc32Bytes, b'\xaf\xd7d\x83')
+
+ isizeBytes = fRead.read(4)
+ self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))
+
+ fRead.close()
+
def test_main(verbose=None):
support.run_unittest(TestGzip)
diff --git a/Misc/NEWS b/Misc/NEWS
index 495dd3e..5abe1c7 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -82,6 +82,11 @@ Core and Builtins
Library
-------
+- Issue #4272: Add an optional argument to the GzipFile constructor to override
+ the timestamp in the gzip stream. The default value remains the current time.
+ The information can be used by e.g. gunzip when decompressing. Patch by
+ Jacques Frechet.
+
- Restore Python 2.3 compatibility for decimal.py.
- Issue #3638: Remove functions from _tkinter module level that depend on