diff options
-rw-r--r-- | Doc/library/gzip.rst | 16 | ||||
-rw-r--r-- | Lib/gzip.py | 25 | ||||
-rw-r--r-- | Lib/test/test_gzip.py | 22 | ||||
-rw-r--r-- | Misc/NEWS | 2 |
4 files changed, 58 insertions, 7 deletions
diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst
index dc04ba0..0ae23d2 100644
--- a/Doc/library/gzip.rst
+++ b/Doc/library/gzip.rst
@@ -25,10 +25,10 @@ The module defines the following items:
 
 .. class:: GzipFile(filename=None, mode=None, compresslevel=9, fileobj=None, mtime=None)
 
-   Constructor for the :class:`GzipFile` class, which simulates most of the methods
-   of a :term:`file object`, with the exception of the :meth:`readinto` and
-   :meth:`truncate` methods. At least one of *fileobj* and *filename* must be
-   given a non-trivial value.
+   Constructor for the :class:`GzipFile` class, which simulates most of the
+   methods of a :term:`file object`, with the exception of the :meth:`truncate`
+   method. At least one of *fileobj* and *filename* must be given a non-trivial
+   value.
 
    The new class instance is based on *fileobj*, which can be a regular file, a
    :class:`StringIO` object, or any other object which simulates a file. It
@@ -66,8 +66,9 @@ The module defines the following items:
    writing as *fileobj*, and retrieve the resulting memory buffer using the
    :class:`io.BytesIO` object's :meth:`~io.BytesIO.getvalue` method.
 
-   :class:`GzipFile` supports the whole :class:`io.BufferedIOBase` interface,
-   including iteration and the :keyword:`with` statement.
+   :class:`GzipFile` supports the :class:`io.BufferedIOBase` interface,
+   including iteration and the :keyword:`with` statement. Only the
+   :meth:`truncate` method isn't implemented.
 
    .. versionchanged:: 3.1
       Support for the :keyword:`with` statement was added.
@@ -78,6 +79,9 @@ The module defines the following items:
    .. versionchanged:: 3.2
       Support for unseekable files was added.
 
+   .. versionchanged:: 3.2
+      The :meth:`peek` method was implemented.
+
 
 .. function:: open(filename, mode='rb', compresslevel=9)
 
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 3edc839..58e866b 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -204,7 +204,10 @@ class GzipFile(io.BufferedIOBase):
         return self.name
 
     def __repr__(self):
-        s = repr(self.fileobj)
+        fileobj = self.fileobj
+        if isinstance(fileobj, _PaddedFile):
+            fileobj = fileobj.file
+        s = repr(fileobj)
         return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'
 
     def _init_write(self, filename):
@@ -336,6 +339,26 @@ class GzipFile(io.BufferedIOBase):
         self.offset += size
         return chunk
 
+    def peek(self, n):
+        if self.mode != READ:
+            import errno
+            raise IOError(errno.EBADF, "read() on write-only GzipFile object")
+
+        # Do not return ridiculously small buffers
+        if n < 100:
+            n = 100
+        if self.extrasize == 0:
+            if self.fileobj is None:
+                return b''
+            try:
+                self._read(max(self.max_read_chunk, n))
+            except EOFError:
+                pass
+        offset = self.offset - self.extrastart
+        remaining = self.extrasize
+        assert remaining == len(self.extrabuf) - offset
+        return self.extrabuf[offset:offset + n]
+
     def _unread(self, buf):
         self.extrasize = len(buf) + self.extrasize
         self.offset -= len(buf)
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index e49fe00..8e493b5 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -286,6 +286,28 @@ class TestGzip(unittest.TestCase):
         with gzip.GzipFile(fileobj=buf, mode="rb") as f:
             self.assertEqual(f.read(), uncompressed)
 
+    def test_peek(self):
+        uncompressed = data1 * 200
+        with gzip.GzipFile(self.filename, "wb") as f:
+            f.write(uncompressed)
+
+        def sizes():
+            while True:
+                for n in range(5, 50, 10):
+                    yield n
+
+        with gzip.GzipFile(self.filename, "rb") as f:
+            f.max_read_chunk = 33
+            nread = 0
+            for n in sizes():
+                s = f.peek(n)
+                if s == b'':
+                    break
+                self.assertEqual(f.read(len(s)), s)
+                nread += len(s)
+            self.assertEqual(f.read(100), b'')
+            self.assertEqual(nread, len(uncompressed))
+
     # Testing compress/decompress shortcut functions
 
     def test_compress(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -76,6 +76,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #9962: GzipFile now has the peek() method.
+
 - Issue #9090: When a socket with a timeout fails with EWOULDBLOCK or EAGAIN,
   retry the select() loop instead of bailing out. This is because select()
   can incorrectly report a socket as ready for reading (for example, if it