diff options
-rw-r--r-- | Doc/library/gzip.rst | 46 |
-rw-r--r-- | Lib/gzip.py | 42 |
-rw-r--r-- | Lib/test/test_gzip.py | 88 |
-rw-r--r-- | Misc/NEWS | 2 |
4 files changed, 157 insertions, 21 deletions
diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index 9e57990..861a59c 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -13,9 +13,11 @@ like the GNU programs :program:`gzip` and :program:`gunzip` would. The data compression is provided by the :mod:`zlib` module. -The :mod:`gzip` module provides the :class:`GzipFile` class. The :class:`GzipFile` -class reads and writes :program:`gzip`\ -format files, automatically compressing -or decompressing the data so that it looks like an ordinary :term:`file object`. +The :mod:`gzip` module provides the :class:`GzipFile` class, as well as the +:func:`gzip.open`, :func:`compress` and :func:`decompress` convenience +functions. The :class:`GzipFile` class reads and writes :program:`gzip`\ -format +files, automatically compressing or decompressing the data so that it looks like +an ordinary :term:`file object`. Note that additional file formats which can be decompressed by the :program:`gzip` and :program:`gunzip` programs, such as those produced by @@ -24,6 +26,32 @@ Note that additional file formats which can be decompressed by the The module defines the following items: +.. function:: open(filename, mode='rb', compresslevel=9, encoding=None, errors=None, newline=None) + + Open *filename* as a gzip-compressed file in binary or text mode. + + Returns a :term:`file object`. + + The *mode* argument can be any of ``'r'``, ``'rb'``, ``'a'``, ``'ab'``, + ``'w'``, or ``'wb'`` for binary mode, or ``'rt'``, ``'at'``, or ``'wt'`` for + text mode. The default is ``'rb'``. + + The *compresslevel* argument is an integer from 1 to 9, as for the + :class:`GzipFile` constructor. + + For binary mode, this function is equivalent to the :class:`GzipFile` + constructor: ``GzipFile(filename, mode, compresslevel)``. In this case, the + *encoding*, *errors* and *newline* arguments must not be provided. 
+ + For text mode, a :class:`GzipFile` object is created, and wrapped in an + :class:`io.TextIOWrapper` instance with the specified encoding, error + handling behavior, and line ending(s). + + .. versionchanged:: 3.3 + Support for text mode was added, along with the *encoding*, *errors* and + *newline* arguments. + + .. class:: GzipFile(filename=None, mode=None, compresslevel=9, fileobj=None, mtime=None) Constructor for the :class:`GzipFile` class, which simulates most of the @@ -46,9 +74,9 @@ The module defines the following items: or ``'wb'``, depending on whether the file will be read or written. The default is the mode of *fileobj* if discernible; otherwise, the default is ``'rb'``. - Note that the file is always opened in binary mode; text mode is not - supported. If you need to read a compressed file in text mode, wrap your - :class:`GzipFile` with an :class:`io.TextIOWrapper`. + Note that the file is always opened in binary mode. To open a compressed file + in text mode, use :func:`gzip.open` (or wrap your :class:`GzipFile` with an + :class:`io.TextIOWrapper`). The *compresslevel* argument is an integer from ``1`` to ``9`` controlling the level of compression; ``1`` is fastest and produces the least compression, and @@ -97,12 +125,6 @@ The module defines the following items: The :meth:`io.BufferedIOBase.read1` method is now implemented. -.. function:: open(filename, mode='rb', compresslevel=9) - - This is a shorthand for ``GzipFile(filename,`` ``mode,`` ``compresslevel)``. - The *filename* argument is required; *mode* defaults to ``'rb'`` and - *compresslevel* defaults to ``9``. - .. 
function:: compress(data, compresslevel=9) Compress the *data*, returning a :class:`bytes` object containing diff --git a/Lib/gzip.py b/Lib/gzip.py index 85c3e15..2f53aa8 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -16,6 +16,39 @@ FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 READ, WRITE = 1, 2 +def open(filename, mode="rb", compresslevel=9, + encoding=None, errors=None, newline=None): + """Open a gzip-compressed file in binary or text mode. + + The mode argument can be "r", "rb", "w", "wb", "a" or "ab" for binary mode, + or "rt", "wt" or "at" for text mode. The default mode is "rb", and the + default compresslevel is 9. + + For binary mode, this function is equivalent to the GzipFile constructor: + GzipFile(filename, mode, compresslevel). In this case, the encoding, errors + and newline arguments must not be provided. + + For text mode, a GzipFile object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error handling + behavior, and line ending(s). + + """ + if "t" in mode: + if "b" in mode: + raise ValueError("Invalid mode: %r" % (mode,)) + else: + if encoding is not None: + raise ValueError("Argument 'encoding' not supported in binary mode") + if errors is not None: + raise ValueError("Argument 'errors' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + binary_file = GzipFile(filename, mode.replace("t", ""), compresslevel) + if "t" in mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file + def write32u(output, value): # The L format writes the bit pattern correctly whether signed # or unsigned. @@ -24,15 +57,6 @@ def write32u(output, value): def read32(input): return struct.unpack("<I", input.read(4))[0] -def open(filename, mode="rb", compresslevel=9): - """Shorthand for GzipFile(filename, mode, compresslevel). 
- - The filename argument is required; mode defaults to 'rb' - and compresslevel defaults to 9. - - """ - return GzipFile(filename, mode, compresslevel) - class _PaddedFile: """Minimal read-only file object that prepends a string to the contents of an actual file. Shouldn't be used outside of gzip.py, as it lacks diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index d2b4871..db1312d 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -374,6 +374,94 @@ class TestGzip(unittest.TestCase): datac = gzip.compress(data) self.assertEqual(gzip.decompress(datac), data) + # Test the 'open' convenience function. + + def test_open_binary(self): + # Test explicit binary modes. + uncompressed = data1 * 50 + with gzip.open(self.filename, "wb") as f: + f.write(uncompressed) + with open(self.filename, "rb") as f: + file_data = gzip.decompress(f.read()) + self.assertEqual(file_data, uncompressed) + with gzip.open(self.filename, "rb") as f: + self.assertEqual(f.read(), uncompressed) + with gzip.open(self.filename, "ab") as f: + f.write(uncompressed) + with open(self.filename, "rb") as f: + file_data = gzip.decompress(f.read()) + self.assertEqual(file_data, uncompressed * 2) + + def test_open_default_binary(self): + # Test implicit binary modes (no "b" or "t" in mode string). + uncompressed = data1 * 50 + with gzip.open(self.filename, "w") as f: + f.write(uncompressed) + with open(self.filename, "rb") as f: + file_data = gzip.decompress(f.read()) + self.assertEqual(file_data, uncompressed) + with gzip.open(self.filename, "r") as f: + self.assertEqual(f.read(), uncompressed) + with gzip.open(self.filename, "a") as f: + f.write(uncompressed) + with open(self.filename, "rb") as f: + file_data = gzip.decompress(f.read()) + self.assertEqual(file_data, uncompressed * 2) + + def test_open_text(self): + # Test text modes. 
+ uncompressed = data1.decode("ascii") * 50 + with gzip.open(self.filename, "wt") as f: + f.write(uncompressed) + with open(self.filename, "rb") as f: + file_data = gzip.decompress(f.read()).decode("ascii") + self.assertEqual(file_data, uncompressed) + with gzip.open(self.filename, "rt") as f: + self.assertEqual(f.read(), uncompressed) + with gzip.open(self.filename, "at") as f: + f.write(uncompressed) + with open(self.filename, "rb") as f: + file_data = gzip.decompress(f.read()).decode("ascii") + self.assertEqual(file_data, uncompressed * 2) + + def test_open_bad_params(self): + # Test invalid parameter combinations. + with self.assertRaises(ValueError): + gzip.open(self.filename, "wbt") + with self.assertRaises(ValueError): + gzip.open(self.filename, "rb", encoding="utf-8") + with self.assertRaises(ValueError): + gzip.open(self.filename, "rb", errors="ignore") + with self.assertRaises(ValueError): + gzip.open(self.filename, "rb", newline="\n") + + def test_open_with_encoding(self): + # Test non-default encoding. + uncompressed = data1.decode("ascii") * 50 + with gzip.open(self.filename, "wt", encoding="utf-16") as f: + f.write(uncompressed) + with open(self.filename, "rb") as f: + file_data = gzip.decompress(f.read()).decode("utf-16") + self.assertEqual(file_data, uncompressed) + with gzip.open(self.filename, "rt", encoding="utf-16") as f: + self.assertEqual(f.read(), uncompressed) + + def test_open_with_encoding_error_handler(self): + # Test with non-default encoding error handler. + with gzip.open(self.filename, "wb") as f: + f.write(b"foo\xffbar") + with gzip.open(self.filename, "rt", encoding="ascii", errors="ignore") \ + as f: + self.assertEqual(f.read(), "foobar") + + def test_open_with_newline(self): + # Test with explicit newline (universal newline mode disabled). 
+ uncompressed = data1.decode("ascii") * 50 + with gzip.open(self.filename, "wt") as f: + f.write(uncompressed) + with gzip.open(self.filename, "rt", newline="\r") as f: + self.assertEqual(f.readlines(), [uncompressed]) + def test_main(verbose=None): support.run_unittest(TestGzip) diff --git a/Misc/NEWS b/Misc/NEWS --- a/Misc/NEWS +++ b/Misc/NEWS @@ -17,6 +17,8 @@ Core and Builtins Library ------- +- Issue #13989: Add support for text mode to gzip.open(). + - Issue #14127: The os.stat() result object now provides three additional fields: st_ctime_ns, st_mtime_ns, and st_atime_ns, providing those times as an integer with nanosecond resolution. The functions os.utime(), os.lutimes(),