summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- Doc/library/gzip.rst 46
-rw-r--r-- Lib/gzip.py 42
-rw-r--r-- Lib/test/test_gzip.py 88
-rw-r--r-- Misc/NEWS 2
4 files changed, 157 insertions, 21 deletions
diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst
index 9e57990..861a59c 100644
--- a/Doc/library/gzip.rst
+++ b/Doc/library/gzip.rst
@@ -13,9 +13,11 @@ like the GNU programs :program:`gzip` and :program:`gunzip` would.
The data compression is provided by the :mod:`zlib` module.
-The :mod:`gzip` module provides the :class:`GzipFile` class. The :class:`GzipFile`
-class reads and writes :program:`gzip`\ -format files, automatically compressing
-or decompressing the data so that it looks like an ordinary :term:`file object`.
+The :mod:`gzip` module provides the :class:`GzipFile` class, as well as the
+:func:`gzip.open`, :func:`compress` and :func:`decompress` convenience
+functions. The :class:`GzipFile` class reads and writes :program:`gzip`\ -format
+files, automatically compressing or decompressing the data so that it looks like
+an ordinary :term:`file object`.
Note that additional file formats which can be decompressed by the
:program:`gzip` and :program:`gunzip` programs, such as those produced by
@@ -24,6 +26,32 @@ Note that additional file formats which can be decompressed by the
The module defines the following items:
+.. function:: open(filename, mode='rb', compresslevel=9, encoding=None, errors=None, newline=None)
+
+   Open *filename* as a gzip-compressed file in binary or text mode.
+
+   Returns a :term:`file object`.
+
+   The *mode* argument can be any of ``'r'``, ``'rb'``, ``'a'``, ``'ab'``,
+   ``'w'``, or ``'wb'`` for binary mode, or ``'rt'``, ``'at'``, or ``'wt'`` for
+   text mode. The default is ``'rb'``.
+
+   The *compresslevel* argument is an integer from 1 to 9, as for the
+   :class:`GzipFile` constructor.
+
+   For binary mode, this function is equivalent to the :class:`GzipFile`
+   constructor: ``GzipFile(filename, mode, compresslevel)``. In this case, the
+   *encoding*, *errors* and *newline* arguments must not be provided.
+
+   For text mode, a :class:`GzipFile` object is created, and wrapped in an
+   :class:`io.TextIOWrapper` instance with the specified encoding, error
+   handling behavior, and line ending(s).
+
+   .. versionchanged:: 3.3
+      Support for text mode was added, along with the *encoding*, *errors* and
+      *newline* arguments.
+
+
+
.. class:: GzipFile(filename=None, mode=None, compresslevel=9, fileobj=None, mtime=None)
Constructor for the :class:`GzipFile` class, which simulates most of the
@@ -46,9 +74,9 @@ The module defines the following items:
or ``'wb'``, depending on whether the file will be read or written. The default
is the mode of *fileobj* if discernible; otherwise, the default is ``'rb'``.
- Note that the file is always opened in binary mode; text mode is not
- supported. If you need to read a compressed file in text mode, wrap your
- :class:`GzipFile` with an :class:`io.TextIOWrapper`.
+ Note that the file is always opened in binary mode. To open a compressed file
+ in text mode, use :func:`gzip.open` (or wrap your :class:`GzipFile` with an
+ :class:`io.TextIOWrapper`).
The *compresslevel* argument is an integer from ``1`` to ``9`` controlling the
level of compression; ``1`` is fastest and produces the least compression, and
@@ -97,12 +125,6 @@ The module defines the following items:
The :meth:`io.BufferedIOBase.read1` method is now implemented.
-.. function:: open(filename, mode='rb', compresslevel=9)
-
- This is a shorthand for ``GzipFile(filename,`` ``mode,`` ``compresslevel)``.
- The *filename* argument is required; *mode* defaults to ``'rb'`` and
- *compresslevel* defaults to ``9``.
-
.. function:: compress(data, compresslevel=9)
Compress the *data*, returning a :class:`bytes` object containing
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 85c3e15..2f53aa8 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -16,6 +16,39 @@ FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
READ, WRITE = 1, 2
+def open(filename, mode="rb", compresslevel=9,
+         encoding=None, errors=None, newline=None):
+    """Open a gzip-compressed file in binary or text mode.
+
+    The mode argument can be "r", "rb", "w", "wb", "a" or "ab" for binary mode,
+    or "rt", "wt" or "at" for text mode. The default mode is "rb", and the
+    default compresslevel is 9.
+
+    For binary mode, this function is equivalent to the GzipFile constructor:
+    GzipFile(filename, mode, compresslevel). In this case, the encoding, errors
+    and newline arguments must not be provided.
+
+    For text mode, a GzipFile object is created, and wrapped in an
+    io.TextIOWrapper instance with the specified encoding, error handling
+    behavior, and line ending(s).
+
+    Raises ValueError if mode contains both "t" and "b", or if encoding,
+    errors or newline is given in binary mode.
+
+    """
+    text_mode = "t" in mode
+    if text_mode:
+        if "b" in mode:
+            raise ValueError("Invalid mode: %r" % (mode,))
+    else:
+        if encoding is not None:
+            raise ValueError("Argument 'encoding' not supported in binary mode")
+        if errors is not None:
+            raise ValueError("Argument 'errors' not supported in binary mode")
+        if newline is not None:
+            raise ValueError("Argument 'newline' not supported in binary mode")
+
+    # GzipFile is always given a binary mode, so drop the "t" flag here.
+    binary_file = GzipFile(filename, mode.replace("t", ""), compresslevel)
+    if not text_mode:
+        return binary_file
+    try:
+        return io.TextIOWrapper(binary_file, encoding, errors, newline)
+    except BaseException:
+        # The wrapper could not be constructed (e.g. unknown encoding or an
+        # illegal newline value). Close the file we just opened instead of
+        # leaking it, then let the original exception propagate.
+        binary_file.close()
+        raise
+
def write32u(output, value):
# The L format writes the bit pattern correctly whether signed
# or unsigned.
@@ -24,15 +57,6 @@ def write32u(output, value):
# Read 4 bytes from *input* and unpack them as a single unsigned 32-bit
# little-endian integer (struct format "<I").
def read32(input):
return struct.unpack("<I", input.read(4))[0]
-def open(filename, mode="rb", compresslevel=9):
- """Shorthand for GzipFile(filename, mode, compresslevel).
-
- The filename argument is required; mode defaults to 'rb'
- and compresslevel defaults to 9.
-
- """
- return GzipFile(filename, mode, compresslevel)
-
class _PaddedFile:
"""Minimal read-only file object that prepends a string to the contents
of an actual file. Shouldn't be used outside of gzip.py, as it lacks
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index d2b4871..db1312d 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -374,6 +374,94 @@ class TestGzip(unittest.TestCase):
datac = gzip.compress(data)
self.assertEqual(gzip.decompress(datac), data)
+ # Test the 'open' convenience function.
+
+    def test_open_binary(self):
+        # Exercise gzip.open() with the explicit binary modes "wb", "rb", "ab".
+        payload = data1 * 50
+        # "wb": the file on disk must decompress back to what was written.
+        with gzip.open(self.filename, "wb") as gz:
+            gz.write(payload)
+        with open(self.filename, "rb") as raw:
+            compressed = raw.read()
+        self.assertEqual(gzip.decompress(compressed), payload)
+        # "rb": reading through gzip.open() yields the same bytes.
+        with gzip.open(self.filename, "rb") as gz:
+            contents = gz.read()
+        self.assertEqual(contents, payload)
+        # "ab": after appending, the file decompresses to two copies.
+        with gzip.open(self.filename, "ab") as gz:
+            gz.write(payload)
+        with open(self.filename, "rb") as raw:
+            compressed = raw.read()
+        self.assertEqual(gzip.decompress(compressed), payload * 2)
+
+    def test_open_default_binary(self):
+        # Modes without "b" or "t" ("w", "r", "a") must behave as binary modes.
+        payload = data1 * 50
+        # "w": the file on disk must decompress back to what was written.
+        with gzip.open(self.filename, "w") as gz:
+            gz.write(payload)
+        with open(self.filename, "rb") as raw:
+            compressed = raw.read()
+        self.assertEqual(gzip.decompress(compressed), payload)
+        # "r": reading through gzip.open() yields the same bytes.
+        with gzip.open(self.filename, "r") as gz:
+            contents = gz.read()
+        self.assertEqual(contents, payload)
+        # "a": after appending, the file decompresses to two copies.
+        with gzip.open(self.filename, "a") as gz:
+            gz.write(payload)
+        with open(self.filename, "rb") as raw:
+            compressed = raw.read()
+        self.assertEqual(gzip.decompress(compressed), payload * 2)
+
+    def test_open_text(self):
+        # Test text modes.
+        import os  # local import: only needed for os.linesep below
+        uncompressed = data1.decode("ascii") * 50
+        # With the default newline handling, text-mode writes translate "\n"
+        # to os.linesep, so that is what the compressed stream contains.
+        # Comparing the raw data against the untranslated string would fail
+        # on platforms where os.linesep is not "\n" (e.g. Windows).
+        uncompressed_raw = uncompressed.replace("\n", os.linesep)
+        with gzip.open(self.filename, "wt") as f:
+            f.write(uncompressed)
+        with open(self.filename, "rb") as f:
+            file_data = gzip.decompress(f.read()).decode("ascii")
+        self.assertEqual(file_data, uncompressed_raw)
+        # Reading back in text mode undoes the translation again.
+        with gzip.open(self.filename, "rt") as f:
+            self.assertEqual(f.read(), uncompressed)
+        with gzip.open(self.filename, "at") as f:
+            f.write(uncompressed)
+        with open(self.filename, "rb") as f:
+            file_data = gzip.decompress(f.read()).decode("ascii")
+        self.assertEqual(file_data, uncompressed_raw * 2)
+
+    def test_open_bad_params(self):
+        # Invalid parameter combinations: "t" together with "b", and each
+        # text-only argument supplied in binary mode.  All must raise
+        # ValueError.
+        invalid_calls = (
+            ("wbt", {}),
+            ("rb", {"encoding": "utf-8"}),
+            ("rb", {"errors": "ignore"}),
+            ("rb", {"newline": "\n"}),
+        )
+        for bad_mode, text_kwargs in invalid_calls:
+            with self.assertRaises(ValueError):
+                gzip.open(self.filename, bad_mode, **text_kwargs)
+
+    def test_open_with_encoding(self):
+        # Test non-default encoding.
+        import os  # local import: only needed for os.linesep below
+        uncompressed = data1.decode("ascii") * 50
+        # The text-mode write translates "\n" to os.linesep before encoding,
+        # so the raw decompressed data must be compared against the
+        # translated text, not the original string.
+        uncompressed_raw = uncompressed.replace("\n", os.linesep)
+        with gzip.open(self.filename, "wt", encoding="utf-16") as f:
+            f.write(uncompressed)
+        with open(self.filename, "rb") as f:
+            file_data = gzip.decompress(f.read()).decode("utf-16")
+        self.assertEqual(file_data, uncompressed_raw)
+        # Reading back in text mode undoes the newline translation.
+        with gzip.open(self.filename, "rt", encoding="utf-16") as f:
+            self.assertEqual(f.read(), uncompressed)
+
+    def test_open_with_encoding_error_handler(self):
+        # Non-default error handler: with errors="ignore" the byte that is
+        # not valid ASCII must be dropped from the decoded text.
+        with gzip.open(self.filename, "wb") as gz:
+            gz.write(b"foo\xffbar")
+        reader = gzip.open(self.filename, "rt",
+                           encoding="ascii", errors="ignore")
+        with reader:
+            self.assertEqual(reader.read(), "foobar")
+
+    def test_open_with_newline(self):
+        # Test with explicit newline (universal newline mode disabled).
+        uncompressed = data1.decode("ascii") * 50
+        # Write with newline="\n" so that no platform-specific translation
+        # takes place.  With the default, "\n" would be stored as "\r\n" on
+        # Windows, and reading with newline="\r" below would then return
+        # many lines instead of one.
+        with gzip.open(self.filename, "wt", newline="\n") as f:
+            f.write(uncompressed)
+        # With newline="\r" only "\r" terminates a line, so the whole text
+        # comes back as a single untranslated line.
+        with gzip.open(self.filename, "rt", newline="\r") as f:
+            self.assertEqual(f.readlines(), [uncompressed])
+
# Run the TestGzip test case via support.run_unittest().  The *verbose*
# argument is accepted but not used in this function body.
def test_main(verbose=None):
support.run_unittest(TestGzip)
diff --git a/Misc/NEWS b/Misc/NEWS
index 937c9cc..e389940 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -17,6 +17,8 @@ Core and Builtins
Library
-------
+- Issue #13989: Add support for text mode to gzip.open().
+
- Issue #14127: The os.stat() result object now provides three additional
fields: st_ctime_ns, st_mtime_ns, and st_atime_ns, providing those times as an
integer with nanosecond resolution. The functions os.utime(), os.lutimes(),