From 7e126205e615518bc921132918c0ed7f48b31c85 Mon Sep 17 00:00:00 2001 From: Nadeem Vawda Date: Sun, 6 May 2012 15:04:01 +0200 Subject: Closes #13989: Add support for text modes to gzip.open(). Also, add tests for gzip.open(). --- Doc/library/gzip.rst | 46 ++++++++++++++++++++------- Lib/gzip.py | 42 ++++++++++++++++++------ Lib/test/test_gzip.py | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++ Misc/NEWS | 2 ++ 4 files changed, 157 insertions(+), 21 deletions(-) diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index 9e57990..861a59c 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -13,9 +13,11 @@ like the GNU programs :program:`gzip` and :program:`gunzip` would. The data compression is provided by the :mod:`zlib` module. -The :mod:`gzip` module provides the :class:`GzipFile` class. The :class:`GzipFile` -class reads and writes :program:`gzip`\ -format files, automatically compressing -or decompressing the data so that it looks like an ordinary :term:`file object`. +The :mod:`gzip` module provides the :class:`GzipFile` class, as well as the +:func:`gzip.open`, :func:`compress` and :func:`decompress` convenience +functions. The :class:`GzipFile` class reads and writes :program:`gzip`\ -format +files, automatically compressing or decompressing the data so that it looks like +an ordinary :term:`file object`. Note that additional file formats which can be decompressed by the :program:`gzip` and :program:`gunzip` programs, such as those produced by @@ -24,6 +26,32 @@ Note that additional file formats which can be decompressed by the The module defines the following items: +.. function:: open(filename, mode='rb', compresslevel=9, encoding=None, errors=None, newline=None) + + Open *filename* as a gzip-compressed file in binary or text mode. + + Returns a :term:`file object`. + + The *mode* argument can be any of ``'r'``, ``'rb'``, ``'a'``, ``'ab'``, + ``'w'``, or ``'wb'`` for binary mode, or ``'rt'``, ``'at'``, or ``'wt'`` for + text mode. The default is ``'rb'``. + + The *compresslevel* argument is an integer from 1 to 9, as for the + :class:`GzipFile` constructor. + + For binary mode, this function is equivalent to the :class:`GzipFile` + constructor: ``GzipFile(filename, mode, compresslevel)``. In this case, the + *encoding*, *errors* and *newline* arguments must not be provided. + + For text mode, a :class:`GzipFile` object is created, and wrapped in an + :class:`io.TextIOWrapper` instance with the specified encoding, error + handling behavior, and line ending(s). + + .. versionchanged:: 3.3 + Support for text mode was added, along with the *encoding*, *errors* and + *newline* arguments. + + .. class:: GzipFile(filename=None, mode=None, compresslevel=9, fileobj=None, mtime=None) Constructor for the :class:`GzipFile` class, which simulates most of the @@ -46,9 +74,9 @@ The module defines the following items: or ``'wb'``, depending on whether the file will be read or written. The default is the mode of *fileobj* if discernible; otherwise, the default is ``'rb'``. - Note that the file is always opened in binary mode; text mode is not - supported. If you need to read a compressed file in text mode, wrap your - :class:`GzipFile` with an :class:`io.TextIOWrapper`. + Note that the file is always opened in binary mode. To open a compressed file + in text mode, use :func:`gzip.open` (or wrap your :class:`GzipFile` with an + :class:`io.TextIOWrapper`). The *compresslevel* argument is an integer from ``1`` to ``9`` controlling the level of compression; ``1`` is fastest and produces the least compression, and @@ -97,12 +125,6 @@ The module defines the following items: The :meth:`io.BufferedIOBase.read1` method is now implemented. -.. function:: open(filename, mode='rb', compresslevel=9) - - This is a shorthand for ``GzipFile(filename,`` ``mode,`` ``compresslevel)``. - The *filename* argument is required; *mode* defaults to ``'rb'`` and - *compresslevel* defaults to ``9``. - .. function:: compress(data, compresslevel=9) Compress the *data*, returning a :class:`bytes` object containing diff --git a/Lib/gzip.py b/Lib/gzip.py index 85c3e15..2f53aa8 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -16,6 +16,39 @@ FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 READ, WRITE = 1, 2 +def open(filename, mode="rb", compresslevel=9, + encoding=None, errors=None, newline=None): + """Open a gzip-compressed file in binary or text mode. + + The mode argument can be "r", "rb", "w", "wb", "a" or "ab" for binary mode, + or "rt", "wt" or "at" for text mode. The default mode is "rb", and the + default compresslevel is 9. + + For binary mode, this function is equivalent to the GzipFile constructor: + GzipFile(filename, mode, compresslevel). In this case, the encoding, errors + and newline arguments must not be provided. + + For text mode, a GzipFile object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error handling + behavior, and line ending(s). + + """ + if "t" in mode: + if "b" in mode: + raise ValueError("Invalid mode: %r" % (mode,)) + else: + if encoding is not None: + raise ValueError("Argument 'encoding' not supported in binary mode") + if errors is not None: + raise ValueError("Argument 'errors' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + binary_file = GzipFile(filename, mode.replace("t", ""), compresslevel) + if "t" in mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file + def write32u(output, value): # The L format writes the bit pattern correctly whether signed # or unsigned. @@ -24,15 +57,6 @@ def write32u(output, value): def read32(input): return struct.unpack("