Add a function lzma.open(), to match gzip.open() and bz2.open().

author: Nadeem Vawda <nadeem.vawda@gmail.com> 2012-06-04 21:38:12 (GMT)
committer: Nadeem Vawda <nadeem.vawda@gmail.com> 2012-06-04 21:38:12 (GMT)
commit: e860404eb78c2f6fcb05477bdb691e81009ee28d (patch)
tree: 8b86efe55750f1df62738a939351bba241582880
parent: 6cbb20cdf61329ebfa6afcacad21ee6252fb5be5 (diff)
download: cpython-e860404eb78c2f6fcb05477bdb691e81009ee28d.zip
cpython-e860404eb78c2f6fcb05477bdb691e81009ee28d.tar.gz
cpython-e860404eb78c2f6fcb05477bdb691e81009ee28d.tar.bz2
4 files changed, 180 insertions, 4 deletions
diff --git a/Doc/library/lzma.rst b/Doc/library/lzma.rst
index 67e425d..3174dd3 100644
--- a/Doc/library/lzma.rst
+++ b/Doc/library/lzma.rst
@@ -29,6 +29,35 @@ from multiple threads, it is necessary to protect it with a lock.
 Reading and writing compressed files
 ------------------------------------
 
+.. function:: open(filename, mode="rb", \*, format=None, check=-1, preset=None, filters=None, encoding=None, errors=None, newline=None)
+
+   Open an LZMA-compressed file in binary or text mode, returning a :term:`file
+   object`.
+
+   The *filename* argument can be either an actual file name (given as a
+   :class:`str` or :class:`bytes` object), in which case the named file is
+   opened, or it can be an existing file object to read from or write to.
+
+   The *mode* argument can be any of ``"r"``, ``"rb"``, ``"w"``, ``"wb"``,
+   ``"a"`` or ``"ab"`` for binary mode, or ``"rt"``, ``"wt"``, or ``"at"`` for
+   text mode. The default is ``"rb"``.
+
+   When opening a file for reading, the *format* and *filters* arguments have
+   the same meanings as for :class:`LZMADecompressor`. In this case, the *check*
+   and *preset* arguments should not be used.
+
+   When opening a file for writing, the *format*, *check*, *preset* and
+   *filters* arguments have the same meanings as for :class:`LZMACompressor`.
+
+   For binary mode, this function is equivalent to the :class:`LZMAFile`
+   constructor: ``LZMAFile(filename, mode, ...)``. In this case, the *encoding*,
+   *errors* and *newline* arguments must not be provided.
+
+   For text mode, an :class:`LZMAFile` object is created, and wrapped in an
+   :class:`io.TextIOWrapper` instance with the specified encoding, error
+   handling behavior, and line ending(s).
+
+
 .. class:: LZMAFile(filename=None, mode="r", \*, format=None, check=-1, preset=None, filters=None)
 
    Open an LZMA-compressed file in binary mode.
diff --git a/Lib/lzma.py b/Lib/lzma.py
index 0790691..f623204 100644
--- a/Lib/lzma.py
+++ b/Lib/lzma.py
@@ -18,10 +18,11 @@ __all__ = [
     "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME",
 
     "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
-    "compress", "decompress", "is_check_supported",
+    "open", "compress", "decompress", "is_check_supported",
     "encode_filter_properties", "decode_filter_properties",
 ]
 
+import builtins
 import io
 from _lzma import *
 
@@ -122,7 +123,7 @@ class LZMAFile(io.BufferedIOBase):
         if isinstance(filename, (str, bytes)):
             if "b" not in mode:
                 mode += "b"
-            self._fp = open(filename, mode)
+            self._fp = builtins.open(filename, mode)
             self._closefp = True
             self._mode = mode_code
         elif hasattr(filename, "read") or hasattr(filename, "write"):
@@ -370,6 +371,51 @@ class LZMAFile(io.BufferedIOBase):
         return self._pos
 
 
+def open(filename, mode="rb", *,
+         format=None, check=-1, preset=None, filters=None,
+         encoding=None, errors=None, newline=None):
+    """Open an LZMA-compressed file in binary or text mode.
+
+    filename can be either an actual file name (given as a str or bytes object),
+    in which case the named file is opened, or it can be an existing file object
+    to read from or write to.
+
+    The mode argument can be "r", "rb" (default), "w", "wb", "a", or "ab" for
+    binary mode, or "rt", "wt" or "at" for text mode.
+
+    The format, check, preset and filters arguments specify the compression
+    settings, as for LZMACompressor, LZMADecompressor and LZMAFile.
+
+    For binary mode, this function is equivalent to the LZMAFile constructor:
+    LZMAFile(filename, mode, ...). In this case, the encoding, errors and
+    newline arguments must not be provided.
+
+    For text mode, an LZMAFile object is created, and wrapped in an
+    io.TextIOWrapper instance with the specified encoding, error handling
+    behavior, and line ending(s).
+
+    """
+    if "t" in mode:
+        if "b" in mode:
+            raise ValueError("Invalid mode: %r" % (mode,))
+    else:
+        if encoding is not None:
+            raise ValueError("Argument 'encoding' not supported in binary mode")
+        if errors is not None:
+            raise ValueError("Argument 'errors' not supported in binary mode")
+        if newline is not None:
+            raise ValueError("Argument 'newline' not supported in binary mode")
+
+    lz_mode = mode.replace("t", "")
+    binary_file = LZMAFile(filename, lz_mode, format=format, check=check,
+                           preset=preset, filters=filters)
+
+    if "t" in mode:
+        return io.TextIOWrapper(binary_file, encoding, errors, newline)
+    else:
+        return binary_file
+
+
 def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
     """Compress a block of data.
 
diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py
index e4d2cb1..22f2f47 100644
--- a/Lib/test/test_lzma.py
+++ b/Lib/test/test_lzma.py
@@ -935,6 +935,106 @@ class FileTestCase(unittest.TestCase):
         self.assertRaises(ValueError, f.tell)
 
 
+class OpenTestCase(unittest.TestCase):
+
+    def test_binary_modes(self):
+        with lzma.open(BytesIO(COMPRESSED_XZ), "rb") as f:
+            self.assertEqual(f.read(), INPUT)
+        with BytesIO() as bio:
+            with lzma.open(bio, "wb") as f:
+                f.write(INPUT)
+            file_data = lzma.decompress(bio.getvalue())
+            self.assertEqual(file_data, INPUT)
+            with lzma.open(bio, "ab") as f:
+                f.write(INPUT)
+            file_data = lzma.decompress(bio.getvalue())
+            self.assertEqual(file_data, INPUT * 2)
+
+    def test_text_modes(self):
+        uncompressed = INPUT.decode("ascii")
+        uncompressed_raw = uncompressed.replace("\n", os.linesep)
+        with lzma.open(BytesIO(COMPRESSED_XZ), "rt") as f:
+            self.assertEqual(f.read(), uncompressed)
+        with BytesIO() as bio:
+            with lzma.open(bio, "wt") as f:
+                f.write(uncompressed)
+            file_data = lzma.decompress(bio.getvalue()).decode("ascii")
+            self.assertEqual(file_data, uncompressed_raw)
+            with lzma.open(bio, "at") as f:
+                f.write(uncompressed)
+            file_data = lzma.decompress(bio.getvalue()).decode("ascii")
+            self.assertEqual(file_data, uncompressed_raw * 2)
+
+    def test_filename(self):
+        with TempFile(TESTFN):
+            with lzma.open(TESTFN, "wb") as f:
+                f.write(INPUT)
+            with open(TESTFN, "rb") as f:
+                file_data = lzma.decompress(f.read())
+                self.assertEqual(file_data, INPUT)
+            with lzma.open(TESTFN, "rb") as f:
+                self.assertEqual(f.read(), INPUT)
+            with lzma.open(TESTFN, "ab") as f:
+                f.write(INPUT)
+            with lzma.open(TESTFN, "rb") as f:
+                self.assertEqual(f.read(), INPUT * 2)
+
+    def test_bad_params(self):
+        # Test invalid parameter combinations.
+        with self.assertRaises(ValueError):
+            lzma.open(TESTFN, "")
+        with self.assertRaises(ValueError):
+            lzma.open(TESTFN, "x")
+        with self.assertRaises(ValueError):
+            lzma.open(TESTFN, "rbt")
+        with self.assertRaises(ValueError):
+            lzma.open(TESTFN, "rb", encoding="utf-8")
+        with self.assertRaises(ValueError):
+            lzma.open(TESTFN, "rb", errors="ignore")
+        with self.assertRaises(ValueError):
+            lzma.open(TESTFN, "rb", newline="\n")
+
+    def test_format_and_filters(self):
+        # Test non-default format and filter chain.
+        options = {"format": lzma.FORMAT_RAW, "filters": FILTERS_RAW_1}
+        with lzma.open(BytesIO(COMPRESSED_RAW_1), "rb", **options) as f:
+            self.assertEqual(f.read(), INPUT)
+        with BytesIO() as bio:
+            with lzma.open(bio, "wb", **options) as f:
+                f.write(INPUT)
+            file_data = lzma.decompress(bio.getvalue(), **options)
+            self.assertEqual(file_data, INPUT)
+
+    def test_encoding(self):
+        # Test non-default encoding.
+        uncompressed = INPUT.decode("ascii")
+        uncompressed_raw = uncompressed.replace("\n", os.linesep)
+        with BytesIO() as bio:
+            with lzma.open(bio, "wt", encoding="utf-16-le") as f:
+                f.write(uncompressed)
+            file_data = lzma.decompress(bio.getvalue()).decode("utf-16-le")
+            self.assertEqual(file_data, uncompressed_raw)
+            bio.seek(0)
+            with lzma.open(bio, "rt", encoding="utf-16-le") as f:
+                self.assertEqual(f.read(), uncompressed)
+
+    def test_encoding_error_handler(self):
+        # Test with a non-default encoding error handler.
+        with BytesIO(lzma.compress(b"foo\xffbar")) as bio:
+            with lzma.open(bio, "rt", encoding="ascii", errors="ignore") as f:
+                self.assertEqual(f.read(), "foobar")
+
+    def test_newline(self):
+        # Test with explicit newline (universal newline mode disabled).
+        text = INPUT.decode("ascii")
+        with BytesIO() as bio:
+            with lzma.open(bio, "wt", newline="\n") as f:
+                f.write(text)
+            bio.seek(0)
+            with lzma.open(bio, "rt", newline="\r") as f:
+                self.assertEqual(f.readlines(), [text])
+
+
 class MiscellaneousTestCase(unittest.TestCase):
 
     def test_is_check_supported(self):
@@ -1385,6 +1485,7 @@ def test_main():
         CompressorDecompressorTestCase,
         CompressDecompressFunctionTestCase,
         FileTestCase,
+        OpenTestCase,
         MiscellaneousTestCase,
     )
 
diff --git a/Misc/NEWS b/Misc/NEWS
index 333999f..ee5e5ff 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -17,8 +17,8 @@ Library
 
 - LZMAFile now accepts the modes "rb"/"wb"/"ab" as synonyms of "r"/"w"/"a".
 
-- The bz2 module now contains an open() function, allowing compressed files to
-  conveniently be opened in text mode as well as binary mode.
+- The bz2 and lzma modules now each contain an open() function, allowing
+  compressed files to readily be opened in text mode as well as binary mode.
 
 - BZ2File.__init__() and LZMAFile.__init__() now accept a file object as their
   first argument, rather than requiring a separate "fileobj" argument.
author	Nadeem Vawda <nadeem.vawda@gmail.com>	2012-06-04 21:38:12 (GMT)
committer	Nadeem Vawda <nadeem.vawda@gmail.com>	2012-06-04 21:38:12 (GMT)
commit	e860404eb78c2f6fcb05477bdb691e81009ee28d (patch)
tree	8b86efe55750f1df62738a939351bba241582880
parent	6cbb20cdf61329ebfa6afcacad21ee6252fb5be5 (diff)
download	cpython-e860404eb78c2f6fcb05477bdb691e81009ee28d.zip cpython-e860404eb78c2f6fcb05477bdb691e81009ee28d.tar.gz cpython-e860404eb78c2f6fcb05477bdb691e81009ee28d.tar.bz2