summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nadeem Vawda <nadeem.vawda@gmail.com> 2012-06-04 21:38:12 (GMT)
committer Nadeem Vawda <nadeem.vawda@gmail.com> 2012-06-04 21:38:12 (GMT)
commit e860404eb78c2f6fcb05477bdb691e81009ee28d (patch)
tree 8b86efe55750f1df62738a939351bba241582880
parent 6cbb20cdf61329ebfa6afcacad21ee6252fb5be5 (diff)
download cpython-e860404eb78c2f6fcb05477bdb691e81009ee28d.zip
cpython-e860404eb78c2f6fcb05477bdb691e81009ee28d.tar.gz
cpython-e860404eb78c2f6fcb05477bdb691e81009ee28d.tar.bz2
Add a function lzma.open(), to match gzip.open() and bz2.open().
-rw-r--r-- Doc/library/lzma.rst 29
-rw-r--r-- Lib/lzma.py 50
-rw-r--r-- Lib/test/test_lzma.py 101
-rw-r--r-- Misc/NEWS 4
4 files changed, 180 insertions, 4 deletions
diff --git a/Doc/library/lzma.rst b/Doc/library/lzma.rst
index 67e425d..3174dd3 100644
--- a/Doc/library/lzma.rst
+++ b/Doc/library/lzma.rst
@@ -29,6 +29,35 @@ from multiple threads, it is necessary to protect it with a lock.
Reading and writing compressed files
------------------------------------
+.. function:: open(filename, mode="rb", \*, format=None, check=-1, preset=None, filters=None, encoding=None, errors=None, newline=None)
+
+ Open an LZMA-compressed file in binary or text mode, returning a :term:`file
+ object`.
+
+ The *filename* argument can be either an actual file name (given as a
+ :class:`str` or :class:`bytes` object), in which case the named file is
+ opened, or it can be an existing file object to read from or write to.
+
+ The *mode* argument can be any of ``"r"``, ``"rb"``, ``"w"``, ``"wb"``,
+ ``"a"`` or ``"ab"`` for binary mode, or ``"rt"``, ``"wt"``, or ``"at"`` for
+ text mode. The default is ``"rb"``.
+
+ When opening a file for reading, the *format* and *filters* arguments have
+ the same meanings as for :class:`LZMADecompressor`. In this case, the *check*
+ and *preset* arguments should not be used.
+
+ When opening a file for writing, the *format*, *check*, *preset* and
+ *filters* arguments have the same meanings as for :class:`LZMACompressor`.
+
+ For binary mode, this function is equivalent to the :class:`LZMAFile`
+ constructor: ``LZMAFile(filename, mode, ...)``. In this case, the *encoding*,
+ *errors* and *newline* arguments must not be provided.
+
+ For text mode, an :class:`LZMAFile` object is created, and wrapped in an
+ :class:`io.TextIOWrapper` instance with the specified encoding, error
+ handling behavior, and line ending(s).
+
+
.. class:: LZMAFile(filename=None, mode="r", \*, format=None, check=-1, preset=None, filters=None)
Open an LZMA-compressed file in binary mode.
diff --git a/Lib/lzma.py b/Lib/lzma.py
index 0790691..f623204 100644
--- a/Lib/lzma.py
+++ b/Lib/lzma.py
@@ -18,10 +18,11 @@ __all__ = [
"MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME",
"LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
- "compress", "decompress", "is_check_supported",
+ "open", "compress", "decompress", "is_check_supported",
"encode_filter_properties", "decode_filter_properties",
]
+import builtins
import io
from _lzma import *
@@ -122,7 +123,7 @@ class LZMAFile(io.BufferedIOBase):
if isinstance(filename, (str, bytes)):
if "b" not in mode:
mode += "b"
- self._fp = open(filename, mode)
+ self._fp = builtins.open(filename, mode)
self._closefp = True
self._mode = mode_code
elif hasattr(filename, "read") or hasattr(filename, "write"):
@@ -370,6 +371,51 @@ class LZMAFile(io.BufferedIOBase):
return self._pos
+def open(filename, mode="rb", *,
+         format=None, check=-1, preset=None, filters=None,
+         encoding=None, errors=None, newline=None):
+    """Open an LZMA-compressed file in binary or text mode.
+
+    filename can be either an actual file name (given as a str or bytes
+    object), in which case the named file is opened, or it can be an existing
+    file object to read from or write to.
+
+    The mode argument can be "r", "rb" (default), "w", "wb", "a", or "ab" for
+    binary mode, or "rt", "wt" or "at" for text mode.
+
+    The format, check, preset and filters arguments specify the compression
+    settings, as for LZMACompressor, LZMADecompressor and LZMAFile.
+
+    For binary mode, this function is equivalent to the LZMAFile constructor:
+    LZMAFile(filename, mode, ...). In this case, the encoding, errors and
+    newline arguments must not be provided.
+
+    For text mode, an LZMAFile object is created, and wrapped in an
+    io.TextIOWrapper instance with the specified encoding, error handling
+    behavior, and line ending(s).
+
+    Raises ValueError if mode contains both "t" and "b", or if encoding,
+    errors or newline is given together with a binary mode.
+
+    """
+    text_mode = "t" in mode
+    if text_mode:
+        if "b" in mode:
+            raise ValueError("Invalid mode: %r" % (mode,))
+    else:
+        # The text-layer arguments are meaningless without a TextIOWrapper,
+        # so reject them instead of silently ignoring them.
+        if encoding is not None:
+            raise ValueError("Argument 'encoding' not supported in binary mode")
+        if errors is not None:
+            raise ValueError("Argument 'errors' not supported in binary mode")
+        if newline is not None:
+            raise ValueError("Argument 'newline' not supported in binary mode")
+
+    # LZMAFile itself only understands binary modes, so drop the "t" flag.
+    lz_mode = mode.replace("t", "")
+    binary_file = LZMAFile(filename, lz_mode, format=format, check=check,
+                           preset=preset, filters=filters)
+    if not text_mode:
+        return binary_file
+
+    try:
+        return io.TextIOWrapper(binary_file, encoding, errors, newline)
+    except BaseException:
+        # Building the text layer failed (e.g. unknown encoding or illegal
+        # newline value).  Close the LZMAFile we just created so it is not
+        # left open until garbage collection, then propagate the error.
+        binary_file.close()
+        raise
+
+
+
+
def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
"""Compress a block of data.
diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py
index e4d2cb1..22f2f47 100644
--- a/Lib/test/test_lzma.py
+++ b/Lib/test/test_lzma.py
@@ -935,6 +935,106 @@ class FileTestCase(unittest.TestCase):
self.assertRaises(ValueError, f.tell)
+# Tests for the module-level lzma.open() convenience function.
+class OpenTestCase(unittest.TestCase):
+
+ # Round-trip INPUT through lzma.open() in "rb", "wb" and "ab" modes,
+ # using in-memory BytesIO buffers instead of real files.
+ def test_binary_modes(self):
+ with lzma.open(BytesIO(COMPRESSED_XZ), "rb") as f:
+ self.assertEqual(f.read(), INPUT)
+ with BytesIO() as bio:
+ with lzma.open(bio, "wb") as f:
+ f.write(INPUT)
+ file_data = lzma.decompress(bio.getvalue())
+ self.assertEqual(file_data, INPUT)
+ # Appending to the same buffer must yield the data twice over.
+ with lzma.open(bio, "ab") as f:
+ f.write(INPUT)
+ file_data = lzma.decompress(bio.getvalue())
+ self.assertEqual(file_data, INPUT * 2)
+
+ # Same round-trip as above, but through the text modes "rt", "wt", "at".
+ def test_text_modes(self):
+ uncompressed = INPUT.decode("ascii")
+ # What is expected in the decompressed bytes after writing in text mode
+ # with the default newline handling: "\n" replaced by os.linesep.
+ uncompressed_raw = uncompressed.replace("\n", os.linesep)
+ with lzma.open(BytesIO(COMPRESSED_XZ), "rt") as f:
+ self.assertEqual(f.read(), uncompressed)
+ with BytesIO() as bio:
+ with lzma.open(bio, "wt") as f:
+ f.write(uncompressed)
+ file_data = lzma.decompress(bio.getvalue()).decode("ascii")
+ self.assertEqual(file_data, uncompressed_raw)
+ with lzma.open(bio, "at") as f:
+ f.write(uncompressed)
+ file_data = lzma.decompress(bio.getvalue()).decode("ascii")
+ self.assertEqual(file_data, uncompressed_raw * 2)
+
+ # Exercise lzma.open() with a file name (TESTFN) rather than a file object.
+ def test_filename(self):
+ with TempFile(TESTFN):
+ with lzma.open(TESTFN, "wb") as f:
+ f.write(INPUT)
+ # Plain open() here reads the raw compressed bytes, which are then
+ # decompressed explicitly to check what was written.
+ with open(TESTFN, "rb") as f:
+ file_data = lzma.decompress(f.read())
+ self.assertEqual(file_data, INPUT)
+ with lzma.open(TESTFN, "rb") as f:
+ self.assertEqual(f.read(), INPUT)
+ with lzma.open(TESTFN, "ab") as f:
+ f.write(INPUT)
+ with lzma.open(TESTFN, "rb") as f:
+ self.assertEqual(f.read(), INPUT * 2)
+
+ def test_bad_params(self):
+ # Test invalid parameter combinations.
+ # Each call is expected to raise ValueError: empty mode, mode "x",
+ # a mode mixing "b" and "t", and text-only arguments in binary mode.
+ with self.assertRaises(ValueError):
+ lzma.open(TESTFN, "")
+ with self.assertRaises(ValueError):
+ lzma.open(TESTFN, "x")
+ with self.assertRaises(ValueError):
+ lzma.open(TESTFN, "rbt")
+ with self.assertRaises(ValueError):
+ lzma.open(TESTFN, "rb", encoding="utf-8")
+ with self.assertRaises(ValueError):
+ lzma.open(TESTFN, "rb", errors="ignore")
+ with self.assertRaises(ValueError):
+ lzma.open(TESTFN, "rb", newline="\n")
+
+ def test_format_and_filters(self):
+ # Test non-default format and filter chain.
+ # The same options dict is passed to lzma.open() and lzma.decompress().
+ options = {"format": lzma.FORMAT_RAW, "filters": FILTERS_RAW_1}
+ with lzma.open(BytesIO(COMPRESSED_RAW_1), "rb", **options) as f:
+ self.assertEqual(f.read(), INPUT)
+ with BytesIO() as bio:
+ with lzma.open(bio, "wb", **options) as f:
+ f.write(INPUT)
+ file_data = lzma.decompress(bio.getvalue(), **options)
+ self.assertEqual(file_data, INPUT)
+
+ def test_encoding(self):
+ # Test non-default encoding.
+ uncompressed = INPUT.decode("ascii")
+ uncompressed_raw = uncompressed.replace("\n", os.linesep)
+ with BytesIO() as bio:
+ with lzma.open(bio, "wt", encoding="utf-16-le") as f:
+ f.write(uncompressed)
+ file_data = lzma.decompress(bio.getvalue()).decode("utf-16-le")
+ self.assertEqual(file_data, uncompressed_raw)
+ # Rewind so the same buffer can be read back through lzma.open().
+ bio.seek(0)
+ with lzma.open(bio, "rt", encoding="utf-16-le") as f:
+ self.assertEqual(f.read(), uncompressed)
+
+ def test_encoding_error_handler(self):
+ # Test with non-default encoding error handler.
+ # The payload contains the non-ASCII byte 0xff; with errors="ignore"
+ # the expected decoded text simply omits it.
+ with BytesIO(lzma.compress(b"foo\xffbar")) as bio:
+ with lzma.open(bio, "rt", encoding="ascii", errors="ignore") as f:
+ self.assertEqual(f.read(), "foobar")
+
+ def test_newline(self):
+ # Test with explicit newline (universal newline mode disabled).
+ text = INPUT.decode("ascii")
+ with BytesIO() as bio:
+ with lzma.open(bio, "wt", newline="\n") as f:
+ f.write(text)
+ bio.seek(0)
+ # Reading with newline="\r" is expected to return the whole text as a
+ # single line (presumably INPUT contains no "\r" - verify against the
+ # test data).
+ with lzma.open(bio, "rt", newline="\r") as f:
+ self.assertEqual(f.readlines(), [text])
+
+
class MiscellaneousTestCase(unittest.TestCase):
def test_is_check_supported(self):
@@ -1385,6 +1485,7 @@ def test_main():
CompressorDecompressorTestCase,
CompressDecompressFunctionTestCase,
FileTestCase,
+ OpenTestCase,
MiscellaneousTestCase,
)
diff --git a/Misc/NEWS b/Misc/NEWS
index 333999f..ee5e5ff 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -17,8 +17,8 @@ Library
- LZMAFile now accepts the modes "rb"/"wb"/"ab" as synonyms of "r"/"w"/"a".
-- The bz2 module now contains an open() function, allowing compressed files to
- conveniently be opened in text mode as well as binary mode.
+- The bz2 and lzma modules now each contain an open() function, allowing
+ compressed files to readily be opened in text mode as well as binary mode.
- BZ2File.__init__() and LZMAFile.__init__() now accept a file object as their
first argument, rather than requiring a separate "fileobj" argument.