diff options
author | Yaron de Leeuw <me@jarondl.net> | 2022-06-25 08:43:54 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-06-25 08:43:54 (GMT) |
commit | 50cd4b6959568999f5e426e58050ea912a490ac6 (patch) | |
tree | e4c74457bdec8383f9a7b9a7aa7509e76d4b064d | |
parent | 81e91c95a51daaa77efa3a3758ecba0475cfef38 (diff) | |
download | cpython-50cd4b6959568999f5e426e58050ea912a490ac6.zip cpython-50cd4b6959568999f5e426e58050ea912a490ac6.tar.gz cpython-50cd4b6959568999f5e426e58050ea912a490ac6.tar.bz2 |
bpo-26253: Add compressionlevel to tarfile stream (GH-2962)
`tarfile` already accepts a compressionlevel argument for creating
files. This patch adds the same for stream-based tarfile usage.
The default is 9, the value that was previously hard-coded.
-rw-r--r-- | Doc/library/tarfile.rst | 7 | ||||
-rwxr-xr-x | Lib/tarfile.py | 22 | ||||
-rw-r--r-- | Lib/test/test_tarfile.py | 68 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst | 2 |
4 files changed, 88 insertions, 11 deletions
diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index f5c49b0..f9d34de 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -98,8 +98,8 @@ Some facts and figures: If *fileobj* is specified, it is used as an alternative to a :term:`file object` opened in binary mode for *name*. It is supposed to be at position 0. - For modes ``'w:gz'``, ``'r:gz'``, ``'w:bz2'``, ``'r:bz2'``, ``'x:gz'``, - ``'x:bz2'``, :func:`tarfile.open` accepts the keyword argument + For modes ``'w:gz'``, ``'x:gz'``, ``'w|gz'``, ``'w:bz2'``, ``'x:bz2'``, + ``'w|bz2'``, :func:`tarfile.open` accepts the keyword argument *compresslevel* (default ``9``) to specify the compression level of the file. For modes ``'w:xz'`` and ``'x:xz'``, :func:`tarfile.open` accepts the @@ -152,6 +152,9 @@ Some facts and figures: .. versionchanged:: 3.6 The *name* parameter accepts a :term:`path-like object`. + .. versionchanged:: 3.12 + The *compresslevel* keyword argument also works for streams. + .. class:: TarFile :noindex: diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 169c88d..a08f247 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -336,7 +336,8 @@ class _Stream: _Stream is intended to be used only internally. """ - def __init__(self, name, mode, comptype, fileobj, bufsize): + def __init__(self, name, mode, comptype, fileobj, bufsize, + compresslevel): """Construct a _Stream object. """ self._extfileobj = True @@ -371,7 +372,7 @@ class _Stream: self._init_read_gz() self.exception = zlib.error else: - self._init_write_gz() + self._init_write_gz(compresslevel) elif comptype == "bz2": try: @@ -383,7 +384,7 @@ class _Stream: self.cmp = bz2.BZ2Decompressor() self.exception = OSError else: - self.cmp = bz2.BZ2Compressor() + self.cmp = bz2.BZ2Compressor(compresslevel) elif comptype == "xz": try: @@ -410,13 +411,14 @@ class _Stream: if hasattr(self, "closed") and not self.closed: self.close() - def _init_write_gz(self): + def _init_write_gz(self, compresslevel): """Initialize for writing with gzip compression. """ - self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, - -self.zlib.MAX_WBITS, - self.zlib.DEF_MEM_LEVEL, - 0) + self.cmp = self.zlib.compressobj(compresslevel, + self.zlib.DEFLATED, + -self.zlib.MAX_WBITS, + self.zlib.DEF_MEM_LEVEL, + 0) timestamp = struct.pack("<L", int(time.time())) self.__write(b"\037\213\010\010" + timestamp + b"\002\377") if self.name.endswith(".gz"): @@ -1659,7 +1661,9 @@ class TarFile(object): if filemode not in ("r", "w"): raise ValueError("mode must be 'r' or 'w'") - stream = _Stream(name, filemode, comptype, fileobj, bufsize) + compresslevel = kwargs.pop("compresslevel", 9) + stream = _Stream(name, filemode, comptype, fileobj, bufsize, + compresslevel) try: t = cls(name, filemode, stream, **kwargs) except: diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index e0389c5..04f9bee 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1554,6 +1554,74 @@ class Bz2StreamWriteTest(Bz2Test, StreamWriteTest): class LzmaStreamWriteTest(LzmaTest, StreamWriteTest): decompressor = lzma.LZMADecompressor if lzma else None +class _CompressedWriteTest(TarTest): + # This is not actually a standalone test. + # It does not inherit WriteTest because it only makes sense with gz,bz2 + source = (b"And we move to Bristol where they have a special, " + + b"Very Silly candidate") + + def _compressed_tar(self, compresslevel): + fobj = io.BytesIO() + with tarfile.open(tmpname, self.mode, fobj, + compresslevel=compresslevel) as tarfl: + tarfl.addfile(tarfile.TarInfo("foo"), io.BytesIO(self.source)) + return fobj + + def _test_bz2_header(self, compresslevel): + fobj = self._compressed_tar(compresslevel) + self.assertEqual(fobj.getvalue()[0:10], + b"BZh%d1AY&SY" % compresslevel) + + def _test_gz_header(self, compresslevel): + fobj = self._compressed_tar(compresslevel) + self.assertEqual(fobj.getvalue()[:3], b"\x1f\x8b\x08") + +class Bz2CompressWriteTest(Bz2Test, _CompressedWriteTest, unittest.TestCase): + prefix = "w:" + def test_compression_levels(self): + self._test_bz2_header(1) + self._test_bz2_header(5) + self._test_bz2_header(9) + +class Bz2CompressStreamWriteTest(Bz2Test, _CompressedWriteTest, + unittest.TestCase): + prefix = "w|" + def test_compression_levels(self): + self._test_bz2_header(1) + self._test_bz2_header(5) + self._test_bz2_header(9) + +class GzCompressWriteTest(GzipTest, _CompressedWriteTest, unittest.TestCase): + prefix = "w:" + def test_compression_levels(self): + self._test_gz_header(1) + self._test_gz_header(5) + self._test_gz_header(9) + +class GzCompressStreamWriteTest(GzipTest, _CompressedWriteTest, + unittest.TestCase): + prefix = "w|" + def test_compression_levels(self): + self._test_gz_header(1) + self._test_gz_header(5) + self._test_gz_header(9) + +class CompressLevelRaises(unittest.TestCase): + def test_compresslevel_wrong_modes(self): + compresslevel = 5 + fobj = io.BytesIO() + with self.assertRaises(TypeError): + tarfile.open(tmpname, "w:", fobj, compresslevel=compresslevel) + + def test_wrong_compresslevels(self): + # BZ2 checks that the compresslevel is in [1,9]. gz does not + fobj = io.BytesIO() + with self.assertRaises(ValueError): + tarfile.open(tmpname, "w:bz2", fobj, compresslevel=0) + with self.assertRaises(ValueError): + tarfile.open(tmpname, "w:bz2", fobj, compresslevel=10) + with self.assertRaises(ValueError): + tarfile.open(tmpname, "w|bz2", fobj, compresslevel=10) class GNUWriteTest(unittest.TestCase): # This testcase checks for correct creation of GNU Longname diff --git a/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst b/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst new file mode 100644 index 0000000..fa0dc95 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst @@ -0,0 +1,2 @@ +Allow adjustable compression level for tarfile streams in +:func:`tarfile.open`. |