summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Yaron de Leeuw <me@jarondl.net> 2022-06-25 08:43:54 (GMT)
committer: GitHub <noreply@github.com> 2022-06-25 08:43:54 (GMT)
commit: 50cd4b6959568999f5e426e58050ea912a490ac6 (patch)
tree: e4c74457bdec8383f9a7b9a7aa7509e76d4b064d
parent: 81e91c95a51daaa77efa3a3758ecba0475cfef38 (diff)
download: cpython-50cd4b6959568999f5e426e58050ea912a490ac6.zip
cpython-50cd4b6959568999f5e426e58050ea912a490ac6.tar.gz
cpython-50cd4b6959568999f5e426e58050ea912a490ac6.tar.bz2
bpo-26253: Add compressionlevel to tarfile stream (GH-2962)
`tarfile` already accepts a `compresslevel` argument for creating files. This patch adds the same for stream-based tarfile usage. The default is 9, the value that was previously hard-coded.
-rw-r--r-- Doc/library/tarfile.rst 7
-rwxr-xr-x Lib/tarfile.py 22
-rw-r--r-- Lib/test/test_tarfile.py 68
-rw-r--r-- Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst 2
4 files changed, 88 insertions, 11 deletions
diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst
index f5c49b0..f9d34de 100644
--- a/Doc/library/tarfile.rst
+++ b/Doc/library/tarfile.rst
@@ -98,8 +98,8 @@ Some facts and figures:
If *fileobj* is specified, it is used as an alternative to a :term:`file object`
opened in binary mode for *name*. It is supposed to be at position 0.
- For modes ``'w:gz'``, ``'r:gz'``, ``'w:bz2'``, ``'r:bz2'``, ``'x:gz'``,
- ``'x:bz2'``, :func:`tarfile.open` accepts the keyword argument
+ For modes ``'w:gz'``, ``'x:gz'``, ``'w|gz'``, ``'w:bz2'``, ``'x:bz2'``,
+ ``'w|bz2'``, :func:`tarfile.open` accepts the keyword argument
*compresslevel* (default ``9``) to specify the compression level of the file.
For modes ``'w:xz'`` and ``'x:xz'``, :func:`tarfile.open` accepts the
@@ -152,6 +152,9 @@ Some facts and figures:
.. versionchanged:: 3.6
The *name* parameter accepts a :term:`path-like object`.
+ .. versionchanged:: 3.12
+ The *compresslevel* keyword argument also works for streams.
+
.. class:: TarFile
:noindex:
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 169c88d..a08f247 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -336,7 +336,8 @@ class _Stream:
_Stream is intended to be used only internally.
"""
- def __init__(self, name, mode, comptype, fileobj, bufsize):
+ def __init__(self, name, mode, comptype, fileobj, bufsize,
+ compresslevel):
"""Construct a _Stream object.
"""
self._extfileobj = True
@@ -371,7 +372,7 @@ class _Stream:
self._init_read_gz()
self.exception = zlib.error
else:
- self._init_write_gz()
+ self._init_write_gz(compresslevel)
elif comptype == "bz2":
try:
@@ -383,7 +384,7 @@ class _Stream:
self.cmp = bz2.BZ2Decompressor()
self.exception = OSError
else:
- self.cmp = bz2.BZ2Compressor()
+ self.cmp = bz2.BZ2Compressor(compresslevel)
elif comptype == "xz":
try:
@@ -410,13 +411,14 @@ class _Stream:
if hasattr(self, "closed") and not self.closed:
self.close()
- def _init_write_gz(self):
+ def _init_write_gz(self, compresslevel):
"""Initialize for writing with gzip compression.
"""
- self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
- -self.zlib.MAX_WBITS,
- self.zlib.DEF_MEM_LEVEL,
- 0)
+ self.cmp = self.zlib.compressobj(compresslevel,
+ self.zlib.DEFLATED,
+ -self.zlib.MAX_WBITS,
+ self.zlib.DEF_MEM_LEVEL,
+ 0)
timestamp = struct.pack("<L", int(time.time()))
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
if self.name.endswith(".gz"):
@@ -1659,7 +1661,9 @@ class TarFile(object):
if filemode not in ("r", "w"):
raise ValueError("mode must be 'r' or 'w'")
- stream = _Stream(name, filemode, comptype, fileobj, bufsize)
+ compresslevel = kwargs.pop("compresslevel", 9)
+ stream = _Stream(name, filemode, comptype, fileobj, bufsize,
+ compresslevel)
try:
t = cls(name, filemode, stream, **kwargs)
except:
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index e0389c5..04f9bee 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1554,6 +1554,74 @@ class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
class LzmaStreamWriteTest(LzmaTest, StreamWriteTest):
decompressor = lzma.LZMADecompressor if lzma else None
+class _CompressedWriteTest(TarTest):
+ # This is not actually a standalone test.
+ # It does not inherit WriteTest because it only makes sense with gz,bz2
+ source = (b"And we move to Bristol where they have a special, " +
+ b"Very Silly candidate")
+
+ def _compressed_tar(self, compresslevel):
+ fobj = io.BytesIO()
+ with tarfile.open(tmpname, self.mode, fobj,
+ compresslevel=compresslevel) as tarfl:
+ tarfl.addfile(tarfile.TarInfo("foo"), io.BytesIO(self.source))
+ return fobj
+
+ def _test_bz2_header(self, compresslevel):
+ fobj = self._compressed_tar(compresslevel)
+ self.assertEqual(fobj.getvalue()[0:10],
+ b"BZh%d1AY&SY" % compresslevel)
+
+ def _test_gz_header(self, compresslevel):
+ fobj = self._compressed_tar(compresslevel)
+ self.assertEqual(fobj.getvalue()[:3], b"\x1f\x8b\x08")
+
+class Bz2CompressWriteTest(Bz2Test, _CompressedWriteTest, unittest.TestCase):
+ prefix = "w:"
+ def test_compression_levels(self):
+ self._test_bz2_header(1)
+ self._test_bz2_header(5)
+ self._test_bz2_header(9)
+
+class Bz2CompressStreamWriteTest(Bz2Test, _CompressedWriteTest,
+ unittest.TestCase):
+ prefix = "w|"
+ def test_compression_levels(self):
+ self._test_bz2_header(1)
+ self._test_bz2_header(5)
+ self._test_bz2_header(9)
+
+class GzCompressWriteTest(GzipTest, _CompressedWriteTest, unittest.TestCase):
+ prefix = "w:"
+ def test_compression_levels(self):
+ self._test_gz_header(1)
+ self._test_gz_header(5)
+ self._test_gz_header(9)
+
+class GzCompressStreamWriteTest(GzipTest, _CompressedWriteTest,
+ unittest.TestCase):
+ prefix = "w|"
+ def test_compression_levels(self):
+ self._test_gz_header(1)
+ self._test_gz_header(5)
+ self._test_gz_header(9)
+
+class CompressLevelRaises(unittest.TestCase):
+ def test_compresslevel_wrong_modes(self):
+ compresslevel = 5
+ fobj = io.BytesIO()
+ with self.assertRaises(TypeError):
+ tarfile.open(tmpname, "w:", fobj, compresslevel=compresslevel)
+
+ def test_wrong_compresslevels(self):
+ # BZ2 checks that the compresslevel is in [1,9]. gz does not
+ fobj = io.BytesIO()
+ with self.assertRaises(ValueError):
+ tarfile.open(tmpname, "w:bz2", fobj, compresslevel=0)
+ with self.assertRaises(ValueError):
+ tarfile.open(tmpname, "w:bz2", fobj, compresslevel=10)
+ with self.assertRaises(ValueError):
+ tarfile.open(tmpname, "w|bz2", fobj, compresslevel=10)
class GNUWriteTest(unittest.TestCase):
# This testcase checks for correct creation of GNU Longname
diff --git a/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst b/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst
new file mode 100644
index 0000000..fa0dc95
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst
@@ -0,0 +1,2 @@
+Allow adjustable compression level for tarfile streams in
+:func:`tarfile.open`.