From 20cdffd8304522a5c10b53f3dccef7e9f5d685bb Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 16 Dec 2016 18:58:33 +0200 Subject: Issue #14061: Misc fixes and cleanups in archiving code in shutil. Imporoved the documentation and tests for make_archive() and unpack_archive(). Improved error handling when corresponding compress module is not available. Brake circular dependency between shutil and tarfile modules. --- Doc/library/shutil.rst | 44 ++++++++++++----------- Lib/shutil.py | 82 ++++++++++++++++++++++-------------------- Lib/test/test_shutil.py | 95 +++++++++++++++++++++++-------------------------- 3 files changed, 112 insertions(+), 109 deletions(-) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 6f50bc3..66e32e6 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -458,6 +458,10 @@ Archiving operations .. versionadded:: 3.2 +.. versionchanged:: 3.5 + Added support for the *xztar* format. + + High-level utilities to create and read compressed and archived files are also provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. @@ -467,8 +471,9 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. *base_name* is the name of the file to create, including the path, minus any format-specific extension. *format* is the archive format: one of - "zip", "tar", "bztar" (if the :mod:`bz2` module is available), "xztar" - (if the :mod:`lzma` module is available) or "gztar". + "zip" (if the :mod:`zlib` module is available), "tar", "gztar" (if the + :mod:`zlib` module is available), "bztar" (if the :mod:`bz2` module is + available), or "xztar" (if the :mod:`lzma` module is available). *root_dir* is a directory that will be the root directory of the archive; for example, we typically chdir into *root_dir* before creating the @@ -491,9 +496,6 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. The *verbose* argument is unused and deprecated. - .. versionchanged:: 3.5 - Added support for the *xztar* format. - .. function:: get_archive_formats() @@ -502,11 +504,11 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. By default :mod:`shutil` provides these formats: - - *gztar*: gzip'ed tar-file - - *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available.) - - *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available.) - - *tar*: uncompressed tar file - - *zip*: ZIP file + - *zip*: ZIP file (if the :mod:`zlib` module is available). + - *tar*: uncompressed tar file. + - *gztar*: gzip'ed tar-file (if the :mod:`zlib` module is available). + - *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available). + - *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available). You can register new formats or provide your own archiver for any existing formats, by using :func:`register_archive_format`. @@ -541,11 +543,12 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. *extract_dir* is the name of the target directory where the archive is unpacked. If not provided, the current working directory is used. - *format* is the archive format: one of "zip", "tar", or "gztar". Or any - other format registered with :func:`register_unpack_format`. If not - provided, :func:`unpack_archive` will use the archive file name extension - and see if an unpacker was registered for that extension. In case none is - found, a :exc:`ValueError` is raised. + *format* is the archive format: one of "zip", "tar", "gztar", "bztar", or + "xztar". Or any other format registered with + :func:`register_unpack_format`. If not provided, :func:`unpack_archive` + will use the archive file name extension and see if an unpacker was + registered for that extension. In case none is found, + a :exc:`ValueError` is raised. .. function:: register_unpack_format(name, extensions, function[, extra_args[, description]]) @@ -578,11 +581,12 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. By default :mod:`shutil` provides these formats: - - *gztar*: gzip'ed tar-file - - *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available.) - - *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available.) - - *tar*: uncompressed tar file - - *zip*: ZIP file + - *zip*: ZIP file (unpacking compressed files works only if corresponding + module is available). + - *tar*: uncompressed tar file. + - *gztar*: gzip'ed tar-file (if the :mod:`zlib` module is available). + - *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available). + - *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available). You can register new formats or provide your own unpacker for any existing formats, by using :func:`register_unpack_format`. diff --git a/Lib/shutil.py b/Lib/shutil.py index 6cf88d9..16ca80b 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -10,7 +10,13 @@ import stat import fnmatch import collections import errno -import tarfile + +try: + import zlib + del zlib + _ZLIB_SUPPORTED = True +except ImportError: + _ZLIB_SUPPORTED = False try: import bz2 @@ -602,23 +608,22 @@ def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, Returns the output filename. """ - tar_compression = {'gzip': 'gz', None: ''} - compress_ext = {'gzip': '.gz'} - - if _BZ2_SUPPORTED: - tar_compression['bzip2'] = 'bz2' - compress_ext['bzip2'] = '.bz2' - - if _LZMA_SUPPORTED: - tar_compression['xz'] = 'xz' - compress_ext['xz'] = '.xz' - - # flags for compression program, each element of list will be an argument - if compress is not None and compress not in compress_ext: + if compress is None: + tar_compression = '' + elif _ZLIB_SUPPORTED and compress == 'gzip': + tar_compression = 'gz' + elif _BZ2_SUPPORTED and compress == 'bzip2': + tar_compression = 'bz2' + elif _LZMA_SUPPORTED and compress == 'xz': + tar_compression = 'xz' + else: raise ValueError("bad value for 'compress', or compression format not " "supported : {0}".format(compress)) - archive_name = base_name + '.tar' + compress_ext.get(compress, '') + import tarfile # late import for breaking circular dependency + + compress_ext = '.' + tar_compression if compress else '' + archive_name = base_name + '.tar' + compress_ext archive_dir = os.path.dirname(archive_name) if archive_dir and not os.path.exists(archive_dir): @@ -644,7 +649,7 @@ def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, return tarinfo if not dry_run: - tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) + tar = tarfile.open(archive_name, 'w|%s' % tar_compression) try: tar.add(base_dir, filter=_set_uid_gid) finally: @@ -655,13 +660,10 @@ def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None): """Create a zip file from all the files under 'base_dir'. - The output zip file will be named 'base_name' + ".zip". Uses either the - "zipfile" Python module (if available) or the InfoZIP "zip" utility - (if installed and found on the default search path). If neither tool is - available, raises ExecError. Returns the name of the output zip - file. + The output zip file will be named 'base_name' + ".zip". Returns the + name of the output zip file. """ - import zipfile + import zipfile # late import for breaking circular dependency zip_filename = base_name + ".zip" archive_dir = os.path.dirname(base_name) @@ -700,10 +702,13 @@ def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None): return zip_filename _ARCHIVE_FORMATS = { - 'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"), 'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"), - 'zip': (_make_zipfile, [], "ZIP file") - } +} + +if _ZLIB_SUPPORTED: + _ARCHIVE_FORMATS['gztar'] = (_make_tarball, [('compress', 'gzip')], + "gzip'ed tar-file") + _ARCHIVE_FORMATS['zip'] = (_make_zipfile, [], "ZIP file") if _BZ2_SUPPORTED: _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')], @@ -752,8 +757,8 @@ def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, """Create an archive file (eg. zip or tar). 'base_name' is the name of the file to create, minus any format-specific - extension; 'format' is the archive format: one of "zip", "tar", "bztar" - or "gztar". + extension; 'format' is the archive format: one of "zip", "tar", "gztar", + "bztar", or "xztar". Or any other registered format. 'root_dir' is a directory that will be the root directory of the archive; ie. we typically chdir into 'root_dir' before creating the @@ -866,10 +871,7 @@ def _ensure_directory(path): def _unpack_zipfile(filename, extract_dir): """Unpack zip `filename` to `extract_dir` """ - try: - import zipfile - except ImportError: - raise ReadError('zlib not supported, cannot unpack this archive.') + import zipfile # late import for breaking circular dependency if not zipfile.is_zipfile(filename): raise ReadError("%s is not a zip file" % filename) @@ -903,6 +905,7 @@ def _unpack_zipfile(filename, extract_dir): def _unpack_tarfile(filename, extract_dir): """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir` """ + import tarfile # late import for breaking circular dependency try: tarobj = tarfile.open(filename) except tarfile.TarError: @@ -914,10 +917,13 @@ def _unpack_tarfile(filename, extract_dir): tarobj.close() _UNPACK_FORMATS = { - 'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"), 'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"), - 'zip': (['.zip'], _unpack_zipfile, [], "ZIP file") - } + 'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"), +} + +if _ZLIB_SUPPORTED: + _UNPACK_FORMATS['gztar'] = (['.tar.gz', '.tgz'], _unpack_tarfile, [], + "gzip'ed tar-file") if _BZ2_SUPPORTED: _UNPACK_FORMATS['bztar'] = (['.tar.bz2', '.tbz2'], _unpack_tarfile, [], @@ -942,10 +948,10 @@ def unpack_archive(filename, extract_dir=None, format=None): `extract_dir` is the name of the target directory, where the archive is unpacked. If not provided, the current working directory is used. - `format` is the archive format: one of "zip", "tar", or "gztar". Or any - other registered format. If not provided, unpack_archive will use the - filename extension and see if an unpacker was registered for that - extension. + `format` is the archive format: one of "zip", "tar", "gztar", "bztar", + or "xztar". Or any other registered format. If not provided, + unpack_archive will use the filename extension and see if an unpacker + was registered for that extension. In case none is found, a ValueError is raised. """ diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 1e65a2a..709feeb 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -19,22 +19,11 @@ from shutil import (make_archive, unregister_unpack_format, get_unpack_formats, SameFileError) import tarfile +import zipfile import warnings from test import support -from test.support import TESTFN, check_warnings, captured_stdout, requires_zlib - -try: - import bz2 - BZ2_SUPPORTED = True -except ImportError: - BZ2_SUPPORTED = False - -try: - import lzma - LZMA_SUPPORTED = True -except ImportError: - LZMA_SUPPORTED = False +from test.support import TESTFN, check_warnings, captured_stdout TESTFN2 = TESTFN + "2" @@ -45,12 +34,6 @@ try: except ImportError: UID_GID_SUPPORT = False -try: - import zipfile - ZIP_SUPPORT = True -except ImportError: - ZIP_SUPPORT = shutil.which('zip') - def _fake_rename(*args, **kwargs): # Pretend the destination path is on a different filesystem. raise OSError(getattr(errno, 'EXDEV', 18), "Invalid cross-device link") @@ -964,7 +947,7 @@ class TestShutil(unittest.TestCase): self.assertEqual(getattr(file1_stat, 'st_flags'), getattr(file2_stat, 'st_flags')) - @requires_zlib + @support.requires_zlib def test_make_tarball(self): # creating something to tar root_dir, base_dir = self._create_files('') @@ -1020,7 +1003,7 @@ class TestShutil(unittest.TestCase): write_file((root_dir, 'outer'), 'xxx') return root_dir, base_dir - @requires_zlib + @support.requires_zlib @unittest.skipUnless(shutil.which('tar'), 'Need the tar command to run') def test_tarfile_vs_tar(self): @@ -1053,8 +1036,7 @@ class TestShutil(unittest.TestCase): self.assertEqual(tarball, base_name + '.tar') self.assertTrue(os.path.isfile(tarball)) - @requires_zlib - @unittest.skipUnless(ZIP_SUPPORT, 'Need zip support to run') + @support.requires_zlib def test_make_zipfile(self): # creating something to zip root_dir, base_dir = self._create_files() @@ -1091,8 +1073,7 @@ class TestShutil(unittest.TestCase): ['dist/', 'dist/sub/', 'dist/sub2/', 'dist/file1', 'dist/file2', 'dist/sub/file3']) - @requires_zlib - @unittest.skipUnless(ZIP_SUPPORT, 'Need zip support to run') + @support.requires_zlib @unittest.skipUnless(shutil.which('zip'), 'Need the zip command to run') def test_zipfile_vs_zip(self): @@ -1118,8 +1099,7 @@ class TestShutil(unittest.TestCase): names2 = zf.namelist() self.assertEqual(sorted(names), sorted(names2)) - @requires_zlib - @unittest.skipUnless(ZIP_SUPPORT, 'Need zip support to run') + @support.requires_zlib @unittest.skipUnless(shutil.which('unzip'), 'Need the unzip command to run') def test_unzip_zipfile(self): @@ -1146,7 +1126,7 @@ class TestShutil(unittest.TestCase): base_name = os.path.join(tmpdir, 'archive') self.assertRaises(ValueError, make_archive, base_name, 'xxx') - @requires_zlib + @support.requires_zlib def test_make_archive_owner_group(self): # testing make_archive with owner and group, with various combinations # this works even if there's not gid/uid support @@ -1174,7 +1154,7 @@ class TestShutil(unittest.TestCase): self.assertTrue(os.path.isfile(res)) - @requires_zlib + @support.requires_zlib @unittest.skipUnless(UID_GID_SUPPORT, "Requires grp and pwd support") def test_tarfile_root_owner(self): root_dir, base_dir = self._create_files() @@ -1219,7 +1199,7 @@ class TestShutil(unittest.TestCase): self.assertEqual(make_archive('test', 'tar'), 'test.tar') self.assertTrue(os.path.isfile('test.tar')) - @requires_zlib + @support.requires_zlib def test_make_zipfile_in_curdir(self): # Issue #21280 root_dir = self.mkdtemp() @@ -1243,33 +1223,46 @@ class TestShutil(unittest.TestCase): formats = [name for name, params in get_archive_formats()] self.assertNotIn('xxx', formats) - @requires_zlib - def test_unpack_archive(self): - formats = ['tar', 'gztar', 'zip'] - if BZ2_SUPPORTED: - formats.append('bztar') - if LZMA_SUPPORTED: - formats.append('xztar') - + def check_unpack_archive(self, format): root_dir, base_dir = self._create_files() expected = rlistdir(root_dir) expected.remove('outer') - for format in formats: - base_name = os.path.join(self.mkdtemp(), 'archive') - filename = make_archive(base_name, format, root_dir, base_dir) - - # let's try to unpack it now - tmpdir2 = self.mkdtemp() - unpack_archive(filename, tmpdir2) - self.assertEqual(rlistdir(tmpdir2), expected) - - # and again, this time with the format specified - tmpdir3 = self.mkdtemp() - unpack_archive(filename, tmpdir3, format=format) - self.assertEqual(rlistdir(tmpdir3), expected) + + base_name = os.path.join(self.mkdtemp(), 'archive') + filename = make_archive(base_name, format, root_dir, base_dir) + + # let's try to unpack it now + tmpdir2 = self.mkdtemp() + unpack_archive(filename, tmpdir2) + self.assertEqual(rlistdir(tmpdir2), expected) + + # and again, this time with the format specified + tmpdir3 = self.mkdtemp() + unpack_archive(filename, tmpdir3, format=format) + self.assertEqual(rlistdir(tmpdir3), expected) + self.assertRaises(shutil.ReadError, unpack_archive, TESTFN) self.assertRaises(ValueError, unpack_archive, TESTFN, format='xxx') + def test_unpack_archive_tar(self): + self.check_unpack_archive('tar') + + @support.requires_zlib + def test_unpack_archive_gztar(self): + self.check_unpack_archive('gztar') + + @support.requires_bz2 + def test_unpack_archive_bztar(self): + self.check_unpack_archive('bztar') + + @support.requires_lzma + def test_unpack_archive_xztar(self): + self.check_unpack_archive('xztar') + + @support.requires_zlib + def test_unpack_archive_zip(self): + self.check_unpack_archive('zip') + def test_unpack_registry(self): formats = get_unpack_formats() -- cgit v0.12