From 7a80389ce5b2687137884e423d3bda27ff5d3acc Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Wed, 15 Apr 2015 10:27:58 -0400 Subject: Issue 23193: Add numeric_owner to tarfile.TarFile.extract() and tarfile.TarFile.extractall(). --- Doc/library/tarfile.rst | 19 +++++-- Doc/whatsnew/3.5.rst | 22 +++++--- Lib/tarfile.py | 45 ++++++++++------ Lib/test/test_tarfile.py | 132 +++++++++++++++++++++++++++++++++++++++++++++++ Misc/ACKS | 1 + Misc/NEWS | 4 ++ 6 files changed, 195 insertions(+), 28 deletions(-) diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 4fd94fd..67796eb 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -367,7 +367,7 @@ be finalized; only the internally used file object will be closed. See the available. -.. method:: TarFile.extractall(path=".", members=None) +.. method:: TarFile.extractall(path=".", members=None, *, numeric_owner=False) Extract all members from the archive to the current working directory or directory *path*. If optional *members* is given, it must be a subset of the @@ -377,6 +377,10 @@ be finalized; only the internally used file object will be closed. See the reset each time a file is created in it. And, if a directory's permissions do not allow writing, extracting files to it will fail. + If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile + are used to set the owner/group for the extracted files. Otherwise, the named + values from the tarfile are used. + .. warning:: Never extract archives from untrusted sources without prior inspection. @@ -384,8 +388,11 @@ be finalized; only the internally used file object will be closed. See the that have absolute filenames starting with ``"/"`` or filenames with two dots ``".."``. + .. versionchanged:: 3.5 + Added the *numeric_only* parameter. + -.. method:: TarFile.extract(member, path="", set_attrs=True) +.. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False) Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. *member* @@ -393,6 +400,10 @@ be finalized; only the internally used file object will be closed. See the directory using *path*. File attributes (owner, mtime, mode) are set unless *set_attrs* is false. + If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile + are used to set the owner/group for the extracted files. Otherwise, the named + values from the tarfile are used. + .. note:: The :meth:`extract` method does not take care of several extraction issues. @@ -405,6 +416,9 @@ be finalized; only the internally used file object will be closed. See the .. versionchanged:: 3.2 Added the *set_attrs* parameter. + .. versionchanged:: 3.5 + Added the *numeric_only* parameter. + .. method:: TarFile.extractfile(member) Extract a member from the archive as a file object. *member* may be a filename @@ -827,4 +841,3 @@ In case of :const:`PAX_FORMAT` archives, *encoding* is generally not needed because all the metadata is stored using *UTF-8*. *encoding* is only used in the rare cases when binary pax headers are decoded or when strings with surrogate characters are stored. - diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index e865402..5d010bf 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -479,26 +479,32 @@ socket :meth:`socket.socket.send`. (Contributed by Giampaolo Rodola' in :issue:`17552`.) +subprocess +---------- + +* The new :func:`subprocess.run` function runs subprocesses and returns a + :class:`subprocess.CompletedProcess` object. It Provides a more consistent + API than :func:`~subprocess.call`, :func:`~subprocess.check_call` and + :func:`~subprocess.check_output`. + sysconfig --------- * The user scripts directory on Windows is now versioned. (Contributed by Paul Moore in :issue:`23437`.) - tarfile ------- * The :func:`tarfile.open` function now supports ``'x'`` (exclusive creation) mode. (Contributed by Berker Peksag in :issue:`21717`.) -subprocess ----------- - -* The new :func:`subprocess.run` function runs subprocesses and returns a - :class:`subprocess.CompletedProcess` object. It Provides a more consistent - API than :func:`~subprocess.call`, :func:`~subprocess.check_call` and - :func:`~subprocess.check_output`. +* The :meth:`~tarfile.TarFile.extractall` and :meth:`~tarfile.TarFile.extract` + methods now take a keyword parameter *numeric_only*. If set to ``True``, + the extracted files and directories will be owned by the numeric uid and gid + from the tarfile. If set to ``False`` (the default, and the behavior in + versions prior to 3.5), they will be owned bythe named user and group in the + tarfile. (Contributed by Michael Vogt and Eric Smith in :issue:`23193`.) time ---- diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 43909f0..0f1c825 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1972,12 +1972,13 @@ class TarFile(object): self.members.append(tarinfo) - def extractall(self, path=".", members=None): + def extractall(self, path=".", members=None, *, numeric_owner=False): """Extract all members from the archive to the current working directory and set owner, modification time and permissions on directories afterwards. `path' specifies a different directory to extract to. `members' is optional and must be a subset of the - list returned by getmembers(). + list returned by getmembers(). If `numeric_owner` is True, only + the numbers for user/group names are used and not the names. """ directories = [] @@ -1991,7 +1992,8 @@ class TarFile(object): tarinfo = copy.copy(tarinfo) tarinfo.mode = 0o700 # Do not set_attrs directories, as we will do that further down - self.extract(tarinfo, path, set_attrs=not tarinfo.isdir()) + self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(), + numeric_owner=numeric_owner) # Reverse sort directories. directories.sort(key=lambda a: a.name) @@ -2001,7 +2003,7 @@ class TarFile(object): for tarinfo in directories: dirpath = os.path.join(path, tarinfo.name) try: - self.chown(tarinfo, dirpath) + self.chown(tarinfo, dirpath, numeric_owner=numeric_owner) self.utime(tarinfo, dirpath) self.chmod(tarinfo, dirpath) except ExtractError as e: @@ -2010,12 +2012,14 @@ class TarFile(object): else: self._dbg(1, "tarfile: %s" % e) - def extract(self, member, path="", set_attrs=True): + def extract(self, member, path="", set_attrs=True, *, numeric_owner=False): """Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. `member' may be a filename or a TarInfo object. You can specify a different directory using `path'. File attributes (owner, - mtime, mode) are set unless `set_attrs' is False. + mtime, mode) are set unless `set_attrs' is False. If `numeric_owner` + is True, only the numbers for user/group names are used and not + the names. """ self._check("r") @@ -2030,7 +2034,8 @@ class TarFile(object): try: self._extract_member(tarinfo, os.path.join(path, tarinfo.name), - set_attrs=set_attrs) + set_attrs=set_attrs, + numeric_owner=numeric_owner) except OSError as e: if self.errorlevel > 0: raise @@ -2076,7 +2081,8 @@ class TarFile(object): # blkdev, etc.), return None instead of a file object. return None - def _extract_member(self, tarinfo, targetpath, set_attrs=True): + def _extract_member(self, tarinfo, targetpath, set_attrs=True, + numeric_owner=False): """Extract the TarInfo object tarinfo to a physical file called targetpath. """ @@ -2114,7 +2120,7 @@ class TarFile(object): self.makefile(tarinfo, targetpath) if set_attrs: - self.chown(tarinfo, targetpath) + self.chown(tarinfo, targetpath, numeric_owner) if not tarinfo.issym(): self.chmod(tarinfo, targetpath) self.utime(tarinfo, targetpath) @@ -2203,19 +2209,24 @@ class TarFile(object): except KeyError: raise ExtractError("unable to resolve link inside archive") - def chown(self, tarinfo, targetpath): - """Set owner of targetpath according to tarinfo. + def chown(self, tarinfo, targetpath, numeric_owner): + """Set owner of targetpath according to tarinfo. If numeric_owner + is True, use .gid/.uid instead of .gname/.uname. """ if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: # We have to be root to do so. - try: - g = grp.getgrnam(tarinfo.gname)[2] - except KeyError: + if numeric_owner: g = tarinfo.gid - try: - u = pwd.getpwnam(tarinfo.uname)[2] - except KeyError: u = tarinfo.uid + else: + try: + g = grp.getgrnam(tarinfo.gname)[2] + except KeyError: + g = tarinfo.gid + try: + u = pwd.getpwnam(tarinfo.uname)[2] + except KeyError: + u = tarinfo.uid try: if tarinfo.issym() and hasattr(os, "lchown"): os.lchown(targetpath, u, g) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 01d1a92..483c587 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -2,8 +2,10 @@ import sys import os import io from hashlib import md5 +from contextlib import contextmanager import unittest +import unittest.mock import tarfile from test import support, script_helper @@ -2264,6 +2266,136 @@ class Bz2PartialReadTest(Bz2Test, unittest.TestCase): self._test_partial_input("r:bz2") +def root_is_uid_gid_0(): + try: + import pwd, grp + except ImportError: + return False + if pwd.getpwuid(0)[0] != 'root': + return False + if grp.getgrgid(0)[0] != 'root': + return False + return True + + +class NumericOwnerTest(unittest.TestCase): + # mock the following: + # os.chown: so we can test what's being called + # os.chmod: so the modes are not actually changed. if they are, we can't + # delete the files/directories + # os.geteuid: so we can lie and say we're root (uid = 0) + + @staticmethod + def _make_test_archive(filename_1, dirname_1, filename_2): + # the file contents to write + fobj = io.BytesIO(b"content") + + # create a tar file with a file, a directory, and a file within that + # directory. Assign various .uid/.gid values to them + items = [(filename_1, 99, 98, tarfile.REGTYPE, fobj), + (dirname_1, 77, 76, tarfile.DIRTYPE, None), + (filename_2, 88, 87, tarfile.REGTYPE, fobj), + ] + with tarfile.open(tmpname, 'w') as tarfl: + for name, uid, gid, typ, contents in items: + t = tarfile.TarInfo(name) + t.uid = uid + t.gid = gid + t.uname = 'root' + t.gname = 'root' + t.type = typ + tarfl.addfile(t, contents) + + # return the full pathname to the tar file + return tmpname + + @staticmethod + @contextmanager + def _setup_test(mock_geteuid): + mock_geteuid.return_value = 0 # lie and say we're root + fname = 'numeric-owner-testfile' + dirname = 'dir' + + # the names we want stored in the tarfile + filename_1 = fname + dirname_1 = dirname + filename_2 = os.path.join(dirname, fname) + + # create the tarfile with the contents we're after + tar_filename = NumericOwnerTest._make_test_archive(filename_1, + dirname_1, + filename_2) + + # open the tarfile for reading. yield it and the names of the items + # we stored into the file + with tarfile.open(tar_filename) as tarfl: + yield tarfl, filename_1, dirname_1, filename_2 + + @unittest.mock.patch('os.chown') + @unittest.mock.patch('os.chmod') + @unittest.mock.patch('os.geteuid') + def test_extract_with_numeric_owner(self, mock_geteuid, mock_chmod, + mock_chown): + with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, + filename_2): + tarfl.extract(filename_1, TEMPDIR, numeric_owner=True) + tarfl.extract(filename_2 , TEMPDIR, numeric_owner=True) + + # convert to filesystem paths + f_filename_1 = os.path.join(TEMPDIR, filename_1) + f_filename_2 = os.path.join(TEMPDIR, filename_2) + + mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98), + unittest.mock.call(f_filename_2, 88, 87), + ], + any_order=True) + + @unittest.mock.patch('os.chown') + @unittest.mock.patch('os.chmod') + @unittest.mock.patch('os.geteuid') + def test_extractall_with_numeric_owner(self, mock_geteuid, mock_chmod, + mock_chown): + with self._setup_test(mock_geteuid) as (tarfl, filename_1, dirname_1, + filename_2): + tarfl.extractall(TEMPDIR, numeric_owner=True) + + # convert to filesystem paths + f_filename_1 = os.path.join(TEMPDIR, filename_1) + f_dirname_1 = os.path.join(TEMPDIR, dirname_1) + f_filename_2 = os.path.join(TEMPDIR, filename_2) + + mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98), + unittest.mock.call(f_dirname_1, 77, 76), + unittest.mock.call(f_filename_2, 88, 87), + ], + any_order=True) + + # this test requires that uid=0 and gid=0 really be named 'root'. that's + # because the uname and gname in the test file are 'root', and extract() + # will look them up using pwd and grp to find their uid and gid, which we + # test here to be 0. + @unittest.skipUnless(root_is_uid_gid_0(), + 'uid=0,gid=0 must be named "root"') + @unittest.mock.patch('os.chown') + @unittest.mock.patch('os.chmod') + @unittest.mock.patch('os.geteuid') + def test_extract_without_numeric_owner(self, mock_geteuid, mock_chmod, + mock_chown): + with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _): + tarfl.extract(filename_1, TEMPDIR, numeric_owner=False) + + # convert to filesystem paths + f_filename_1 = os.path.join(TEMPDIR, filename_1) + + mock_chown.assert_called_with(f_filename_1, 0, 0) + + @unittest.mock.patch('os.geteuid') + def test_keyword_only(self, mock_geteuid): + with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _): + self.assertRaises(TypeError, + tarfl.extract, filename_1, TEMPDIR, False, True) + + def setUpModule(): support.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/Misc/ACKS b/Misc/ACKS index e04d986..3d4b2cc 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1458,6 +1458,7 @@ Norman Vine Pauli Virtanen Frank Visser Johannes Vogel +Michael Vogt Radu Voicilas Alex Volkov Martijn Vries diff --git a/Misc/NEWS b/Misc/NEWS index 2472c27..71ad4a4 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -32,6 +32,10 @@ Core and Builtins Library ------- +- Issue #23193: Add a numeric_owner parameter to + tarfile.TarFile.extract and tarfile.TarFile.extractall. Patch by + Michael Vogt and Eric Smith. + - Issue #23342: Add a subprocess.run() function than returns a CalledProcess instance for a more consistent API than the existing call* functions. -- cgit v0.12