summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/tarfile.rst19
-rw-r--r--Doc/whatsnew/3.5.rst22
-rwxr-xr-xLib/tarfile.py45
-rw-r--r--Lib/test/test_tarfile.py132
-rw-r--r--Misc/ACKS1
-rw-r--r--Misc/NEWS4
6 files changed, 195 insertions, 28 deletions
diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst
index 4fd94fd..67796eb 100644
--- a/Doc/library/tarfile.rst
+++ b/Doc/library/tarfile.rst
@@ -367,7 +367,7 @@ be finalized; only the internally used file object will be closed. See the
available.
-.. method:: TarFile.extractall(path=".", members=None)
+.. method:: TarFile.extractall(path=".", members=None, *, numeric_owner=False)
Extract all members from the archive to the current working directory or
directory *path*. If optional *members* is given, it must be a subset of the
@@ -377,6 +377,10 @@ be finalized; only the internally used file object will be closed. See the
reset each time a file is created in it. And, if a directory's permissions do
not allow writing, extracting files to it will fail.
+ If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile
+ are used to set the owner/group for the extracted files. Otherwise, the named
+ values from the tarfile are used.
+
.. warning::
Never extract archives from untrusted sources without prior inspection.
@@ -384,8 +388,11 @@ be finalized; only the internally used file object will be closed. See the
that have absolute filenames starting with ``"/"`` or filenames with two
dots ``".."``.
+ .. versionchanged:: 3.5
+ Added the *numeric_only* parameter.
+
-.. method:: TarFile.extract(member, path="", set_attrs=True)
+.. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False)
Extract a member from the archive to the current working directory, using its
full name. Its file information is extracted as accurately as possible. *member*
@@ -393,6 +400,10 @@ be finalized; only the internally used file object will be closed. See the
directory using *path*. File attributes (owner, mtime, mode) are set unless
*set_attrs* is false.
+ If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile
+ are used to set the owner/group for the extracted files. Otherwise, the named
+ values from the tarfile are used.
+
.. note::
The :meth:`extract` method does not take care of several extraction issues.
@@ -405,6 +416,9 @@ be finalized; only the internally used file object will be closed. See the
.. versionchanged:: 3.2
Added the *set_attrs* parameter.
+ .. versionchanged:: 3.5
+ Added the *numeric_only* parameter.
+
.. method:: TarFile.extractfile(member)
Extract a member from the archive as a file object. *member* may be a filename
@@ -827,4 +841,3 @@ In case of :const:`PAX_FORMAT` archives, *encoding* is generally not needed
because all the metadata is stored using *UTF-8*. *encoding* is only used in
the rare cases when binary pax headers are decoded or when strings with
surrogate characters are stored.
-
diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
index e865402..5d010bf 100644
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -479,26 +479,32 @@ socket
:meth:`socket.socket.send`.
(Contributed by Giampaolo Rodola' in :issue:`17552`.)
+subprocess
+----------
+
+* The new :func:`subprocess.run` function runs subprocesses and returns a
+ :class:`subprocess.CompletedProcess` object. It Provides a more consistent
+ API than :func:`~subprocess.call`, :func:`~subprocess.check_call` and
+ :func:`~subprocess.check_output`.
+
sysconfig
---------
* The user scripts directory on Windows is now versioned.
(Contributed by Paul Moore in :issue:`23437`.)
-
tarfile
-------
* The :func:`tarfile.open` function now supports ``'x'`` (exclusive creation)
mode. (Contributed by Berker Peksag in :issue:`21717`.)
-subprocess
-----------
-
-* The new :func:`subprocess.run` function runs subprocesses and returns a
- :class:`subprocess.CompletedProcess` object. It Provides a more consistent
- API than :func:`~subprocess.call`, :func:`~subprocess.check_call` and
- :func:`~subprocess.check_output`.
+* The :meth:`~tarfile.TarFile.extractall` and :meth:`~tarfile.TarFile.extract`
+ methods now take a keyword parameter *numeric_only*. If set to ``True``,
+ the extracted files and directories will be owned by the numeric uid and gid
+ from the tarfile. If set to ``False`` (the default, and the behavior in
+ versions prior to 3.5), they will be owned bythe named user and group in the
+ tarfile. (Contributed by Michael Vogt and Eric Smith in :issue:`23193`.)
time
----
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 43909f0..0f1c825 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -1972,12 +1972,13 @@ class TarFile(object):
self.members.append(tarinfo)
- def extractall(self, path=".", members=None):
+ def extractall(self, path=".", members=None, *, numeric_owner=False):
"""Extract all members from the archive to the current working
directory and set owner, modification time and permissions on
directories afterwards. `path' specifies a different directory
to extract to. `members' is optional and must be a subset of the
- list returned by getmembers().
+ list returned by getmembers(). If `numeric_owner` is True, only
+ the numbers for user/group names are used and not the names.
"""
directories = []
@@ -1991,7 +1992,8 @@ class TarFile(object):
tarinfo = copy.copy(tarinfo)
tarinfo.mode = 0o700
# Do not set_attrs directories, as we will do that further down
- self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())
+ self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
+ numeric_owner=numeric_owner)
# Reverse sort directories.
directories.sort(key=lambda a: a.name)
@@ -2001,7 +2003,7 @@ class TarFile(object):
for tarinfo in directories:
dirpath = os.path.join(path, tarinfo.name)
try:
- self.chown(tarinfo, dirpath)
+ self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
self.utime(tarinfo, dirpath)
self.chmod(tarinfo, dirpath)
except ExtractError as e:
@@ -2010,12 +2012,14 @@ class TarFile(object):
else:
self._dbg(1, "tarfile: %s" % e)
- def extract(self, member, path="", set_attrs=True):
+ def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
"""Extract a member from the archive to the current working directory,
using its full name. Its file information is extracted as accurately
as possible. `member' may be a filename or a TarInfo object. You can
specify a different directory using `path'. File attributes (owner,
- mtime, mode) are set unless `set_attrs' is False.
+ mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
+ is True, only the numbers for user/group names are used and not
+ the names.
"""
self._check("r")
@@ -2030,7 +2034,8 @@ class TarFile(object):
try:
self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
- set_attrs=set_attrs)
+ set_attrs=set_attrs,
+ numeric_owner=numeric_owner)
except OSError as e:
if self.errorlevel > 0:
raise
@@ -2076,7 +2081,8 @@ class TarFile(object):
# blkdev, etc.), return None instead of a file object.
return None
- def _extract_member(self, tarinfo, targetpath, set_attrs=True):
+ def _extract_member(self, tarinfo, targetpath, set_attrs=True,
+ numeric_owner=False):
"""Extract the TarInfo object tarinfo to a physical
file called targetpath.
"""
@@ -2114,7 +2120,7 @@ class TarFile(object):
self.makefile(tarinfo, targetpath)
if set_attrs:
- self.chown(tarinfo, targetpath)
+ self.chown(tarinfo, targetpath, numeric_owner)
if not tarinfo.issym():
self.chmod(tarinfo, targetpath)
self.utime(tarinfo, targetpath)
@@ -2203,19 +2209,24 @@ class TarFile(object):
except KeyError:
raise ExtractError("unable to resolve link inside archive")
- def chown(self, tarinfo, targetpath):
- """Set owner of targetpath according to tarinfo.
+ def chown(self, tarinfo, targetpath, numeric_owner):
+ """Set owner of targetpath according to tarinfo. If numeric_owner
+ is True, use .gid/.uid instead of .gname/.uname.
"""
if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
# We have to be root to do so.
- try:
- g = grp.getgrnam(tarinfo.gname)[2]
- except KeyError:
+ if numeric_owner:
g = tarinfo.gid
- try:
- u = pwd.getpwnam(tarinfo.uname)[2]
- except KeyError:
u = tarinfo.uid
+ else:
+ try:
+ g = grp.getgrnam(tarinfo.gname)[2]
+ except KeyError:
+ g = tarinfo.gid
+ try:
+ u = pwd.getpwnam(tarinfo.uname)[2]
+ except KeyError:
+ u = tarinfo.uid
try:
if tarinfo.issym() and hasattr(os, "lchown"):
os.lchown(targetpath, u, g)
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 01d1a92..483c587 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -2,8 +2,10 @@ import sys
import os
import io
from hashlib import md5
+from contextlib import contextmanager
import unittest
+import unittest.mock
import tarfile
from test import support, script_helper
@@ -2264,6 +2266,136 @@ class Bz2PartialReadTest(Bz2Test, unittest.TestCase):
self._test_partial_input("r:bz2")
+def root_is_uid_gid_0():
+ try:
+ import pwd, grp
+ except ImportError:
+ return False
+ if pwd.getpwuid(0)[0] != 'root':
+ return False
+ if grp.getgrgid(0)[0] != 'root':
+ return False
+ return True
+
+
+class NumericOwnerTest(unittest.TestCase):
+ # mock the following:
+ # os.chown: so we can test what's being called
+ # os.chmod: so the modes are not actually changed. if they are, we can't
+ # delete the files/directories
+ # os.geteuid: so we can lie and say we're root (uid = 0)
+
+ @staticmethod
+ def _make_test_archive(filename_1, dirname_1, filename_2):
+ # the file contents to write
+ fobj = io.BytesIO(b"content")
+
+ # create a tar file with a file, a directory, and a file within that
+ # directory. Assign various .uid/.gid values to them
+ items = [(filename_1, 99, 98, tarfile.REGTYPE, fobj),
+ (dirname_1, 77, 76, tarfile.DIRTYPE, None),
+ (filename_2, 88, 87, tarfile.REGTYPE, fobj),
+ ]
+ with tarfile.open(tmpname, 'w') as tarfl:
+ for name, uid, gid, typ, contents in items:
+ t = tarfile.TarInfo(name)
+ t.uid = uid
+ t.gid = gid
+ t.uname = 'root'
+ t.gname = 'root'
+ t.type = typ
+ tarfl.addfile(t, contents)
+
+ # return the full pathname to the tar file
+ return tmpname
+
+ @staticmethod
+ @contextmanager
+ def _setup_test(mock_geteuid):
+ mock_geteuid.return_value = 0 # lie and say we're root
+ fname = 'numeric-owner-testfile'
+ dirname = 'dir'
+
+ # the names we want stored in the tarfile
+ filename_1 = fname
+ dirname_1 = dirname
+ filename_2 = os.path.join(dirname, fname)
+
+ # create the tarfile with the contents we're after
+ tar_filename = NumericOwnerTest._make_test_archive(filename_1,
+ dirname_1,
+ filename_2)
+
+ # open the tarfile for reading. yield it and the names of the items
+ # we stored into the file
+ with tarfile.open(tar_filename) as tarfl:
+ yield tarfl, filename_1, dirname_1, filename_2
+
+ @unittest.mock.patch('os.chown')
+ @unittest.mock.patch('os.chmod')
+ @unittest.mock.patch('os.geteuid')
+ def test_extract_with_numeric_owner(self, mock_geteuid, mock_chmod,
+ mock_chown):
+ with self._setup_test(mock_geteuid) as (tarfl, filename_1, _,
+ filename_2):
+ tarfl.extract(filename_1, TEMPDIR, numeric_owner=True)
+ tarfl.extract(filename_2 , TEMPDIR, numeric_owner=True)
+
+ # convert to filesystem paths
+ f_filename_1 = os.path.join(TEMPDIR, filename_1)
+ f_filename_2 = os.path.join(TEMPDIR, filename_2)
+
+ mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98),
+ unittest.mock.call(f_filename_2, 88, 87),
+ ],
+ any_order=True)
+
+ @unittest.mock.patch('os.chown')
+ @unittest.mock.patch('os.chmod')
+ @unittest.mock.patch('os.geteuid')
+ def test_extractall_with_numeric_owner(self, mock_geteuid, mock_chmod,
+ mock_chown):
+ with self._setup_test(mock_geteuid) as (tarfl, filename_1, dirname_1,
+ filename_2):
+ tarfl.extractall(TEMPDIR, numeric_owner=True)
+
+ # convert to filesystem paths
+ f_filename_1 = os.path.join(TEMPDIR, filename_1)
+ f_dirname_1 = os.path.join(TEMPDIR, dirname_1)
+ f_filename_2 = os.path.join(TEMPDIR, filename_2)
+
+ mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98),
+ unittest.mock.call(f_dirname_1, 77, 76),
+ unittest.mock.call(f_filename_2, 88, 87),
+ ],
+ any_order=True)
+
+ # this test requires that uid=0 and gid=0 really be named 'root'. that's
+ # because the uname and gname in the test file are 'root', and extract()
+ # will look them up using pwd and grp to find their uid and gid, which we
+ # test here to be 0.
+ @unittest.skipUnless(root_is_uid_gid_0(),
+ 'uid=0,gid=0 must be named "root"')
+ @unittest.mock.patch('os.chown')
+ @unittest.mock.patch('os.chmod')
+ @unittest.mock.patch('os.geteuid')
+ def test_extract_without_numeric_owner(self, mock_geteuid, mock_chmod,
+ mock_chown):
+ with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _):
+ tarfl.extract(filename_1, TEMPDIR, numeric_owner=False)
+
+ # convert to filesystem paths
+ f_filename_1 = os.path.join(TEMPDIR, filename_1)
+
+ mock_chown.assert_called_with(f_filename_1, 0, 0)
+
+ @unittest.mock.patch('os.geteuid')
+ def test_keyword_only(self, mock_geteuid):
+ with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _):
+ self.assertRaises(TypeError,
+ tarfl.extract, filename_1, TEMPDIR, False, True)
+
+
def setUpModule():
support.unlink(TEMPDIR)
os.makedirs(TEMPDIR)
diff --git a/Misc/ACKS b/Misc/ACKS
index e04d986..3d4b2cc 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1458,6 +1458,7 @@ Norman Vine
Pauli Virtanen
Frank Visser
Johannes Vogel
+Michael Vogt
Radu Voicilas
Alex Volkov
Martijn Vries
diff --git a/Misc/NEWS b/Misc/NEWS
index 2472c27..71ad4a4 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -32,6 +32,10 @@ Core and Builtins
Library
-------
+- Issue #23193: Add a numeric_owner parameter to
+ tarfile.TarFile.extract and tarfile.TarFile.extractall. Patch by
+ Michael Vogt and Eric Smith.
+
- Issue #23342: Add a subprocess.run() function than returns a CalledProcess
instance for a more consistent API than the existing call* functions.