From dcafb362f7eab84710ad924cac1724bbf3b9c304 Mon Sep 17 00:00:00 2001 From: WilliamRoyNelson Date: Fri, 26 Jul 2024 07:34:13 -0700 Subject: gh-121999: Change default tarfile filter to 'data' (GH-122002) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tomas R Co-authored-by: Scott Odle Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Petr Viktorin --- Doc/library/shutil.rst | 12 ++-- Doc/library/tarfile.rst | 75 ++++++++++++++-------- Lib/tarfile.py | 8 +-- Lib/test/test_shutil.py | 3 - Lib/test/test_tarfile.py | 52 ++++++--------- .../2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst | 2 + 6 files changed, 76 insertions(+), 76 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index fd32479..220207e 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -706,11 +706,9 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. The keyword-only *filter* argument is passed to the underlying unpacking function. For zip files, *filter* is not accepted. - For tar files, it is recommended to set it to ``'data'``, - unless using features specific to tar and UNIX-like filesystems. + For tar files, it is recommended to use ``'data'`` (default since Python + 3.14), unless using features specific to tar and UNIX-like filesystems. (See :ref:`tarfile-extraction-filter` for details.) - The ``'data'`` filter will become the default for tar files - in Python 3.14. .. audit-event:: shutil.unpack_archive filename,extract_dir,format shutil.unpack_archive @@ -721,6 +719,12 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. the *extract_dir* argument, e.g. members that have absolute filenames starting with "/" or filenames with two dots "..". + Since Python 3.14, the defaults for both built-in formats (zip and tar + files) will prevent the most dangerous of such security issues, + but will not prevent *all* unintended behavior. + Read the :ref:`tarfile-further-verification` + section for tar-specific details. + .. versionchanged:: 3.7 Accepts a :term:`path-like object` for *filename* and *extract_dir*. diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 5b624f3..631d869 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -40,9 +40,12 @@ Some facts and figures: Archives are extracted using a :ref:`filter `, which makes it possible to either limit surprising/dangerous features, or to acknowledge that they are expected and the archive is fully trusted. - By default, archives are fully trusted, but this default is deprecated - and slated to change in Python 3.14. +.. versionchanged:: 3.14 + Set the default extraction filter to :func:`data `, + which disallows some dangerous features such as links to absolute paths + or paths outside of the destination. Previously, the filter strategy + was equivalent to :func:`fully_trusted `. .. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs) @@ -495,18 +498,18 @@ be finalized; only the internally used file object will be closed. See the The *filter* argument specifies how ``members`` are modified or rejected before extraction. See :ref:`tarfile-extraction-filter` for details. - It is recommended to set this explicitly depending on which *tar* features - you need to support. + It is recommended to set this explicitly only if specific *tar* features + are required, or as ``filter='data'`` to support Python versions with a less + secure default (3.13 and lower). .. warning:: Never extract archives from untrusted sources without prior inspection. - It is possible that files are created outside of *path*, e.g. members - that have absolute filenames starting with ``"/"`` or filenames with two - dots ``".."``. - Set ``filter='data'`` to prevent the most dangerous security issues, - and read the :ref:`tarfile-extraction-filter` section for details. + Since Python 3.14, the default (:func:`data `) will prevent + the most dangerous security issues. + However, it will not prevent *all* unintended or insecure behavior. + Read the :ref:`tarfile-extraction-filter` section for details. .. versionchanged:: 3.5 Added the *numeric_owner* parameter. @@ -517,6 +520,9 @@ be finalized; only the internally used file object will be closed. See the .. versionchanged:: 3.12 Added the *filter* parameter. + .. versionchanged:: 3.14 + The *filter* parameter now defaults to ``'data'``. + .. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False, filter=None) @@ -536,10 +542,8 @@ be finalized; only the internally used file object will be closed. See the .. warning:: - See the warning for :meth:`extractall`. - - Set ``filter='data'`` to prevent the most dangerous security issues, - and read the :ref:`tarfile-extraction-filter` section for details. + Never extract archives from untrusted sources without prior inspection. + See the warning for :meth:`extractall` for details. .. versionchanged:: 3.2 Added the *set_attrs* parameter. @@ -602,14 +606,8 @@ be finalized; only the internally used file object will be closed. See the String names are not allowed for this attribute, unlike the *filter* argument to :meth:`~TarFile.extract`. - If ``extraction_filter`` is ``None`` (the default), - calling an extraction method without a *filter* argument will raise a - ``DeprecationWarning``, - and fall back to the :func:`fully_trusted ` filter, - whose dangerous behavior matches previous versions of Python. - - In Python 3.14+, leaving ``extraction_filter=None`` will cause - extraction methods to use the :func:`data ` filter by default. + If ``extraction_filter`` is ``None`` (the default), extraction methods + will use the :func:`data ` filter by default. The attribute may be set on instances or overridden in subclasses. It also is possible to set it on the ``TarFile`` class itself to set a @@ -619,6 +617,14 @@ be finalized; only the internally used file object will be closed. See the To set a global default this way, a filter function needs to be wrapped in :func:`staticmethod()` to prevent injection of a ``self`` argument. + .. versionchanged:: 3.14 + + The default filter is set to :func:`data `, + which disallows some dangerous features such as links to absolute paths + or paths outside of the destination. + Previously, the default was equivalent to + :func:`fully_trusted `. + .. method:: TarFile.add(name, arcname=None, recursive=True, *, filter=None) Add the file *name* to the archive. *name* may be any type of file @@ -969,6 +975,12 @@ In most cases, the full functionality is not needed. Therefore, *tarfile* supports extraction filters: a mechanism to limit functionality, and thus mitigate some of the security issues. +.. warning:: + + None of the available filters blocks *all* dangerous archive features. + Never extract archives from untrusted sources without prior inspection. + See also :ref:`tarfile-further-verification`. + .. seealso:: :pep:`706` @@ -992,12 +1004,13 @@ can be: * ``None`` (default): Use :attr:`TarFile.extraction_filter`. - If that is also ``None`` (the default), raise a ``DeprecationWarning``, - and fall back to the ``'fully_trusted'`` filter, whose dangerous behavior - matches previous versions of Python. + If that is also ``None`` (the default), the ``'data'`` filter will be used. + + .. versionchanged:: 3.14 - In Python 3.14, the ``'data'`` filter will become the default instead. - It's possible to switch earlier; see :attr:`TarFile.extraction_filter`. + The default filter is set to :func:`data `. + Previously, the default was equivalent to + :func:`fully_trusted `. * A callable which will be called for each extracted member with a :ref:`TarInfo ` describing the member and the destination @@ -1080,6 +1093,9 @@ reused in custom filters: Return the modified ``TarInfo`` member. + Note that this filter does not block *all* dangerous archive features. + See :ref:`tarfile-further-verification` for details. + .. _tarfile-extraction-refuse: @@ -1093,6 +1109,8 @@ With ``errorlevel=0`` the error will be logged and the member will be skipped, but extraction will continue. +.. _tarfile-further-verification: + Hints for further verification ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1110,9 +1128,10 @@ Here is an incomplete list of things to consider: disk, memory and CPU usage. * Check filenames against an allow-list of characters (to filter out control characters, confusables, foreign path separators, - etc.). + and so on). * Check that filenames have expected extensions (discouraging files that - execute when you “click on them”, or extension-less files like Windows special device names). + execute when you “click on them”, or extension-less files like Windows + special device names). * Limit the number of extracted files, total size of extracted data, filename length (including symlink length), and size of individual files. * Check for files that would be shadowed on case-insensitive filesystems. diff --git a/Lib/tarfile.py b/Lib/tarfile.py index d5d8a46..4fa7bb6 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2248,13 +2248,7 @@ class TarFile(object): if filter is None: filter = self.extraction_filter if filter is None: - import warnings - warnings.warn( - 'Python 3.14 will, by default, filter extracted tar ' - + 'archives and reject files or modify their metadata. ' - + 'Use the filter argument to control this behavior.', - DeprecationWarning, stacklevel=3) - return fully_trusted_filter + return data_filter if isinstance(filter, str): raise TypeError( 'String names are not supported for ' diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index c458c5d..c770be2 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -2145,9 +2145,6 @@ class TestArchives(BaseTest, unittest.TestCase): def check_unpack_tarball(self, format): self.check_unpack_archive(format, filter='fully_trusted') self.check_unpack_archive(format, filter='data') - with warnings_helper.check_warnings( - ('Python 3.14', DeprecationWarning)): - self.check_unpack_archive(format) def test_unpack_archive_tar(self): self.check_unpack_tarball('tar') diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index f715940..5600f07 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -722,6 +722,24 @@ class MiscReadTestBase(CommonReadTest): tar.close() os_helper.rmtree(DIR) + @staticmethod + def test_extractall_default_filter(): + # Test that the default filter is now "data", and the other filter types are not used. + DIR = pathlib.Path(TEMPDIR) / "extractall_default_filter" + with ( + os_helper.temp_dir(DIR), + tarfile.open(tarname, encoding="iso8859-1") as tar, + unittest.mock.patch("tarfile.data_filter", wraps=tarfile.data_filter) as mock_data_filter, + unittest.mock.patch("tarfile.tar_filter", wraps=tarfile.tar_filter) as mock_tar_filter, + unittest.mock.patch("tarfile.fully_trusted_filter", wraps=tarfile.fully_trusted_filter) as mock_ft_filter + ): + directories = [t for t in tar if t.isdir()] + tar.extractall(DIR, directories) + + mock_data_filter.assert_called() + mock_ft_filter.assert_not_called() + mock_tar_filter.assert_not_called() + @os_helper.skip_unless_working_chmod def test_extract_directory(self): dirtype = "ustar/dirtype" @@ -738,31 +756,6 @@ class MiscReadTestBase(CommonReadTest): finally: os_helper.rmtree(DIR) - def test_deprecation_if_no_filter_passed_to_extractall(self): - DIR = pathlib.Path(TEMPDIR) / "extractall" - with ( - os_helper.temp_dir(DIR), - tarfile.open(tarname, encoding="iso8859-1") as tar - ): - directories = [t for t in tar if t.isdir()] - with self.assertWarnsRegex(DeprecationWarning, "Use the filter argument") as cm: - tar.extractall(DIR, directories) - # check that the stacklevel of the deprecation warning is correct: - self.assertEqual(cm.filename, __file__) - - def test_deprecation_if_no_filter_passed_to_extract(self): - dirtype = "ustar/dirtype" - DIR = pathlib.Path(TEMPDIR) / "extractall" - with ( - os_helper.temp_dir(DIR), - tarfile.open(tarname, encoding="iso8859-1") as tar - ): - tarinfo = tar.getmember(dirtype) - with self.assertWarnsRegex(DeprecationWarning, "Use the filter argument") as cm: - tar.extract(tarinfo, path=DIR) - # check that the stacklevel of the deprecation warning is correct: - self.assertEqual(cm.filename, __file__) - def test_extractall_pathlike_dir(self): DIR = os.path.join(TEMPDIR, "extractall") with os_helper.temp_dir(DIR), \ @@ -4011,15 +4004,6 @@ class TestExtractionFilters(unittest.TestCase): self.assertIs(filtered.name, tarinfo.name) self.assertIs(filtered.type, tarinfo.type) - def test_default_filter_warns(self): - """Ensure the default filter warns""" - with ArchiveMaker() as arc: - arc.add('foo') - with warnings_helper.check_warnings( - ('Python 3.14', DeprecationWarning)): - with self.check_context(arc.open(), None): - self.expect_file('foo') - def test_change_default_filter_on_instance(self): tar = tarfile.TarFile(tarname, 'r') def strict_filter(tarinfo, path): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst new file mode 100644 index 0000000..e65aa99 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst @@ -0,0 +1,2 @@ +The default extraction filter for the :mod:`tarfile` module is now +set to :func:`'data' `. -- cgit v0.12