summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Miss Skeleton (bot) <31488909+miss-islington@users.noreply.github.com> 2020-10-21 05:29:44 (GMT)
committer: GitHub <noreply@github.com> 2020-10-21 05:29:44 (GMT)
commit: e866f33a48ee24e447fafd181f0da5f9584e0340 (patch)
tree: fd854846ce3fce16bae8b11c96a86d4da422e0ac /Lib
parent: 6443a8ccc886749f5e83a8ca073006742b605d90 (diff)
download: cpython-e866f33a48ee24e447fafd181f0da5f9584e0340.zip
cpython-e866f33a48ee24e447fafd181f0da5f9584e0340.tar.gz
cpython-e866f33a48ee24e447fafd181f0da5f9584e0340.tar.bz2
bpo-41316: Make tarfile follow specs for FNAME (GH-21511)
tarfile writes the full path to the FNAME field of the GZIP header, instead of just the basename, if the user specified an absolute path. Some archive viewers may process such a file incorrectly. It also creates a security issue, because anyone can learn the directory structure of the system, along with the username or other personal information. RFC 1952 says about FNAME: "This is the original name of the file being compressed, with any directory components removed." So tarfile must remove directory names from FNAME and write only the basename of the file. Automerge-Triggered-By: @jaraco (cherry picked from commit 22748a83d927d3da1beaed771be30887c42b2500) Co-authored-by: Artem Bulgakov <ArtemSBulgakov@ya.ru>
Diffstat (limited to 'Lib')
-rwxr-xr-x  Lib/tarfile.py  2
-rw-r--r--  Lib/test/test_tarfile.py  14
2 files changed, 15 insertions, 1 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 7a69e1b..39f63b9 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -420,6 +420,8 @@ class _Stream:
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
if self.name.endswith(".gz"):
self.name = self.name[:-3]
+ # Honor "directory components removed" from RFC1952
+ self.name = os.path.basename(self.name)
# RFC1952 says we must use ISO-8859-1 for the FNAME field.
self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index b512168..be717e3 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1384,12 +1384,15 @@ class WriteTest(WriteTestBase, unittest.TestCase):
pax_headers={'non': 'empty'})
self.assertFalse(f.closed)
+
class GzipWriteTest(GzipTest, WriteTest):
pass
+
class Bz2WriteTest(Bz2Test, WriteTest):
pass
+
class LzmaWriteTest(LzmaTest, WriteTest):
pass
@@ -1432,8 +1435,17 @@ class StreamWriteTest(WriteTestBase, unittest.TestCase):
finally:
os.umask(original_umask)
+
class GzipStreamWriteTest(GzipTest, StreamWriteTest):
- pass
+ def test_source_directory_not_leaked(self):
+ """
+ Ensure the source directory is not included in the tar header
+ per bpo-41316.
+ """
+ tarfile.open(tmpname, self.mode).close()
+ payload = pathlib.Path(tmpname).read_text(encoding='latin-1')
+ assert os.path.dirname(tmpname) not in payload
+
class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
decompressor = bz2.BZ2Decompressor if bz2 else None