summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/zipfile.rst17
-rw-r--r--Lib/test/test_zipfile.py86
-rw-r--r--Lib/zipfile.py27
-rw-r--r--Misc/NEWS9
-rw-r--r--Modules/_io/iobase.c5
-rw-r--r--Modules/_io/textio.c7
6 files changed, 123 insertions, 28 deletions
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index 9f6e077..54f9b4b 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -242,6 +242,16 @@ ZipFile Objects
to extract to. *member* can be a filename or a :class:`ZipInfo` object.
*pwd* is the password used for encrypted files.
+ .. note::
+
+ If a member filename is an absolute path, a drive/UNC sharepoint and
+ leading (back)slashes will be stripped, e.g.: ``///foo/bar`` becomes
+ ``foo/bar`` on Unix, and ``ะก:\foo\bar`` becomes ``foo\bar`` on Windows.
+ And all ``".."`` components in a member filename will be removed, e.g.:
+ ``../../foo../../ba..r`` becomes ``foo../ba..r``. On Windows illegal
+ characters (``:``, ``<``, ``>``, ``|``, ``"``, ``?``, and ``*``)
+ replaced by underscore (``_``).
+
.. method:: ZipFile.extractall(path=None, members=None, pwd=None)
@@ -250,12 +260,9 @@ ZipFile Objects
be a subset of the list returned by :meth:`namelist`. *pwd* is the password
used for encrypted files.
- .. warning::
+ .. note::
- Never extract archives from untrusted sources without prior inspection.
- It is possible that files are created outside of *path*, e.g. members
- that have absolute filenames starting with ``"/"`` or filenames with two
- dots ``".."``.
+ See :meth:`extract` note.
.. method:: ZipFile.printdir()
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index b0c529f..81e3036 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -24,7 +24,7 @@ DATAFILES_DIR = 'zipfile_datafiles'
SMALL_TEST_DATA = [('_ziptest1', '1q2w3e4r5t'),
('ziptest2dir/_ziptest2', 'qawsedrftg'),
- ('/ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'),
+ ('ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'),
('ziptest2dir/ziptest3dir/ziptest4dir/_ziptest3', '6y7u8i9o0p')]
@@ -501,10 +501,7 @@ class TestsWithSourceFile(unittest.TestCase):
writtenfile = zipfp.extract(fpath)
# make sure it was written to the right place
- if os.path.isabs(fpath):
- correctfile = os.path.join(os.getcwd(), fpath[1:])
- else:
- correctfile = os.path.join(os.getcwd(), fpath)
+ correctfile = os.path.join(os.getcwd(), fpath)
correctfile = os.path.normpath(correctfile)
self.assertEqual(writtenfile, correctfile)
@@ -526,10 +523,7 @@ class TestsWithSourceFile(unittest.TestCase):
with zipfile.ZipFile(TESTFN2, "r") as zipfp:
zipfp.extractall()
for fpath, fdata in SMALL_TEST_DATA:
- if os.path.isabs(fpath):
- outfile = os.path.join(os.getcwd(), fpath[1:])
- else:
- outfile = os.path.join(os.getcwd(), fpath)
+ outfile = os.path.join(os.getcwd(), fpath)
with open(outfile, "rb") as f:
self.assertEqual(fdata.encode(), f.read())
@@ -539,6 +533,80 @@ class TestsWithSourceFile(unittest.TestCase):
# remove the test file subdirectories
shutil.rmtree(os.path.join(os.getcwd(), 'ziptest2dir'))
+ def check_file(self, filename, content):
+ self.assertTrue(os.path.isfile(filename))
+ with open(filename, 'rb') as f:
+ self.assertEqual(f.read(), content)
+
+ def test_extract_hackers_arcnames(self):
+ hacknames = [
+ ('../foo/bar', 'foo/bar'),
+ ('foo/../bar', 'foo/bar'),
+ ('foo/../../bar', 'foo/bar'),
+ ('foo/bar/..', 'foo/bar'),
+ ('./../foo/bar', 'foo/bar'),
+ ('/foo/bar', 'foo/bar'),
+ ('/foo/../bar', 'foo/bar'),
+ ('/foo/../../bar', 'foo/bar'),
+ ('//foo/bar', 'foo/bar'),
+ ('../../foo../../ba..r', 'foo../ba..r'),
+ ]
+ if os.path.sep == '\\': # Windows.
+ hacknames.extend([
+ (r'..\foo\bar', 'foo/bar'),
+ (r'..\/foo\/bar', 'foo/bar'),
+ (r'foo/\..\/bar', 'foo/bar'),
+ (r'foo\/../\bar', 'foo/bar'),
+ (r'C:foo/bar', 'foo/bar'),
+ (r'C:/foo/bar', 'foo/bar'),
+ (r'C://foo/bar', 'foo/bar'),
+ (r'C:\foo\bar', 'foo/bar'),
+ (r'//conky/mountpoint/foo/bar', 'foo/bar'),
+ (r'\\conky\mountpoint\foo\bar', 'foo/bar'),
+ (r'///conky/mountpoint/foo/bar', 'conky/mountpoint/foo/bar'),
+ (r'\\\conky\mountpoint\foo\bar', 'conky/mountpoint/foo/bar'),
+ (r'//conky//mountpoint/foo/bar', 'conky/mountpoint/foo/bar'),
+ (r'\\conky\\mountpoint\foo\bar', 'conky/mountpoint/foo/bar'),
+ (r'//?/C:/foo/bar', 'foo/bar'),
+ (r'\\?\C:\foo\bar', 'foo/bar'),
+ (r'C:/../C:/foo/bar', 'C_/foo/bar'),
+ (r'a:b\c<d>e|f"g?h*i', 'b/c_d_e_f_g_h_i'),
+ ])
+
+ for arcname, fixedname in hacknames:
+ content = b'foobar' + arcname.encode()
+ with zipfile.ZipFile(TESTFN2, 'w', zipfile.ZIP_STORED) as zipfp:
+ zipfp.writestr(arcname, content)
+
+ targetpath = os.path.join('target', 'subdir', 'subsub')
+ correctfile = os.path.join(targetpath, *fixedname.split('/'))
+
+ with zipfile.ZipFile(TESTFN2, 'r') as zipfp:
+ writtenfile = zipfp.extract(arcname, targetpath)
+ self.assertEqual(writtenfile, correctfile)
+ self.check_file(correctfile, content)
+ shutil.rmtree('target')
+
+ with zipfile.ZipFile(TESTFN2, 'r') as zipfp:
+ zipfp.extractall(targetpath)
+ self.check_file(correctfile, content)
+ shutil.rmtree('target')
+
+ correctfile = os.path.join(os.getcwd(), *fixedname.split('/'))
+
+ with zipfile.ZipFile(TESTFN2, 'r') as zipfp:
+ writtenfile = zipfp.extract(arcname)
+ self.assertEqual(writtenfile, correctfile)
+ self.check_file(correctfile, content)
+ shutil.rmtree(fixedname.split('/')[0])
+
+ with zipfile.ZipFile(TESTFN2, 'r') as zipfp:
+ zipfp.extractall()
+ self.check_file(correctfile, content)
+ shutil.rmtree(fixedname.split('/')[0])
+
+ os.remove(TESTFN2)
+
def test_writestr_compression_stored(self):
zipfp = zipfile.ZipFile(TESTFN2, "w")
zipfp.writestr("a.txt", "hello world", compress_type=zipfile.ZIP_STORED)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 235b811..f5c3f17 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1229,17 +1229,22 @@ class ZipFile:
"""
# build the destination pathname, replacing
# forward slashes to platform specific separators.
- # Strip trailing path separator, unless it represents the root.
- if (targetpath[-1:] in (os.path.sep, os.path.altsep)
- and len(os.path.splitdrive(targetpath)[1]) > 1):
- targetpath = targetpath[:-1]
-
- # don't include leading "/" from file name if present
- if member.filename[0] == '/':
- targetpath = os.path.join(targetpath, member.filename[1:])
- else:
- targetpath = os.path.join(targetpath, member.filename)
-
+ arcname = member.filename.replace('/', os.path.sep)
+
+ if os.path.altsep:
+ arcname = arcname.replace(os.path.altsep, os.path.sep)
+ # interpret absolute pathname as relative, remove drive letter or
+ # UNC path, redundant separators, "." and ".." components.
+ arcname = os.path.splitdrive(arcname)[1]
+ arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
+ if x not in ('', os.path.curdir, os.path.pardir))
+ # filter illegal characters on Windows
+ if os.path.sep == '\\':
+ illegal = ':<>|"?*'
+ table = str.maketrans(illegal, '_' * len(illegal))
+ arcname = arcname.translate(table)
+
+ targetpath = os.path.join(targetpath, arcname)
targetpath = os.path.normpath(targetpath)
# Create all upper directories if necessary.
diff --git a/Misc/NEWS b/Misc/NEWS
index cbce33f..062e401 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -167,6 +167,9 @@ Core and Builtins
Library
-------
+- Issue #6972: The zipfile module no longer overwrites files outside of
+ its destination path when extracting malicious zip files.
+
- Issue #4844: ZipFile now raises BadZipFile when opens a ZIP file with an
incomplete "End of Central Directory" record. Original patch by Guilherme
Polo and Alan McIntyre.
@@ -503,6 +506,12 @@ Library
- Issue #15906: Fix a regression in `argparse` caused by the preceding change,
when ``action='append'``, ``type='str'`` and ``default=[]``.
+Extension Modules
+-----------------
+
+- Issue #12268: The io module file object write methods no longer abort early
+ when one of its write system calls is interrupted (EINTR).
+
Tests
-----
diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c
index babb019..2039147 100644
--- a/Modules/_io/iobase.c
+++ b/Modules/_io/iobase.c
@@ -669,7 +669,10 @@ iobase_writelines(PyObject *self, PyObject *args)
break; /* Stop Iteration */
}
- res = PyObject_CallMethodObjArgs(self, _PyIO_str_write, line, NULL);
+ res = NULL;
+ do {
+ res = PyObject_CallMethodObjArgs(self, _PyIO_str_write, line, NULL);
+ } while (res == NULL && _PyIO_trap_eintr());
Py_DECREF(line);
if (res == NULL) {
Py_DECREF(iter);
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index a93049f..ffaa945 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -1247,8 +1247,11 @@ _textiowrapper_writeflush(textio *self)
Py_DECREF(pending);
if (b == NULL)
return -1;
- ret = PyObject_CallMethodObjArgs(self->buffer,
- _PyIO_str_write, b, NULL);
+ ret = NULL;
+ do {
+ ret = PyObject_CallMethodObjArgs(self->buffer,
+ _PyIO_str_write, b, NULL);
+ } while (ret == NULL && _PyIO_trap_eintr());
Py_DECREF(b);
if (ret == NULL)
return -1;