From b47acbf46abd425f69dcc03e9b4f0c7f7c321ac2 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Fri, 1 Feb 2013 11:22:43 -0800 Subject: Fixes Issue #6972: The zipfile module no longer overwrites files outside of its destination path when extracting malicious zip files. --- Doc/library/zipfile.rst | 17 +++++++--- Lib/test/test_zipfile.py | 86 +++++++++++++++++++++++++++++++++++++++++++----- Lib/zipfile.py | 27 ++++++++------- Misc/NEWS | 3 ++ 4 files changed, 108 insertions(+), 25 deletions(-) diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 264cd47..3c1f5f1 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -214,6 +214,16 @@ ZipFile Objects to extract to. *member* can be a filename or a :class:`ZipInfo` object. *pwd* is the password used for encrypted files. + .. note:: + + If a member filename is an absolute path, a drive/UNC sharepoint and + leading (back)slashes will be stripped, e.g.: ``///foo/bar`` becomes + ``foo/bar`` on Unix, and ``ะก:\foo\bar`` becomes ``foo\bar`` on Windows. + And all ``".."`` components in a member filename will be removed, e.g.: + ``../../foo../../ba..r`` becomes ``foo../ba..r``. On Windows illegal + characters (``:``, ``<``, ``>``, ``|``, ``"``, ``?``, and ``*``) + replaced by underscore (``_``). + .. method:: ZipFile.extractall(path=None, members=None, pwd=None) @@ -222,12 +232,9 @@ ZipFile Objects be a subset of the list returned by :meth:`namelist`. *pwd* is the password used for encrypted files. - .. warning:: + .. note:: - Never extract archives from untrusted sources without prior inspection. - It is possible that files are created outside of *path*, e.g. members - that have absolute filenames starting with ``"/"`` or filenames with two - dots ``".."``. + See :meth:`extract` note. .. method:: ZipFile.printdir() diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 741cee9..85cd074 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -29,7 +29,7 @@ DATAFILES_DIR = 'zipfile_datafiles' SMALL_TEST_DATA = [('_ziptest1', '1q2w3e4r5t'), ('ziptest2dir/_ziptest2', 'qawsedrftg'), - ('/ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'), + ('ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'), ('ziptest2dir/ziptest3dir/ziptest4dir/_ziptest3', '6y7u8i9o0p')] @@ -409,10 +409,7 @@ class TestsWithSourceFile(unittest.TestCase): writtenfile = zipfp.extract(fpath) # make sure it was written to the right place - if os.path.isabs(fpath): - correctfile = os.path.join(os.getcwd(), fpath[1:]) - else: - correctfile = os.path.join(os.getcwd(), fpath) + correctfile = os.path.join(os.getcwd(), fpath) correctfile = os.path.normpath(correctfile) self.assertEqual(writtenfile, correctfile) @@ -434,10 +431,7 @@ class TestsWithSourceFile(unittest.TestCase): with zipfile.ZipFile(TESTFN2, "r") as zipfp: zipfp.extractall() for fpath, fdata in SMALL_TEST_DATA: - if os.path.isabs(fpath): - outfile = os.path.join(os.getcwd(), fpath[1:]) - else: - outfile = os.path.join(os.getcwd(), fpath) + outfile = os.path.join(os.getcwd(), fpath) with open(outfile, "rb") as f: self.assertEqual(fdata.encode(), f.read()) @@ -447,6 +441,80 @@ class TestsWithSourceFile(unittest.TestCase): # remove the test file subdirectories shutil.rmtree(os.path.join(os.getcwd(), 'ziptest2dir')) + def check_file(self, filename, content): + self.assertTrue(os.path.isfile(filename)) + with open(filename, 'rb') as f: + self.assertEqual(f.read(), content) + + def test_extract_hackers_arcnames(self): + hacknames = [ + ('../foo/bar', 'foo/bar'), + ('foo/../bar', 'foo/bar'), + ('foo/../../bar', 'foo/bar'), + ('foo/bar/..', 'foo/bar'), + ('./../foo/bar', 'foo/bar'), + ('/foo/bar', 'foo/bar'), + ('/foo/../bar', 'foo/bar'), + ('/foo/../../bar', 'foo/bar'), + ('//foo/bar', 'foo/bar'), + ('../../foo../../ba..r', 'foo../ba..r'), + ] + if os.path.sep == '\\': # Windows. + hacknames.extend([ + (r'..\foo\bar', 'foo/bar'), + (r'..\/foo\/bar', 'foo/bar'), + (r'foo/\..\/bar', 'foo/bar'), + (r'foo\/../\bar', 'foo/bar'), + (r'C:foo/bar', 'foo/bar'), + (r'C:/foo/bar', 'foo/bar'), + (r'C://foo/bar', 'foo/bar'), + (r'C:\foo\bar', 'foo/bar'), + (r'//conky/mountpoint/foo/bar', 'foo/bar'), + (r'\\conky\mountpoint\foo\bar', 'foo/bar'), + (r'///conky/mountpoint/foo/bar', 'conky/mountpoint/foo/bar'), + (r'\\\conky\mountpoint\foo\bar', 'conky/mountpoint/foo/bar'), + (r'//conky//mountpoint/foo/bar', 'conky/mountpoint/foo/bar'), + (r'\\conky\\mountpoint\foo\bar', 'conky/mountpoint/foo/bar'), + (r'//?/C:/foo/bar', 'foo/bar'), + (r'\\?\C:\foo\bar', 'foo/bar'), + (r'C:/../C:/foo/bar', 'C_/foo/bar'), + (r'a:b\ce|f"g?h*i', 'b/c_d_e_f_g_h_i'), + ]) + + for arcname, fixedname in hacknames: + content = b'foobar' + arcname.encode() + with zipfile.ZipFile(TESTFN2, 'w', zipfile.ZIP_STORED) as zipfp: + zipfp.writestr(arcname, content) + + targetpath = os.path.join('target', 'subdir', 'subsub') + correctfile = os.path.join(targetpath, *fixedname.split('/')) + + with zipfile.ZipFile(TESTFN2, 'r') as zipfp: + writtenfile = zipfp.extract(arcname, targetpath) + self.assertEqual(writtenfile, correctfile) + self.check_file(correctfile, content) + shutil.rmtree('target') + + with zipfile.ZipFile(TESTFN2, 'r') as zipfp: + zipfp.extractall(targetpath) + self.check_file(correctfile, content) + shutil.rmtree('target') + + correctfile = os.path.join(os.getcwd(), *fixedname.split('/')) + + with zipfile.ZipFile(TESTFN2, 'r') as zipfp: + writtenfile = zipfp.extract(arcname) + self.assertEqual(writtenfile, correctfile) + self.check_file(correctfile, content) + shutil.rmtree(fixedname.split('/')[0]) + + with zipfile.ZipFile(TESTFN2, 'r') as zipfp: + zipfp.extractall() + self.check_file(correctfile, content) + shutil.rmtree(fixedname.split('/')[0]) + + os.remove(TESTFN2) + def test_writestr_compression(self): zipfp = zipfile.ZipFile(TESTFN2, "w") zipfp.writestr("a.txt", "hello world", compress_type=zipfile.ZIP_STORED) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index f900abd..0b5d3e8 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -1062,17 +1062,22 @@ class ZipFile: """ # build the destination pathname, replacing # forward slashes to platform specific separators. - # Strip trailing path separator, unless it represents the root. - if (targetpath[-1:] in (os.path.sep, os.path.altsep) - and len(os.path.splitdrive(targetpath)[1]) > 1): - targetpath = targetpath[:-1] - - # don't include leading "/" from file name if present - if member.filename[0] == '/': - targetpath = os.path.join(targetpath, member.filename[1:]) - else: - targetpath = os.path.join(targetpath, member.filename) - + arcname = member.filename.replace('/', os.path.sep) + + if os.path.altsep: + arcname = arcname.replace(os.path.altsep, os.path.sep) + # interpret absolute pathname as relative, remove drive letter or + # UNC path, redundant separators, "." and ".." components. + arcname = os.path.splitdrive(arcname)[1] + arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) + if x not in ('', os.path.curdir, os.path.pardir)) + # filter illegal characters on Windows + if os.path.sep == '\\': + illegal = ':<>|"?*' + table = str.maketrans(illegal, '_' * len(illegal)) + arcname = arcname.translate(table) + + targetpath = os.path.join(targetpath, arcname) targetpath = os.path.normpath(targetpath) # Create all upper directories if necessary. diff --git a/Misc/NEWS b/Misc/NEWS index 34f4a6b..917ab8d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -216,6 +216,9 @@ Core and Builtins Library ------- +- Issue #6972: The zipfile module no longer overwrites files outside of + its destination path when extracting malicious zip files. + - Issue #4844: ZipFile now raises BadZipFile when opens a ZIP file with an incomplete "End of Central Directory" record. Original patch by Guilherme Polo and Alan McIntyre. -- cgit v0.12 From b9817b01edfe6fa369c4c291a5de00732b8abc7c Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Fri, 1 Feb 2013 13:03:39 -0800 Subject: Additional fix for Issue #12268: The io module file object writelines() methods no longer abort early when one of its write system calls is interrupted (EINTR). --- Misc/NEWS | 3 +++ Modules/_io/iobase.c | 5 ++++- Modules/_io/textio.c | 7 +++++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS b/Misc/NEWS index 917ab8d..4431f18 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -817,6 +817,9 @@ Library Extension Modules ----------------- +- Issue #12268: The io module file object writelines() methods no longer + abort early when one of its write system calls is interrupted (EINTR). + - Fix the leak of a dict in the time module when used in an embedded interpreter that is repeatedly initialized and shutdown and reinitialized. diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index ba86146..8b17ecd 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -674,7 +674,10 @@ iobase_writelines(PyObject *self, PyObject *args) break; /* Stop Iteration */ } - res = PyObject_CallMethodObjArgs(self, _PyIO_str_write, line, NULL); + res = NULL; + do { + res = PyObject_CallMethodObjArgs(self, _PyIO_str_write, line, NULL); + } while (res == NULL && _PyIO_trap_eintr()); Py_DECREF(line); if (res == NULL) { Py_DECREF(iter); diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 2fbb8f3..7d48388 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1249,8 +1249,11 @@ _textiowrapper_writeflush(textio *self) Py_DECREF(pending); if (b == NULL) return -1; - ret = PyObject_CallMethodObjArgs(self->buffer, - _PyIO_str_write, b, NULL); + ret = NULL; + do { + ret = PyObject_CallMethodObjArgs(self->buffer, + _PyIO_str_write, b, NULL); + } while (ret == NULL && _PyIO_trap_eintr()); Py_DECREF(b); if (ret == NULL) return -1; -- cgit v0.12 From 9b57cf581090527a1571bfb42d24b6f26e44774f Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Fri, 1 Feb 2013 13:06:44 -0800 Subject: better corrected news entry --- Misc/NEWS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS b/Misc/NEWS index 4431f18..6978201 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -817,8 +817,8 @@ Library Extension Modules ----------------- -- Issue #12268: The io module file object writelines() methods no longer - abort early when one of its write system calls is interrupted (EINTR). +- Issue #12268: The io module file object write methods no longer abort early + when one of its write system calls is interrupted (EINTR). - Fix the leak of a dict in the time module when used in an embedded interpreter that is repeatedly initialized and shutdown and reinitialized. -- cgit v0.12