summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bernhard M. Wiedemann <githubbmw@lsmod.de> 2018-01-31 10:17:10 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2018-01-31 10:17:10 (GMT)
commit: 84521047e413d7d1150aaa1c333580b683b3f4b1 (patch)
tree: 1aae1c2f5a8b27562c9ea1fa153769066b1be5af
parent: 8d83e4ba7823827bcbc119db887004d5c3a63dc6 (diff)
download: cpython-84521047e413d7d1150aaa1c333580b683b3f4b1.zip
cpython-84521047e413d7d1150aaa1c333580b683b3f4b1.tar.gz
cpython-84521047e413d7d1150aaa1c333580b683b3f4b1.tar.bz2
bpo-30693: zip+tarfile: sort directory listing (#2263)
tarfile and zipfile now sort directory listings so that the generated tar and zip archives are more reproducible. See also https://reproducible-builds.org/docs/stable-inputs/ on that topic.
-rw-r--r-- Doc/library/tarfile.rst 6
-rw-r--r-- Doc/library/zipfile.rst 5
-rwxr-xr-x Lib/tarfile.py 2
-rw-r--r-- Lib/test/test_tarfile.py 24
-rw-r--r-- Lib/zipfile.py 6
-rw-r--r-- Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ7.rst 1
-rw-r--r-- Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ8.rst 1
7 files changed, 39 insertions, 6 deletions
diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst
index 2450716..9cd0715 100644
--- a/Doc/library/tarfile.rst
+++ b/Doc/library/tarfile.rst
@@ -451,7 +451,8 @@ be finalized; only the internally used file object will be closed. See the
(directory, fifo, symbolic link, etc.). If given, *arcname* specifies an
alternative name for the file in the archive. Directories are added
recursively by default. This can be avoided by setting *recursive* to
- :const:`False`. If *filter* is given, it
+ :const:`False`. Recursion adds entries in sorted order.
+ If *filter* is given, it
should be a function that takes a :class:`TarInfo` object argument and
returns the changed :class:`TarInfo` object. If it instead returns
:const:`None` the :class:`TarInfo` object will be excluded from the
@@ -460,6 +461,9 @@ be finalized; only the internally used file object will be closed. See the
.. versionchanged:: 3.2
Added the *filter* parameter.
+ .. versionchanged:: 3.7
+ Recursion adds entries in sorted order.
+
.. method:: TarFile.addfile(tarinfo, fileobj=None)
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index 7c9a8c8..c0f2a89 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -491,7 +491,7 @@ The :class:`PyZipFile` constructor takes the same parameters as the
:file:`\*.pyc` are added at the top level. If the directory is a
package directory, then all :file:`\*.pyc` are added under the package
name as a file path, and if any subdirectories are package directories,
- all of these are added recursively.
+ all of these are added recursively in sorted order.
*basename* is intended for internal use only.
@@ -524,6 +524,9 @@ The :class:`PyZipFile` constructor takes the same parameters as the
.. versionchanged:: 3.6.2
The *pathname* parameter accepts a :term:`path-like object`.
+ .. versionchanged:: 3.7
+ Recursion sorts directory entries.
+
.. _zipinfo-objects:
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 0b8d31f..a24ee42 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -1943,7 +1943,7 @@ class TarFile(object):
elif tarinfo.isdir():
self.addfile(tarinfo)
if recursive:
- for f in os.listdir(name):
+ for f in sorted(os.listdir(name)):
self.add(os.path.join(name, f), os.path.join(arcname, f),
recursive, filter=filter)
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 179cbc6..8ef4294 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1129,6 +1129,30 @@ class WriteTest(WriteTestBase, unittest.TestCase):
finally:
support.rmdir(path)
+ # mock the following:
+ # os.listdir: so we know that files are in the wrong order
+ @unittest.mock.patch('os.listdir')
+ def test_ordered_recursion(self, mock_listdir):
+ path = os.path.join(TEMPDIR, "directory")
+ os.mkdir(path)
+ open(os.path.join(path, "1"), "a").close()
+ open(os.path.join(path, "2"), "a").close()
+ mock_listdir.return_value = ["2", "1"]
+ try:
+ tar = tarfile.open(tmpname, self.mode)
+ try:
+ tar.add(path)
+ paths = []
+ for m in tar.getmembers():
+ paths.append(os.path.split(m.name)[-1])
+ self.assertEqual(paths, ["directory", "1", "2"]);
+ finally:
+ tar.close()
+ finally:
+ support.unlink(os.path.join(path, "1"))
+ support.unlink(os.path.join(path, "2"))
+ support.rmdir(path)
+
def test_gettarinfo_pathlike_name(self):
with tarfile.open(tmpname, self.mode) as tar:
path = pathlib.Path(TEMPDIR) / "file"
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 5df7b1b..b90b60f 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1940,7 +1940,7 @@ class PyZipFile(ZipFile):
if self.debug:
print("Adding", arcname)
self.write(fname, arcname)
- dirlist = os.listdir(pathname)
+ dirlist = sorted(os.listdir(pathname))
dirlist.remove("__init__.py")
# Add all *.py files and package subdirectories
for filename in dirlist:
@@ -1965,7 +1965,7 @@ class PyZipFile(ZipFile):
# This is NOT a package directory, add its files at top level
if self.debug:
print("Adding files from directory", pathname)
- for filename in os.listdir(pathname):
+ for filename in sorted(os.listdir(pathname)):
path = os.path.join(pathname, filename)
root, ext = os.path.splitext(filename)
if ext == ".py":
@@ -2116,7 +2116,7 @@ def main(args=None):
elif os.path.isdir(path):
if zippath:
zf.write(path, zippath)
- for nm in os.listdir(path):
+ for nm in sorted(os.listdir(path)):
addToZip(zf,
os.path.join(path, nm), os.path.join(zippath, nm))
# else: ignore
diff --git a/Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ7.rst b/Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ7.rst
new file mode 100644
index 0000000..9c895c5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ7.rst
@@ -0,0 +1 @@
+The ZipFile class now recurses directories in a reproducible way.
diff --git a/Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ8.rst b/Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ8.rst
new file mode 100644
index 0000000..a622e7e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ8.rst
@@ -0,0 +1 @@
+The TarFile class now recurses directories in a reproducible way.