summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_shutil.py
diff options
context:
space:
mode:
authorGiampaolo Rodola <g.rodola@gmail.com>2018-06-12 21:04:50 (GMT)
committerGitHub <noreply@github.com>2018-06-12 21:04:50 (GMT)
commit4a172ccc739065bb658c75e8929774a8e94af9e9 (patch)
tree7f76b26eccd0de5d4b697138fdbe120028d03e9e /Lib/test/test_shutil.py
parent33cd058f21d0673253c88cea70388282918992bc (diff)
downloadcpython-4a172ccc739065bb658c75e8929774a8e94af9e9.zip
cpython-4a172ccc739065bb658c75e8929774a8e94af9e9.tar.gz
cpython-4a172ccc739065bb658c75e8929774a8e94af9e9.tar.bz2
bpo-33671: efficient zero-copy for shutil.copy* functions (Linux, OSX and Win) (#7160)
* have shutil.copyfileobj use sendfile() if possible * refactoring: use ctx manager * add test with non-regular file obj * emulate case where file size can't be determined * reference _copyfileobj_sendfile directly * add test for offset() at certain position * add test for empty file * add test for non regular file dst * small refactoring * leave copyfileobj() alone in order to not introduce any incompatibility * minor refactoring * remove old test * update docstring * update docstring; rename exception class * detect platforms which only support file to socket zero copy * don't run test on platforms where file-to-file zero copy is not supported * use tempfiles * reset verbosity * add test for smaller chunks * add big file size test * add comment * update doc * update whatsnew doc * update doc * catch Exception * remove unused import * add test case for error on second sendfile() call * turn docstring into comment * add one more test * update comment * add Misc/NEWS entry * get rid of COPY_BUFSIZE; it belongs to another PR * update doc * expose posix._fcopyfile() for OSX * merge from linux branch * merge from linux branch * expose fcopyfile * arg clinic for the win implementation * convert path type to path_t * expose CopyFileW * fix windows tests * release GIL * minor refactoring * update doc * update comment * update docstrings * rename functions * rename test classes * update doc * update doc * update docstrings and comments * avoid importing nt|posix modules if unnecessary * set nt|posix modules to None if not available * micro speedup * update description * add doc note * use better wording in doc * rename function using 'fastcopy' prefix instead of 'zerocopy' * use :ref: in rst doc * change wording in doc * add test to make sure sendfile() doesn't get called anymore in case it doesn't support file to file copies * move CopyFileW in _winapi and actually expose CopyFileExW instead * fix line endings * add tests for mode bits * add docstring * remove test file 
mode class; let's keep it for later when I start addressing OSX fcopyfile() specific copies * update doc to reflect new changes * update doc * adjust tests on win * fix argument clinic error * update doc * OSX: expose copyfile(3) instead of fcopyfile(3); also expose flags arg to python * osx / copyfile: use path_t instead of char * do not set dst name in the OSError exception in order to remain consistent with platforms which cannot do that (e.g. linux) * add same file test * add test for same file * have osx copyfile() pre-emptively check if src and dst are the same, otherwise it will return immediately and src file content gets deleted * turn PermissionError into appropriate SameFileError * expose ERROR_SHARING_VIOLATION in order to raise more appropriate SameFileError * honour follow_symlinks arg when using CopyFileEx * update Misc/NEWS * expose CreateDirectoryEx mock * change C type * CreateDirectoryExW actual implementation * provide specific makedirs() implementation for win * fix typo * skeleton for SetNamedSecurityInfo * get security info for src path * finally set security attrs * add unit tests * mimic os.makedirs() behavior and raise if dst dir exists * set 2 paths for OSError object * set 2 paths for OSError object * expand windows test * in case of exception on os.sendfile() set filename and filename2 exception attributes * set 2 filenames (src, dst) for OSError in case copyfile() fails on OSX * update doc * do not use CreateDirectoryEx() in copytree() if source dir is a symlink (breaks test_copytree_symlink_dir); instead just create a plain dir and remain consistent with POSIX implementation * use bytearray() and readinto() * use memoryview() with bytearray() * refactoring + introduce a new _fastcopy_binfileobj() fun * remove CopyFileEx and other C wrappers * remove code related to CopyFileEx * Recognize binary files in copyfileobj() ...and use fastest _fastcopy_binfileobj() when possible * set 1MB copy bufsize on win; also add a global _COPY_BUFSIZE 
variable * use ctx manager for memoryview() * update doc * remove outdated doc * remove last CopyFileEx remnants * OSX - use fcopyfile(3) instead of copyfile(3) ...as an extra safety measure: in case src/dst are "exotic" files (non regular or living on a network fs etc.) we better fail on open() instead of copyfile(3) as we're not quite sure what's gonna happen in that case. * update doc
Diffstat (limited to 'Lib/test/test_shutil.py')
-rw-r--r--Lib/test/test_shutil.py294
1 files changed, 292 insertions, 2 deletions
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py
index 2cb2f14..8d51994 100644
--- a/Lib/test/test_shutil.py
+++ b/Lib/test/test_shutil.py
@@ -12,20 +12,28 @@ import errno
import functools
import pathlib
import subprocess
+import random
+import string
+import contextlib
+import io
from shutil import (make_archive,
register_archive_format, unregister_archive_format,
get_archive_formats, Error, unpack_archive,
register_unpack_format, RegistryError,
unregister_unpack_format, get_unpack_formats,
- SameFileError)
+ SameFileError, _GiveupOnFastCopy)
import tarfile
import zipfile
+try:
+ import posix
+except ImportError:
+ posix = None
from test import support
from test.support import TESTFN, FakePath
TESTFN2 = TESTFN + "2"
-
+# True when the tests run on a platform whose sys.platform starts with
+# "darwin"; used below to skip or select OSX-specific tests.
+OSX = sys.platform.startswith("darwin")
try:
import grp
import pwd
@@ -60,6 +68,24 @@ def write_file(path, content, binary=False):
with open(path, 'wb' if binary else 'w') as fp:
fp.write(content)
+def write_test_file(path, size):
+    """Create a test file of exactly *size* bytes of random ASCII letters.
+
+    One random buffer of at most 8192 bytes is generated and written
+    repeatedly.  The final write is truncated to the remaining length so
+    that sizes which are not a multiple of the buffer length still yield
+    a file of exactly *size* bytes.
+    """
+    def chunks(total, step):
+        # Yield the length of each write: *step* repeatedly, followed by
+        # the remainder (if any).
+        assert total >= step
+        while total > step:
+            yield step
+            total -= step
+        if total:
+            yield total
+
+    bufsize = min(size, 8192)
+    chunk = b"".join([random.choice(string.ascii_letters).encode()
+                      for _ in range(bufsize)])
+    with open(path, 'wb') as f:
+        for csize in chunks(size, bufsize):
+            # Slice so that a trailing partial chunk does not overshoot
+            # the requested size.
+            f.write(chunk[:csize])
+    assert os.path.getsize(path) == size
+
def read_file(path, binary=False):
"""Return contents from a file located at *path*.
@@ -84,6 +110,37 @@ def rlistdir(path):
res.append(name)
return res
+def supports_file2file_sendfile():
+    """Return True if os.sendfile() can copy between two regular files.
+
+    Returns False when os.sendfile() does not exist or when a trial
+    file-to-file call raises OSError.  Both temporary files created for
+    the probe are removed before returning.
+    """
+    # ...apparently Linux and Solaris are the only ones
+    if not hasattr(os, "sendfile"):
+        return False
+    srcname = None
+    dstname = None
+    try:
+        with tempfile.NamedTemporaryFile("wb", delete=False) as f:
+            srcname = f.name
+            f.write(b"0123456789")
+
+        with open(srcname, "rb") as src:
+            with tempfile.NamedTemporaryFile("wb", delete=False) as dst:
+                # Record the destination's own name so that the cleanup
+                # below removes it as well as the source.
+                dstname = dst.name
+                infd = src.fileno()
+                outfd = dst.fileno()
+                try:
+                    os.sendfile(outfd, infd, 0, 2)
+                except OSError:
+                    return False
+                else:
+                    return True
+    finally:
+        if srcname is not None:
+            support.unlink(srcname)
+        if dstname is not None:
+            support.unlink(dstname)
+
+
+# Probed once at import time; used to skip the sendfile() test class.
+SUPPORTS_SENDFILE = supports_file2file_sendfile()
+
class TestShutil(unittest.TestCase):
@@ -1401,6 +1458,8 @@ class TestShutil(unittest.TestCase):
self.assertRaises(SameFileError, shutil.copyfile, src_file, src_file)
# But Error should work too, to stay backward compatible.
self.assertRaises(Error, shutil.copyfile, src_file, src_file)
+ # Make sure file is not corrupted.
+ self.assertEqual(read_file(src_file), 'foo')
def test_copytree_return_value(self):
# copytree returns its destination path.
@@ -1749,6 +1808,7 @@ class TestCopyFile(unittest.TestCase):
self.assertRaises(OSError, shutil.copyfile, 'srcfile', 'destfile')
+ @unittest.skipIf(OSX, "skipped on OSX")
def test_w_dest_open_fails(self):
srcfile = self.Faux()
@@ -1768,6 +1828,7 @@ class TestCopyFile(unittest.TestCase):
self.assertEqual(srcfile._exited_with[1].args,
('Cannot open "destfile"',))
+ @unittest.skipIf(OSX, "skipped on OSX")
def test_w_dest_close_fails(self):
srcfile = self.Faux()
@@ -1790,6 +1851,7 @@ class TestCopyFile(unittest.TestCase):
self.assertEqual(srcfile._exited_with[1].args,
('Cannot close',))
+ @unittest.skipIf(OSX, "skipped on OSX")
def test_w_source_close_fails(self):
srcfile = self.Faux(True)
@@ -1829,6 +1891,234 @@ class TestCopyFile(unittest.TestCase):
finally:
os.rmdir(dst_dir)
+
+class _ZeroCopyFileTest(object):
+    """Tests common to all zero-copy APIs.
+
+    Subclasses must override zerocopy_fun() with the function under test
+    and set PATCHPOINT to the dotted name that the tests replace with a
+    mock in order to simulate failures.
+    """
+    FILESIZE = (10 * 1024 * 1024)  # 10 MiB
+    # Content of TESTFN; filled in by setUpClass().
+    FILEDATA = b""
+    # Target passed to unittest.mock.patch(); set by subclasses.
+    PATCHPOINT = ""
+
+    @classmethod
+    def setUpClass(cls):
+        """Create the source file TESTFN and cache its content."""
+        write_test_file(TESTFN, cls.FILESIZE)
+        with open(TESTFN, 'rb') as f:
+            cls.FILEDATA = f.read()
+            # Fixture sanity check only (no TestCase instance is
+            # available in a classmethod).
+            assert len(cls.FILEDATA) == cls.FILESIZE
+
+    @classmethod
+    def tearDownClass(cls):
+        """Remove the source file."""
+        support.unlink(TESTFN)
+
+    def tearDown(self):
+        """Remove the destination file written by the test."""
+        support.unlink(TESTFN2)
+
+    @contextlib.contextmanager
+    def get_files(self):
+        """Yield (src, dst): TESTFN opened "rb" and TESTFN2 opened "wb"."""
+        with open(TESTFN, "rb") as src:
+            with open(TESTFN2, "wb") as dst:
+                yield (src, dst)
+
+    def zerocopy_fun(self, *args, **kwargs):
+        """Call the zero-copy function under test (subclass hook)."""
+        raise NotImplementedError("must be implemented in subclass")
+
+    def reset(self):
+        """Tear down and rebuild both the per-test and per-class fixtures."""
+        self.tearDown()
+        self.tearDownClass()
+        self.setUpClass()
+        self.setUp()
+
+    # ---
+
+    def test_regular_copy(self):
+        with self.get_files() as (src, dst):
+            self.zerocopy_fun(src, dst)
+        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
+        # Make sure the fallback function is not called.
+        with self.get_files() as (src, dst):
+            with unittest.mock.patch('shutil.copyfileobj') as m:
+                shutil.copyfile(TESTFN, TESTFN2)
+            # Use a unittest assertion: a bare assert is stripped when
+            # the tests run under "python -O".
+            self.assertFalse(m.called)
+
+    def test_same_file(self):
+        # Rebuild the fixtures afterwards in case the source was damaged.
+        self.addCleanup(self.reset)
+        with self.get_files() as (src, dst):
+            with self.assertRaises(Exception):
+                self.zerocopy_fun(src, src)
+        # Make sure src file is not corrupted.
+        self.assertEqual(read_file(TESTFN, binary=True), self.FILEDATA)
+
+    def test_non_existent_src(self):
+        name = tempfile.mktemp()
+        with self.assertRaises(FileNotFoundError) as cm:
+            shutil.copyfile(name, "new")
+        self.assertEqual(cm.exception.filename, name)
+
+    def test_empty_file(self):
+        srcname = TESTFN + 'src'
+        dstname = TESTFN + 'dst'
+        self.addCleanup(lambda: support.unlink(srcname))
+        self.addCleanup(lambda: support.unlink(dstname))
+        # Create an empty source file.
+        with open(srcname, "wb"):
+            pass
+
+        with open(srcname, "rb") as src:
+            with open(dstname, "wb") as dst:
+                self.zerocopy_fun(src, dst)
+
+        self.assertEqual(read_file(dstname, binary=True), b"")
+
+    def test_unhandled_exception(self):
+        # An unexpected exception raised at the patch point must
+        # propagate out of shutil.copyfile().
+        with unittest.mock.patch(self.PATCHPOINT,
+                                 side_effect=ZeroDivisionError):
+            self.assertRaises(ZeroDivisionError,
+                              shutil.copyfile, TESTFN, TESTFN2)
+
+    def test_exception_on_first_call(self):
+        # Emulate a case where the first call to the zero-copy
+        # function raises an exception in which case the function is
+        # supposed to give up immediately.
+        with unittest.mock.patch(self.PATCHPOINT,
+                                 side_effect=OSError(errno.EINVAL, "yo")):
+            with self.get_files() as (src, dst):
+                with self.assertRaises(_GiveupOnFastCopy):
+                    self.zerocopy_fun(src, dst)
+
+    def test_filesystem_full(self):
+        # Emulate a case where filesystem is full and sendfile() fails
+        # on first call.
+        with unittest.mock.patch(self.PATCHPOINT,
+                                 side_effect=OSError(errno.ENOSPC, "yo")):
+            with self.get_files() as (src, dst):
+                self.assertRaises(OSError, self.zerocopy_fun, src, dst)
+
+
+@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
+class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
+    """Run the shared zero-copy tests against shutil._fastcopy_sendfile().
+
+    Also adds tests specific to the os.sendfile() based implementation.
+    """
+    PATCHPOINT = "os.sendfile"
+
+    def zerocopy_fun(self, fsrc, fdst):
+        """Copy *fsrc* to *fdst* with shutil._fastcopy_sendfile()."""
+        return shutil._fastcopy_sendfile(fsrc, fdst)
+
+    def test_non_regular_file_src(self):
+        # An in-memory source makes the fast copy give up; the regular
+        # copyfileobj() must then still copy everything.
+        with io.BytesIO(self.FILEDATA) as src:
+            with open(TESTFN2, "wb") as dst:
+                with self.assertRaises(_GiveupOnFastCopy):
+                    self.zerocopy_fun(src, dst)
+                shutil.copyfileobj(src, dst)
+
+        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
+
+    def test_non_regular_file_dst(self):
+        # Same as above, but with an in-memory destination.
+        with open(TESTFN, "rb") as src:
+            with io.BytesIO() as dst:
+                with self.assertRaises(_GiveupOnFastCopy):
+                    self.zerocopy_fun(src, dst)
+                shutil.copyfileobj(src, dst)
+                dst.seek(0)
+                self.assertEqual(dst.read(), self.FILEDATA)
+
+    def test_exception_on_second_call(self):
+        def sendfile(*args, **kwargs):
+            # The first call goes to the real os.sendfile(); every later
+            # call fails with EBADF.
+            if not flag:
+                flag.append(None)
+                return orig_sendfile(*args, **kwargs)
+            else:
+                raise OSError(errno.EBADF, "yo")
+
+        flag = []
+        orig_sendfile = os.sendfile
+        with unittest.mock.patch('os.sendfile', create=True,
+                                 side_effect=sendfile):
+            with self.get_files() as (src, dst):
+                with self.assertRaises(OSError) as cm:
+                    shutil._fastcopy_sendfile(src, dst)
+        # unittest assertions are used instead of bare asserts, which
+        # are stripped when the tests run under "python -O".
+        self.assertTrue(flag)
+        self.assertEqual(cm.exception.errno, errno.EBADF)
+
+    def test_cant_get_size(self):
+        # Emulate a case where src file size cannot be determined.
+        # Internally bufsize will be set to a small value and
+        # sendfile() will be called repeatedly.
+        with unittest.mock.patch('os.fstat', side_effect=OSError) as m:
+            with self.get_files() as (src, dst):
+                shutil._fastcopy_sendfile(src, dst)
+                self.assertTrue(m.called)
+        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
+
+    def test_small_chunks(self):
+        # Force internal file size detection to be smaller than the
+        # actual file size. We want to force sendfile() to be called
+        # multiple times, also in order to emulate a src fd which gets
+        # bigger while it is being copied.
+        mock = unittest.mock.Mock()
+        mock.st_size = 65536 + 1
+        with unittest.mock.patch('os.fstat', return_value=mock) as m:
+            with self.get_files() as (src, dst):
+                shutil._fastcopy_sendfile(src, dst)
+                self.assertTrue(m.called)
+        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
+
+    def test_big_chunk(self):
+        # Force internal file size detection to be +100MB bigger than
+        # the actual file size. Make sure sendfile() does not rely on
+        # file size value except for (maybe) a better throughput /
+        # performance.
+        mock = unittest.mock.Mock()
+        mock.st_size = self.FILESIZE + (100 * 1024 * 1024)
+        with unittest.mock.patch('os.fstat', return_value=mock) as m:
+            with self.get_files() as (src, dst):
+                shutil._fastcopy_sendfile(src, dst)
+                self.assertTrue(m.called)
+        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
+
+    def test_blocksize_arg(self):
+        # The mock raises on every call, so each copyfile() below stops
+        # at its first sendfile() call, whose arguments are inspected.
+        with unittest.mock.patch('os.sendfile',
+                                 side_effect=ZeroDivisionError) as m:
+            self.assertRaises(ZeroDivisionError,
+                              shutil.copyfile, TESTFN, TESTFN2)
+            blocksize = m.call_args[0][3]
+            # Make sure file size and the block size arg passed to
+            # sendfile() are the same.
+            self.assertEqual(blocksize, os.path.getsize(TESTFN))
+            # ...unless we're dealing with a small file.
+            support.unlink(TESTFN2)
+            write_file(TESTFN2, b"hello", binary=True)
+            self.addCleanup(support.unlink, TESTFN2 + '3')
+            self.assertRaises(ZeroDivisionError,
+                              shutil.copyfile, TESTFN2, TESTFN2 + '3')
+            blocksize = m.call_args[0][3]
+            self.assertEqual(blocksize, 2 ** 23)
+
+    def test_file2file_not_supported(self):
+        # Emulate a case where sendfile() only support file->socket
+        # fds. In such a case copyfile() is supposed to skip the
+        # fast-copy attempt from then on.
+        self.assertTrue(shutil._HAS_SENDFILE)
+        try:
+            with unittest.mock.patch(
+                    self.PATCHPOINT,
+                    side_effect=OSError(errno.ENOTSOCK, "yo")) as m:
+                with self.get_files() as (src, dst):
+                    with self.assertRaises(_GiveupOnFastCopy):
+                        shutil._fastcopy_sendfile(src, dst)
+                self.assertTrue(m.called)
+            self.assertFalse(shutil._HAS_SENDFILE)
+
+            with unittest.mock.patch(self.PATCHPOINT) as m:
+                shutil.copyfile(TESTFN, TESTFN2)
+                self.assertFalse(m.called)
+        finally:
+            # Restore the module-level flag for the remaining tests.
+            shutil._HAS_SENDFILE = True
+
+
+@unittest.skipIf(not OSX, 'OSX only')
+class TestZeroCopyOSX(_ZeroCopyFileTest, unittest.TestCase):
+    """Run the shared zero-copy tests against shutil._fastcopy_osx()."""
+    PATCHPOINT = "posix._fcopyfile"
+
+    def zerocopy_fun(self, src, dst):
+        """Copy *src* to *dst* with _fastcopy_osx() and _COPYFILE_DATA."""
+        flags = posix._COPYFILE_DATA
+        return shutil._fastcopy_osx(src, dst, flags)
+
+
class TermsizeTests(unittest.TestCase):
def test_does_not_crash(self):
"""Check if get_terminal_size() returns a meaningful value.