diff options
author | Giampaolo Rodola <g.rodola@gmail.com> | 2018-06-12 21:04:50 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-06-12 21:04:50 (GMT) |
commit | 4a172ccc739065bb658c75e8929774a8e94af9e9 (patch) | |
tree | 7f76b26eccd0de5d4b697138fdbe120028d03e9e /Lib/test/test_shutil.py | |
parent | 33cd058f21d0673253c88cea70388282918992bc (diff) | |
download | cpython-4a172ccc739065bb658c75e8929774a8e94af9e9.zip cpython-4a172ccc739065bb658c75e8929774a8e94af9e9.tar.gz cpython-4a172ccc739065bb658c75e8929774a8e94af9e9.tar.bz2 |
bpo-33671: efficient zero-copy for shutil.copy* functions (Linux, OSX and Win) (#7160)
* have shutil.copyfileobj use sendfile() if possible
* refactoring: use ctx manager
* add test with non-regular file obj
* emulate case where file size can't be determined
* reference _copyfileobj_sendfile directly
* add test for offset() at certain position
* add test for empty file
* add test for non regular file dst
* small refactoring
* leave copyfileobj() alone in order to not introduce any incompatibility
* minor refactoring
* remove old test
* update docstring
* update docstring; rename exception class
* detect platforms which only support file to socket zero copy
* don't run test on platforms where file-to-file zero copy is not supported
* use tempfiles
* reset verbosity
* add test for smaller chunks
* add big file size test
* add comment
* update doc
* update whatsnew doc
* update doc
* catch Exception
* remove unused import
* add test case for error on second sendfile() call
* turn docstring into comment
* add one more test
* update comment
* add Misc/NEWS entry
* get rid of COPY_BUFSIZE; it belongs to another PR
* update doc
* expose posix._fcopyfile() for OSX
* merge from linux branch
* merge from linux branch
* expose fcopyfile
* arg clinic for the win implementation
* convert path type to path_t
* expose CopyFileW
* fix windows tests
* release GIL
* minor refactoring
* update doc
* update comment
* update docstrings
* rename functions
* rename test classes
* update doc
* update doc
* update docstrings and comments
* avoid importing nt|posix modules if unnecessary
* set nt|posix modules to None if not available
* micro speedup
* update description
* add doc note
* use better wording in doc
* rename function using 'fastcopy' prefix instead of 'zerocopy'
* use :ref: in rst doc
* change wording in doc
* add test to make sure sendfile() doesn't get called anymore in case it doesn't support file to file copies
* move CopyFileW in _winapi and actually expose CopyFileExW instead
* fix line endings
* add tests for mode bits
* add docstring
* remove test file mode class; let's keep it for later when I start addressing OSX fcopyfile() specific copies
* update doc to reflect new changes
* update doc
* adjust tests on win
* fix argument clinic error
* update doc
* OSX: expose copyfile(3) instead of fcopyfile(3); also expose flags arg to python
* osx / copyfile: use path_t instead of char
* do not set dst name in the OSError exception in order to remain consistent with platforms which cannot do that (e.g. linux)
* add same file test
* add test for same file
* have osx copyfile() pre-emptively check if src and dst are the same, otherwise it will return immediately and src file content gets deleted
* turn PermissionError into appropriate SameFileError
* expose ERROR_SHARING_VIOLATION in order to raise more appropriate SameFileError
* honour follow_symlinks arg when using CopyFileEx
* update Misc/NEWS
* expose CreateDirectoryEx mock
* change C type
* CreateDirectoryExW actual implementation
* provide specific makedirs() implementation for win
* fix typo
* skeleton for SetNamedSecurityInfo
* get security info for src path
* finally set security attrs
* add unit tests
* mimic os.makedirs() behavior and raise if dst dir exists
* set 2 paths for OSError object
* set 2 paths for OSError object
* expand windows test
* in case of exception on os.sendfile() set filename and filename2 exception attributes
* set 2 filenames (src, dst) for OSError in case copyfile() fails on OSX
* update doc
* do not use CreateDirectoryEx() in copytree() if source dir is a symlink (breaks test_copytree_symlink_dir); instead just create a plain dir and remain consistent with POSIX implementation
* use bytearray() and readinto()
* use memoryview() with bytearray()
* refactoring + introduce a new _fastcopy_binfileobj() fun
* remove CopyFileEx and other C wrappers
* remove code related to CopyFileEx
* Recognize binary files in copyfileobj()
...and use fastest _fastcopy_binfileobj() when possible
* set 1MB copy bufsize on win; also add a global _COPY_BUFSIZE variable
* use ctx manager for memoryview()
* update doc
* remove outdated doc
* remove last CopyFileEx remnants
* OSX - use fcopyfile(3) instead of copyfile(3)
...as an extra safety measure: in case src/dst are "exotic" files (non
regular or living on a network fs etc.) we better fail on open() instead
of copyfile(3) as we're not quite sure what's gonna happen in that
case.
* update doc
Diffstat (limited to 'Lib/test/test_shutil.py')
-rw-r--r-- | Lib/test/test_shutil.py | 294 |
1 files changed, 292 insertions, 2 deletions
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 2cb2f14..8d51994 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -12,20 +12,28 @@ import errno import functools import pathlib import subprocess +import random +import string +import contextlib +import io from shutil import (make_archive, register_archive_format, unregister_archive_format, get_archive_formats, Error, unpack_archive, register_unpack_format, RegistryError, unregister_unpack_format, get_unpack_formats, - SameFileError) + SameFileError, _GiveupOnFastCopy) import tarfile import zipfile +try: + import posix +except ImportError: + posix = None from test import support from test.support import TESTFN, FakePath TESTFN2 = TESTFN + "2" - +OSX = sys.platform.startswith("darwin") try: import grp import pwd @@ -60,6 +68,24 @@ def write_file(path, content, binary=False): with open(path, 'wb' if binary else 'w') as fp: fp.write(content) +def write_test_file(path, size): + """Create a test file with an arbitrary size and random text content.""" + def chunks(total, step): + assert total >= step + while total > step: + yield step + total -= step + if total: + yield total + + bufsize = min(size, 8192) + chunk = b"".join([random.choice(string.ascii_letters).encode() + for i in range(bufsize)]) + with open(path, 'wb') as f: + for csize in chunks(size, bufsize): + f.write(chunk) + assert os.path.getsize(path) == size + def read_file(path, binary=False): """Return contents from a file located at *path*. 
@@ -84,6 +110,37 @@ def rlistdir(path): res.append(name) return res +def supports_file2file_sendfile(): + # ...apparently Linux and Solaris are the only ones + if not hasattr(os, "sendfile"): + return False + srcname = None + dstname = None + try: + with tempfile.NamedTemporaryFile("wb", delete=False) as f: + srcname = f.name + f.write(b"0123456789") + + with open(srcname, "rb") as src: + with tempfile.NamedTemporaryFile("wb", delete=False) as dst: + dstname = f.name + infd = src.fileno() + outfd = dst.fileno() + try: + os.sendfile(outfd, infd, 0, 2) + except OSError: + return False + else: + return True + finally: + if srcname is not None: + support.unlink(srcname) + if dstname is not None: + support.unlink(dstname) + + +SUPPORTS_SENDFILE = supports_file2file_sendfile() + class TestShutil(unittest.TestCase): @@ -1401,6 +1458,8 @@ class TestShutil(unittest.TestCase): self.assertRaises(SameFileError, shutil.copyfile, src_file, src_file) # But Error should work too, to stay backward compatible. self.assertRaises(Error, shutil.copyfile, src_file, src_file) + # Make sure file is not corrupted. + self.assertEqual(read_file(src_file), 'foo') def test_copytree_return_value(self): # copytree returns its destination path. 
@@ -1749,6 +1808,7 @@ class TestCopyFile(unittest.TestCase): self.assertRaises(OSError, shutil.copyfile, 'srcfile', 'destfile') + @unittest.skipIf(OSX, "skipped on OSX") def test_w_dest_open_fails(self): srcfile = self.Faux() @@ -1768,6 +1828,7 @@ class TestCopyFile(unittest.TestCase): self.assertEqual(srcfile._exited_with[1].args, ('Cannot open "destfile"',)) + @unittest.skipIf(OSX, "skipped on OSX") def test_w_dest_close_fails(self): srcfile = self.Faux() @@ -1790,6 +1851,7 @@ class TestCopyFile(unittest.TestCase): self.assertEqual(srcfile._exited_with[1].args, ('Cannot close',)) + @unittest.skipIf(OSX, "skipped on OSX") def test_w_source_close_fails(self): srcfile = self.Faux(True) @@ -1829,6 +1891,234 @@ class TestCopyFile(unittest.TestCase): finally: os.rmdir(dst_dir) + +class _ZeroCopyFileTest(object): + """Tests common to all zero-copy APIs.""" + FILESIZE = (10 * 1024 * 1024) # 10 MiB + FILEDATA = b"" + PATCHPOINT = "" + + @classmethod + def setUpClass(cls): + write_test_file(TESTFN, cls.FILESIZE) + with open(TESTFN, 'rb') as f: + cls.FILEDATA = f.read() + assert len(cls.FILEDATA) == cls.FILESIZE + + @classmethod + def tearDownClass(cls): + support.unlink(TESTFN) + + def tearDown(self): + support.unlink(TESTFN2) + + @contextlib.contextmanager + def get_files(self): + with open(TESTFN, "rb") as src: + with open(TESTFN2, "wb") as dst: + yield (src, dst) + + def zerocopy_fun(self, *args, **kwargs): + raise NotImplementedError("must be implemented in subclass") + + def reset(self): + self.tearDown() + self.tearDownClass() + self.setUpClass() + self.setUp() + + # --- + + def test_regular_copy(self): + with self.get_files() as (src, dst): + self.zerocopy_fun(src, dst) + self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) + # Make sure the fallback function is not called. 
+ with self.get_files() as (src, dst): + with unittest.mock.patch('shutil.copyfileobj') as m: + shutil.copyfile(TESTFN, TESTFN2) + assert not m.called + + def test_same_file(self): + self.addCleanup(self.reset) + with self.get_files() as (src, dst): + with self.assertRaises(Exception): + self.zerocopy_fun(src, src) + # Make sure src file is not corrupted. + self.assertEqual(read_file(TESTFN, binary=True), self.FILEDATA) + + def test_non_existent_src(self): + name = tempfile.mktemp() + with self.assertRaises(FileNotFoundError) as cm: + shutil.copyfile(name, "new") + self.assertEqual(cm.exception.filename, name) + + def test_empty_file(self): + srcname = TESTFN + 'src' + dstname = TESTFN + 'dst' + self.addCleanup(lambda: support.unlink(srcname)) + self.addCleanup(lambda: support.unlink(dstname)) + with open(srcname, "wb"): + pass + + with open(srcname, "rb") as src: + with open(dstname, "wb") as dst: + self.zerocopy_fun(src, dst) + + self.assertEqual(read_file(dstname, binary=True), b"") + + def test_unhandled_exception(self): + with unittest.mock.patch(self.PATCHPOINT, + side_effect=ZeroDivisionError): + self.assertRaises(ZeroDivisionError, + shutil.copyfile, TESTFN, TESTFN2) + + def test_exception_on_first_call(self): + # Emulate a case where the first call to the zero-copy + # function raises an exception in which case the function is + # supposed to give up immediately. + with unittest.mock.patch(self.PATCHPOINT, + side_effect=OSError(errno.EINVAL, "yo")): + with self.get_files() as (src, dst): + with self.assertRaises(_GiveupOnFastCopy): + self.zerocopy_fun(src, dst) + + def test_filesystem_full(self): + # Emulate a case where filesystem is full and sendfile() fails + # on first call. 
+ with unittest.mock.patch(self.PATCHPOINT, + side_effect=OSError(errno.ENOSPC, "yo")): + with self.get_files() as (src, dst): + self.assertRaises(OSError, self.zerocopy_fun, src, dst) + + +@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported') +class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase): + PATCHPOINT = "os.sendfile" + + def zerocopy_fun(self, fsrc, fdst): + return shutil._fastcopy_sendfile(fsrc, fdst) + + def test_non_regular_file_src(self): + with io.BytesIO(self.FILEDATA) as src: + with open(TESTFN2, "wb") as dst: + with self.assertRaises(_GiveupOnFastCopy): + self.zerocopy_fun(src, dst) + shutil.copyfileobj(src, dst) + + self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) + + def test_non_regular_file_dst(self): + with open(TESTFN, "rb") as src: + with io.BytesIO() as dst: + with self.assertRaises(_GiveupOnFastCopy): + self.zerocopy_fun(src, dst) + shutil.copyfileobj(src, dst) + dst.seek(0) + self.assertEqual(dst.read(), self.FILEDATA) + + def test_exception_on_second_call(self): + def sendfile(*args, **kwargs): + if not flag: + flag.append(None) + return orig_sendfile(*args, **kwargs) + else: + raise OSError(errno.EBADF, "yo") + + flag = [] + orig_sendfile = os.sendfile + with unittest.mock.patch('os.sendfile', create=True, + side_effect=sendfile): + with self.get_files() as (src, dst): + with self.assertRaises(OSError) as cm: + shutil._fastcopy_sendfile(src, dst) + assert flag + self.assertEqual(cm.exception.errno, errno.EBADF) + + def test_cant_get_size(self): + # Emulate a case where src file size cannot be determined. + # Internally bufsize will be set to a small value and + # sendfile() will be called repeatedly. 
+ with unittest.mock.patch('os.fstat', side_effect=OSError) as m: + with self.get_files() as (src, dst): + shutil._fastcopy_sendfile(src, dst) + assert m.called + self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) + + def test_small_chunks(self): + # Force internal file size detection to be smaller than the + # actual file size. We want to force sendfile() to be called + # multiple times, also in order to emulate a src fd which gets + # bigger while it is being copied. + mock = unittest.mock.Mock() + mock.st_size = 65536 + 1 + with unittest.mock.patch('os.fstat', return_value=mock) as m: + with self.get_files() as (src, dst): + shutil._fastcopy_sendfile(src, dst) + assert m.called + self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) + + def test_big_chunk(self): + # Force internal file size detection to be +100MB bigger than + # the actual file size. Make sure sendfile() does not rely on + # file size value except for (maybe) a better throughput / + # performance. + mock = unittest.mock.Mock() + mock.st_size = self.FILESIZE + (100 * 1024 * 1024) + with unittest.mock.patch('os.fstat', return_value=mock) as m: + with self.get_files() as (src, dst): + shutil._fastcopy_sendfile(src, dst) + assert m.called + self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) + + def test_blocksize_arg(self): + with unittest.mock.patch('os.sendfile', + side_effect=ZeroDivisionError) as m: + self.assertRaises(ZeroDivisionError, + shutil.copyfile, TESTFN, TESTFN2) + blocksize = m.call_args[0][3] + # Make sure file size and the block size arg passed to + # sendfile() are the same. + self.assertEqual(blocksize, os.path.getsize(TESTFN)) + # ...unless we're dealing with a small file. 
+ support.unlink(TESTFN2) + write_file(TESTFN2, b"hello", binary=True) + self.addCleanup(support.unlink, TESTFN2 + '3') + self.assertRaises(ZeroDivisionError, + shutil.copyfile, TESTFN2, TESTFN2 + '3') + blocksize = m.call_args[0][3] + self.assertEqual(blocksize, 2 ** 23) + + def test_file2file_not_supported(self): + # Emulate a case where sendfile() only support file->socket + # fds. In such a case copyfile() is supposed to skip the + # fast-copy attempt from then on. + assert shutil._HAS_SENDFILE + try: + with unittest.mock.patch( + self.PATCHPOINT, + side_effect=OSError(errno.ENOTSOCK, "yo")) as m: + with self.get_files() as (src, dst): + with self.assertRaises(_GiveupOnFastCopy): + shutil._fastcopy_sendfile(src, dst) + assert m.called + assert not shutil._HAS_SENDFILE + + with unittest.mock.patch(self.PATCHPOINT) as m: + shutil.copyfile(TESTFN, TESTFN2) + assert not m.called + finally: + shutil._HAS_SENDFILE = True + + +@unittest.skipIf(not OSX, 'OSX only') +class TestZeroCopyOSX(_ZeroCopyFileTest, unittest.TestCase): + PATCHPOINT = "posix._fcopyfile" + + def zerocopy_fun(self, src, dst): + return shutil._fastcopy_osx(src, dst, posix._COPYFILE_DATA) + + class TermsizeTests(unittest.TestCase): def test_does_not_crash(self): """Check if get_terminal_size() returns a meaningful value. |