summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIllia Volochii <illia.volochii@gmail.com>2025-02-03 10:23:27 (GMT)
committerGitHub <noreply@github.com>2025-02-03 10:23:27 (GMT)
commita33dcb9e431c463c20ecdc02a206ddf0b7388687 (patch)
treec2923796aad97b54770aebc890978df16e43f229
parenta29a9c0f3890fec843b7151f6a1defa25f570504 (diff)
downloadcpython-a33dcb9e431c463c20ecdc02a206ddf0b7388687.zip
cpython-a33dcb9e431c463c20ecdc02a206ddf0b7388687.tar.gz
cpython-a33dcb9e431c463c20ecdc02a206ddf0b7388687.tar.bz2
gh-81340: Use copy_file_range in shutil.copyfile copy functions (GH-93152)
This allows the underlying file system an opportunity to optimise or avoid the actual copy.
-rw-r--r--Doc/library/shutil.rst8
-rw-r--r--Lib/shutil.py96
-rw-r--r--Lib/test/test_shutil.py71
-rw-r--r--Misc/ACKS1
-rw-r--r--Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst5
5 files changed, 139 insertions, 42 deletions
diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst
index 2a8592f..06800c4 100644
--- a/Doc/library/shutil.rst
+++ b/Doc/library/shutil.rst
@@ -512,7 +512,9 @@ the use of userspace buffers in Python as in "``outfd.write(infd.read())``".
On macOS `fcopyfile`_ is used to copy the file content (not metadata).
-On Linux and Solaris :func:`os.sendfile` is used.
+On Linux :func:`os.copy_file_range` or :func:`os.sendfile` is used.
+
+On Solaris :func:`os.sendfile` is used.
On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
instead of 64 KiB) and a :func:`memoryview`-based variant of
@@ -527,6 +529,10 @@ file then shutil will silently fallback on using less efficient
.. versionchanged:: 3.14
Solaris now uses :func:`os.sendfile`.
+.. versionchanged:: next
+ Copy-on-write or server-side copy may be used internally via
+ :func:`os.copy_file_range` on supported Linux filesystems.
+
.. _shutil-copytree-example:
copytree example
diff --git a/Lib/shutil.py b/Lib/shutil.py
index 171489c..510ae8c 100644
--- a/Lib/shutil.py
+++ b/Lib/shutil.py
@@ -49,6 +49,7 @@ COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 256 * 1024
# https://bugs.python.org/issue43743#msg393429
_USE_CP_SENDFILE = (hasattr(os, "sendfile")
and sys.platform.startswith(("linux", "android", "sunos")))
+_USE_CP_COPY_FILE_RANGE = hasattr(os, "copy_file_range")
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS
# CMD defaults in Windows 10
@@ -107,6 +108,66 @@ def _fastcopy_fcopyfile(fsrc, fdst, flags):
else:
raise err from None
+def _determine_linux_fastcopy_blocksize(infd):
+ """Determine blocksize for fastcopying on Linux.
+
+ Hopefully the whole file will be copied in a single call.
+ The copying itself should be performed in a loop 'till EOF is
+ reached (0 return) so a blocksize smaller or bigger than the actual
+ file size should not make any difference, also in case the file
+ content changes while being copied.
+ """
+ try:
+ blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8 MiB
+ except OSError:
+ blocksize = 2 ** 27 # 128 MiB
+ # On 32-bit architectures truncate to 1 GiB to avoid OverflowError,
+ # see gh-82500.
+ if sys.maxsize < 2 ** 32:
+ blocksize = min(blocksize, 2 ** 30)
+ return blocksize
+
+def _fastcopy_copy_file_range(fsrc, fdst):
+ """Copy data from one regular mmap-like fd to another by using
+ a high-performance copy_file_range(2) syscall that gives filesystems
+ an opportunity to implement the use of reflinks or server-side copy.
+
+ This should work on Linux >= 4.5 only.
+ """
+ try:
+ infd = fsrc.fileno()
+ outfd = fdst.fileno()
+ except Exception as err:
+ raise _GiveupOnFastCopy(err) # not a regular file
+
+ blocksize = _determine_linux_fastcopy_blocksize(infd)
+ offset = 0
+ while True:
+ try:
+ n_copied = os.copy_file_range(infd, outfd, blocksize, offset_dst=offset)
+ except OSError as err:
+ # ...in order to have a more informative exception.
+ err.filename = fsrc.name
+ err.filename2 = fdst.name
+
+ if err.errno == errno.ENOSPC: # filesystem is full
+ raise err from None
+
+ # Give up only if this is the first call and no data was copied.
+ if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
+ raise _GiveupOnFastCopy(err)
+
+ raise err
+ else:
+ if n_copied == 0:
+ # If no bytes have been copied yet, copy_file_range
+ # might silently fail.
+ # https://lore.kernel.org/linux-fsdevel/20210126233840.GG4626@dread.disaster.area/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0
+ if offset == 0:
+ raise _GiveupOnFastCopy()
+ break
+ offset += n_copied
+
def _fastcopy_sendfile(fsrc, fdst):
"""Copy data from one regular mmap-like fd to another by using
high-performance sendfile(2) syscall.
@@ -128,20 +189,7 @@ def _fastcopy_sendfile(fsrc, fdst):
except Exception as err:
raise _GiveupOnFastCopy(err) # not a regular file
- # Hopefully the whole file will be copied in a single call.
- # sendfile() is called in a loop 'till EOF is reached (0 return)
- # so a bufsize smaller or bigger than the actual file size
- # should not make any difference, also in case the file content
- # changes while being copied.
- try:
- blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8MiB
- except OSError:
- blocksize = 2 ** 27 # 128MiB
- # On 32-bit architectures truncate to 1GiB to avoid OverflowError,
- # see bpo-38319.
- if sys.maxsize < 2 ** 32:
- blocksize = min(blocksize, 2 ** 30)
-
+ blocksize = _determine_linux_fastcopy_blocksize(infd)
offset = 0
while True:
try:
@@ -266,12 +314,20 @@ def copyfile(src, dst, *, follow_symlinks=True):
except _GiveupOnFastCopy:
pass
# Linux / Android / Solaris
- elif _USE_CP_SENDFILE:
- try:
- _fastcopy_sendfile(fsrc, fdst)
- return dst
- except _GiveupOnFastCopy:
- pass
+ elif _USE_CP_SENDFILE or _USE_CP_COPY_FILE_RANGE:
+ # reflink may be implicit in copy_file_range.
+ if _USE_CP_COPY_FILE_RANGE:
+ try:
+ _fastcopy_copy_file_range(fsrc, fdst)
+ return dst
+ except _GiveupOnFastCopy:
+ pass
+ if _USE_CP_SENDFILE:
+ try:
+ _fastcopy_sendfile(fsrc, fdst)
+ return dst
+ except _GiveupOnFastCopy:
+ pass
# Windows, see:
# https://github.com/python/cpython/pull/7160#discussion_r195405230
elif _WINDOWS and file_size > 0:
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py
index 1f18b1f..078ddd6 100644
--- a/Lib/test/test_shutil.py
+++ b/Lib/test/test_shutil.py
@@ -3239,12 +3239,8 @@ class _ZeroCopyFileTest(object):
self.assertRaises(OSError, self.zerocopy_fun, src, dst)
-@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
-class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
- PATCHPOINT = "os.sendfile"
-
- def zerocopy_fun(self, fsrc, fdst):
- return shutil._fastcopy_sendfile(fsrc, fdst)
+class _ZeroCopyFileLinuxTest(_ZeroCopyFileTest):
+ BLOCKSIZE_INDEX = None
def test_non_regular_file_src(self):
with io.BytesIO(self.FILEDATA) as src:
@@ -3265,65 +3261,65 @@ class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
self.assertEqual(dst.read(), self.FILEDATA)
def test_exception_on_second_call(self):
- def sendfile(*args, **kwargs):
+ def syscall(*args, **kwargs):
if not flag:
flag.append(None)
- return orig_sendfile(*args, **kwargs)
+ return orig_syscall(*args, **kwargs)
else:
raise OSError(errno.EBADF, "yo")
flag = []
- orig_sendfile = os.sendfile
- with unittest.mock.patch('os.sendfile', create=True,
- side_effect=sendfile):
+ orig_syscall = eval(self.PATCHPOINT)
+ with unittest.mock.patch(self.PATCHPOINT, create=True,
+ side_effect=syscall):
with self.get_files() as (src, dst):
with self.assertRaises(OSError) as cm:
- shutil._fastcopy_sendfile(src, dst)
+ self.zerocopy_fun(src, dst)
assert flag
self.assertEqual(cm.exception.errno, errno.EBADF)
def test_cant_get_size(self):
# Emulate a case where src file size cannot be determined.
# Internally bufsize will be set to a small value and
- # sendfile() will be called repeatedly.
+ # a system call will be called repeatedly.
with unittest.mock.patch('os.fstat', side_effect=OSError) as m:
with self.get_files() as (src, dst):
- shutil._fastcopy_sendfile(src, dst)
+ self.zerocopy_fun(src, dst)
assert m.called
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
def test_small_chunks(self):
# Force internal file size detection to be smaller than the
- # actual file size. We want to force sendfile() to be called
+ # actual file size. We want to force a system call to be called
# multiple times, also in order to emulate a src fd which gets
# bigger while it is being copied.
mock = unittest.mock.Mock()
mock.st_size = 65536 + 1
with unittest.mock.patch('os.fstat', return_value=mock) as m:
with self.get_files() as (src, dst):
- shutil._fastcopy_sendfile(src, dst)
+ self.zerocopy_fun(src, dst)
assert m.called
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
def test_big_chunk(self):
# Force internal file size detection to be +100MB bigger than
- # the actual file size. Make sure sendfile() does not rely on
+ # the actual file size. Make sure a system call does not rely on
# file size value except for (maybe) a better throughput /
# performance.
mock = unittest.mock.Mock()
mock.st_size = self.FILESIZE + (100 * 1024 * 1024)
with unittest.mock.patch('os.fstat', return_value=mock) as m:
with self.get_files() as (src, dst):
- shutil._fastcopy_sendfile(src, dst)
+ self.zerocopy_fun(src, dst)
assert m.called
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
def test_blocksize_arg(self):
- with unittest.mock.patch('os.sendfile',
+ with unittest.mock.patch(self.PATCHPOINT,
side_effect=ZeroDivisionError) as m:
self.assertRaises(ZeroDivisionError,
shutil.copyfile, TESTFN, TESTFN2)
- blocksize = m.call_args[0][3]
+ blocksize = m.call_args[0][self.BLOCKSIZE_INDEX]
# Make sure file size and the block size arg passed to
# sendfile() are the same.
self.assertEqual(blocksize, os.path.getsize(TESTFN))
@@ -3333,9 +3329,19 @@ class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
self.addCleanup(os_helper.unlink, TESTFN2 + '3')
self.assertRaises(ZeroDivisionError,
shutil.copyfile, TESTFN2, TESTFN2 + '3')
- blocksize = m.call_args[0][3]
+ blocksize = m.call_args[0][self.BLOCKSIZE_INDEX]
self.assertEqual(blocksize, 2 ** 23)
+
+@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
+@unittest.mock.patch.object(shutil, "_USE_CP_COPY_FILE_RANGE", False)
+class TestZeroCopySendfile(_ZeroCopyFileLinuxTest, unittest.TestCase):
+ PATCHPOINT = "os.sendfile"
+ BLOCKSIZE_INDEX = 3
+
+ def zerocopy_fun(self, fsrc, fdst):
+ return shutil._fastcopy_sendfile(fsrc, fdst)
+
def test_file2file_not_supported(self):
# Emulate a case where sendfile() only support file->socket
# fds. In such a case copyfile() is supposed to skip the
@@ -3358,6 +3364,29 @@ class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
shutil._USE_CP_SENDFILE = True
+@unittest.skipUnless(shutil._USE_CP_COPY_FILE_RANGE, "os.copy_file_range() not supported")
+class TestZeroCopyCopyFileRange(_ZeroCopyFileLinuxTest, unittest.TestCase):
+ PATCHPOINT = "os.copy_file_range"
+ BLOCKSIZE_INDEX = 2
+
+ def zerocopy_fun(self, fsrc, fdst):
+ return shutil._fastcopy_copy_file_range(fsrc, fdst)
+
+ def test_empty_file(self):
+ srcname = f"{TESTFN}src"
+ dstname = f"{TESTFN}dst"
+ self.addCleanup(lambda: os_helper.unlink(srcname))
+ self.addCleanup(lambda: os_helper.unlink(dstname))
+ with open(srcname, "wb"):
+ pass
+
+ with open(srcname, "rb") as src, open(dstname, "wb") as dst:
+ # _fastcopy_copy_file_range gives up copying empty files due
+ # to a bug in older Linux.
+ with self.assertRaises(shutil._GiveupOnFastCopy):
+ self.zerocopy_fun(src, dst)
+
+
@unittest.skipIf(not MACOS, 'macOS only')
class TestZeroCopyMACOS(_ZeroCopyFileTest, unittest.TestCase):
PATCHPOINT = "posix._fcopyfile"
diff --git a/Misc/ACKS b/Misc/ACKS
index a10b0b6..47c8d2b 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1972,6 +1972,7 @@ Johannes Vogel
Michael Vogt
Radu Voicilas
Alex Volkov
+Illia Volochii
Ruben Vorderman
Guido Vranken
Martijn Vries
diff --git a/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst b/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst
new file mode 100644
index 0000000..49e6305
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst
@@ -0,0 +1,5 @@
+Use :func:`os.copy_file_range` in :func:`shutil.copy`, :func:`shutil.copy2`,
+and :func:`shutil.copyfile` functions by default. An underlying Linux system
+call gives filesystems an opportunity to implement the use of copy-on-write
+(in the case of btrfs and XFS) or server-side copy (in the case of NFS).
+Patch by Illia Volochii.