summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/shutil.rst6
-rw-r--r--Doc/whatsnew/3.8.rst28
-rw-r--r--Lib/shutil.py72
-rw-r--r--Lib/test/test_shutil.py88
-rw-r--r--Misc/NEWS.d/next/Library/2018-05-28-23-25-17.bpo-33671.GIdKKi.rst15
-rw-r--r--Modules/clinic/posixmodule.c.h4
-rw-r--r--Modules/posixmodule.c4
7 files changed, 153 insertions, 64 deletions
diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst
index a3b87ee..c692cf4 100644
--- a/Doc/library/shutil.rst
+++ b/Doc/library/shutil.rst
@@ -407,11 +407,15 @@ efficiently (see :issue:`33671`).
"fast-copy" means that the copying operation occurs within the kernel, avoiding
the use of userspace buffers in Python as in "``outfd.write(infd.read())``".
-On OSX `fcopyfile`_ is used to copy the file content (not metadata).
+On macOS `fcopyfile`_ is used to copy the file content (not metadata).
On Linux, Solaris and other POSIX platforms where :func:`os.sendfile` supports
copies between 2 regular file descriptors :func:`os.sendfile` is used.
+On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
+instead of 16 KiB) and a :func:`memoryview`-based variant of
+:func:`shutil.copyfileobj` is used.
+
If the fast-copy operation fails and no data was written in the destination
file then shutil will silently fallback on using less efficient
:func:`copyfileobj` function internally.
diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst
index 58be90f..32c45ec 100644
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -95,20 +95,18 @@ Optimizations
* :func:`shutil.copyfile`, :func:`shutil.copy`, :func:`shutil.copy2`,
:func:`shutil.copytree` and :func:`shutil.move` use platform-specific
- "fast-copy" syscalls on Linux, OSX and Solaris in order to copy the file more
- efficiently.
+ "fast-copy" syscalls on Linux, macOS and Solaris in order to copy the file
+ more efficiently.
"fast-copy" means that the copying operation occurs within the kernel,
avoiding the use of userspace buffers in Python as in
"``outfd.write(infd.read())``".
- All other platforms not using such technique will rely on a faster
- :func:`shutil.copyfile` implementation using :func:`memoryview`,
- :class:`bytearray` and
- :meth:`BufferedIOBase.readinto() <io.BufferedIOBase.readinto>`.
- Finally, :func:`shutil.copyfile` default buffer size on Windows was increased
- from 16KB to 1MB.
- The speedup for copying a 512MB file within the same partition is about +26%
- on Linux, +50% on OSX and +38% on Windows. Also, much less CPU cycles are
- consumed.
+ On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
+ instead of 16 KiB) and a :func:`memoryview`-based variant of
+ :func:`shutil.copyfileobj` is used.
+ The speedup for copying a 512 MiB file within the same partition is about
+ +26% on Linux, +50% on macOS and +40% on Windows. Also, much less CPU cycles
+ are consumed.
+ See :ref:`shutil-platform-dependent-efficient-copy-operations` section.
(Contributed by Giampaolo Rodola' in :issue:`25427`.)
* The default protocol in the :mod:`pickle` module is now Protocol 4,
@@ -179,6 +177,14 @@ Changes in the Python API
* The :class:`cProfile.Profile` class can now be used as a context
manager. (Contributed by Scott Sanderson in :issue:`29235`.)
+* :func:`shutil.copyfile`, :func:`shutil.copy`, :func:`shutil.copy2`,
+ :func:`shutil.copytree` and :func:`shutil.move` use platform-specific
+ "fast-copy" syscalls (see
+ :ref:`shutil-platform-dependent-efficient-copy-operations` section).
+
+* :func:`shutil.copyfile` default buffer size on Windows was changed from
+ 16 KiB to 1 MiB.
+
CPython bytecode changes
------------------------
diff --git a/Lib/shutil.py b/Lib/shutil.py
index 09a5727..a4aa0df 100644
--- a/Lib/shutil.py
+++ b/Lib/shutil.py
@@ -43,15 +43,16 @@ try:
except ImportError:
getgrnam = None
+_WINDOWS = os.name == 'nt'
posix = nt = None
if os.name == 'posix':
import posix
-elif os.name == 'nt':
+elif _WINDOWS:
import nt
-COPY_BUFSIZE = 1024 * 1024 if os.name == 'nt' else 16 * 1024
+COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 16 * 1024
_HAS_SENDFILE = posix and hasattr(os, "sendfile")
-_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # OSX
+_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
"copytree", "move", "rmtree", "Error", "SpecialFileError",
@@ -88,9 +89,9 @@ class _GiveupOnFastCopy(Exception):
file copy when fast-copy functions fail to do so.
"""
-def _fastcopy_osx(fsrc, fdst, flags):
+def _fastcopy_fcopyfile(fsrc, fdst, flags):
"""Copy a regular file content or metadata by using high-performance
- fcopyfile(3) syscall (OSX).
+ fcopyfile(3) syscall (macOS).
"""
try:
infd = fsrc.fileno()
@@ -168,8 +169,11 @@ def _fastcopy_sendfile(fsrc, fdst):
break # EOF
offset += sent
-def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE):
- """Copy 2 regular file objects open in binary mode."""
+def _copyfileobj_readinto(fsrc, fdst, length=COPY_BUFSIZE):
+ """readinto()/memoryview() based variant of copyfileobj().
+ *fsrc* must support readinto() method and both files must be
+ open in binary mode.
+ """
# Localize variable access to minimize overhead.
fsrc_readinto = fsrc.readinto
fdst_write = fdst.write
@@ -179,28 +183,21 @@ def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE):
if not n:
break
elif n < length:
- fdst_write(mv[:n])
+ with mv[:n] as smv:
+ fdst.write(smv)
else:
fdst_write(mv)
-def _is_binary_files_pair(fsrc, fdst):
- return hasattr(fsrc, 'readinto') and \
- isinstance(fsrc, io.BytesIO) or 'b' in getattr(fsrc, 'mode', '') and \
- isinstance(fdst, io.BytesIO) or 'b' in getattr(fdst, 'mode', '')
-
def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE):
"""copy data from file-like object fsrc to file-like object fdst"""
- if _is_binary_files_pair(fsrc, fdst):
- _copybinfileobj(fsrc, fdst, length=length)
- else:
- # Localize variable access to minimize overhead.
- fsrc_read = fsrc.read
- fdst_write = fdst.write
- while 1:
- buf = fsrc_read(length)
- if not buf:
- break
- fdst_write(buf)
+ # Localize variable access to minimize overhead.
+ fsrc_read = fsrc.read
+ fdst_write = fdst.write
+ while True:
+ buf = fsrc_read(length)
+ if not buf:
+ break
+ fdst_write(buf)
def _samefile(src, dst):
# Macintosh, Unix.
@@ -215,7 +212,7 @@ def _samefile(src, dst):
os.path.normcase(os.path.abspath(dst)))
def copyfile(src, dst, *, follow_symlinks=True):
- """Copy data from src to dst.
+ """Copy data from src to dst in the most efficient way possible.
If follow_symlinks is not set and src is a symbolic link, a new
symlink will be created instead of copying the file it points to.
@@ -224,7 +221,8 @@ def copyfile(src, dst, *, follow_symlinks=True):
if _samefile(src, dst):
raise SameFileError("{!r} and {!r} are the same file".format(src, dst))
- for fn in [src, dst]:
+ file_size = 0
+ for i, fn in enumerate([src, dst]):
try:
st = os.stat(fn)
except OSError:
@@ -234,26 +232,34 @@ def copyfile(src, dst, *, follow_symlinks=True):
# XXX What about other special files? (sockets, devices...)
if stat.S_ISFIFO(st.st_mode):
raise SpecialFileError("`%s` is a named pipe" % fn)
+ if _WINDOWS and i == 0:
+ file_size = st.st_size
if not follow_symlinks and os.path.islink(src):
os.symlink(os.readlink(src), dst)
else:
with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
- if _HAS_SENDFILE:
+ # macOS
+ if _HAS_FCOPYFILE:
try:
- _fastcopy_sendfile(fsrc, fdst)
+ _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA)
return dst
except _GiveupOnFastCopy:
pass
-
- if _HAS_FCOPYFILE:
+ # Linux / Solaris
+ elif _HAS_SENDFILE:
try:
- _fastcopy_osx(fsrc, fdst, posix._COPYFILE_DATA)
+ _fastcopy_sendfile(fsrc, fdst)
return dst
except _GiveupOnFastCopy:
pass
+ # Windows, see:
+ # https://github.com/python/cpython/pull/7160#discussion_r195405230
+ elif _WINDOWS and file_size > 0:
+ _copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE))
+ return dst
- _copybinfileobj(fsrc, fdst)
+ copyfileobj(fsrc, fdst)
return dst
@@ -1147,7 +1153,7 @@ if hasattr(os, 'statvfs'):
used = (st.f_blocks - st.f_bfree) * st.f_frsize
return _ntuple_diskusage(total, used, free)
-elif os.name == 'nt':
+elif _WINDOWS:
__all__.append('disk_usage')
_ntuple_diskusage = collections.namedtuple('usage', 'total used free')
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py
index 8d51994..7e0a329 100644
--- a/Lib/test/test_shutil.py
+++ b/Lib/test/test_shutil.py
@@ -33,7 +33,7 @@ from test import support
from test.support import TESTFN, FakePath
TESTFN2 = TESTFN + "2"
-OSX = sys.platform.startswith("darwin")
+MACOS = sys.platform.startswith("darwin")
try:
import grp
import pwd
@@ -1808,7 +1808,7 @@ class TestCopyFile(unittest.TestCase):
self.assertRaises(OSError, shutil.copyfile, 'srcfile', 'destfile')
- @unittest.skipIf(OSX, "skipped on OSX")
+ @unittest.skipIf(MACOS, "skipped on macOS")
def test_w_dest_open_fails(self):
srcfile = self.Faux()
@@ -1828,7 +1828,7 @@ class TestCopyFile(unittest.TestCase):
self.assertEqual(srcfile._exited_with[1].args,
('Cannot open "destfile"',))
- @unittest.skipIf(OSX, "skipped on OSX")
+ @unittest.skipIf(MACOS, "skipped on macOS")
def test_w_dest_close_fails(self):
srcfile = self.Faux()
@@ -1851,7 +1851,7 @@ class TestCopyFile(unittest.TestCase):
self.assertEqual(srcfile._exited_with[1].args,
('Cannot close',))
- @unittest.skipIf(OSX, "skipped on OSX")
+ @unittest.skipIf(MACOS, "skipped on macOS")
def test_w_source_close_fails(self):
srcfile = self.Faux(True)
@@ -1892,6 +1892,80 @@ class TestCopyFile(unittest.TestCase):
os.rmdir(dst_dir)
+class TestCopyFileObj(unittest.TestCase):
+ FILESIZE = 2 * 1024 * 1024
+
+ @classmethod
+ def setUpClass(cls):
+ write_test_file(TESTFN, cls.FILESIZE)
+
+ @classmethod
+ def tearDownClass(cls):
+ support.unlink(TESTFN)
+ support.unlink(TESTFN2)
+
+ def tearDown(self):
+ support.unlink(TESTFN2)
+
+ @contextlib.contextmanager
+ def get_files(self):
+ with open(TESTFN, "rb") as src:
+ with open(TESTFN2, "wb") as dst:
+ yield (src, dst)
+
+ def assert_files_eq(self, src, dst):
+ with open(src, 'rb') as fsrc:
+ with open(dst, 'rb') as fdst:
+ self.assertEqual(fsrc.read(), fdst.read())
+
+ def test_content(self):
+ with self.get_files() as (src, dst):
+ shutil.copyfileobj(src, dst)
+ self.assert_files_eq(TESTFN, TESTFN2)
+
+ def test_file_not_closed(self):
+ with self.get_files() as (src, dst):
+ shutil.copyfileobj(src, dst)
+ assert not src.closed
+ assert not dst.closed
+
+ def test_file_offset(self):
+ with self.get_files() as (src, dst):
+ shutil.copyfileobj(src, dst)
+ self.assertEqual(src.tell(), self.FILESIZE)
+ self.assertEqual(dst.tell(), self.FILESIZE)
+
+ @unittest.skipIf(os.name != 'nt', "Windows only")
+ def test_win_impl(self):
+ # Make sure alternate Windows implementation is called.
+ with unittest.mock.patch("shutil._copyfileobj_readinto") as m:
+ shutil.copyfile(TESTFN, TESTFN2)
+ assert m.called
+
+ # File size is 2 MiB but max buf size should be 1 MiB.
+ self.assertEqual(m.call_args[0][2], 1 * 1024 * 1024)
+
+ # If file size < 1 MiB memoryview() length must be equal to
+ # the actual file size.
+ with tempfile.NamedTemporaryFile(delete=False) as f:
+ f.write(b'foo')
+ fname = f.name
+ self.addCleanup(support.unlink, fname)
+ with unittest.mock.patch("shutil._copyfileobj_readinto") as m:
+ shutil.copyfile(fname, TESTFN2)
+ self.assertEqual(m.call_args[0][2], 3)
+
+ # Empty files should not rely on readinto() variant.
+ with tempfile.NamedTemporaryFile(delete=False) as f:
+ pass
+ fname = f.name
+ self.addCleanup(support.unlink, fname)
+ with unittest.mock.patch("shutil._copyfileobj_readinto") as m:
+ shutil.copyfile(fname, TESTFN2)
+ assert not m.called
+ self.assert_files_eq(fname, TESTFN2)
+
+
class _ZeroCopyFileTest(object):
"""Tests common to all zero-copy APIs."""
FILESIZE = (10 * 1024 * 1024) # 10 MiB
@@ -2111,12 +2185,12 @@ class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
shutil._HAS_SENDFILE = True
-@unittest.skipIf(not OSX, 'OSX only')
-class TestZeroCopyOSX(_ZeroCopyFileTest, unittest.TestCase):
+@unittest.skipIf(not MACOS, 'macOS only')
+class TestZeroCopyMACOS(_ZeroCopyFileTest, unittest.TestCase):
PATCHPOINT = "posix._fcopyfile"
def zerocopy_fun(self, src, dst):
- return shutil._fastcopy_osx(src, dst, posix._COPYFILE_DATA)
+ return shutil._fastcopy_fcopyfile(src, dst, posix._COPYFILE_DATA)
class TermsizeTests(unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Library/2018-05-28-23-25-17.bpo-33671.GIdKKi.rst b/Misc/NEWS.d/next/Library/2018-05-28-23-25-17.bpo-33671.GIdKKi.rst
index 5fd7e1f..b1523f2 100644
--- a/Misc/NEWS.d/next/Library/2018-05-28-23-25-17.bpo-33671.GIdKKi.rst
+++ b/Misc/NEWS.d/next/Library/2018-05-28-23-25-17.bpo-33671.GIdKKi.rst
@@ -1,11 +1,10 @@
:func:`shutil.copyfile`, :func:`shutil.copy`, :func:`shutil.copy2`,
:func:`shutil.copytree` and :func:`shutil.move` use platform-specific
-fast-copy syscalls on Linux, Solaris and OSX in order to copy the file
-more efficiently. All other platforms not using such technique will rely on a
-faster :func:`shutil.copyfile` implementation using :func:`memoryview`,
-:class:`bytearray` and
-:meth:`BufferedIOBase.readinto() <io.BufferedIOBase.readinto>`.
-Finally, :func:`shutil.copyfile` default buffer size on Windows was increased
-from 16KB to 1MB. The speedup for copying a 512MB file is about +26% on Linux,
-+50% on OSX and +38% on Windows. Also, much less CPU cycles are consumed
+fast-copy syscalls on Linux, Solaris and macOS in order to copy the file
+more efficiently.
+On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
+instead of 16 KiB) and a :func:`memoryview`-based variant of
+:func:`shutil.copyfileobj` is used.
+The speedup for copying a 512MiB file is about +26% on Linux, +50% on macOS and
++40% on Windows. Also, much less CPU cycles are consumed.
(Contributed by Giampaolo Rodola' in :issue:`25427`.)
diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h
index c41d131..7a21885 100644
--- a/Modules/clinic/posixmodule.c.h
+++ b/Modules/clinic/posixmodule.c.h
@@ -3859,7 +3859,7 @@ PyDoc_STRVAR(os__fcopyfile__doc__,
"_fcopyfile($module, infd, outfd, flags, /)\n"
"--\n"
"\n"
-"Efficiently copy content or metadata of 2 regular file descriptors (OSX).");
+"Efficiently copy content or metadata of 2 regular file descriptors (macOS).");
#define OS__FCOPYFILE_METHODDEF \
{"_fcopyfile", (PyCFunction)os__fcopyfile, METH_FASTCALL, os__fcopyfile__doc__},
@@ -6627,4 +6627,4 @@ exit:
#ifndef OS_GETRANDOM_METHODDEF
#define OS_GETRANDOM_METHODDEF
#endif /* !defined(OS_GETRANDOM_METHODDEF) */
-/*[clinic end generated code: output=b5d1ec71bc6f0651 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=47fb6a3e88cba6d9 input=a9049054013a1b77]*/
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index 51fc1c5..435e5f6 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -8774,12 +8774,12 @@ os._fcopyfile
flags: int
/
-Efficiently copy content or metadata of 2 regular file descriptors (OSX).
+Efficiently copy content or metadata of 2 regular file descriptors (macOS).
[clinic start generated code]*/
static PyObject *
os__fcopyfile_impl(PyObject *module, int infd, int outfd, int flags)
-/*[clinic end generated code: output=8e8885c721ec38e3 input=aeb9456804eec879]*/
+/*[clinic end generated code: output=8e8885c721ec38e3 input=69e0770e600cb44f]*/
{
int ret;