summaryrefslogtreecommitdiffstats
path: root/Lib/shutil.py
diff options
context:
space:
mode:
authorGiampaolo Rodola <g.rodola@gmail.com>2018-06-12 21:04:50 (GMT)
committerGitHub <noreply@github.com>2018-06-12 21:04:50 (GMT)
commit4a172ccc739065bb658c75e8929774a8e94af9e9 (patch)
tree7f76b26eccd0de5d4b697138fdbe120028d03e9e /Lib/shutil.py
parent33cd058f21d0673253c88cea70388282918992bc (diff)
downloadcpython-4a172ccc739065bb658c75e8929774a8e94af9e9.zip
cpython-4a172ccc739065bb658c75e8929774a8e94af9e9.tar.gz
cpython-4a172ccc739065bb658c75e8929774a8e94af9e9.tar.bz2
bpo-33671: efficient zero-copy for shutil.copy* functions (Linux, OSX and Win) (#7160)
* have shutil.copyfileobj use sendfile() if possible * refactoring: use ctx manager * add test with non-regular file obj * emulate case where file size can't be determined * reference _copyfileobj_sendfile directly * add test for offset() at certain position * add test for empty file * add test for non regular file dst * small refactoring * leave copyfileobj() alone in order to not introduce any incompatibility * minor refactoring * remove old test * update docstring * update docstring; rename exception class * detect platforms which only support file to socket zero copy * don't run test on platforms where file-to-file zero copy is not supported * use tempfiles * reset verbosity * add test for smaller chunks * add big file size test * add comment * update doc * update whatsnew doc * update doc * catch Exception * remove unused import * add test case for error on second sendfile() call * turn docstring into comment * add one more test * update comment * add Misc/NEWS entry * get rid of COPY_BUFSIZE; it belongs to another PR * update doc * expose posix._fcopyfile() for OSX * merge from linux branch * merge from linux branch * expose fcopyfile * arg clinic for the win implementation * convert path type to path_t * expose CopyFileW * fix windows tests * release GIL * minor refactoring * update doc * update comment * update docstrings * rename functions * rename test classes * update doc * update doc * update docstrings and comments * avoid do import nt|posix modules if unnecessary * set nt|posix modules to None if not available * micro speedup * update description * add doc note * use better wording in doc * rename function using 'fastcopy' prefix instead of 'zerocopy' * use :ref: in rst doc * change wording in doc * add test to make sure sendfile() doesn't get called aymore in case it doesn't support file to file copies * move CopyFileW in _winapi and actually expose CopyFileExW instead * fix line endings * add tests for mode bits * add docstring * remove test file mode class; let's keep it for later when Istart addressing OSX fcopyfile() specific copies * update doc to reflect new changes * update doc * adjust tests on win * fix argument clinic error * update doc * OSX: expose copyfile(3) instead of fcopyfile(3); also expose flags arg to python * osx / copyfile: use path_t instead of char * do not set dst name in the OSError exception in order to remain consistent with platforms which cannot do that (e.g. linux) * add same file test * add test for same file * have osx copyfile() pre-emptively check if src and dst are the same, otherwise it will return immedialtey and src file content gets deleted * turn PermissionError into appropriate SameFileError * expose ERROR_SHARING_VIOLATION in order to raise more appropriate SameFileError * honour follow_symlinks arg when using CopyFileEx * update Misc/NEWS * expose CreateDirectoryEx mock * change C type * CreateDirectoryExW actual implementation * provide specific makedirs() implementation for win * fix typo * skeleton for SetNamedSecurityInfo * get security info for src path * finally set security attrs * add unit tests * mimick os.makedirs() behavior and raise if dst dir exists * set 2 paths for OSError object * set 2 paths for OSError object * expand windows test * in case of exception on os.sendfile() set filename and filename2 exception attributes * set 2 filenames (src, dst) for OSError in case copyfile() fails on OSX * update doc * do not use CreateDirectoryEx() in copytree() if source dir is a symlink (breaks test_copytree_symlink_dir); instead just create a plain dir and remain consistent with POSIX implementation * use bytearray() and readinto() * use memoryview() with bytearray() * refactoring + introduce a new _fastcopy_binfileobj() fun * remove CopyFileEx and other C wrappers * remove code related to CopyFileEx * Recognize binary files in copyfileobj() ...and use fastest _fastcopy_binfileobj() when possible * set 1MB copy bufsize on win; also add a global _COPY_BUFSIZE variable * use ctx manager for memoryview() * update doc * remove outdated doc * remove last CopyFileEx remnants * OSX - use fcopyfile(3) instead of copyfile(3) ...as an extra safety measure: in case src/dst are "exotic" files (non regular or living on a network fs etc.) we better fail on open() instead of copyfile(3) as we're not quite sure what's gonna happen in that case. * update doc
Diffstat (limited to 'Lib/shutil.py')
-rw-r--r--Lib/shutil.py157
1 files changed, 145 insertions, 12 deletions
diff --git a/Lib/shutil.py b/Lib/shutil.py
index 3c02776..09a5727 100644
--- a/Lib/shutil.py
+++ b/Lib/shutil.py
@@ -10,6 +10,7 @@ import stat
import fnmatch
import collections
import errno
+import io
try:
import zlib
@@ -42,6 +43,16 @@ try:
except ImportError:
getgrnam = None
+posix = nt = None
+if os.name == 'posix':
+ import posix
+elif os.name == 'nt':
+ import nt
+
+COPY_BUFSIZE = 1024 * 1024 if os.name == 'nt' else 16 * 1024
+_HAS_SENDFILE = posix and hasattr(os, "sendfile")
+_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # OSX
+
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
"copytree", "move", "rmtree", "Error", "SpecialFileError",
"ExecError", "make_archive", "get_archive_formats",
@@ -72,14 +83,124 @@ class RegistryError(Exception):
"""Raised when a registry operation with the archiving
and unpacking registries fails"""
+class _GiveupOnFastCopy(Exception):
+ """Raised as a signal to fallback on using raw read()/write()
+ file copy when fast-copy functions fail to do so.
+ """
+
+def _fastcopy_osx(fsrc, fdst, flags):
+ """Copy a regular file content or metadata by using high-performance
+ fcopyfile(3) syscall (OSX).
+ """
+ try:
+ infd = fsrc.fileno()
+ outfd = fdst.fileno()
+ except Exception as err:
+ raise _GiveupOnFastCopy(err) # not a regular file
+
+ try:
+ posix._fcopyfile(infd, outfd, flags)
+ except OSError as err:
+ err.filename = fsrc.name
+ err.filename2 = fdst.name
+ if err.errno in {errno.EINVAL, errno.ENOTSUP}:
+ raise _GiveupOnFastCopy(err)
+ else:
+ raise err from None
+
+def _fastcopy_sendfile(fsrc, fdst):
+ """Copy data from one regular mmap-like fd to another by using
+ high-performance sendfile(2) syscall.
+ This should work on Linux >= 2.6.33 and Solaris only.
+ """
+ # Note: copyfileobj() is left alone in order to not introduce any
+ # unexpected breakage. Possible risks by using zero-copy calls
+ # in copyfileobj() are:
+ # - fdst cannot be open in "a"(ppend) mode
+ # - fsrc and fdst may be open in "t"(ext) mode
+ # - fsrc may be a BufferedReader (which hides unread data in a buffer),
+ # GzipFile (which decompresses data), HTTPResponse (which decodes
+ # chunks).
+ # - possibly others (e.g. encrypted fs/partition?)
+ global _HAS_SENDFILE
+ try:
+ infd = fsrc.fileno()
+ outfd = fdst.fileno()
+ except Exception as err:
+ raise _GiveupOnFastCopy(err) # not a regular file
+
+ # Hopefully the whole file will be copied in a single call.
+ # sendfile() is called in a loop 'till EOF is reached (0 return)
+ # so a bufsize smaller or bigger than the actual file size
+ # should not make any difference, also in case the file content
+ # changes while being copied.
+ try:
+ blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8MB
+ except Exception:
+ blocksize = 2 ** 27 # 128MB
+
+ offset = 0
+ while True:
+ try:
+ sent = os.sendfile(outfd, infd, offset, blocksize)
+ except OSError as err:
+ # ...in oder to have a more informative exception.
+ err.filename = fsrc.name
+ err.filename2 = fdst.name
+
+ if err.errno == errno.ENOTSOCK:
+ # sendfile() on this platform (probably Linux < 2.6.33)
+ # does not support copies between regular files (only
+ # sockets).
+ _HAS_SENDFILE = False
+ raise _GiveupOnFastCopy(err)
+
+ if err.errno == errno.ENOSPC: # filesystem is full
+ raise err from None
+
+ # Give up on first call and if no data was copied.
+ if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
+ raise _GiveupOnFastCopy(err)
+
+ raise err
+ else:
+ if sent == 0:
+ break # EOF
+ offset += sent
+
+def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE):
+ """Copy 2 regular file objects open in binary mode."""
+ # Localize variable access to minimize overhead.
+ fsrc_readinto = fsrc.readinto
+ fdst_write = fdst.write
+ with memoryview(bytearray(length)) as mv:
+ while True:
+ n = fsrc_readinto(mv)
+ if not n:
+ break
+ elif n < length:
+ fdst_write(mv[:n])
+ else:
+ fdst_write(mv)
+
+def _is_binary_files_pair(fsrc, fdst):
+ return hasattr(fsrc, 'readinto') and \
+ isinstance(fsrc, io.BytesIO) or 'b' in getattr(fsrc, 'mode', '') and \
+ isinstance(fdst, io.BytesIO) or 'b' in getattr(fdst, 'mode', '')
-def copyfileobj(fsrc, fdst, length=16*1024):
+def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE):
"""copy data from file-like object fsrc to file-like object fdst"""
- while 1:
- buf = fsrc.read(length)
- if not buf:
- break
- fdst.write(buf)
+ if _is_binary_files_pair(fsrc, fdst):
+ _copybinfileobj(fsrc, fdst, length=length)
+ else:
+ # Localize variable access to minimize overhead.
+ fsrc_read = fsrc.read
+ fdst_write = fdst.write
+ while 1:
+ buf = fsrc_read(length)
+ if not buf:
+ break
+ fdst_write(buf)
def _samefile(src, dst):
# Macintosh, Unix.
@@ -117,9 +238,23 @@ def copyfile(src, dst, *, follow_symlinks=True):
if not follow_symlinks and os.path.islink(src):
os.symlink(os.readlink(src), dst)
else:
- with open(src, 'rb') as fsrc:
- with open(dst, 'wb') as fdst:
- copyfileobj(fsrc, fdst)
+ with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
+ if _HAS_SENDFILE:
+ try:
+ _fastcopy_sendfile(fsrc, fdst)
+ return dst
+ except _GiveupOnFastCopy:
+ pass
+
+ if _HAS_FCOPYFILE:
+ try:
+ _fastcopy_osx(fsrc, fdst, posix._COPYFILE_DATA)
+ return dst
+ except _GiveupOnFastCopy:
+ pass
+
+ _copybinfileobj(fsrc, fdst)
+
return dst
def copymode(src, dst, *, follow_symlinks=True):
@@ -244,13 +379,12 @@ def copy(src, dst, *, follow_symlinks=True):
def copy2(src, dst, *, follow_symlinks=True):
"""Copy data and all stat info ("cp -p src dst"). Return the file's
- destination."
+ destination.
The destination may be a directory.
If follow_symlinks is false, symlinks won't be followed. This
resembles GNU's "cp -P src dst".
-
"""
if os.path.isdir(dst):
dst = os.path.join(dst, os.path.basename(src))
@@ -1015,7 +1149,6 @@ if hasattr(os, 'statvfs'):
elif os.name == 'nt':
- import nt
__all__.append('disk_usage')
_ntuple_diskusage = collections.namedtuple('usage', 'total used free')