diff options
author | Giampaolo Rodola <g.rodola@gmail.com> | 2018-06-12 21:04:50 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-06-12 21:04:50 (GMT) |
commit | 4a172ccc739065bb658c75e8929774a8e94af9e9 (patch) | |
tree | 7f76b26eccd0de5d4b697138fdbe120028d03e9e /Lib/shutil.py | |
parent | 33cd058f21d0673253c88cea70388282918992bc (diff) | |
download | cpython-4a172ccc739065bb658c75e8929774a8e94af9e9.zip cpython-4a172ccc739065bb658c75e8929774a8e94af9e9.tar.gz cpython-4a172ccc739065bb658c75e8929774a8e94af9e9.tar.bz2 |
bpo-33671: efficient zero-copy for shutil.copy* functions (Linux, OSX and Win) (#7160)
* have shutil.copyfileobj use sendfile() if possible
* refactoring: use ctx manager
* add test with non-regular file obj
* emulate case where file size can't be determined
* reference _copyfileobj_sendfile directly
* add test for offset() at certain position
* add test for empty file
* add test for non regular file dst
* small refactoring
* leave copyfileobj() alone in order to not introduce any incompatibility
* minor refactoring
* remove old test
* update docstring
* update docstring; rename exception class
* detect platforms which only support file to socket zero copy
* don't run test on platforms where file-to-file zero copy is not supported
* use tempfiles
* reset verbosity
* add test for smaller chunks
* add big file size test
* add comment
* update doc
* update whatsnew doc
* update doc
* catch Exception
* remove unused import
* add test case for error on second sendfile() call
* turn docstring into comment
* add one more test
* update comment
* add Misc/NEWS entry
* get rid of COPY_BUFSIZE; it belongs to another PR
* update doc
* expose posix._fcopyfile() for OSX
* merge from linux branch
* merge from linux branch
* expose fcopyfile
* arg clinic for the win implementation
* convert path type to path_t
* expose CopyFileW
* fix windows tests
* release GIL
* minor refactoring
* update doc
* update comment
* update docstrings
* rename functions
* rename test classes
* update doc
* update doc
* update docstrings and comments
* avoid importing nt|posix modules if unnecessary
* set nt|posix modules to None if not available
* micro speedup
* update description
* add doc note
* use better wording in doc
* rename function using 'fastcopy' prefix instead of 'zerocopy'
* use :ref: in rst doc
* change wording in doc
* add test to make sure sendfile() doesn't get called anymore in case it doesn't support file to file copies
* move CopyFileW in _winapi and actually expose CopyFileExW instead
* fix line endings
* add tests for mode bits
* add docstring
* remove test file mode class; let's keep it for later when I start addressing OSX fcopyfile() specific copies
* update doc to reflect new changes
* update doc
* adjust tests on win
* fix argument clinic error
* update doc
* OSX: expose copyfile(3) instead of fcopyfile(3); also expose flags arg to python
* osx / copyfile: use path_t instead of char
* do not set dst name in the OSError exception in order to remain consistent with platforms which cannot do that (e.g. linux)
* add same file test
* add test for same file
* have osx copyfile() pre-emptively check if src and dst are the same, otherwise it will return immediately and src file content gets deleted
* turn PermissionError into appropriate SameFileError
* expose ERROR_SHARING_VIOLATION in order to raise more appropriate SameFileError
* honour follow_symlinks arg when using CopyFileEx
* update Misc/NEWS
* expose CreateDirectoryEx mock
* change C type
* CreateDirectoryExW actual implementation
* provide specific makedirs() implementation for win
* fix typo
* skeleton for SetNamedSecurityInfo
* get security info for src path
* finally set security attrs
* add unit tests
* mimic os.makedirs() behavior and raise if dst dir exists
* set 2 paths for OSError object
* set 2 paths for OSError object
* expand windows test
* in case of exception on os.sendfile() set filename and filename2 exception attributes
* set 2 filenames (src, dst) for OSError in case copyfile() fails on OSX
* update doc
* do not use CreateDirectoryEx() in copytree() if source dir is a symlink (breaks test_copytree_symlink_dir); instead just create a plain dir and remain consistent with POSIX implementation
* use bytearray() and readinto()
* use memoryview() with bytearray()
* refactoring + introduce a new _fastcopy_binfileobj() fun
* remove CopyFileEx and other C wrappers
* remove code related to CopyFileEx
* Recognize binary files in copyfileobj()
...and use fastest _fastcopy_binfileobj() when possible
* set 1MB copy bufsize on win; also add a global _COPY_BUFSIZE variable
* use ctx manager for memoryview()
* update doc
* remove outdated doc
* remove last CopyFileEx remnants
* OSX - use fcopyfile(3) instead of copyfile(3)
...as an extra safety measure: in case src/dst are "exotic" files (non
regular or living on a network fs etc.) we better fail on open() instead
of copyfile(3) as we're not quite sure what's gonna happen in that
case.
* update doc
Diffstat (limited to 'Lib/shutil.py')
-rw-r--r-- | Lib/shutil.py | 157 |
1 files changed, 145 insertions, 12 deletions
diff --git a/Lib/shutil.py b/Lib/shutil.py index 3c02776..09a5727 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -10,6 +10,7 @@ import stat import fnmatch import collections import errno +import io try: import zlib @@ -42,6 +43,16 @@ try: except ImportError: getgrnam = None +posix = nt = None +if os.name == 'posix': + import posix +elif os.name == 'nt': + import nt + +COPY_BUFSIZE = 1024 * 1024 if os.name == 'nt' else 16 * 1024 +_HAS_SENDFILE = posix and hasattr(os, "sendfile") +_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # OSX + __all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2", "copytree", "move", "rmtree", "Error", "SpecialFileError", "ExecError", "make_archive", "get_archive_formats", @@ -72,14 +83,124 @@ class RegistryError(Exception): """Raised when a registry operation with the archiving and unpacking registries fails""" +class _GiveupOnFastCopy(Exception): + """Raised as a signal to fallback on using raw read()/write() + file copy when fast-copy functions fail to do so. + """ + +def _fastcopy_osx(fsrc, fdst, flags): + """Copy a regular file content or metadata by using high-performance + fcopyfile(3) syscall (OSX). + """ + try: + infd = fsrc.fileno() + outfd = fdst.fileno() + except Exception as err: + raise _GiveupOnFastCopy(err) # not a regular file + + try: + posix._fcopyfile(infd, outfd, flags) + except OSError as err: + err.filename = fsrc.name + err.filename2 = fdst.name + if err.errno in {errno.EINVAL, errno.ENOTSUP}: + raise _GiveupOnFastCopy(err) + else: + raise err from None + +def _fastcopy_sendfile(fsrc, fdst): + """Copy data from one regular mmap-like fd to another by using + high-performance sendfile(2) syscall. + This should work on Linux >= 2.6.33 and Solaris only. + """ + # Note: copyfileobj() is left alone in order to not introduce any + # unexpected breakage. 
Possible risks by using zero-copy calls + # in copyfileobj() are: + # - fdst cannot be open in "a"(ppend) mode + # - fsrc and fdst may be open in "t"(ext) mode + # - fsrc may be a BufferedReader (which hides unread data in a buffer), + # GzipFile (which decompresses data), HTTPResponse (which decodes + # chunks). + # - possibly others (e.g. encrypted fs/partition?) + global _HAS_SENDFILE + try: + infd = fsrc.fileno() + outfd = fdst.fileno() + except Exception as err: + raise _GiveupOnFastCopy(err) # not a regular file + + # Hopefully the whole file will be copied in a single call. + # sendfile() is called in a loop 'till EOF is reached (0 return) + # so a bufsize smaller or bigger than the actual file size + # should not make any difference, also in case the file content + # changes while being copied. + try: + blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8MB + except Exception: + blocksize = 2 ** 27 # 128MB + + offset = 0 + while True: + try: + sent = os.sendfile(outfd, infd, offset, blocksize) + except OSError as err: + # ...in oder to have a more informative exception. + err.filename = fsrc.name + err.filename2 = fdst.name + + if err.errno == errno.ENOTSOCK: + # sendfile() on this platform (probably Linux < 2.6.33) + # does not support copies between regular files (only + # sockets). + _HAS_SENDFILE = False + raise _GiveupOnFastCopy(err) + + if err.errno == errno.ENOSPC: # filesystem is full + raise err from None + + # Give up on first call and if no data was copied. + if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0: + raise _GiveupOnFastCopy(err) + + raise err + else: + if sent == 0: + break # EOF + offset += sent + +def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE): + """Copy 2 regular file objects open in binary mode.""" + # Localize variable access to minimize overhead. 
+ fsrc_readinto = fsrc.readinto + fdst_write = fdst.write + with memoryview(bytearray(length)) as mv: + while True: + n = fsrc_readinto(mv) + if not n: + break + elif n < length: + fdst_write(mv[:n]) + else: + fdst_write(mv) + +def _is_binary_files_pair(fsrc, fdst): + return hasattr(fsrc, 'readinto') and \ + isinstance(fsrc, io.BytesIO) or 'b' in getattr(fsrc, 'mode', '') and \ + isinstance(fdst, io.BytesIO) or 'b' in getattr(fdst, 'mode', '') -def copyfileobj(fsrc, fdst, length=16*1024): +def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE): """copy data from file-like object fsrc to file-like object fdst""" - while 1: - buf = fsrc.read(length) - if not buf: - break - fdst.write(buf) + if _is_binary_files_pair(fsrc, fdst): + _copybinfileobj(fsrc, fdst, length=length) + else: + # Localize variable access to minimize overhead. + fsrc_read = fsrc.read + fdst_write = fdst.write + while 1: + buf = fsrc_read(length) + if not buf: + break + fdst_write(buf) def _samefile(src, dst): # Macintosh, Unix. @@ -117,9 +238,23 @@ def copyfile(src, dst, *, follow_symlinks=True): if not follow_symlinks and os.path.islink(src): os.symlink(os.readlink(src), dst) else: - with open(src, 'rb') as fsrc: - with open(dst, 'wb') as fdst: - copyfileobj(fsrc, fdst) + with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst: + if _HAS_SENDFILE: + try: + _fastcopy_sendfile(fsrc, fdst) + return dst + except _GiveupOnFastCopy: + pass + + if _HAS_FCOPYFILE: + try: + _fastcopy_osx(fsrc, fdst, posix._COPYFILE_DATA) + return dst + except _GiveupOnFastCopy: + pass + + _copybinfileobj(fsrc, fdst) + return dst def copymode(src, dst, *, follow_symlinks=True): @@ -244,13 +379,12 @@ def copy(src, dst, *, follow_symlinks=True): def copy2(src, dst, *, follow_symlinks=True): """Copy data and all stat info ("cp -p src dst"). Return the file's - destination." + destination. The destination may be a directory. If follow_symlinks is false, symlinks won't be followed. This resembles GNU's "cp -P src dst". 
- """ if os.path.isdir(dst): dst = os.path.join(dst, os.path.basename(src)) @@ -1015,7 +1149,6 @@ if hasattr(os, 'statvfs'): elif os.name == 'nt': - import nt __all__.append('disk_usage') _ntuple_diskusage = collections.namedtuple('usage', 'total used free') |