diff options
author | Igor Bolshakov <ibolsch@gmail.com> | 2021-05-17 08:28:21 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-17 08:28:21 (GMT) |
commit | f32c7950e0077b6d9a8e217c2796fc582f18ca08 (patch) | |
tree | 783fb3a3cd88974bf33f813170cd29789774de80 /Lib | |
parent | 83f0f8d62f279f846a92fede2244beaa0149b9d8 (diff) | |
download | cpython-f32c7950e0077b6d9a8e217c2796fc582f18ca08.zip cpython-f32c7950e0077b6d9a8e217c2796fc582f18ca08.tar.gz cpython-f32c7950e0077b6d9a8e217c2796fc582f18ca08.tar.bz2 |
bpo-43650: Fix MemoryError on zip.read in shutil._unpack_zipfile for large files (GH-25058)
`shutil.unpack_archive()` tries to read the whole file into memory, making no use of any kind of smaller buffer. Process crashes for really large files: e.g. archive: ~1.7G, unpacked: ~10G. Before the crash it can easily take away all available RAM on smaller systems. Had to pull the code from `zipfile.ZipFile.extractall()` to fix this
Automerge-Triggered-By: GH:gpshead
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/shutil.py | 16 |
1 files changed, 6 insertions, 10 deletions
diff --git a/Lib/shutil.py b/Lib/shutil.py index 55cfe35..1982b1c 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -1163,20 +1163,16 @@ def _unpack_zipfile(filename, extract_dir): if name.startswith('/') or '..' in name: continue - target = os.path.join(extract_dir, *name.split('/')) - if not target: + targetpath = os.path.join(extract_dir, *name.split('/')) + if not targetpath: continue - _ensure_directory(target) + _ensure_directory(targetpath) if not name.endswith('/'): # file - data = zip.read(info.filename) - f = open(target, 'wb') - try: - f.write(data) - finally: - f.close() - del data + with zip.open(name, 'r') as source, \ + open(targetpath, 'wb') as target: + copyfileobj(source, target) finally: zip.close() |