diff options
author | Ma Lin <animalize@users.noreply.github.com> | 2021-04-28 06:58:54 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-28 06:58:54 (GMT) |
commit | f9bedb630e8a0b7d94e1c7e609b20dfaa2b22231 (patch) | |
tree | ca8e232aebdae960f8a55737897cd20766df71ca /Lib/_compression.py | |
parent | a5e64444e6df7d1d498576bab26deaddc288a7bd (diff) | |
download | cpython-f9bedb630e8a0b7d94e1c7e609b20dfaa2b22231.zip cpython-f9bedb630e8a0b7d94e1c7e609b20dfaa2b22231.tar.gz cpython-f9bedb630e8a0b7d94e1c7e609b20dfaa2b22231.tar.bz2 |
bpo-41486: Faster bz2/lzma/zlib via new output buffering (GH-21740)
Faster bz2/lzma/zlib via new output buffering.
Also adds a .readall() method to the _compression.DecompressReader class
to take best advantage of this in the consume-all-output-at-once scenario.
Often a 5-20% speedup in common scenarios due to less data copying.
Contributed by Ma Lin.
Diffstat (limited to 'Lib/_compression.py')
-rw-r--r-- | Lib/_compression.py | 12 |
1 files changed, 11 insertions, 1 deletions
```diff
diff --git a/Lib/_compression.py b/Lib/_compression.py
index b00f31b..e8b70aa 100644
--- a/Lib/_compression.py
+++ b/Lib/_compression.py
@@ -1,7 +1,7 @@
 """Internal classes used by the gzip, lzma and bz2 modules"""
 
 import io
-
+import sys
 
 BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE  # Compressed data read chunk size
 
@@ -110,6 +110,16 @@ class DecompressReader(io.RawIOBase):
         self._pos += len(data)
         return data
 
+    def readall(self):
+        chunks = []
+        # sys.maxsize means the max length of output buffer is unlimited,
+        # so that the whole input buffer can be decompressed within one
+        # .decompress() call.
+        while data := self.read(sys.maxsize):
+            chunks.append(data)
+
+        return b"".join(chunks)
+
     # Rewind the file to the beginning of the data stream.
     def _rewind(self):
         self._fp.seek(0)
```