diff options
author | Arjun <ccldarjun@icloud.com> | 2023-05-08 17:55:59 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-08 17:55:59 (GMT) |
commit | 9af485436b83003b5705a6e54bdeb900c70e0c69 (patch) | |
tree | 69506164b3b413a22146cbf7250eaec8836d7b3d /Lib/gzip.py | |
parent | 405eacc1b87a42e19fd176131e70537f0539e05e (diff) | |
download | cpython-9af485436b83003b5705a6e54bdeb900c70e0c69.zip cpython-9af485436b83003b5705a6e54bdeb900c70e0c69.tar.gz cpython-9af485436b83003b5705a6e54bdeb900c70e0c69.tar.bz2 |
gh-89550: Buffer GzipFile.write to reduce execution time by ~15% (#101251)
Use `io.BufferedWriter` to buffer gzip writes.
---------
Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
Diffstat (limited to 'Lib/gzip.py')
-rw-r--r-- | Lib/gzip.py | 40 |
1 files changed, 35 insertions, 5 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py index 75c6ddc..8796c8d 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -22,6 +22,7 @@ _COMPRESS_LEVEL_TRADEOFF = 6 _COMPRESS_LEVEL_BEST = 9 READ_BUFFER_SIZE = 128 * 1024 +_WRITE_BUFFER_SIZE = 4 * io.DEFAULT_BUFFER_SIZE def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST, @@ -120,6 +121,21 @@ class BadGzipFile(OSError): """Exception raised in some cases for invalid gzip files.""" +class _WriteBufferStream(io.RawIOBase): + """Minimal object to pass WriteBuffer flushes into GzipFile""" + def __init__(self, gzip_file): + self.gzip_file = gzip_file + + def write(self, data): + return self.gzip_file._write_raw(data) + + def seekable(self): + return False + + def writable(self): + return True + + class GzipFile(_compression.BaseStream): """The GzipFile class simulates most of the methods of a file object with the exception of the truncate() method. @@ -184,6 +200,7 @@ class GzipFile(_compression.BaseStream): if mode is None: mode = getattr(fileobj, 'mode', 'rb') + if mode.startswith('r'): self.mode = READ raw = _GzipReader(fileobj) @@ -206,6 +223,9 @@ class GzipFile(_compression.BaseStream): zlib.DEF_MEM_LEVEL, 0) self._write_mtime = mtime + self._buffer_size = _WRITE_BUFFER_SIZE + self._buffer = io.BufferedWriter(_WriteBufferStream(self), + buffer_size=self._buffer_size) else: raise ValueError("Invalid mode: {!r}".format(mode)) @@ -231,6 +251,11 @@ class GzipFile(_compression.BaseStream): self.bufsize = 0 self.offset = 0 # Current file offset for seek(), tell(), etc + def tell(self): + self._check_not_closed() + self._buffer.flush() + return super().tell() + def _write_gzip_header(self, compresslevel): self.fileobj.write(b'\037\213') # magic header self.fileobj.write(b'\010') # compression method @@ -272,6 +297,10 @@ class GzipFile(_compression.BaseStream): if self.fileobj is None: raise ValueError("write() on closed GzipFile object") + return self._buffer.write(data) + + def _write_raw(self, data): + # Called by our self._buffer underlying WriteBufferStream. if isinstance(data, (bytes, bytearray)): length = len(data) else: @@ -322,9 +351,9 @@ class GzipFile(_compression.BaseStream): fileobj = self.fileobj if fileobj is None: return - self.fileobj = None try: if self.mode == WRITE: + self._buffer.flush() fileobj.write(self.compress.flush()) write32u(fileobj, self.crc) # self.size may exceed 2 GiB, or even 4 GiB @@ -332,6 +361,7 @@ class GzipFile(_compression.BaseStream): elif self.mode == READ: self._buffer.close() finally: + self.fileobj = None myfileobj = self.myfileobj if myfileobj: self.myfileobj = None @@ -341,7 +371,7 @@ class GzipFile(_compression.BaseStream): self._check_not_closed() if self.mode == WRITE: # Ensure the compressor's buffer is flushed - self.fileobj.write(self.compress.flush(zlib_mode)) + self._buffer.flush() self.fileobj.flush() def fileno(self): @@ -378,10 +408,10 @@ class GzipFile(_compression.BaseStream): if offset < self.offset: raise OSError('Negative seek in write mode') count = offset - self.offset - chunk = b'\0' * 1024 - for i in range(count // 1024): + chunk = b'\0' * self._buffer_size + for i in range(count // self._buffer_size): self.write(chunk) - self.write(b'\0' * (count % 1024)) + self.write(b'\0' * (count % self._buffer_size)) elif self.mode == READ: self._check_not_closed() return self._buffer.seek(offset, whence) |