From 72e5b25efb580fb1f0fdfade516be90d90822164 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 7 Mar 2025 22:04:45 -0500 Subject: gh-128646: Implement GzipFile.readinto[1]() methods (GH-128647) The new methods simply delegate to the underlying buffer, much like the existing GzipFile.read[1] methods. This avoids extra allocations caused by the BufferedIOBase.readinto implementation previously used. This commit also factors out a common readability check rather than copying it an additional two times. --- Lib/gzip.py | 28 +++++++++++++------ Lib/test/test_gzip.py | 32 ++++++++++++++++++++++ .../2025-01-08-15-14-17.gh-issue-128647.GabglU.rst | 4 +++ 3 files changed, 55 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst diff --git a/Lib/gzip.py b/Lib/gzip.py index 7e384f8..d681ef6 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -325,11 +325,15 @@ class GzipFile(_compression.BaseStream): return length - def read(self, size=-1): - self._check_not_closed() + def _check_read(self, caller): if self.mode != READ: import errno - raise OSError(errno.EBADF, "read() on write-only GzipFile object") + msg = f"{caller}() on write-only GzipFile object" + raise OSError(errno.EBADF, msg) + + def read(self, size=-1): + self._check_not_closed() + self._check_read("read") return self._buffer.read(size) def read1(self, size=-1): @@ -337,19 +341,25 @@ class GzipFile(_compression.BaseStream): Reads up to a buffer's worth of data if size is negative.""" self._check_not_closed() - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "read1() on write-only GzipFile object") + self._check_read("read1") if size < 0: size = io.DEFAULT_BUFFER_SIZE return self._buffer.read1(size) + def readinto(self, b): + self._check_not_closed() + self._check_read("readinto") + return self._buffer.readinto(b) + + def readinto1(self, b): + self._check_not_closed() + self._check_read("readinto1") + return self._buffer.readinto1(b) + def peek(self, n): self._check_not_closed() - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "peek() on write-only GzipFile object") + self._check_read("peek") return self._buffer.peek(n) @property diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index 0940bb1..260fae5 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -143,6 +143,38 @@ class TestGzip(BaseTest): self.assertEqual(f.tell(), nread) self.assertEqual(b''.join(blocks), data1 * 50) + def test_readinto(self): + # 10MB of uncompressible data to ensure multiple reads + large_data = os.urandom(10 * 2**20) + with gzip.GzipFile(self.filename, 'wb') as f: + f.write(large_data) + + buf = bytearray(len(large_data)) + with gzip.GzipFile(self.filename, 'r') as f: + nbytes = f.readinto(buf) + self.assertEqual(nbytes, len(large_data)) + self.assertEqual(buf, large_data) + + def test_readinto1(self): + # 10MB of uncompressible data to ensure multiple reads + large_data = os.urandom(10 * 2**20) + with gzip.GzipFile(self.filename, 'wb') as f: + f.write(large_data) + + nread = 0 + buf = bytearray(len(large_data)) + memview = memoryview(buf) # Simplifies slicing + with gzip.GzipFile(self.filename, 'r') as f: + for count in range(200): + nbytes = f.readinto1(memview[nread:]) + if not nbytes: + break + nread += nbytes + self.assertEqual(f.tell(), nread) + self.assertEqual(buf, large_data) + # readinto1() should require multiple loops + self.assertGreater(count, 1) + @bigmemtest(size=_4G, memuse=1) def test_read_large(self, size): # Read chunk size over UINT_MAX should be supported, despite zlib's diff --git a/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst b/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst new file mode 100644 index 0000000..034a66b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst @@ -0,0 +1,4 @@ +Eagerly write to buffers passed to :class:`gzip.GzipFile`'s +:meth:`~io.BufferedIOBase.readinto` and +:meth:`~io.BufferedIOBase.readinto1` implementations, +avoiding unnecessary allocations. Patch by Chris Markiewicz. -- cgit v0.12