diff options
author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2021-06-22 13:59:53 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-22 13:59:53 (GMT) |
commit | 01858fbe31e8e0185edfbd3f10172f7c61391c9d (patch) | |
tree | 64698776a22ee2514d6ea7c75f101cda254ebf0a /Lib | |
parent | cf739332bd039cd2303b58663a804f784883820d (diff) | |
download | cpython-01858fbe31e8e0185edfbd3f10172f7c61391c9d.zip cpython-01858fbe31e8e0185edfbd3f10172f7c61391c9d.tar.gz cpython-01858fbe31e8e0185edfbd3f10172f7c61391c9d.tar.bz2 |
bpo-44439: BZ2File.write() / LZMAFile.write() handle buffer protocol correctly (GH-26764) (GH-26845)
No longer use len() to get the length of the input data. For some buffer protocol objects,
len() returns the number of items rather than the number of bytes, so the length obtained by using len() is wrong.
(cherry picked from commit bc6c12c72a9536acc96e7b9355fd69d1083a43c1)
Co-authored-by: Ma Lin <animalize@users.noreply.github.com>
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/bz2.py | 16 | ||||
-rw-r--r-- | Lib/gzip.py | 2 | ||||
-rw-r--r-- | Lib/lzma.py | 16 | ||||
-rw-r--r-- | Lib/test/test_bz2.py | 9 | ||||
-rw-r--r-- | Lib/test/test_gzip.py | 9 | ||||
-rw-r--r-- | Lib/test/test_lzma.py | 9 |
6 files changed, 52 insertions, 9 deletions
@@ -219,14 +219,22 @@ class BZ2File(_compression.BaseStream): """Write a byte string to the file. Returns the number of uncompressed bytes written, which is - always len(data). Note that due to buffering, the file on disk - may not reflect the data written until close() is called. + always the length of data in bytes. Note that due to buffering, + the file on disk may not reflect the data written until close() + is called. """ self._check_can_write() + if isinstance(data, (bytes, bytearray)): + length = len(data) + else: + # accept any data that supports the buffer protocol + data = memoryview(data) + length = data.nbytes + compressed = self._compressor.compress(data) self._fp.write(compressed) - self._pos += len(data) - return len(data) + self._pos += length + return length def writelines(self, seq): """Write a sequence of byte strings to the file. diff --git a/Lib/gzip.py b/Lib/gzip.py index 1c1e795..3d837b7 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -278,7 +278,7 @@ class GzipFile(_compression.BaseStream): if self.fileobj is None: raise ValueError("write() on closed GzipFile object") - if isinstance(data, bytes): + if isinstance(data, (bytes, bytearray)): length = len(data) else: # accept any data that supports the buffer protocol diff --git a/Lib/lzma.py b/Lib/lzma.py index 2ada7d8..9abf06d 100644 --- a/Lib/lzma.py +++ b/Lib/lzma.py @@ -229,14 +229,22 @@ class LZMAFile(_compression.BaseStream): """Write a bytes object to the file. Returns the number of uncompressed bytes written, which is - always len(data). Note that due to buffering, the file on disk - may not reflect the data written until close() is called. + always the length of data in bytes. Note that due to buffering, + the file on disk may not reflect the data written until close() + is called. 
""" self._check_can_write() + if isinstance(data, (bytes, bytearray)): + length = len(data) + else: + # accept any data that supports the buffer protocol + data = memoryview(data) + length = data.nbytes + compressed = self._compressor.compress(data) self._fp.write(compressed) - self._pos += len(data) - return len(data) + self._pos += length + return length def seek(self, offset, whence=io.SEEK_SET): """Change the file position. diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index efed3a8..7913beb 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -1,6 +1,7 @@ from test import support from test.support import bigmemtest, _4G +import array import unittest from io import BytesIO, DEFAULT_BUFFER_SIZE import os @@ -620,6 +621,14 @@ class BZ2FileTest(BaseTest): with BZ2File(BytesIO(truncated[:i])) as f: self.assertRaises(EOFError, f.read, 1) + def test_issue44439(self): + q = array.array('Q', [1, 2, 3, 4, 5]) + LENGTH = len(q) * q.itemsize + + with BZ2File(BytesIO(), 'w') as f: + self.assertEqual(f.write(q), LENGTH) + self.assertEqual(f.tell(), LENGTH) + class BZ2CompressorTest(BaseTest): def testCompress(self): diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index 446b61a..7b51e45 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -592,6 +592,15 @@ class TestGzip(BaseTest): with gzip.open(self.filename, "rb") as f: f._buffer.raw._fp.prepend() + def test_issue44439(self): + q = array.array('Q', [1, 2, 3, 4, 5]) + LENGTH = len(q) * q.itemsize + + with gzip.GzipFile(fileobj=io.BytesIO(), mode='w') as f: + self.assertEqual(f.write(q), LENGTH) + self.assertEqual(f.tell(), LENGTH) + + class TestOpen(BaseTest): def test_binary_modes(self): uncompressed = data1 * 50 diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py index db20300..1e2066b 100644 --- a/Lib/test/test_lzma.py +++ b/Lib/test/test_lzma.py @@ -1,4 +1,5 @@ import _compression +import array from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE import 
os import pathlib @@ -1231,6 +1232,14 @@ class FileTestCase(unittest.TestCase): self.assertTrue(d2.eof) self.assertEqual(out1 + out2, entire) + def test_issue44439(self): + q = array.array('Q', [1, 2, 3, 4, 5]) + LENGTH = len(q) * q.itemsize + + with LZMAFile(BytesIO(), 'w') as f: + self.assertEqual(f.write(q), LENGTH) + self.assertEqual(f.tell(), LENGTH) + class OpenTestCase(unittest.TestCase): |