diff options
author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2024-06-26 10:11:28 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-26 10:11:28 (GMT) |
commit | f2b4f517b9a0dbe4d2ebd1e1912615ede46d7aec (patch) | |
tree | 7447aa74c63a2f3ca08fe1ee436b6e0af3ab088f | |
parent | 84634254fef19ab31439e88ec0213acb46bd7b1f (diff) | |
download | cpython-f2b4f517b9a0dbe4d2ebd1e1912615ede46d7aec.zip cpython-f2b4f517b9a0dbe4d2ebd1e1912615ede46d7aec.tar.gz cpython-f2b4f517b9a0dbe4d2ebd1e1912615ede46d7aec.tar.bz2 |
[3.13] gh-120380: fix Python implementation of `pickle.Pickler` for `bytes` and `bytearray` objects in protocol version 5. (GH-120422) (GH-120832)
gh-120380: fix Python implementation of `pickle.Pickler` for `bytes` and `bytearray` objects in protocol version 5. (GH-120422)
(cherry picked from commit 7595e6743ac78ac0dd19418176f66d251668fafc)
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
-rw-r--r-- | Lib/pickle.py | 50 | ||||
-rw-r--r-- | Lib/test/pickletester.py | 49 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2024-06-12-18-23-15.gh-issue-120380.edtqjq.rst | 3 |
3 files changed, 81 insertions, 21 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py index 33c97c8..d719ceb 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -782,14 +782,10 @@ class _Pickler: self.write(FLOAT + repr(obj).encode("ascii") + b'\n') dispatch[float] = save_float - def save_bytes(self, obj): - if self.proto < 3: - if not obj: # bytes object is empty - self.save_reduce(bytes, (), obj=obj) - else: - self.save_reduce(codecs.encode, - (str(obj, 'latin1'), 'latin1'), obj=obj) - return + def _save_bytes_no_memo(self, obj): + # helper for writing bytes objects for protocol >= 3 + # without memoizing them + assert self.proto >= 3 n = len(obj) if n <= 0xff: self.write(SHORT_BINBYTES + pack("<B", n) + obj) @@ -799,9 +795,29 @@ class _Pickler: self._write_large_bytes(BINBYTES + pack("<I", n), obj) else: self.write(BINBYTES + pack("<I", n) + obj) + + def save_bytes(self, obj): + if self.proto < 3: + if not obj: # bytes object is empty + self.save_reduce(bytes, (), obj=obj) + else: + self.save_reduce(codecs.encode, + (str(obj, 'latin1'), 'latin1'), obj=obj) + return + self._save_bytes_no_memo(obj) self.memoize(obj) dispatch[bytes] = save_bytes + def _save_bytearray_no_memo(self, obj): + # helper for writing bytearray objects for protocol >= 5 + # without memoizing them + assert self.proto >= 5 + n = len(obj) + if n >= self.framer._FRAME_SIZE_TARGET: + self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj) + else: + self.write(BYTEARRAY8 + pack("<Q", n) + obj) + def save_bytearray(self, obj): if self.proto < 5: if not obj: # bytearray is empty @@ -809,11 +825,7 @@ class _Pickler: else: self.save_reduce(bytearray, (bytes(obj),), obj=obj) return - n = len(obj) - if n >= self.framer._FRAME_SIZE_TARGET: - self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj) - else: - self.write(BYTEARRAY8 + pack("<Q", n) + obj) + self._save_bytearray_no_memo(obj) self.memoize(obj) dispatch[bytearray] = save_bytearray @@ -832,10 +844,18 @@ class _Pickler: if in_band: # Write data in-band # XXX The C implementation avoids a copy here + buf = m.tobytes() + in_memo = id(buf) in self.memo if m.readonly: - self.save_bytes(m.tobytes()) + if in_memo: + self._save_bytes_no_memo(buf) + else: + self.save_bytes(buf) else: - self.save_bytearray(m.tobytes()) + if in_memo: + self._save_bytearray_no_memo(buf) + else: + self.save_bytearray(buf) else: # Write data out-of-band self.write(NEXT_BUFFER) diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 93e7dbb..9922591 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1845,6 +1845,25 @@ class AbstractPickleTests: p = self.dumps(s, proto) self.assert_is_copy(s, self.loads(p)) + def test_bytes_memoization(self): + for proto in protocols: + for array_type in [bytes, ZeroCopyBytes]: + for s in b'', b'xyz', b'xyz'*100: + with self.subTest(proto=proto, array_type=array_type, s=s, independent=False): + b = array_type(s) + p = self.dumps((b, b), proto) + x, y = self.loads(p) + self.assertIs(x, y) + self.assert_is_copy((b, b), (x, y)) + + with self.subTest(proto=proto, array_type=array_type, s=s, independent=True): + b1, b2 = array_type(s), array_type(s) + p = self.dumps((b1, b2), proto) + # Note that (b1, b2) = self.loads(p) might have identical + # components, i.e., b1 is b2, but this is not always the + # case if the content is large (equality still holds). + self.assert_is_copy((b1, b2), self.loads(p)) + def test_bytearray(self): for proto in protocols: for s in b'', b'xyz', b'xyz'*100: @@ -1864,13 +1883,31 @@ class AbstractPickleTests: self.assertNotIn(b'bytearray', p) self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p)) - def test_bytearray_memoization_bug(self): + def test_bytearray_memoization(self): for proto in protocols: - for s in b'', b'xyz', b'xyz'*100: - b = bytearray(s) - p = self.dumps((b, b), proto) - b1, b2 = self.loads(p) - self.assertIs(b1, b2) + for array_type in [bytearray, ZeroCopyBytearray]: + for s in b'', b'xyz', b'xyz'*100: + with self.subTest(proto=proto, array_type=array_type, s=s, independent=False): + b = array_type(s) + p = self.dumps((b, b), proto) + b1, b2 = self.loads(p) + self.assertIs(b1, b2) + + with self.subTest(proto=proto, array_type=array_type, s=s, independent=True): + b1a, b2a = array_type(s), array_type(s) + # Unlike bytes, equal but independent bytearray objects are + # never identical. + self.assertIsNot(b1a, b2a) + + p = self.dumps((b1a, b2a), proto) + b1b, b2b = self.loads(p) + self.assertIsNot(b1b, b2b) + + self.assertIsNot(b1a, b1b) + self.assert_is_copy(b1a, b1b) + + self.assertIsNot(b2a, b2b) + self.assert_is_copy(b2a, b2b) def test_ints(self): for proto in protocols: diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-12-18-23-15.gh-issue-120380.edtqjq.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-18-23-15.gh-issue-120380.edtqjq.rst new file mode 100644 index 0000000..c682a0b --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-18-23-15.gh-issue-120380.edtqjq.rst @@ -0,0 +1,3 @@ +Fix Python implementation of :class:`pickle.Pickler` for :class:`bytes` and +:class:`bytearray` objects when using protocol version 5. Patch by Bénédikt +Tran. |