summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2024-06-26 10:11:28 (GMT)
committerGitHub <noreply@github.com>2024-06-26 10:11:28 (GMT)
commitf2b4f517b9a0dbe4d2ebd1e1912615ede46d7aec (patch)
tree7447aa74c63a2f3ca08fe1ee436b6e0af3ab088f
parent84634254fef19ab31439e88ec0213acb46bd7b1f (diff)
downloadcpython-f2b4f517b9a0dbe4d2ebd1e1912615ede46d7aec.zip
cpython-f2b4f517b9a0dbe4d2ebd1e1912615ede46d7aec.tar.gz
cpython-f2b4f517b9a0dbe4d2ebd1e1912615ede46d7aec.tar.bz2
[3.13] gh-120380: fix Python implementation of `pickle.Pickler` for `bytes` and `bytearray` objects in protocol version 5. (GH-120422) (GH-120832)
gh-120380: fix Python implementation of `pickle.Pickler` for `bytes` and `bytearray` objects in protocol version 5. (GH-120422) (cherry picked from commit 7595e6743ac78ac0dd19418176f66d251668fafc) Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
-rw-r--r--Lib/pickle.py50
-rw-r--r--Lib/test/pickletester.py49
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2024-06-12-18-23-15.gh-issue-120380.edtqjq.rst3
3 files changed, 81 insertions, 21 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 33c97c8..d719ceb 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -782,14 +782,10 @@ class _Pickler:
self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
dispatch[float] = save_float
- def save_bytes(self, obj):
- if self.proto < 3:
- if not obj: # bytes object is empty
- self.save_reduce(bytes, (), obj=obj)
- else:
- self.save_reduce(codecs.encode,
- (str(obj, 'latin1'), 'latin1'), obj=obj)
- return
+ def _save_bytes_no_memo(self, obj):
+ # helper for writing bytes objects for protocol >= 3
+ # without memoizing them
+ assert self.proto >= 3
n = len(obj)
if n <= 0xff:
self.write(SHORT_BINBYTES + pack("<B", n) + obj)
@@ -799,9 +795,29 @@ class _Pickler:
self._write_large_bytes(BINBYTES + pack("<I", n), obj)
else:
self.write(BINBYTES + pack("<I", n) + obj)
+
+ def save_bytes(self, obj):
+ if self.proto < 3:
+ if not obj: # bytes object is empty
+ self.save_reduce(bytes, (), obj=obj)
+ else:
+ self.save_reduce(codecs.encode,
+ (str(obj, 'latin1'), 'latin1'), obj=obj)
+ return
+ self._save_bytes_no_memo(obj)
self.memoize(obj)
dispatch[bytes] = save_bytes
+ def _save_bytearray_no_memo(self, obj):
+ # helper for writing bytearray objects for protocol >= 5
+ # without memoizing them
+ assert self.proto >= 5
+ n = len(obj)
+ if n >= self.framer._FRAME_SIZE_TARGET:
+ self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
+ else:
+ self.write(BYTEARRAY8 + pack("<Q", n) + obj)
+
def save_bytearray(self, obj):
if self.proto < 5:
if not obj: # bytearray is empty
@@ -809,11 +825,7 @@ class _Pickler:
else:
self.save_reduce(bytearray, (bytes(obj),), obj=obj)
return
- n = len(obj)
- if n >= self.framer._FRAME_SIZE_TARGET:
- self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
- else:
- self.write(BYTEARRAY8 + pack("<Q", n) + obj)
+ self._save_bytearray_no_memo(obj)
self.memoize(obj)
dispatch[bytearray] = save_bytearray
@@ -832,10 +844,18 @@ class _Pickler:
if in_band:
# Write data in-band
# XXX The C implementation avoids a copy here
+ buf = m.tobytes()
+ in_memo = id(buf) in self.memo
if m.readonly:
- self.save_bytes(m.tobytes())
+ if in_memo:
+ self._save_bytes_no_memo(buf)
+ else:
+ self.save_bytes(buf)
else:
- self.save_bytearray(m.tobytes())
+ if in_memo:
+ self._save_bytearray_no_memo(buf)
+ else:
+ self.save_bytearray(buf)
else:
# Write data out-of-band
self.write(NEXT_BUFFER)
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 93e7dbb..9922591 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -1845,6 +1845,25 @@ class AbstractPickleTests:
p = self.dumps(s, proto)
self.assert_is_copy(s, self.loads(p))
+ def test_bytes_memoization(self):
+ for proto in protocols:
+ for array_type in [bytes, ZeroCopyBytes]:
+ for s in b'', b'xyz', b'xyz'*100:
+ with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
+ b = array_type(s)
+ p = self.dumps((b, b), proto)
+ x, y = self.loads(p)
+ self.assertIs(x, y)
+ self.assert_is_copy((b, b), (x, y))
+
+ with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
+ b1, b2 = array_type(s), array_type(s)
+ p = self.dumps((b1, b2), proto)
+ # Note that (b1, b2) = self.loads(p) might have identical
+ # components, i.e., b1 is b2, but this is not always the
+ # case if the content is large (equality still holds).
+ self.assert_is_copy((b1, b2), self.loads(p))
+
def test_bytearray(self):
for proto in protocols:
for s in b'', b'xyz', b'xyz'*100:
@@ -1864,13 +1883,31 @@ class AbstractPickleTests:
self.assertNotIn(b'bytearray', p)
self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p))
- def test_bytearray_memoization_bug(self):
+ def test_bytearray_memoization(self):
for proto in protocols:
- for s in b'', b'xyz', b'xyz'*100:
- b = bytearray(s)
- p = self.dumps((b, b), proto)
- b1, b2 = self.loads(p)
- self.assertIs(b1, b2)
+ for array_type in [bytearray, ZeroCopyBytearray]:
+ for s in b'', b'xyz', b'xyz'*100:
+ with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
+ b = array_type(s)
+ p = self.dumps((b, b), proto)
+ b1, b2 = self.loads(p)
+ self.assertIs(b1, b2)
+
+ with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
+ b1a, b2a = array_type(s), array_type(s)
+ # Unlike bytes, equal but independent bytearray objects are
+ # never identical.
+ self.assertIsNot(b1a, b2a)
+
+ p = self.dumps((b1a, b2a), proto)
+ b1b, b2b = self.loads(p)
+ self.assertIsNot(b1b, b2b)
+
+ self.assertIsNot(b1a, b1b)
+ self.assert_is_copy(b1a, b1b)
+
+ self.assertIsNot(b2a, b2b)
+ self.assert_is_copy(b2a, b2b)
def test_ints(self):
for proto in protocols:
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-12-18-23-15.gh-issue-120380.edtqjq.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-18-23-15.gh-issue-120380.edtqjq.rst
new file mode 100644
index 0000000..c682a0b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-18-23-15.gh-issue-120380.edtqjq.rst
@@ -0,0 +1,3 @@
+Fix Python implementation of :class:`pickle.Pickler` for :class:`bytes` and
+:class:`bytearray` objects when using protocol version 5. Patch by Bénédikt
+Tran.