summaryrefslogtreecommitdiffstats
path: root/Lib/test/pickletester.py
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2018-01-12 22:28:31 (GMT)
committerGitHub <noreply@github.com>2018-01-12 22:28:31 (GMT)
commit5b76bdba071e7bbd9fda0b9b100d1506d95c04bd (patch)
tree6330db8f2ec3db059cdad53010c11bce85b388fc /Lib/test/pickletester.py
parentf3031b8a7ad71d3b6ed05da7f3041d9efbe773cf (diff)
downloadcpython-5b76bdba071e7bbd9fda0b9b100d1506d95c04bd.zip
cpython-5b76bdba071e7bbd9fda0b9b100d1506d95c04bd.tar.gz
cpython-5b76bdba071e7bbd9fda0b9b100d1506d95c04bd.tar.bz2
bpo-31993: Do not use memoryview when pickling large strings. (#5154)
PyMemoryView_FromMemory() created a memoryview referring to the internal data of the string. When the string is destroyed, the memoryview becomes a reference to freed memory.
Diffstat (limited to 'Lib/test/pickletester.py')
-rw-r--r--Lib/test/pickletester.py60
1 files changed, 30 insertions, 30 deletions
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index f4e3f81..062ec5a 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -2169,67 +2169,67 @@ class AbstractPickleTests(unittest.TestCase):
def test_framed_write_sizes_with_delayed_writer(self):
class ChunkAccumulator:
"""Accumulate pickler output in a list of raw chunks."""
-
def __init__(self):
self.chunks = []
-
def write(self, chunk):
self.chunks.append(chunk)
-
def concatenate_chunks(self):
- # Some chunks can be memoryview instances, we need to convert
- # them to bytes to be able to call join
- return b"".join([c.tobytes() if hasattr(c, 'tobytes') else c
- for c in self.chunks])
-
- small_objects = [(str(i).encode('ascii'), i % 42, {'i': str(i)})
- for i in range(int(1e4))]
+ return b"".join(self.chunks)
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
+ objects = [(str(i).encode('ascii'), i % 42, {'i': str(i)})
+ for i in range(int(1e4))]
+ # Add a large unique ASCII string
+ objects.append('0123456789abcdef' *
+ (self.FRAME_SIZE_TARGET // 16 + 1))
+
# Protocol 4 packs groups of small objects into frames and issues
# calls to write only once or twice per frame:
# The C pickler issues one call to write per-frame (header and
# contents) while Python pickler issues two calls to write: one for
# the frame header and one for the frame binary contents.
writer = ChunkAccumulator()
- self.pickler(writer, proto).dump(small_objects)
+ self.pickler(writer, proto).dump(objects)
# Actually read the binary content of the chunks after the end
- # of the call to dump: ant memoryview passed to write should not
+ # of the call to dump: any memoryview passed to write should not
# be released otherwise this delayed access would not be possible.
pickled = writer.concatenate_chunks()
reconstructed = self.loads(pickled)
- self.assertEqual(reconstructed, small_objects)
+ self.assertEqual(reconstructed, objects)
self.assertGreater(len(writer.chunks), 1)
- n_frames, remainder = divmod(len(pickled), self.FRAME_SIZE_TARGET)
- if remainder > 0:
- n_frames += 1
+ # memoryviews should own the memory.
+ del objects
+ support.gc_collect()
+ self.assertEqual(writer.concatenate_chunks(), pickled)
+ n_frames = (len(pickled) - 1) // self.FRAME_SIZE_TARGET + 1
# There should be at least one call to write per frame
self.assertGreaterEqual(len(writer.chunks), n_frames)
# but not too many either: there can be one for the proto,
- # one per-frame header and one per frame for the actual contents.
- self.assertGreaterEqual(2 * n_frames + 1, len(writer.chunks))
+ # one per-frame header, one per frame for the actual contents,
+ # and two for the header.
+ self.assertLessEqual(len(writer.chunks), 2 * n_frames + 3)
- chunk_sizes = [len(c) for c in writer.chunks[:-1]]
+ chunk_sizes = [len(c) for c in writer.chunks]
large_sizes = [s for s in chunk_sizes
if s >= self.FRAME_SIZE_TARGET]
- small_sizes = [s for s in chunk_sizes
- if s < self.FRAME_SIZE_TARGET]
+ medium_sizes = [s for s in chunk_sizes
+ if 9 < s < self.FRAME_SIZE_TARGET]
+ small_sizes = [s for s in chunk_sizes if s <= 9]
# Large chunks should not be too large:
for chunk_size in large_sizes:
- self.assertGreater(2 * self.FRAME_SIZE_TARGET, chunk_size)
-
- last_chunk_size = len(writer.chunks[-1])
- self.assertGreater(2 * self.FRAME_SIZE_TARGET, last_chunk_size)
-
- # Small chunks (if any) should be very small
- # (only proto and frame headers)
- for chunk_size in small_sizes:
- self.assertGreaterEqual(9, chunk_size)
+ self.assertLess(chunk_size, 2 * self.FRAME_SIZE_TARGET,
+ chunk_sizes)
+ # There shouldn't bee too many small chunks: the protocol header,
+ # the frame headers and the large string headers are written
+ # in small chunks.
+ self.assertLessEqual(len(small_sizes),
+ len(large_sizes) + len(medium_sizes) + 3,
+ chunk_sizes)
def test_nested_names(self):
global Nested