summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2018-01-20 14:42:44 (GMT)
committerGitHub <noreply@github.com>2018-01-20 14:42:44 (GMT)
commit1211c9a9897a174b7261ca258cabf289815a40d8 (patch)
treea43814a9172d44d64d7427d10249c816e5f4ef0e
parentbd5c7d238c01b90fbfae8ea45b47bd601900abaf (diff)
downloadcpython-1211c9a9897a174b7261ca258cabf289815a40d8.zip
cpython-1211c9a9897a174b7261ca258cabf289815a40d8.tar.gz
cpython-1211c9a9897a174b7261ca258cabf289815a40d8.tar.bz2
bpo-32503: Avoid creating too small frames in pickles. (#5127)
-rw-r--r--Lib/pickle.py12
-rw-r--r--Lib/test/pickletester.py82
-rw-r--r--Misc/NEWS.d/next/Library/2018-01-07-09-22-26.bpo-32503.ViMxpD.rst1
-rw-r--r--Modules/_pickle.c18
4 files changed, 62 insertions, 51 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 301e8cf..e6d0037 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -183,6 +183,7 @@ __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
class _Framer:
+ _FRAME_SIZE_MIN = 4
_FRAME_SIZE_TARGET = 64 * 1024
def __init__(self, file_write):
@@ -203,11 +204,12 @@ class _Framer:
if f.tell() >= self._FRAME_SIZE_TARGET or force:
data = f.getbuffer()
write = self.file_write
- # Issue a single call to the write method of the underlying
- # file object for the frame opcode with the size of the
- # frame. The concatenation is expected to be less expensive
- # than issuing an additional call to write.
- write(FRAME + pack("<Q", len(data)))
+ if len(data) >= self._FRAME_SIZE_MIN:
+ # Issue a single call to the write method of the underlying
+ # file object for the frame opcode with the size of the
+ # frame. The concatenation is expected to be less expensive
+ # than issuing an additional call to write.
+ write(FRAME + pack("<Q", len(data)))
# Issue a separate call to write to append the frame
# contents without concatenation to the above to avoid a
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 062ec5a..b84b878 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -2037,6 +2037,7 @@ class AbstractPickleTests(unittest.TestCase):
# Exercise framing (proto >= 4) for significant workloads
+ FRAME_SIZE_MIN = 4
FRAME_SIZE_TARGET = 64 * 1024
def check_frame_opcodes(self, pickled):
@@ -2047,36 +2048,43 @@ class AbstractPickleTests(unittest.TestCase):
framed by default and are therefore considered a frame by themselves in
the following consistency check.
"""
- last_arg = last_pos = last_frame_opcode_size = None
- frameless_opcode_sizes = {
- 'BINBYTES': 5,
- 'BINUNICODE': 5,
- 'BINBYTES8': 9,
- 'BINUNICODE8': 9,
- }
+ frame_end = frameless_start = None
+ frameless_opcodes = {'BINBYTES', 'BINUNICODE', 'BINBYTES8', 'BINUNICODE8'}
for op, arg, pos in pickletools.genops(pickled):
- if op.name in frameless_opcode_sizes:
- if len(arg) > self.FRAME_SIZE_TARGET:
- frame_opcode_size = frameless_opcode_sizes[op.name]
- arg = len(arg)
- else:
- continue
- elif op.name == 'FRAME':
- frame_opcode_size = 9
- else:
- continue
-
- if last_pos is not None:
- # The previous frame's size should be equal to the number
- # of bytes up to the current frame.
- frame_size = pos - last_pos - last_frame_opcode_size
- self.assertEqual(frame_size, last_arg)
- last_arg, last_pos = arg, pos
- last_frame_opcode_size = frame_opcode_size
- # The last frame's size should be equal to the number of bytes up
- # to the pickle's end.
- frame_size = len(pickled) - last_pos - last_frame_opcode_size
- self.assertEqual(frame_size, last_arg)
+ if frame_end is not None:
+ self.assertLessEqual(pos, frame_end)
+ if pos == frame_end:
+ frame_end = None
+
+ if frame_end is not None: # framed
+ self.assertNotEqual(op.name, 'FRAME')
+ if op.name in frameless_opcodes:
+ # Only short bytes and str objects should be written
+ # in a frame
+ self.assertLessEqual(len(arg), self.FRAME_SIZE_TARGET)
+
+ else: # not framed
+ if (op.name == 'FRAME' or
+ (op.name in frameless_opcodes and
+ len(arg) > self.FRAME_SIZE_TARGET)):
+ # Frame or large bytes or str object
+ if frameless_start is not None:
+ # Only short data should be written outside of a frame
+ self.assertLess(pos - frameless_start,
+ self.FRAME_SIZE_MIN)
+ frameless_start = None
+ elif frameless_start is None and op.name != 'PROTO':
+ frameless_start = pos
+
+ if op.name == 'FRAME':
+ self.assertGreaterEqual(arg, self.FRAME_SIZE_MIN)
+ frame_end = pos + 9 + arg
+
+ pos = len(pickled)
+ if frame_end is not None:
+ self.assertEqual(frame_end, pos)
+ elif frameless_start is not None:
+ self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN)
def test_framing_many_objects(self):
obj = list(range(10**5))
@@ -2095,7 +2103,8 @@ class AbstractPickleTests(unittest.TestCase):
def test_framing_large_objects(self):
N = 1024 * 1024
- obj = [b'x' * N, b'y' * N, 'z' * N]
+ small_items = [[i] for i in range(10)]
+ obj = [b'x' * N, *small_items, b'y' * N, 'z' * N]
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
for fast in [False, True]:
with self.subTest(proto=proto, fast=fast):
@@ -2119,12 +2128,9 @@ class AbstractPickleTests(unittest.TestCase):
# Perform full equality check if the lengths match.
self.assertEqual(obj, unpickled)
n_frames = count_opcode(pickle.FRAME, pickled)
- if not fast:
- # One frame per memoize for each large object.
- self.assertGreaterEqual(n_frames, len(obj))
- else:
- # One frame at the beginning and one at the end.
- self.assertGreaterEqual(n_frames, 2)
+ # A single frame for small objects between
+ # first two large objects.
+ self.assertEqual(n_frames, 1)
self.check_frame_opcodes(pickled)
def test_optional_frames(self):
@@ -2152,7 +2158,9 @@ class AbstractPickleTests(unittest.TestCase):
frame_size = self.FRAME_SIZE_TARGET
num_frames = 20
- obj = [bytes([i]) * frame_size for i in range(num_frames)]
+ # Large byte objects (dict values) intermitted with small objects
+ # (dict keys)
+ obj = {i: bytes([i]) * frame_size for i in range(num_frames)}
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
pickled = self.dumps(obj, proto)
diff --git a/Misc/NEWS.d/next/Library/2018-01-07-09-22-26.bpo-32503.ViMxpD.rst b/Misc/NEWS.d/next/Library/2018-01-07-09-22-26.bpo-32503.ViMxpD.rst
new file mode 100644
index 0000000..609c593
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-01-07-09-22-26.bpo-32503.ViMxpD.rst
@@ -0,0 +1 @@
+Pickling with protocol 4 no longer creates too small frames.
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index f8cbea1..f06a87d 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -119,8 +119,8 @@ enum {
/* Prefetch size when unpickling (disabled on unpeekable streams) */
PREFETCH = 8192 * 16,
+ FRAME_SIZE_MIN = 4,
FRAME_SIZE_TARGET = 64 * 1024,
-
FRAME_HEADER_SIZE = 9
};
@@ -949,13 +949,6 @@ _write_size64(char *out, size_t value)
}
}
-static void
-_Pickler_WriteFrameHeader(PicklerObject *self, char *qdata, size_t frame_len)
-{
- qdata[0] = FRAME;
- _write_size64(qdata + 1, frame_len);
-}
-
static int
_Pickler_CommitFrame(PicklerObject *self)
{
@@ -966,7 +959,14 @@ _Pickler_CommitFrame(PicklerObject *self)
return 0;
frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
- _Pickler_WriteFrameHeader(self, qdata, frame_len);
+ if (frame_len >= FRAME_SIZE_MIN) {
+ qdata[0] = FRAME;
+ _write_size64(qdata + 1, frame_len);
+ }
+ else {
+ memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
+ self->output_len -= FRAME_HEADER_SIZE;
+ }
self->frame_start = -1;
return 0;
}