summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/pickle.py130
-rw-r--r--Lib/test/pickletester.py39
-rw-r--r--Modules/_pickle.c160
3 files changed, 132 insertions, 197 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index d1f1538..8449340 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -188,87 +188,72 @@ class _Framer:
self.file_write = file_write
self.current_frame = None
- def _commit_frame(self):
- f = self.current_frame
- with f.getbuffer() as data:
- n = len(data)
- write = self.file_write
- write(FRAME)
- write(pack("<Q", n))
- write(data)
- f.seek(0)
- f.truncate()
-
def start_framing(self):
self.current_frame = io.BytesIO()
def end_framing(self):
- if self.current_frame is not None:
- self._commit_frame()
+ if self.current_frame and self.current_frame.tell() > 0:
+ self.commit_frame(force=True)
self.current_frame = None
+ def commit_frame(self, force=False):
+ if self.current_frame:
+ f = self.current_frame
+ if f.tell() >= self._FRAME_SIZE_TARGET or force:
+ with f.getbuffer() as data:
+ n = len(data)
+ write = self.file_write
+ write(FRAME)
+ write(pack("<Q", n))
+ write(data)
+ f.seek(0)
+ f.truncate()
+
def write(self, data):
- f = self.current_frame
- if f is None:
- return self.file_write(data)
+ if self.current_frame:
+ return self.current_frame.write(data)
else:
- n = len(data)
- if f.tell() >= self._FRAME_SIZE_TARGET:
- self._commit_frame()
- return f.write(data)
+ return self.file_write(data)
+
class _Unframer:
def __init__(self, file_read, file_readline, file_tell=None):
self.file_read = file_read
self.file_readline = file_readline
- self.file_tell = file_tell
- self.framing_enabled = False
self.current_frame = None
- self.frame_start = None
def read(self, n):
- if n == 0:
- return b''
- _file_read = self.file_read
- if not self.framing_enabled:
- return _file_read(n)
- f = self.current_frame
- if f is not None:
- data = f.read(n)
- if data:
- if len(data) < n:
- raise UnpicklingError(
- "pickle exhausted before end of frame")
- return data
- frame_opcode = _file_read(1)
- if frame_opcode != FRAME:
- raise UnpicklingError(
- "expected a FRAME opcode, got {} instead".format(frame_opcode))
- frame_size, = unpack("<Q", _file_read(8))
- if frame_size > sys.maxsize:
- raise ValueError("frame size > sys.maxsize: %d" % frame_size)
- if self.file_tell is not None:
- self.frame_start = self.file_tell()
- f = self.current_frame = io.BytesIO(_file_read(frame_size))
- self.readline = f.readline
- data = f.read(n)
- assert len(data) == n, (len(data), n)
- return data
+ if self.current_frame:
+ data = self.current_frame.read(n)
+ if not data and n != 0:
+ self.current_frame = None
+ return self.file_read(n)
+ if len(data) < n:
+ raise UnpicklingError(
+ "pickle exhausted before end of frame")
+ return data
+ else:
+ return self.file_read(n)
def readline(self):
- if not self.framing_enabled:
- return self.file_readline()
+ if self.current_frame:
+ data = self.current_frame.readline()
+ if not data:
+ self.current_frame = None
+ return self.file_readline()
+ if data[-1] != b'\n':
+ raise UnpicklingError(
+ "pickle exhausted before end of frame")
+ return data
else:
- return self.current_frame.readline()
+ return self.file_readline()
- def tell(self):
- if self.file_tell is None:
- return None
- elif self.current_frame is None:
- return self.file_tell()
- else:
- return self.frame_start + self.current_frame.tell()
+ def load_frame(self, frame_size):
+ if self.current_frame and self.current_frame.read() != b'':
+ raise UnpicklingError(
+ "beginning of a new frame before end of current frame")
+ self.current_frame = io.BytesIO(self.file_read(frame_size))
# Tools used for pickling.
@@ -392,6 +377,8 @@ class _Pickler:
self._file_write = file.write
except AttributeError:
raise TypeError("file must have a 'write' attribute")
+ self.framer = _Framer(self._file_write)
+ self.write = self.framer.write
self.memo = {}
self.proto = int(protocol)
self.bin = protocol >= 1
@@ -417,18 +404,12 @@ class _Pickler:
raise PicklingError("Pickler.__init__() was not called by "
"%s.__init__()" % (self.__class__.__name__,))
if self.proto >= 2:
- self._file_write(PROTO + pack("<B", self.proto))
+ self.write(PROTO + pack("<B", self.proto))
if self.proto >= 4:
- framer = _Framer(self._file_write)
- framer.start_framing()
- self.write = framer.write
- else:
- framer = None
- self.write = self._file_write
+ self.framer.start_framing()
self.save(obj)
self.write(STOP)
- if framer is not None:
- framer.end_framing()
+ self.framer.end_framing()
def memoize(self, obj):
"""Store an object in the memo."""
@@ -475,6 +456,8 @@ class _Pickler:
return GET + repr(i).encode("ascii") + b'\n'
def save(self, obj, save_persistent_id=True):
+ self.framer.commit_frame()
+
# Check for persistent id (defined by a subclass)
pid = self.persistent_id(obj)
if pid is not None and save_persistent_id:
@@ -1078,10 +1061,15 @@ class _Unpickler:
if not 0 <= proto <= HIGHEST_PROTOCOL:
raise ValueError("unsupported pickle protocol: %d" % proto)
self.proto = proto
- if proto >= 4:
- self._unframer.framing_enabled = True
dispatch[PROTO[0]] = load_proto
+ def load_frame(self):
+ frame_size, = unpack('<Q', self.read(8))
+ if frame_size > sys.maxsize:
+ raise ValueError("frame size > sys.maxsize: %d" % frame_size)
+ self._unframer.load_frame(frame_size)
+ dispatch[FRAME[0]] = load_frame
+
def load_persid(self):
pid = self.readline()[:-1].decode("ascii")
self.append(self.persistent_load(pid))
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 34e46f6..ffa1cfb 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -1353,6 +1353,45 @@ class AbstractPickleTests(unittest.TestCase):
n_frames = pickled.count(b'\x00\x00\x00\x00\x00')
self.assertGreaterEqual(n_frames, len(obj))
+ def test_optional_frames(self):
+ if pickle.HIGHEST_PROTOCOL < 4:
+ return
+
+ def remove_frames(pickled, keep_frame=None):
+ """Remove frame opcodes from the given pickle."""
+ frame_starts = []
+ # 1 byte for the opcode and 8 for the argument
+ frame_opcode_size = 9
+ for opcode, _, pos in pickletools.genops(pickled):
+ if opcode.name == 'FRAME':
+ frame_starts.append(pos)
+
+ newpickle = bytearray()
+ last_frame_end = 0
+ for i, pos in enumerate(frame_starts):
+ if keep_frame and keep_frame(i):
+ continue
+ newpickle += pickled[last_frame_end:pos]
+ last_frame_end = pos + frame_opcode_size
+ newpickle += pickled[last_frame_end:]
+ return newpickle
+
+ target_frame_size = 64 * 1024
+ num_frames = 20
+ obj = [bytes([i]) * target_frame_size for i in range(num_frames)]
+
+ for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
+ pickled = self.dumps(obj, proto)
+
+ frameless_pickle = remove_frames(pickled)
+ self.assertEqual(count_opcode(pickle.FRAME, frameless_pickle), 0)
+ self.assertEqual(obj, self.loads(frameless_pickle))
+
+ some_frames_pickle = remove_frames(pickled, lambda i: i % 2 == 0)
+ self.assertLess(count_opcode(pickle.FRAME, some_frames_pickle),
+ count_opcode(pickle.FRAME, pickled))
+ self.assertEqual(obj, self.loads(some_frames_pickle))
+
def test_nested_names(self):
global Nested
class Nested:
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 22ce7a5..741cb8a 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -110,10 +110,6 @@ enum {
/* Initial size of the write buffer of Pickler. */
WRITE_BUF_SIZE = 4096,
- /* Maximum size of the write buffer of Pickler when pickling to a
- stream. This is ignored for in-memory pickling. */
- MAX_WRITE_BUF_SIZE = 64 * 1024,
-
/* Prefetch size when unpickling (disabled on unpeekable streams) */
PREFETCH = 8192 * 16,
@@ -381,7 +377,6 @@ typedef struct UnpicklerObject {
char *input_line;
Py_ssize_t input_len;
Py_ssize_t next_read_idx;
- Py_ssize_t frame_end_idx;
Py_ssize_t prefetched_idx; /* index of first prefetched byte */
PyObject *read; /* read() method of the input stream. */
@@ -401,7 +396,6 @@ typedef struct UnpicklerObject {
int proto; /* Protocol of the pickle loaded. */
int fix_imports; /* Indicate whether Unpickler should fix
the name of globals pickled by Python 2.x. */
- int framing; /* True when framing is enabled, proto >= 4 */
} UnpicklerObject;
/* Forward declarations */
@@ -802,46 +796,6 @@ _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
n = data_len;
required = self->output_len + n;
- if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
- /* XXX This reallocates a new buffer every time, which is a bit
- wasteful. */
- if (_Pickler_FlushToFile(self) < 0)
- return -1;
- if (_Pickler_ClearBuffer(self) < 0)
- return -1;
- /* The previous frame was just committed by _Pickler_FlushToFile */
- need_new_frame = self->framing;
- if (need_new_frame)
- n = data_len + FRAME_HEADER_SIZE;
- else
- n = data_len;
- required = self->output_len + n;
- }
- if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
- /* For large pickle chunks, we write directly to the output
- file instead of buffering. Note the buffer is empty at this
- point (it was flushed above, since required >= n). */
- PyObject *output, *result;
- if (need_new_frame) {
- char frame_header[FRAME_HEADER_SIZE];
- _Pickler_WriteFrameHeader(self, frame_header, (size_t) data_len);
- output = PyBytes_FromStringAndSize(frame_header, FRAME_HEADER_SIZE);
- if (output == NULL)
- return -1;
- result = _Pickler_FastCall(self, self->write, output);
- Py_XDECREF(result);
- if (result == NULL)
- return -1;
- }
- /* XXX we could spare an intermediate copy and pass
- a memoryview instead */
- output = PyBytes_FromStringAndSize(s, data_len);
- if (output == NULL)
- return -1;
- result = _Pickler_FastCall(self, self->write, output);
- Py_XDECREF(result);
- return (result == NULL) ? -1 : 0;
- }
if (required > self->max_output_len) {
/* Make place in buffer for the pickle chunk */
if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
@@ -987,7 +941,6 @@ _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
self->input_buffer = self->buffer.buf;
self->input_len = self->buffer.len;
self->next_read_idx = 0;
- self->frame_end_idx = -1;
self->prefetched_idx = self->input_len;
return self->input_len;
}
@@ -1052,7 +1005,7 @@ _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
return -1;
/* Prefetch some data without advancing the file pointer, if possible */
- if (self->peek && !self->framing) {
+ if (self->peek) {
PyObject *len, *prefetched;
len = PyLong_FromSsize_t(PREFETCH);
if (len == NULL) {
@@ -1100,7 +1053,7 @@ _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
Returns -1 (with an exception set) on failure. On success, return the
number of chars read. */
static Py_ssize_t
-_Unpickler_ReadUnframed(UnpicklerObject *self, char **s, Py_ssize_t n)
+_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
{
Py_ssize_t num_read;
@@ -1126,67 +1079,6 @@ _Unpickler_ReadUnframed(UnpicklerObject *self, char **s, Py_ssize_t n)
}
static Py_ssize_t
-_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
-{
- if (self->framing &&
- (self->frame_end_idx == -1 ||
- self->frame_end_idx <= self->next_read_idx)) {
- /* Need to read new frame */
- char *dummy = NULL;
- unsigned char *frame_start;
- size_t frame_len;
- if (_Unpickler_ReadUnframed(self, &dummy, FRAME_HEADER_SIZE) < 0)
- return -1;
- frame_start = (unsigned char *) dummy;
- if (frame_start[0] != (unsigned char)FRAME) {
- PyErr_Format(UnpicklingError,
- "expected FRAME opcode, got 0x%x instead",
- frame_start[0]);
- return -1;
- }
- frame_len = (size_t) frame_start[1];
- frame_len |= (size_t) frame_start[2] << 8;
- frame_len |= (size_t) frame_start[3] << 16;
- frame_len |= (size_t) frame_start[4] << 24;
-#if SIZEOF_SIZE_T >= 8
- frame_len |= (size_t) frame_start[5] << 32;
- frame_len |= (size_t) frame_start[6] << 40;
- frame_len |= (size_t) frame_start[7] << 48;
- frame_len |= (size_t) frame_start[8] << 56;
-#else
- if (frame_start[5] || frame_start[6] ||
- frame_start[7] || frame_start[8]) {
- PyErr_Format(PyExc_OverflowError,
- "Frame size too large for 32-bit build");
- return -1;
- }
-#endif
- if (frame_len > PY_SSIZE_T_MAX) {
- PyErr_Format(UnpicklingError, "Invalid frame length");
- return -1;
- }
- if ((Py_ssize_t) frame_len < n) {
- PyErr_Format(UnpicklingError, "Bad framing");
- return -1;
- }
- if (_Unpickler_ReadUnframed(self, &dummy /* unused */,
- frame_len) < 0)
- return -1;
- /* Rewind to start of frame */
- self->frame_end_idx = self->next_read_idx;
- self->next_read_idx -= frame_len;
- }
- if (self->framing) {
- /* Check for bad input */
- if (n + self->next_read_idx > self->frame_end_idx) {
- PyErr_Format(UnpicklingError, "Bad framing");
- return -1;
- }
- }
- return _Unpickler_ReadUnframed(self, s, n);
-}
-
-static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
char **result)
{
@@ -1336,7 +1228,6 @@ _Unpickler_New(void)
self->input_line = NULL;
self->input_len = 0;
self->next_read_idx = 0;
- self->frame_end_idx = -1;
self->prefetched_idx = 0;
self->read = NULL;
self->readline = NULL;
@@ -1347,7 +1238,6 @@ _Unpickler_New(void)
self->num_marks = 0;
self->marks_size = 0;
self->proto = 0;
- self->framing = 0;
self->fix_imports = 0;
memset(&self->buffer, 0, sizeof(Py_buffer));
self->memo_size = 32;
@@ -1474,8 +1364,6 @@ memo_put(PicklerObject *self, PyObject *obj)
if (self->fast)
return 0;
- if (_Pickler_OpcodeBoundary(self))
- return -1;
idx = PyMemoTable_Size(self->memo);
if (PyMemoTable_Set(self->memo, obj, idx) < 0)
@@ -3661,6 +3549,9 @@ save(PicklerObject *self, PyObject *obj, int pers_save)
PyObject *reduce_value = NULL;
int status = 0;
+ if (_Pickler_OpcodeBoundary(self) < 0)
+ return -1;
+
if (Py_EnterRecursiveCall(" while pickling an object"))
return -1;
@@ -3855,8 +3746,7 @@ save(PicklerObject *self, PyObject *obj, int pers_save)
status = -1;
}
done:
- if (status == 0)
- status = _Pickler_OpcodeBoundary(self);
+
Py_LeaveRecursiveCall();
Py_XDECREF(reduce_func);
Py_XDECREF(reduce_value);
@@ -4514,7 +4404,7 @@ calc_binsize(char *bytes, int nbytes)
int i;
size_t x = 0;
- for (i = 0; i < nbytes; i++) {
+ for (i = 0; i < nbytes && i < sizeof(size_t); i++) {
x |= (size_t) s[i] << (8 * i);
}
@@ -5972,7 +5862,6 @@ load_proto(UnpicklerObject *self)
i = (unsigned char)s[0];
if (i <= HIGHEST_PROTOCOL) {
self->proto = i;
- self->framing = (self->proto >= 4);
return 0;
}
@@ -5980,16 +5869,39 @@ load_proto(UnpicklerObject *self)
return -1;
}
+static int
+load_frame(UnpicklerObject *self)
+{
+ char *s;
+ Py_ssize_t frame_len;
+
+ if (_Unpickler_Read(self, &s, 8) < 0)
+ return -1;
+
+ frame_len = calc_binsize(s, 8);
+ if (frame_len < 0) {
+ PyErr_Format(PyExc_OverflowError,
+ "FRAME length exceeds system's maximum of %zd bytes",
+ PY_SSIZE_T_MAX);
+ return -1;
+ }
+
+ if (_Unpickler_Read(self, &s, frame_len) < 0)
+ return -1;
+
+ /* Rewind to start of frame */
+ self->next_read_idx -= frame_len;
+ return 0;
+}
+
static PyObject *
load(UnpicklerObject *self)
{
- PyObject *err;
PyObject *value = NULL;
char *s;
self->num_marks = 0;
self->proto = 0;
- self->framing = 0;
if (Py_SIZE(self->stack))
Pdata_clear(self->stack, 0);
@@ -6063,6 +5975,7 @@ load(UnpicklerObject *self)
OP(BINPERSID, load_binpersid)
OP(REDUCE, load_reduce)
OP(PROTO, load_proto)
+ OP(FRAME, load_frame)
OP_ARG(EXT1, load_extension, 1)
OP_ARG(EXT2, load_extension, 2)
OP_ARG(EXT4, load_extension, 4)
@@ -6084,11 +5997,7 @@ load(UnpicklerObject *self)
break; /* and we are done! */
}
- /* XXX: It is not clear what this is actually for. */
- if ((err = PyErr_Occurred())) {
- if (err == PyExc_EOFError) {
- PyErr_SetNone(PyExc_EOFError);
- }
+ if (PyErr_Occurred()) {
return NULL;
}
@@ -6383,7 +6292,6 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
self->arg = NULL;
self->proto = 0;
- self->framing = 0;
return 0;
}