diff options
-rw-r--r-- | Lib/pickle.py | 130 | ||||
-rw-r--r-- | Lib/test/pickletester.py | 39 | ||||
-rw-r--r-- | Modules/_pickle.c | 160 |
3 files changed, 132 insertions, 197 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py index d1f1538..8449340 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -188,87 +188,72 @@ class _Framer: self.file_write = file_write self.current_frame = None - def _commit_frame(self): - f = self.current_frame - with f.getbuffer() as data: - n = len(data) - write = self.file_write - write(FRAME) - write(pack("<Q", n)) - write(data) - f.seek(0) - f.truncate() - def start_framing(self): self.current_frame = io.BytesIO() def end_framing(self): - if self.current_frame is not None: - self._commit_frame() + if self.current_frame and self.current_frame.tell() > 0: + self.commit_frame(force=True) self.current_frame = None + def commit_frame(self, force=False): + if self.current_frame: + f = self.current_frame + if f.tell() >= self._FRAME_SIZE_TARGET or force: + with f.getbuffer() as data: + n = len(data) + write = self.file_write + write(FRAME) + write(pack("<Q", n)) + write(data) + f.seek(0) + f.truncate() + def write(self, data): - f = self.current_frame - if f is None: - return self.file_write(data) + if self.current_frame: + return self.current_frame.write(data) else: - n = len(data) - if f.tell() >= self._FRAME_SIZE_TARGET: - self._commit_frame() - return f.write(data) + return self.file_write(data) + class _Unframer: def __init__(self, file_read, file_readline, file_tell=None): self.file_read = file_read self.file_readline = file_readline - self.file_tell = file_tell - self.framing_enabled = False self.current_frame = None - self.frame_start = None def read(self, n): - if n == 0: - return b'' - _file_read = self.file_read - if not self.framing_enabled: - return _file_read(n) - f = self.current_frame - if f is not None: - data = f.read(n) - if data: - if len(data) < n: - raise UnpicklingError( - "pickle exhausted before end of frame") - return data - frame_opcode = _file_read(1) - if frame_opcode != FRAME: - raise UnpicklingError( - "expected a FRAME opcode, got {} instead".format(frame_opcode)) - frame_size, = unpack("<Q", _file_read(8)) - if frame_size > sys.maxsize: - raise ValueError("frame size > sys.maxsize: %d" % frame_size) - if self.file_tell is not None: - self.frame_start = self.file_tell() - f = self.current_frame = io.BytesIO(_file_read(frame_size)) - self.readline = f.readline - data = f.read(n) - assert len(data) == n, (len(data), n) - return data + if self.current_frame: + data = self.current_frame.read(n) + if not data and n != 0: + self.current_frame = None + return self.file_read(n) + if len(data) < n: + raise UnpicklingError( + "pickle exhausted before end of frame") + return data + else: + return self.file_read(n) def readline(self): - if not self.framing_enabled: - return self.file_readline() + if self.current_frame: + data = self.current_frame.readline() + if not data: + self.current_frame = None + return self.file_readline() + if data[-1] != b'\n': + raise UnpicklingError( + "pickle exhausted before end of frame") + return data else: - return self.current_frame.readline() + return self.file_readline() - def tell(self): - if self.file_tell is None: - return None - elif self.current_frame is None: - return self.file_tell() - else: - return self.frame_start + self.current_frame.tell() + def load_frame(self, frame_size): + if self.current_frame and self.current_frame.read() != b'': + raise UnpicklingError( + "beginning of a new frame before end of current frame") + self.current_frame = io.BytesIO(self.file_read(frame_size)) # Tools used for pickling. @@ -392,6 +377,8 @@ class _Pickler: self._file_write = file.write except AttributeError: raise TypeError("file must have a 'write' attribute") + self.framer = _Framer(self._file_write) + self.write = self.framer.write self.memo = {} self.proto = int(protocol) self.bin = protocol >= 1 @@ -417,18 +404,12 @@ class _Pickler: raise PicklingError("Pickler.__init__() was not called by " "%s.__init__()" % (self.__class__.__name__,)) if self.proto >= 2: - self._file_write(PROTO + pack("<B", self.proto)) + self.write(PROTO + pack("<B", self.proto)) if self.proto >= 4: - framer = _Framer(self._file_write) - framer.start_framing() - self.write = framer.write - else: - framer = None - self.write = self._file_write + self.framer.start_framing() self.save(obj) self.write(STOP) - if framer is not None: - framer.end_framing() + self.framer.end_framing() def memoize(self, obj): """Store an object in the memo.""" @@ -475,6 +456,8 @@ class _Pickler: return GET + repr(i).encode("ascii") + b'\n' def save(self, obj, save_persistent_id=True): + self.framer.commit_frame() + # Check for persistent id (defined by a subclass) pid = self.persistent_id(obj) if pid is not None and save_persistent_id: @@ -1078,10 +1061,15 @@ class _Unpickler: if not 0 <= proto <= HIGHEST_PROTOCOL: raise ValueError("unsupported pickle protocol: %d" % proto) self.proto = proto - if proto >= 4: - self._unframer.framing_enabled = True dispatch[PROTO[0]] = load_proto + def load_frame(self): + frame_size, = unpack('<Q', self.read(8)) + if frame_size > sys.maxsize: + raise ValueError("frame size > sys.maxsize: %d" % frame_size) + self._unframer.load_frame(frame_size) + dispatch[FRAME[0]] = load_frame + def load_persid(self): pid = self.readline()[:-1].decode("ascii") self.append(self.persistent_load(pid)) diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 34e46f6..ffa1cfb 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1353,6 +1353,45 @@ class AbstractPickleTests(unittest.TestCase): n_frames = pickled.count(b'\x00\x00\x00\x00\x00') self.assertGreaterEqual(n_frames, len(obj)) + def test_optional_frames(self): + if pickle.HIGHEST_PROTOCOL < 4: + return + + def remove_frames(pickled, keep_frame=None): + """Remove frame opcodes from the given pickle.""" + frame_starts = [] + # 1 byte for the opcode and 8 for the argument + frame_opcode_size = 9 + for opcode, _, pos in pickletools.genops(pickled): + if opcode.name == 'FRAME': + frame_starts.append(pos) + + newpickle = bytearray() + last_frame_end = 0 + for i, pos in enumerate(frame_starts): + if keep_frame and keep_frame(i): + continue + newpickle += pickled[last_frame_end:pos] + last_frame_end = pos + frame_opcode_size + newpickle += pickled[last_frame_end:] + return newpickle + + target_frame_size = 64 * 1024 + num_frames = 20 + obj = [bytes([i]) * target_frame_size for i in range(num_frames)] + + for proto in range(4, pickle.HIGHEST_PROTOCOL + 1): + pickled = self.dumps(obj, proto) + + frameless_pickle = remove_frames(pickled) + self.assertEqual(count_opcode(pickle.FRAME, frameless_pickle), 0) + self.assertEqual(obj, self.loads(frameless_pickle)) + + some_frames_pickle = remove_frames(pickled, lambda i: i % 2 == 0) + self.assertLess(count_opcode(pickle.FRAME, some_frames_pickle), + count_opcode(pickle.FRAME, pickled)) + self.assertEqual(obj, self.loads(some_frames_pickle)) + def test_nested_names(self): global Nested class Nested: diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 22ce7a5..741cb8a 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -110,10 +110,6 @@ enum { /* Initial size of the write buffer of Pickler. */ WRITE_BUF_SIZE = 4096, - /* Maximum size of the write buffer of Pickler when pickling to a - stream. This is ignored for in-memory pickling. */ - MAX_WRITE_BUF_SIZE = 64 * 1024, - /* Prefetch size when unpickling (disabled on unpeekable streams) */ PREFETCH = 8192 * 16, @@ -381,7 +377,6 @@ typedef struct UnpicklerObject { char *input_line; Py_ssize_t input_len; Py_ssize_t next_read_idx; - Py_ssize_t frame_end_idx; Py_ssize_t prefetched_idx; /* index of first prefetched byte */ PyObject *read; /* read() method of the input stream. */ @@ -401,7 +396,6 @@ typedef struct UnpicklerObject { int proto; /* Protocol of the pickle loaded. */ int fix_imports; /* Indicate whether Unpickler should fix the name of globals pickled by Python 2.x. */ - int framing; /* True when framing is enabled, proto >= 4 */ } UnpicklerObject; /* Forward declarations */ @@ -802,46 +796,6 @@ _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len) n = data_len; required = self->output_len + n; - if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) { - /* XXX This reallocates a new buffer every time, which is a bit - wasteful. */ - if (_Pickler_FlushToFile(self) < 0) - return -1; - if (_Pickler_ClearBuffer(self) < 0) - return -1; - /* The previous frame was just committed by _Pickler_FlushToFile */ - need_new_frame = self->framing; - if (need_new_frame) - n = data_len + FRAME_HEADER_SIZE; - else - n = data_len; - required = self->output_len + n; - } - if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) { - /* For large pickle chunks, we write directly to the output - file instead of buffering. Note the buffer is empty at this - point (it was flushed above, since required >= n). */ - PyObject *output, *result; - if (need_new_frame) { - char frame_header[FRAME_HEADER_SIZE]; - _Pickler_WriteFrameHeader(self, frame_header, (size_t) data_len); - output = PyBytes_FromStringAndSize(frame_header, FRAME_HEADER_SIZE); - if (output == NULL) - return -1; - result = _Pickler_FastCall(self, self->write, output); - Py_XDECREF(result); - if (result == NULL) - return -1; - } - /* XXX we could spare an intermediate copy and pass - a memoryview instead */ - output = PyBytes_FromStringAndSize(s, data_len); - if (output == NULL) - return -1; - result = _Pickler_FastCall(self, self->write, output); - Py_XDECREF(result); - return (result == NULL) ? -1 : 0; - } if (required > self->max_output_len) { /* Make place in buffer for the pickle chunk */ if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) { @@ -987,7 +941,6 @@ _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input) self->input_buffer = self->buffer.buf; self->input_len = self->buffer.len; self->next_read_idx = 0; - self->frame_end_idx = -1; self->prefetched_idx = self->input_len; return self->input_len; } @@ -1052,7 +1005,7 @@ _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n) return -1; /* Prefetch some data without advancing the file pointer, if possible */ - if (self->peek && !self->framing) { + if (self->peek) { PyObject *len, *prefetched; len = PyLong_FromSsize_t(PREFETCH); if (len == NULL) { @@ -1100,7 +1053,7 @@ _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n) Returns -1 (with an exception set) on failure. On success, return the number of chars read. */ static Py_ssize_t -_Unpickler_ReadUnframed(UnpicklerObject *self, char **s, Py_ssize_t n) +_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n) { Py_ssize_t num_read; @@ -1126,67 +1079,6 @@ _Unpickler_ReadUnframed(UnpicklerObject *self, char **s, Py_ssize_t n) } static Py_ssize_t -_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n) -{ - if (self->framing && - (self->frame_end_idx == -1 || - self->frame_end_idx <= self->next_read_idx)) { - /* Need to read new frame */ - char *dummy = NULL; - unsigned char *frame_start; - size_t frame_len; - if (_Unpickler_ReadUnframed(self, &dummy, FRAME_HEADER_SIZE) < 0) - return -1; - frame_start = (unsigned char *) dummy; - if (frame_start[0] != (unsigned char)FRAME) { - PyErr_Format(UnpicklingError, - "expected FRAME opcode, got 0x%x instead", - frame_start[0]); - return -1; - } - frame_len = (size_t) frame_start[1]; - frame_len |= (size_t) frame_start[2] << 8; - frame_len |= (size_t) frame_start[3] << 16; - frame_len |= (size_t) frame_start[4] << 24; -#if SIZEOF_SIZE_T >= 8 - frame_len |= (size_t) frame_start[5] << 32; - frame_len |= (size_t) frame_start[6] << 40; - frame_len |= (size_t) frame_start[7] << 48; - frame_len |= (size_t) frame_start[8] << 56; -#else - if (frame_start[5] || frame_start[6] || - frame_start[7] || frame_start[8]) { - PyErr_Format(PyExc_OverflowError, - "Frame size too large for 32-bit build"); - return -1; - } -#endif - if (frame_len > PY_SSIZE_T_MAX) { - PyErr_Format(UnpicklingError, "Invalid frame length"); - return -1; - } - if ((Py_ssize_t) frame_len < n) { - PyErr_Format(UnpicklingError, "Bad framing"); - return -1; - } - if (_Unpickler_ReadUnframed(self, &dummy /* unused */, - frame_len) < 0) - return -1; - /* Rewind to start of frame */ - self->frame_end_idx = self->next_read_idx; - self->next_read_idx -= frame_len; - } - if (self->framing) { - /* Check for bad input */ - if (n + self->next_read_idx > self->frame_end_idx) { - PyErr_Format(UnpicklingError, "Bad framing"); - return -1; - } - } - return _Unpickler_ReadUnframed(self, s, n); -} - -static Py_ssize_t _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len, char **result) { @@ -1336,7 +1228,6 @@ _Unpickler_New(void) self->input_line = NULL; self->input_len = 0; self->next_read_idx = 0; - self->frame_end_idx = -1; self->prefetched_idx = 0; self->read = NULL; self->readline = NULL; @@ -1347,7 +1238,6 @@ _Unpickler_New(void) self->num_marks = 0; self->marks_size = 0; self->proto = 0; - self->framing = 0; self->fix_imports = 0; memset(&self->buffer, 0, sizeof(Py_buffer)); self->memo_size = 32; @@ -1474,8 +1364,6 @@ memo_put(PicklerObject *self, PyObject *obj) if (self->fast) return 0; - if (_Pickler_OpcodeBoundary(self)) - return -1; idx = PyMemoTable_Size(self->memo); if (PyMemoTable_Set(self->memo, obj, idx) < 0) @@ -3661,6 +3549,9 @@ save(PicklerObject *self, PyObject *obj, int pers_save) PyObject *reduce_value = NULL; int status = 0; + if (_Pickler_OpcodeBoundary(self) < 0) + return -1; + if (Py_EnterRecursiveCall(" while pickling an object")) return -1; @@ -3855,8 +3746,7 @@ save(PicklerObject *self, PyObject *obj, int pers_save) status = -1; } done: - if (status == 0) - status = _Pickler_OpcodeBoundary(self); + Py_LeaveRecursiveCall(); Py_XDECREF(reduce_func); Py_XDECREF(reduce_value); @@ -4514,7 +4404,7 @@ calc_binsize(char *bytes, int nbytes) int i; size_t x = 0; - for (i = 0; i < nbytes; i++) { + for (i = 0; i < nbytes && i < sizeof(size_t); i++) { x |= (size_t) s[i] << (8 * i); } @@ -5972,7 +5862,6 @@ load_proto(UnpicklerObject *self) i = (unsigned char)s[0]; if (i <= HIGHEST_PROTOCOL) { self->proto = i; - self->framing = (self->proto >= 4); return 0; } @@ -5980,16 +5869,39 @@ load_proto(UnpicklerObject *self) return -1; } +static int +load_frame(UnpicklerObject *self) +{ + char *s; + Py_ssize_t frame_len; + + if (_Unpickler_Read(self, &s, 8) < 0) + return -1; + + frame_len = calc_binsize(s, 8); + if (frame_len < 0) { + PyErr_Format(PyExc_OverflowError, + "FRAME length exceeds system's maximum of %zd bytes", + PY_SSIZE_T_MAX); + return -1; + } + + if (_Unpickler_Read(self, &s, frame_len) < 0) + return -1; + + /* Rewind to start of frame */ + self->next_read_idx -= frame_len; + return 0; +} + static PyObject * load(UnpicklerObject *self) { - PyObject *err; PyObject *value = NULL; char *s; self->num_marks = 0; self->proto = 0; - self->framing = 0; if (Py_SIZE(self->stack)) Pdata_clear(self->stack, 0); @@ -6063,6 +5975,7 @@ load(UnpicklerObject *self) OP(BINPERSID, load_binpersid) OP(REDUCE, load_reduce) OP(PROTO, load_proto) + OP(FRAME, load_frame) OP_ARG(EXT1, load_extension, 1) OP_ARG(EXT2, load_extension, 2) OP_ARG(EXT4, load_extension, 4) @@ -6084,11 +5997,7 @@ load(UnpicklerObject *self) break; /* and we are done! */ } - /* XXX: It is not clear what this is actually for. */ - if ((err = PyErr_Occurred())) { - if (err == PyExc_EOFError) { - PyErr_SetNone(PyExc_EOFError); - } + if (PyErr_Occurred()) { return NULL; } @@ -6383,7 +6292,6 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) self->arg = NULL; self->proto = 0; - self->framing = 0; return 0; } |