From 1d63c9f15134a2776b39fb979c506637f73c8a51 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Sun, 2 Feb 2003 20:29:39 +0000 Subject: cPickle support for TUPLE[123]. Incidentally plugged several undetected overflow holes in Pdata_grow(). --- Lib/pickle.py | 16 +-- Lib/test/pickletester.py | 31 +++++ Modules/cPickle.c | 294 +++++++++++++++++++++++++++++++++-------------- 3 files changed, 250 insertions(+), 91 deletions(-) diff --git a/Lib/pickle.py b/Lib/pickle.py index 4e80cca..926869e 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -621,8 +621,11 @@ class Pickler: proto = self.proto n = len(obj) - if n == 0 and proto: - write(EMPTY_TUPLE) + if n == 0: + if proto: + write(EMPTY_TUPLE) + else: + write(MARK + TUPLE) return save = self.save @@ -639,13 +642,13 @@ class Pickler: self.memoize(obj) return - # proto 0, or proto 1 and tuple isn't empty, or proto > 1 and tuple + # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple # has more than 3 elements. write(MARK) for element in obj: save(element) - if n and id(obj) in memo: + if id(obj) in memo: # Subtle. d was not in memo when we entered save_tuple(), so # the process of saving the tuple's elements must have saved # the tuple itself: the tuple is recursive. The proper action @@ -660,10 +663,9 @@ class Pickler: write(POP * (n+1) + get) return - # No recursion (including the empty-tuple case for protocol 0). + # No recursion. self.write(TUPLE) - if obj: # No need to memoize empty tuple - self.memoize(obj) + self.memoize(obj) dispatch[TupleType] = save_tuple diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index f1a1384..249cc54 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -484,6 +484,27 @@ class AbstractPickleTests(unittest.TestCase): self.assertEqual(x, y) def test_short_tuples(self): + # Map (proto, len(tuple)) to expected opcode. + expected_opcode = {(0, 0): pickle.TUPLE, + (0, 1): pickle.TUPLE, + (0, 2): pickle.TUPLE, + (0, 3): pickle.TUPLE, + (0, 4): pickle.TUPLE, + + (1, 0): pickle.EMPTY_TUPLE, + (1, 1): pickle.TUPLE, + (1, 2): pickle.TUPLE, + (1, 3): pickle.TUPLE, + (1, 4): pickle.TUPLE, + + (2, 0): pickle.EMPTY_TUPLE, + (2, 1): pickle.TUPLE1, + (2, 2): pickle.TUPLE2, + (2, 3): pickle.TUPLE3, + (2, 4): pickle.TUPLE, + } + all_tuple_opcodes = (pickle.TUPLE, pickle.EMPTY_TUPLE, + pickle.TUPLE1, pickle.TUPLE2, pickle.TUPLE3) a = () b = (1,) c = (1, 2) @@ -495,6 +516,16 @@ class AbstractPickleTests(unittest.TestCase): y = self.loads(s) self.assertEqual(x, y, (proto, x, s, y)) + # Verify that the protocol-correct tuple-building opcode + # was generated. + expected = expected_opcode[proto, len(x)] + for opcode in s: + if opcode in all_tuple_opcodes: + self.assertEqual(expected, opcode) + break + else: + self.fail("didn't find a tuple-building opcode in pickle") + def test_singletons(self): for proto in protocols: for x in None, False, True: diff --git a/Modules/cPickle.c b/Modules/cPickle.c index 1454248..f2fe5b0 100644 --- a/Modules/cPickle.c +++ b/Modules/cPickle.c @@ -110,7 +110,8 @@ static PyObject *__class___str, *__getinitargs___str, *__dict___str, typedef struct { PyObject_HEAD - int length, size; + int length; /* number of initial slots in data currently used */ + int size; /* number of slots in data allocated */ PyObject **data; } Pdata; @@ -120,10 +121,11 @@ Pdata_dealloc(Pdata *self) int i; PyObject **p; - for (i=self->length, p=self->data; --i >= 0; p++) Py_DECREF(*p); - - if (self->data) free(self->data); - + for (i = self->length, p = self->data; --i >= 0; p++) { + Py_DECREF(*p); + } + if (self->data) + free(self->data); PyObject_Del(self); } @@ -140,11 +142,13 @@ Pdata_New(void) { Pdata *self; - if (!( self = PyObject_New(Pdata, &PdataType))) return NULL; - self->size=8; - self->length=0; - self->data=malloc(self->size * sizeof(PyObject*)); - if (self->data) return (PyObject*)self; + if (!(self = PyObject_New(Pdata, &PdataType))) + return NULL; + self->size = 8; + self->length = 0; + self->data = malloc(self->size * sizeof(PyObject*)); + if (self->data) + return (PyObject*)self; Py_DECREF(self); return PyErr_NoMemory(); } @@ -156,6 +160,9 @@ stackUnderflow(void) return -1; } +/* Retain only the initial clearto items. If clearto >= the current + * number of items, this is a (non-erroneous) NOP. + */ static int Pdata_clear(Pdata *self, int clearto) { @@ -165,39 +172,56 @@ Pdata_clear(Pdata *self, int clearto) if (clearto < 0) return stackUnderflow(); if (clearto >= self->length) return 0; - for (i=self->length, p=self->data+clearto; --i >= clearto; p++) + for (i = self->length, p = self->data + clearto; + --i >= clearto; + p++) { Py_DECREF(*p); - self->length=clearto; + } + self->length = clearto; return 0; } - static int Pdata_grow(Pdata *self) { - if (! self->size) { - PyErr_NoMemory(); - return -1; - } - self->size *= 2; - self->data = realloc(self->data, self->size*sizeof(PyObject*)); - if (! self->data) { - self->size = 0; - PyErr_NoMemory(); - return -1; - } + int bigger; + size_t nbytes; + + if (! self->size) + goto nomemory; + bigger = self->size << 1; + if (bigger <= 0) + goto nomemory; + if ((int)(size_t)bigger != bigger) + goto nomemory; + nbytes = (size_t)bigger * sizeof(PyObject *); + if (nbytes / sizeof(PyObject *) != (size_t)bigger) + goto nomemory; + self->data = realloc(self->data, nbytes); + if (self->data == NULL) + goto nomemory; + self->size = bigger; return 0; -} -#define PDATA_POP(D,V) { \ - if ((D)->length) V=D->data[--((D)->length)]; \ - else { \ - PyErr_SetString(UnpicklingError, "bad pickle data"); \ - V=NULL; \ - } \ + nomemory: + self->size = 0; + PyErr_NoMemory(); + return -1; } +/* D is a Pdata *. Pop the topmost element and store it into V, which + * must be an lvalue holding PyObject *. On stack underflow, UnpicklingError + * is raised and V is set to NULL. D and V may be evaluated several times. + */ +#define PDATA_POP(D, V) { \ + if ((D)->length) \ + (V) = (D)->data[--((D)->length)]; \ + else { \ + PyErr_SetString(UnpicklingError, "bad pickle data"); \ + (V) = NULL; \ + } \ +} static PyObject * Pdata_popTuple(Pdata *self, int start) @@ -205,12 +229,14 @@ Pdata_popTuple(Pdata *self, int start) PyObject *r; int i, j, l; - l=self->length-start; - if (!( r=PyTuple_New(l))) return NULL; - for (i=start, j=0 ; j < l; i++, j++) + l = self->length-start; + r = PyTuple_New(l); + if (r == NULL) + return NULL; + for (i = start, j = 0 ; j < l; i++, j++) PyTuple_SET_ITEM(r, j, self->data[i]); - self->length=start; + self->length = start; return r; } @@ -1444,82 +1470,143 @@ save_unicode(Picklerobject *self, PyObject *args, int doput) } #endif +/* A helper for save_tuple. Push the len elements in tuple t on the stack. */ +static int +store_tuple_elememts(Picklerobject *self, PyObject *t, int len) +{ + int i; + int res = -1; /* guilty until proved innocent */ + + assert(PyTuple_Size(t) == len); + for (i = 0; i < len; i++) { + PyObject *element = PyTuple_GET_ITEM(t, i); + + if (element == NULL) + goto finally; + if (save(self, element, 0) < 0) + goto finally; + } + res = 0; + + finally: + return res; +} + +/* Tuples are ubiquitous in the pickle protocols, so many techniques are + * used across protocols to minimize the space needed to pickle them. + * Tuples are also the only builtin immuatable type that can be recursive + * (a tuple can be reached from itself), and that requires some subtle + * magic so that it works in all cases. IOW, this is a long routine. + */ static int save_tuple(Picklerobject *self, PyObject *args) { - PyObject *element = 0, *py_tuple_id = 0; - int len, i, res = -1; + PyObject *py_tuple_id = NULL; + int len, i; + int res = -1; static char tuple = TUPLE; - - if (self->write_func(self, &MARKv, 1) < 0) - goto finally; + static char pop = POP; + static char pop_mark = POP_MARK; + static char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3}; if ((len = PyTuple_Size(args)) < 0) goto finally; - for (i = 0; i < len; i++) { - if (!( element = PyTuple_GET_ITEM((PyTupleObject *)args, i))) - goto finally; + if (len == 0) { + char c_str[2]; - if (save(self, element, 0) < 0) - goto finally; + if (self->proto) { + c_str[0] = EMPTY_TUPLE; + len = 1; + } + else { + c_str[0] = MARK; + c_str[1] = TUPLE; + len = 2; + } + if (self->write_func(self, c_str, len) >= 0) + res = 0; + /* Don't memoize an empty tuple. */ + goto finally; } - if (!( py_tuple_id = PyLong_FromVoidPtr(args))) + /* A non-empty tuple. */ + + /* id(tuple) isn't in the memo now. If it shows up there after + * saving the tuple elements, the tuple must be recursive, in + * which case we'll pop everything we put on the stack, and fetch + * its value from the memo. + */ + py_tuple_id = PyLong_FromVoidPtr(args); + if (py_tuple_id == NULL) goto finally; - if (len) { + if (len <= 3 && self->proto >= 2) { + /* Use TUPLE{1,2,3} opcodes. */ + if (store_tuple_elememts(self, args, len) < 0) + goto finally; if (PyDict_GetItem(self->memo, py_tuple_id)) { - if (self->bin) { - static char pop_mark = POP_MARK; - - if (self->write_func(self, &pop_mark, 1) < 0) + /* pop the len elements */ + for (i = 0; i < len; ++i) + if (self->write_func(self, &pop, 1) < 0) goto finally; - } - else { - static char pop = POP; - - for (i = 0; i <= len; i++) { - if (self->write_func(self, &pop, 1) < 0) - goto finally; - } - } - + /* fetch from memo */ if (get(self, py_tuple_id) < 0) goto finally; - res = 0; goto finally; } + /* Not recursive. */ + if (self->write_func(self, len2opcode + len, 1) < 0) + goto finally; + goto memoize; } - if (self->write_func(self, &tuple, 1) < 0) { + /* proto < 2 and len > 0, or proto >= 2 and len > 3. + * Generate MARK elt1 elt2 ... TUPLE + */ + if (self->write_func(self, &MARKv, 1) < 0) + goto finally; + + if (store_tuple_elememts(self, args, len) < 0) + goto finally; + + if (PyDict_GetItem(self->memo, py_tuple_id)) { + /* pop the stack stuff we pushed */ + if (self->bin) { + if (self->write_func(self, &pop_mark, 1) < 0) + goto finally; + } + else { + /* Note that we pop one more than len, to remove + * the MARK too. + */ + for (i = 0; i <= len; i++) + if (self->write_func(self, &pop, 1) < 0) + goto finally; + } + /* fetch from memo */ + if (get(self, py_tuple_id) >= 0) + res = 0; goto finally; } - if (put(self, args) < 0) + /* Not recursive. */ + if (self->write_func(self, &tuple, 1) < 0) goto finally; - res = 0; + memoize: + if (put(self, args) >= 0) + res = 0; finally: Py_XDECREF(py_tuple_id); - return res; } static int -save_empty_tuple(Picklerobject *self, PyObject *args) -{ - static char tuple = EMPTY_TUPLE; - - return self->write_func(self, &tuple, 1); -} - - -static int save_list(Picklerobject *self, PyObject *args) { PyObject *element = 0; @@ -1972,7 +2059,7 @@ save_reduce(Picklerobject *self, PyObject *callable, } static int -save(Picklerobject *self, PyObject *args, int pers_save) +save(Picklerobject *self, PyObject *args, int pers_save) { PyTypeObject *type; PyObject *py_ob_id = 0, *__reduce__ = 0, *t = 0, *arg_tup = 0, @@ -2028,9 +2115,8 @@ save(Picklerobject *self, PyObject *args, int pers_save) break; case 't': - if (type == &PyTuple_Type && PyTuple_Size(args)==0) { - if (self->bin) res = save_empty_tuple(self, args); - else res = save_tuple(self, args); + if (type == &PyTuple_Type && PyTuple_Size(args) == 0) { + res = save_tuple(self, args); goto finally; } break; @@ -3193,11 +3279,21 @@ load_tuple(Unpicklerobject *self) } static int -load_empty_tuple(Unpicklerobject *self) +load_counted_tuple(Unpicklerobject *self, int len) { - PyObject *tup; + PyObject *tup = PyTuple_New(len); + + if (tup == NULL) + return -1; - if (!( tup=PyTuple_New(0))) return -1; + while (--len >= 0) { + PyObject *element; + + PDATA_POP(self->stack, element); + if (element == NULL) + return -1; + PyTuple_SET_ITEM(tup, len, element); + } PDATA_PUSH(self->stack, tup, -1); return 0; } @@ -4018,7 +4114,22 @@ load(Unpicklerobject *self) #endif case EMPTY_TUPLE: - if (load_empty_tuple(self) < 0) + if (load_counted_tuple(self, 0) < 0) + break; + continue; + + case TUPLE1: + if (load_counted_tuple(self, 1) < 0) + break; + continue; + + case TUPLE2: + if (load_counted_tuple(self, 2) < 0) + break; + continue; + + case TUPLE3: + if (load_counted_tuple(self, 3) < 0) break; continue; @@ -4346,7 +4457,22 @@ noload(Unpicklerobject *self) #endif case EMPTY_TUPLE: - if (load_empty_tuple(self) < 0) + if (load_counted_tuple(self, 0) < 0) + break; + continue; + + case TUPLE1: + if (load_counted_tuple(self, 1) < 0) + break; + continue; + + case TUPLE2: + if (load_counted_tuple(self, 2) < 0) + break; + continue; + + case TUPLE3: + if (load_counted_tuple(self, 3) < 0) break; continue; -- cgit v0.12