From 42f08ac1e303117ea789a8ad2a1326db75f923f8 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Tue, 11 Feb 2003 22:43:24 +0000 Subject: Implemented batching for dicts in cPickle. This is after two failed attempts to merge the C list-batch and dict-batch code -- they worked, but it was a godawful mess to read. --- Lib/pickle.py | 3 +- Lib/test/pickletester.py | 34 +++++------ Modules/cPickle.c | 149 +++++++++++++++++++++++++++++++++++++---------- 3 files changed, 138 insertions(+), 48 deletions(-) diff --git a/Lib/pickle.py b/Lib/pickle.py index 0173c1f..00f5834 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -612,7 +612,8 @@ class Pickler: dispatch[ListType] = save_list - # Keep in synch with cPickle's BATCHSIZE. + # Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets + # out of synch, though. _BATCHSIZE = 1000 def _batch_appends(self, items): diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 6ed29b1..734f2a3 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -694,23 +694,6 @@ class AbstractPickleTests(unittest.TestCase): else: self.failUnless(num_appends >= 2) -# XXX Temporary hack, so long as the C implementation of pickle protocol -# XXX 2 isn't ready. When it is, move the methods in TempAbstractPickleTests -# XXX into AbstractPickleTests above, and get rid of TempAbstractPickleTests -# XXX along with the references to it in test_pickle.py. -class TempAbstractPickleTests(unittest.TestCase): - - def test_newobj_list_slots(self): - x = SlotList([1, 2, 3]) - x.foo = 42 - x.bar = "hello" - s = self.dumps(x, 2) - y = self.loads(s) - self.assertEqual(list(x), list(y)) - self.assertEqual(x.__dict__, y.__dict__) - self.assertEqual(x.foo, y.foo) - self.assertEqual(x.bar, y.bar) - def test_dict_chunking(self): n = 10 # too small to chunk x = dict.fromkeys(range(n)) @@ -733,6 +716,23 @@ class TempAbstractPickleTests(unittest.TestCase): else: self.failUnless(num_setitems >= 2) +# XXX Temporary hack, so long as the C implementation of pickle protocol +# XXX 2 isn't ready. When it is, move the methods in TempAbstractPickleTests +# XXX into AbstractPickleTests above, and get rid of TempAbstractPickleTests +# XXX along with the references to it in test_pickle.py. +class TempAbstractPickleTests(unittest.TestCase): + + def test_newobj_list_slots(self): + x = SlotList([1, 2, 3]) + x.foo = 42 + x.bar = "hello" + s = self.dumps(x, 2) + y = self.loads(s) + self.assertEqual(list(x), list(y)) + self.assertEqual(x.__dict__, y.__dict__) + self.assertEqual(x.foo, y.foo) + self.assertEqual(x.bar, y.bar) + class MyInt(int): sample = 1 diff --git a/Modules/cPickle.c b/Modules/cPickle.c index a35905d..6af4afd 100644 --- a/Modules/cPickle.c +++ b/Modules/cPickle.c @@ -88,7 +88,9 @@ PyDoc_STRVAR(cPickle_module_documentation, #define FALSE "I00\n" /* Keep in synch with pickle.Pickler._BATCHSIZE. This is how many elements - * batch_{list, dict} pump out before doing APPENDS/SETITEMS. + * batch_list/dict() pumps out before doing APPENDS/SETITEMS. Nothing will + * break if this gets out of synch with pickle.py, but it's unclear that + * would help anything either. */ #define BATCHSIZE 1000 @@ -1709,7 +1711,6 @@ save_list(Picklerobject *self, PyObject *args) int len; PyObject *iter; - if (self->fast && !fast_save_enter(self, args)) goto finally; @@ -1756,18 +1757,123 @@ save_list(Picklerobject *self, PyObject *args) } +/* iter is an iterator giving (key, value) pairs, and we batch up chunks of + * MARK key value ... key value SETITEMS + * opcode sequences. Calling code should have arranged to first create an + * empty dict, or dict-like object, for the SETITEMS to operate on. + * Returns 0 on success, <0 on error. + * + * This is very much like batch_list(). The difference between saving + * elements directly, and picking apart two-tuples, is so long-winded at + * the C level, though, that attempts to combine these routines were too + * ugly to bear. + */ +static int +batch_dict(Picklerobject *self, PyObject *iter) +{ + PyObject *p; + PyObject *slice[BATCHSIZE]; + int i, n; + + static char setitem = SETITEM; + static char setitems = SETITEMS; + + assert(iter != NULL); + + if (self->proto == 0) { + /* SETITEMS isn't available; do one at a time. */ + for (;;) { + p = PyIter_Next(iter); + if (p == NULL) { + if (PyErr_Occurred()) + return -1; + break; + } + if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) { + PyErr_SetString(PyExc_TypeError, "dict items " + "iterator must return 2-tuples"); + return -1; + } + i = save(self, PyTuple_GET_ITEM(p, 0), 0); + if (i >= 0) + i = save(self, PyTuple_GET_ITEM(p, 1), 0); + Py_DECREF(p); + if (i < 0) + return -1; + if (self->write_func(self, &setitem, 1) < 0) + return -1; + + } + return 0; + } + + /* proto > 0: write in batches of BATCHSIZE. */ + do { + /* Get next group of (no more than) BATCHSIZE elements. */ + for (n = 0; n < BATCHSIZE; ++n) { + p = PyIter_Next(iter); + if (p == NULL) { + if (PyErr_Occurred()) + goto BatchFailed; + break; + } + if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) { + PyErr_SetString(PyExc_TypeError, "dict items " + "iterator must return 2-tuples"); + goto BatchFailed; + } + slice[n] = p; + } + + if (n > 1) { + /* Pump out MARK, slice[0:n], SETITEMS. */ + if (self->write_func(self, &MARKv, 1) < 0) + goto BatchFailed; + for (i = 0; i < n; ++i) { + p = slice[i]; + if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0) + goto BatchFailed; + if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0) + goto BatchFailed; + } + if (self->write_func(self, &setitems, 1) < 0) + goto BatchFailed; + } + else if (n == 1) { + p = slice[0]; + if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0) + goto BatchFailed; + if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0) + goto BatchFailed; + if (self->write_func(self, &setitem, 1) < 0) + goto BatchFailed; + } + + for (i = 0; i < n; ++i) { + Py_DECREF(slice[i]); + } + }while (n == BATCHSIZE); + return 0; + +BatchFailed: + while (--n >= 0) { + Py_DECREF(slice[n]); + } + return -1; +} + static int save_dict(Picklerobject *self, PyObject *args) { - PyObject *key = 0, *value = 0; - int i, len, res = -1, using_setitems; + int res = -1; char s[3]; - - static char setitem = SETITEM, setitems = SETITEMS; + int len; + PyObject *iter; if (self->fast && !fast_save_enter(self, args)) goto finally; + /* Create an empty dict. */ if (self->bin) { s[0] = EMPTY_DICT; len = 1; @@ -1781,6 +1887,7 @@ save_dict(Picklerobject *self, PyObject *args) if (self->write_func(self, s, len) < 0) goto finally; + /* Get dict size, and bow out early if empty. */ if ((len = PyDict_Size(args)) < 0) goto finally; @@ -1793,30 +1900,12 @@ save_dict(Picklerobject *self, PyObject *args) goto finally; } - if ((using_setitems = (self->bin && (PyDict_Size(args) > 1)))) - if (self->write_func(self, &MARKv, 1) < 0) - goto finally; - - i = 0; - while (PyDict_Next(args, &i, &key, &value)) { - if (save(self, key, 0) < 0) - goto finally; - - if (save(self, value, 0) < 0) - goto finally; - - if (!using_setitems) { - if (self->write_func(self, &setitem, 1) < 0) - goto finally; - } - } - - if (using_setitems) { - if (self->write_func(self, &setitems, 1) < 0) - goto finally; - } - - res = 0; + /* Materialize the dict items. */ + iter = PyObject_CallMethod(args, "iteritems", "()"); + if (iter == NULL) + goto finally; + res = batch_dict(self, iter); + Py_DECREF(iter); finally: if (self->fast && !fast_save_leave(self, args)) -- cgit v0.12