author     Tim Peters <tim.peters@gmail.com>  2003-02-11 22:43:24 (GMT)
committer  Tim Peters <tim.peters@gmail.com>  2003-02-11 22:43:24 (GMT)
commit     42f08ac1e303117ea789a8ad2a1326db75f923f8 (patch)
tree       ee82ccd3ada46a1ee43cded38425bf3a4146b67d
parent     e7b33db22d52424ac99ff02f102f83d3d953e93b (diff)
Implemented batching for dicts in cPickle. This is after two failed
attempts to merge the C list-batch and dict-batch code -- they worked, but it was a godawful mess to read.
-rw-r--r--  Lib/pickle.py               3
-rw-r--r--  Lib/test/pickletester.py   34
-rw-r--r--  Modules/cPickle.c         149
3 files changed, 138 insertions, 48 deletions
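
For a concrete view of what this change affects on the wire, here is a small sketch (not part of the patch) contrasting protocol 0 with protocol 1+ when pickling a dict: protocol 0 has no SETITEMS opcode, so every (key, value) pair is stored with its own SETITEM, while later protocols wrap groups of pairs in MARK ... SETITEMS. The opcode names come from the standard pickletools module; the sample dict is arbitrary.

    import pickle
    import pickletools

    d = {"a": 1, "b": 2, "c": 3}

    def opcode_names(data):
        # List the opcode names appearing in a pickle byte string.
        return [op.name for op, arg, pos in pickletools.genops(data)]

    # Protocol 0: no SETITEMS, one SETITEM per (key, value) pair.
    names0 = opcode_names(pickle.dumps(d, 0))
    assert "SETITEMS" not in names0
    assert names0.count("SETITEM") == len(d)

    # Protocol 1 and up: pairs are grouped as MARK k1 v1 k2 v2 ... SETITEMS.
    names1 = opcode_names(pickle.dumps(d, 1))
    assert "SETITEMS" in names1
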
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 0173c1f..00f5834 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -612,7 +612,8 @@ class Pickler:
    dispatch[ListType] = save_list
-    # Keep in synch with cPickle's BATCHSIZE.
+    # Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets
+    # out of synch, though.
    _BATCHSIZE = 1000
    def _batch_appends(self, items):
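
As a rough illustration of the batching the comment above refers to, the sketch below chunks an item iterator into groups of at most 1000, which is the shape of what _batch_appends()/_batch_setitems() do before emitting APPENDS/SETITEMS. The batches() helper and the use of itertools.islice are illustrative, not the module's actual code.

    from itertools import islice

    _BATCHSIZE = 1000  # mirrors pickle.Pickler._BATCHSIZE / cPickle's BATCHSIZE

    def batches(items, size=_BATCHSIZE):
        # Yield successive lists of at most `size` items from an iterator.
        # Illustrative only; pickle.py writes this loop inline.
        it = iter(items)
        while True:
            chunk = list(islice(it, size))
            if not chunk:
                return
            yield chunk

    # Each chunk would become one MARK ... APPENDS/SETITEMS group on the wire.
    sizes = [len(chunk) for chunk in batches(range(2500))]
    assert sizes == [1000, 1000, 500]
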
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 6ed29b1..734f2a3 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -694,23 +694,6 @@ class AbstractPickleTests(unittest.TestCase):
            else:
                self.failUnless(num_appends >= 2)
-# XXX Temporary hack, so long as the C implementation of pickle protocol
-# XXX 2 isn't ready. When it is, move the methods in TempAbstractPickleTests
-# XXX into AbstractPickleTests above, and get rid of TempAbstractPickleTests
-# XXX along with the references to it in test_pickle.py.
-class TempAbstractPickleTests(unittest.TestCase):
-
-    def test_newobj_list_slots(self):
-        x = SlotList([1, 2, 3])
-        x.foo = 42
-        x.bar = "hello"
-        s = self.dumps(x, 2)
-        y = self.loads(s)
-        self.assertEqual(list(x), list(y))
-        self.assertEqual(x.__dict__, y.__dict__)
-        self.assertEqual(x.foo, y.foo)
-        self.assertEqual(x.bar, y.bar)
-
    def test_dict_chunking(self):
        n = 10 # too small to chunk
        x = dict.fromkeys(range(n))
@@ -733,6 +716,23 @@ class TempAbstractPickleTests(unittest.TestCase):
            else:
                self.failUnless(num_setitems >= 2)
+# XXX Temporary hack, so long as the C implementation of pickle protocol
+# XXX 2 isn't ready. When it is, move the methods in TempAbstractPickleTests
+# XXX into AbstractPickleTests above, and get rid of TempAbstractPickleTests
+# XXX along with the references to it in test_pickle.py.
+class TempAbstractPickleTests(unittest.TestCase):
+
+    def test_newobj_list_slots(self):
+        x = SlotList([1, 2, 3])
+        x.foo = 42
+        x.bar = "hello"
+        s = self.dumps(x, 2)
+        y = self.loads(s)
+        self.assertEqual(list(x), list(y))
+        self.assertEqual(x.__dict__, y.__dict__)
+        self.assertEqual(x.foo, y.foo)
+        self.assertEqual(x.bar, y.bar)
+
class MyInt(int):
    sample = 1
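
test_dict_chunking above depends on counting SETITEMS opcodes in the produced pickle (the num_setitems value). A sketch of how such a count can be obtained with pickletools is shown below; count_opcode is just an illustrative name, and the helper actually used by pickletester.py may differ.

    import pickle
    import pickletools

    def count_opcode(name, data):
        # Count occurrences of the opcode `name` in pickle bytes `data`.
        # Illustrative stand-in for whatever helper pickletester.py uses.
        return sum(1 for op, arg, pos in pickletools.genops(data)
                   if op.name == name)

    x = dict.fromkeys(range(2500))   # large enough to need several batches
    s = pickle.dumps(x, 1)           # any protocol > 0 batches with SETITEMS
    assert count_opcode("SETITEMS", s) >= 2   # 2500 items -> 3 groups of <= 1000
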
diff --git a/Modules/cPickle.c b/Modules/cPickle.c
index a35905d..6af4afd 100644
--- a/Modules/cPickle.c
+++ b/Modules/cPickle.c
@@ -88,7 +88,9 @@ PyDoc_STRVAR(cPickle_module_documentation,
#define FALSE "I00\n"
/* Keep in synch with pickle.Pickler._BATCHSIZE. This is how many elements
- * batch_{list, dict} pump out before doing APPENDS/SETITEMS.
+ * batch_list/dict() pumps out before doing APPENDS/SETITEMS. Nothing will
+ * break if this gets out of synch with pickle.py, but it's unclear that
+ * would help anything either.
 */
#define BATCHSIZE 1000
@@ -1709,7 +1711,6 @@ save_list(Picklerobject *self, PyObject *args)
        int len;
        PyObject *iter;
-
        if (self->fast && !fast_save_enter(self, args))
                goto finally;
@@ -1756,18 +1757,123 @@ save_list(Picklerobject *self, PyObject *args)
}
+/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
+ * MARK key value ... key value SETITEMS
+ * opcode sequences. Calling code should have arranged to first create an
+ * empty dict, or dict-like object, for the SETITEMS to operate on.
+ * Returns 0 on success, <0 on error.
+ *
+ * This is very much like batch_list(). The difference between saving
+ * elements directly, and picking apart two-tuples, is so long-winded at
+ * the C level, though, that attempts to combine these routines were too
+ * ugly to bear.
+ */
+static int
+batch_dict(Picklerobject *self, PyObject *iter)
+{
+        PyObject *p;
+        PyObject *slice[BATCHSIZE];
+        int i, n;
+
+        static char setitem = SETITEM;
+        static char setitems = SETITEMS;
+
+        assert(iter != NULL);
+
+        if (self->proto == 0) {
+                /* SETITEMS isn't available; do one at a time. */
+                for (;;) {
+                        p = PyIter_Next(iter);
+                        if (p == NULL) {
+                                if (PyErr_Occurred())
+                                        return -1;
+                                break;
+                        }
+                        if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) {
+                                PyErr_SetString(PyExc_TypeError, "dict items "
+                                        "iterator must return 2-tuples");
+                                return -1;
+                        }
+                        i = save(self, PyTuple_GET_ITEM(p, 0), 0);
+                        if (i >= 0)
+                                i = save(self, PyTuple_GET_ITEM(p, 1), 0);
+                        Py_DECREF(p);
+                        if (i < 0)
+                                return -1;
+                        if (self->write_func(self, &setitem, 1) < 0)
+                                return -1;
+
+                }
+                return 0;
+        }
+
+        /* proto > 0: write in batches of BATCHSIZE. */
+        do {
+                /* Get next group of (no more than) BATCHSIZE elements. */
+                for (n = 0; n < BATCHSIZE; ++n) {
+                        p = PyIter_Next(iter);
+                        if (p == NULL) {
+                                if (PyErr_Occurred())
+                                        goto BatchFailed;
+                                break;
+                        }
+                        if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) {
+                                PyErr_SetString(PyExc_TypeError, "dict items "
+                                        "iterator must return 2-tuples");
+                                goto BatchFailed;
+                        }
+                        slice[n] = p;
+                }
+
+                if (n > 1) {
+                        /* Pump out MARK, slice[0:n], SETITEMS. */
+                        if (self->write_func(self, &MARKv, 1) < 0)
+                                goto BatchFailed;
+                        for (i = 0; i < n; ++i) {
+                                p = slice[i];
+                                if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0)
+                                        goto BatchFailed;
+                                if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0)
+                                        goto BatchFailed;
+                        }
+                        if (self->write_func(self, &setitems, 1) < 0)
+                                goto BatchFailed;
+                }
+                else if (n == 1) {
+                        p = slice[0];
+                        if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0)
+                                goto BatchFailed;
+                        if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0)
+                                goto BatchFailed;
+                        if (self->write_func(self, &setitem, 1) < 0)
+                                goto BatchFailed;
+                }
+
+                for (i = 0; i < n; ++i) {
+                        Py_DECREF(slice[i]);
+                }
+        } while (n == BATCHSIZE);
+        return 0;
+
+BatchFailed:
+        while (--n >= 0) {
+                Py_DECREF(slice[n]);
+        }
+        return -1;
+}
+
static int
save_dict(Picklerobject *self, PyObject *args)
{
-        PyObject *key = 0, *value = 0;
-        int i, len, res = -1, using_setitems;
+        int res = -1;
        char s[3];
-
-        static char setitem = SETITEM, setitems = SETITEMS;
+        int len;
+        PyObject *iter;
        if (self->fast && !fast_save_enter(self, args))
                goto finally;
+        /* Create an empty dict. */
        if (self->bin) {
                s[0] = EMPTY_DICT;
                len = 1;
@@ -1781,6 +1887,7 @@ save_dict(Picklerobject *self, PyObject *args)
        if (self->write_func(self, s, len) < 0)
                goto finally;
+        /* Get dict size, and bow out early if empty. */
        if ((len = PyDict_Size(args)) < 0)
                goto finally;
@@ -1793,30 +1900,12 @@ save_dict(Picklerobject *self, PyObject *args)
                goto finally;
        }
-        if ((using_setitems = (self->bin && (PyDict_Size(args) > 1))))
-                if (self->write_func(self, &MARKv, 1) < 0)
-                        goto finally;
-
-        i = 0;
-        while (PyDict_Next(args, &i, &key, &value)) {
-                if (save(self, key, 0) < 0)
-                        goto finally;
-
-                if (save(self, value, 0) < 0)
-                        goto finally;
-
-                if (!using_setitems) {
-                        if (self->write_func(self, &setitem, 1) < 0)
-                                goto finally;
-                }
-        }
-
-        if (using_setitems) {
-                if (self->write_func(self, &setitems, 1) < 0)
-                        goto finally;
-        }
-
-        res = 0;
+        /* Materialize the dict items. */
+        iter = PyObject_CallMethod(args, "iteritems", "()");
+        if (iter == NULL)
+                goto finally;
+        res = batch_dict(self, iter);
+        Py_DECREF(iter);
  finally:
        if (self->fast && !fast_save_leave(self, args))
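
To summarize the control flow of the new batch_dict() more compactly than the C, here is a rough Python rendering (an illustrative sketch, not code from the patch). Protocol 0 emits one SETITEM per pair as it goes; protocol 1+ collects up to BATCHSIZE pairs and wraps each group in MARK ... SETITEMS, falling back to a single SETITEM when a group holds exactly one pair. The emit callback stands in for the pickler's write_func plus the recursive save() calls.

    BATCHSIZE = 1000  # keep in synch with cPickle's BATCHSIZE

    def batch_dict(items, proto, emit):
        # Rough Python rendering of cPickle's batch_dict() control flow.
        # `items` is an iterator of (key, value) pairs; `emit` receives
        # opcode-level tokens.  Illustrative only.
        if proto == 0:
            # SETITEMS isn't available; store each pair individually.
            for key, value in items:
                emit(key)
                emit(value)
                emit("SETITEM")
            return
        it = iter(items)
        while True:
            # Pull the next group of at most BATCHSIZE pairs.
            group = []
            for _ in range(BATCHSIZE):
                try:
                    group.append(next(it))
                except StopIteration:
                    break
            if len(group) > 1:
                emit("MARK")
                for key, value in group:
                    emit(key)
                    emit(value)
                emit("SETITEMS")
            elif len(group) == 1:
                key, value = group[0]
                emit(key)
                emit(value)
                emit("SETITEM")
            if len(group) < BATCHSIZE:   # mirrors `while (n == BATCHSIZE)`
                return

    # Example: 2500 pairs at protocol 2 produce three SETITEMS groups.
    out = []
    batch_dict(((i, None) for i in range(2500)), 2, out.append)
    assert out.count("SETITEMS") == 3
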