summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEli Bendersky <eliben@gmail.com>2013-01-10 14:01:06 (GMT)
committerEli Bendersky <eliben@gmail.com>2013-01-10 14:01:06 (GMT)
commit698bdb2a6c28a54792a1715fd012e98d9eb668e2 (patch)
tree2867ce3db29d0398b85673161354a53fd5b313cf
parent0dceb560b6b7bfc0a2e11d3a6c27f0a7c7e2e619 (diff)
downloadcpython-698bdb2a6c28a54792a1715fd012e98d9eb668e2.zip
cpython-698bdb2a6c28a54792a1715fd012e98d9eb668e2.tar.gz
cpython-698bdb2a6c28a54792a1715fd012e98d9eb668e2.tar.bz2
Issue #16076: make _elementtree.Element pickle-able in a way that is compatible
with the Python version of the class. Patch by Daniel Shahaf.
-rw-r--r--Lib/test/test_xml_etree.py81
-rw-r--r--Misc/ACKS1
-rw-r--r--Modules/_elementtree.c180
3 files changed, 239 insertions, 23 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 8913845..c88700e 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -16,14 +16,20 @@
import html
import io
+import operator
import pickle
import sys
import unittest
import weakref
+from itertools import product
from test import support
from test.support import TESTFN, findfile, unlink, import_fresh_module, gc_collect
+# pyET is the pure-Python implementation.
+#
+# ET is pyET in test_xml_etree and is the C accelerated version in
+# test_xml_etree_c.
pyET = None
ET = None
@@ -171,6 +177,38 @@ def check_element(element):
for elem in element:
check_element(elem)
+class ElementTestCase:
+ @classmethod
+ def setUpClass(cls):
+ cls.modules = {pyET, ET}
+
+ def pickleRoundTrip(self, obj, name, dumper, loader):
+ save_m = sys.modules[name]
+ try:
+ sys.modules[name] = dumper
+ temp = pickle.dumps(obj)
+ sys.modules[name] = loader
+ result = pickle.loads(temp)
+ except pickle.PicklingError as pe:
+ # pyET must be second, because pyET may be (equal to) ET.
+ human = dict([(ET, "cET"), (pyET, "pyET")])
+ raise support.TestFailed("Failed to round-trip %r from %r to %r"
+ % (obj,
+ human.get(dumper, dumper),
+ human.get(loader, loader))) from pe
+ finally:
+ sys.modules[name] = save_m
+ return result
+
+ def assertEqualElements(self, alice, bob):
+ self.assertIsInstance(alice, (ET.Element, pyET.Element))
+ self.assertIsInstance(bob, (ET.Element, pyET.Element))
+ self.assertEqual(len(list(alice)), len(list(bob)))
+ for x, y in zip(alice, bob):
+ self.assertEqualElements(x, y)
+ properties = operator.attrgetter('tag', 'tail', 'text', 'attrib')
+ self.assertEqual(properties(alice), properties(bob))
+
# --------------------------------------------------------------------
# element tree tests
@@ -1715,7 +1753,7 @@ def check_issue10777():
# --------------------------------------------------------------------
-class BasicElementTest(unittest.TestCase):
+class BasicElementTest(ElementTestCase, unittest.TestCase):
def test_augmentation_type_errors(self):
e = ET.Element('joe')
self.assertRaises(TypeError, e.append, 'b')
@@ -1775,19 +1813,22 @@ class BasicElementTest(unittest.TestCase):
self.assertEqual(e1.get('w', default=7), 7)
def test_pickle(self):
- # For now this test only works for the Python version of ET,
- # so set sys.modules accordingly because pickle uses __import__
- # to load the __module__ of the class.
- if pyET:
- sys.modules['xml.etree.ElementTree'] = pyET
- else:
- raise unittest.SkipTest('only for the Python version')
- e1 = ET.Element('foo', bar=42)
- s = pickle.dumps(e1)
- e2 = pickle.loads(s)
- self.assertEqual(e2.tag, 'foo')
- self.assertEqual(e2.attrib['bar'], 42)
-
+ # issue #16076: the C implementation wasn't pickleable.
+ for dumper, loader in product(self.modules, repeat=2):
+ e = dumper.Element('foo', bar=42)
+ e.text = "text goes here"
+ e.tail = "opposite of head"
+ dumper.SubElement(e, 'child').append(dumper.Element('grandchild'))
+ e.append(dumper.Element('child'))
+ e.findall('.//grandchild')[0].set('attr', 'other value')
+
+ e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree',
+ dumper, loader)
+
+ self.assertEqual(e2.tag, 'foo')
+ self.assertEqual(e2.attrib['bar'], 42)
+ self.assertEqual(len(e2), 2)
+ self.assertEqualElements(e, e2)
class ElementTreeTest(unittest.TestCase):
def test_istype(self):
@@ -2433,7 +2474,7 @@ class KeywordArgsTest(unittest.TestCase):
class NoAcceleratorTest(unittest.TestCase):
def setUp(self):
if not pyET:
- raise SkipTest('only for the Python version')
+ raise unittest.SkipTest('only for the Python version')
# Test that the C accelerator was not imported for pyET
def test_correct_import_pyET(self):
@@ -2486,10 +2527,10 @@ class CleanContext(object):
def test_main(module=None):
# When invoked without a module, runs the Python ET tests by loading pyET.
# Otherwise, uses the given module as the ET.
+ global pyET
+ pyET = import_fresh_module('xml.etree.ElementTree',
+ blocked=['_elementtree'])
if module is None:
- global pyET
- pyET = import_fresh_module('xml.etree.ElementTree',
- blocked=['_elementtree'])
module = pyET
global ET
@@ -2509,7 +2550,7 @@ def test_main(module=None):
# These tests will only run for the pure-Python version that doesn't import
# _elementtree. We can't use skipUnless here, because pyET is filled in only
# after the module is loaded.
- if pyET:
+ if pyET is not ET:
test_classes.extend([
NoAcceleratorTest,
])
@@ -2518,7 +2559,7 @@ def test_main(module=None):
support.run_unittest(*test_classes)
# XXX the C module should give the same warnings as the Python module
- with CleanContext(quiet=(module is not pyET)):
+ with CleanContext(quiet=(pyET is not ET)):
support.run_doctest(sys.modules[__name__], verbosity=True)
finally:
# don't interfere with subsequent tests
diff --git a/Misc/ACKS b/Misc/ACKS
index 714b807..5e06870 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1080,6 +1080,7 @@ Jerry Seutter
Pete Sevander
Denis Severson
Ian Seyer
+Daniel Shahaf
Ha Shao
Mark Shannon
Richard Shapiro
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index 9f302f9..4b53037 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -814,6 +814,176 @@ element_sizeof(PyObject* _self, PyObject* args)
return PyLong_FromSsize_t(result);
}
+/* dict keys for getstate/setstate. */
+#define PICKLED_TAG "tag"
+#define PICKLED_CHILDREN "_children"
+#define PICKLED_ATTRIB "attrib"
+#define PICKLED_TAIL "tail"
+#define PICKLED_TEXT "text"
+
+/* __getstate__ returns a fabricated instance dict as in the pure-Python
+ * Element implementation, for interoperability/interchangeability. This
+ * makes the pure-Python implementation details an API, but (a) there aren't
+ * any unnecessary structures there; and (b) it buys compatibility with 3.2
+ * pickles. See issue #16076.
+ */
+static PyObject *
+element_getstate(ElementObject *self)
+{
+ int i, noattrib;
+ PyObject *instancedict = NULL, *children;
+
+ /* Build a list of children. */
+ children = PyList_New(self->extra ? self->extra->length : 0);
+ if (!children)
+ return NULL;
+ for (i = 0; i < PyList_GET_SIZE(children); i++) {
+ PyObject *child = self->extra->children[i];
+ Py_INCREF(child);
+ PyList_SET_ITEM(children, i, child);
+ }
+
+ /* Construct the state object. */
+ noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
+ if (noattrib)
+ instancedict = Py_BuildValue("{sOsOs{}sOsO}",
+ PICKLED_TAG, self->tag,
+ PICKLED_CHILDREN, children,
+ PICKLED_ATTRIB,
+ PICKLED_TEXT, self->text,
+ PICKLED_TAIL, self->tail);
+ else
+ instancedict = Py_BuildValue("{sOsOsOsOsO}",
+ PICKLED_TAG, self->tag,
+ PICKLED_CHILDREN, children,
+ PICKLED_ATTRIB, self->extra->attrib,
+ PICKLED_TEXT, self->text,
+ PICKLED_TAIL, self->tail);
+ if (instancedict)
+ return instancedict;
+ else {
+ for (i = 0; i < PyList_GET_SIZE(children); i++)
+ Py_DECREF(PyList_GET_ITEM(children, i));
+ Py_DECREF(children);
+
+ return NULL;
+ }
+}
+
+static PyObject *
+element_setstate_from_attributes(ElementObject *self,
+ PyObject *tag,
+ PyObject *attrib,
+ PyObject *text,
+ PyObject *tail,
+ PyObject *children)
+{
+ Py_ssize_t i, nchildren;
+
+ if (!tag) {
+ PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
+ return NULL;
+ }
+ if (!text) {
+ Py_INCREF(Py_None);
+ text = Py_None;
+ }
+ if (!tail) {
+ Py_INCREF(Py_None);
+ tail = Py_None;
+ }
+
+ Py_CLEAR(self->tag);
+ self->tag = tag;
+ Py_INCREF(self->tag);
+
+ Py_CLEAR(self->text);
+ self->text = text;
+ Py_INCREF(self->text);
+
+ Py_CLEAR(self->tail);
+ self->tail = tail;
+ Py_INCREF(self->tail);
+
+ /* Handle ATTRIB and CHILDREN. */
+ if (!children && !attrib)
+ Py_RETURN_NONE;
+
+ /* Compute 'nchildren'. */
+ if (children) {
+ if (!PyList_Check(children)) {
+ PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
+ return NULL;
+ }
+ nchildren = PyList_Size(children);
+ }
+ else {
+ nchildren = 0;
+ }
+
+ /* Allocate 'extra'. */
+ if (element_resize(self, nchildren)) {
+ return NULL;
+ }
+ assert(self->extra && self->extra->allocated >= nchildren);
+
+ /* Copy children */
+ for (i = 0; i < nchildren; i++) {
+ self->extra->children[i] = PyList_GET_ITEM(children, i);
+ Py_INCREF(self->extra->children[i]);
+ }
+
+ self->extra->length = nchildren;
+ self->extra->allocated = nchildren;
+
+ /* Stash attrib. */
+ if (attrib) {
+ Py_CLEAR(self->extra->attrib);
+ self->extra->attrib = attrib;
+ Py_INCREF(attrib);
+ }
+
+ Py_RETURN_NONE;
+}
+
+/* __setstate__ for Element instance from the Python implementation.
+ * 'state' should be the instance dict.
+ */
+static PyObject *
+element_setstate_from_Python(ElementObject *self, PyObject *state)
+{
+ static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
+ PICKLED_TAIL, PICKLED_CHILDREN, 0};
+ PyObject *args;
+ PyObject *tag, *attrib, *text, *tail, *children;
+ int error;
+
+ /* More instance dict members than we know to handle? */
+ tag = attrib = text = tail = children = NULL;
+ args = PyTuple_New(0);
+ error = ! PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
+ &attrib, &text, &tail, &children);
+ Py_DECREF(args);
+ if (error)
+ return NULL;
+ else
+ return element_setstate_from_attributes(self, tag, attrib, text,
+ tail, children);
+}
+
+static PyObject *
+element_setstate(ElementObject *self, PyObject *state)
+{
+ if (!PyDict_CheckExact(state)) {
+ PyErr_Format(PyExc_TypeError,
+ "Don't know how to unpickle \"%.200R\" as an Element",
+ state);
+ return NULL;
+ }
+ else
+ return element_setstate_from_Python(self, state);
+}
+
LOCAL(int)
checkpath(PyObject* tag)
{
@@ -1587,6 +1757,8 @@ static PyMethodDef element_methods[] = {
{"__copy__", (PyCFunction) element_copy, METH_VARARGS},
{"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
{"__sizeof__", element_sizeof, METH_NOARGS},
+ {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
+ {"__setstate__", (PyCFunction)element_setstate, METH_O},
{NULL, NULL}
};
@@ -1691,7 +1863,7 @@ static PyMappingMethods element_as_mapping = {
static PyTypeObject Element_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
- "Element", sizeof(ElementObject), 0,
+ "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
/* methods */
(destructor)element_dealloc, /* tp_dealloc */
0, /* tp_print */
@@ -1913,6 +2085,8 @@ elementiter_next(ElementIterObject *it)
static PyTypeObject ElementIter_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
+ /* Using the module's name since the pure-Python implementation does not
+ have such a type. */
"_elementtree._element_iterator", /* tp_name */
sizeof(ElementIterObject), /* tp_basicsize */
0, /* tp_itemsize */
@@ -2458,7 +2632,7 @@ static PyMethodDef treebuilder_methods[] = {
static PyTypeObject TreeBuilder_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
- "TreeBuilder", sizeof(TreeBuilderObject), 0,
+ "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
/* methods */
(destructor)treebuilder_dealloc, /* tp_dealloc */
0, /* tp_print */
@@ -3420,7 +3594,7 @@ xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
static PyTypeObject XMLParser_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
- "XMLParser", sizeof(XMLParserObject), 0,
+ "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
/* methods */
(destructor)xmlparser_dealloc, /* tp_dealloc */
0, /* tp_print */