diff options
author | Eli Bendersky <eliben@gmail.com> | 2012-05-30 14:57:50 (GMT) |
---|---|---|
committer | Eli Bendersky <eliben@gmail.com> | 2012-05-30 14:57:50 (GMT) |
commit | 48d358ba8665ecc5d5a9f5c0ca572df39f294cb8 (patch) | |
tree | e0f298df8270b5d483ecf4ee2d2b6fbc3d524d44 | |
parent | a0cf90e3b1bebde4dea8b99083830b9fb209a1c0 (diff) | |
download | cpython-48d358ba8665ecc5d5a9f5c0ca572df39f294cb8.zip cpython-48d358ba8665ecc5d5a9f5c0ca572df39f294cb8.tar.gz cpython-48d358ba8665ecc5d5a9f5c0ca572df39f294cb8.tar.bz2 |
Issue #14007: implemented the 'element_factory' feature of TreeBuilder in
_elementtree, with a test.
-rw-r--r-- | Doc/library/xml.etree.elementtree.rst | 6 | ||||
-rw-r--r-- | Lib/test/test_xml_etree.py | 18 | ||||
-rw-r--r-- | Modules/_elementtree.c | 103 |
3 files changed, 90 insertions, 37 deletions
diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 156571c..13f98e4 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -716,9 +716,9 @@ TreeBuilder Objects Generic element structure builder. This builder converts a sequence of start, data, and end method calls to a well-formed element structure. You can use this class to build an element structure using a custom XML parser, - or a parser for some other XML-like format. The *element_factory* is called - to create new :class:`Element` instances when given. - + or a parser for some other XML-like format. *element_factory*, when given, + must be a callable accepting two positional arguments: a tag and + a dict of attributes. It is expected to return a new element instance. .. method:: close() diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 74fade5..ec352d8 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1959,6 +1959,8 @@ class TreeBuilderTest(unittest.TestCase): ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' '<html>text</html>') + sample2 = '''<toplevel>sometext</toplevel>''' + def test_dummy_builder(self): class BaseDummyBuilder: def close(self): @@ -1993,11 +1995,19 @@ class TreeBuilderTest(unittest.TestCase): e = parser.close() self.assertEqual(e.tag, 'html') - # XXX in _elementtree, the constructor of TreeBuilder expects no - # arguments - @unittest.expectedFailure def test_element_factory(self): - tb = ET.TreeBuilder(element_factory=lambda: ET.Element()) + lst = [] + def myfactory(tag, attrib): + nonlocal lst + lst.append(tag) + return ET.Element(tag, attrib) + + tb = ET.TreeBuilder(element_factory=myfactory) + parser = ET.XMLParser(target=tb) + parser.feed(self.sample2) + parser.close() + + self.assertEqual(lst, ['toplevel']) @unittest.expectedFailure # XXX issue 14007 with C ElementTree def test_doctype(self): diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 12607fa..528a912 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -191,6 +191,15 @@ list_join(PyObject* list) return result; } +/* Is the given object an empty dictionary? +*/ +static int +is_empty_dict(PyObject *obj) +{ + return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0; +} + + /* -------------------------------------------------------------------- */ /* the Element type */ @@ -297,14 +306,9 @@ create_new_element(PyObject* tag, PyObject* attrib) self = PyObject_GC_New(ElementObject, &Element_Type); if (self == NULL) return NULL; - - /* use None for empty dictionaries */ - if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib)) - attrib = Py_None; - self->extra = NULL; - if (attrib != Py_None) { + if (attrib != Py_None && !is_empty_dict(attrib)) { if (create_extra(self, attrib) < 0) { PyObject_Del(self); return NULL; @@ -416,22 +420,14 @@ element_init(PyObject *self, PyObject *args, PyObject *kwds) self_elem = (ElementObject *)self; - /* Use None for empty dictionaries */ - if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) { - Py_INCREF(Py_None); - attrib = Py_None; - } - - if (attrib != Py_None) { + if (attrib != Py_None && !is_empty_dict(attrib)) { if (create_extra(self_elem, attrib) < 0) { PyObject_Del(self_elem); return -1; } } - /* If create_extra needed attrib, it took a reference to it, so we can - * release ours anyway. - */ + /* We own a reference to attrib here and it's no longer needed. */ Py_DECREF(attrib); /* Replace the objects already pointed to by tag, text and tail. */ @@ -1813,6 +1809,8 @@ typedef struct { PyObject *stack; /* element stack */ Py_ssize_t index; /* current stack size (0 means empty) */ + PyObject *element_factory; + /* element tracing */ PyObject *events; /* list of events, or NULL if not collecting */ PyObject *start_event_obj; /* event objects (NULL to ignore) */ @@ -1841,6 +1839,7 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) t->last = (ElementObject *)Py_None; t->data = NULL; + t->element_factory = NULL; t->stack = PyList_New(20); if (!t->stack) { Py_DECREF(t->this); @@ -1859,11 +1858,38 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) static int treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"element_factory", NULL}; + PyObject *element_factory = NULL; + TreeBuilderObject *self_tb = (TreeBuilderObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist, + &element_factory)) { + return -1; + } + + if (element_factory) { + Py_INCREF(element_factory); + Py_XDECREF(self_tb->element_factory); + self_tb->element_factory = element_factory; + } + return 0; } -static void -treebuilder_dealloc(TreeBuilderObject *self) +static int +treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->root); + Py_VISIT(self->this); + Py_VISIT(self->last); + Py_VISIT(self->data); + Py_VISIT(self->stack); + Py_VISIT(self->element_factory); + return 0; +} + +static int +treebuilder_gc_clear(TreeBuilderObject *self) { Py_XDECREF(self->end_ns_event_obj); Py_XDECREF(self->start_ns_event_obj); @@ -1874,8 +1900,16 @@ treebuilder_dealloc(TreeBuilderObject *self) Py_XDECREF(self->data); Py_DECREF(self->last); Py_DECREF(self->this); + Py_CLEAR(self->element_factory); Py_XDECREF(self->root); + return 0; +} +static void +treebuilder_dealloc(TreeBuilderObject *self) +{ + PyObject_GC_UnTrack(self); + treebuilder_gc_clear(self); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -1904,9 +1938,14 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, self->data = NULL; } - node = create_new_element(tag, attrib); - if (!node) + if (self->element_factory) { + node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib); + } else { + node = create_new_element(tag, attrib); + } + if (!node) { return NULL; + } this = (PyObject*) self->this; @@ -2180,10 +2219,11 @@ static PyTypeObject TreeBuilder_Type = { 0, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + /* tp_flags */ 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ + (traverseproc)treebuilder_gc_traverse, /* tp_traverse */ + (inquiry)treebuilder_gc_clear, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ @@ -2443,17 +2483,20 @@ expat_start_handler(XMLParserObject* self, const XML_Char* tag_in, attrib = Py_None; } - if (TreeBuilder_CheckExact(self->target)) + /* If we get None, pass an empty dictionary on */ + if (attrib == Py_None) { + Py_DECREF(attrib); + attrib = PyDict_New(); + if (!attrib) + return; + } + + if (TreeBuilder_CheckExact(self->target)) { /* shortcut */ res = treebuilder_handle_start((TreeBuilderObject*) self->target, tag, attrib); + } else if (self->handle_start) { - if (attrib == Py_None) { - Py_DECREF(attrib); - attrib = PyDict_New(); - if (!attrib) - return; - } res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib); } else res = NULL; |