From 52467b167e28108509804d82fc050216fce3dc05 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 1 Jun 2012 07:13:08 +0300 Subject: Issue #14007: make XMLParser a real subclassable type exported from _elementtree. +cleanups --- Doc/library/xml.etree.elementtree.rst | 10 +- Lib/test/test_xml_etree.py | 29 ++++ Modules/_elementtree.c | 250 +++++++++++++++++++--------------- 3 files changed, 174 insertions(+), 115 deletions(-) diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 13f98e4..51ff1ee 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -646,8 +646,8 @@ ElementTree Objects Loads an external XML section into this element tree. *source* is a file name or :term:`file object`. *parser* is an optional parser instance. - If not given, the standard XMLParser parser is used. Returns the section - root element. + If not given, the standard :class:`XMLParser` parser is used. Returns the + section root element. .. method:: write(file, encoding="us-ascii", xml_declaration=None, method="xml") @@ -767,9 +767,9 @@ XMLParser Objects :class:`Element` structure builder for XML source data, based on the expat parser. *html* are predefined HTML entities. This flag is not supported by the current implementation. *target* is the target object. If omitted, the - builder uses an instance of the standard TreeBuilder class. *encoding* [1]_ - is optional. If given, the value overrides the encoding specified in the - XML file. + builder uses an instance of the standard :class:`TreeBuilder` class. + *encoding* [1]_ is optional. If given, the value overrides the encoding + specified in the XML file. .. method:: close() diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index ec352d8..31e005b 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2028,6 +2028,34 @@ class TreeBuilderTest(unittest.TestCase): 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) +class XMLParserTest(unittest.TestCase): + sample1 = '22' + + def _check_sample_element(self, e): + self.assertEqual(e.tag, 'file') + self.assertEqual(e[0].tag, 'line') + self.assertEqual(e[0].text, '22') + + def test_constructor_args(self): + # Positional args. The first (html) is not supported, but should be + # nevertheless correctly accepted. + parser = ET.XMLParser(None, ET.TreeBuilder(), 'utf-8') + parser.feed(self.sample1) + self._check_sample_element(parser.close()) + + # Now as keyword args. + parser2 = ET.XMLParser(encoding='utf-8', html=[{}], target=ET.TreeBuilder()) + parser2.feed(self.sample1) + self._check_sample_element(parser2.close()) + + def test_subclass(self): + class MyParser(ET.XMLParser): + pass + parser = MyParser() + parser.feed(self.sample1) + self._check_sample_element(parser.close()) + + class NoAcceleratorTest(unittest.TestCase): # Test that the C accelerator was not imported for pyET def test_correct_import_pyET(self): @@ -2245,6 +2273,7 @@ def test_main(module=pyET): ElementTreeTest, NamespaceParseTest, TreeBuilderTest, + XMLParserTest, KeywordArgsTest] if module is pyET: # Run the tests specific to the Python implementation diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 528a912..f2a1e64 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -2257,6 +2257,9 @@ static struct PyExpat_CAPI* expat_capi; #define EXPAT(func) (XML_##func) #endif +static XML_Memory_Handling_Suite ExpatMemoryHandler = { + PyObject_Malloc, PyObject_Realloc, PyObject_Free}; + typedef struct { PyObject_HEAD @@ -2671,121 +2674,125 @@ expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name, } /* -------------------------------------------------------------------- */ -/* constructor and destructor */ -static PyObject* -xmlparser(PyObject* self_, PyObject* args, PyObject* kw) +static PyObject * +xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - XMLParserObject* self; - /* FIXME: does this need to be static? */ - static XML_Memory_Handling_Suite memory_handler; - - PyObject* target = NULL; - char* encoding = NULL; - static char* kwlist[] = { "target", "encoding", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist, - &target, &encoding)) - return NULL; - -#if defined(USE_PYEXPAT_CAPI) - if (!expat_capi) { - PyErr_SetString( - PyExc_RuntimeError, "cannot load dispatch table from pyexpat" - ); - return NULL; + XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0); + if (self) { + self->parser = NULL; + self->target = self->entity = self->names = NULL; + self->handle_start = self->handle_data = self->handle_end = NULL; + self->handle_comment = self->handle_pi = self->handle_close = NULL; } -#endif + return (PyObject *)self; +} - self = PyObject_New(XMLParserObject, &XMLParser_Type); - if (self == NULL) - return NULL; +static int +xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + XMLParserObject *self_xp = (XMLParserObject *)self; + PyObject *target = NULL, *html = NULL; + char *encoding = NULL; + static char *kwlist[] = {"html", "target", "encoding"}; - self->entity = PyDict_New(); - if (!self->entity) { - PyObject_Del(self); - return NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist, + &html, &target, &encoding)) { + return -1; } - self->names = PyDict_New(); - if (!self->names) { - PyObject_Del(self->entity); - PyObject_Del(self); - return NULL; - } + self_xp->entity = PyDict_New(); + if (!self_xp->entity) + return -1; - memory_handler.malloc_fcn = PyObject_Malloc; - memory_handler.realloc_fcn = PyObject_Realloc; - memory_handler.free_fcn = PyObject_Free; + self_xp->names = PyDict_New(); + if (!self_xp->names) { + Py_XDECREF(self_xp->entity); + return -1; + } - self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}"); - if (!self->parser) { - PyObject_Del(self->names); - PyObject_Del(self->entity); - PyObject_Del(self); + self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}"); + if (!self_xp->parser) { + Py_XDECREF(self_xp->entity); + Py_XDECREF(self_xp->names); PyErr_NoMemory(); - return NULL; + return -1; } - /* setup target handlers */ - if (!target) { + if (target) { + Py_INCREF(target); + } else { target = treebuilder_new(&TreeBuilder_Type, NULL, NULL); if (!target) { - EXPAT(ParserFree)(self->parser); - PyObject_Del(self->names); - PyObject_Del(self->entity); - PyObject_Del(self); - return NULL; + Py_XDECREF(self_xp->entity); + Py_XDECREF(self_xp->names); + EXPAT(ParserFree)(self_xp->parser); + return -1; } - } else - Py_INCREF(target); - self->target = target; + } + self_xp->target = target; - self->handle_start = PyObject_GetAttrString(target, "start"); - self->handle_data = PyObject_GetAttrString(target, "data"); - self->handle_end = PyObject_GetAttrString(target, "end"); - self->handle_comment = PyObject_GetAttrString(target, "comment"); - self->handle_pi = PyObject_GetAttrString(target, "pi"); - self->handle_close = PyObject_GetAttrString(target, "close"); + self_xp->handle_start = PyObject_GetAttrString(target, "start"); + self_xp->handle_data = PyObject_GetAttrString(target, "data"); + self_xp->handle_end = PyObject_GetAttrString(target, "end"); + self_xp->handle_comment = PyObject_GetAttrString(target, "comment"); + self_xp->handle_pi = PyObject_GetAttrString(target, "pi"); + self_xp->handle_close = PyObject_GetAttrString(target, "close"); PyErr_Clear(); - + /* configure parser */ - EXPAT(SetUserData)(self->parser, self); + EXPAT(SetUserData)(self_xp->parser, self_xp); EXPAT(SetElementHandler)( - self->parser, + self_xp->parser, (XML_StartElementHandler) expat_start_handler, (XML_EndElementHandler) expat_end_handler ); EXPAT(SetDefaultHandlerExpand)( - self->parser, + self_xp->parser, (XML_DefaultHandler) expat_default_handler ); EXPAT(SetCharacterDataHandler)( - self->parser, + self_xp->parser, (XML_CharacterDataHandler) expat_data_handler ); - if (self->handle_comment) + if (self_xp->handle_comment) EXPAT(SetCommentHandler)( - self->parser, + self_xp->parser, (XML_CommentHandler) expat_comment_handler ); - if (self->handle_pi) + if (self_xp->handle_pi) EXPAT(SetProcessingInstructionHandler)( - self->parser, + self_xp->parser, (XML_ProcessingInstructionHandler) expat_pi_handler ); EXPAT(SetUnknownEncodingHandler)( - self->parser, + self_xp->parser, (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL ); - ALLOC(sizeof(XMLParserObject), "create expatparser"); + return 0; +} - return (PyObject*) self; +static int +xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->handle_close); + Py_VISIT(self->handle_pi); + Py_VISIT(self->handle_comment); + Py_VISIT(self->handle_end); + Py_VISIT(self->handle_data); + Py_VISIT(self->handle_start); + + Py_VISIT(self->target); + Py_VISIT(self->entity); + Py_VISIT(self->names); + + return 0; } -static void -xmlparser_dealloc(XMLParserObject* self) +static int +xmlparser_gc_clear(XMLParserObject *self) { EXPAT(ParserFree)(self->parser); @@ -2796,17 +2803,20 @@ xmlparser_dealloc(XMLParserObject* self) Py_XDECREF(self->handle_data); Py_XDECREF(self->handle_start); - Py_DECREF(self->target); - Py_DECREF(self->entity); - Py_DECREF(self->names); - - RELEASE(sizeof(XMLParserObject), "destroy expatparser"); + Py_XDECREF(self->target); + Py_XDECREF(self->entity); + Py_XDECREF(self->names); - PyObject_Del(self); + return 0; } -/* -------------------------------------------------------------------- */ -/* methods (in alphabetical order) */ +static void +xmlparser_dealloc(XMLParserObject* self) +{ + PyObject_GC_UnTrack(self); + xmlparser_gc_clear(self); + Py_TYPE(self)->tp_free((PyObject *)self); +} LOCAL(PyObject*) expat_parse(XMLParserObject* self, char* data, int data_len, int final) @@ -3083,31 +3093,42 @@ static PyTypeObject XMLParser_Type = { PyVarObject_HEAD_INIT(NULL, 0) "XMLParser", sizeof(XMLParserObject), 0, /* methods */ - (destructor)xmlparser_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - (getattrofunc)xmlparser_getattro, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - xmlparser_methods, /* tp_methods */ - 0, /* tp_members */ + (destructor)xmlparser_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + (getattrofunc)xmlparser_getattro, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)xmlparser_gc_traverse, /* tp_traverse */ + (inquiry)xmlparser_gc_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + xmlparser_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)xmlparser_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + xmlparser_new, /* tp_new */ + 0, /* tp_free */ }; #endif @@ -3117,9 +3138,6 @@ static PyTypeObject XMLParser_Type = { static PyMethodDef _functions[] = { {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS}, -#if defined(USE_EXPAT) - {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, -#endif {NULL, NULL} }; @@ -3214,8 +3232,15 @@ PyInit__elementtree(void) expat_capi->size < sizeof(struct PyExpat_CAPI) || expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION || expat_capi->MINOR_VERSION != XML_MINOR_VERSION || - expat_capi->MICRO_VERSION != XML_MICRO_VERSION) + expat_capi->MICRO_VERSION != XML_MICRO_VERSION) { expat_capi = NULL; + } + } + if (!expat_capi) { + PyErr_SetString( + PyExc_RuntimeError, "cannot load dispatch table from pyexpat" + ); + return NULL; } #endif @@ -3231,5 +3256,10 @@ PyInit__elementtree(void) Py_INCREF((PyObject *)&TreeBuilder_Type); PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type); +#if defined(USE_EXPAT) + Py_INCREF((PyObject *)&XMLParser_Type); + PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type); +#endif + return m; } -- cgit v0.12