summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eli Bendersky <eliben@gmail.com> 2012-06-01 08:32:34 (GMT)
committer: Eli Bendersky <eliben@gmail.com> 2012-06-01 08:32:34 (GMT)
commit: 2b6b73e7e1a9d13545bea9636936ce5d3e1a87df (patch)
tree: 821b6042eb5a92d2cbda4931dc4515b0fa3d4348
parent: 20d4174b3d211609c774bd0711dcbc1793f146aa (diff)
download: cpython-2b6b73e7e1a9d13545bea9636936ce5d3e1a87df.zip
cpython-2b6b73e7e1a9d13545bea9636936ce5d3e1a87df.tar.gz
cpython-2b6b73e7e1a9d13545bea9636936ce5d3e1a87df.tar.bz2
Issue #14007: implement doctype() method calling in XMLParser of _elementtree.
Includes exposing a doctype handler from expat through pyexpat.
-rw-r--r-- Include/pyexpat.h 2
-rw-r--r-- Lib/test/test_xml_etree.py 19
-rw-r--r-- Modules/_elementtree.c 109
-rw-r--r-- Modules/pyexpat.c 1
4 files changed, 120 insertions, 11 deletions
diff --git a/Include/pyexpat.h b/Include/pyexpat.h
index 5340ef5..168b5b2 100644
--- a/Include/pyexpat.h
+++ b/Include/pyexpat.h
@@ -43,6 +43,8 @@ struct PyExpat_CAPI
XML_Parser parser, XML_UnknownEncodingHandler handler,
void *encodingHandlerData);
void (*SetUserData)(XML_Parser parser, void *userData);
+ void (*SetStartDoctypeDeclHandler)(XML_Parser parser,
+ XML_StartDoctypeDeclHandler start);
/* always add new stuff to the end! */
};
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 31e005b..49a5633 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2009,7 +2009,6 @@ class TreeBuilderTest(unittest.TestCase):
self.assertEqual(lst, ['toplevel'])
- @unittest.expectedFailure # XXX issue 14007 with C ElementTree
def test_doctype(self):
class DoctypeParser:
_doctype = None
@@ -2030,6 +2029,10 @@ class TreeBuilderTest(unittest.TestCase):
class XMLParserTest(unittest.TestCase):
sample1 = '<file><line>22</line></file>'
+ sample2 = ('<!DOCTYPE html PUBLIC'
+ ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
+ ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
+ '<html>text</html>')
def _check_sample_element(self, e):
self.assertEqual(e.tag, 'file')
@@ -2055,6 +2058,20 @@ class XMLParserTest(unittest.TestCase):
parser.feed(self.sample1)
self._check_sample_element(parser.close())
+ def test_subclass_doctype(self):
+ _doctype = None
+ class MyParserWithDoctype(ET.XMLParser):
+ def doctype(self, name, pubid, system):
+ nonlocal _doctype
+ _doctype = (name, pubid, system)
+
+ parser = MyParserWithDoctype()
+ parser.feed(self.sample2)
+ parser.close()
+ self.assertEqual(_doctype,
+ ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
+ 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
+
class NoAcceleratorTest(unittest.TestCase):
# Test that the C accelerator was not imported for pyET
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index 7bc1880..d984b51 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -2257,24 +2257,27 @@ typedef struct {
XML_Parser parser;
- PyObject* target;
- PyObject* entity;
+ PyObject *target;
+ PyObject *entity;
- PyObject* names;
+ PyObject *names;
- PyObject* handle_start;
- PyObject* handle_data;
- PyObject* handle_end;
+ PyObject *handle_start;
+ PyObject *handle_data;
+ PyObject *handle_end;
- PyObject* handle_comment;
- PyObject* handle_pi;
+ PyObject *handle_comment;
+ PyObject *handle_pi;
+ PyObject *handle_doctype;
- PyObject* handle_close;
+ PyObject *handle_close;
} XMLParserObject;
static PyTypeObject XMLParser_Type;
+/* True only when op's type is exactly XMLParser_Type; instances of
+ * subclasses compare false. */
+#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
+
/* helpers */
LOCAL(PyObject*)
@@ -2601,6 +2604,78 @@ expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
}
}
+/* Expat callback for the start of a <!DOCTYPE ...> declaration.
+ *
+ * doctype_name, pubid and sysid are converted to Python objects with
+ * makeuniversal() (a NULL pubid/sysid becomes None) and passed, in the
+ * order (name, pubid, sysid), to:
+ *
+ *   1. the target's doctype handler, when self->handle_doctype is set;
+ *   2. the parser's own doctype() method, but only when self is an
+ *      instance of an XMLParser subclass.  A DeprecationWarning is
+ *      issued before that call.
+ *
+ * The handler returns nothing.  As soon as any step fails, the Python
+ * exception is left pending and no further Python calls are made.
+ * has_internal_subset is not used.
+ */
+static void
+expat_start_doctype_handler(XMLParserObject *self,
+                            const XML_Char *doctype_name,
+                            const XML_Char *sysid,
+                            const XML_Char *pubid,
+                            int has_internal_subset)
+{
+    PyObject *self_pyobj = (PyObject *)self;
+    PyObject *doctype_name_obj = NULL;
+    PyObject *sysid_obj = NULL;
+    PyObject *pubid_obj = NULL;
+    PyObject *parser_doctype = NULL;
+    PyObject *res;
+
+    /* Do not run Python code on top of an exception that is already
+     * pending when this callback is entered. */
+    if (PyErr_Occurred())
+        return;
+
+    doctype_name_obj = makeuniversal(self, doctype_name);
+    if (!doctype_name_obj)
+        goto clear;
+
+    if (sysid) {
+        sysid_obj = makeuniversal(self, sysid);
+        if (!sysid_obj)
+            goto clear;
+    } else {
+        Py_INCREF(Py_None);
+        sysid_obj = Py_None;
+    }
+
+    if (pubid) {
+        pubid_obj = makeuniversal(self, pubid);
+        if (!pubid_obj)
+            goto clear;
+    } else {
+        Py_INCREF(Py_None);
+        pubid_obj = Py_None;
+    }
+
+    /* If the target has a handler for doctype, call it. */
+    if (self->handle_doctype) {
+        res = PyObject_CallFunction(self->handle_doctype, "OOO",
+                                    doctype_name_obj, pubid_obj, sysid_obj);
+        /* Stop here on failure: the lookup, warning and call below must
+         * not be made while an exception is pending. */
+        if (!res)
+            goto clear;
+        Py_DECREF(res);
+    }
+
+    /* A vanilla XMLParser has nothing more to do; only subclasses get
+     * their doctype() method called, so skip the lookup for the exact
+     * type. */
+    if (XMLParser_CheckExact(self_pyobj))
+        goto clear;
+
+    parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
+    if (!parser_doctype) {
+        /* The method is optional: a missing attribute is not an error.
+         * Any other exception is left pending. */
+        if (PyErr_ExceptionMatches(PyExc_AttributeError))
+            PyErr_Clear();
+        goto clear;
+    }
+
+    /* The warning may have been turned into an exception by the
+     * warnings filters; in that case do not call the method. */
+    if (PyErr_WarnEx(PyExc_DeprecationWarning,
+                     "This method of XMLParser is deprecated. Define"
+                     " doctype() method on the TreeBuilder target.",
+                     1) < 0) {
+        goto clear;
+    }
+
+    res = PyObject_CallFunction(parser_doctype, "OOO",
+                                doctype_name_obj, pubid_obj, sysid_obj);
+    Py_XDECREF(res);
+
+clear:
+    /* Single exit point: every object may still be NULL here. */
+    Py_XDECREF(parser_doctype);
+    Py_XDECREF(doctype_name_obj);
+    Py_XDECREF(pubid_obj);
+    Py_XDECREF(sysid_obj);
+}
+
static void
expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
const XML_Char* data_in)
@@ -2676,6 +2751,7 @@ xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self->target = self->entity = self->names = NULL;
self->handle_start = self->handle_data = self->handle_end = NULL;
self->handle_comment = self->handle_pi = self->handle_close = NULL;
+ self->handle_doctype = NULL;
}
return (PyObject *)self;
}
@@ -2730,6 +2806,7 @@ xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
self_xp->handle_close = PyObject_GetAttrString(target, "close");
+ self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
PyErr_Clear();
@@ -2758,6 +2835,10 @@ xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
self_xp->parser,
(XML_ProcessingInstructionHandler) expat_pi_handler
);
+ EXPAT(SetStartDoctypeDeclHandler)(
+ self_xp->parser,
+ (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
+ );
EXPAT(SetUnknownEncodingHandler)(
self_xp->parser,
(XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
@@ -2794,6 +2875,7 @@ xmlparser_gc_clear(XMLParserObject *self)
Py_XDECREF(self->handle_end);
Py_XDECREF(self->handle_data);
Py_XDECREF(self->handle_start);
+ Py_XDECREF(self->handle_doctype);
Py_XDECREF(self->target);
Py_XDECREF(self->entity);
@@ -2950,7 +3032,13 @@ xmlparser_parse(XMLParserObject* self, PyObject* args)
}
static PyObject*
-xmlparser_setevents(XMLParserObject* self, PyObject* args)
+xmlparser_doctype(XMLParserObject *self, PyObject *args)
+{
+    /* Default doctype() method of XMLParser: a deliberate no-op that
+     * ignores its arguments and returns None. */
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+static PyObject*
+xmlparser_setevents(XMLParserObject *self, PyObject* args)
{
/* activate element event reporting */
@@ -3054,6 +3142,7 @@ static PyMethodDef xmlparser_methods[] = {
{"close", (PyCFunction) xmlparser_close, METH_VARARGS},
{"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
{"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
+ {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
{NULL, NULL}
};
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index fb02329..a500a1e 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -1904,6 +1904,7 @@ MODULE_INITFUNC(void)
capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
capi.SetUserData = XML_SetUserData;
+ capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
/* export using capsule */
capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);