diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-05-01 19:20:38 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-01 19:20:38 (GMT) |
commit | 43851a202cabce1e6be699e7177735c778b6697e (patch) | |
tree | b517d3f81642bf531c8876522d7fabf55b9155c2 /Modules/_elementtree.c | |
parent | 3d37ea25dc97e4cb024045581979570835deb13c (diff) | |
download | cpython-43851a202cabce1e6be699e7177735c778b6697e.zip cpython-43851a202cabce1e6be699e7177735c778b6697e.tar.gz cpython-43851a202cabce1e6be699e7177735c778b6697e.tar.bz2 |
bpo-36673: Implement comment/PI parsing support for the TreeBuilder in ElementTree. (#12883)
* bpo-36673: Implement comment/PI parsing support for the TreeBuilder in ElementTree.
* bpo-36673: Rewrite the comment/PI factory handling for the TreeBuilder in "_elementtree" to make it use the same factories as the ElementTree module, and to make it explicit when the comments/PIs are inserted into the tree and when they are not (which is the default).
Diffstat (limited to 'Modules/_elementtree.c')
-rw-r--r-- | Modules/_elementtree.c | 331 |
1 files changed, 302 insertions, 29 deletions
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 1e58cd0..5481c61 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -92,6 +92,8 @@ typedef struct { PyObject *parseerror_obj; PyObject *deepcopy_obj; PyObject *elementpath_obj; + PyObject *comment_factory; + PyObject *pi_factory; } elementtreestate; static struct PyModuleDef elementtreemodule; @@ -114,6 +116,8 @@ elementtree_clear(PyObject *m) Py_CLEAR(st->parseerror_obj); Py_CLEAR(st->deepcopy_obj); Py_CLEAR(st->elementpath_obj); + Py_CLEAR(st->comment_factory); + Py_CLEAR(st->pi_factory); return 0; } @@ -124,6 +128,8 @@ elementtree_traverse(PyObject *m, visitproc visit, void *arg) Py_VISIT(st->parseerror_obj); Py_VISIT(st->deepcopy_obj); Py_VISIT(st->elementpath_obj); + Py_VISIT(st->comment_factory); + Py_VISIT(st->pi_factory); return 0; } @@ -2385,6 +2391,8 @@ typedef struct { Py_ssize_t index; /* current stack size (0 means empty) */ PyObject *element_factory; + PyObject *comment_factory; + PyObject *pi_factory; /* element tracing */ PyObject *events_append; /* the append method of the list of events, or NULL */ @@ -2392,6 +2400,11 @@ typedef struct { PyObject *end_event_obj; PyObject *start_ns_event_obj; PyObject *end_ns_event_obj; + PyObject *comment_event_obj; + PyObject *pi_event_obj; + + char insert_comments; + char insert_pis; } TreeBuilderObject; #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type) @@ -2413,6 +2426,8 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) t->data = NULL; t->element_factory = NULL; + t->comment_factory = NULL; + t->pi_factory = NULL; t->stack = PyList_New(20); if (!t->stack) { Py_DECREF(t->this); @@ -2425,6 +2440,8 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) t->events_append = NULL; t->start_event_obj = t->end_event_obj = NULL; t->start_ns_event_obj = t->end_ns_event_obj = NULL; + t->comment_event_obj = t->pi_event_obj = NULL; + t->insert_comments = t->insert_pis = 0; } return (PyObject *)t; } @@ -2433,17 +2450,53 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) _elementtree.TreeBuilder.__init__ element_factory: object = NULL + * + comment_factory: object = NULL + pi_factory: object = NULL + insert_comments: bool = False + insert_pis: bool = False [clinic start generated code]*/ static int _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self, - PyObject *element_factory) -/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/ + PyObject *element_factory, + PyObject *comment_factory, + PyObject *pi_factory, + int insert_comments, int insert_pis) +/*[clinic end generated code: output=8571d4dcadfdf952 input=1f967b5c245e0a71]*/ { - if (element_factory) { + if (element_factory && element_factory != Py_None) { Py_INCREF(element_factory); Py_XSETREF(self->element_factory, element_factory); + } else { + Py_CLEAR(self->element_factory); + } + + if (!comment_factory || comment_factory == Py_None) { + elementtreestate *st = ET_STATE_GLOBAL; + comment_factory = st->comment_factory; + } + if (comment_factory) { + Py_INCREF(comment_factory); + Py_XSETREF(self->comment_factory, comment_factory); + self->insert_comments = insert_comments; + } else { + Py_CLEAR(self->comment_factory); + self->insert_comments = 0; + } + + if (!pi_factory || pi_factory == Py_None) { + elementtreestate *st = ET_STATE_GLOBAL; + pi_factory = st->pi_factory; + } + if (pi_factory) { + Py_INCREF(pi_factory); + Py_XSETREF(self->pi_factory, pi_factory); + self->insert_pis = insert_pis; + } else { + Py_CLEAR(self->pi_factory); + self->insert_pis = 0; } return 0; @@ -2452,6 +2505,8 @@ _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self, static int treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) { + Py_VISIT(self->pi_event_obj); + Py_VISIT(self->comment_event_obj); Py_VISIT(self->end_ns_event_obj); Py_VISIT(self->start_ns_event_obj); Py_VISIT(self->end_event_obj); @@ -2462,6 +2517,8 @@ treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) Py_VISIT(self->last); Py_VISIT(self->data); Py_VISIT(self->stack); + Py_VISIT(self->pi_factory); + Py_VISIT(self->comment_factory); Py_VISIT(self->element_factory); return 0; } @@ -2469,6 +2526,8 @@ treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) static int treebuilder_gc_clear(TreeBuilderObject *self) { + Py_CLEAR(self->pi_event_obj); + Py_CLEAR(self->comment_event_obj); Py_CLEAR(self->end_ns_event_obj); Py_CLEAR(self->start_ns_event_obj); Py_CLEAR(self->end_event_obj); @@ -2478,6 +2537,8 @@ treebuilder_gc_clear(TreeBuilderObject *self) Py_CLEAR(self->data); Py_CLEAR(self->last); Py_CLEAR(self->this); + Py_CLEAR(self->pi_factory); + Py_CLEAR(self->comment_factory); Py_CLEAR(self->element_factory); Py_CLEAR(self->root); return 0; @@ -2494,6 +2555,57 @@ treebuilder_dealloc(TreeBuilderObject *self) /* -------------------------------------------------------------------- */ /* helpers for handling of arbitrary element-like objects */ +/*[clinic input] +_elementtree._set_factories + + comment_factory: object + pi_factory: object + / + +Change the factories used to create comments and processing instructions. + +For internal use only. +[clinic start generated code]*/ + +static PyObject * +_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory, + PyObject *pi_factory) +/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/ +{ + elementtreestate *st = ET_STATE_GLOBAL; + PyObject *old; + + if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) { + PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s", + Py_TYPE(comment_factory)->tp_name); + return NULL; + } + if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) { + PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s", + Py_TYPE(pi_factory)->tp_name); + return NULL; + } + + old = PyTuple_Pack(2, + st->comment_factory ? st->comment_factory : Py_None, + st->pi_factory ? st->pi_factory : Py_None); + + if (comment_factory == Py_None) { + Py_CLEAR(st->comment_factory); + } else { + Py_INCREF(comment_factory); + Py_XSETREF(st->comment_factory, comment_factory); + } + if (pi_factory == Py_None) { + Py_CLEAR(st->pi_factory); + } else { + Py_INCREF(pi_factory); + Py_XSETREF(st->pi_factory, pi_factory); + } + + return old; +} + static int treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data, PyObject **dest, _Py_Identifier *name) @@ -2569,7 +2681,7 @@ treebuilder_append_event(TreeBuilderObject *self, PyObject *action, PyObject *event = PyTuple_Pack(2, action, node); if (event == NULL) return -1; - res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL); + res = _PyObject_FastCall(self->events_append, &event, 1); Py_DECREF(event); if (res == NULL) return -1; @@ -2593,7 +2705,7 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, return NULL; } - if (!self->element_factory || self->element_factory == Py_None) { + if (!self->element_factory) { node = create_new_element(tag, attrib); } else if (attrib == Py_None) { attrib = PyDict_New(); @@ -2721,6 +2833,84 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) return (PyObject*) self->last; } +LOCAL(PyObject*) +treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text) +{ + PyObject* comment = NULL; + PyObject* this; + + if (treebuilder_flush_data(self) < 0) { + return NULL; + } + + if (self->comment_factory) { + comment = _PyObject_FastCall(self->comment_factory, &text, 1); + if (!comment) + return NULL; + + this = self->this; + if (self->insert_comments && this != Py_None) { + if (treebuilder_add_subelement(this, comment) < 0) + goto error; + } + } else { + Py_INCREF(text); + comment = text; + } + + if (self->events_append && self->comment_event_obj) { + if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0) + goto error; + } + + return comment; + + error: + Py_DECREF(comment); + return NULL; +} + +LOCAL(PyObject*) +treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text) +{ + PyObject* pi = NULL; + PyObject* this; + PyObject* stack[2] = {target, text}; + + if (treebuilder_flush_data(self) < 0) { + return NULL; + } + + if (self->pi_factory) { + pi = _PyObject_FastCall(self->pi_factory, stack, 2); + if (!pi) { + return NULL; + } + + this = self->this; + if (self->insert_pis && this != Py_None) { + if (treebuilder_add_subelement(this, pi) < 0) + goto error; + } + } else { + pi = PyTuple_Pack(2, target, text); + if (!pi) { + return NULL; + } + } + + if (self->events_append && self->pi_event_obj) { + if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0) + goto error; + } + + return pi; + + error: + Py_DECREF(pi); + return NULL; +} + /* -------------------------------------------------------------------- */ /* methods (in alphabetical order) */ @@ -2754,6 +2944,38 @@ _elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag) return treebuilder_handle_end(self, tag); } +/*[clinic input] +_elementtree.TreeBuilder.comment + + text: object + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text) +/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/ +{ + return treebuilder_handle_comment(self, text); +} + +/*[clinic input] +_elementtree.TreeBuilder.pi + + target: object + text: object = None + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target, + PyObject *text) +/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/ +{ + return treebuilder_handle_pi(self, target, text); +} + LOCAL(PyObject*) treebuilder_done(TreeBuilderObject* self) { @@ -2925,7 +3147,7 @@ expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column, if (errmsg == NULL) return; - error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL); + error = _PyObject_FastCall(st->parseerror_obj, &errmsg, 1); Py_DECREF(errmsg); if (!error) return; @@ -2988,7 +3210,7 @@ expat_default_handler(XMLParserObject* self, const XML_Char* data_in, (TreeBuilderObject*) self->target, value ); else if (self->handle_data) - res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL); + res = _PyObject_FastCall(self->handle_data, &value, 1); else res = NULL; Py_XDECREF(res); @@ -3099,7 +3321,7 @@ expat_data_handler(XMLParserObject* self, const XML_Char* data_in, /* shortcut */ res = treebuilder_handle_data((TreeBuilderObject*) self->target, data); else if (self->handle_data) - res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL); + res = _PyObject_FastCall(self->handle_data, &data, 1); else res = NULL; @@ -3126,7 +3348,7 @@ expat_end_handler(XMLParserObject* self, const XML_Char* tag_in) else if (self->handle_end) { tag = makeuniversal(self, tag_in); if (tag) { - res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL); + res = _PyObject_FastCall(self->handle_end, &tag, 1); Py_DECREF(tag); } } @@ -3176,21 +3398,31 @@ expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in) static void expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) { - PyObject* comment; - PyObject* res; + PyObject* comment = NULL; + PyObject* res = NULL; if (PyErr_Occurred()) return; - if (self->handle_comment) { + if (TreeBuilder_CheckExact(self->target)) { + /* shortcut */ + TreeBuilderObject *target = (TreeBuilderObject*) self->target; + comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict"); - if (comment) { - res = PyObject_CallFunctionObjArgs(self->handle_comment, - comment, NULL); - Py_XDECREF(res); - Py_DECREF(comment); - } + if (!comment) + return; /* parser will look for errors */ + + res = treebuilder_handle_comment(target, comment); + } else if (self->handle_comment) { + comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict"); + if (!comment) + return; + + res = _PyObject_FastCall(self->handle_comment, &comment, 1); } + + Py_XDECREF(res); + Py_DECREF(comment); } static void @@ -3258,27 +3490,51 @@ static void expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, const XML_Char* data_in) { - PyObject* target; + PyObject* pi_target = NULL; PyObject* data; PyObject* res; + PyObject* stack[2]; if (PyErr_Occurred()) return; - if (self->handle_pi) { - target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); - data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict"); - if (target && data) { - res = PyObject_CallFunctionObjArgs(self->handle_pi, - target, data, NULL); + if (TreeBuilder_CheckExact(self->target)) { + /* shortcut */ + TreeBuilderObject *target = (TreeBuilderObject*) self->target; + + if (target->events_append && target->pi_event_obj) { + pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); + if (!pi_target) + goto error; + data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict"); + if (!data) + goto error; + res = treebuilder_handle_pi(target, pi_target, data); Py_XDECREF(res); Py_DECREF(data); - Py_DECREF(target); - } else { - Py_XDECREF(data); - Py_XDECREF(target); + Py_DECREF(pi_target); } + } else if (self->handle_pi) { + pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); + if (!pi_target) + goto error; + data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict"); + if (!data) + goto error; + + stack[0] = pi_target; + stack[1] = data; + res = _PyObject_FastCall(self->handle_pi, stack, 2); + Py_XDECREF(res); + Py_DECREF(data); + Py_DECREF(pi_target); } + + return; + + error: + Py_XDECREF(pi_target); + return; } /* -------------------------------------------------------------------- */ @@ -3695,6 +3951,8 @@ _elementtree_XMLParser__setevents_impl(XMLParserObject *self, Py_CLEAR(target->end_event_obj); Py_CLEAR(target->start_ns_event_obj); Py_CLEAR(target->end_ns_event_obj); + Py_CLEAR(target->comment_event_obj); + Py_CLEAR(target->pi_event_obj); if (events_to_report == Py_None) { /* default is "end" only */ @@ -3740,6 +3998,18 @@ _elementtree_XMLParser__setevents_impl(XMLParserObject *self, (XML_StartNamespaceDeclHandler) expat_start_ns_handler, (XML_EndNamespaceDeclHandler) expat_end_ns_handler ); + } else if (strcmp(event_name, "comment") == 0) { + Py_XSETREF(target->comment_event_obj, event_name_obj); + EXPAT(SetCommentHandler)( + self->parser, + (XML_CommentHandler) expat_comment_handler + ); + } else if (strcmp(event_name, "pi") == 0) { + Py_XSETREF(target->pi_event_obj, event_name_obj); + EXPAT(SetProcessingInstructionHandler)( + self->parser, + (XML_ProcessingInstructionHandler) expat_pi_handler + ); } else { Py_DECREF(event_name_obj); Py_DECREF(events_seq); @@ -3882,6 +4152,8 @@ static PyMethodDef treebuilder_methods[] = { _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF _ELEMENTTREE_TREEBUILDER_START_METHODDEF _ELEMENTTREE_TREEBUILDER_END_METHODDEF + _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF + _ELEMENTTREE_TREEBUILDER_PI_METHODDEF _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF {NULL, NULL} }; @@ -3983,6 +4255,7 @@ static PyTypeObject XMLParser_Type = { static PyMethodDef _functions[] = { {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS}, + _ELEMENTTREE__SET_FACTORIES_METHODDEF {NULL, NULL} }; |