diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-07-24 18:08:02 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-07-24 18:08:02 (GMT) |
commit | c6cb4cdd21c0c3a09b0617dbfaa7053d3bfa6def (patch) | |
tree | 6594bbede68651f6be2fdcb4b8db1e537a6bc4f2 /Modules | |
parent | 151b91dfd21a100ecb1eba9e293c0a8695bf3bf5 (diff) | |
download | cpython-c6cb4cdd21c0c3a09b0617dbfaa7053d3bfa6def.zip cpython-c6cb4cdd21c0c3a09b0617dbfaa7053d3bfa6def.tar.gz cpython-c6cb4cdd21c0c3a09b0617dbfaa7053d3bfa6def.tar.bz2 |
bpo-37399: Correctly attach tail text to the last element/comment/pi (GH-14856)
* bpo-37399: Correctly attach tail text to the last element/comment/pi, even when comments or pis are discarded.
Also fixes the insertion of PIs when "insert_pis=True" is configured for a TreeBuilder.
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_elementtree.c | 88 |
1 files changed, 64 insertions, 24 deletions
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index b9b5016..830ce86 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -2399,6 +2399,7 @@ typedef struct { PyObject *this; /* current node */ PyObject *last; /* most recently created node */ + PyObject *last_for_tail; /* most recently created node that takes a tail */ PyObject *data; /* data collector (string or list), or NULL */ @@ -2530,6 +2531,7 @@ treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) Py_VISIT(self->root); Py_VISIT(self->this); Py_VISIT(self->last); + Py_VISIT(self->last_for_tail); Py_VISIT(self->data); Py_VISIT(self->stack); Py_VISIT(self->pi_factory); @@ -2551,6 +2553,7 @@ treebuilder_gc_clear(TreeBuilderObject *self) Py_CLEAR(self->stack); Py_CLEAR(self->data); Py_CLEAR(self->last); + Py_CLEAR(self->last_for_tail); Py_CLEAR(self->this); Py_CLEAR(self->pi_factory); Py_CLEAR(self->comment_factory); @@ -2622,21 +2625,50 @@ _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory, } static int -treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data, - PyObject **dest, _Py_Identifier *name) +treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data, + PyObject **dest, _Py_Identifier *name) { + /* Fast paths for the "almost always" cases. */ if (Element_CheckExact(element)) { - PyObject *tmp = JOIN_OBJ(*dest); - *dest = JOIN_SET(*data, PyList_CheckExact(*data)); - *data = NULL; - Py_DECREF(tmp); - return 0; + PyObject *dest_obj = JOIN_OBJ(*dest); + if (dest_obj == Py_None) { + *dest = JOIN_SET(*data, PyList_CheckExact(*data)); + *data = NULL; + Py_DECREF(dest_obj); + return 0; + } + else if (JOIN_GET(*dest)) { + if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) { + return -1; + } + Py_CLEAR(*data); + return 0; + } } - else { - PyObject *joined = list_join(*data); + + /* Fallback for the non-Element / non-trivial cases. */ + { int r; - if (joined == NULL) + PyObject* joined; + PyObject* previous = _PyObject_GetAttrId(element, name); + if (!previous) + return -1; + joined = list_join(*data); + if (!joined) { + Py_DECREF(previous); return -1; + } + if (previous != Py_None) { + PyObject *tmp = PyNumber_Add(previous, joined); + Py_DECREF(joined); + Py_DECREF(previous); + if (!tmp) + return -1; + joined = tmp; + } else { + Py_DECREF(previous); + } + r = _PyObject_SetAttrId(element, name, joined); Py_DECREF(joined); if (r < 0) @@ -2649,21 +2681,21 @@ treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data, LOCAL(int) treebuilder_flush_data(TreeBuilderObject* self) { - PyObject *element = self->last; - if (!self->data) { return 0; } - if (self->this == element) { + if (!self->last_for_tail) { + PyObject *element = self->last; _Py_IDENTIFIER(text); - return treebuilder_set_element_text_or_tail( + return treebuilder_extend_element_text_or_tail( element, &self->data, &((ElementObject *) element)->text, &PyId_text); } else { + PyObject *element = self->last_for_tail; _Py_IDENTIFIER(tail); - return treebuilder_set_element_text_or_tail( + return treebuilder_extend_element_text_or_tail( element, &self->data, &((ElementObject *) element)->tail, &PyId_tail); } @@ -2739,6 +2771,7 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, } this = self->this; + Py_CLEAR(self->last_for_tail); if (this != Py_None) { if (treebuilder_add_subelement(this, node) < 0) @@ -2836,6 +2869,8 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) item = self->last; self->last = self->this; + Py_INCREF(self->last); + Py_XSETREF(self->last_for_tail, self->last); self->index--; self->this = PyList_GET_ITEM(self->stack, self->index); Py_INCREF(self->this); @@ -2851,7 +2886,7 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) LOCAL(PyObject*) treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text) { - PyObject* comment = NULL; + PyObject* comment; PyObject* this; if (treebuilder_flush_data(self) < 0) { @@ -2867,6 +2902,8 @@ treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text) if (self->insert_comments && this != Py_None) { if (treebuilder_add_subelement(this, comment) < 0) goto error; + Py_INCREF(comment); + Py_XSETREF(self->last_for_tail, comment); } } else { Py_INCREF(text); @@ -2888,7 +2925,7 @@ treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text) LOCAL(PyObject*) treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text) { - PyObject* pi = NULL; + PyObject* pi; PyObject* this; PyObject* stack[2] = {target, text}; @@ -2906,6 +2943,8 @@ treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text) if (self->insert_pis && this != Py_None) { if (treebuilder_add_subelement(this, pi) < 0) goto error; + Py_INCREF(pi); + Py_XSETREF(self->last_for_tail, pi); } } else { pi = PyTuple_Pack(2, target, text); @@ -3495,8 +3534,8 @@ expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in) static void expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) { - PyObject* comment = NULL; - PyObject* res = NULL; + PyObject* comment; + PyObject* res; if (PyErr_Occurred()) return; @@ -3510,16 +3549,17 @@ expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) return; /* parser will look for errors */ res = treebuilder_handle_comment(target, comment); + Py_XDECREF(res); + Py_DECREF(comment); } else if (self->handle_comment) { comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict"); if (!comment) return; res = _PyObject_CallOneArg(self->handle_comment, comment); + Py_XDECREF(res); + Py_DECREF(comment); } - - Py_XDECREF(res); - Py_DECREF(comment); } static void @@ -3587,7 +3627,7 @@ static void expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, const XML_Char* data_in) { - PyObject* pi_target = NULL; + PyObject* pi_target; PyObject* data; PyObject* res; PyObject* stack[2]; @@ -3599,7 +3639,7 @@ expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, /* shortcut */ TreeBuilderObject *target = (TreeBuilderObject*) self->target; - if (target->events_append && target->pi_event_obj) { + if ((target->events_append && target->pi_event_obj) || target->insert_pis) { pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); if (!pi_target) goto error; |