summary | refs | log | tree | commit | diff | stats
path: root/Modules
diff options
context:
space:
mode:
author: Stefan Behnel <stefan_ml@behnel.de> 2019-07-24 18:08:02 (GMT)
committer: GitHub <noreply@github.com> 2019-07-24 18:08:02 (GMT)
commit: c6cb4cdd21c0c3a09b0617dbfaa7053d3bfa6def (patch)
tree: 6594bbede68651f6be2fdcb4b8db1e537a6bc4f2 /Modules
parent: 151b91dfd21a100ecb1eba9e293c0a8695bf3bf5 (diff)
download: cpython-c6cb4cdd21c0c3a09b0617dbfaa7053d3bfa6def.zip
cpython-c6cb4cdd21c0c3a09b0617dbfaa7053d3bfa6def.tar.gz
cpython-c6cb4cdd21c0c3a09b0617dbfaa7053d3bfa6def.tar.bz2
bpo-37399: Correctly attach tail text to the last element/comment/pi (GH-14856)
* bpo-37399: Correctly attach tail text to the last element/comment/pi, even when comments or pis are discarded. Also fixes the insertion of PIs when "insert_pis=True" is configured for a TreeBuilder.
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/_elementtree.c 88
1 files changed, 64 insertions, 24 deletions
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index b9b5016..830ce86 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -2399,6 +2399,7 @@ typedef struct {
PyObject *this; /* current node */
PyObject *last; /* most recently created node */
+ PyObject *last_for_tail; /* most recently created node that takes a tail */
PyObject *data; /* data collector (string or list), or NULL */
@@ -2530,6 +2531,7 @@ treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
Py_VISIT(self->root);
Py_VISIT(self->this);
Py_VISIT(self->last);
+ Py_VISIT(self->last_for_tail);
Py_VISIT(self->data);
Py_VISIT(self->stack);
Py_VISIT(self->pi_factory);
@@ -2551,6 +2553,7 @@ treebuilder_gc_clear(TreeBuilderObject *self)
Py_CLEAR(self->stack);
Py_CLEAR(self->data);
Py_CLEAR(self->last);
+ Py_CLEAR(self->last_for_tail);
Py_CLEAR(self->this);
Py_CLEAR(self->pi_factory);
Py_CLEAR(self->comment_factory);
@@ -2622,21 +2625,50 @@ _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
}
static int
-treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
- PyObject **dest, _Py_Identifier *name)
+treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
+ PyObject **dest, _Py_Identifier *name)
{
+ /* Fast paths for the "almost always" cases. */
if (Element_CheckExact(element)) {
- PyObject *tmp = JOIN_OBJ(*dest);
- *dest = JOIN_SET(*data, PyList_CheckExact(*data));
- *data = NULL;
- Py_DECREF(tmp);
- return 0;
+ PyObject *dest_obj = JOIN_OBJ(*dest);
+ if (dest_obj == Py_None) {
+ *dest = JOIN_SET(*data, PyList_CheckExact(*data));
+ *data = NULL;
+ Py_DECREF(dest_obj);
+ return 0;
+ }
+ else if (JOIN_GET(*dest)) {
+ if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
+ return -1;
+ }
+ Py_CLEAR(*data);
+ return 0;
+ }
}
- else {
- PyObject *joined = list_join(*data);
+
+ /* Fallback for the non-Element / non-trivial cases. */
+ {
int r;
- if (joined == NULL)
+ PyObject* joined;
+ PyObject* previous = _PyObject_GetAttrId(element, name);
+ if (!previous)
+ return -1;
+ joined = list_join(*data);
+ if (!joined) {
+ Py_DECREF(previous);
return -1;
+ }
+ if (previous != Py_None) {
+ PyObject *tmp = PyNumber_Add(previous, joined);
+ Py_DECREF(joined);
+ Py_DECREF(previous);
+ if (!tmp)
+ return -1;
+ joined = tmp;
+ } else {
+ Py_DECREF(previous);
+ }
+
r = _PyObject_SetAttrId(element, name, joined);
Py_DECREF(joined);
if (r < 0)
@@ -2649,21 +2681,21 @@ treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
LOCAL(int)
treebuilder_flush_data(TreeBuilderObject* self)
{
- PyObject *element = self->last;
-
if (!self->data) {
return 0;
}
- if (self->this == element) {
+ if (!self->last_for_tail) {
+ PyObject *element = self->last;
_Py_IDENTIFIER(text);
- return treebuilder_set_element_text_or_tail(
+ return treebuilder_extend_element_text_or_tail(
element, &self->data,
&((ElementObject *) element)->text, &PyId_text);
}
else {
+ PyObject *element = self->last_for_tail;
_Py_IDENTIFIER(tail);
- return treebuilder_set_element_text_or_tail(
+ return treebuilder_extend_element_text_or_tail(
element, &self->data,
&((ElementObject *) element)->tail, &PyId_tail);
}
@@ -2739,6 +2771,7 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
}
this = self->this;
+ Py_CLEAR(self->last_for_tail);
if (this != Py_None) {
if (treebuilder_add_subelement(this, node) < 0)
@@ -2836,6 +2869,8 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
item = self->last;
self->last = self->this;
+ Py_INCREF(self->last);
+ Py_XSETREF(self->last_for_tail, self->last);
self->index--;
self->this = PyList_GET_ITEM(self->stack, self->index);
Py_INCREF(self->this);
@@ -2851,7 +2886,7 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
LOCAL(PyObject*)
treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
{
- PyObject* comment = NULL;
+ PyObject* comment;
PyObject* this;
if (treebuilder_flush_data(self) < 0) {
@@ -2867,6 +2902,8 @@ treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
if (self->insert_comments && this != Py_None) {
if (treebuilder_add_subelement(this, comment) < 0)
goto error;
+ Py_INCREF(comment);
+ Py_XSETREF(self->last_for_tail, comment);
}
} else {
Py_INCREF(text);
@@ -2888,7 +2925,7 @@ treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
LOCAL(PyObject*)
treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
{
- PyObject* pi = NULL;
+ PyObject* pi;
PyObject* this;
PyObject* stack[2] = {target, text};
@@ -2906,6 +2943,8 @@ treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
if (self->insert_pis && this != Py_None) {
if (treebuilder_add_subelement(this, pi) < 0)
goto error;
+ Py_INCREF(pi);
+ Py_XSETREF(self->last_for_tail, pi);
}
} else {
pi = PyTuple_Pack(2, target, text);
@@ -3495,8 +3534,8 @@ expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
static void
expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
{
- PyObject* comment = NULL;
- PyObject* res = NULL;
+ PyObject* comment;
+ PyObject* res;
if (PyErr_Occurred())
return;
@@ -3510,16 +3549,17 @@ expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
return; /* parser will look for errors */
res = treebuilder_handle_comment(target, comment);
+ Py_XDECREF(res);
+ Py_DECREF(comment);
} else if (self->handle_comment) {
comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
if (!comment)
return;
res = _PyObject_CallOneArg(self->handle_comment, comment);
+ Py_XDECREF(res);
+ Py_DECREF(comment);
}
-
- Py_XDECREF(res);
- Py_DECREF(comment);
}
static void
@@ -3587,7 +3627,7 @@ static void
expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
const XML_Char* data_in)
{
- PyObject* pi_target = NULL;
+ PyObject* pi_target;
PyObject* data;
PyObject* res;
PyObject* stack[2];
@@ -3599,7 +3639,7 @@ expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
/* shortcut */
TreeBuilderObject *target = (TreeBuilderObject*) self->target;
- if (target->events_append && target->pi_event_obj) {
+ if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
if (!pi_target)
goto error;