diff options
author | Florent Xicluna <florent.xicluna@gmail.com> | 2010-03-13 23:24:31 (GMT) |
---|---|---|
committer | Florent Xicluna <florent.xicluna@gmail.com> | 2010-03-13 23:24:31 (GMT) |
commit | f15351d938e76c4b2a42a638825d67f108685140 (patch) | |
tree | d75a09f72c251d71a714f948f50392249bf91cd9 /Modules/_elementtree.c | |
parent | 9451a1c6ae14cc31ea88eaaf68d5a8f946b82831 (diff) | |
download | cpython-f15351d938e76c4b2a42a638825d67f108685140.zip cpython-f15351d938e76c4b2a42a638825d67f108685140.tar.gz cpython-f15351d938e76c4b2a42a638825d67f108685140.tar.bz2 |
Merged revisions 78838-78839,78917,78919,78934,78937 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r78838 | florent.xicluna | 2010-03-11 15:36:19 +0100 (jeu, 11 mar 2010) | 2 lines
Issue #6472: The xml.etree package is updated to ElementTree 1.3. The cElementTree module is updated too.
........
r78839 | florent.xicluna | 2010-03-11 16:55:11 +0100 (jeu, 11 mar 2010) | 2 lines
Fix repr of tree Element on Windows.
........
r78917 | florent.xicluna | 2010-03-13 12:18:49 +0100 (sam, 13 mar 2010) | 2 lines
Move the xml test data to their own directory.
........
r78919 | florent.xicluna | 2010-03-13 13:41:48 +0100 (sam, 13 mar 2010) | 2 lines
Do not chdir when running test_xml_etree, and enhance the findfile helper.
........
r78934 | florent.xicluna | 2010-03-13 18:56:19 +0100 (sam, 13 mar 2010) | 2 lines
Update some parts of the xml.etree documentation.
........
r78937 | florent.xicluna | 2010-03-13 21:30:15 +0100 (sam, 13 mar 2010) | 3 lines
Add the keyword argument "method=None" to the .write() method and the tostring/tostringlist functions.
Update the function, class and method signatures, according to the new convention.
........
Diffstat (limited to 'Modules/_elementtree.c')
-rw-r--r-- | Modules/_elementtree.c | 795 |
1 file changed, 540 insertions, 255 deletions
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index ae09893..88374cc 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -1,21 +1,15 @@ /* * ElementTree - * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $ + * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $ * * elementtree accelerator * * History: * 1999-06-20 fl created (as part of sgmlop) * 2001-05-29 fl effdom edition - * 2001-06-05 fl backported to unix; fixed bogus free in clear - * 2001-07-10 fl added findall helper * 2003-02-27 fl elementtree edition (alpha) * 2004-06-03 fl updates for elementtree 1.2 - * 2005-01-05 fl added universal name cache, Element/SubElement factories - * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support - * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3 - * 2005-01-08 fl added makeelement method; fixed path support - * 2005-01-10 fl optimized memory usage + * 2005-01-05 fl major optimization effort * 2005-01-11 fl first public release (cElementTree 0.8) * 2005-01-12 fl split element object into base and extras * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9) @@ -35,16 +29,23 @@ * 2005-12-16 fl added support for non-standard encodings * 2006-03-08 fl fixed a couple of potential null-refs and leaks * 2006-03-12 fl merge in 2.5 ssize_t changes + * 2007-08-25 fl call custom builder's close method from XMLParser + * 2007-08-31 fl added iter, extend from ET 1.3 + * 2007-09-01 fl fixed ParseError exception, setslice source type, etc + * 2007-09-03 fl fixed handling of negative insert indexes + * 2007-09-04 fl added itertext from ET 1.3 + * 2007-09-06 fl added position attribute to ParseError exception + * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic) * - * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved. - * Copyright (c) 1999-2006 by Fredrik Lundh. + * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved. 
+ * Copyright (c) 1999-2009 by Fredrik Lundh. * * info@pythonware.com * http://www.pythonware.com */ /* Licensed to PSF under a Contributor Agreement. */ -/* See http://www.python.org/2.4/license for licensing details. */ +/* See http://www.python.org/psf/license for licensing details. */ #include "Python.h" @@ -56,7 +57,7 @@ /* Leave defined to include the expat-based XMLParser type */ #define USE_EXPAT -/* Define to to all expat calls via pyexpat's embedded expat library */ +/* Define to do all expat calls via pyexpat's embedded expat library */ /* #define USE_PYEXPAT_CAPI */ /* An element can hold this many children without extra memory @@ -93,6 +94,25 @@ do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0) #define LOCAL(type) static type #endif +/* compatibility macros */ +#if (PY_VERSION_HEX < 0x02060000) +#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif + +#if (PY_VERSION_HEX < 0x02050000) +typedef int Py_ssize_t; +#define lenfunc inquiry +#endif + +#if (PY_VERSION_HEX < 0x02040000) +#define PyDict_CheckExact PyDict_Check + +#if !defined(Py_RETURN_NONE) +#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None +#endif +#endif + /* macros used to store 'join' flags in string object pointers. note that all use of text and tail as object pointers must be wrapped in JOIN_OBJ. 
see comments in the ElementObject definition for more @@ -102,9 +122,11 @@ do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0) #define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1)) /* glue functions (see the init function for details) */ +static PyObject* elementtree_parseerror_obj; static PyObject* elementtree_copyelement_obj; static PyObject* elementtree_deepcopy_obj; -static PyObject* elementtree_getiterator_obj; +static PyObject* elementtree_iter_obj; +static PyObject* elementtree_itertext_obj; static PyObject* elementpath_obj; /* helpers */ @@ -188,23 +210,6 @@ list_join(PyObject* list) return result; } -#if (PY_VERSION_HEX < 0x02020000) -LOCAL(int) -PyDict_Update(PyObject* dict, PyObject* other) -{ - /* PyDict_Update emulation for 2.1 and earlier */ - - PyObject* res; - - res = PyObject_CallMethod(dict, "update", "O", other); - if (!res) - return -1; - - Py_DECREF(res); - return 0; -} -#endif - /* -------------------------------------------------------------------- */ /* the element type */ @@ -309,7 +314,7 @@ element_new(PyObject* tag, PyObject* attrib) if (element_new_extra(self, attrib) < 0) { PyObject_Del(self); return NULL; - } + } self->extra->length = 0; self->extra->allocated = STATIC_CHILDREN; @@ -407,6 +412,7 @@ element_get_attrib(ElementObject* self) PyObject* res = self->extra->attrib; if (res == Py_None) { + Py_DECREF(res); /* create missing dictionary */ res = PyDict_New(); if (!res) @@ -688,6 +694,8 @@ element_deepcopy(ElementObject* self, PyObject* args) /* add object to memo dictionary (so deepcopy won't visit it again) */ id = PyLong_FromLong((Py_uintptr_t) self); + if (!id) + goto error; i = PyDict_SetItem(memo, id, (PyObject*) element); @@ -711,7 +719,8 @@ checkpath(PyObject* tag) /* check if a tag contains an xpath character */ -#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@') +#define PATHCHAR(ch) \ + (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.') if (PyUnicode_Check(tag)) 
{ Py_UNICODE *p = PyUnicode_AS_UNICODE(tag); @@ -742,17 +751,51 @@ checkpath(PyObject* tag) } static PyObject* +element_extend(ElementObject* self, PyObject* args) +{ + PyObject* seq; + Py_ssize_t i, seqlen = 0; + + PyObject* seq_in; + if (!PyArg_ParseTuple(args, "O:extend", &seq_in)) + return NULL; + + seq = PySequence_Fast(seq_in, ""); + if (!seq) { + PyErr_Format( + PyExc_TypeError, + "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name + ); + return NULL; + } + + seqlen = PySequence_Size(seq); + for (i = 0; i < seqlen; i++) { + PyObject* element = PySequence_Fast_GET_ITEM(seq, i); + if (element_add_subelement(self, element) < 0) { + Py_DECREF(seq); + return NULL; + } + } + + Py_DECREF(seq); + + Py_RETURN_NONE; +} + +static PyObject* element_find(ElementObject* self, PyObject* args) { int i; PyObject* tag; - if (!PyArg_ParseTuple(args, "O:find", &tag)) + PyObject* namespaces = Py_None; + if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces)) return NULL; - if (checkpath(tag)) + if (checkpath(tag) || namespaces != Py_None) return PyObject_CallMethod( - elementpath_obj, "find", "OO", self, tag + elementpath_obj, "find", "OOO", self, tag, namespaces ); if (!self->extra) @@ -777,12 +820,13 @@ element_findtext(ElementObject* self, PyObject* args) PyObject* tag; PyObject* default_value = Py_None; - if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value)) + PyObject* namespaces = Py_None; + if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces)) return NULL; - if (checkpath(tag)) + if (checkpath(tag) || namespaces != Py_None) return PyObject_CallMethod( - elementpath_obj, "findtext", "OOO", self, tag, default_value + elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces ); if (!self->extra) { @@ -813,12 +857,13 @@ element_findall(ElementObject* self, PyObject* args) PyObject* out; PyObject* tag; - if (!PyArg_ParseTuple(args, "O:findall", &tag)) + PyObject* namespaces = Py_None; + if 
(!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces)) return NULL; - if (checkpath(tag)) + if (checkpath(tag) || namespaces != Py_None) return PyObject_CallMethod( - elementpath_obj, "findall", "OO", self, tag + elementpath_obj, "findall", "OOO", self, tag, namespaces ); out = PyList_New(0); @@ -843,6 +888,19 @@ element_findall(ElementObject* self, PyObject* args) } static PyObject* +element_iterfind(ElementObject* self, PyObject* args) +{ + PyObject* tag; + PyObject* namespaces = Py_None; + if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces)) + return NULL; + + return PyObject_CallMethod( + elementpath_obj, "iterfind", "OOO", self, tag, namespaces + ); +} + +static PyObject* element_get(ElementObject* self, PyObject* args) { PyObject* value; @@ -870,6 +928,8 @@ element_getchildren(ElementObject* self, PyObject* args) int i; PyObject* list; + /* FIXME: report as deprecated? */ + if (!PyArg_ParseTuple(args, ":getchildren")) return NULL; @@ -890,18 +950,18 @@ element_getchildren(ElementObject* self, PyObject* args) } static PyObject* -element_getiterator(ElementObject* self, PyObject* args) +element_iter(ElementObject* self, PyObject* args) { PyObject* result; PyObject* tag = Py_None; - if (!PyArg_ParseTuple(args, "|O:getiterator", &tag)) + if (!PyArg_ParseTuple(args, "|O:iter", &tag)) return NULL; - if (!elementtree_getiterator_obj) { + if (!elementtree_iter_obj) { PyErr_SetString( PyExc_RuntimeError, - "getiterator helper not found" + "iter helper not found" ); return NULL; } @@ -913,61 +973,58 @@ element_getiterator(ElementObject* self, PyObject* args) Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self); Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag); - result = PyObject_CallObject(elementtree_getiterator_obj, args); + result = PyObject_CallObject(elementtree_iter_obj, args); Py_DECREF(args); return result; } + static PyObject* -element_getitem(PyObject* self_, Py_ssize_t index) +element_itertext(ElementObject* self, PyObject* 
args) { - ElementObject* self = (ElementObject*) self_; + PyObject* result; + + if (!PyArg_ParseTuple(args, ":itertext")) + return NULL; - if (!self->extra || index < 0 || index >= self->extra->length) { + if (!elementtree_itertext_obj) { PyErr_SetString( - PyExc_IndexError, - "child index out of range" + PyExc_RuntimeError, + "itertext helper not found" ); return NULL; } - Py_INCREF(self->extra->children[index]); - return self->extra->children[index]; + args = PyTuple_New(1); + if (!args) + return NULL; + + Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self); + + result = PyObject_CallObject(elementtree_itertext_obj, args); + + Py_DECREF(args); + + return result; } static PyObject* -element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end) +element_getitem(PyObject* self_, Py_ssize_t index) { ElementObject* self = (ElementObject*) self_; - Py_ssize_t i; - PyObject* list; - - if (!self->extra) - return PyList_New(0); - /* standard clamping */ - if (start < 0) - start = 0; - if (end < 0) - end = 0; - if (end > self->extra->length) - end = self->extra->length; - if (start > end) - start = end; - - list = PyList_New(end - start); - if (!list) + if (!self->extra || index < 0 || index >= self->extra->length) { + PyErr_SetString( + PyExc_IndexError, + "child index out of range" + ); return NULL; - - for (i = start; i < end; i++) { - PyObject* item = self->extra->children[i]; - Py_INCREF(item); - PyList_SET_ITEM(list, i - start, item); } - return list; + Py_INCREF(self->extra->children[index]); + return self->extra->children[index]; } static PyObject* @@ -984,8 +1041,11 @@ element_insert(ElementObject* self, PyObject* args) if (!self->extra) element_new_extra(self, NULL); - if (index < 0) - index = 0; + if (index < 0) { + index += self->extra->length; + if (index < 0) + index = 0; + } if (index > self->extra->length) index = self->extra->length; @@ -1156,77 +1216,6 @@ element_set(ElementObject* self, PyObject* args) } static int 
-element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item) -{ - ElementObject* self = (ElementObject*) self_; - Py_ssize_t i, new, old; - PyObject* recycle = NULL; - - if (!self->extra) - element_new_extra(self, NULL); - - /* standard clamping */ - if (start < 0) - start = 0; - if (end < 0) - end = 0; - if (end > self->extra->length) - end = self->extra->length; - if (start > end) - start = end; - - old = end - start; - - if (item == NULL) - new = 0; - else if (PyList_CheckExact(item)) { - new = PyList_GET_SIZE(item); - } else { - /* FIXME: support arbitrary sequences? */ - PyErr_Format( - PyExc_TypeError, - "expected list, not \"%.200s\"", Py_TYPE(item)->tp_name - ); - return -1; - } - - if (old > 0) { - /* to avoid recursive calls to this method (via decref), move - old items to the recycle bin here, and get rid of them when - we're done modifying the element */ - recycle = PyList_New(old); - for (i = 0; i < old; i++) - PyList_SET_ITEM(recycle, i, self->extra->children[i + start]); - } - - if (new < old) { - /* delete slice */ - for (i = end; i < self->extra->length; i++) - self->extra->children[i + new - old] = self->extra->children[i]; - } else if (new > old) { - /* insert slice */ - if (element_resize(self, new - old) < 0) - return -1; - for (i = self->extra->length-1; i >= end; i--) - self->extra->children[i + new - old] = self->extra->children[i]; - } - - /* replace the slice */ - for (i = 0; i < new; i++) { - PyObject* element = PyList_GET_ITEM(item, i); - Py_INCREF(element); - self->extra->children[i + start] = element; - } - - self->extra->length += new - old; - - /* discard the recycle bin, and everything in it */ - Py_XDECREF(recycle); - - return 0; -} - -static int element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) { ElementObject* self = (ElementObject*) self_; @@ -1256,6 +1245,190 @@ element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) return 0; } +static PyObject* +element_subscr(PyObject* 
self_, PyObject* item) +{ + ElementObject* self = (ElementObject*) self_; + +#if (PY_VERSION_HEX < 0x02050000) + if (PyInt_Check(item) || PyLong_Check(item)) { + long i = PyInt_AsLong(item); +#else + if (PyIndex_Check(item)) { + Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); +#endif + + if (i == -1 && PyErr_Occurred()) { + return NULL; + } + if (i < 0 && self->extra) + i += self->extra->length; + return element_getitem(self_, i); + } + else if (PySlice_Check(item)) { + Py_ssize_t start, stop, step, slicelen, cur, i; + PyObject* list; + + if (!self->extra) + return PyList_New(0); + + if (PySlice_GetIndicesEx((PySliceObject *)item, + self->extra->length, + &start, &stop, &step, &slicelen) < 0) { + return NULL; + } + + if (slicelen <= 0) + return PyList_New(0); + else { + list = PyList_New(slicelen); + if (!list) + return NULL; + + for (cur = start, i = 0; i < slicelen; + cur += step, i++) { + PyObject* item = self->extra->children[cur]; + Py_INCREF(item); + PyList_SET_ITEM(list, i, item); + } + + return list; + } + } + else { + PyErr_SetString(PyExc_TypeError, + "element indices must be integers"); + return NULL; + } +} + +static int +element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) +{ + ElementObject* self = (ElementObject*) self_; + +#if (PY_VERSION_HEX < 0x02050000) + if (PyInt_Check(item) || PyLong_Check(item)) { + long i = PyInt_AsLong(item); +#else + if (PyIndex_Check(item)) { + Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); +#endif + + if (i == -1 && PyErr_Occurred()) { + return -1; + } + if (i < 0 && self->extra) + i += self->extra->length; + return element_setitem(self_, i, value); + } + else if (PySlice_Check(item)) { + Py_ssize_t start, stop, step, slicelen, newlen, cur, i; + + PyObject* recycle = NULL; + PyObject* seq = NULL; + + if (!self->extra) + element_new_extra(self, NULL); + + if (PySlice_GetIndicesEx((PySliceObject *)item, + self->extra->length, + &start, &stop, &step, &slicelen) < 0) { + return -1; + } 
+ + if (value == NULL) + newlen = 0; + else { + seq = PySequence_Fast(value, ""); + if (!seq) { + PyErr_Format( + PyExc_TypeError, + "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name + ); + return -1; + } + newlen = PySequence_Size(seq); + } + + if (step != 1 && newlen != slicelen) + { + PyErr_Format(PyExc_ValueError, +#if (PY_VERSION_HEX < 0x02050000) + "attempt to assign sequence of size %d " + "to extended slice of size %d", +#else + "attempt to assign sequence of size %zd " + "to extended slice of size %zd", +#endif + newlen, slicelen + ); + return -1; + } + + + /* Resize before creating the recycle bin, to prevent refleaks. */ + if (newlen > slicelen) { + if (element_resize(self, newlen - slicelen) < 0) { + if (seq) { + Py_DECREF(seq); + } + return -1; + } + } + + if (slicelen > 0) { + /* to avoid recursive calls to this method (via decref), move + old items to the recycle bin here, and get rid of them when + we're done modifying the element */ + recycle = PyList_New(slicelen); + if (!recycle) { + if (seq) { + Py_DECREF(seq); + } + return -1; + } + for (cur = start, i = 0; i < slicelen; + cur += step, i++) + PyList_SET_ITEM(recycle, i, self->extra->children[cur]); + } + + if (newlen < slicelen) { + /* delete slice */ + for (i = stop; i < self->extra->length; i++) + self->extra->children[i + newlen - slicelen] = self->extra->children[i]; + } else if (newlen > slicelen) { + /* insert slice */ + for (i = self->extra->length-1; i >= stop; i--) + self->extra->children[i + newlen - slicelen] = self->extra->children[i]; + } + + /* replace the slice */ + for (cur = start, i = 0; i < newlen; + cur += step, i++) { + PyObject* element = PySequence_Fast_GET_ITEM(seq, i); + Py_INCREF(element); + self->extra->children[cur] = element; + } + + self->extra->length += newlen - slicelen; + + if (seq) { + Py_DECREF(seq); + } + + /* discard the recycle bin, and everything in it */ + Py_XDECREF(recycle); + + return 0; + } + else { + PyErr_SetString(PyExc_TypeError, + 
"element indices must be integers"); + return -1; + } +} + static PyMethodDef element_methods[] = { {"clear", (PyCFunction) element_clear, METH_VARARGS}, @@ -1268,10 +1441,15 @@ static PyMethodDef element_methods[] = { {"findall", (PyCFunction) element_findall, METH_VARARGS}, {"append", (PyCFunction) element_append, METH_VARARGS}, + {"extend", (PyCFunction) element_extend, METH_VARARGS}, {"insert", (PyCFunction) element_insert, METH_VARARGS}, {"remove", (PyCFunction) element_remove, METH_VARARGS}, - {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS}, + {"iter", (PyCFunction) element_iter, METH_VARARGS}, + {"itertext", (PyCFunction) element_itertext, METH_VARARGS}, + {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS}, + + {"getiterator", (PyCFunction) element_iter, METH_VARARGS}, {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS}, {"items", (PyCFunction) element_items, METH_VARARGS}, @@ -1297,30 +1475,46 @@ static PyMethodDef element_methods[] = { {NULL, NULL} }; -static PyObject* +static PyObject* element_getattro(ElementObject* self, PyObject* nameobj) { PyObject* res; char *name = ""; if (PyUnicode_Check(nameobj)) - name = _PyUnicode_AsString(nameobj); + name = _PyUnicode_AsString(nameobj); - if (strcmp(name, "tag") == 0) - res = self->tag; - else if (strcmp(name, "text") == 0) + /* handle common attributes first */ + if (strcmp(name, "tag") == 0) { + res = self->tag; + Py_INCREF(res); + return res; + } else if (strcmp(name, "text") == 0) { res = element_get_text(self); - else if (strcmp(name, "tail") == 0) { + Py_INCREF(res); + return res; + } + + /* methods */ + res = PyObject_GenericGetAttr((PyObject*) self, nameobj); + if (res) + return res; + + /* less common attributes */ + if (strcmp(name, "tail") == 0) { + PyErr_Clear(); res = element_get_tail(self); } else if (strcmp(name, "attrib") == 0) { + PyErr_Clear(); if (!self->extra) element_new_extra(self, NULL); - res = element_get_attrib(self); - } else { - return 
PyObject_GenericGetAttr((PyObject*) self, nameobj); + res = element_get_attrib(self); } - Py_XINCREF(res); + if (!res) + return NULL; + + Py_INCREF(res); return res; } @@ -1366,9 +1560,15 @@ static PySequenceMethods element_as_sequence = { 0, /* sq_concat */ 0, /* sq_repeat */ element_getitem, - element_getslice, + 0, element_setitem, - element_setslice, + 0, +}; + +static PyMappingMethods element_as_mapping = { + (lenfunc) element_length, + (binaryfunc) element_subscr, + (objobjargproc) element_ass_subscr, }; static PyTypeObject Element_Type = { @@ -1383,7 +1583,7 @@ static PyTypeObject Element_Type = { (reprfunc)element_repr, /* tp_repr */ 0, /* tp_as_number */ &element_as_sequence, /* tp_as_sequence */ - 0, /* tp_as_mapping */ + &element_as_mapping, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ @@ -1537,7 +1737,7 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, } else { if (self->root) { PyErr_SetString( - PyExc_SyntaxError, + elementtree_parseerror_obj, "multiple elements on top level" ); goto error; @@ -1678,7 +1878,7 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) LOCAL(void) treebuilder_handle_namespace(TreeBuilderObject* self, int start, - const char* prefix, const char *uri) + PyObject *prefix, PyObject *uri) { PyObject* res; PyObject* action; @@ -1691,8 +1891,7 @@ treebuilder_handle_namespace(TreeBuilderObject* self, int start, if (!self->start_ns_event_obj) return; action = self->start_ns_event_obj; - /* FIXME: prefix and uri use utf-8 encoding! */ - parcel = Py_BuildValue("ss", (prefix) ? 
prefix : "", uri); + parcel = Py_BuildValue("OO", prefix, uri); if (!parcel) return; Py_INCREF(action); @@ -1852,6 +2051,7 @@ typedef struct { PyObject* names; PyObject* handle_xml; + PyObject* handle_start; PyObject* handle_data; PyObject* handle_end; @@ -1859,6 +2059,8 @@ typedef struct { PyObject* handle_comment; PyObject* handle_pi; + PyObject* handle_close; + } XMLParserObject; static PyTypeObject XMLParser_Type; @@ -1930,6 +2132,36 @@ makeuniversal(XMLParserObject* self, const char* string) return value; } +static void +expat_set_error(const char* message, int line, int column) +{ + PyObject *error; + PyObject *position; + char buffer[256]; + + sprintf(buffer, "%s: line %d, column %d", message, line, column); + + error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer); + if (!error) + return; + + /* add position attribute */ + position = Py_BuildValue("(ii)", line, column); + if (!position) { + Py_DECREF(error); + return; + } + if (PyObject_SetAttrString(error, "position", position) == -1) { + Py_DECREF(error); + Py_DECREF(position); + return; + } + Py_DECREF(position); + + PyErr_SetObject(elementtree_parseerror_obj, error); + Py_DECREF(error); +} + /* -------------------------------------------------------------------- */ /* handlers */ @@ -1960,10 +2192,12 @@ expat_default_handler(XMLParserObject* self, const XML_Char* data_in, else res = NULL; Py_XDECREF(res); - } else { - PyErr_Format( - PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld", - PyBytes_AS_STRING(key), + } else if (!PyErr_Occurred()) { + /* Report the first error, not the last */ + char message[128]; + sprintf(message, "undefined entity &%.100s;", _PyUnicode_AsString(key)); + expat_set_error( + message, EXPAT(GetErrorLineNumber)(self->parser), EXPAT(GetErrorColumnNumber)(self->parser) ); @@ -2018,9 +2252,15 @@ expat_start_handler(XMLParserObject* self, const XML_Char* tag_in, /* shortcut */ res = treebuilder_handle_start((TreeBuilderObject*) self->target, tag, 
attrib); - else if (self->handle_start) + else if (self->handle_start) { + if (attrib == Py_None) { + Py_DECREF(attrib); + attrib = PyDict_New(); + if (!attrib) + return; + } res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib); - else + } else res = NULL; Py_DECREF(tag); @@ -2080,9 +2320,28 @@ static void expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix, const XML_Char *uri) { + PyObject* sprefix = NULL; + PyObject* suri = NULL; + + suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict"); + if (!suri) + return; + + if (prefix) + sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict"); + else + sprefix = PyUnicode_FromString(""); + if (!sprefix) { + Py_DECREF(suri); + return; + } + treebuilder_handle_namespace( - (TreeBuilderObject*) self->target, 1, prefix, uri + (TreeBuilderObject*) self->target, 1, sprefix, suri ); + + Py_DECREF(sprefix); + Py_DECREF(suri); } static void @@ -2158,10 +2417,10 @@ expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name, p = PyUnicode_AS_UNICODE(u); for (i = 0; i < 256; i++) { - if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER) - info->map[i] = p[i]; + if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER) + info->map[i] = p[i]; else - info->map[i] = -1; + info->map[i] = -1; } Py_DECREF(u); @@ -2245,6 +2504,7 @@ xmlparser(PyObject* self_, PyObject* args, PyObject* kw) self->handle_end = PyObject_GetAttrString(target, "end"); self->handle_comment = PyObject_GetAttrString(target, "comment"); self->handle_pi = PyObject_GetAttrString(target, "pi"); + self->handle_close = PyObject_GetAttrString(target, "close"); PyErr_Clear(); @@ -2288,6 +2548,7 @@ xmlparser_dealloc(XMLParserObject* self) { EXPAT(ParserFree)(self->parser); + Py_XDECREF(self->handle_close); Py_XDECREF(self->handle_pi); Py_XDECREF(self->handle_comment); Py_XDECREF(self->handle_end); @@ -2318,8 +2579,7 @@ expat_parse(XMLParserObject* self, char* data, int data_len, int final) return NULL; if (!ok) { - PyErr_Format( - 
PyExc_SyntaxError, "%s: line %ld, column %ld", + expat_set_error( EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)), EXPAT(GetErrorLineNumber)(self->parser), EXPAT(GetErrorColumnNumber)(self->parser) @@ -2340,13 +2600,17 @@ xmlparser_close(XMLParserObject* self, PyObject* args) return NULL; res = expat_parse(self, "", 0, 1); + if (!res) + return NULL; - if (res && TreeBuilder_CheckExact(self->target)) { + if (TreeBuilder_CheckExact(self->target)) { Py_DECREF(res); return treebuilder_done((TreeBuilderObject*) self->target); - } - - return res; + } if (self->handle_close) { + Py_DECREF(res); + return PyObject_CallFunction(self->handle_close, ""); + } else + return res; } static PyObject* @@ -2458,7 +2722,7 @@ xmlparser_setevents(XMLParserObject* self, PyObject* args) if (event_set == Py_None) { /* default is "end" only */ - target->end_event_obj = PyBytes_FromString("end"); + target->end_event_obj = PyUnicode_FromString("end"); Py_RETURN_NONE; } @@ -2468,9 +2732,13 @@ xmlparser_setevents(XMLParserObject* self, PyObject* args) for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) { PyObject* item = PyTuple_GET_ITEM(event_set, i); char* event; - if (!PyBytes_Check(item)) + if (PyUnicode_Check(item)) { + event = _PyUnicode_AsString(item); + } else if (PyBytes_Check(item)) + event = PyBytes_AS_STRING(item); + else { goto error; - event = PyBytes_AS_STRING(item); + } if (strcmp(event, "start") == 0) { Py_INCREF(item); target->start_event_obj = item; @@ -2530,19 +2798,19 @@ xmlparser_getattro(XMLParserObject* self, PyObject* nameobj) char *name = ""; if (PyUnicode_Check(nameobj)) - name = _PyUnicode_AsString(nameobj); + name = _PyUnicode_AsString(nameobj); PyErr_Clear(); if (strcmp(name, "entity") == 0) - res = self->entity; + res = self->entity; else if (strcmp(name, "target") == 0) - res = self->target; + res = self->target; else if (strcmp(name, "version") == 0) { char buffer[100]; sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION, XML_MINOR_VERSION, 
XML_MICRO_VERSION); - return PyBytes_FromString(buffer); + return PyUnicode_DecodeUTF8(buffer, strlen(buffer), "strict"); } else { return PyObject_GenericGetAttr((PyObject*) self, nameobj); } @@ -2617,9 +2885,6 @@ PyInit__elementtree(void) PyObject* m; PyObject* g; char* bootstrap; -#if defined(USE_PYEXPAT_CAPI) - struct PyExpat_CAPI* capi; -#endif /* Initialize object types */ if (PyType_Ready(&TreeBuilder_Type) < 0) @@ -2651,10 +2916,6 @@ PyInit__elementtree(void) bootstrap = ( -#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000) - "from __future__ import generators\n" /* enable yield under 2.2 */ -#endif - "from copy import copy, deepcopy\n" "try:\n" @@ -2672,11 +2933,14 @@ PyInit__elementtree(void) " def copyelement(elem):\n" " return elem\n" - "def Comment(text=None):\n" /* public */ + "class CommentProxy:\n" + " def __call__(self, text=None):\n" " element = cElementTree.Element(ET.Comment)\n" " element.text = text\n" " return element\n" - "cElementTree.Comment = Comment\n" + " def __eq__(self, other):\n" + " return ET.Comment == other\n" + "cElementTree.Comment = CommentProxy()\n" "class ElementTree(ET.ElementTree):\n" /* public */ " def parse(self, source, parser=None):\n" @@ -2695,23 +2959,23 @@ PyInit__elementtree(void) " return self._root\n" "cElementTree.ElementTree = ElementTree\n" - "def getiterator(node, tag=None):\n" /* helper */ + "def iter(node, tag=None):\n" /* helper */ " if tag == '*':\n" " tag = None\n" -#if (PY_VERSION_HEX < 0x02020000) - " nodes = []\n" /* 2.1 doesn't have yield */ - " if tag is None or node.tag == tag:\n" - " nodes.append(node)\n" - " for node in node:\n" - " nodes.extend(getiterator(node, tag))\n" - " return nodes\n" -#else " if tag is None or node.tag == tag:\n" " yield node\n" " for node in node:\n" - " for node in getiterator(node, tag):\n" + " for node in iter(node, tag):\n" " yield node\n" -#endif + + "def itertext(node):\n" /* helper */ + " if node.text:\n" + " yield node.text\n" + " for e in node:\n" + 
" for s in e.itertext():\n" + " yield s\n" + " if e.tail:\n" + " yield e.tail\n" "def parse(source, parser=None):\n" /* public */ " tree = ElementTree()\n" @@ -2719,48 +2983,52 @@ PyInit__elementtree(void) " return tree\n" "cElementTree.parse = parse\n" -#if (PY_VERSION_HEX < 0x02020000) - "if hasattr(ET, 'iterparse'):\n" - " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */ -#else - "class iterparse(object):\n" + "class iterparse:\n" " root = None\n" " def __init__(self, file, events=None):\n" " if not hasattr(file, 'read'):\n" " file = open(file, 'rb')\n" " self._file = file\n" - " self._events = events\n" - " def __iter__(self):\n" - " events = []\n" + " self._events = []\n" + " self._index = 0\n" + " self.root = self._root = None\n" " b = cElementTree.TreeBuilder()\n" - " p = cElementTree.XMLParser(b)\n" - " p._setevents(events, self._events)\n" + " self._parser = cElementTree.XMLParser(b)\n" + " self._parser._setevents(self._events, events)\n" + " def __next__(self):\n" " while 1:\n" - " data = self._file.read(16384)\n" - " if not data:\n" - " break\n" - " p.feed(data)\n" - " for event in events:\n" - " yield event\n" - " del events[:]\n" - " root = p.close()\n" - " for event in events:\n" - " yield event\n" - " self.root = root\n" + " try:\n" + " item = self._events[self._index]\n" + " except IndexError:\n" + " if self._parser is None:\n" + " self.root = self._root\n" + " raise StopIteration\n" + " # load event buffer\n" + " del self._events[:]\n" + " self._index = 0\n" + " data = self._file.read(16384)\n" + " if data:\n" + " self._parser.feed(data)\n" + " else:\n" + " self._root = self._parser.close()\n" + " self._parser = None\n" + " else:\n" + " self._index = self._index + 1\n" + " return item\n" + " def __iter__(self):\n" + " return self\n" "cElementTree.iterparse = iterparse\n" -#endif - "def PI(target, text=None):\n" /* public */ - " element = cElementTree.Element(ET.ProcessingInstruction)\n" + "class PIProxy:\n" + " def __call__(self, 
target, text=None):\n" + " element = cElementTree.Element(ET.PI)\n" " element.text = target\n" " if text:\n" " element.text = element.text + ' ' + text\n" " return element\n" - - " elem = cElementTree.Element(ET.PI)\n" - " elem.text = text\n" - " return elem\n" - "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n" + " def __eq__(self, other):\n" + " return ET.PI == other\n" + "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n" "def XML(text):\n" /* public */ " parser = cElementTree.XMLParser()\n" @@ -2771,25 +3039,34 @@ PyInit__elementtree(void) "def XMLID(text):\n" /* public */ " tree = XML(text)\n" " ids = {}\n" - " for elem in tree.getiterator():\n" + " for elem in tree.iter():\n" " id = elem.get('id')\n" " if id:\n" " ids[id] = elem\n" " return tree, ids\n" "cElementTree.XMLID = XMLID\n" + "try:\n" + " register_namespace = ET.register_namespace\n" + "except AttributeError:\n" + " def register_namespace(prefix, uri):\n" + " ET._namespace_map[uri] = prefix\n" + "cElementTree.register_namespace = register_namespace\n" + "cElementTree.dump = ET.dump\n" "cElementTree.ElementPath = ElementPath = ET.ElementPath\n" "cElementTree.iselement = ET.iselement\n" "cElementTree.QName = ET.QName\n" "cElementTree.tostring = ET.tostring\n" + "cElementTree.fromstringlist = ET.fromstringlist\n" + "cElementTree.tostringlist = ET.tostringlist\n" "cElementTree.VERSION = '" VERSION "'\n" "cElementTree.__version__ = '" VERSION "'\n" - "cElementTree.XMLParserError = SyntaxError\n" ); - PyRun_String(bootstrap, Py_file_input, g, NULL); + if (!PyRun_String(bootstrap, Py_file_input, g, NULL)) + return NULL; elementpath_obj = PyDict_GetItemString(g, "ElementPath"); @@ -2804,22 +3081,30 @@ PyInit__elementtree(void) } } else PyErr_Clear(); + elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy"); - elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator"); + elementtree_iter_obj = PyDict_GetItemString(g, "iter"); + elementtree_itertext_obj = 
PyDict_GetItemString(g, "itertext"); #if defined(USE_PYEXPAT_CAPI) /* link against pyexpat, if possible */ - capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0); - if (capi && - strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 && - capi->size <= sizeof(*expat_capi) && - capi->MAJOR_VERSION == XML_MAJOR_VERSION && - capi->MINOR_VERSION == XML_MINOR_VERSION && - capi->MICRO_VERSION == XML_MICRO_VERSION) - expat_capi = capi; - else - expat_capi = NULL; + expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0); + if (expat_capi) { + /* check that it's usable */ + if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 || + expat_capi->size < sizeof(struct PyExpat_CAPI) || + expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION || + expat_capi->MINOR_VERSION != XML_MINOR_VERSION || + expat_capi->MICRO_VERSION != XML_MICRO_VERSION) + expat_capi = NULL; + } #endif - return m; + elementtree_parseerror_obj = PyErr_NewException( + "cElementTree.ParseError", PyExc_SyntaxError, NULL + ); + Py_INCREF(elementtree_parseerror_obj); + PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj); + + return m; } |