diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2005-12-16 22:06:06 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2005-12-16 22:06:06 (GMT) |
commit | 8c8836b40652e7e3ce86172c847f0febb32976c1 (patch) | |
tree | 942fa67e357f8c607c410966f10cfad3ca1f142d /Modules | |
parent | 7cfe6eee0d3820f3a91a1838b93cf61fe7287e15 (diff) | |
download | cpython-8c8836b40652e7e3ce86172c847f0febb32976c1.zip cpython-8c8836b40652e7e3ce86172c847f0febb32976c1.tar.gz cpython-8c8836b40652e7e3ce86172c847f0febb32976c1.tar.bz2 |
updating to cElementTree 1.0.5 (step 2 of 3)
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_elementtree.c | 2766 |
1 files changed, 2766 insertions, 0 deletions
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c new file mode 100644 index 0000000..fe479cf --- /dev/null +++ b/Modules/_elementtree.c @@ -0,0 +1,2766 @@ +/* + * ElementTree + * $Id: /work/modules/celementtree/cElementTree.c 1128 2005-12-16T21:57:13.668520Z Fredrik $ + * + * elementtree accelerator + * + * History: + * 1999-06-20 fl created (as part of sgmlop) + * 2001-05-29 fl effdom edition + * 2001-06-05 fl backported to unix; fixed bogus free in clear + * 2001-07-10 fl added findall helper + * 2003-02-27 fl elementtree edition (alpha) + * 2004-06-03 fl updates for elementtree 1.2 + * 2005-01-05 fl added universal name cache, Element/SubElement factories + * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support + * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3 + * 2005-01-08 fl added makeelement method; fixed path support + * 2005-01-10 fl optimized memory usage + * 2005-01-11 fl first public release (cElementTree 0.8) + * 2005-01-12 fl split element object into base and extras + * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9) + * 2005-01-17 fl added treebuilder close method + * 2005-01-17 fl fixed crash in getchildren + * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3) + * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8) + * 2005-01-26 fl added VERSION module property (cElementTree 1.0) + * 2005-01-28 fl added remove method (1.0.1) + * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2) + * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers + * 2005-03-26 fl added Comment and PI support to XMLParser + * 2005-03-27 fl event optimizations; complain about bogus events + * 2005-08-08 fl fixed read error handling in parse + * 2005-08-11 fl added runtime test for copy workaround (1.0.3) + * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4) + * 2005-12-16 fl added support for non-standard 
encodings + * + * Copyright (c) 1999-2005 by Secret Labs AB. All rights reserved. + * Copyright (c) 1999-2005 by Fredrik Lundh. + * + * info@pythonware.com + * http://www.pythonware.com + */ + +#include "Python.h" + +#define VERSION "1.0.5" + +/* -------------------------------------------------------------------- */ +/* configuration */ + +/* Leave defined to include the expat-based XMLParser type */ +#define USE_EXPAT + +/* Define to do all expat calls via pyexpat's embedded expat library */ +/* #define USE_PYEXPAT_CAPI */ + +/* An element can hold this many children without extra memory + allocations. */ +#define STATIC_CHILDREN 4 + +/* For best performance, choose a value so that 80-90% of all nodes + have no more than the given number of children. Set this to zero + to minimize the size of the element structure itself (this only + helps if you have lots of leaf nodes with attributes). */ + +/* Also note that pymalloc always allocates blocks in multiples of + eight bytes. For the current version of cElementTree, this means + that the number of children should be an even number, at least on + 32-bit platforms. 
*/ + +/* -------------------------------------------------------------------- */ /* allocation tracing: when the #if below is switched to 1, ALLOC and RELEASE print a running byte counter; otherwise they expand to nothing */ + +#if 0 +static int memory = 0; +#define ALLOC(size, comment)\ +do { memory += size; printf("%8d - %s\n", memory, comment); } while (0) +#define RELEASE(size, comment)\ +do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0) +#else +#define ALLOC(size, comment) +#define RELEASE(size, comment) +#endif + +/* compiler tweaks: LOCAL(type) declares a file-private (static) helper returning type; under MSVC it is additionally __inline and __fastcall */ +#if defined(_MSC_VER) +#define LOCAL(type) static __inline type __fastcall +#else +#define LOCAL(type) static type +#endif + +/* compatibility macros: map newer API names onto what older Python versions provide */ +#if (PY_VERSION_HEX < 0x02040000) +#define PyDict_CheckExact PyDict_Check +#if (PY_VERSION_HEX < 0x02020000) +#define PyList_CheckExact PyList_Check +#define PyString_CheckExact PyString_Check +#if (PY_VERSION_HEX >= 0x01060000) +#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */ +#endif +#endif +#endif + +#if (PY_VERSION_HEX >= 0x02050000) +#define PY_CONST const /* 2.5 adds const to some APIs */ +#else +#define PY_CONST +#endif + +#if !defined(Py_RETURN_NONE) +#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None +#endif + +/* macros used to store 'join' flags in string object pointers. note + that all use of text and tail as object pointers must be wrapped in + JOIN_OBJ. see comments in the ElementObject definition for more + info. 
*/ +#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1) +#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag))) +#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1)) + +/* glue functions (see the init function for details) */ +static PyObject* elementtree_copyelement_obj; +static PyObject* elementtree_deepcopy_obj; +static PyObject* elementtree_getiterator_obj; +static PyObject* elementpath_obj; + +/* helpers */ + +LOCAL(PyObject*) +deepcopy(PyObject* object, PyObject* memo) +{ + /* do a deep copy of the given object */ + + PyObject* args; + PyObject* result; + + if (!elementtree_deepcopy_obj) { + PyErr_SetString( + PyExc_RuntimeError, + "deepcopy helper not found" + ); + return NULL; + } + + args = PyTuple_New(2); + Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object); + Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo); + + result = PyObject_CallObject(elementtree_deepcopy_obj, args); + + Py_DECREF(args); + + return result; +} + +LOCAL(PyObject*) +list_join(PyObject* list) +{ + /* join list elements (destroying the list in the process) */ + + PyObject* joiner; + PyObject* function; + PyObject* args; + PyObject* result; + + switch (PyList_GET_SIZE(list)) { + case 0: + Py_DECREF(list); + return PyString_FromString(""); + case 1: + result = PyList_GET_ITEM(list, 0); + Py_INCREF(result); + Py_DECREF(list); + return result; + } + + /* two or more elements: slice out a suitable separator from the + first member, and use that to join the entire list */ + + joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0); + if (!joiner) + return NULL; + + function = PyObject_GetAttrString(joiner, "join"); + if (!function) { + Py_DECREF(joiner); + return NULL; + } + + args = PyTuple_New(1); + PyTuple_SET_ITEM(args, 0, list); + + result = PyObject_CallObject(function, args); + + Py_DECREF(args); /* also removes list */ + Py_DECREF(function); + Py_DECREF(joiner); + + return result; +} + +#if (PY_VERSION_HEX < 0x02020000) +LOCAL(int) 
+PyDict_Update(PyObject* dict, PyObject* other) +{ + /* PyDict_Update emulation for 2.1 and earlier */ + + PyObject* res; + + res = PyObject_CallMethod(dict, "update", "O", other); + if (!res) + return -1; + + Py_DECREF(res); + return 0; +} +#endif + +/* -------------------------------------------------------------------- */ +/* the element type */ + +typedef struct { + + /* attributes (a dictionary object), or None if no attributes */ + PyObject* attrib; + + /* child elements */ + int length; /* actual number of items */ + int allocated; /* allocated items */ + + /* this either points to _children or to a malloced buffer */ + PyObject* *children; + + PyObject* _children[STATIC_CHILDREN]; + +} ElementObjectExtra; + +typedef struct { + PyObject_HEAD + + /* element tag (a string). */ + PyObject* tag; + + /* text before first child. note that this is a tagged pointer; + use JOIN_OBJ to get the object pointer. the join flag is used + to distinguish lists created by the tree builder from lists + assigned to the attribute by application code; the former + should be joined before being returned to the user, the latter + should be left intact. */ + PyObject* text; + + /* text after this element, in parent. note that this is a tagged + pointer; use JOIN_OBJ to get the object pointer. 
*/ + PyObject* tail; + + ElementObjectExtra* extra; + +} ElementObject; + +staticforward PyTypeObject Element_Type; + +#define Element_CheckExact(op) ((op)->ob_type == &Element_Type) + +/* -------------------------------------------------------------------- */ +/* element constructor and destructor */ + +LOCAL(int) +element_new_extra(ElementObject* self, PyObject* attrib) +{ + self->extra = PyObject_Malloc(sizeof(ElementObjectExtra)); + if (!self->extra) + return -1; + + if (!attrib) + attrib = Py_None; + + Py_INCREF(attrib); + self->extra->attrib = attrib; + + self->extra->length = 0; + self->extra->allocated = STATIC_CHILDREN; + self->extra->children = self->extra->_children; + + return 0; +} + +LOCAL(void) +element_dealloc_extra(ElementObject* self) +{ + int i; + + Py_DECREF(self->extra->attrib); + + for (i = 0; i < self->extra->length; i++) + Py_DECREF(self->extra->children[i]); + + if (self->extra->children != self->extra->_children) + PyObject_Free(self->extra->children); + + PyObject_Free(self->extra); +} + +LOCAL(PyObject*) +element_new(PyObject* tag, PyObject* attrib) +{ + ElementObject* self; + + self = PyObject_New(ElementObject, &Element_Type); + if (self == NULL) + return NULL; + + /* use None for empty dictionaries */ + if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib)) + attrib = Py_None; + + self->extra = NULL; + + if (attrib != Py_None) { + + if (element_new_extra(self, attrib) < 0) + return NULL; + + self->extra->length = 0; + self->extra->allocated = STATIC_CHILDREN; + self->extra->children = self->extra->_children; + + } + + Py_INCREF(tag); + self->tag = tag; + + Py_INCREF(Py_None); + self->text = Py_None; + + Py_INCREF(Py_None); + self->tail = Py_None; + + ALLOC(sizeof(ElementObject), "create element"); + + return (PyObject*) self; +} + +LOCAL(int) +element_resize(ElementObject* self, int extra) +{ + int size; + PyObject* *children; + + /* make sure self->children can hold the given number of extra + elements. 
set an exception and return -1 if allocation failed */ + + if (!self->extra) + element_new_extra(self, NULL); + + size = self->extra->length + extra; + + if (size > self->extra->allocated) { + /* use Python 2.4's list growth strategy */ + size = (size >> 3) + (size < 9 ? 3 : 6) + size; + if (self->extra->children != self->extra->_children) { + children = PyObject_Realloc(self->extra->children, + size * sizeof(PyObject*)); + if (!children) + goto nomemory; + } else { + children = PyObject_Malloc(size * sizeof(PyObject*)); + if (!children) + goto nomemory; + /* copy existing children from static area to malloc buffer */ + memcpy(children, self->extra->children, + self->extra->length * sizeof(PyObject*)); + } + self->extra->children = children; + self->extra->allocated = size; + } + + return 0; + + nomemory: + PyErr_NoMemory(); + return -1; +} + +LOCAL(int) +element_add_subelement(ElementObject* self, PyObject* element) +{ + /* add a child element to a parent */ + + if (element_resize(self, 1) < 0) + return -1; + + Py_INCREF(element); + self->extra->children[self->extra->length] = element; + + self->extra->length++; + + return 0; +} + +LOCAL(PyObject*) +element_get_attrib(ElementObject* self) +{ + /* return borrowed reference to attrib dictionary */ + /* note: this function assumes that the extra section exists */ + + PyObject* res = self->extra->attrib; + + if (res == Py_None) { + /* create missing dictionary */ + res = PyDict_New(); + if (!res) + return NULL; + self->extra->attrib = res; + } + + return res; +} + +LOCAL(PyObject*) +element_get_text(ElementObject* self) +{ + /* return borrowed reference to text attribute */ + + PyObject* res = self->text; + + if (JOIN_GET(res)) { + res = JOIN_OBJ(res); + if (PyList_CheckExact(res)) { + res = list_join(res); + if (!res) + return NULL; + self->text = res; + } + } + + return res; +} + +LOCAL(PyObject*) +element_get_tail(ElementObject* self) +{ + /* return borrowed reference to text attribute */ + + PyObject* res = 
self->tail; + + if (JOIN_GET(res)) { + res = JOIN_OBJ(res); + if (PyList_CheckExact(res)) { + res = list_join(res); + if (!res) + return NULL; + self->tail = res; + } + } + + return res; +} + +static PyObject* +element(PyObject* self, PyObject* args, PyObject* kw) +{ + PyObject* elem; + + PyObject* tag; + PyObject* attrib = NULL; + if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, + &PyDict_Type, &attrib)) + return NULL; + + if (attrib || kw) { + attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); + if (!attrib) + return NULL; + if (kw) + PyDict_Update(attrib, kw); + } else { + Py_INCREF(Py_None); + attrib = Py_None; + } + + elem = element_new(tag, attrib); + + Py_DECREF(attrib); + + return elem; +} + +static PyObject* +subelement(PyObject* self, PyObject* args, PyObject* kw) +{ + PyObject* elem; + + ElementObject* parent; + PyObject* tag; + PyObject* attrib = NULL; + if (!PyArg_ParseTuple(args, "O!O|O!:SubElement", + &Element_Type, &parent, &tag, + &PyDict_Type, &attrib)) + return NULL; + + if (attrib || kw) { + attrib = (attrib) ? 
PyDict_Copy(attrib) : PyDict_New(); + if (!attrib) + return NULL; + if (kw) + PyDict_Update(attrib, kw); + } else { + Py_INCREF(Py_None); + attrib = Py_None; + } + + elem = element_new(tag, attrib); + + Py_DECREF(attrib); + + if (element_add_subelement(parent, elem) < 0) + return NULL; + + return elem; +} + +static void +element_dealloc(ElementObject* self) +{ + if (self->extra) + element_dealloc_extra(self); + + /* discard attributes */ + Py_DECREF(self->tag); + Py_DECREF(JOIN_OBJ(self->text)); + Py_DECREF(JOIN_OBJ(self->tail)); + + RELEASE(sizeof(ElementObject), "destroy element"); + + PyObject_Del(self); +} + +/* -------------------------------------------------------------------- */ +/* methods (in alphabetical order) */ + +static PyObject* +element_append(ElementObject* self, PyObject* args) +{ + PyObject* element; + if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element)) + return NULL; + + if (element_add_subelement(self, element) < 0) + return NULL; + + Py_RETURN_NONE; +} + +static PyObject* +element_clear(ElementObject* self, PyObject* args) +{ + if (!PyArg_ParseTuple(args, ":clear")) + return NULL; + + if (self->extra) { + element_dealloc_extra(self); + self->extra = NULL; + } + + Py_INCREF(Py_None); + Py_DECREF(JOIN_OBJ(self->text)); + self->text = Py_None; + + Py_INCREF(Py_None); + Py_DECREF(JOIN_OBJ(self->tail)); + self->tail = Py_None; + + Py_RETURN_NONE; +} + +static PyObject* +element_copy(ElementObject* self, PyObject* args) +{ + int i; + ElementObject* element; + + if (!PyArg_ParseTuple(args, ":__copy__")) + return NULL; + + element = (ElementObject*) element_new( + self->tag, (self->extra) ? 
self->extra->attrib : Py_None + ); + if (!element) + return NULL; + + Py_DECREF(JOIN_OBJ(element->text)); + element->text = self->text; + Py_INCREF(JOIN_OBJ(element->text)); + + Py_DECREF(JOIN_OBJ(element->tail)); + element->tail = self->tail; + Py_INCREF(JOIN_OBJ(element->tail)); + + if (self->extra) { + + if (element_resize(element, self->extra->length) < 0) + return NULL; + + for (i = 0; i < self->extra->length; i++) { + Py_INCREF(self->extra->children[i]); + element->extra->children[i] = self->extra->children[i]; + } + + element->extra->length = self->extra->length; + + } + + return (PyObject*) element; +} + +static PyObject* +element_deepcopy(ElementObject* self, PyObject* args) +{ + int i; + ElementObject* element; + PyObject* tag; + PyObject* attrib; + PyObject* text; + PyObject* tail; + PyObject* id; + + PyObject* memo; + if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo)) + return NULL; + + tag = deepcopy(self->tag, memo); + if (!tag) + return NULL; + + if (self->extra) { + attrib = deepcopy(self->extra->attrib, memo); + if (!attrib) { + Py_DECREF(tag); + return NULL; + } + } else { + Py_INCREF(Py_None); + attrib = Py_None; + } + + element = (ElementObject*) element_new(tag, attrib); + + Py_DECREF(tag); + Py_DECREF(attrib); + + if (!element) + return NULL; + + text = deepcopy(JOIN_OBJ(self->text), memo); + if (!text) + goto error; + Py_DECREF(element->text); + element->text = JOIN_SET(text, JOIN_GET(self->text)); + + tail = deepcopy(JOIN_OBJ(self->tail), memo); + if (!tail) + goto error; + Py_DECREF(element->tail); + element->tail = JOIN_SET(tail, JOIN_GET(self->tail)); + + if (self->extra) { + + if (element_resize(element, self->extra->length) < 0) + goto error; + + for (i = 0; i < self->extra->length; i++) { + PyObject* child = deepcopy(self->extra->children[i], memo); + if (!child) { + element->extra->length = i; + goto error; + } + element->extra->children[i] = child; + } + + element->extra->length = self->extra->length; + + } + + /* add object to 
memo dictionary (so deepcopy won't visit it again) */ + id = PyInt_FromLong((Py_uintptr_t) self); + + i = PyDict_SetItem(memo, id, (PyObject*) element); + + Py_DECREF(id); + + if (i < 0) + goto error; + + return (PyObject*) element; + + error: + Py_DECREF(element); + return NULL; +} + +LOCAL(int) +checkpath(PyObject* tag) +{ + int i, check = 1; + + /* check if a tag contains an xpath character */ + +#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@') + +#if defined(Py_USING_UNICODE) + if (PyUnicode_Check(tag)) { + Py_UNICODE *p = PyUnicode_AS_UNICODE(tag); + for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) { + if (p[i] == '{') + check = 0; + else if (p[i] == '}') + check = 1; + else if (check && PATHCHAR(p[i])) + return 1; + } + return 0; + } +#endif + if (PyString_Check(tag)) { + char *p = PyString_AS_STRING(tag); + for (i = 0; i < PyString_GET_SIZE(tag); i++) { + if (p[i] == '{') + check = 0; + else if (p[i] == '}') + check = 1; + else if (check && PATHCHAR(p[i])) + return 1; + } + return 0; + } + + return 1; /* unknown type; might be path expression */ +} + +static PyObject* +element_find(ElementObject* self, PyObject* args) +{ + int i; + + PyObject* tag; + if (!PyArg_ParseTuple(args, "O:find", &tag)) + return NULL; + + if (checkpath(tag)) + return PyObject_CallMethod( + elementpath_obj, "find", "OO", self, tag + ); + + if (!self->extra) + Py_RETURN_NONE; + + for (i = 0; i < self->extra->length; i++) { + PyObject* item = self->extra->children[i]; + if (Element_CheckExact(item) && + PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) { + Py_INCREF(item); + return item; + } + } + + Py_RETURN_NONE; +} + +static PyObject* +element_findtext(ElementObject* self, PyObject* args) +{ + int i; + + PyObject* tag; + PyObject* default_value = Py_None; + if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value)) + return NULL; + + if (checkpath(tag)) + return PyObject_CallMethod( + elementpath_obj, "findtext", "OOO", self, tag, default_value + ); 
+ + if (!self->extra) { + Py_INCREF(default_value); + return default_value; + } + + for (i = 0; i < self->extra->length; i++) { + ElementObject* item = (ElementObject*) self->extra->children[i]; + if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) { + PyObject* text = element_get_text(item); + if (text == Py_None) + return PyString_FromString(""); + Py_INCREF(text); + return text; + } + } + + Py_INCREF(default_value); + return default_value; +} + +static PyObject* +element_findall(ElementObject* self, PyObject* args) +{ + int i; + PyObject* out; + + PyObject* tag; + if (!PyArg_ParseTuple(args, "O:findall", &tag)) + return NULL; + + if (checkpath(tag)) + return PyObject_CallMethod( + elementpath_obj, "findall", "OO", self, tag + ); + + out = PyList_New(0); + if (!out) + return NULL; + + if (!self->extra) + return out; + + for (i = 0; i < self->extra->length; i++) { + PyObject* item = self->extra->children[i]; + if (Element_CheckExact(item) && + PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) { + if (PyList_Append(out, item) < 0) { + Py_DECREF(out); + return NULL; + } + } + } + + return out; +} + +static PyObject* +element_get(ElementObject* self, PyObject* args) +{ + PyObject* value; + + PyObject* key; + PyObject* default_value = Py_None; + if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value)) + return NULL; + + if (!self->extra || self->extra->attrib == Py_None) + value = default_value; + else { + value = PyDict_GetItem(self->extra->attrib, key); + if (!value) + value = default_value; + } + + Py_INCREF(value); + return value; +} + +static PyObject* +element_getchildren(ElementObject* self, PyObject* args) +{ + int i; + PyObject* list; + + if (!PyArg_ParseTuple(args, ":getchildren")) + return NULL; + + if (!self->extra) + return PyList_New(0); + + list = PyList_New(self->extra->length); + if (!list) + return NULL; + + for (i = 0; i < self->extra->length; i++) { + PyObject* item = self->extra->children[i]; + Py_INCREF(item); + 
PyList_SET_ITEM(list, i, item); + } + + return list; +} + +static PyObject* +element_getiterator(ElementObject* self, PyObject* args) +{ + PyObject* result; + + PyObject* tag = Py_None; + if (!PyArg_ParseTuple(args, "|O:getiterator", &tag)) + return NULL; + + if (!elementtree_getiterator_obj) { + PyErr_SetString( + PyExc_RuntimeError, + "getiterator helper not found" + ); + return NULL; + } + + args = PyTuple_New(2); + Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self); + Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag); + + result = PyObject_CallObject(elementtree_getiterator_obj, args); + + Py_DECREF(args); + + return result; +} + +static PyObject* +element_getitem(ElementObject* self, int index) +{ + if (!self->extra || index < 0 || index >= self->extra->length) { + PyErr_SetString( + PyExc_IndexError, + "child index out of range" + ); + return NULL; + } + + Py_INCREF(self->extra->children[index]); + return self->extra->children[index]; +} + +static PyObject* +element_getslice(ElementObject* self, int start, int end) +{ + int i; + PyObject* list; + + if (!self->extra) + return PyList_New(0); + + /* standard clamping */ + if (start < 0) + start = 0; + if (end < 0) + end = 0; + if (end > self->extra->length) + end = self->extra->length; + if (start > end) + start = end; + + list = PyList_New(end - start); + if (!list) + return NULL; + + for (i = start; i < end; i++) { + PyObject* item = self->extra->children[i]; + Py_INCREF(item); + PyList_SET_ITEM(list, i - start, item); + } + + return list; +} + +static PyObject* +element_insert(ElementObject* self, PyObject* args) +{ + int i; + + int index; + PyObject* element; + if (!PyArg_ParseTuple(args, "iO!:insert", &index, + &Element_Type, &element)) + return NULL; + + if (!self->extra) + element_new_extra(self, NULL); + + if (index < 0) + index = 0; + if (index > self->extra->length) + index = self->extra->length; + + if (element_resize(self, 1) < 0) + return NULL; + + for (i = self->extra->length; i 
> index; i--) + self->extra->children[i] = self->extra->children[i-1]; + + Py_INCREF(element); + self->extra->children[index] = element; + + self->extra->length++; + + Py_RETURN_NONE; +} + +static PyObject* +element_items(ElementObject* self, PyObject* args) +{ + if (!PyArg_ParseTuple(args, ":items")) + return NULL; + + if (!self->extra || self->extra->attrib == Py_None) + return PyList_New(0); + + return PyDict_Items(self->extra->attrib); +} + +static PyObject* +element_keys(ElementObject* self, PyObject* args) +{ + if (!PyArg_ParseTuple(args, ":keys")) + return NULL; + + if (!self->extra || self->extra->attrib == Py_None) + return PyList_New(0); + + return PyDict_Keys(self->extra->attrib); +} + +static int +element_length(ElementObject* self) +{ + if (!self->extra) + return 0; + + return self->extra->length; +} + +static PyObject* +element_makeelement(PyObject* self, PyObject* args, PyObject* kw) +{ + PyObject* elem; + + PyObject* tag; + PyObject* attrib; + if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib)) + return NULL; + + attrib = PyDict_Copy(attrib); + if (!attrib) + return NULL; + + elem = element_new(tag, attrib); + + Py_DECREF(attrib); + + return elem; +} + +static PyObject* +element_reduce(ElementObject* self, PyObject* args) +{ + if (!PyArg_ParseTuple(args, ":__reduce__")) + return NULL; + + /* Hack alert: This method is used to work around a __copy__ + problem on certain 2.3 and 2.4 versions. To save time and + simplify the code, we create the copy in here, and use a dummy + copyelement helper to trick the copy module into doing the + right thing. 
*/ + + if (!elementtree_copyelement_obj) { + PyErr_SetString( + PyExc_RuntimeError, + "copyelement helper not found" + ); + return NULL; + } + + return Py_BuildValue( + "O(N)", elementtree_copyelement_obj, element_copy(self, args) + ); +} + +static PyObject* +element_remove(ElementObject* self, PyObject* args) +{ + int i; + + PyObject* element; + if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element)) + return NULL; + + if (!self->extra) { + /* element has no children, so raise exception */ + PyErr_SetString( + PyExc_ValueError, + "list.remove(x): x not in list" + ); + return NULL; + } + + for (i = 0; i < self->extra->length; i++) { + if (self->extra->children[i] == element) + break; + if (PyObject_Compare(self->extra->children[i], element) == 0) + break; + } + + if (i == self->extra->length) { + /* element is not in children, so raise exception */ + PyErr_SetString( + PyExc_ValueError, + "list.remove(x): x not in list" + ); + return NULL; + } + + Py_DECREF(self->extra->children[i]); + + self->extra->length--; + + for (; i < self->extra->length; i++) + self->extra->children[i] = self->extra->children[i+1]; + + Py_RETURN_NONE; +} + +static PyObject* +element_repr(ElementObject* self) +{ + PyObject* repr; + char buffer[100]; + + repr = PyString_FromString("<Element "); + + PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag)); + + sprintf(buffer, " at %p>", self); + PyString_ConcatAndDel(&repr, PyString_FromString(buffer)); + + return repr; +} + +static PyObject* +element_set(ElementObject* self, PyObject* args) +{ + PyObject* attrib; + + PyObject* key; + PyObject* value; + if (!PyArg_ParseTuple(args, "OO:set", &key, &value)) + return NULL; + + if (!self->extra) + element_new_extra(self, NULL); + + attrib = element_get_attrib(self); + if (!attrib) + return NULL; + + if (PyDict_SetItem(attrib, key, value) < 0) + return NULL; + + Py_RETURN_NONE; +} + +static int +element_setslice(ElementObject* self, int start, int end, PyObject* item) +{ + int i, new, old; 
+ PyObject* recycle = NULL; + + if (!self->extra) + element_new_extra(self, NULL); + + /* standard clamping */ + if (start < 0) + start = 0; + if (end < 0) + end = 0; + if (end > self->extra->length) + end = self->extra->length; + if (start > end) + start = end; + + old = end - start; + + if (item == NULL) + new = 0; + else if (PyList_CheckExact(item)) { + new = PyList_GET_SIZE(item); + } else { + /* FIXME: support arbitrary sequences? */ + PyErr_Format( + PyExc_TypeError, + "expected list, not \"%.200s\"", item->ob_type->tp_name + ); + return -1; + } + + if (old > 0) { + /* to avoid recursive calls to this method (via decref), move + old items to the recycle bin here, and get rid of them when + we're done modifying the element */ + recycle = PyList_New(old); + for (i = 0; i < old; i++) + PyList_SET_ITEM(recycle, i, self->extra->children[i + start]); + } + + if (new < old) { + /* delete slice */ + for (i = end; i < self->extra->length; i++) + self->extra->children[i + new - old] = self->extra->children[i]; + } else if (new > old) { + /* insert slice */ + if (element_resize(self, new - old) < 0) + return -1; + for (i = self->extra->length-1; i >= end; i--) + self->extra->children[i + new - old] = self->extra->children[i]; + } + + /* replace the slice */ + for (i = 0; i < new; i++) { + PyObject* element = PyList_GET_ITEM(item, i); + Py_INCREF(element); + self->extra->children[i + start] = element; + } + + self->extra->length += new - old; + + /* discard the recycle bin, and everything in it */ + Py_XDECREF(recycle); + + return 0; +} + +static int +element_setitem(ElementObject* self, int index, PyObject* item) +{ + int i; + PyObject* old; + + if (!self->extra || index < 0 || index >= self->extra->length) { + PyErr_SetString( + PyExc_IndexError, + "child assignment index out of range"); + return -1; + } + + old = self->extra->children[index]; + + if (item) { + Py_INCREF(item); + self->extra->children[index] = item; + } else { + self->extra->length--; + for (i = index; 
i < self->extra->length; i++) + self->extra->children[i] = self->extra->children[i+1]; + } + + Py_DECREF(old); + + return 0; +} + +static PyMethodDef element_methods[] = { + + {"clear", (PyCFunction) element_clear, METH_VARARGS}, + + {"get", (PyCFunction) element_get, METH_VARARGS}, + {"set", (PyCFunction) element_set, METH_VARARGS}, + + {"find", (PyCFunction) element_find, METH_VARARGS}, + {"findtext", (PyCFunction) element_findtext, METH_VARARGS}, + {"findall", (PyCFunction) element_findall, METH_VARARGS}, + + {"append", (PyCFunction) element_append, METH_VARARGS}, + {"insert", (PyCFunction) element_insert, METH_VARARGS}, + {"remove", (PyCFunction) element_remove, METH_VARARGS}, + + {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS}, + {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS}, + + {"items", (PyCFunction) element_items, METH_VARARGS}, + {"keys", (PyCFunction) element_keys, METH_VARARGS}, + + {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS}, + + {"__copy__", (PyCFunction) element_copy, METH_VARARGS}, + {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS}, + + /* Some 2.3 and 2.4 versions do not handle the __copy__ method on + C objects correctly, so we have to fake it using a __reduce__- + based hack (see the element_reduce implementation above for + details). */ + + /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're + using a runtime test to figure out if we need to fake things + or now (see the init code below). The following entry is + enabled only if the hack is needed. 
*/ + + {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS}, + + {NULL, NULL} +}; + +static PyObject* +element_getattr(ElementObject* self, char* name) +{ + PyObject* res; + + res = Py_FindMethod(element_methods, (PyObject*) self, name); + if (res) + return res; + + PyErr_Clear(); + + if (strcmp(name, "tag") == 0) + res = self->tag; + else if (strcmp(name, "text") == 0) + res = element_get_text(self); + else if (strcmp(name, "tail") == 0) { + res = element_get_tail(self); + } else if (strcmp(name, "attrib") == 0) { + if (!self->extra) + element_new_extra(self, NULL); + res = element_get_attrib(self); + } else { + PyErr_SetString(PyExc_AttributeError, name); + return NULL; + } + + if (!res) + return NULL; + + Py_INCREF(res); + return res; +} + +static int +element_setattr(ElementObject* self, const char* name, PyObject* value) +{ + if (value == NULL) { + PyErr_SetString( + PyExc_AttributeError, + "can't delete element attributes" + ); + return -1; + } + + if (strcmp(name, "tag") == 0) { + Py_DECREF(self->tag); + self->tag = value; + Py_INCREF(self->tag); + } else if (strcmp(name, "text") == 0) { + Py_DECREF(JOIN_OBJ(self->text)); + self->text = value; + Py_INCREF(self->text); + } else if (strcmp(name, "tail") == 0) { + Py_DECREF(JOIN_OBJ(self->tail)); + self->tail = value; + Py_INCREF(self->tail); + } else if (strcmp(name, "attrib") == 0) { + if (!self->extra) + element_new_extra(self, NULL); + Py_DECREF(self->extra->attrib); + self->extra->attrib = value; + Py_INCREF(self->extra->attrib); + } else { + PyErr_SetString(PyExc_AttributeError, name); + return -1; + } + + return 0; +} + +static PySequenceMethods element_as_sequence = { + (inquiry) element_length, + 0, /* sq_concat */ + 0, /* sq_repeat */ + (intargfunc) element_getitem, + (intintargfunc) element_getslice, + (intobjargproc) element_setitem, + (intintobjargproc) element_setslice, +}; + +statichere PyTypeObject Element_Type = { + PyObject_HEAD_INIT(NULL) + 0, "Element", sizeof(ElementObject), 0, + /* 
methods */ + (destructor)element_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + (getattrfunc)element_getattr, /* tp_getattr */ + (setattrfunc)element_setattr, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc)element_repr, /* tp_repr */ + 0, /* tp_as_number */ + &element_as_sequence, /* tp_as_sequence */ +}; + +/* ==================================================================== */ +/* the tree builder type */ + +typedef struct { + PyObject_HEAD + + PyObject* root; /* root node (first created node) */ + + ElementObject* this; /* current node */ + ElementObject* last; /* most recently created node */ + + PyObject* data; /* data collector (string or list), or NULL */ + + PyObject* stack; /* element stack */ + int index; /* current stack size (0=empty) */ + + /* element tracing */ + PyObject* events; /* list of events, or NULL if not collecting */ + PyObject* start_event_obj; /* event objects (NULL to ignore) */ + PyObject* end_event_obj; + PyObject* start_ns_event_obj; + PyObject* end_ns_event_obj; + +} TreeBuilderObject; + +staticforward PyTypeObject TreeBuilder_Type; + +#define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type) + +/* -------------------------------------------------------------------- */ +/* constructor and destructor */ + +LOCAL(PyObject*) +treebuilder_new(void) +{ + TreeBuilderObject* self; + + self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type); + if (self == NULL) + return NULL; + + self->root = NULL; + + Py_INCREF(Py_None); + self->this = (ElementObject*) Py_None; + + Py_INCREF(Py_None); + self->last = (ElementObject*) Py_None; + + self->data = NULL; + + self->stack = PyList_New(20); + self->index = 0; + + self->events = NULL; + self->start_event_obj = self->end_event_obj = NULL; + self->start_ns_event_obj = self->end_ns_event_obj = NULL; + + ALLOC(sizeof(TreeBuilderObject), "create treebuilder"); + + return (PyObject*) self; +} + +static PyObject* +treebuilder(PyObject* _self, PyObject* args) +{ + if 
(!PyArg_ParseTuple(args, ":TreeBuilder")) + return NULL; + + return treebuilder_new(); +} + +static void +treebuilder_dealloc(TreeBuilderObject* self) +{ + Py_XDECREF(self->end_ns_event_obj); + Py_XDECREF(self->start_ns_event_obj); + Py_XDECREF(self->end_event_obj); + Py_XDECREF(self->start_event_obj); + Py_XDECREF(self->events); + Py_DECREF(self->stack); + Py_XDECREF(self->data); + Py_DECREF(self->last); + Py_DECREF(self->this); + Py_XDECREF(self->root); + + RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder"); + + PyObject_Del(self); +} + +/* -------------------------------------------------------------------- */ +/* handlers */ + +LOCAL(PyObject*) +treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding, + PyObject* standalone) +{ + Py_RETURN_NONE; +} + +LOCAL(PyObject*) +treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, + PyObject* attrib) +{ + PyObject* node; + PyObject* this; + + if (self->data) { + if (self->this == self->last) { + self->last->text = JOIN_SET( + self->data, PyList_CheckExact(self->data) + ); + } else { + self->last->tail = JOIN_SET( + self->data, PyList_CheckExact(self->data) + ); + } + self->data = NULL; + } + + node = element_new(tag, attrib); + if (!node) + return NULL; + + this = (PyObject*) self->this; + + if (this != Py_None) { + if (element_add_subelement((ElementObject*) this, node) < 0) + return NULL; + } else { + if (self->root) { + PyErr_SetString( + PyExc_SyntaxError, + "multiple elements on top level" + ); + return NULL; + } + Py_INCREF(node); + self->root = node; + } + + if (self->index < PyList_GET_SIZE(self->stack)) { + if (PyList_SetItem(self->stack, self->index, this) < 0) + return NULL; + Py_INCREF(this); + } else { + if (PyList_Append(self->stack, this) < 0) + return NULL; + } + self->index++; + + Py_DECREF(this); + Py_INCREF(node); + self->this = (ElementObject*) node; + + Py_DECREF(self->last); + Py_INCREF(node); + self->last = (ElementObject*) node; + + if (self->start_event_obj) { + 
PyObject* res; + PyObject* action = self->start_event_obj; + res = PyTuple_New(2); + if (res) { + Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action); + Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node); + PyList_Append(self->events, res); + Py_DECREF(res); + } else + PyErr_Clear(); /* FIXME: propagate error */ + } + + return node; +} + +LOCAL(PyObject*) +treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) +{ + if (!self->data) { + /* store the first item as is */ + Py_INCREF(data); self->data = data; + } else { + /* more than one item; use a list to collect items */ + if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 && + PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) { + /* expat often generates single character data sections; handle + the most common case by resizing the existing string... */ + int size = PyString_GET_SIZE(self->data); + if (_PyString_Resize(&self->data, size + 1) < 0) + return NULL; + PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0]; + } else if (PyList_CheckExact(self->data)) { + if (PyList_Append(self->data, data) < 0) + return NULL; + } else { + PyObject* list = PyList_New(2); + if (!list) + return NULL; + PyList_SET_ITEM(list, 0, self->data); + Py_INCREF(data); PyList_SET_ITEM(list, 1, data); + self->data = list; + } + } + + Py_RETURN_NONE; +} + +LOCAL(PyObject*) +treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) +{ + PyObject* item; + + if (self->data) { + if (self->this == self->last) { + self->last->text = JOIN_SET( + self->data, PyList_CheckExact(self->data) + ); + } else { + self->last->tail = JOIN_SET( + self->data, PyList_CheckExact(self->data) + ); + } + self->data = NULL; + } + + if (self->index == 0) { + PyErr_SetString( + PyExc_IndexError, + "pop from empty stack" + ); + return NULL; + } + + self->index--; + + item = PyList_GET_ITEM(self->stack, self->index); + Py_INCREF(item); + + Py_DECREF(self->last); + + self->last = (ElementObject*) 
self->this; + self->this = (ElementObject*) item; + + if (self->end_event_obj) { + PyObject* res; + PyObject* action = self->end_event_obj; + PyObject* node = (PyObject*) self->last; + res = PyTuple_New(2); + if (res) { + Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action); + Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node); + PyList_Append(self->events, res); + Py_DECREF(res); + } else + PyErr_Clear(); /* FIXME: propagate error */ + } + + Py_INCREF(self->last); + return (PyObject*) self->last; +} + +LOCAL(void) +treebuilder_handle_namespace(TreeBuilderObject* self, int start, + const char* prefix, const char *uri) +{ + PyObject* res; + PyObject* action; + PyObject* parcel; + + if (!self->events) + return; + + if (start) { + if (!self->start_ns_event_obj) + return; + action = self->start_ns_event_obj; + /* FIXME: prefix and uri use utf-8 encoding! */ + parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri); + if (!parcel) + return; + Py_INCREF(action); + } else { + if (!self->end_ns_event_obj) + return; + action = self->end_ns_event_obj; + Py_INCREF(action); + parcel = Py_None; + Py_INCREF(parcel); + } + + res = PyTuple_New(2); + + if (res) { + PyTuple_SET_ITEM(res, 0, action); + PyTuple_SET_ITEM(res, 1, parcel); + PyList_Append(self->events, res); + Py_DECREF(res); + } else + PyErr_Clear(); /* FIXME: propagate error */ +} + +/* -------------------------------------------------------------------- */ +/* methods (in alphabetical order) */ + +static PyObject* +treebuilder_data(TreeBuilderObject* self, PyObject* args) +{ + PyObject* data; + if (!PyArg_ParseTuple(args, "O:data", &data)) + return NULL; + + return treebuilder_handle_data(self, data); +} + +static PyObject* +treebuilder_end(TreeBuilderObject* self, PyObject* args) +{ + PyObject* tag; + if (!PyArg_ParseTuple(args, "O:end", &tag)) + return NULL; + + return treebuilder_handle_end(self, tag); +} + +LOCAL(PyObject*) +treebuilder_done(TreeBuilderObject* self) +{ + PyObject* res; + + /* 
FIXME: check stack size? */ + + if (self->root) + res = self->root; + else + res = Py_None; + + Py_INCREF(res); + return res; +} + +static PyObject* +treebuilder_close(TreeBuilderObject* self, PyObject* args) +{ + if (!PyArg_ParseTuple(args, ":close")) + return NULL; + + return treebuilder_done(self); +} + +static PyObject* +treebuilder_start(TreeBuilderObject* self, PyObject* args) +{ + PyObject* tag; + PyObject* attrib = Py_None; + if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib)) + return NULL; + + return treebuilder_handle_start(self, tag, attrib); +} + +static PyObject* +treebuilder_xml(TreeBuilderObject* self, PyObject* args) +{ + PyObject* encoding; + PyObject* standalone; + if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone)) + return NULL; + + return treebuilder_handle_xml(self, encoding, standalone); +} + +static PyMethodDef treebuilder_methods[] = { + {"data", (PyCFunction) treebuilder_data, METH_VARARGS}, + {"start", (PyCFunction) treebuilder_start, METH_VARARGS}, + {"end", (PyCFunction) treebuilder_end, METH_VARARGS}, + {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS}, + {"close", (PyCFunction) treebuilder_close, METH_VARARGS}, + {NULL, NULL} +}; + +static PyObject* +treebuilder_getattr(TreeBuilderObject* self, char* name) +{ + return Py_FindMethod(treebuilder_methods, (PyObject*) self, name); +} + +statichere PyTypeObject TreeBuilder_Type = { + PyObject_HEAD_INIT(NULL) + 0, "TreeBuilder", sizeof(TreeBuilderObject), 0, + /* methods */ + (destructor)treebuilder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + (getattrfunc)treebuilder_getattr, /* tp_getattr */ +}; + +/* ==================================================================== */ +/* the expat interface */ + +#if defined(USE_EXPAT) + +#include "expat.h" + +#if defined(USE_PYEXPAT_CAPI) +#include "pyexpat.h" +static struct PyExpat_CAPI* expat_capi; +#define EXPAT(func) (expat_capi->func) +#else +#define EXPAT(func) (XML_##func) +#endif + +typedef struct { + PyObject_HEAD + + 
#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first "size" bytes of "string" has its high
   bit set (i.e. the buffer is not plain 7-bit data), 0 otherwise. */
LOCAL(int)
checkstring(const char* string, int size)
{
    const char* cursor = string;
    int remaining = size;

    while (remaining-- > 0) {
        if ((*cursor++ & 0x80) != 0)
            return 1;
    }

    return 0;
}
#endif
convert to universal name, and decode as + necessary */ + + PyObject* tag; + char* p; + int i; + + /* look for namespace separator */ + for (i = 0; i < size; i++) + if (string[i] == '}') + break; + if (i != size) { + /* convert to universal name */ + tag = PyString_FromStringAndSize(NULL, size+1); + p = PyString_AS_STRING(tag); + p[0] = '{'; + memcpy(p+1, string, size); + size++; + } else { + /* plain name; use key as tag */ + Py_INCREF(key); + tag = key; + } + + /* decode universal name */ +#if defined(Py_USING_UNICODE) + /* inline makestring, to avoid duplicating the source string if + it's not an utf-8 string */ + p = PyString_AS_STRING(tag); + if (checkstring(p, size)) { + value = PyUnicode_DecodeUTF8(p, size, "strict"); + Py_DECREF(tag); + if (!value) { + Py_DECREF(key); + return NULL; + } + } else +#endif + value = tag; /* use tag as is */ + + /* add to names dictionary */ + if (PyDict_SetItem(self->names, key, value) < 0) { + Py_DECREF(key); + Py_DECREF(value); + return NULL; + } + } + + Py_DECREF(key); + return value; +} + +/* -------------------------------------------------------------------- */ +/* handlers */ + +static void +expat_default_handler(XMLParserObject* self, const XML_Char* data_in, + int data_len) +{ + PyObject* key; + PyObject* value; + PyObject* res; + + if (data_len < 2 || data_in[0] != '&') + return; + + key = makestring(data_in + 1, data_len - 2); + if (!key) + return; + + value = PyDict_GetItem(self->entity, key); + + if (value) { + if (TreeBuilder_CheckExact(self->target)) + res = treebuilder_handle_data( + (TreeBuilderObject*) self->target, value + ); + else if (self->handle_data) + res = PyObject_CallFunction(self->handle_data, "O", value); + else + res = NULL; + Py_DECREF(value); + Py_XDECREF(res); + } else { + PyErr_Format( + PyExc_SyntaxError, "undefined entity &%s;: line %d, column %d", + PyString_AS_STRING(key), + EXPAT(GetErrorLineNumber)(self->parser), + EXPAT(GetErrorColumnNumber)(self->parser) + ); + } + + Py_DECREF(key); 
+} + +static void +expat_start_handler(XMLParserObject* self, const XML_Char* tag_in, + const XML_Char **attrib_in) +{ + PyObject* res; + PyObject* tag; + PyObject* attrib; + int ok; + + /* tag name */ + tag = makeuniversal(self, tag_in); + if (!tag) + return; /* parser will look for errors */ + + /* attributes */ + if (attrib_in[0]) { + attrib = PyDict_New(); + if (!attrib) + return; + while (attrib_in[0] && attrib_in[1]) { + PyObject* key = makeuniversal(self, attrib_in[0]); + PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1])); + if (!key || !value) { + Py_XDECREF(value); + Py_XDECREF(key); + Py_DECREF(attrib); + return; + } + ok = PyDict_SetItem(attrib, key, value); + Py_DECREF(value); + Py_DECREF(key); + if (ok < 0) { + Py_DECREF(attrib); + return; + } + attrib_in += 2; + } + } else { + Py_INCREF(Py_None); + attrib = Py_None; + } + + if (TreeBuilder_CheckExact(self->target)) + /* shortcut */ + res = treebuilder_handle_start((TreeBuilderObject*) self->target, + tag, attrib); + else if (self->handle_start) + res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib); + else + res = NULL; + + Py_DECREF(tag); + Py_DECREF(attrib); + + Py_XDECREF(res); +} + +static void +expat_data_handler(XMLParserObject* self, const XML_Char* data_in, + int data_len) +{ + PyObject* data; + PyObject* res; + + data = makestring(data_in, data_len); + + if (TreeBuilder_CheckExact(self->target)) + /* shortcut */ + res = treebuilder_handle_data((TreeBuilderObject*) self->target, data); + else if (self->handle_data) + res = PyObject_CallFunction(self->handle_data, "O", data); + else + res = NULL; + + Py_DECREF(data); + + Py_XDECREF(res); +} + +static void +expat_end_handler(XMLParserObject* self, const XML_Char* tag_in) +{ + PyObject* tag; + PyObject* res = NULL; + + if (TreeBuilder_CheckExact(self->target)) + /* shortcut */ + /* the standard tree builder doesn't look at the end tag */ + res = treebuilder_handle_end( + (TreeBuilderObject*) self->target, Py_None + ); 
+ else if (self->handle_end) { + tag = makeuniversal(self, tag_in); + if (tag) { + res = PyObject_CallFunction(self->handle_end, "O", tag); + Py_DECREF(tag); + } + } + + Py_XDECREF(res); +} + +static void +expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix, + const XML_Char *uri) +{ + treebuilder_handle_namespace( + (TreeBuilderObject*) self->target, 1, prefix, uri + ); +} + +static void +expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in) +{ + treebuilder_handle_namespace( + (TreeBuilderObject*) self->target, 0, NULL, NULL + ); +} + +static void +expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) +{ + PyObject* comment; + PyObject* res; + + if (self->handle_comment) { + comment = makestring(comment_in, strlen(comment_in)); + if (comment) { + res = PyObject_CallFunction(self->handle_comment, "O", comment); + Py_XDECREF(res); + Py_DECREF(comment); + } + } +} + +static void +expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, + const XML_Char* data_in) +{ + PyObject* target; + PyObject* data; + PyObject* res; + + if (self->handle_pi) { + target = makestring(target_in, strlen(target_in)); + data = makestring(data_in, strlen(data_in)); + if (target && data) { + res = PyObject_CallFunction(self->handle_pi, "OO", target, data); + Py_XDECREF(res); + Py_DECREF(data); + Py_DECREF(target); + } else { + Py_XDECREF(data); + Py_XDECREF(target); + } + } +} + +#if defined(Py_USING_UNICODE) +static int +expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name, + XML_Encoding *info) +{ + PyObject* u; + Py_UNICODE* p; + unsigned char s[256]; + int i; + + memset(info, 0, sizeof(XML_Encoding)); + + for (i = 0; i < 256; i++) + s[i] = i; + + u = PyUnicode_Decode(s, 256, name, "replace"); + if (!u) + return XML_STATUS_ERROR; + + if (PyUnicode_GET_SIZE(u) != 256) { + Py_DECREF(u); + return XML_STATUS_ERROR; + } + + p = PyUnicode_AS_UNICODE(u); + + for (i = 0; i < 256; i++) { + if (p[i] != 
Py_UNICODE_REPLACEMENT_CHARACTER) + info->map[i] = p[i]; + else + info->map[i] = -1; + } + + Py_DECREF(u); + + return XML_STATUS_OK; +} +#endif + +/* -------------------------------------------------------------------- */ +/* constructor and destructor */ + +static PyObject* +xmlparser(PyObject* _self, PyObject* args, PyObject* kw) +{ + XMLParserObject* self; + /* FIXME: does this need to be static? */ + static XML_Memory_Handling_Suite memory_handler; + + PyObject* target = NULL; + char* encoding = NULL; + static PY_CONST char* kwlist[] = { "target", "encoding", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist, + &target, &encoding)) + return NULL; + +#if defined(USE_PYEXPAT_CAPI) + if (!expat_capi) { + PyErr_SetString( + PyExc_RuntimeError, "cannot load dispatch table from pyexpat" + ); + return NULL; + } +#endif + + self = PyObject_New(XMLParserObject, &XMLParser_Type); + if (self == NULL) + return NULL; + + self->entity = PyDict_New(); + if (!self->entity) { + PyObject_Del(self); + return NULL; /* FIXME: cleanup on error */ + } + + self->names = PyDict_New(); + if (!self->names) { + PyObject_Del(self); + return NULL; /* FIXME: cleanup on error */ + } + + memory_handler.malloc_fcn = PyObject_Malloc; + memory_handler.realloc_fcn = PyObject_Realloc; + memory_handler.free_fcn = PyObject_Free; + + self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}"); + if (!self->parser) { + PyErr_NoMemory(); + return NULL; /* FIXME: cleanup on error */ + } + + /* setup target handlers */ + if (!target) { + target = treebuilder_new(); + if (!target) { + PyObject_Del(self); + return NULL; /* FIXME: cleanup on error */ + } + } else + Py_INCREF(target); + self->target = target; + + self->handle_xml = PyObject_GetAttrString(target, "xml"); + self->handle_start = PyObject_GetAttrString(target, "start"); + self->handle_data = PyObject_GetAttrString(target, "data"); + self->handle_end = PyObject_GetAttrString(target, "end"); + 
self->handle_comment = PyObject_GetAttrString(target, "comment"); + self->handle_pi = PyObject_GetAttrString(target, "pi"); + + PyErr_Clear(); + + /* configure parser */ + EXPAT(SetUserData)(self->parser, self); + EXPAT(SetElementHandler)( + self->parser, + (XML_StartElementHandler) expat_start_handler, + (XML_EndElementHandler) expat_end_handler + ); + EXPAT(SetDefaultHandlerExpand)( + self->parser, + (XML_DefaultHandler) expat_default_handler + ); + EXPAT(SetCharacterDataHandler)( + self->parser, + (XML_CharacterDataHandler) expat_data_handler + ); + if (self->handle_comment) + EXPAT(SetCommentHandler)( + self->parser, + (XML_CommentHandler) expat_comment_handler + ); + if (self->handle_pi) + EXPAT(SetProcessingInstructionHandler)( + self->parser, + (XML_ProcessingInstructionHandler) expat_pi_handler + ); +#if defined(Py_USING_UNICODE) + EXPAT(SetUnknownEncodingHandler)( + self->parser, + (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL + ); +#endif + + ALLOC(sizeof(XMLParserObject), "create expatparser"); + + return (PyObject*) self; +} + +static void +xmlparser_dealloc(XMLParserObject* self) +{ + EXPAT(ParserFree)(self->parser); + + Py_XDECREF(self->handle_pi); + Py_XDECREF(self->handle_comment); + Py_XDECREF(self->handle_end); + Py_XDECREF(self->handle_data); + Py_XDECREF(self->handle_start); + Py_XDECREF(self->handle_xml); + + Py_DECREF(self->target); + Py_DECREF(self->entity); + Py_DECREF(self->names); + + RELEASE(sizeof(XMLParserObject), "destroy expatparser"); + + PyObject_Del(self); +} + +/* -------------------------------------------------------------------- */ +/* methods (in alphabetical order) */ + +LOCAL(PyObject*) +expat_parse(XMLParserObject* self, char* data, int data_len, int final) +{ + int ok; + + ok = EXPAT(Parse)(self->parser, data, data_len, final); + + if (PyErr_Occurred()) + return NULL; + + if (!ok) { + PyErr_Format( + PyExc_SyntaxError, "%s: line %d, column %d", + EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)), + 
EXPAT(GetErrorLineNumber)(self->parser), + EXPAT(GetErrorColumnNumber)(self->parser) + ); + return NULL; + } + + Py_RETURN_NONE; +} + +static PyObject* +xmlparser_close(XMLParserObject* self, PyObject* args) +{ + /* end feeding data to parser */ + + PyObject* res; + if (!PyArg_ParseTuple(args, ":close")) + return NULL; + + res = expat_parse(self, "", 0, 1); + + if (res && TreeBuilder_CheckExact(self->target)) { + Py_DECREF(res); + return treebuilder_done((TreeBuilderObject*) self->target); + } + + return res; +} + +static PyObject* +xmlparser_feed(XMLParserObject* self, PyObject* args) +{ + /* feed data to parser */ + + char* data; + int data_len; + if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len)) + return NULL; + + return expat_parse(self, data, data_len, 0); +} + +static PyObject* +xmlparser_parse(XMLParserObject* self, PyObject* args) +{ + /* (internal) parse until end of input stream */ + + PyObject* reader; + PyObject* buffer; + PyObject* res; + + PyObject* fileobj; + if (!PyArg_ParseTuple(args, "O:_parse", &fileobj)) + return NULL; + + reader = PyObject_GetAttrString(fileobj, "read"); + if (!reader) + return NULL; + + /* read from open file object */ + for (;;) { + + buffer = PyObject_CallFunction(reader, "i", 64*1024); + + if (!buffer) { + /* read failed (e.g. 
due to KeyboardInterrupt) */ + Py_DECREF(reader); + return NULL; + } + + if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) { + Py_DECREF(buffer); + break; + } + + res = expat_parse( + self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0 + ); + + Py_DECREF(buffer); + + if (!res) { + Py_DECREF(reader); + return NULL; + } + Py_DECREF(res); + + } + + Py_DECREF(reader); + + res = expat_parse(self, "", 0, 1); + + if (res && TreeBuilder_CheckExact(self->target)) { + Py_DECREF(res); + return treebuilder_done((TreeBuilderObject*) self->target); + } + + return res; +} + +static PyObject* +xmlparser_setevents(XMLParserObject* self, PyObject* args) +{ + /* activate element event reporting */ + + int i; + TreeBuilderObject* target; + + PyObject* events; /* event collector */ + PyObject* event_set = Py_None; + if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events, + &event_set)) + return NULL; + + if (!TreeBuilder_CheckExact(self->target)) { + PyErr_SetString( + PyExc_TypeError, + "event handling only supported for cElementTree.Treebuilder " + "targets" + ); + return NULL; + } + + target = (TreeBuilderObject*) self->target; + + Py_INCREF(events); + Py_XDECREF(target->events); + target->events = events; + + /* clear out existing events */ + Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL; + Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL; + Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL; + Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL; + + if (event_set == Py_None) { + /* default is "end" only */ + target->end_event_obj = PyString_FromString("end"); + Py_RETURN_NONE; + } + + if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */ + goto error; + + for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) { + PyObject* item = PyTuple_GET_ITEM(event_set, i); + char* event; + if (!PyString_Check(item)) + goto error; + event = 
PyString_AS_STRING(item); + if (strcmp(event, "start") == 0) { + Py_INCREF(item); + target->start_event_obj = item; + } else if (strcmp(event, "end") == 0) { + Py_INCREF(item); + Py_XDECREF(target->end_event_obj); + target->end_event_obj = item; + } else if (strcmp(event, "start-ns") == 0) { + Py_INCREF(item); + Py_XDECREF(target->start_ns_event_obj); + target->start_ns_event_obj = item; + EXPAT(SetNamespaceDeclHandler)( + self->parser, + (XML_StartNamespaceDeclHandler) expat_start_ns_handler, + (XML_EndNamespaceDeclHandler) expat_end_ns_handler + ); + } else if (strcmp(event, "end-ns") == 0) { + Py_INCREF(item); + Py_XDECREF(target->end_ns_event_obj); + target->end_ns_event_obj = item; + EXPAT(SetNamespaceDeclHandler)( + self->parser, + (XML_StartNamespaceDeclHandler) expat_start_ns_handler, + (XML_EndNamespaceDeclHandler) expat_end_ns_handler + ); + } else { + PyErr_Format( + PyExc_ValueError, + "unknown event '%s'", event + ); + return NULL; + } + } + + Py_RETURN_NONE; + + error: + PyErr_SetString( + PyExc_TypeError, + "invalid event tuple" + ); + return NULL; +} + +static PyMethodDef xmlparser_methods[] = { + {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS}, + {"close", (PyCFunction) xmlparser_close, METH_VARARGS}, + {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS}, + {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS}, + {NULL, NULL} +}; + +static PyObject* +xmlparser_getattr(XMLParserObject* self, char* name) +{ + PyObject* res; + + res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name); + if (res) + return res; + + PyErr_Clear(); + + if (strcmp(name, "entity") == 0) + res = self->entity; + else if (strcmp(name, "target") == 0) + res = self->target; + else if (strcmp(name, "version") == 0) { + char buffer[100]; + sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION, + XML_MINOR_VERSION, XML_MICRO_VERSION); + return PyString_FromString(buffer); + } else { + PyErr_SetString(PyExc_AttributeError, name); + return NULL; + } + + 
Py_INCREF(res); + return res; +} + +statichere PyTypeObject XMLParser_Type = { + PyObject_HEAD_INIT(NULL) + 0, "XMLParser", sizeof(XMLParserObject), 0, + /* methods */ + (destructor)xmlparser_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + (getattrfunc)xmlparser_getattr, /* tp_getattr */ +}; + +#endif + +/* ==================================================================== */ +/* python module interface */ + +static PyMethodDef _functions[] = { + {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS}, + {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS}, + {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS}, +#if defined(USE_EXPAT) + {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, + {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, +#endif + {NULL, NULL} +}; + +DL_EXPORT(void) +initcElementTree(void) +{ + PyObject* m; + PyObject* g; + char* bootstrap; +#if defined(USE_PYEXPAT_CAPI) + struct PyExpat_CAPI* capi; +#endif + + /* Patch object type */ + Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type; +#if defined(USE_EXPAT) + XMLParser_Type.ob_type = &PyType_Type; +#endif + + m = Py_InitModule("cElementTree", _functions); + + /* python glue code */ + + g = PyDict_New(); + + PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins()); + + bootstrap = ( + +#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000) + "from __future__ import generators\n" /* enable yield under 2.2 */ +#endif + + "from copy import copy, deepcopy\n" + + "try:\n" + " from elementtree import ElementTree\n" + "except ImportError:\n" + " import ElementTree\n" + "ET = ElementTree\n" + "del ElementTree\n" + + "import cElementTree\n" + + "try:\n" /* check if copy works as is */ + " copy(cElementTree.Element('x'))\n" + "except:\n" + " def copyelement(elem):\n" + " return elem\n" + + "def Comment(text=None):\n" /* public */ + " element = cElementTree.Element(ET.Comment)\n" + " element.text = text\n" + " 
return element\n" + "cElementTree.Comment = Comment\n" + + "class ElementTree(ET.ElementTree):\n" /* public */ + " def parse(self, source, parser=None):\n" + " if not hasattr(source, 'read'):\n" + " source = open(source, 'rb')\n" + " if parser is not None:\n" + " while 1:\n" + " data = source.read(65536)\n" + " if not data:\n" + " break\n" + " parser.feed(data)\n" + " self._root = parser.close()\n" + " else:\n" + " parser = cElementTree.XMLParser()\n" + " self._root = parser._parse(source)\n" + " return self._root\n" + "cElementTree.ElementTree = ElementTree\n" + + "def getiterator(node, tag=None):\n" /* helper */ + " if tag == '*':\n" + " tag = None\n" +#if (PY_VERSION_HEX < 0x02020000) + " nodes = []\n" /* 2.1 doesn't have yield */ + " if tag is None or node.tag == tag:\n" + " nodes.append(node)\n" + " for node in node:\n" + " nodes.extend(getiterator(node, tag))\n" + " return nodes\n" +#else + " if tag is None or node.tag == tag:\n" + " yield node\n" + " for node in node:\n" + " for node in getiterator(node, tag):\n" + " yield node\n" +#endif + + "def parse(source, parser=None):\n" /* public */ + " tree = ElementTree()\n" + " tree.parse(source, parser)\n" + " return tree\n" + "cElementTree.parse = parse\n" + +#if (PY_VERSION_HEX < 0x02020000) + "if hasattr(ET, 'iterparse'):\n" + " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */ +#else + "class iterparse(object):\n" + " root = None\n" + " def __init__(self, file, events=None):\n" + " if not hasattr(file, 'read'):\n" + " file = open(file, 'rb')\n" + " self._file = file\n" + " self._events = events\n" + " def __iter__(self):\n" + " events = []\n" + " b = cElementTree.TreeBuilder()\n" + " p = cElementTree.XMLParser(b)\n" + " p._setevents(events, self._events)\n" + " while 1:\n" + " data = self._file.read(16384)\n" + " if not data:\n" + " break\n" + " p.feed(data)\n" + " for event in events:\n" + " yield event\n" + " del events[:]\n" + " root = p.close()\n" + " for event in events:\n" + " yield 
event\n" + " self.root = root\n" + "cElementTree.iterparse = iterparse\n" +#endif + + "def PI(target, text=None):\n" /* public */ + " element = cElementTree.Element(ET.ProcessingInstruction)\n" + " element.text = target\n" + " if text:\n" + " element.text = element.text + ' ' + text\n" + " return element\n" + + " elem = cElementTree.Element(ET.PI)\n" + " elem.text = text\n" + " return elem\n" + "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n" + + "def XML(text):\n" /* public */ + " parser = cElementTree.XMLParser()\n" + " parser.feed(text)\n" + " return parser.close()\n" + "cElementTree.XML = cElementTree.fromstring = XML\n" + + "def XMLID(text):\n" /* public */ + " tree = XML(text)\n" + " ids = {}\n" + " for elem in tree.getiterator():\n" + " id = elem.get('id')\n" + " if id:\n" + " ids[id] = elem\n" + " return tree, ids\n" + "cElementTree.XMLID = XMLID\n" + + "cElementTree.dump = ET.dump\n" + "cElementTree.ElementPath = ElementPath = ET.ElementPath\n" + "cElementTree.iselement = ET.iselement\n" + "cElementTree.QName = ET.QName\n" + "cElementTree.tostring = ET.tostring\n" + "cElementTree.VERSION = '" VERSION "'\n" + "cElementTree.__version__ = '" VERSION "'\n" + "cElementTree.XMLParserError = SyntaxError\n" + + ); + + PyRun_String(bootstrap, Py_file_input, g, NULL); + + elementpath_obj = PyDict_GetItemString(g, "ElementPath"); + + elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement"); + if (elementtree_copyelement_obj) { + /* reduce hack needed; enable reduce method */ + PyMethodDef* mp; + for (mp = element_methods; mp->ml_name; mp++) + if (mp->ml_meth == (PyCFunction) element_reduce) { + mp->ml_name = "__reduce__"; + break; + } + } else + PyErr_Clear(); + elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy"); + elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator"); + +#if defined(USE_PYEXPAT_CAPI) + /* link against pyexpat, if possible */ + capi = PyCObject_Import("pyexpat", "expat_CAPI"); + if (capi && + 
strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 && + capi->size <= sizeof(*expat_capi) && + capi->MAJOR_VERSION == XML_MAJOR_VERSION && + capi->MINOR_VERSION == XML_MINOR_VERSION && + capi->MICRO_VERSION == XML_MICRO_VERSION) + expat_capi = capi; + else + expat_capi = NULL; +#endif + +} |