diff options
Diffstat (limited to 'Lib/xml')
-rw-r--r-- | Lib/xml/dom/minidom.py | 1614 |
1 files changed, 1287 insertions, 327 deletions
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 967320f..46eb881 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -14,56 +14,34 @@ Todo: * SAX 2 namespaces """ -from xml.dom import HierarchyRequestErr, EMPTY_NAMESPACE - -# localize the types, and allow support for Unicode values if available: -import types -_TupleType = types.TupleType -try: - _StringTypes = (types.StringType, types.UnicodeType) -except AttributeError: - _StringTypes = (types.StringType,) -del types - import xml.dom +from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX +from xml.dom.minicompat import * +from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS -if list is type([]): - class NodeList(list): - def item(self, index): - if 0 <= index < len(self): - return self[index] - - length = property(lambda self: len(self), - doc="The number of nodes in the NodeList.") +_TupleType = type(()) -else: - def NodeList(): - return [] +# This is used by the ID-cache invalidation checks; the list isn't +# actually complete, since the nodes being checked will never be the +# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is +# the node being added or removed, not the node being modified.) 
+# +_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE, + xml.dom.Node.ENTITY_REFERENCE_NODE) -class Node(xml.dom.Node): - allnodes = {} - _debug = 0 - _makeParentNodes = 1 - debug = None - childNodeTypes = () +class Node(xml.dom.Node, GetattrMagic): namespaceURI = None # this is non-null only for elements and attributes parentNode = None ownerDocument = None + nextSibling = None + previousSibling = None - def __init__(self): - self.childNodes = NodeList() - if Node._debug: - index = repr(id(self)) + repr(self.__class__) - Node.allnodes[index] = repr(self.__dict__) - if Node.debug is None: - Node.debug = _get_StringIO() - #open("debug4.out", "w") - Node.debug.write("create %s\n" % index) + prefix = EMPTY_PREFIX # non-null only for NS elements and attributes def __nonzero__(self): - return 1 + return True def toxml(self, encoding = None): return self.toprettyxml("", "", encoding) @@ -85,9 +63,12 @@ class Node(xml.dom.Node): def hasChildNodes(self): if self.childNodes: - return 1 + return True else: - return 0 + return False + + def _get_childNodes(self): + return self.childNodes def _get_firstChild(self): if self.childNodes: @@ -97,56 +78,26 @@ class Node(xml.dom.Node): if self.childNodes: return self.childNodes[-1] - try: - property - except NameError: - def __getattr__(self, key): - if key[0:2] == "__": - raise AttributeError, key - # getattr should never call getattr! 
- if self.__dict__.has_key("inGetAttr"): - del self.inGetAttr - raise AttributeError, key - - prefix, attrname = key[:5], key[5:] - if prefix == "_get_": - self.inGetAttr = 1 - if hasattr(self, attrname): - del self.inGetAttr - return (lambda self=self, attrname=attrname: - getattr(self, attrname)) - else: - del self.inGetAttr - raise AttributeError, key - else: - self.inGetAttr = 1 - try: - func = getattr(self, "_get_" + key) - except AttributeError: - raise AttributeError, key - del self.inGetAttr - return func() - else: - firstChild = property(_get_firstChild, - doc="First child node, or None.") - lastChild = property(_get_lastChild, - doc="Last child node, or None.") - def insertBefore(self, newChild, refChild): if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: for c in tuple(newChild.childNodes): self.insertBefore(c, refChild) ### The DOM does not clearly specify what to return in this case return newChild - if newChild.nodeType not in self.childNodeTypes: - raise HierarchyRequestErr, \ - "%s cannot be child of %s" % (repr(newChild), repr(self)) + if newChild.nodeType not in self._child_node_types: + raise xml.dom.HierarchyRequestErr( + "%s cannot be child of %s" % (repr(newChild), repr(self))) if newChild.parentNode is not None: newChild.parentNode.removeChild(newChild) if refChild is None: self.appendChild(newChild) else: - index = self.childNodes.index(refChild) + try: + index = self.childNodes.index(refChild) + except ValueError: + raise xml.dom.NotFoundErr() + if newChild.nodeType in _nodeTypes_with_children: + _clear_id_cache(self) self.childNodes.insert(index, newChild) newChild.nextSibling = refChild refChild.previousSibling = newChild @@ -156,8 +107,7 @@ class Node(xml.dom.Node): newChild.previousSibling = node else: newChild.previousSibling = None - if self._makeParentNodes: - newChild.parentNode = self + newChild.parentNode = self return newChild def appendChild(self, node): @@ -166,21 +116,15 @@ class Node(xml.dom.Node): self.appendChild(c) ### 
The DOM does not clearly specify what to return in this case return node - if node.nodeType not in self.childNodeTypes: - raise HierarchyRequestErr, \ - "%s cannot be child of %s" % (repr(node), repr(self)) + if node.nodeType not in self._child_node_types: + raise xml.dom.HierarchyRequestErr( + "%s cannot be child of %s" % (repr(node), repr(self))) + elif node.nodeType in _nodeTypes_with_children: + _clear_id_cache(self) if node.parentNode is not None: node.parentNode.removeChild(node) - if self.childNodes: - last = self.lastChild - node.previousSibling = last - last.nextSibling = node - else: - node.previousSibling = None + _append_child(self, node) node.nextSibling = None - self.childNodes.append(node) - if self._makeParentNodes: - node.parentNode = self return node def replaceChild(self, newChild, oldChild): @@ -188,18 +132,23 @@ class Node(xml.dom.Node): refChild = oldChild.nextSibling self.removeChild(oldChild) return self.insertBefore(newChild, refChild) - if newChild.nodeType not in self.childNodeTypes: - raise HierarchyRequestErr, \ - "%s cannot be child of %s" % (repr(newChild), repr(self)) + if newChild.nodeType not in self._child_node_types: + raise xml.dom.HierarchyRequestErr( + "%s cannot be child of %s" % (repr(newChild), repr(self))) if newChild.parentNode is not None: newChild.parentNode.removeChild(newChild) if newChild is oldChild: return - index = self.childNodes.index(oldChild) + try: + index = self.childNodes.index(oldChild) + except ValueError: + raise xml.dom.NotFoundErr() self.childNodes[index] = newChild - if self._makeParentNodes: - newChild.parentNode = self - oldChild.parentNode = None + newChild.parentNode = self + oldChild.parentNode = None + if (newChild.nodeType in _nodeTypes_with_children + or oldChild.nodeType in _nodeTypes_with_children): + _clear_id_cache(self) newChild.nextSibling = oldChild.nextSibling newChild.previousSibling = oldChild.previousSibling oldChild.nextSibling = None @@ -211,15 +160,19 @@ class Node(xml.dom.Node): 
return oldChild def removeChild(self, oldChild): - self.childNodes.remove(oldChild) + try: + self.childNodes.remove(oldChild) + except ValueError: + raise xml.dom.NotFoundErr() if oldChild.nextSibling is not None: oldChild.nextSibling.previousSibling = oldChild.previousSibling if oldChild.previousSibling is not None: oldChild.previousSibling.nextSibling = oldChild.nextSibling oldChild.nextSibling = oldChild.previousSibling = None + if oldChild.nodeType in _nodeTypes_with_children: + _clear_id_cache(self) - if self._makeParentNodes: - oldChild.parentNode = None + oldChild.parentNode = None return oldChild def normalize(self): @@ -230,7 +183,7 @@ class Node(xml.dom.Node): if data and L and L[-1].nodeType == child.nodeType: # collapse text node node = L[-1] - node.data = node.nodeValue = node.data + child.data + node.data = node.data + child.data node.nextSibling = child.nextSibling child.unlink() elif data: @@ -255,58 +208,115 @@ class Node(xml.dom.Node): self.childNodes[:] = L def cloneNode(self, deep): - import new - clone = new.instance(self.__class__, self.__dict__.copy()) - if self._makeParentNodes: - clone.parentNode = None - clone.childNodes = NodeList() - if deep: - for child in self.childNodes: - clone.appendChild(child.cloneNode(1)) - return clone + return _clone_node(self, deep, self.ownerDocument or self) + + def isSupported(self, feature, version): + return self.ownerDocument.implementation.hasFeature(feature, version) - # DOM Level 3 (Working Draft 2001-Jan-26) + def _get_localName(self): + # Overridden in Element and Attr where localName can be Non-Null + return None + + # Node interfaces from Level 3 (WD 9 April 2002) def isSameNode(self, other): return self is other + def getInterface(self, feature): + if self.isSupported(feature, None): + return self + else: + return None + + # The "user data" functions use a dictionary that is only present + # if some user data has been set, so be careful not to assume it + # exists. 
+ + def getUserData(self, key): + try: + return self._user_data[key][0] + except (AttributeError, KeyError): + return None + + def setUserData(self, key, data, handler): + old = None + try: + d = self._user_data + except AttributeError: + d = {} + self._user_data = d + if d.has_key(key): + old = d[key][0] + if data is None: + # ignore handlers passed for None + handler = None + if old is not None: + del d[key] + else: + d[key] = (data, handler) + return old + + def _call_user_data_handler(self, operation, src, dst): + if hasattr(self, "_user_data"): + for key, (data, handler) in self._user_data.items(): + if handler is not None: + handler.handle(operation, key, data, src, dst) + # minidom-specific API: def unlink(self): self.parentNode = self.ownerDocument = None - for child in self.childNodes: - child.unlink() - self.childNodes = None + if self.childNodes: + for child in self.childNodes: + child.unlink() + self.childNodes = NodeList() self.previousSibling = None self.nextSibling = None - if Node._debug: - index = repr(id(self)) + repr(self.__class__) - self.debug.write("Deleting: %s\n" % index) - del Node.allnodes[index] + +defproperty(Node, "firstChild", doc="First child node, or None.") +defproperty(Node, "lastChild", doc="Last child node, or None.") +defproperty(Node, "localName", doc="Namespace-local name of this node.") + + +def _append_child(self, node): + # fast path with less checks; usable by DOM builders if careful + childNodes = self.childNodes + if childNodes: + last = childNodes[-1] + node.__dict__["previousSibling"] = last + last.__dict__["nextSibling"] = node + childNodes.append(node) + node.__dict__["parentNode"] = self + +def _in_document(node): + # return True iff node is part of a document tree + while node is not None: + if node.nodeType == Node.DOCUMENT_NODE: + return True + node = node.parentNode + return False def _write_data(writer, data): "Writes datachars to writer." 
- data = data.replace("&", "&amp;") - data = data.replace("<", "&lt;") - data = data.replace("\"", "&quot;") - data = data.replace(">", "&gt;") + data = data.replace("&", "&amp;").replace("<", "&lt;") + data = data.replace("\"", "&quot;").replace(">", "&gt;") writer.write(data) -def _getElementsByTagNameHelper(parent, name, rc): +def _get_elements_by_tagName_helper(parent, name, rc): for node in parent.childNodes: if node.nodeType == Node.ELEMENT_NODE and \ (name == "*" or node.tagName == name): rc.append(node) - _getElementsByTagNameHelper(node, name, rc) + _get_elements_by_tagName_helper(node, name, rc) return rc -def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc): +def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): for node in parent.childNodes: if node.nodeType == Node.ELEMENT_NODE: if ((localName == "*" or node.localName == localName) and (nsURI == "*" or node.namespaceURI == nsURI)): rc.append(node) - _getElementsByTagNameNSHelper(node, nsURI, localName, rc) + _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) return rc class DocumentFragment(Node): @@ -315,48 +325,141 @@ class DocumentFragment(Node): nodeValue = None attributes = None parentNode = None - childNodeTypes = (Node.ELEMENT_NODE, - Node.TEXT_NODE, - Node.CDATA_SECTION_NODE, - Node.ENTITY_REFERENCE_NODE, - Node.PROCESSING_INSTRUCTION_NODE, - Node.COMMENT_NODE, - Node.NOTATION_NODE) + _child_node_types = (Node.ELEMENT_NODE, + Node.TEXT_NODE, + Node.CDATA_SECTION_NODE, + Node.ENTITY_REFERENCE_NODE, + Node.PROCESSING_INSTRUCTION_NODE, + Node.COMMENT_NODE, + Node.NOTATION_NODE) + + def __init__(self): + self.childNodes = NodeList() class Attr(Node): nodeType = Node.ATTRIBUTE_NODE attributes = None ownerElement = None - childNodeTypes = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) + specified = False + _is_id = False + + _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) - def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, prefix=None): + def
__init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, + prefix=None): # skip setattr for performance d = self.__dict__ - d["localName"] = localName or qName d["nodeName"] = d["name"] = qName d["namespaceURI"] = namespaceURI d["prefix"] = prefix - Node.__init__(self) + d['childNodes'] = NodeList() + + # Add the single child node that represents the value of the attr + self.childNodes.append(Text()) + # nodeValue and value are set elsewhere + def _get_localName(self): + return self.nodeName.split(":", 1)[-1] + + def _get_name(self): + return self.name + + def _get_specified(self): + return self.specified + def __setattr__(self, name, value): d = self.__dict__ if name in ("value", "nodeValue"): d["value"] = d["nodeValue"] = value + d2 = self.childNodes[0].__dict__ + d2["data"] = d2["nodeValue"] = value + if self.ownerElement is not None: + _clear_id_cache(self.ownerElement) elif name in ("name", "nodeName"): d["name"] = d["nodeName"] = value + if self.ownerElement is not None: + _clear_id_cache(self.ownerElement) else: d[name] = value - def cloneNode(self, deep): - clone = Node.cloneNode(self, deep) - if clone.__dict__.has_key("ownerElement"): - del clone.ownerElement - return clone + def _set_prefix(self, value): + nsuri = self.namespaceURI + if value == "xmlns": + if self.namespaceURI and self.namespaceURI != XMLNS_NAMESPACE: + raise xml.dom.NamespaceErr( + "illegal use of 'xmlns' prefix for the wrong namespace") + d = self.__dict__ + d['prefix'] = prefix + if prefix is None: + newName = self.localName + else: + newName = "%s:%s" % (value, self.localName) + if self.ownerElement: + _clear_id_cache(self.ownerElement) + d['nodeName'] = d['name'] = newName + + def _set_value(self, value): + d = self.__dict__ + d['value'] = d['nodeValue'] = value + if self.ownerElement: + _clear_id_cache(self.ownerElement) + self.childNodes[0].data = value + + def unlink(self): + # This implementation does not call the base implementation + # since most of that is not 
needed, and the expense of the + # method call is not warranted. We duplicate the removal of + # children, but that's all we needed from the base class. + elem = self.ownerElement + if elem is not None: + del elem._attrs[self.nodeName] + del elem._attrsNS[(self.namespaceURI, self.localName)] + if self._is_id: + self._is_id = False + elem._magic_id_nodes -= 1 + self.ownerDocument._magic_id_count -= 1 + for child in self.childNodes: + child.unlink() + del self.childNodes[:] + + def _get_isId(self): + if self._is_id: + return True + doc = self.ownerDocument + elem = self.ownerElement + if doc is None or elem is None: + return False + + info = doc._get_elem_info(elem) + if info is None: + return False + if self.namespaceURI: + return info.isIdNS(self.namespaceURI, self.localName) + else: + return info.isId(self.nodeName) + + def _get_schemaType(self): + doc = self.ownerDocument + elem = self.ownerElement + if doc is None or elem is None: + return _no_type + + info = doc._get_elem_info(elem) + if info is None: + return _no_type + if self.namespaceURI: + return info.getAttributeTypeNS(self.namespaceURI, self.localName) + else: + return info.getAttributeType(self.nodeName) + +defproperty(Attr, "isId", doc="True if this attribute is an ID.") +defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") +defproperty(Attr, "schemaType", doc="Schema type for this attribute.") -class NamedNodeMap: +class NamedNodeMap(NewStyle, GetattrMagic): """The attribute list is a transient interface to the underlying dictionaries. Mutations here will change the underlying element's dictionary. @@ -365,21 +468,15 @@ class NamedNodeMap: attributes as found in an input document. 
""" + __slots__ = ('_attrs', '_attrsNS', '_ownerElement') + def __init__(self, attrs, attrsNS, ownerElement): self._attrs = attrs self._attrsNS = attrsNS self._ownerElement = ownerElement - try: - property - except NameError: - def __getattr__(self, name): - if name == "length": - return len(self._attrs) - raise AttributeError, name - else: - length = property(lambda self: len(self._attrs), - doc="Number of nodes in the NamedNodeMap.") + def _get_length(self): + return len(self._attrs) def item(self, index): try: @@ -399,6 +496,12 @@ class NamedNodeMap: L.append(((node.namespaceURI, node.localName), node.value)) return L + def has_key(self, key): + if isinstance(key, StringTypes): + return self._attrs.has_key(key) + else: + return self._attrsNS.has_key(key) + def keys(self): return self._attrs.keys() @@ -408,11 +511,10 @@ class NamedNodeMap: def values(self): return self._attrs.values() - def get(self, name, value = None): + def get(self, name, value=None): return self._attrs.get(name, value) - def __len__(self): - return self.length + __len__ = _get_length def __cmp__(self, other): if self._attrs is getattr(other, "_attrs", None): @@ -420,35 +522,74 @@ class NamedNodeMap: else: return cmp(id(self), id(other)) - #FIXME: is it appropriate to return .value? 
def __getitem__(self, attname_or_tuple): - if type(attname_or_tuple) is _TupleType: + if isinstance(attname_or_tuple, _TupleType): return self._attrsNS[attname_or_tuple] else: return self._attrs[attname_or_tuple] # same as set def __setitem__(self, attname, value): - if type(value) in _StringTypes: - node = Attr(attname) + if isinstance(value, StringTypes): + try: + node = self._attrs[attname] + except KeyError: + node = Attr(attname) + node.ownerDocument = self._ownerElement.ownerDocument node.value = value - node.ownerDocument = self._ownerElement.ownerDocument else: if not isinstance(value, Attr): raise TypeError, "value must be a string or Attr object" node = value - self.setNamedItem(node) + self.setNamedItem(node) + + def getNamedItem(self, name): + try: + return self._attrs[name] + except KeyError: + return None + + def getNamedItemNS(self, namespaceURI, localName): + try: + return self._attrsNS[(namespaceURI, localName)] + except KeyError: + return None + + def removeNamedItem(self, name): + n = self.getNamedItem(name) + if n is not None: + _clear_id_cache(self._ownerElement) + del self._attrs[n.nodeName] + del self._attrsNS[(n.namespaceURI, n.localName)] + if n.__dict__.has_key('ownerElement'): + n.__dict__['ownerElement'] = None + return n + else: + raise xml.dom.NotFoundErr() + + def removeNamedItemNS(self, namespaceURI, localName): + n = self.getNamedItemNS(namespaceURI, localName) + if n is not None: + _clear_id_cache(self._ownerElement) + del self._attrsNS[(n.namespaceURI, n.localName)] + del self._attrs[n.nodeName] + if n.__dict__.has_key('ownerElement'): + n.__dict__['ownerElement'] = None + return n + else: + raise xml.dom.NotFoundErr() def setNamedItem(self, node): if not isinstance(node, Attr): - raise HierarchyRequestErr, \ - "%s cannot be child of %s" % (repr(node), repr(self)) + raise xml.dom.HierarchyRequestErr( + "%s cannot be child of %s" % (repr(node), repr(self))) old = self._attrs.get(node.name) if old: old.unlink() 
self._attrs[node.name] = node self._attrsNS[(node.namespaceURI, node.localName)] = node node.ownerElement = self._ownerElement + _clear_id_cache(node.ownerElement) return old def setNamedItemNS(self, node): @@ -456,29 +597,62 @@ class NamedNodeMap: def __delitem__(self, attname_or_tuple): node = self[attname_or_tuple] + _clear_id_cache(node.ownerElement) node.unlink() - del self._attrs[node.name] - del self._attrsNS[(node.namespaceURI, node.localName)] + + def __getstate__(self): + return self._attrs, self._attrsNS, self._ownerElement + + def __setstate__(self, state): + self._attrs, self._attrsNS, self._ownerElement = state + +defproperty(NamedNodeMap, "length", + doc="Number of nodes in the NamedNodeMap.") AttributeList = NamedNodeMap +class TypeInfo(NewStyle): + __slots__ = 'namespace', 'name' + + def __init__(self, namespace, name): + self.namespace = namespace + self.name = name + + def __repr__(self): + if self.namespace: + return "<TypeInfo %s (from %s)>" % (`self.name`, `self.namespace`) + else: + return "<TypeInfo %s>" % `self.name` + + def _get_name(self): + return self.name + + def _get_namespace(self): + return self.namespace + +_no_type = TypeInfo(None, None) + class Element(Node): nodeType = Node.ELEMENT_NODE - nextSibling = None - previousSibling = None - childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, - Node.COMMENT_NODE, Node.TEXT_NODE, - Node.CDATA_SECTION_NODE, Node.ENTITY_REFERENCE_NODE) + nodeValue = None + schemaType = _no_type + + _magic_id_nodes = 0 + + _child_node_types = (Node.ELEMENT_NODE, + Node.PROCESSING_INSTRUCTION_NODE, + Node.COMMENT_NODE, + Node.TEXT_NODE, + Node.CDATA_SECTION_NODE, + Node.ENTITY_REFERENCE_NODE) def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, localName=None): - Node.__init__(self) self.tagName = self.nodeName = tagName - self.localName = localName or tagName self.prefix = prefix self.namespaceURI = namespaceURI - self.nodeValue = None + self.childNodes = NodeList() 
self._attrs = {} # attributes are double-indexed: self._attrsNS = {} # tagName -> Attribute @@ -488,16 +662,11 @@ class Element(Node): # for now because of headaches with # namespaces. - def cloneNode(self, deep): - clone = Node.cloneNode(self, deep) - clone._attrs = {} - clone._attrsNS = {} - for attr in self._attrs.values(): - node = attr.cloneNode(1) - clone._attrs[node.name] = node - clone._attrsNS[(node.namespaceURI, node.localName)] = node - node.ownerElement = clone - return clone + def _get_localName(self): + return self.tagName.split(":", 1)[-1] + + def _get_tagName(self): + return self.tagName def unlink(self): for attr in self._attrs.values(): @@ -519,21 +688,41 @@ class Element(Node): return "" def setAttribute(self, attname, value): - attr = Attr(attname) - # for performance - d = attr.__dict__ - d["value"] = d["nodeValue"] = value - d["ownerDocument"] = self.ownerDocument - self.setAttributeNode(attr) + attr = self.getAttributeNode(attname) + if attr is None: + attr = Attr(attname) + # for performance + d = attr.__dict__ + d["value"] = d["nodeValue"] = value + d["ownerDocument"] = self.ownerDocument + self.setAttributeNode(attr) + elif value != attr.value: + d = attr.__dict__ + d["value"] = d["nodeValue"] = value + if attr.isId: + _clear_id_cache(self) def setAttributeNS(self, namespaceURI, qualifiedName, value): prefix, localname = _nssplit(qualifiedName) - # for performance - attr = Attr(qualifiedName, namespaceURI, localname, prefix) - d = attr.__dict__ - d["value"] = d["nodeValue"] = value - d["ownerDocument"] = self.ownerDocument - self.setAttributeNode(attr) + attr = self.getAttributeNodeNS(namespaceURI, localname) + if attr is None: + # for performance + attr = Attr(qualifiedName, namespaceURI, localname, prefix) + d = attr.__dict__ + d["prefix"] = prefix + d["nodeName"] = qualifiedName + d["value"] = d["nodeValue"] = value + d["ownerDocument"] = self.ownerDocument + self.setAttributeNode(attr) + else: + d = attr.__dict__ + if value != 
attr.value: + d["value"] = d["nodeValue"] = value + if attr.isId: + _clear_id_cache(self) + if attr.prefix != prefix: + d["prefix"] = prefix + d["nodeName"] = qualifiedName def getAttributeNode(self, attrname): return self._attrs.get(attrname) @@ -544,36 +733,49 @@ class Element(Node): def setAttributeNode(self, attr): if attr.ownerElement not in (None, self): raise xml.dom.InuseAttributeErr("attribute node already owned") - old = self._attrs.get(attr.name, None) - if old: - old.unlink() - self._attrs[attr.name] = attr - self._attrsNS[(attr.namespaceURI, attr.localName)] = attr - - # This creates a circular reference, but Element.unlink() - # breaks the cycle since the references to the attribute - # dictionaries are tossed. - attr.ownerElement = self - - if old is not attr: + old1 = self._attrs.get(attr.name, None) + if old1 is not None: + self.removeAttributeNode(old1) + old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) + if old2 is not None and old2 is not old1: + self.removeAttributeNode(old2) + _set_attribute_node(self, attr) + + if old1 is not attr: # It might have already been part of this node, in which case # it doesn't represent a change, and should not be returned. 
- return old + return old1 + if old2 is not attr: + return old2 setAttributeNodeNS = setAttributeNode def removeAttribute(self, name): - attr = self._attrs[name] + try: + attr = self._attrs[name] + except KeyError: + raise xml.dom.NotFoundErr() self.removeAttributeNode(attr) def removeAttributeNS(self, namespaceURI, localName): - attr = self._attrsNS[(namespaceURI, localName)] + try: + attr = self._attrsNS[(namespaceURI, localName)] + except KeyError: + raise xml.dom.NotFoundErr() self.removeAttributeNode(attr) def removeAttributeNode(self, node): + if node is None: + raise xml.dom.NotFoundErr() + try: + self._attrs[node.name] + except KeyError: + raise xml.dom.NotFoundErr() + _clear_id_cache(self) node.unlink() - del self._attrs[node.name] - del self._attrsNS[(node.namespaceURI, node.localName)] + # Restore this since the node is still useful and otherwise + # unlinked + node.ownerDocument = self.ownerDocument removeAttributeNodeNS = removeAttributeNode @@ -584,14 +786,14 @@ class Element(Node): return self._attrsNS.has_key((namespaceURI, localName)) def getElementsByTagName(self, name): - return _getElementsByTagNameHelper(self, name, NodeList()) + return _get_elements_by_tagName_helper(self, name, NodeList()) def getElementsByTagNameNS(self, namespaceURI, localName): - return _getElementsByTagNameNSHelper(self, namespaceURI, localName, - NodeList()) + return _get_elements_by_tagName_ns_helper( + self, namespaceURI, localName, NodeList()) def __repr__(self): - return "<DOM Element: %s at %s>" % (self.tagName, id(self)) + return "<DOM Element: %s at %#x>" % (self.tagName, id(self)) def writexml(self, writer, indent="", addindent="", newl=""): # indent = current indentation @@ -618,61 +820,145 @@ class Element(Node): def _get_attributes(self): return NamedNodeMap(self._attrs, self._attrsNS, self) - try: - property - except NameError: - pass - else: - attributes = property(_get_attributes, - doc="NamedNodeMap of attributes on the element.") - def 
hasAttributes(self): - if self._attrs or self._attrsNS: - return 1 + if self._attrs: + return True else: - return 0 + return False + + # DOM Level 3 attributes, based on the 22 Oct 2002 draft + + def setIdAttribute(self, name): + idAttr = self.getAttributeNode(name) + self.setIdAttributeNode(idAttr) + + def setIdAttributeNS(self, namespaceURI, localName): + idAttr = self.getAttributeNodeNS(namespaceURI, localName) + self.setIdAttributeNode(idAttr) + + def setIdAttributeNode(self, idAttr): + if idAttr is None or not self.isSameNode(idAttr.ownerElement): + raise xml.dom.NotFoundErr() + if _get_containing_entref(self) is not None: + raise xml.dom.NoModificationAllowedErr() + if not idAttr._is_id: + idAttr.__dict__['_is_id'] = True + self._magic_id_nodes += 1 + self.ownerDocument._magic_id_count += 1 + _clear_id_cache(self) + +defproperty(Element, "attributes", + doc="NamedNodeMap of attributes on the element.") +defproperty(Element, "localName", + doc="Namespace-local name of this element.") + + +def _set_attribute_node(element, attr): + _clear_id_cache(element) + element._attrs[attr.name] = attr + element._attrsNS[(attr.namespaceURI, attr.localName)] = attr + + # This creates a circular reference, but Element.unlink() + # breaks the cycle since the references to the attribute + # dictionaries are tossed. + attr.__dict__['ownerElement'] = element + + +class Childless: + """Mixin that makes childless-ness easy to implement and avoids + the complexity of the Node methods that deal with children. 
+ """ -class Comment(Node): - nodeType = Node.COMMENT_NODE - nodeName = "#comment" attributes = None - childNodeTypes = () + childNodes = EmptyNodeList() + firstChild = None + lastChild = None - def __init__(self, data): - Node.__init__(self) - self.data = self.nodeValue = data + def _get_firstChild(self): + return None - def writexml(self, writer, indent="", addindent="", newl=""): - writer.write("%s<!--%s-->%s" % (indent,self.data,newl)) + def _get_lastChild(self): + return None + + def appendChild(self, node): + raise xml.dom.HierarchyRequestErr( + self.nodeName + " nodes cannot have children") -class ProcessingInstruction(Node): + def hasChildNodes(self): + return False + + def insertBefore(self, newChild, refChild): + raise xml.dom.HierarchyRequestErr( + self.nodeName + " nodes do not have children") + + def removeChild(self, oldChild): + raise xml.dom.NotFoundErr( + self.nodeName + " nodes do not have children") + + def replaceChild(self, newChild, oldChild): + raise xml.dom.HierarchyRequestErr( + self.nodeName + " nodes do not have children") + + +class ProcessingInstruction(Childless, Node): nodeType = Node.PROCESSING_INSTRUCTION_NODE - attributes = None - childNodeTypes = () def __init__(self, target, data): - Node.__init__(self) self.target = self.nodeName = target self.data = self.nodeValue = data + def _get_data(self): + return self.data + def _set_data(self, value): + d = self.__dict__ + d['data'] = d['nodeValue'] = value + + def _get_target(self): + return self.target + def _set_target(self, value): + d = self.__dict__ + d['target'] = d['nodeName'] = value + + def __setattr__(self, name, value): + if name == "data" or name == "nodeValue": + self.__dict__['data'] = self.__dict__['nodeValue'] = value + elif name == "target" or name == "nodeName": + self.__dict__['target'] = self.__dict__['nodeName'] = value + else: + self.__dict__[name] = value + def writexml(self, writer, indent="", addindent="", newl=""): writer.write("%s<?%s %s?>%s" % 
(indent,self.target, self.data, newl)) -class CharacterData(Node): - def __init__(self, data): - if type(data) not in _StringTypes: - raise TypeError, "node contents must be a string" - Node.__init__(self) - self.data = self.nodeValue = data - self.length = len(data) + +class CharacterData(Childless, Node): + def _get_length(self): + return len(self.data) + __len__ = _get_length + + def _get_data(self): + return self.__dict__['data'] + def _set_data(self, data): + d = self.__dict__ + d['data'] = d['nodeValue'] = data + + _get_nodeValue = _get_data + _set_nodeValue = _set_data + + def __setattr__(self, name, value): + if name == "data" or name == "nodeValue": + self.__dict__['data'] = self.__dict__['nodeValue'] = value + else: + self.__dict__[name] = value def __repr__(self): - if len(self.data) > 10: + data = self.data + if len(data) > 10: dotdotdot = "..." else: dotdotdot = "" return "<DOM %s node \"%s%s\">" % ( - self.__class__.__name__, self.data[0:10], dotdotdot) + self.__class__.__name__, data[0:10], dotdotdot) def substringData(self, offset, count): if offset < 0: @@ -685,8 +971,6 @@ class CharacterData(Node): def appendData(self, arg): self.data = self.data + arg - self.nodeValue = self.data - self.length = len(self.data) def insertData(self, offset, arg): if offset < 0: @@ -696,8 +980,6 @@ class CharacterData(Node): if arg: self.data = "%s%s%s" % ( self.data[:offset], arg, self.data[offset:]) - self.nodeValue = self.data - self.length = len(self.data) def deleteData(self, offset, count): if offset < 0: @@ -708,8 +990,6 @@ class CharacterData(Node): raise xml.dom.IndexSizeErr("count cannot be negative") if count: self.data = self.data[:offset] + self.data[offset+count:] - self.nodeValue = self.data - self.length = len(self.data) def replaceData(self, offset, count, arg): if offset < 0: @@ -721,19 +1001,25 @@ class CharacterData(Node): if count: self.data = "%s%s%s" % ( self.data[:offset], arg, self.data[offset+count:]) - self.nodeValue = self.data - 
self.length = len(self.data) + +defproperty(CharacterData, "length", doc="Length of the string data.") + class Text(CharacterData): + # Make sure we don't add an instance __dict__ if we don't already + # have one, at least when that's possible: + __slots__ = () + nodeType = Node.TEXT_NODE nodeName = "#text" attributes = None - childNodeTypes = () def splitText(self, offset): if offset < 0 or offset > len(self.data): raise xml.dom.IndexSizeErr("illegal offset value") - newText = Text(self.data[offset:]) + newText = self.__class__() + newText.data = self.data[offset:] + newText.ownerDocument = self.ownerDocument next = self.nextSibling if self.parentNode and self in self.parentNode.childNodes: if next is None: @@ -741,80 +1027,371 @@ class Text(CharacterData): else: self.parentNode.insertBefore(newText, next) self.data = self.data[:offset] - self.nodeValue = self.data - self.length = len(self.data) return newText def writexml(self, writer, indent="", addindent="", newl=""): _write_data(writer, "%s%s%s"%(indent, self.data, newl)) + # DOM Level 3 (WD 9 April 2002) + + def _get_wholeText(self): + L = [self.data] + n = self.previousSibling + while n is not None: + if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): + L.insert(0, n.data) + n = n.previousSibling + else: + break + n = self.nextSibling + while n is not None: + if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): + L.append(n.data) + n = n.nextSibling + else: + break + return ''.join(L) + + def replaceWholeText(self, content): + # XXX This needs to be seriously changed if minidom ever + # supports EntityReference nodes. 
+ parent = self.parentNode + n = self.previousSibling + while n is not None: + if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): + next = n.previousSibling + parent.removeChild(n) + n = next + else: + break + n = self.nextSibling + if not content: + parent.removeChild(self) + while n is not None: + if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): + next = n.nextSibling + parent.removeChild(n) + n = next + else: + break + if content: + d = self.__dict__ + d['data'] = content + d['nodeValue'] = content + return self + else: + return None + + def _get_isWhitespaceInElementContent(self): + if self.data.strip(): + return False + elem = _get_containing_element(self) + if elem is None: + return False + info = self.ownerDocument._get_elem_info(elem) + if info is None: + return False + else: + return info.isElementContent() + +defproperty(Text, "isWhitespaceInElementContent", + doc="True iff this text node contains only whitespace" + " and is in element content.") +defproperty(Text, "wholeText", + doc="The text of all logically-adjacent text nodes.") + + +def _get_containing_element(node): + c = node.parentNode + while c is not None: + if c.nodeType == Node.ELEMENT_NODE: + return c + c = c.parentNode + return None + +def _get_containing_entref(node): + c = node.parentNode + while c is not None: + if c.nodeType == Node.ENTITY_REFERENCE_NODE: + return c + c = c.parentNode + return None + + +class Comment(Childless, CharacterData): + nodeType = Node.COMMENT_NODE + nodeName = "#comment" + + def __init__(self, data): + self.data = self.nodeValue = data + + def writexml(self, writer, indent="", addindent="", newl=""): + writer.write("%s<!--%s-->%s" % (indent, self.data, newl)) + class CDATASection(Text): + # Make sure we don't add an instance __dict__ if we don't already + # have one, at least when that's possible: + __slots__ = () + nodeType = Node.CDATA_SECTION_NODE nodeName = "#cdata-section" def writexml(self, writer, indent="", addindent="", newl=""): + 
if self.data.find("]]>") >= 0: + raise ValueError("']]>' not allowed in a CDATA section") writer.write("<![CDATA[%s]]>" % self.data) -def _nssplit(qualifiedName): - fields = qualifiedName.split(':', 1) - if len(fields) == 2: - return fields - elif len(fields) == 1: - return (None, fields[0]) +class ReadOnlySequentialNamedNodeMap(NewStyle, GetattrMagic): + __slots__ = '_seq', + + def __init__(self, seq=()): + # seq should be a list or tuple + self._seq = seq + + def __len__(self): + return len(self._seq) + + def _get_length(self): + return len(self._seq) + + def getNamedItem(self, name): + for n in self._seq: + if n.nodeName == name: + return n + + def getNamedItemNS(self, namespaceURI, localName): + for n in self._seq: + if n.namespaceURI == namespaceURI and n.localName == localName: + return n + + def __getitem__(self, name_or_tuple): + if isinstance(name_or_tuple, _TupleType): + node = self.getNamedItemNS(*name_or_tuple) + else: + node = self.getNamedItem(name_or_tuple) + if node is None: + raise KeyError, name_or_tuple + return node + + def item(self, index): + if index < 0: + return None + try: + return self._seq[index] + except IndexError: + return None + + def removeNamedItem(self, name): + raise xml.dom.NoModificationAllowedErr( + "NamedNodeMap instance is read-only") + + def removeNamedItemNS(self, namespaceURI, localName): + raise xml.dom.NoModificationAllowedErr( + "NamedNodeMap instance is read-only") + + def setNamedItem(self, node): + raise xml.dom.NoModificationAllowedErr( + "NamedNodeMap instance is read-only") + + def setNamedItemNS(self, node): + raise xml.dom.NoModificationAllowedErr( + "NamedNodeMap instance is read-only") + + def __getstate__(self): + return [self._seq] + + def __setstate__(self, state): + self._seq = state[0] + +defproperty(ReadOnlySequentialNamedNodeMap, "length", + doc="Number of entries in the NamedNodeMap.") + + +class Identified: + """Mix-in class that supports the publicId and systemId attributes.""" + + __slots__ = 
'publicId', 'systemId' + + def _identified_mixin_init(self, publicId, systemId): + self.publicId = publicId + self.systemId = systemId + + def _get_publicId(self): + return self.publicId + def _get_systemId(self): + return self.systemId -class DocumentType(Node): +class DocumentType(Identified, Childless, Node): nodeType = Node.DOCUMENT_TYPE_NODE nodeValue = None - attributes = None name = None publicId = None systemId = None internalSubset = None - entities = None - notations = None def __init__(self, qualifiedName): - Node.__init__(self) + self.entities = ReadOnlySequentialNamedNodeMap() + self.notations = ReadOnlySequentialNamedNodeMap() if qualifiedName: prefix, localname = _nssplit(qualifiedName) self.name = localname + self.nodeName = self.name + + def _get_internalSubset(self): + return self.internalSubset + + def cloneNode(self, deep): + if self.ownerDocument is None: + # it's ok + clone = DocumentType(None) + clone.name = self.name + clone.nodeName = self.name + operation = xml.dom.UserDataHandler.NODE_CLONED + if deep: + clone.entities._seq = [] + clone.notations._seq = [] + for n in self.notations._seq: + notation = Notation(n.nodeName, n.publicId, n.systemId) + clone.notations._seq.append(notation) + n._call_user_data_handler(operation, n, notation) + for e in self.entities._seq: + entity = Entity(e.nodeName, e.publicId, e.systemId, + e.notationName) + entity.actualEncoding = e.actualEncoding + entity.encoding = e.encoding + entity.version = e.version + clone.entities._seq.append(entity) + e._call_user_data_handler(operation, n, entity) + self._call_user_data_handler(operation, self, clone) + return clone + else: + return None + + def writexml(self, writer, indent="", addindent="", newl=""): + writer.write("<!DOCTYPE ") + writer.write(self.name) + if self.publicId: + writer.write("\n PUBLIC '%s'\n '%s'" + % (self.publicId, self.systemId)) + elif self.systemId: + writer.write("\n SYSTEM '%s'" % self.systemId) + if self.internalSubset is not None: + 
writer.write(" [") + writer.write(self.internalSubset) + writer.write("]") + writer.write(">\n") + +class Entity(Identified, Node): + attributes = None + nodeType = Node.ENTITY_NODE + nodeValue = None + + actualEncoding = None + encoding = None + version = None + def __init__(self, name, publicId, systemId, notation): + self.nodeName = name + self.notationName = notation + self.childNodes = NodeList() + self._identified_mixin_init(publicId, systemId) + + def _get_actualEncoding(self): + return self.actualEncoding + + def _get_encoding(self): + return self.encoding + + def _get_version(self): + return self.version + + def appendChild(self, newChild): + raise xml.dom.HierarchyRequestErr( + "cannot append children to an entity node") + + def insertBefore(self, newChild, refChild): + raise xml.dom.HierarchyRequestErr( + "cannot insert children below an entity node") + + def removeChild(self, oldChild): + raise xml.dom.HierarchyRequestErr( + "cannot remove children from an entity node") + + def replaceChild(self, newChild, oldChild): + raise xml.dom.HierarchyRequestErr( + "cannot replace children of an entity node") + +class Notation(Identified, Childless, Node): + nodeType = Node.NOTATION_NODE + nodeValue = None + + def __init__(self, name, publicId, systemId): + self.nodeName = name + self._identified_mixin_init(publicId, systemId) + + +class DOMImplementation(DOMImplementationLS): + _features = [("core", "1.0"), + ("core", "2.0"), + ("core", "3.0"), + ("core", None), + ("xml", "1.0"), + ("xml", "2.0"), + ("xml", "3.0"), + ("xml", None), + ("ls-load", "3.0"), + ("ls-load", None), + ] -class DOMImplementation: def hasFeature(self, feature, version): - if version not in ("1.0", "2.0"): - return 0 - feature = feature.lower() - return feature == "core" + if version == "": + version = None + return (feature.lower(), version) in self._features def createDocument(self, namespaceURI, qualifiedName, doctype): if doctype and doctype.parentNode is not None: raise 
xml.dom.WrongDocumentErr( "doctype object owned by another DOM tree") - doc = self._createDocument() - if doctype is None: - doctype = self.createDocumentType(qualifiedName, None, None) - if not qualifiedName: + doc = self._create_document() + + add_root_element = not (namespaceURI is None + and qualifiedName is None + and doctype is None) + + if not qualifiedName and add_root_element: # The spec is unclear what to raise here; SyntaxErr # would be the other obvious candidate. Since Xerces raises # InvalidCharacterErr, and since SyntaxErr is not listed # for createDocument, that seems to be the better choice. # XXX: need to check for illegal characters here and in # createElement. + + # DOM Level III clears this up when talking about the return value + # of this function. If namespaceURI, qName and DocType are + # Null the document is returned without a document element + # Otherwise if doctype or namespaceURI are not None + # Then we go back to the above problem raise xml.dom.InvalidCharacterErr("Element with no name") - prefix, localname = _nssplit(qualifiedName) - if prefix == "xml" \ - and namespaceURI != "http://www.w3.org/XML/1998/namespace": - raise xml.dom.NamespaceErr("illegal use of 'xml' prefix") - if prefix and not namespaceURI: - raise xml.dom.NamespaceErr( - "illegal use of prefix without namespaces") - element = doc.createElementNS(namespaceURI, qualifiedName) - doc.appendChild(element) - doctype.parentNode = doctype.ownerDocument = doc + + if add_root_element: + prefix, localname = _nssplit(qualifiedName) + if prefix == "xml" \ + and namespaceURI != "http://www.w3.org/XML/1998/namespace": + raise xml.dom.NamespaceErr("illegal use of 'xml' prefix") + if prefix and not namespaceURI: + raise xml.dom.NamespaceErr( + "illegal use of prefix without namespaces") + element = doc.createElementNS(namespaceURI, qualifiedName) + if doctype: + doc.appendChild(doctype) + doc.appendChild(element) + + if doctype: + doctype.parentNode = doctype.ownerDocument = doc + 
doc.doctype = doctype doc.implementation = self return doc @@ -825,11 +1402,72 @@ class DOMImplementation: doctype.systemId = systemId return doctype + # DOM Level 3 (WD 9 April 2002) + + def getInterface(self, feature): + if self.hasFeature(feature, None): + return self + else: + return None + # internal - def _createDocument(self): + def _create_document(self): return Document() -class Document(Node): +class ElementInfo(NewStyle): + """Object that represents content-model information for an element. + + This implementation is not expected to be used in practice; DOM + builders should provide implementations which do the right thing + using information available to it. + + """ + + __slots__ = 'tagName', + + def __init__(self, name): + self.tagName = name + + def getAttributeType(self, aname): + return _no_type + + def getAttributeTypeNS(self, namespaceURI, localName): + return _no_type + + def isElementContent(self): + return False + + def isEmpty(self): + """Returns true iff this element is declared to have an EMPTY + content model.""" + return False + + def isId(self, aname): + """Returns true iff the named attribte is a DTD-style ID.""" + return False + + def isIdNS(self, namespaceURI, localName): + """Returns true iff the identified attribute is a DTD-style ID.""" + return False + + def __getstate__(self): + return self.tagName + + def __setstate__(self, state): + self.tagName = state + +def _clear_id_cache(node): + if node.nodeType == Node.DOCUMENT_NODE: + node._id_cache.clear() + node._id_search_stack = None + elif _in_document(node): + node.ownerDocument._id_cache.clear() + node.ownerDocument._id_search_stack= None + +class Document(Node, DocumentLS): + _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, + Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) + nodeType = Node.DOCUMENT_NODE nodeName = "#document" nodeValue = None @@ -839,14 +1477,66 @@ class Document(Node): previousSibling = nextSibling = None implementation = 
DOMImplementation() - childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, - Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) + + # Document attributes from Level 3 (WD 9 April 2002) + + actualEncoding = None + encoding = None + standalone = None + version = None + strictErrorChecking = False + errorHandler = None + documentURI = None + + _magic_id_count = 0 + + def __init__(self): + self.childNodes = NodeList() + # mapping of (namespaceURI, localName) -> ElementInfo + # and tagName -> ElementInfo + self._elem_info = {} + self._id_cache = {} + self._id_search_stack = None + + def _get_elem_info(self, element): + if element.namespaceURI: + key = element.namespaceURI, element.localName + else: + key = element.tagName + return self._elem_info.get(key) + + def _get_actualEncoding(self): + return self.actualEncoding + + def _get_doctype(self): + return self.doctype + + def _get_documentURI(self): + return self.documentURI + + def _get_encoding(self): + return self.encoding + + def _get_errorHandler(self): + return self.errorHandler + + def _get_standalone(self): + return self.standalone + + def _get_strictErrorChecking(self): + return self.strictErrorChecking + + def _get_version(self): + return self.version def appendChild(self, node): - if node.nodeType not in self.childNodeTypes: - raise HierarchyRequestErr, \ - "%s cannot be child of %s" % (repr(node), repr(self)) + if node.nodeType not in self._child_node_types: + raise xml.dom.HierarchyRequestErr( + "%s cannot be child of %s" % (repr(node), repr(self))) if node.parentNode is not None: + # This needs to be done before the next test since this + # may *be* the document element, in which case it should + # end up re-ordered to the end. 
node.parentNode.removeChild(node) if node.nodeType == Node.ELEMENT_NODE \ @@ -856,7 +1546,10 @@ class Document(Node): return Node.appendChild(self, node) def removeChild(self, oldChild): - self.childNodes.remove(oldChild) + try: + self.childNodes.remove(oldChild) + except ValueError: + raise xml.dom.NotFoundErr() oldChild.nextSibling = oldChild.previousSibling = None oldChild.parentNode = None if self.documentElement is oldChild: @@ -869,23 +1562,36 @@ class Document(Node): if node.nodeType == Node.ELEMENT_NODE: return node - try: - property - except NameError: - pass - else: - documentElement = property(_get_documentElement, - doc="Top-level element of this document.") - def unlink(self): if self.doctype is not None: self.doctype.unlink() self.doctype = None Node.unlink(self) + def cloneNode(self, deep): + if not deep: + return None + clone = self.implementation.createDocument(None, None, None) + clone.encoding = self.encoding + clone.standalone = self.standalone + clone.version = self.version + for n in self.childNodes: + childclone = _clone_node(n, deep, clone) + assert childclone.ownerDocument.isSameNode(clone) + clone.childNodes.append(childclone) + if childclone.nodeType == Node.DOCUMENT_NODE: + assert clone.documentElement is None + elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE: + assert clone.doctype is None + clone.doctype = childclone + childclone.parentNode = clone + self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED, + self, clone) + return clone + def createDocumentFragment(self): d = DocumentFragment() - d.ownerDoc = self + d.ownerDocument = self return d def createElement(self, tagName): @@ -894,12 +1600,18 @@ class Document(Node): return e def createTextNode(self, data): - t = Text(data) + if not isinstance(data, StringTypes): + raise TypeError, "node contents must be a string" + t = Text() + t.data = data t.ownerDocument = self return t def createCDATASection(self, data): - c = CDATASection(data) + if not isinstance(data, 
StringTypes): + raise TypeError, "node contents must be a string" + c = CDATASection() + c.data = data c.ownerDocument = self return c @@ -921,7 +1633,7 @@ class Document(Node): def createElementNS(self, namespaceURI, qualifiedName): prefix, localName = _nssplit(qualifiedName) - e = Element(qualifiedName, namespaceURI, prefix, localName) + e = Element(qualifiedName, namespaceURI, prefix) e.ownerDocument = self return e @@ -932,12 +1644,93 @@ class Document(Node): a.value = "" return a + # A couple of implementation-specific helpers to create node types + # not supported by the W3C DOM specs: + + def _create_entity(self, name, publicId, systemId, notationName): + e = Entity(name, publicId, systemId, notationName) + e.ownerDocument = self + return e + + def _create_notation(self, name, publicId, systemId): + n = Notation(name, publicId, systemId) + n.ownerDocument = self + return n + + def getElementById(self, id): + if self._id_cache.has_key(id): + return self._id_cache[id] + if not (self._elem_info or self._magic_id_count): + return None + + stack = self._id_search_stack + if stack is None: + # we never searched before, or the cache has been cleared + stack = [self.documentElement] + self._id_search_stack = stack + elif not stack: + # Previous search was completed and cache is still valid; + # no matching node. + return None + + result = None + while stack: + node = stack.pop() + # add child elements to stack for continued searching + stack.extend([child for child in node.childNodes + if child.nodeType in _nodeTypes_with_children]) + # check this node + info = self._get_elem_info(node) + if info: + # We have to process all ID attributes before + # returning in order to get all the attributes set to + # be IDs using Element.setIdAttribute*(). 
+ for attr in node.attributes.values(): + if attr.namespaceURI: + if info.isIdNS(attr.namespaceURI, attr.localName): + self._id_cache[attr.value] = node + if attr.value == id: + result = node + elif not node._magic_id_nodes: + break + elif info.isId(attr.name): + self._id_cache[attr.value] = node + if attr.value == id: + result = node + elif not node._magic_id_nodes: + break + elif attr._is_id: + self._id_cache[attr.value] = node + if attr.value == id: + result = node + elif node._magic_id_nodes == 1: + break + elif node._magic_id_nodes: + for attr in node.attributes.values(): + if attr._is_id: + self._id_cache[attr.value] = node + if attr.value == id: + result = node + if result is not None: + break + return result + def getElementsByTagName(self, name): - return _getElementsByTagNameHelper(self, name, NodeList()) + return _get_elements_by_tagName_helper(self, name, NodeList()) def getElementsByTagNameNS(self, namespaceURI, localName): - return _getElementsByTagNameNSHelper(self, namespaceURI, localName, - NodeList()) + return _get_elements_by_tagName_ns_helper( + self, namespaceURI, localName, NodeList()) + + def isSupported(self, feature, version): + return self.implementation.hasFeature(feature, version) + + def importNode(self, node, deep): + if node.nodeType == Node.DOCUMENT_NODE: + raise xml.dom.NotSupportedErr("cannot import document nodes") + elif node.nodeType == Node.DOCUMENT_TYPE_NODE: + raise xml.dom.NotSupportedErr("cannot import document type nodes") + return _clone_node(node, deep, self) def writexml(self, writer, indent="", addindent="", newl="", encoding = None): @@ -948,27 +1741,194 @@ class Document(Node): for node in self.childNodes: node.writexml(writer, indent, addindent, newl) + # DOM Level 3 (WD 9 April 2002) + + def renameNode(self, n, namespaceURI, name): + if n.ownerDocument is not self: + raise xml.dom.WrongDocumentErr( + "cannot rename nodes from other documents;\n" + "expected %s,\nfound %s" % (self, n.ownerDocument)) + if n.nodeType 
not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE): + raise xml.dom.NotSupportedErr( + "renameNode() only applies to element and attribute nodes") + if namespaceURI != EMPTY_NAMESPACE: + if ':' in name: + prefix, localName = name.split(':', 1) + if ( prefix == "xmlns" + and namespaceURI != xml.dom.XMLNS_NAMESPACE): + raise xml.dom.NamespaceErr( + "illegal use of 'xmlns' prefix") + else: + if ( name == "xmlns" + and namespaceURI != xml.dom.XMLNS_NAMESPACE + and n.nodeType == Node.ATTRIBUTE_NODE): + raise xml.dom.NamespaceErr( + "illegal use of the 'xmlns' attribute") + prefix = None + localName = name + else: + prefix = None + localName = None + if n.nodeType == Node.ATTRIBUTE_NODE: + element = n.ownerElement + if element is not None: + is_id = n._is_id + element.removeAttributeNode(n) + else: + element = None + # avoid __setattr__ + d = n.__dict__ + d['prefix'] = prefix + d['localName'] = localName + d['namespaceURI'] = namespaceURI + d['nodeName'] = name + if n.nodeType == Node.ELEMENT_NODE: + d['tagName'] = name + else: + # attribute node + d['name'] = name + if element is not None: + element.setAttributeNode(n) + if is_id: + element.setIdAttributeNode(n) + # It's not clear from a semantic perspective whether we should + # call the user data handlers for the NODE_RENAMED event since + # we're re-using the existing node. The draft spec has been + # interpreted as meaning "no, don't call the handler unless a + # new node is created." + return n + +defproperty(Document, "documentElement", + doc="Top-level element of this document.") + + +def _clone_node(node, deep, newOwnerDocument): + """ + Clone a node and give it the new owner document. 
+ Called by Node.cloneNode and Document.importNode + """ + if node.ownerDocument.isSameNode(newOwnerDocument): + operation = xml.dom.UserDataHandler.NODE_CLONED + else: + operation = xml.dom.UserDataHandler.NODE_IMPORTED + if node.nodeType == Node.ELEMENT_NODE: + clone = newOwnerDocument.createElementNS(node.namespaceURI, + node.nodeName) + for attr in node.attributes.values(): + clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) + a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) + a.specified = attr.specified + + if deep: + for child in node.childNodes: + c = _clone_node(child, deep, newOwnerDocument) + clone.appendChild(c) + + elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: + clone = newOwnerDocument.createDocumentFragment() + if deep: + for child in node.childNodes: + c = _clone_node(child, deep, newOwnerDocument) + clone.appendChild(c) + + elif node.nodeType == Node.TEXT_NODE: + clone = newOwnerDocument.createTextNode(node.data) + elif node.nodeType == Node.CDATA_SECTION_NODE: + clone = newOwnerDocument.createCDATASection(node.data) + elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: + clone = newOwnerDocument.createProcessingInstruction(node.target, + node.data) + elif node.nodeType == Node.COMMENT_NODE: + clone = newOwnerDocument.createComment(node.data) + elif node.nodeType == Node.ATTRIBUTE_NODE: + clone = newOwnerDocument.createAttributeNS(node.namespaceURI, + node.nodeName) + clone.specified = True + clone.value = node.value + elif node.nodeType == Node.DOCUMENT_TYPE_NODE: + assert node.ownerDocument is not newOwnerDocument + operation = xml.dom.UserDataHandler.NODE_IMPORTED + clone = newOwnerDocument.implementation.createDocumentType( + node.name, node.publicId, node.systemId) + clone.ownerDocument = newOwnerDocument + if deep: + clone.entities._seq = [] + clone.notations._seq = [] + for n in node.notations._seq: + notation = Notation(n.nodeName, n.publicId, n.systemId) + notation.ownerDocument = newOwnerDocument + 
clone.notations._seq.append(notation) + if hasattr(n, '_call_user_data_handler'): + n._call_user_data_handler(operation, n, notation) + for e in node.entities._seq: + entity = Entity(e.nodeName, e.publicId, e.systemId, + e.notationName) + entity.actualEncoding = e.actualEncoding + entity.encoding = e.encoding + entity.version = e.version + entity.ownerDocument = newOwnerDocument + clone.entities._seq.append(entity) + if hasattr(e, '_call_user_data_handler'): + e._call_user_data_handler(operation, n, entity) + else: + # Note the cloning of Document and DocumentType nodes is + # implemenetation specific. minidom handles those cases + # directly in the cloneNode() methods. + raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) + + # Check for _call_user_data_handler() since this could conceivably + # used with other DOM implementations (one of the FourThought + # DOMs, perhaps?). + if hasattr(node, '_call_user_data_handler'): + node._call_user_data_handler(operation, node, clone) + return clone + + +def _nssplit(qualifiedName): + fields = qualifiedName.split(':', 1) + if len(fields) == 2: + return fields + else: + return (None, fields[0]) + + def _get_StringIO(): # we can't use cStringIO since it doesn't support Unicode strings from StringIO import StringIO return StringIO() -def _doparse(func, args, kwargs): +def _do_pulldom_parse(func, args, kwargs): events = apply(func, args, kwargs) toktype, rootNode = events.getEvent() events.expandNode(rootNode) events.clear() return rootNode -def parse(*args, **kwargs): +def parse(file, parser=None, bufsize=None): """Parse a file into a DOM by filename or file object.""" - from xml.dom import pulldom - return _doparse(pulldom.parse, args, kwargs) + if parser is None and not bufsize: + from xml.dom import expatbuilder + return expatbuilder.parse(file) + else: + from xml.dom import pulldom + return _do_pulldom_parse(pulldom.parse, (file,), + {'parser': parser, 'bufsize': bufsize}) -def parseString(*args, **kwargs): 
+def parseString(string, parser=None): """Parse a file into a DOM from a string.""" - from xml.dom import pulldom - return _doparse(pulldom.parseString, args, kwargs) - -def getDOMImplementation(): + if parser is None: + from xml.dom import expatbuilder + return expatbuilder.parseString(string) + else: + from xml.dom import pulldom + return _do_pulldom_parse(pulldom.parseString, (string,), + {'parser': parser}) + +def getDOMImplementation(features=None): + if features: + if isinstance(features, StringTypes): + features = domreg._parse_feature_string(features) + for f, v in features: + if not Document.implementation.hasFeature(f, v): + return None return Document.implementation |