summaryrefslogtreecommitdiffstats
path: root/Lib/xml
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/xml')
-rw-r--r--Lib/xml/dom/minidom.py1614
1 files changed, 1287 insertions, 327 deletions
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py
index 967320f..46eb881 100644
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -14,56 +14,34 @@ Todo:
* SAX 2 namespaces
"""
-from xml.dom import HierarchyRequestErr, EMPTY_NAMESPACE
-
-# localize the types, and allow support for Unicode values if available:
-import types
-_TupleType = types.TupleType
-try:
- _StringTypes = (types.StringType, types.UnicodeType)
-except AttributeError:
- _StringTypes = (types.StringType,)
-del types
-
import xml.dom
+from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX
+from xml.dom.minicompat import *
+from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
-if list is type([]):
- class NodeList(list):
- def item(self, index):
- if 0 <= index < len(self):
- return self[index]
-
- length = property(lambda self: len(self),
- doc="The number of nodes in the NodeList.")
+_TupleType = type(())
-else:
- def NodeList():
- return []
+# This is used by the ID-cache invalidation checks; the list isn't
+# actually complete, since the nodes being checked will never be the
+# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is
+# the node being added or removed, not the node being modified.)
+#
+_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
+ xml.dom.Node.ENTITY_REFERENCE_NODE)
-class Node(xml.dom.Node):
- allnodes = {}
- _debug = 0
- _makeParentNodes = 1
- debug = None
- childNodeTypes = ()
+class Node(xml.dom.Node, GetattrMagic):
namespaceURI = None # this is non-null only for elements and attributes
parentNode = None
ownerDocument = None
+ nextSibling = None
+ previousSibling = None
- def __init__(self):
- self.childNodes = NodeList()
- if Node._debug:
- index = repr(id(self)) + repr(self.__class__)
- Node.allnodes[index] = repr(self.__dict__)
- if Node.debug is None:
- Node.debug = _get_StringIO()
- #open("debug4.out", "w")
- Node.debug.write("create %s\n" % index)
+ prefix = EMPTY_PREFIX # non-null only for NS elements and attributes
def __nonzero__(self):
- return 1
+ return True
def toxml(self, encoding = None):
return self.toprettyxml("", "", encoding)
@@ -85,9 +63,12 @@ class Node(xml.dom.Node):
def hasChildNodes(self):
if self.childNodes:
- return 1
+ return True
else:
- return 0
+ return False
+
+ def _get_childNodes(self):
+ return self.childNodes
def _get_firstChild(self):
if self.childNodes:
@@ -97,56 +78,26 @@ class Node(xml.dom.Node):
if self.childNodes:
return self.childNodes[-1]
- try:
- property
- except NameError:
- def __getattr__(self, key):
- if key[0:2] == "__":
- raise AttributeError, key
- # getattr should never call getattr!
- if self.__dict__.has_key("inGetAttr"):
- del self.inGetAttr
- raise AttributeError, key
-
- prefix, attrname = key[:5], key[5:]
- if prefix == "_get_":
- self.inGetAttr = 1
- if hasattr(self, attrname):
- del self.inGetAttr
- return (lambda self=self, attrname=attrname:
- getattr(self, attrname))
- else:
- del self.inGetAttr
- raise AttributeError, key
- else:
- self.inGetAttr = 1
- try:
- func = getattr(self, "_get_" + key)
- except AttributeError:
- raise AttributeError, key
- del self.inGetAttr
- return func()
- else:
- firstChild = property(_get_firstChild,
- doc="First child node, or None.")
- lastChild = property(_get_lastChild,
- doc="Last child node, or None.")
-
def insertBefore(self, newChild, refChild):
if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
for c in tuple(newChild.childNodes):
self.insertBefore(c, refChild)
### The DOM does not clearly specify what to return in this case
return newChild
- if newChild.nodeType not in self.childNodeTypes:
- raise HierarchyRequestErr, \
- "%s cannot be child of %s" % (repr(newChild), repr(self))
+ if newChild.nodeType not in self._child_node_types:
+ raise xml.dom.HierarchyRequestErr(
+ "%s cannot be child of %s" % (repr(newChild), repr(self)))
if newChild.parentNode is not None:
newChild.parentNode.removeChild(newChild)
if refChild is None:
self.appendChild(newChild)
else:
- index = self.childNodes.index(refChild)
+ try:
+ index = self.childNodes.index(refChild)
+ except ValueError:
+ raise xml.dom.NotFoundErr()
+ if newChild.nodeType in _nodeTypes_with_children:
+ _clear_id_cache(self)
self.childNodes.insert(index, newChild)
newChild.nextSibling = refChild
refChild.previousSibling = newChild
@@ -156,8 +107,7 @@ class Node(xml.dom.Node):
newChild.previousSibling = node
else:
newChild.previousSibling = None
- if self._makeParentNodes:
- newChild.parentNode = self
+ newChild.parentNode = self
return newChild
def appendChild(self, node):
@@ -166,21 +116,15 @@ class Node(xml.dom.Node):
self.appendChild(c)
### The DOM does not clearly specify what to return in this case
return node
- if node.nodeType not in self.childNodeTypes:
- raise HierarchyRequestErr, \
- "%s cannot be child of %s" % (repr(node), repr(self))
+ if node.nodeType not in self._child_node_types:
+ raise xml.dom.HierarchyRequestErr(
+ "%s cannot be child of %s" % (repr(node), repr(self)))
+ elif node.nodeType in _nodeTypes_with_children:
+ _clear_id_cache(self)
if node.parentNode is not None:
node.parentNode.removeChild(node)
- if self.childNodes:
- last = self.lastChild
- node.previousSibling = last
- last.nextSibling = node
- else:
- node.previousSibling = None
+ _append_child(self, node)
node.nextSibling = None
- self.childNodes.append(node)
- if self._makeParentNodes:
- node.parentNode = self
return node
def replaceChild(self, newChild, oldChild):
@@ -188,18 +132,23 @@ class Node(xml.dom.Node):
refChild = oldChild.nextSibling
self.removeChild(oldChild)
return self.insertBefore(newChild, refChild)
- if newChild.nodeType not in self.childNodeTypes:
- raise HierarchyRequestErr, \
- "%s cannot be child of %s" % (repr(newChild), repr(self))
+ if newChild.nodeType not in self._child_node_types:
+ raise xml.dom.HierarchyRequestErr(
+ "%s cannot be child of %s" % (repr(newChild), repr(self)))
if newChild.parentNode is not None:
newChild.parentNode.removeChild(newChild)
if newChild is oldChild:
return
- index = self.childNodes.index(oldChild)
+ try:
+ index = self.childNodes.index(oldChild)
+ except ValueError:
+ raise xml.dom.NotFoundErr()
self.childNodes[index] = newChild
- if self._makeParentNodes:
- newChild.parentNode = self
- oldChild.parentNode = None
+ newChild.parentNode = self
+ oldChild.parentNode = None
+ if (newChild.nodeType in _nodeTypes_with_children
+ or oldChild.nodeType in _nodeTypes_with_children):
+ _clear_id_cache(self)
newChild.nextSibling = oldChild.nextSibling
newChild.previousSibling = oldChild.previousSibling
oldChild.nextSibling = None
@@ -211,15 +160,19 @@ class Node(xml.dom.Node):
return oldChild
def removeChild(self, oldChild):
- self.childNodes.remove(oldChild)
+ try:
+ self.childNodes.remove(oldChild)
+ except ValueError:
+ raise xml.dom.NotFoundErr()
if oldChild.nextSibling is not None:
oldChild.nextSibling.previousSibling = oldChild.previousSibling
if oldChild.previousSibling is not None:
oldChild.previousSibling.nextSibling = oldChild.nextSibling
oldChild.nextSibling = oldChild.previousSibling = None
+ if oldChild.nodeType in _nodeTypes_with_children:
+ _clear_id_cache(self)
- if self._makeParentNodes:
- oldChild.parentNode = None
+ oldChild.parentNode = None
return oldChild
def normalize(self):
@@ -230,7 +183,7 @@ class Node(xml.dom.Node):
if data and L and L[-1].nodeType == child.nodeType:
# collapse text node
node = L[-1]
- node.data = node.nodeValue = node.data + child.data
+ node.data = node.data + child.data
node.nextSibling = child.nextSibling
child.unlink()
elif data:
@@ -255,58 +208,115 @@ class Node(xml.dom.Node):
self.childNodes[:] = L
def cloneNode(self, deep):
- import new
- clone = new.instance(self.__class__, self.__dict__.copy())
- if self._makeParentNodes:
- clone.parentNode = None
- clone.childNodes = NodeList()
- if deep:
- for child in self.childNodes:
- clone.appendChild(child.cloneNode(1))
- return clone
+ return _clone_node(self, deep, self.ownerDocument or self)
+
+ def isSupported(self, feature, version):
+ return self.ownerDocument.implementation.hasFeature(feature, version)
- # DOM Level 3 (Working Draft 2001-Jan-26)
+ def _get_localName(self):
+ # Overridden in Element and Attr where localName can be Non-Null
+ return None
+
+ # Node interfaces from Level 3 (WD 9 April 2002)
def isSameNode(self, other):
return self is other
+ def getInterface(self, feature):
+ if self.isSupported(feature, None):
+ return self
+ else:
+ return None
+
+ # The "user data" functions use a dictionary that is only present
+ # if some user data has been set, so be careful not to assume it
+ # exists.
+
+ def getUserData(self, key):
+ try:
+ return self._user_data[key][0]
+ except (AttributeError, KeyError):
+ return None
+
+ def setUserData(self, key, data, handler):
+ old = None
+ try:
+ d = self._user_data
+ except AttributeError:
+ d = {}
+ self._user_data = d
+ if d.has_key(key):
+ old = d[key][0]
+ if data is None:
+ # ignore handlers passed for None
+ handler = None
+ if old is not None:
+ del d[key]
+ else:
+ d[key] = (data, handler)
+ return old
+
+ def _call_user_data_handler(self, operation, src, dst):
+ if hasattr(self, "_user_data"):
+ for key, (data, handler) in self._user_data.items():
+ if handler is not None:
+ handler.handle(operation, key, data, src, dst)
+
# minidom-specific API:
def unlink(self):
self.parentNode = self.ownerDocument = None
- for child in self.childNodes:
- child.unlink()
- self.childNodes = None
+ if self.childNodes:
+ for child in self.childNodes:
+ child.unlink()
+ self.childNodes = NodeList()
self.previousSibling = None
self.nextSibling = None
- if Node._debug:
- index = repr(id(self)) + repr(self.__class__)
- self.debug.write("Deleting: %s\n" % index)
- del Node.allnodes[index]
+
+defproperty(Node, "firstChild", doc="First child node, or None.")
+defproperty(Node, "lastChild", doc="Last child node, or None.")
+defproperty(Node, "localName", doc="Namespace-local name of this node.")
+
+
+def _append_child(self, node):
+ # fast path with less checks; usable by DOM builders if careful
+ childNodes = self.childNodes
+ if childNodes:
+ last = childNodes[-1]
+ node.__dict__["previousSibling"] = last
+ last.__dict__["nextSibling"] = node
+ childNodes.append(node)
+ node.__dict__["parentNode"] = self
+
+def _in_document(node):
+ # return True iff node is part of a document tree
+ while node is not None:
+ if node.nodeType == Node.DOCUMENT_NODE:
+ return True
+ node = node.parentNode
+ return False
def _write_data(writer, data):
"Writes datachars to writer."
- data = data.replace("&", "&amp;")
- data = data.replace("<", "&lt;")
- data = data.replace("\"", "&quot;")
- data = data.replace(">", "&gt;")
+ data = data.replace("&", "&amp;").replace("<", "&lt;")
+ data = data.replace("\"", "&quot;").replace(">", "&gt;")
writer.write(data)
-def _getElementsByTagNameHelper(parent, name, rc):
+def _get_elements_by_tagName_helper(parent, name, rc):
for node in parent.childNodes:
if node.nodeType == Node.ELEMENT_NODE and \
(name == "*" or node.tagName == name):
rc.append(node)
- _getElementsByTagNameHelper(node, name, rc)
+ _get_elements_by_tagName_helper(node, name, rc)
return rc
-def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
+def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
for node in parent.childNodes:
if node.nodeType == Node.ELEMENT_NODE:
if ((localName == "*" or node.localName == localName) and
(nsURI == "*" or node.namespaceURI == nsURI)):
rc.append(node)
- _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
+ _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
return rc
class DocumentFragment(Node):
@@ -315,48 +325,141 @@ class DocumentFragment(Node):
nodeValue = None
attributes = None
parentNode = None
- childNodeTypes = (Node.ELEMENT_NODE,
- Node.TEXT_NODE,
- Node.CDATA_SECTION_NODE,
- Node.ENTITY_REFERENCE_NODE,
- Node.PROCESSING_INSTRUCTION_NODE,
- Node.COMMENT_NODE,
- Node.NOTATION_NODE)
+ _child_node_types = (Node.ELEMENT_NODE,
+ Node.TEXT_NODE,
+ Node.CDATA_SECTION_NODE,
+ Node.ENTITY_REFERENCE_NODE,
+ Node.PROCESSING_INSTRUCTION_NODE,
+ Node.COMMENT_NODE,
+ Node.NOTATION_NODE)
+
+ def __init__(self):
+ self.childNodes = NodeList()
class Attr(Node):
nodeType = Node.ATTRIBUTE_NODE
attributes = None
ownerElement = None
- childNodeTypes = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)
+ specified = False
+ _is_id = False
+
+ _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)
- def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, prefix=None):
+ def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
+ prefix=None):
# skip setattr for performance
d = self.__dict__
- d["localName"] = localName or qName
d["nodeName"] = d["name"] = qName
d["namespaceURI"] = namespaceURI
d["prefix"] = prefix
- Node.__init__(self)
+ d['childNodes'] = NodeList()
+
+ # Add the single child node that represents the value of the attr
+ self.childNodes.append(Text())
+
# nodeValue and value are set elsewhere
+ def _get_localName(self):
+ return self.nodeName.split(":", 1)[-1]
+
+ def _get_name(self):
+ return self.name
+
+ def _get_specified(self):
+ return self.specified
+
def __setattr__(self, name, value):
d = self.__dict__
if name in ("value", "nodeValue"):
d["value"] = d["nodeValue"] = value
+ d2 = self.childNodes[0].__dict__
+ d2["data"] = d2["nodeValue"] = value
+ if self.ownerElement is not None:
+ _clear_id_cache(self.ownerElement)
elif name in ("name", "nodeName"):
d["name"] = d["nodeName"] = value
+ if self.ownerElement is not None:
+ _clear_id_cache(self.ownerElement)
else:
d[name] = value
- def cloneNode(self, deep):
- clone = Node.cloneNode(self, deep)
- if clone.__dict__.has_key("ownerElement"):
- del clone.ownerElement
- return clone
+ def _set_prefix(self, value):
+ nsuri = self.namespaceURI
+ if value == "xmlns":
+ if self.namespaceURI and self.namespaceURI != XMLNS_NAMESPACE:
+ raise xml.dom.NamespaceErr(
+ "illegal use of 'xmlns' prefix for the wrong namespace")
+ d = self.__dict__
+ d['prefix'] = prefix
+ if prefix is None:
+ newName = self.localName
+ else:
+ newName = "%s:%s" % (value, self.localName)
+ if self.ownerElement:
+ _clear_id_cache(self.ownerElement)
+ d['nodeName'] = d['name'] = newName
+
+ def _set_value(self, value):
+ d = self.__dict__
+ d['value'] = d['nodeValue'] = value
+ if self.ownerElement:
+ _clear_id_cache(self.ownerElement)
+ self.childNodes[0].data = value
+
+ def unlink(self):
+ # This implementation does not call the base implementation
+ # since most of that is not needed, and the expense of the
+ # method call is not warranted. We duplicate the removal of
+ # children, but that's all we needed from the base class.
+ elem = self.ownerElement
+ if elem is not None:
+ del elem._attrs[self.nodeName]
+ del elem._attrsNS[(self.namespaceURI, self.localName)]
+ if self._is_id:
+ self._is_id = False
+ elem._magic_id_nodes -= 1
+ self.ownerDocument._magic_id_count -= 1
+ for child in self.childNodes:
+ child.unlink()
+ del self.childNodes[:]
+
+ def _get_isId(self):
+ if self._is_id:
+ return True
+ doc = self.ownerDocument
+ elem = self.ownerElement
+ if doc is None or elem is None:
+ return False
+
+ info = doc._get_elem_info(elem)
+ if info is None:
+ return False
+ if self.namespaceURI:
+ return info.isIdNS(self.namespaceURI, self.localName)
+ else:
+ return info.isId(self.nodeName)
+
+ def _get_schemaType(self):
+ doc = self.ownerDocument
+ elem = self.ownerElement
+ if doc is None or elem is None:
+ return _no_type
+
+ info = doc._get_elem_info(elem)
+ if info is None:
+ return _no_type
+ if self.namespaceURI:
+ return info.getAttributeTypeNS(self.namespaceURI, self.localName)
+ else:
+ return info.getAttributeType(self.nodeName)
+
+defproperty(Attr, "isId", doc="True if this attribute is an ID.")
+defproperty(Attr, "localName", doc="Namespace-local name of this attribute.")
+defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
-class NamedNodeMap:
+class NamedNodeMap(NewStyle, GetattrMagic):
"""The attribute list is a transient interface to the underlying
dictionaries. Mutations here will change the underlying element's
dictionary.
@@ -365,21 +468,15 @@ class NamedNodeMap:
attributes as found in an input document.
"""
+ __slots__ = ('_attrs', '_attrsNS', '_ownerElement')
+
def __init__(self, attrs, attrsNS, ownerElement):
self._attrs = attrs
self._attrsNS = attrsNS
self._ownerElement = ownerElement
- try:
- property
- except NameError:
- def __getattr__(self, name):
- if name == "length":
- return len(self._attrs)
- raise AttributeError, name
- else:
- length = property(lambda self: len(self._attrs),
- doc="Number of nodes in the NamedNodeMap.")
+ def _get_length(self):
+ return len(self._attrs)
def item(self, index):
try:
@@ -399,6 +496,12 @@ class NamedNodeMap:
L.append(((node.namespaceURI, node.localName), node.value))
return L
+ def has_key(self, key):
+ if isinstance(key, StringTypes):
+ return self._attrs.has_key(key)
+ else:
+ return self._attrsNS.has_key(key)
+
def keys(self):
return self._attrs.keys()
@@ -408,11 +511,10 @@ class NamedNodeMap:
def values(self):
return self._attrs.values()
- def get(self, name, value = None):
+ def get(self, name, value=None):
return self._attrs.get(name, value)
- def __len__(self):
- return self.length
+ __len__ = _get_length
def __cmp__(self, other):
if self._attrs is getattr(other, "_attrs", None):
@@ -420,35 +522,74 @@ class NamedNodeMap:
else:
return cmp(id(self), id(other))
- #FIXME: is it appropriate to return .value?
def __getitem__(self, attname_or_tuple):
- if type(attname_or_tuple) is _TupleType:
+ if isinstance(attname_or_tuple, _TupleType):
return self._attrsNS[attname_or_tuple]
else:
return self._attrs[attname_or_tuple]
# same as set
def __setitem__(self, attname, value):
- if type(value) in _StringTypes:
- node = Attr(attname)
+ if isinstance(value, StringTypes):
+ try:
+ node = self._attrs[attname]
+ except KeyError:
+ node = Attr(attname)
+ node.ownerDocument = self._ownerElement.ownerDocument
node.value = value
- node.ownerDocument = self._ownerElement.ownerDocument
else:
if not isinstance(value, Attr):
raise TypeError, "value must be a string or Attr object"
node = value
- self.setNamedItem(node)
+ self.setNamedItem(node)
+
+ def getNamedItem(self, name):
+ try:
+ return self._attrs[name]
+ except KeyError:
+ return None
+
+ def getNamedItemNS(self, namespaceURI, localName):
+ try:
+ return self._attrsNS[(namespaceURI, localName)]
+ except KeyError:
+ return None
+
+ def removeNamedItem(self, name):
+ n = self.getNamedItem(name)
+ if n is not None:
+ _clear_id_cache(self._ownerElement)
+ del self._attrs[n.nodeName]
+ del self._attrsNS[(n.namespaceURI, n.localName)]
+ if n.__dict__.has_key('ownerElement'):
+ n.__dict__['ownerElement'] = None
+ return n
+ else:
+ raise xml.dom.NotFoundErr()
+
+ def removeNamedItemNS(self, namespaceURI, localName):
+ n = self.getNamedItemNS(namespaceURI, localName)
+ if n is not None:
+ _clear_id_cache(self._ownerElement)
+ del self._attrsNS[(n.namespaceURI, n.localName)]
+ del self._attrs[n.nodeName]
+ if n.__dict__.has_key('ownerElement'):
+ n.__dict__['ownerElement'] = None
+ return n
+ else:
+ raise xml.dom.NotFoundErr()
def setNamedItem(self, node):
if not isinstance(node, Attr):
- raise HierarchyRequestErr, \
- "%s cannot be child of %s" % (repr(node), repr(self))
+ raise xml.dom.HierarchyRequestErr(
+ "%s cannot be child of %s" % (repr(node), repr(self)))
old = self._attrs.get(node.name)
if old:
old.unlink()
self._attrs[node.name] = node
self._attrsNS[(node.namespaceURI, node.localName)] = node
node.ownerElement = self._ownerElement
+ _clear_id_cache(node.ownerElement)
return old
def setNamedItemNS(self, node):
@@ -456,29 +597,62 @@ class NamedNodeMap:
def __delitem__(self, attname_or_tuple):
node = self[attname_or_tuple]
+ _clear_id_cache(node.ownerElement)
node.unlink()
- del self._attrs[node.name]
- del self._attrsNS[(node.namespaceURI, node.localName)]
+
+ def __getstate__(self):
+ return self._attrs, self._attrsNS, self._ownerElement
+
+ def __setstate__(self, state):
+ self._attrs, self._attrsNS, self._ownerElement = state
+
+defproperty(NamedNodeMap, "length",
+ doc="Number of nodes in the NamedNodeMap.")
AttributeList = NamedNodeMap
+class TypeInfo(NewStyle):
+ __slots__ = 'namespace', 'name'
+
+ def __init__(self, namespace, name):
+ self.namespace = namespace
+ self.name = name
+
+ def __repr__(self):
+ if self.namespace:
+ return "<TypeInfo %s (from %s)>" % (`self.name`, `self.namespace`)
+ else:
+ return "<TypeInfo %s>" % `self.name`
+
+ def _get_name(self):
+ return self.name
+
+ def _get_namespace(self):
+ return self.namespace
+
+_no_type = TypeInfo(None, None)
+
class Element(Node):
nodeType = Node.ELEMENT_NODE
- nextSibling = None
- previousSibling = None
- childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
- Node.COMMENT_NODE, Node.TEXT_NODE,
- Node.CDATA_SECTION_NODE, Node.ENTITY_REFERENCE_NODE)
+ nodeValue = None
+ schemaType = _no_type
+
+ _magic_id_nodes = 0
+
+ _child_node_types = (Node.ELEMENT_NODE,
+ Node.PROCESSING_INSTRUCTION_NODE,
+ Node.COMMENT_NODE,
+ Node.TEXT_NODE,
+ Node.CDATA_SECTION_NODE,
+ Node.ENTITY_REFERENCE_NODE)
def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
localName=None):
- Node.__init__(self)
self.tagName = self.nodeName = tagName
- self.localName = localName or tagName
self.prefix = prefix
self.namespaceURI = namespaceURI
- self.nodeValue = None
+ self.childNodes = NodeList()
self._attrs = {} # attributes are double-indexed:
self._attrsNS = {} # tagName -> Attribute
@@ -488,16 +662,11 @@ class Element(Node):
# for now because of headaches with
# namespaces.
- def cloneNode(self, deep):
- clone = Node.cloneNode(self, deep)
- clone._attrs = {}
- clone._attrsNS = {}
- for attr in self._attrs.values():
- node = attr.cloneNode(1)
- clone._attrs[node.name] = node
- clone._attrsNS[(node.namespaceURI, node.localName)] = node
- node.ownerElement = clone
- return clone
+ def _get_localName(self):
+ return self.tagName.split(":", 1)[-1]
+
+ def _get_tagName(self):
+ return self.tagName
def unlink(self):
for attr in self._attrs.values():
@@ -519,21 +688,41 @@ class Element(Node):
return ""
def setAttribute(self, attname, value):
- attr = Attr(attname)
- # for performance
- d = attr.__dict__
- d["value"] = d["nodeValue"] = value
- d["ownerDocument"] = self.ownerDocument
- self.setAttributeNode(attr)
+ attr = self.getAttributeNode(attname)
+ if attr is None:
+ attr = Attr(attname)
+ # for performance
+ d = attr.__dict__
+ d["value"] = d["nodeValue"] = value
+ d["ownerDocument"] = self.ownerDocument
+ self.setAttributeNode(attr)
+ elif value != attr.value:
+ d = attr.__dict__
+ d["value"] = d["nodeValue"] = value
+ if attr.isId:
+ _clear_id_cache(self)
def setAttributeNS(self, namespaceURI, qualifiedName, value):
prefix, localname = _nssplit(qualifiedName)
- # for performance
- attr = Attr(qualifiedName, namespaceURI, localname, prefix)
- d = attr.__dict__
- d["value"] = d["nodeValue"] = value
- d["ownerDocument"] = self.ownerDocument
- self.setAttributeNode(attr)
+ attr = self.getAttributeNodeNS(namespaceURI, localname)
+ if attr is None:
+ # for performance
+ attr = Attr(qualifiedName, namespaceURI, localname, prefix)
+ d = attr.__dict__
+ d["prefix"] = prefix
+ d["nodeName"] = qualifiedName
+ d["value"] = d["nodeValue"] = value
+ d["ownerDocument"] = self.ownerDocument
+ self.setAttributeNode(attr)
+ else:
+ d = attr.__dict__
+ if value != attr.value:
+ d["value"] = d["nodeValue"] = value
+ if attr.isId:
+ _clear_id_cache(self)
+ if attr.prefix != prefix:
+ d["prefix"] = prefix
+ d["nodeName"] = qualifiedName
def getAttributeNode(self, attrname):
return self._attrs.get(attrname)
@@ -544,36 +733,49 @@ class Element(Node):
def setAttributeNode(self, attr):
if attr.ownerElement not in (None, self):
raise xml.dom.InuseAttributeErr("attribute node already owned")
- old = self._attrs.get(attr.name, None)
- if old:
- old.unlink()
- self._attrs[attr.name] = attr
- self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
-
- # This creates a circular reference, but Element.unlink()
- # breaks the cycle since the references to the attribute
- # dictionaries are tossed.
- attr.ownerElement = self
-
- if old is not attr:
+ old1 = self._attrs.get(attr.name, None)
+ if old1 is not None:
+ self.removeAttributeNode(old1)
+ old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
+ if old2 is not None and old2 is not old1:
+ self.removeAttributeNode(old2)
+ _set_attribute_node(self, attr)
+
+ if old1 is not attr:
# It might have already been part of this node, in which case
# it doesn't represent a change, and should not be returned.
- return old
+ return old1
+ if old2 is not attr:
+ return old2
setAttributeNodeNS = setAttributeNode
def removeAttribute(self, name):
- attr = self._attrs[name]
+ try:
+ attr = self._attrs[name]
+ except KeyError:
+ raise xml.dom.NotFoundErr()
self.removeAttributeNode(attr)
def removeAttributeNS(self, namespaceURI, localName):
- attr = self._attrsNS[(namespaceURI, localName)]
+ try:
+ attr = self._attrsNS[(namespaceURI, localName)]
+ except KeyError:
+ raise xml.dom.NotFoundErr()
self.removeAttributeNode(attr)
def removeAttributeNode(self, node):
+ if node is None:
+ raise xml.dom.NotFoundErr()
+ try:
+ self._attrs[node.name]
+ except KeyError:
+ raise xml.dom.NotFoundErr()
+ _clear_id_cache(self)
node.unlink()
- del self._attrs[node.name]
- del self._attrsNS[(node.namespaceURI, node.localName)]
+ # Restore this since the node is still useful and otherwise
+ # unlinked
+ node.ownerDocument = self.ownerDocument
removeAttributeNodeNS = removeAttributeNode
@@ -584,14 +786,14 @@ class Element(Node):
return self._attrsNS.has_key((namespaceURI, localName))
def getElementsByTagName(self, name):
- return _getElementsByTagNameHelper(self, name, NodeList())
+ return _get_elements_by_tagName_helper(self, name, NodeList())
def getElementsByTagNameNS(self, namespaceURI, localName):
- return _getElementsByTagNameNSHelper(self, namespaceURI, localName,
- NodeList())
+ return _get_elements_by_tagName_ns_helper(
+ self, namespaceURI, localName, NodeList())
def __repr__(self):
- return "<DOM Element: %s at %s>" % (self.tagName, id(self))
+ return "<DOM Element: %s at %#x>" % (self.tagName, id(self))
def writexml(self, writer, indent="", addindent="", newl=""):
# indent = current indentation
@@ -618,61 +820,145 @@ class Element(Node):
def _get_attributes(self):
return NamedNodeMap(self._attrs, self._attrsNS, self)
- try:
- property
- except NameError:
- pass
- else:
- attributes = property(_get_attributes,
- doc="NamedNodeMap of attributes on the element.")
-
def hasAttributes(self):
- if self._attrs or self._attrsNS:
- return 1
+ if self._attrs:
+ return True
else:
- return 0
+ return False
+
+ # DOM Level 3 attributes, based on the 22 Oct 2002 draft
+
+ def setIdAttribute(self, name):
+ idAttr = self.getAttributeNode(name)
+ self.setIdAttributeNode(idAttr)
+
+ def setIdAttributeNS(self, namespaceURI, localName):
+ idAttr = self.getAttributeNodeNS(namespaceURI, localName)
+ self.setIdAttributeNode(idAttr)
+
+ def setIdAttributeNode(self, idAttr):
+ if idAttr is None or not self.isSameNode(idAttr.ownerElement):
+ raise xml.dom.NotFoundErr()
+ if _get_containing_entref(self) is not None:
+ raise xml.dom.NoModificationAllowedErr()
+ if not idAttr._is_id:
+ idAttr.__dict__['_is_id'] = True
+ self._magic_id_nodes += 1
+ self.ownerDocument._magic_id_count += 1
+ _clear_id_cache(self)
+
+defproperty(Element, "attributes",
+ doc="NamedNodeMap of attributes on the element.")
+defproperty(Element, "localName",
+ doc="Namespace-local name of this element.")
+
+
+def _set_attribute_node(element, attr):
+ _clear_id_cache(element)
+ element._attrs[attr.name] = attr
+ element._attrsNS[(attr.namespaceURI, attr.localName)] = attr
+
+ # This creates a circular reference, but Element.unlink()
+ # breaks the cycle since the references to the attribute
+ # dictionaries are tossed.
+ attr.__dict__['ownerElement'] = element
+
+
+class Childless:
+ """Mixin that makes childless-ness easy to implement and avoids
+ the complexity of the Node methods that deal with children.
+ """
-class Comment(Node):
- nodeType = Node.COMMENT_NODE
- nodeName = "#comment"
attributes = None
- childNodeTypes = ()
+ childNodes = EmptyNodeList()
+ firstChild = None
+ lastChild = None
- def __init__(self, data):
- Node.__init__(self)
- self.data = self.nodeValue = data
+ def _get_firstChild(self):
+ return None
- def writexml(self, writer, indent="", addindent="", newl=""):
- writer.write("%s<!--%s-->%s" % (indent,self.data,newl))
+ def _get_lastChild(self):
+ return None
+
+ def appendChild(self, node):
+ raise xml.dom.HierarchyRequestErr(
+ self.nodeName + " nodes cannot have children")
-class ProcessingInstruction(Node):
+ def hasChildNodes(self):
+ return False
+
+ def insertBefore(self, newChild, refChild):
+ raise xml.dom.HierarchyRequestErr(
+ self.nodeName + " nodes do not have children")
+
+ def removeChild(self, oldChild):
+ raise xml.dom.NotFoundErr(
+ self.nodeName + " nodes do not have children")
+
+ def replaceChild(self, newChild, oldChild):
+ raise xml.dom.HierarchyRequestErr(
+ self.nodeName + " nodes do not have children")
+
+
+class ProcessingInstruction(Childless, Node):
nodeType = Node.PROCESSING_INSTRUCTION_NODE
- attributes = None
- childNodeTypes = ()
def __init__(self, target, data):
- Node.__init__(self)
self.target = self.nodeName = target
self.data = self.nodeValue = data
+ def _get_data(self):
+ return self.data
+ def _set_data(self, value):
+ d = self.__dict__
+ d['data'] = d['nodeValue'] = value
+
+ def _get_target(self):
+ return self.target
+ def _set_target(self, value):
+ d = self.__dict__
+ d['target'] = d['nodeName'] = value
+
+ def __setattr__(self, name, value):
+ if name == "data" or name == "nodeValue":
+ self.__dict__['data'] = self.__dict__['nodeValue'] = value
+ elif name == "target" or name == "nodeName":
+ self.__dict__['target'] = self.__dict__['nodeName'] = value
+ else:
+ self.__dict__[name] = value
+
def writexml(self, writer, indent="", addindent="", newl=""):
writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl))
-class CharacterData(Node):
- def __init__(self, data):
- if type(data) not in _StringTypes:
- raise TypeError, "node contents must be a string"
- Node.__init__(self)
- self.data = self.nodeValue = data
- self.length = len(data)
+
+class CharacterData(Childless, Node):
+ def _get_length(self):
+ return len(self.data)
+ __len__ = _get_length
+
+ def _get_data(self):
+ return self.__dict__['data']
+ def _set_data(self, data):
+ d = self.__dict__
+ d['data'] = d['nodeValue'] = data
+
+ _get_nodeValue = _get_data
+ _set_nodeValue = _set_data
+
+ def __setattr__(self, name, value):
+ if name == "data" or name == "nodeValue":
+ self.__dict__['data'] = self.__dict__['nodeValue'] = value
+ else:
+ self.__dict__[name] = value
def __repr__(self):
- if len(self.data) > 10:
+ data = self.data
+ if len(data) > 10:
dotdotdot = "..."
else:
dotdotdot = ""
return "<DOM %s node \"%s%s\">" % (
- self.__class__.__name__, self.data[0:10], dotdotdot)
+ self.__class__.__name__, data[0:10], dotdotdot)
def substringData(self, offset, count):
if offset < 0:
@@ -685,8 +971,6 @@ class CharacterData(Node):
def appendData(self, arg):
self.data = self.data + arg
- self.nodeValue = self.data
- self.length = len(self.data)
def insertData(self, offset, arg):
if offset < 0:
@@ -696,8 +980,6 @@ class CharacterData(Node):
if arg:
self.data = "%s%s%s" % (
self.data[:offset], arg, self.data[offset:])
- self.nodeValue = self.data
- self.length = len(self.data)
def deleteData(self, offset, count):
if offset < 0:
@@ -708,8 +990,6 @@ class CharacterData(Node):
raise xml.dom.IndexSizeErr("count cannot be negative")
if count:
self.data = self.data[:offset] + self.data[offset+count:]
- self.nodeValue = self.data
- self.length = len(self.data)
def replaceData(self, offset, count, arg):
if offset < 0:
@@ -721,19 +1001,25 @@ class CharacterData(Node):
if count:
self.data = "%s%s%s" % (
self.data[:offset], arg, self.data[offset+count:])
- self.nodeValue = self.data
- self.length = len(self.data)
+
+defproperty(CharacterData, "length", doc="Length of the string data.")
+
class Text(CharacterData):
+ # Make sure we don't add an instance __dict__ if we don't already
+ # have one, at least when that's possible:
+ __slots__ = ()
+
nodeType = Node.TEXT_NODE
nodeName = "#text"
attributes = None
- childNodeTypes = ()
def splitText(self, offset):
if offset < 0 or offset > len(self.data):
raise xml.dom.IndexSizeErr("illegal offset value")
- newText = Text(self.data[offset:])
+ newText = self.__class__()
+ newText.data = self.data[offset:]
+ newText.ownerDocument = self.ownerDocument
next = self.nextSibling
if self.parentNode and self in self.parentNode.childNodes:
if next is None:
@@ -741,80 +1027,371 @@ class Text(CharacterData):
else:
self.parentNode.insertBefore(newText, next)
self.data = self.data[:offset]
- self.nodeValue = self.data
- self.length = len(self.data)
return newText
def writexml(self, writer, indent="", addindent="", newl=""):
_write_data(writer, "%s%s%s"%(indent, self.data, newl))
+ # DOM Level 3 (WD 9 April 2002)
+
+ def _get_wholeText(self):
+ L = [self.data]
+ n = self.previousSibling
+ while n is not None:
+ if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
+ L.insert(0, n.data)
+ n = n.previousSibling
+ else:
+ break
+ n = self.nextSibling
+ while n is not None:
+ if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
+ L.append(n.data)
+ n = n.nextSibling
+ else:
+ break
+ return ''.join(L)
+
+ def replaceWholeText(self, content):
+ # XXX This needs to be seriously changed if minidom ever
+ # supports EntityReference nodes.
+ parent = self.parentNode
+ n = self.previousSibling
+ while n is not None:
+ if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
+ next = n.previousSibling
+ parent.removeChild(n)
+ n = next
+ else:
+ break
+ n = self.nextSibling
+ if not content:
+ parent.removeChild(self)
+ while n is not None:
+ if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
+ next = n.nextSibling
+ parent.removeChild(n)
+ n = next
+ else:
+ break
+ if content:
+ d = self.__dict__
+ d['data'] = content
+ d['nodeValue'] = content
+ return self
+ else:
+ return None
+
+ def _get_isWhitespaceInElementContent(self):
+ if self.data.strip():
+ return False
+ elem = _get_containing_element(self)
+ if elem is None:
+ return False
+ info = self.ownerDocument._get_elem_info(elem)
+ if info is None:
+ return False
+ else:
+ return info.isElementContent()
+
+defproperty(Text, "isWhitespaceInElementContent",
+ doc="True iff this text node contains only whitespace"
+ " and is in element content.")
+defproperty(Text, "wholeText",
+ doc="The text of all logically-adjacent text nodes.")
+
+
+def _get_containing_element(node):
+ c = node.parentNode
+ while c is not None:
+ if c.nodeType == Node.ELEMENT_NODE:
+ return c
+ c = c.parentNode
+ return None
+
+def _get_containing_entref(node):
+ c = node.parentNode
+ while c is not None:
+ if c.nodeType == Node.ENTITY_REFERENCE_NODE:
+ return c
+ c = c.parentNode
+ return None
+
+
+class Comment(Childless, CharacterData):
+ nodeType = Node.COMMENT_NODE
+ nodeName = "#comment"
+
+ def __init__(self, data):
+ self.data = self.nodeValue = data
+
+ def writexml(self, writer, indent="", addindent="", newl=""):
+ writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
+
class CDATASection(Text):
+ # Make sure we don't add an instance __dict__ if we don't already
+ # have one, at least when that's possible:
+ __slots__ = ()
+
nodeType = Node.CDATA_SECTION_NODE
nodeName = "#cdata-section"
def writexml(self, writer, indent="", addindent="", newl=""):
+ if self.data.find("]]>") >= 0:
+ raise ValueError("']]>' not allowed in a CDATA section")
writer.write("<![CDATA[%s]]>" % self.data)
-def _nssplit(qualifiedName):
- fields = qualifiedName.split(':', 1)
- if len(fields) == 2:
- return fields
- elif len(fields) == 1:
- return (None, fields[0])
+class ReadOnlySequentialNamedNodeMap(NewStyle, GetattrMagic):
+ __slots__ = '_seq',
+
+ def __init__(self, seq=()):
+ # seq should be a list or tuple
+ self._seq = seq
+
+ def __len__(self):
+ return len(self._seq)
+
+ def _get_length(self):
+ return len(self._seq)
+
+ def getNamedItem(self, name):
+ for n in self._seq:
+ if n.nodeName == name:
+ return n
+
+ def getNamedItemNS(self, namespaceURI, localName):
+ for n in self._seq:
+ if n.namespaceURI == namespaceURI and n.localName == localName:
+ return n
+
+ def __getitem__(self, name_or_tuple):
+ if isinstance(name_or_tuple, _TupleType):
+ node = self.getNamedItemNS(*name_or_tuple)
+ else:
+ node = self.getNamedItem(name_or_tuple)
+ if node is None:
+ raise KeyError, name_or_tuple
+ return node
+
+ def item(self, index):
+ if index < 0:
+ return None
+ try:
+ return self._seq[index]
+ except IndexError:
+ return None
+
+ def removeNamedItem(self, name):
+ raise xml.dom.NoModificationAllowedErr(
+ "NamedNodeMap instance is read-only")
+
+ def removeNamedItemNS(self, namespaceURI, localName):
+ raise xml.dom.NoModificationAllowedErr(
+ "NamedNodeMap instance is read-only")
+
+ def setNamedItem(self, node):
+ raise xml.dom.NoModificationAllowedErr(
+ "NamedNodeMap instance is read-only")
+
+ def setNamedItemNS(self, node):
+ raise xml.dom.NoModificationAllowedErr(
+ "NamedNodeMap instance is read-only")
+
+ def __getstate__(self):
+ return [self._seq]
+
+ def __setstate__(self, state):
+ self._seq = state[0]
+
+defproperty(ReadOnlySequentialNamedNodeMap, "length",
+ doc="Number of entries in the NamedNodeMap.")
+
+
+class Identified:
+ """Mix-in class that supports the publicId and systemId attributes."""
+
+ __slots__ = 'publicId', 'systemId'
+
+ def _identified_mixin_init(self, publicId, systemId):
+ self.publicId = publicId
+ self.systemId = systemId
+
+ def _get_publicId(self):
+ return self.publicId
+ def _get_systemId(self):
+ return self.systemId
-class DocumentType(Node):
+class DocumentType(Identified, Childless, Node):
nodeType = Node.DOCUMENT_TYPE_NODE
nodeValue = None
- attributes = None
name = None
publicId = None
systemId = None
internalSubset = None
- entities = None
- notations = None
def __init__(self, qualifiedName):
- Node.__init__(self)
+ self.entities = ReadOnlySequentialNamedNodeMap()
+ self.notations = ReadOnlySequentialNamedNodeMap()
if qualifiedName:
prefix, localname = _nssplit(qualifiedName)
self.name = localname
+ self.nodeName = self.name
+
+ def _get_internalSubset(self):
+ return self.internalSubset
+
+ def cloneNode(self, deep):
+ if self.ownerDocument is None:
+ # it's ok
+ clone = DocumentType(None)
+ clone.name = self.name
+ clone.nodeName = self.name
+ operation = xml.dom.UserDataHandler.NODE_CLONED
+ if deep:
+ clone.entities._seq = []
+ clone.notations._seq = []
+ for n in self.notations._seq:
+ notation = Notation(n.nodeName, n.publicId, n.systemId)
+ clone.notations._seq.append(notation)
+ n._call_user_data_handler(operation, n, notation)
+ for e in self.entities._seq:
+ entity = Entity(e.nodeName, e.publicId, e.systemId,
+ e.notationName)
+ entity.actualEncoding = e.actualEncoding
+ entity.encoding = e.encoding
+ entity.version = e.version
+ clone.entities._seq.append(entity)
+ e._call_user_data_handler(operation, n, entity)
+ self._call_user_data_handler(operation, self, clone)
+ return clone
+ else:
+ return None
+
+ def writexml(self, writer, indent="", addindent="", newl=""):
+ writer.write("<!DOCTYPE ")
+ writer.write(self.name)
+ if self.publicId:
+ writer.write("\n PUBLIC '%s'\n '%s'"
+ % (self.publicId, self.systemId))
+ elif self.systemId:
+ writer.write("\n SYSTEM '%s'" % self.systemId)
+ if self.internalSubset is not None:
+ writer.write(" [")
+ writer.write(self.internalSubset)
+ writer.write("]")
+ writer.write(">\n")
+
+class Entity(Identified, Node):
+ attributes = None
+ nodeType = Node.ENTITY_NODE
+ nodeValue = None
+
+ actualEncoding = None
+ encoding = None
+ version = None
+ def __init__(self, name, publicId, systemId, notation):
+ self.nodeName = name
+ self.notationName = notation
+ self.childNodes = NodeList()
+ self._identified_mixin_init(publicId, systemId)
+
+ def _get_actualEncoding(self):
+ return self.actualEncoding
+
+ def _get_encoding(self):
+ return self.encoding
+
+ def _get_version(self):
+ return self.version
+
+ def appendChild(self, newChild):
+ raise xml.dom.HierarchyRequestErr(
+ "cannot append children to an entity node")
+
+ def insertBefore(self, newChild, refChild):
+ raise xml.dom.HierarchyRequestErr(
+ "cannot insert children below an entity node")
+
+ def removeChild(self, oldChild):
+ raise xml.dom.HierarchyRequestErr(
+ "cannot remove children from an entity node")
+
+ def replaceChild(self, newChild, oldChild):
+ raise xml.dom.HierarchyRequestErr(
+ "cannot replace children of an entity node")
+
+class Notation(Identified, Childless, Node):
+ nodeType = Node.NOTATION_NODE
+ nodeValue = None
+
+ def __init__(self, name, publicId, systemId):
+ self.nodeName = name
+ self._identified_mixin_init(publicId, systemId)
+
+
+class DOMImplementation(DOMImplementationLS):
+ _features = [("core", "1.0"),
+ ("core", "2.0"),
+ ("core", "3.0"),
+ ("core", None),
+ ("xml", "1.0"),
+ ("xml", "2.0"),
+ ("xml", "3.0"),
+ ("xml", None),
+ ("ls-load", "3.0"),
+ ("ls-load", None),
+ ]
-class DOMImplementation:
def hasFeature(self, feature, version):
- if version not in ("1.0", "2.0"):
- return 0
- feature = feature.lower()
- return feature == "core"
+ if version == "":
+ version = None
+ return (feature.lower(), version) in self._features
def createDocument(self, namespaceURI, qualifiedName, doctype):
if doctype and doctype.parentNode is not None:
raise xml.dom.WrongDocumentErr(
"doctype object owned by another DOM tree")
- doc = self._createDocument()
- if doctype is None:
- doctype = self.createDocumentType(qualifiedName, None, None)
- if not qualifiedName:
+ doc = self._create_document()
+
+ add_root_element = not (namespaceURI is None
+ and qualifiedName is None
+ and doctype is None)
+
+ if not qualifiedName and add_root_element:
# The spec is unclear what to raise here; SyntaxErr
# would be the other obvious candidate. Since Xerces raises
# InvalidCharacterErr, and since SyntaxErr is not listed
# for createDocument, that seems to be the better choice.
# XXX: need to check for illegal characters here and in
# createElement.
+
+ # DOM Level III clears this up when talking about the return value
+ # of this function. If namespaceURI, qName and DocType are
+ # Null the document is returned without a document element
+ # Otherwise if doctype or namespaceURI are not None
+ # Then we go back to the above problem
raise xml.dom.InvalidCharacterErr("Element with no name")
- prefix, localname = _nssplit(qualifiedName)
- if prefix == "xml" \
- and namespaceURI != "http://www.w3.org/XML/1998/namespace":
- raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
- if prefix and not namespaceURI:
- raise xml.dom.NamespaceErr(
- "illegal use of prefix without namespaces")
- element = doc.createElementNS(namespaceURI, qualifiedName)
- doc.appendChild(element)
- doctype.parentNode = doctype.ownerDocument = doc
+
+ if add_root_element:
+ prefix, localname = _nssplit(qualifiedName)
+ if prefix == "xml" \
+ and namespaceURI != "http://www.w3.org/XML/1998/namespace":
+ raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
+ if prefix and not namespaceURI:
+ raise xml.dom.NamespaceErr(
+ "illegal use of prefix without namespaces")
+ element = doc.createElementNS(namespaceURI, qualifiedName)
+ if doctype:
+ doc.appendChild(doctype)
+ doc.appendChild(element)
+
+ if doctype:
+ doctype.parentNode = doctype.ownerDocument = doc
+
doc.doctype = doctype
doc.implementation = self
return doc
@@ -825,11 +1402,72 @@ class DOMImplementation:
doctype.systemId = systemId
return doctype
+ # DOM Level 3 (WD 9 April 2002)
+
+ def getInterface(self, feature):
+ if self.hasFeature(feature, None):
+ return self
+ else:
+ return None
+
# internal
- def _createDocument(self):
+ def _create_document(self):
return Document()
-class Document(Node):
+class ElementInfo(NewStyle):
+ """Object that represents content-model information for an element.
+
+ This implementation is not expected to be used in practice; DOM
+ builders should provide implementations which do the right thing
+ using information available to it.
+
+ """
+
+ __slots__ = 'tagName',
+
+ def __init__(self, name):
+ self.tagName = name
+
+ def getAttributeType(self, aname):
+ return _no_type
+
+ def getAttributeTypeNS(self, namespaceURI, localName):
+ return _no_type
+
+ def isElementContent(self):
+ return False
+
+ def isEmpty(self):
+ """Returns true iff this element is declared to have an EMPTY
+ content model."""
+ return False
+
+ def isId(self, aname):
+ """Returns true iff the named attribte is a DTD-style ID."""
+ return False
+
+ def isIdNS(self, namespaceURI, localName):
+ """Returns true iff the identified attribute is a DTD-style ID."""
+ return False
+
+ def __getstate__(self):
+ return self.tagName
+
+ def __setstate__(self, state):
+ self.tagName = state
+
+def _clear_id_cache(node):
+ if node.nodeType == Node.DOCUMENT_NODE:
+ node._id_cache.clear()
+ node._id_search_stack = None
+ elif _in_document(node):
+ node.ownerDocument._id_cache.clear()
+ node.ownerDocument._id_search_stack= None
+
+class Document(Node, DocumentLS):
+ _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
+ Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
+
nodeType = Node.DOCUMENT_NODE
nodeName = "#document"
nodeValue = None
@@ -839,14 +1477,66 @@ class Document(Node):
previousSibling = nextSibling = None
implementation = DOMImplementation()
- childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
- Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
+
+ # Document attributes from Level 3 (WD 9 April 2002)
+
+ actualEncoding = None
+ encoding = None
+ standalone = None
+ version = None
+ strictErrorChecking = False
+ errorHandler = None
+ documentURI = None
+
+ _magic_id_count = 0
+
+ def __init__(self):
+ self.childNodes = NodeList()
+ # mapping of (namespaceURI, localName) -> ElementInfo
+ # and tagName -> ElementInfo
+ self._elem_info = {}
+ self._id_cache = {}
+ self._id_search_stack = None
+
+ def _get_elem_info(self, element):
+ if element.namespaceURI:
+ key = element.namespaceURI, element.localName
+ else:
+ key = element.tagName
+ return self._elem_info.get(key)
+
+ def _get_actualEncoding(self):
+ return self.actualEncoding
+
+ def _get_doctype(self):
+ return self.doctype
+
+ def _get_documentURI(self):
+ return self.documentURI
+
+ def _get_encoding(self):
+ return self.encoding
+
+ def _get_errorHandler(self):
+ return self.errorHandler
+
+ def _get_standalone(self):
+ return self.standalone
+
+ def _get_strictErrorChecking(self):
+ return self.strictErrorChecking
+
+ def _get_version(self):
+ return self.version
def appendChild(self, node):
- if node.nodeType not in self.childNodeTypes:
- raise HierarchyRequestErr, \
- "%s cannot be child of %s" % (repr(node), repr(self))
+ if node.nodeType not in self._child_node_types:
+ raise xml.dom.HierarchyRequestErr(
+ "%s cannot be child of %s" % (repr(node), repr(self)))
if node.parentNode is not None:
+ # This needs to be done before the next test since this
+ # may *be* the document element, in which case it should
+ # end up re-ordered to the end.
node.parentNode.removeChild(node)
if node.nodeType == Node.ELEMENT_NODE \
@@ -856,7 +1546,10 @@ class Document(Node):
return Node.appendChild(self, node)
def removeChild(self, oldChild):
- self.childNodes.remove(oldChild)
+ try:
+ self.childNodes.remove(oldChild)
+ except ValueError:
+ raise xml.dom.NotFoundErr()
oldChild.nextSibling = oldChild.previousSibling = None
oldChild.parentNode = None
if self.documentElement is oldChild:
@@ -869,23 +1562,36 @@ class Document(Node):
if node.nodeType == Node.ELEMENT_NODE:
return node
- try:
- property
- except NameError:
- pass
- else:
- documentElement = property(_get_documentElement,
- doc="Top-level element of this document.")
-
def unlink(self):
if self.doctype is not None:
self.doctype.unlink()
self.doctype = None
Node.unlink(self)
+ def cloneNode(self, deep):
+ if not deep:
+ return None
+ clone = self.implementation.createDocument(None, None, None)
+ clone.encoding = self.encoding
+ clone.standalone = self.standalone
+ clone.version = self.version
+ for n in self.childNodes:
+ childclone = _clone_node(n, deep, clone)
+ assert childclone.ownerDocument.isSameNode(clone)
+ clone.childNodes.append(childclone)
+ if childclone.nodeType == Node.DOCUMENT_NODE:
+ assert clone.documentElement is None
+ elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
+ assert clone.doctype is None
+ clone.doctype = childclone
+ childclone.parentNode = clone
+ self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
+ self, clone)
+ return clone
+
def createDocumentFragment(self):
d = DocumentFragment()
- d.ownerDoc = self
+ d.ownerDocument = self
return d
def createElement(self, tagName):
@@ -894,12 +1600,18 @@ class Document(Node):
return e
def createTextNode(self, data):
- t = Text(data)
+ if not isinstance(data, StringTypes):
+ raise TypeError, "node contents must be a string"
+ t = Text()
+ t.data = data
t.ownerDocument = self
return t
def createCDATASection(self, data):
- c = CDATASection(data)
+ if not isinstance(data, StringTypes):
+ raise TypeError, "node contents must be a string"
+ c = CDATASection()
+ c.data = data
c.ownerDocument = self
return c
@@ -921,7 +1633,7 @@ class Document(Node):
def createElementNS(self, namespaceURI, qualifiedName):
prefix, localName = _nssplit(qualifiedName)
- e = Element(qualifiedName, namespaceURI, prefix, localName)
+ e = Element(qualifiedName, namespaceURI, prefix)
e.ownerDocument = self
return e
@@ -932,12 +1644,93 @@ class Document(Node):
a.value = ""
return a
+ # A couple of implementation-specific helpers to create node types
+ # not supported by the W3C DOM specs:
+
+ def _create_entity(self, name, publicId, systemId, notationName):
+ e = Entity(name, publicId, systemId, notationName)
+ e.ownerDocument = self
+ return e
+
+ def _create_notation(self, name, publicId, systemId):
+ n = Notation(name, publicId, systemId)
+ n.ownerDocument = self
+ return n
+
+ def getElementById(self, id):
+ if self._id_cache.has_key(id):
+ return self._id_cache[id]
+ if not (self._elem_info or self._magic_id_count):
+ return None
+
+ stack = self._id_search_stack
+ if stack is None:
+ # we never searched before, or the cache has been cleared
+ stack = [self.documentElement]
+ self._id_search_stack = stack
+ elif not stack:
+ # Previous search was completed and cache is still valid;
+ # no matching node.
+ return None
+
+ result = None
+ while stack:
+ node = stack.pop()
+ # add child elements to stack for continued searching
+ stack.extend([child for child in node.childNodes
+ if child.nodeType in _nodeTypes_with_children])
+ # check this node
+ info = self._get_elem_info(node)
+ if info:
+ # We have to process all ID attributes before
+ # returning in order to get all the attributes set to
+ # be IDs using Element.setIdAttribute*().
+ for attr in node.attributes.values():
+ if attr.namespaceURI:
+ if info.isIdNS(attr.namespaceURI, attr.localName):
+ self._id_cache[attr.value] = node
+ if attr.value == id:
+ result = node
+ elif not node._magic_id_nodes:
+ break
+ elif info.isId(attr.name):
+ self._id_cache[attr.value] = node
+ if attr.value == id:
+ result = node
+ elif not node._magic_id_nodes:
+ break
+ elif attr._is_id:
+ self._id_cache[attr.value] = node
+ if attr.value == id:
+ result = node
+ elif node._magic_id_nodes == 1:
+ break
+ elif node._magic_id_nodes:
+ for attr in node.attributes.values():
+ if attr._is_id:
+ self._id_cache[attr.value] = node
+ if attr.value == id:
+ result = node
+ if result is not None:
+ break
+ return result
+
def getElementsByTagName(self, name):
- return _getElementsByTagNameHelper(self, name, NodeList())
+ return _get_elements_by_tagName_helper(self, name, NodeList())
def getElementsByTagNameNS(self, namespaceURI, localName):
- return _getElementsByTagNameNSHelper(self, namespaceURI, localName,
- NodeList())
+ return _get_elements_by_tagName_ns_helper(
+ self, namespaceURI, localName, NodeList())
+
+ def isSupported(self, feature, version):
+ return self.implementation.hasFeature(feature, version)
+
+ def importNode(self, node, deep):
+ if node.nodeType == Node.DOCUMENT_NODE:
+ raise xml.dom.NotSupportedErr("cannot import document nodes")
+ elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
+ raise xml.dom.NotSupportedErr("cannot import document type nodes")
+ return _clone_node(node, deep, self)
def writexml(self, writer, indent="", addindent="", newl="",
encoding = None):
@@ -948,27 +1741,194 @@ class Document(Node):
for node in self.childNodes:
node.writexml(writer, indent, addindent, newl)
+ # DOM Level 3 (WD 9 April 2002)
+
+ def renameNode(self, n, namespaceURI, name):
+ if n.ownerDocument is not self:
+ raise xml.dom.WrongDocumentErr(
+ "cannot rename nodes from other documents;\n"
+ "expected %s,\nfound %s" % (self, n.ownerDocument))
+ if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
+ raise xml.dom.NotSupportedErr(
+ "renameNode() only applies to element and attribute nodes")
+ if namespaceURI != EMPTY_NAMESPACE:
+ if ':' in name:
+ prefix, localName = name.split(':', 1)
+ if ( prefix == "xmlns"
+ and namespaceURI != xml.dom.XMLNS_NAMESPACE):
+ raise xml.dom.NamespaceErr(
+ "illegal use of 'xmlns' prefix")
+ else:
+ if ( name == "xmlns"
+ and namespaceURI != xml.dom.XMLNS_NAMESPACE
+ and n.nodeType == Node.ATTRIBUTE_NODE):
+ raise xml.dom.NamespaceErr(
+ "illegal use of the 'xmlns' attribute")
+ prefix = None
+ localName = name
+ else:
+ prefix = None
+ localName = None
+ if n.nodeType == Node.ATTRIBUTE_NODE:
+ element = n.ownerElement
+ if element is not None:
+ is_id = n._is_id
+ element.removeAttributeNode(n)
+ else:
+ element = None
+ # avoid __setattr__
+ d = n.__dict__
+ d['prefix'] = prefix
+ d['localName'] = localName
+ d['namespaceURI'] = namespaceURI
+ d['nodeName'] = name
+ if n.nodeType == Node.ELEMENT_NODE:
+ d['tagName'] = name
+ else:
+ # attribute node
+ d['name'] = name
+ if element is not None:
+ element.setAttributeNode(n)
+ if is_id:
+ element.setIdAttributeNode(n)
+ # It's not clear from a semantic perspective whether we should
+ # call the user data handlers for the NODE_RENAMED event since
+ # we're re-using the existing node. The draft spec has been
+ # interpreted as meaning "no, don't call the handler unless a
+ # new node is created."
+ return n
+
+defproperty(Document, "documentElement",
+ doc="Top-level element of this document.")
+
+
+def _clone_node(node, deep, newOwnerDocument):
+ """
+ Clone a node and give it the new owner document.
+ Called by Node.cloneNode and Document.importNode
+ """
+ if node.ownerDocument.isSameNode(newOwnerDocument):
+ operation = xml.dom.UserDataHandler.NODE_CLONED
+ else:
+ operation = xml.dom.UserDataHandler.NODE_IMPORTED
+ if node.nodeType == Node.ELEMENT_NODE:
+ clone = newOwnerDocument.createElementNS(node.namespaceURI,
+ node.nodeName)
+ for attr in node.attributes.values():
+ clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
+ a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
+ a.specified = attr.specified
+
+ if deep:
+ for child in node.childNodes:
+ c = _clone_node(child, deep, newOwnerDocument)
+ clone.appendChild(c)
+
+ elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
+ clone = newOwnerDocument.createDocumentFragment()
+ if deep:
+ for child in node.childNodes:
+ c = _clone_node(child, deep, newOwnerDocument)
+ clone.appendChild(c)
+
+ elif node.nodeType == Node.TEXT_NODE:
+ clone = newOwnerDocument.createTextNode(node.data)
+ elif node.nodeType == Node.CDATA_SECTION_NODE:
+ clone = newOwnerDocument.createCDATASection(node.data)
+ elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
+ clone = newOwnerDocument.createProcessingInstruction(node.target,
+ node.data)
+ elif node.nodeType == Node.COMMENT_NODE:
+ clone = newOwnerDocument.createComment(node.data)
+ elif node.nodeType == Node.ATTRIBUTE_NODE:
+ clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
+ node.nodeName)
+ clone.specified = True
+ clone.value = node.value
+ elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
+ assert node.ownerDocument is not newOwnerDocument
+ operation = xml.dom.UserDataHandler.NODE_IMPORTED
+ clone = newOwnerDocument.implementation.createDocumentType(
+ node.name, node.publicId, node.systemId)
+ clone.ownerDocument = newOwnerDocument
+ if deep:
+ clone.entities._seq = []
+ clone.notations._seq = []
+ for n in node.notations._seq:
+ notation = Notation(n.nodeName, n.publicId, n.systemId)
+ notation.ownerDocument = newOwnerDocument
+ clone.notations._seq.append(notation)
+ if hasattr(n, '_call_user_data_handler'):
+ n._call_user_data_handler(operation, n, notation)
+ for e in node.entities._seq:
+ entity = Entity(e.nodeName, e.publicId, e.systemId,
+ e.notationName)
+ entity.actualEncoding = e.actualEncoding
+ entity.encoding = e.encoding
+ entity.version = e.version
+ entity.ownerDocument = newOwnerDocument
+ clone.entities._seq.append(entity)
+ if hasattr(e, '_call_user_data_handler'):
+ e._call_user_data_handler(operation, n, entity)
+ else:
+ # Note the cloning of Document and DocumentType nodes is
+ # implemenetation specific. minidom handles those cases
+ # directly in the cloneNode() methods.
+ raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
+
+ # Check for _call_user_data_handler() since this could conceivably
+ # used with other DOM implementations (one of the FourThought
+ # DOMs, perhaps?).
+ if hasattr(node, '_call_user_data_handler'):
+ node._call_user_data_handler(operation, node, clone)
+ return clone
+
+
+def _nssplit(qualifiedName):
+ fields = qualifiedName.split(':', 1)
+ if len(fields) == 2:
+ return fields
+ else:
+ return (None, fields[0])
+
+
def _get_StringIO():
# we can't use cStringIO since it doesn't support Unicode strings
from StringIO import StringIO
return StringIO()
-def _doparse(func, args, kwargs):
+def _do_pulldom_parse(func, args, kwargs):
events = apply(func, args, kwargs)
toktype, rootNode = events.getEvent()
events.expandNode(rootNode)
events.clear()
return rootNode
-def parse(*args, **kwargs):
+def parse(file, parser=None, bufsize=None):
"""Parse a file into a DOM by filename or file object."""
- from xml.dom import pulldom
- return _doparse(pulldom.parse, args, kwargs)
+ if parser is None and not bufsize:
+ from xml.dom import expatbuilder
+ return expatbuilder.parse(file)
+ else:
+ from xml.dom import pulldom
+ return _do_pulldom_parse(pulldom.parse, (file,),
+ {'parser': parser, 'bufsize': bufsize})
-def parseString(*args, **kwargs):
+def parseString(string, parser=None):
"""Parse a file into a DOM from a string."""
- from xml.dom import pulldom
- return _doparse(pulldom.parseString, args, kwargs)
-
-def getDOMImplementation():
+ if parser is None:
+ from xml.dom import expatbuilder
+ return expatbuilder.parseString(string)
+ else:
+ from xml.dom import pulldom
+ return _do_pulldom_parse(pulldom.parseString, (string,),
+ {'parser': parser})
+
+def getDOMImplementation(features=None):
+ if features:
+ if isinstance(features, StringTypes):
+ features = domreg._parse_feature_string(features)
+ for f, v in features:
+ if not Document.implementation.hasFeature(f, v):
+ return None
return Document.implementation