diff options
Diffstat (limited to 'Lib/xml/dom/minidom.py')
-rw-r--r-- | Lib/xml/dom/minidom.py | 257 |
1 files changed, 181 insertions, 76 deletions
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 6a72684..bf8166a 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -14,10 +14,19 @@ Todo: * SAX 2 namespaces """ -import pulldom import string -from StringIO import StringIO +_string = string +del string + +# localize the types, and allow support for Unicode values if available: import types +_TupleType = types.TupleType +try: + _StringTypes = (types.StringType, types.UnicodeType) +except AttributeError: + _StringTypes = (types.StringType,) +del types + class Node: ELEMENT_NODE = 1 @@ -44,7 +53,7 @@ class Node: index = repr(id(self)) + repr(self.__class__) Node.allnodes[index] = repr(self.__dict__) if Node.debug is None: - Node.debug = StringIO() + Node.debug = _get_StringIO() #open( "debug4.out", "w" ) Node.debug.write("create %s\n" % index) @@ -79,7 +88,7 @@ class Node: return 1 def toxml(self): - writer = StringIO() + writer = _get_StringIO() self.writexml(writer) return writer.getvalue() @@ -90,16 +99,30 @@ class Node: return 0 def _get_firstChild(self): - return self.childNodes[0] + if self.childNodes: + return self.childNodes[0] def _get_lastChild(self): - return self.childNodes[-1] + if self.childNodes: + return self.childNodes[-1] def insertBefore(self, newChild, refChild): - index = self.childNodes.index(refChild) - self.childNodes.insert(index, newChild) - if self._makeParentNodes: - newChild.parentNode = self + if refChild is None: + self.appendChild(newChild) + else: + index = self.childNodes.index(refChild) + self.childNodes.insert(index, newChild) + newChild.nextSibling = refChild + refChild.previousSibling = newChild + if index: + node = self.childNodes[index-1] + node.nextSibling = newChild + newChild.previousSibling = node + else: + newChild.previousSibling = None + if self._makeParentNodes: + newChild.parentNode = self + return newChild def appendChild(self, node): if self.childNodes: @@ -110,39 +133,69 @@ class Node: node.previousSibling = None node.nextSibling = None self.childNodes.append(node) + if self._makeParentNodes: + node.parentNode = self return node def replaceChild(self, newChild, oldChild): + if newChild is oldChild: + return index = self.childNodes.index(oldChild) - self.childNodes[index] = oldChild + self.childNodes[index] = newChild + if self._makeParentNodes: + newChild.parentNode = self + oldChild.parentNode = None + newChild.nextSibling = oldChild.nextSibling + newChild.previousSibling = oldChild.previousSibling + oldChild.newChild = None + oldChild.previousSibling = None + return oldChild def removeChild(self, oldChild): - index = self.childNodes.index(oldChild) - del self.childNodes[index] + self.childNodes.remove(oldChild) + if self._makeParentNodes: + oldChild.parentNode = None + return oldChild + + def normalize(self): + if len(self.childNodes) > 1: + L = [self.childNodes[0]] + for child in self.childNodes[1:]: + if ( child.nodeType == Node.TEXT_NODE + and L[-1].nodeType == child.nodeType): + # collapse text node + node = L[-1] + node.data = node.nodeValue = node.data + child.data + node.nextSibling = child.nextSibling + child.unlink() + else: + L[-1].nextSibling = child + child.previousSibling = L[-1] + L.append(child) + child.normalize() + self.childNodes = L + elif self.childNodes: + # exactly one child -- just recurse + self.childNodes[0].normalize() def cloneNode(self, deep): import new - clone = new.instance(self.__class__, self.__dict__) - clone.attributes = self.attributes.copy() - if not deep: - clone.childNodes = [] - else: - clone.childNodes = map(lambda x: x.cloneNode, self.childNodes) + clone = new.instance(self.__class__, self.__dict__.copy()) + if self._makeParentNodes: + clone.parentNode = None + clone.childNodes = [] + if deep: + for child in self.childNodes: + clone.appendChild(child.cloneNode(1)) return clone def unlink(self): self.parentNode = None - while self.childNodes: - self.childNodes[-1].unlink() - del self.childNodes[-1] # probably not most efficient! + for child in self.childNodes: + child.unlink() self.childNodes = None self.previousSibling = None self.nextSibling = None - if self.attributes: - for attr in self._attrs.values(): - self.removeAttributeNode(attr) - assert not len(self._attrs) - assert not len(self._attrsNS) if Node._debug: index = repr(id(self)) + repr(self.__class__) self.debug.write("Deleting: %s\n" % index) @@ -150,10 +203,11 @@ class Node: def _write_data(writer, data): "Writes datachars to writer." - data = string.replace(data, "&", "&") - data = string.replace(data, "<", "<") - data = string.replace(data, "\"", """) - data = string.replace(data, ">", ">") + replace = _string.replace + data = replace(data, "&", "&") + data = replace(data, "<", "<") + data = replace(data, "\"", """) + data = replace(data, ">", ">") writer.write(data) def _getElementsByTagNameHelper(parent, name, rc): @@ -174,14 +228,16 @@ def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc): class Attr(Node): nodeType = Node.ATTRIBUTE_NODE + attributes = None + ownerElement = None def __init__(self, qName, namespaceURI="", localName=None, prefix=None): # skip setattr for performance - self.__dict__["localName"] = localName or qName - self.__dict__["nodeName"] = self.__dict__["name"] = qName - self.__dict__["namespaceURI"] = namespaceURI - self.__dict__["prefix"] = prefix - self.attributes = None + d = self.__dict__ + d["localName"] = localName or qName + d["nodeName"] = d["name"] = qName + d["namespaceURI"] = namespaceURI + d["prefix"] = prefix Node.__init__(self) # nodeValue and value are set elsewhere @@ -191,14 +247,21 @@ class Attr(Node): else: self.__dict__[name] = value + def cloneNode(self, deep): + clone = Node.cloneNode(self, deep) + if clone.__dict__.has_key("ownerElement"): + del clone.ownerElement + return clone + class AttributeList: - """the attribute list is a transient interface to the underlying - dictionaries. mutations here will change the underlying element's + """The attribute list is a transient interface to the underlying + dictionaries. Mutations here will change the underlying element's dictionary""" + def __init__(self, attrs, attrsNS): self._attrs = attrs self._attrsNS = attrsNS - self.length = len(self._attrs.keys()) + self.length = len(self._attrs) def item(self, index): try: @@ -207,12 +270,16 @@ class AttributeList: return None def items(self): - return map(lambda node: (node.tagName, node.value), - self._attrs.values()) + L = [] + for node in self._attrs.values(): + L.append((node.tagName, node.value)) + return L def itemsNS(self): - return map(lambda node: ((node.URI, node.localName), node.value), - self._attrs.values()) + L = [] + for node in self._attrs.values(): + L.append(((node.URI, node.localName), node.value)) + return L def keys(self): return self._attrs.keys() @@ -234,18 +301,19 @@ class AttributeList: #FIXME: is it appropriate to return .value? def __getitem__(self, attname_or_tuple): - if type(attname_or_tuple) is types.TupleType: + if type(attname_or_tuple) is _TupleType: return self._attrsNS[attname_or_tuple] else: return self._attrs[attname_or_tuple] # same as set def __setitem__(self, attname, value): - if type(value) is types.StringType: + if type(value) in _StringTypes: node = Attr(attname) - node.value=value + node.value = value else: - assert isinstance(value, Attr) or type(value) is types.StringType + if not isinstance(value, Attr): + raise TypeError, "value must be a string or Attr object" node = value old = self._attrs.get(attname, None) if old: @@ -261,6 +329,8 @@ class AttributeList: class Element(Node): nodeType = Node.ELEMENT_NODE + nextSibling = None + previousSibling = None def __init__(self, tagName, namespaceURI="", prefix="", localName=None): @@ -271,12 +341,31 @@ class Element(Node): self.namespaceURI = namespaceURI self.nodeValue = None - self._attrs={} # attributes are double-indexed: - self._attrsNS={}# tagName -> Attribute - # URI,localName -> Attribute - # in the future: consider lazy generation of attribute objects - # this is too tricky for now because of headaches - # with namespaces. + self._attrs = {} # attributes are double-indexed: + self._attrsNS = {} # tagName -> Attribute + # URI,localName -> Attribute + # in the future: consider lazy generation + # of attribute objects this is too tricky + # for now because of headaches with + # namespaces. + + def cloneNode(self, deep): + clone = Node.cloneNode(self, deep) + clone._attrs = {} + clone._attrsNS = {} + for attr in self._attrs.values(): + node = attr.cloneNode(1) + clone._attrs[node.name] = node + clone._attrsNS[(node.namespaceURI, node.localName)] = node + node.ownerElement = clone + return clone + + def unlink(self): + for attr in self._attrs.values(): + attr.unlink() + self._attrs = None + self._attrsNS = None + Node.unlink(self) def getAttribute(self, attname): return self._attrs[attname].value @@ -296,7 +385,6 @@ class Element(Node): attr = Attr(qualifiedName, namespaceURI, localname, prefix) attr.__dict__["value"] = attr.__dict__["nodeValue"] = value self.setAttributeNode(attr) - # FIXME: return original node if something changed. def getAttributeNode(self, attrname): return self._attrs.get(attrname) @@ -305,12 +393,23 @@ class Element(Node): return self._attrsNS[(namespaceURI, localName)] def setAttributeNode(self, attr): + if attr.ownerElement not in (None, self): + raise ValueError, "attribute node already owned" old = self._attrs.get(attr.name, None) if old: old.unlink() self._attrs[attr.name] = attr self._attrsNS[(attr.namespaceURI, attr.localName)] = attr - # FIXME: return old value if something changed + + # This creates a circular reference, but Element.unlink() + # breaks the cycle since the references to the attribute + # dictionaries are tossed. + attr.ownerElement = self + + if old is not attr: + # It might have already been part of this node, in which case + # it doesn't represent a change, and should not be returned. + return old def removeAttribute(self, name): attr = self._attrs[name] @@ -334,16 +433,16 @@ class Element(Node): def __repr__(self): return "<DOM Element: %s at %s>" % (self.tagName, id(self)) - # undocumented def writexml(self, writer): writer.write("<" + self.tagName) - a_names = self._get_attributes().keys() + attrs = self._get_attributes() + a_names = attrs.keys() a_names.sort() for a_name in a_names: writer.write(" %s=\"" % a_name) - _write_data(writer, self._get_attributes()[a_name].value) + _write_data(writer, attrs[a_name].value) writer.write("\"") if self.childNodes: writer.write(">") @@ -358,24 +457,24 @@ class Element(Node): class Comment(Node): nodeType = Node.COMMENT_NODE + nodeName = "#comment" + attributes = None def __init__(self, data): Node.__init__(self) self.data = self.nodeValue = data - self.nodeName = "#comment" - self.attributes = None def writexml(self, writer): writer.write("<!--%s-->" % self.data) class ProcessingInstruction(Node): nodeType = Node.PROCESSING_INSTRUCTION_NODE + attributes = None def __init__(self, target, data): Node.__init__(self) self.target = self.nodeName = target self.data = self.nodeValue = data - self.attributes = None def writexml(self, writer): writer.write("<?%s %s?>" % (self.target, self.data)) @@ -383,11 +482,11 @@ class ProcessingInstruction(Node): class Text(Node): nodeType = Node.TEXT_NODE nodeName = "#text" + attributes = None def __init__(self, data): Node.__init__(self) self.data = self.nodeValue = data - self.attributes = None def __repr__(self): if len(self.data) > 10: @@ -400,8 +499,7 @@ class Text(Node): _write_data(writer, self.data) def _nssplit(qualifiedName): - import string - fields = string.split(qualifiedName,':', 1) + fields = _string.split(qualifiedName, ':', 1) if len(fields) == 2: return fields elif len(fields) == 1: @@ -409,22 +507,18 @@ def _nssplit(qualifiedName): class Document(Node): nodeType = Node.DOCUMENT_NODE + nodeName = "#document" + nodeValue = None + attributes = None documentElement = None - def __init__(self): - Node.__init__(self) - self.attributes = None - self.nodeName = "#document" - self.nodeValue = None - def appendChild(self, node): if node.nodeType == Node.ELEMENT_NODE: if self.documentElement: raise TypeError, "Two document elements disallowed" else: self.documentElement = node - Node.appendChild(self, node) - return node + return Node.appendChild(self, node) createElement = Element @@ -437,12 +531,14 @@ class Document(Node): createAttribute = Attr def createElementNS(self, namespaceURI, qualifiedName): - prefix,localName = _nssplit(qualifiedName) - return Element(qualifiedName, namespaceURI, prefix, localName) + prefix, localName = _nssplit(qualifiedName) + return self.createElement(qualifiedName, namespaceURI, + prefix, localName) def createAttributeNS(self, namespaceURI, qualifiedName): - prefix,localName = _nssplit(qualifiedName) - return Attr(qualifiedName, namespaceURI, localName, prefix) + prefix, localName = _nssplit(qualifiedName) + return self.createAttribute(qualifiedName, namespaceURI, + localName, prefix) def getElementsByTagNameNS(self, namespaceURI, localName): _getElementsByTagNameNSHelper(self, namespaceURI, localName) @@ -460,6 +556,13 @@ class Document(Node): for node in self.childNodes: node.writexml(writer) +def _get_StringIO(): + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + return StringIO() + def _doparse(func, args, kwargs): events = apply(func, args, kwargs) toktype, rootNode = events.getEvent() @@ -468,8 +571,10 @@ def _doparse(func, args, kwargs): def parse(*args, **kwargs): "Parse a file into a DOM by filename or file object" + from xml.dom import pulldom return _doparse(pulldom.parse, args, kwargs) def parseString(*args, **kwargs): "Parse a file into a DOM from a string" + from xml.dom import pulldom return _doparse(pulldom.parseString, args, kwargs) |