1 files changed, 181 insertions, 76 deletions
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py
index 6a72684..bf8166a 100644
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -14,10 +14,19 @@ Todo:
  * SAX 2 namespaces
 """
 
-import pulldom
 import string
-from StringIO import StringIO
+_string = string
+del string
+
+# localize the types, and allow support for Unicode values if available:
 import types
+_TupleType = types.TupleType
+try:
+    _StringTypes = (types.StringType, types.UnicodeType)
+except AttributeError:
+    _StringTypes = (types.StringType,)
+del types
+
 
 class Node:
     ELEMENT_NODE                = 1
@@ -44,7 +53,7 @@ class Node:
             index = repr(id(self)) + repr(self.__class__)
             Node.allnodes[index] = repr(self.__dict__)
             if Node.debug is None:
-                Node.debug = StringIO()
+                Node.debug = _get_StringIO()
                 #open( "debug4.out", "w" )
             Node.debug.write("create %s\n" % index)
 
@@ -79,7 +88,7 @@ class Node:
         return 1
 
     def toxml(self):
-        writer = StringIO()
+        writer = _get_StringIO()
         self.writexml(writer)
         return writer.getvalue()
 
@@ -90,16 +99,30 @@ class Node:
             return 0
 
     def _get_firstChild(self):
-        return self.childNodes[0]
+        if self.childNodes:
+            return self.childNodes[0]
 
     def _get_lastChild(self):
-        return self.childNodes[-1]
+        if self.childNodes:
+            return self.childNodes[-1]
 
     def insertBefore(self, newChild, refChild):
-        index = self.childNodes.index(refChild)
-        self.childNodes.insert(index, newChild)
-        if self._makeParentNodes:
-            newChild.parentNode = self
+        if refChild is None:
+            self.appendChild(newChild)
+        else:
+            index = self.childNodes.index(refChild)
+            self.childNodes.insert(index, newChild)
+            newChild.nextSibling = refChild
+            refChild.previousSibling = newChild
+            if index:
+                node = self.childNodes[index-1]
+                node.nextSibling = newChild
+                newChild.previousSibling = node
+            else:
+                newChild.previousSibling = None
+            if self._makeParentNodes:
+                newChild.parentNode = self
+        return newChild
 
     def appendChild(self, node):
         if self.childNodes:
@@ -110,39 +133,69 @@ class Node:
             node.previousSibling = None
         node.nextSibling = None
         self.childNodes.append(node)
+        if self._makeParentNodes:
+            node.parentNode = self
         return node
 
     def replaceChild(self, newChild, oldChild):
+        if newChild is oldChild:
+            return
         index = self.childNodes.index(oldChild)
-        self.childNodes[index] = oldChild
+        self.childNodes[index] = newChild
+        if self._makeParentNodes:
+            newChild.parentNode = self
+            oldChild.parentNode = None
+        newChild.nextSibling = oldChild.nextSibling
+        newChild.previousSibling = oldChild.previousSibling
+        oldChild.newChild = None
+        oldChild.previousSibling = None
+        return oldChild
 
     def removeChild(self, oldChild):
-        index = self.childNodes.index(oldChild)
-        del self.childNodes[index]
+        self.childNodes.remove(oldChild)
+        if self._makeParentNodes:
+            oldChild.parentNode = None
+        return oldChild
+
+    def normalize(self):
+        if len(self.childNodes) > 1:
+            L = [self.childNodes[0]]
+            for child in self.childNodes[1:]:
+                if (  child.nodeType == Node.TEXT_NODE
+                      and L[-1].nodeType == child.nodeType):
+                    # collapse text node
+                    node = L[-1]
+                    node.data = node.nodeValue = node.data + child.data
+                    node.nextSibling = child.nextSibling
+                    child.unlink()
+                else:
+                    L[-1].nextSibling = child
+                    child.previousSibling = L[-1]
+                    L.append(child)
+                    child.normalize()
+            self.childNodes = L
+        elif self.childNodes:
+            # exactly one child -- just recurse
+            self.childNodes[0].normalize()
 
     def cloneNode(self, deep):
         import new
-        clone = new.instance(self.__class__, self.__dict__)
-        clone.attributes = self.attributes.copy()
-        if not deep:
-            clone.childNodes = []
-        else:
-            clone.childNodes = map(lambda x: x.cloneNode, self.childNodes)
+        clone = new.instance(self.__class__, self.__dict__.copy())
+        if self._makeParentNodes:
+            clone.parentNode = None
+        clone.childNodes = []
+        if deep:
+            for child in self.childNodes:
+                clone.appendChild(child.cloneNode(1))
         return clone
 
     def unlink(self):
         self.parentNode = None
-        while self.childNodes:
-            self.childNodes[-1].unlink()
-            del self.childNodes[-1] # probably not most efficient!
+        for child in self.childNodes:
+            child.unlink()
         self.childNodes = None
         self.previousSibling = None
         self.nextSibling = None
-        if self.attributes:
-            for attr in self._attrs.values():
-                self.removeAttributeNode(attr)
-            assert not len(self._attrs)
-            assert not len(self._attrsNS)
         if Node._debug:
             index = repr(id(self)) + repr(self.__class__)
             self.debug.write("Deleting: %s\n" % index)
@@ -150,10 +203,11 @@ class Node:
 
 def _write_data(writer, data):
     "Writes datachars to writer."
-    data = string.replace(data, "&", "&amp;")
-    data = string.replace(data, "<", "&lt;")
-    data = string.replace(data, "\"", "&quot;")
-    data = string.replace(data, ">", "&gt;")
+    replace = _string.replace
+    data = replace(data, "&", "&amp;")
+    data = replace(data, "<", "&lt;")
+    data = replace(data, "\"", "&quot;")
+    data = replace(data, ">", "&gt;")
     writer.write(data)
 
 def _getElementsByTagNameHelper(parent, name, rc):
@@ -174,14 +228,16 @@ def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
 
 class Attr(Node):
     nodeType = Node.ATTRIBUTE_NODE
+    attributes = None
+    ownerElement = None
 
     def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
         # skip setattr for performance
-        self.__dict__["localName"] = localName or qName
-        self.__dict__["nodeName"] = self.__dict__["name"] = qName
-        self.__dict__["namespaceURI"] = namespaceURI
-        self.__dict__["prefix"] = prefix
-        self.attributes = None
+        d = self.__dict__
+        d["localName"] = localName or qName
+        d["nodeName"] = d["name"] = qName
+        d["namespaceURI"] = namespaceURI
+        d["prefix"] = prefix
         Node.__init__(self)
         # nodeValue and value are set elsewhere
 
@@ -191,14 +247,21 @@ class Attr(Node):
         else:
             self.__dict__[name] = value
 
+    def cloneNode(self, deep):
+        clone = Node.cloneNode(self, deep)
+        if clone.__dict__.has_key("ownerElement"):
+            del clone.ownerElement
+        return clone
+
 class AttributeList:
-    """the attribute list is a transient interface to the underlying
-    dictionaries.  mutations here will change the underlying element's
+    """The attribute list is a transient interface to the underlying
+    dictionaries.  Mutations here will change the underlying element's
     dictionary"""
+
     def __init__(self, attrs, attrsNS):
         self._attrs = attrs
         self._attrsNS = attrsNS
-        self.length = len(self._attrs.keys())
+        self.length = len(self._attrs)
 
     def item(self, index):
         try:
@@ -207,12 +270,16 @@ class AttributeList:
             return None
 
     def items(self):
-        return map(lambda node: (node.tagName, node.value),
-                   self._attrs.values())
+        L = []
+        for node in self._attrs.values():
+            L.append((node.tagName, node.value))
+        return L
 
     def itemsNS(self):
-        return map(lambda node: ((node.URI, node.localName), node.value),
-                   self._attrs.values())
+        L = []
+        for node in self._attrs.values():
+            L.append(((node.URI, node.localName), node.value))
+        return L
 
     def keys(self):
         return self._attrs.keys()
@@ -234,18 +301,19 @@ class AttributeList:
 
     #FIXME: is it appropriate to return .value?
     def __getitem__(self, attname_or_tuple):
-        if type(attname_or_tuple) is types.TupleType:
+        if type(attname_or_tuple) is _TupleType:
             return self._attrsNS[attname_or_tuple]
         else:
             return self._attrs[attname_or_tuple]
 
     # same as set
     def __setitem__(self, attname, value):
-        if type(value) is types.StringType:
+        if type(value) in _StringTypes:
             node = Attr(attname)
-            node.value=value
+            node.value = value
         else:
-            assert isinstance(value, Attr) or type(value) is types.StringType
+            if not isinstance(value, Attr):
+                raise TypeError, "value must be a string or Attr object"
             node = value
         old = self._attrs.get(attname, None)
         if old:
@@ -261,6 +329,8 @@ class AttributeList:
 
 class Element(Node):
     nodeType = Node.ELEMENT_NODE
+    nextSibling = None
+    previousSibling = None
 
     def __init__(self, tagName, namespaceURI="", prefix="",
                  localName=None):
@@ -271,12 +341,31 @@ class Element(Node):
         self.namespaceURI = namespaceURI
         self.nodeValue = None
 
-        self._attrs={}  # attributes are double-indexed:
-        self._attrsNS={}#    tagName -> Attribute
-                #    URI,localName -> Attribute
-                # in the future: consider lazy generation of attribute objects
-                #                this is too tricky for now because of headaches
-                #                with namespaces.
+        self._attrs = {}   # attributes are double-indexed:
+        self._attrsNS = {} #    tagName -> Attribute
+                           #    URI,localName -> Attribute
+                           # in the future: consider lazy generation
+                           # of attribute objects this is too tricky
+                           # for now because of headaches with
+                           # namespaces.
+
+    def cloneNode(self, deep):
+        clone = Node.cloneNode(self, deep)
+        clone._attrs = {}
+        clone._attrsNS = {}
+        for attr in self._attrs.values():
+            node = attr.cloneNode(1)
+            clone._attrs[node.name] = node
+            clone._attrsNS[(node.namespaceURI, node.localName)] = node
+            node.ownerElement = clone
+        return clone
+
+    def unlink(self):
+        for attr in self._attrs.values():
+            attr.unlink()
+        self._attrs = None
+        self._attrsNS = None
+        Node.unlink(self)
 
     def getAttribute(self, attname):
         return self._attrs[attname].value
@@ -296,7 +385,6 @@ class Element(Node):
         attr = Attr(qualifiedName, namespaceURI, localname, prefix)
         attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
         self.setAttributeNode(attr)
-        # FIXME: return original node if something changed.
 
     def getAttributeNode(self, attrname):
         return self._attrs.get(attrname)
@@ -305,12 +393,23 @@ class Element(Node):
         return self._attrsNS[(namespaceURI, localName)]
 
     def setAttributeNode(self, attr):
+        if attr.ownerElement not in (None, self):
+            raise ValueError, "attribute node already owned"
         old = self._attrs.get(attr.name, None)
         if old:
             old.unlink()
         self._attrs[attr.name] = attr
         self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
-        # FIXME: return old value if something changed
+
+        # This creates a circular reference, but Element.unlink()
+        # breaks the cycle since the references to the attribute
+        # dictionaries are tossed.
+        attr.ownerElement = self
+
+        if old is not attr:
+            # It might have already been part of this node, in which case
+            # it doesn't represent a change, and should not be returned.
+            return old
 
     def removeAttribute(self, name):
         attr = self._attrs[name]
@@ -334,16 +433,16 @@ class Element(Node):
     def __repr__(self):
         return "<DOM Element: %s at %s>" % (self.tagName, id(self))
 
-    # undocumented
     def writexml(self, writer):
         writer.write("<" + self.tagName)
 
-        a_names = self._get_attributes().keys()
+        attrs = self._get_attributes()
+        a_names = attrs.keys()
         a_names.sort()
 
         for a_name in a_names:
             writer.write(" %s=\"" % a_name)
-            _write_data(writer, self._get_attributes()[a_name].value)
+            _write_data(writer, attrs[a_name].value)
             writer.write("\"")
         if self.childNodes:
             writer.write(">")
@@ -358,24 +457,24 @@ class Element(Node):
 
 class Comment(Node):
     nodeType = Node.COMMENT_NODE
+    nodeName = "#comment"
+    attributes = None
 
     def __init__(self, data):
         Node.__init__(self)
         self.data = self.nodeValue = data
-        self.nodeName = "#comment"
-        self.attributes = None
 
     def writexml(self, writer):
         writer.write("<!--%s-->" % self.data)
 
 class ProcessingInstruction(Node):
     nodeType = Node.PROCESSING_INSTRUCTION_NODE
+    attributes = None
 
     def __init__(self, target, data):
         Node.__init__(self)
         self.target = self.nodeName = target
         self.data = self.nodeValue = data
-        self.attributes = None
 
     def writexml(self, writer):
         writer.write("<?%s %s?>" % (self.target, self.data))
@@ -383,11 +482,11 @@ class ProcessingInstruction(Node):
 class Text(Node):
     nodeType = Node.TEXT_NODE
     nodeName = "#text"
+    attributes = None
 
     def __init__(self, data):
         Node.__init__(self)
         self.data = self.nodeValue = data
-        self.attributes = None
 
     def __repr__(self):
         if len(self.data) > 10:
@@ -400,8 +499,7 @@ class Text(Node):
         _write_data(writer, self.data)
 
 def _nssplit(qualifiedName):
-    import string
-    fields = string.split(qualifiedName,':', 1)
+    fields = _string.split(qualifiedName, ':', 1)
     if len(fields) == 2:
         return fields
     elif len(fields) == 1:
@@ -409,22 +507,18 @@ def _nssplit(qualifiedName):
 
 class Document(Node):
     nodeType = Node.DOCUMENT_NODE
+    nodeName = "#document"
+    nodeValue = None
+    attributes = None
     documentElement = None
 
-    def __init__(self):
-        Node.__init__(self)
-        self.attributes = None
-        self.nodeName = "#document"
-        self.nodeValue = None
-
     def appendChild(self, node):
         if node.nodeType == Node.ELEMENT_NODE:
             if self.documentElement:
                 raise TypeError, "Two document elements disallowed"
             else:
                 self.documentElement = node
-        Node.appendChild(self, node)
-        return node
+        return Node.appendChild(self, node)
 
     createElement = Element
 
@@ -437,12 +531,14 @@ class Document(Node):
     createAttribute = Attr
 
     def createElementNS(self, namespaceURI, qualifiedName):
-        prefix,localName = _nssplit(qualifiedName)
-        return Element(qualifiedName, namespaceURI, prefix, localName)
+        prefix, localName = _nssplit(qualifiedName)
+        return self.createElement(qualifiedName, namespaceURI,
+                                  prefix, localName)
 
     def createAttributeNS(self, namespaceURI, qualifiedName):
-        prefix,localName = _nssplit(qualifiedName)
-        return Attr(qualifiedName, namespaceURI, localName, prefix)
+        prefix, localName = _nssplit(qualifiedName)
+        return self.createAttribute(qualifiedName, namespaceURI,
+                                    localName, prefix)
 
     def getElementsByTagNameNS(self, namespaceURI, localName):
         _getElementsByTagNameNSHelper(self, namespaceURI, localName)
@@ -460,6 +556,13 @@ class Document(Node):
         for node in self.childNodes:
             node.writexml(writer)
 
+def _get_StringIO():
+    try:
+        from cStringIO import StringIO
+    except ImportError:
+        from StringIO import StringIO
+    return StringIO()
+
 def _doparse(func, args, kwargs):
     events = apply(func, args, kwargs)
     toktype, rootNode = events.getEvent()
@@ -468,8 +571,10 @@ def _doparse(func, args, kwargs):
 
 def parse(*args, **kwargs):
     "Parse a file into a DOM by filename or file object"
+    from xml.dom import pulldom
     return _doparse(pulldom.parse, args, kwargs)
 
 def parseString(*args, **kwargs):
     "Parse a file into a DOM from a string"
+    from xml.dom import pulldom
     return _doparse(pulldom.parseString, args, kwargs)