summary | refs | log | tree | commit | diff | stats
path: root/Lib/xml
diff options
context:
space:
mode:
author Fred Drake <fdrake@acm.org> 2000-11-21 22:02:22 (GMT)
committer Fred Drake <fdrake@acm.org> 2000-11-21 22:02:22 (GMT)
commit 4ccf4a1e8a3141e34fba73f08d678b47b3ddb81d (patch)
tree f65989d93cbbc0f80d6be3cc4548c31d0019f116 /Lib/xml
parent 707e964734e4fc245ba4d3c6453adeb67a55867a (diff)
download cpython-4ccf4a1e8a3141e34fba73f08d678b47b3ddb81d.zip
cpython-4ccf4a1e8a3141e34fba73f08d678b47b3ddb81d.tar.gz
cpython-4ccf4a1e8a3141e34fba73f08d678b47b3ddb81d.tar.bz2
Reduce the visibility of imported modules for cleaner "from ... import *"
behavior.

Added support for the Attr.ownerElement attribute.

Everywhere: Define constant object attributes in the classes rather than on
the instances during object construction. This reduces the amount of work
needed for object construction and destruction; these need to be lightweight
operations on a DOM.

Node._get_firstChild(), Node._get_lastChild(): Return None if there are no
children (required for compliance with DOM level 1).

Node.insertBefore(): If refChild is None, append the new node instead of
failing (required for compliance). Also, update the sibling relationships.
Return the inserted node (required for compliance).

Node.appendChild(): Update the parent of the appended node.

Node.replaceChild(): Actually replace the old child! Update the parent and
sibling relationships of both the old and new children. Return the replaced
child (required for compliance).

Node.normalize(): Implemented the normalize() method. Required for
compliance, but missing from the release. Useful for joining adjacent Text
nodes into a single node for easier processing.

Node.cloneNode(): Actually make this work. Don't let the new node share the
instance __dict__ with the original. Do proper recursion if doing a "deep"
clone. Move the attribute cloning out of the base class, since only Element
is supposed to have attributes.

Node.unlink(): Simplify handling of child nodes for efficiency, and remove
the attribute handling since only Element nodes support attributes.

Attr.cloneNode(): Extend this to clear the ownerElement attribute in the
clone.

AttributeList.items(), AttributeList.itemsNS(): Slight performance
improvement (avoid lambda).

Element.cloneNode(): Extend Node.cloneNode() with support for the
attributes. Clone the Attr objects after creating the underlying clone.

Element.unlink(): Clean out the attributes here instead of in the base
class, since this is the only class that will have them.

Element.writexml(): Adjust to create only one AttributeList instance; minor
efficiency improvement.

_nssplit(): No need to re-import string.

Document.__init__(): No longer needed once constant attributes are
initialized in the class itself.

Document.createElementNS(), Document.createAttributeNS(): Use the defined
constructors rather than directly accessing the classes.

_get_StringIO(): New function. Create an output StringIO using the most
efficient available flavor.

parse(), parseString(): Import pulldom here instead of in the public
namespace of the module.
Diffstat (limited to 'Lib/xml')
-rw-r--r-- Lib/xml/dom/minidom.py 257
1 files changed, 181 insertions, 76 deletions
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py
index 6a72684..bf8166a 100644
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -14,10 +14,19 @@ Todo:
* SAX 2 namespaces
"""
-import pulldom
import string
-from StringIO import StringIO
+_string = string
+del string
+
+# localize the types, and allow support for Unicode values if available:
import types
+_TupleType = types.TupleType
+try:
+ _StringTypes = (types.StringType, types.UnicodeType)
+except AttributeError:
+ _StringTypes = (types.StringType,)
+del types
+
class Node:
ELEMENT_NODE = 1
@@ -44,7 +53,7 @@ class Node:
index = repr(id(self)) + repr(self.__class__)
Node.allnodes[index] = repr(self.__dict__)
if Node.debug is None:
- Node.debug = StringIO()
+ Node.debug = _get_StringIO()
#open( "debug4.out", "w" )
Node.debug.write("create %s\n" % index)
@@ -79,7 +88,7 @@ class Node:
return 1
def toxml(self):
- writer = StringIO()
+ writer = _get_StringIO()
self.writexml(writer)
return writer.getvalue()
@@ -90,16 +99,30 @@ class Node:
return 0
def _get_firstChild(self):
- return self.childNodes[0]
+ if self.childNodes:
+ return self.childNodes[0]
def _get_lastChild(self):
- return self.childNodes[-1]
+ if self.childNodes:
+ return self.childNodes[-1]
def insertBefore(self, newChild, refChild):
- index = self.childNodes.index(refChild)
- self.childNodes.insert(index, newChild)
- if self._makeParentNodes:
- newChild.parentNode = self
+ if refChild is None:
+ self.appendChild(newChild)
+ else:
+ index = self.childNodes.index(refChild)
+ self.childNodes.insert(index, newChild)
+ newChild.nextSibling = refChild
+ refChild.previousSibling = newChild
+ if index:
+ node = self.childNodes[index-1]
+ node.nextSibling = newChild
+ newChild.previousSibling = node
+ else:
+ newChild.previousSibling = None
+ if self._makeParentNodes:
+ newChild.parentNode = self
+ return newChild
def appendChild(self, node):
if self.childNodes:
@@ -110,39 +133,69 @@ class Node:
node.previousSibling = None
node.nextSibling = None
self.childNodes.append(node)
+ if self._makeParentNodes:
+ node.parentNode = self
return node
def replaceChild(self, newChild, oldChild):
+ if newChild is oldChild:
+ return
index = self.childNodes.index(oldChild)
- self.childNodes[index] = oldChild
+ self.childNodes[index] = newChild
+ if self._makeParentNodes:
+ newChild.parentNode = self
+ oldChild.parentNode = None
+ newChild.nextSibling = oldChild.nextSibling
+ newChild.previousSibling = oldChild.previousSibling
+ oldChild.newChild = None
+ oldChild.previousSibling = None
+ return oldChild
def removeChild(self, oldChild):
- index = self.childNodes.index(oldChild)
- del self.childNodes[index]
+ self.childNodes.remove(oldChild)
+ if self._makeParentNodes:
+ oldChild.parentNode = None
+ return oldChild
+
+ def normalize(self):
+ if len(self.childNodes) > 1:
+ L = [self.childNodes[0]]
+ for child in self.childNodes[1:]:
+ if ( child.nodeType == Node.TEXT_NODE
+ and L[-1].nodeType == child.nodeType):
+ # collapse text node
+ node = L[-1]
+ node.data = node.nodeValue = node.data + child.data
+ node.nextSibling = child.nextSibling
+ child.unlink()
+ else:
+ L[-1].nextSibling = child
+ child.previousSibling = L[-1]
+ L.append(child)
+ child.normalize()
+ self.childNodes = L
+ elif self.childNodes:
+ # exactly one child -- just recurse
+ self.childNodes[0].normalize()
def cloneNode(self, deep):
import new
- clone = new.instance(self.__class__, self.__dict__)
- clone.attributes = self.attributes.copy()
- if not deep:
- clone.childNodes = []
- else:
- clone.childNodes = map(lambda x: x.cloneNode, self.childNodes)
+ clone = new.instance(self.__class__, self.__dict__.copy())
+ if self._makeParentNodes:
+ clone.parentNode = None
+ clone.childNodes = []
+ if deep:
+ for child in self.childNodes:
+ clone.appendChild(child.cloneNode(1))
return clone
def unlink(self):
self.parentNode = None
- while self.childNodes:
- self.childNodes[-1].unlink()
- del self.childNodes[-1] # probably not most efficient!
+ for child in self.childNodes:
+ child.unlink()
self.childNodes = None
self.previousSibling = None
self.nextSibling = None
- if self.attributes:
- for attr in self._attrs.values():
- self.removeAttributeNode(attr)
- assert not len(self._attrs)
- assert not len(self._attrsNS)
if Node._debug:
index = repr(id(self)) + repr(self.__class__)
self.debug.write("Deleting: %s\n" % index)
@@ -150,10 +203,11 @@ class Node:
def _write_data(writer, data):
"Writes datachars to writer."
- data = string.replace(data, "&", "&amp;")
- data = string.replace(data, "<", "&lt;")
- data = string.replace(data, "\"", "&quot;")
- data = string.replace(data, ">", "&gt;")
+ replace = _string.replace
+ data = replace(data, "&", "&amp;")
+ data = replace(data, "<", "&lt;")
+ data = replace(data, "\"", "&quot;")
+ data = replace(data, ">", "&gt;")
writer.write(data)
def _getElementsByTagNameHelper(parent, name, rc):
@@ -174,14 +228,16 @@ def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
class Attr(Node):
nodeType = Node.ATTRIBUTE_NODE
+ attributes = None
+ ownerElement = None
def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
# skip setattr for performance
- self.__dict__["localName"] = localName or qName
- self.__dict__["nodeName"] = self.__dict__["name"] = qName
- self.__dict__["namespaceURI"] = namespaceURI
- self.__dict__["prefix"] = prefix
- self.attributes = None
+ d = self.__dict__
+ d["localName"] = localName or qName
+ d["nodeName"] = d["name"] = qName
+ d["namespaceURI"] = namespaceURI
+ d["prefix"] = prefix
Node.__init__(self)
# nodeValue and value are set elsewhere
@@ -191,14 +247,21 @@ class Attr(Node):
else:
self.__dict__[name] = value
+ def cloneNode(self, deep):
+ clone = Node.cloneNode(self, deep)
+ if clone.__dict__.has_key("ownerElement"):
+ del clone.ownerElement
+ return clone
+
class AttributeList:
- """the attribute list is a transient interface to the underlying
- dictionaries. mutations here will change the underlying element's
+ """The attribute list is a transient interface to the underlying
+ dictionaries. Mutations here will change the underlying element's
dictionary"""
+
def __init__(self, attrs, attrsNS):
self._attrs = attrs
self._attrsNS = attrsNS
- self.length = len(self._attrs.keys())
+ self.length = len(self._attrs)
def item(self, index):
try:
@@ -207,12 +270,16 @@ class AttributeList:
return None
def items(self):
- return map(lambda node: (node.tagName, node.value),
- self._attrs.values())
+ L = []
+ for node in self._attrs.values():
+ L.append((node.tagName, node.value))
+ return L
def itemsNS(self):
- return map(lambda node: ((node.URI, node.localName), node.value),
- self._attrs.values())
+ L = []
+ for node in self._attrs.values():
+ L.append(((node.URI, node.localName), node.value))
+ return L
def keys(self):
return self._attrs.keys()
@@ -234,18 +301,19 @@ class AttributeList:
#FIXME: is it appropriate to return .value?
def __getitem__(self, attname_or_tuple):
- if type(attname_or_tuple) is types.TupleType:
+ if type(attname_or_tuple) is _TupleType:
return self._attrsNS[attname_or_tuple]
else:
return self._attrs[attname_or_tuple]
# same as set
def __setitem__(self, attname, value):
- if type(value) is types.StringType:
+ if type(value) in _StringTypes:
node = Attr(attname)
- node.value=value
+ node.value = value
else:
- assert isinstance(value, Attr) or type(value) is types.StringType
+ if not isinstance(value, Attr):
+ raise TypeError, "value must be a string or Attr object"
node = value
old = self._attrs.get(attname, None)
if old:
@@ -261,6 +329,8 @@ class AttributeList:
class Element(Node):
nodeType = Node.ELEMENT_NODE
+ nextSibling = None
+ previousSibling = None
def __init__(self, tagName, namespaceURI="", prefix="",
localName=None):
@@ -271,12 +341,31 @@ class Element(Node):
self.namespaceURI = namespaceURI
self.nodeValue = None
- self._attrs={} # attributes are double-indexed:
- self._attrsNS={}# tagName -> Attribute
- # URI,localName -> Attribute
- # in the future: consider lazy generation of attribute objects
- # this is too tricky for now because of headaches
- # with namespaces.
+ self._attrs = {} # attributes are double-indexed:
+ self._attrsNS = {} # tagName -> Attribute
+ # URI,localName -> Attribute
+ # in the future: consider lazy generation
+ # of attribute objects this is too tricky
+ # for now because of headaches with
+ # namespaces.
+
+ def cloneNode(self, deep):
+ clone = Node.cloneNode(self, deep)
+ clone._attrs = {}
+ clone._attrsNS = {}
+ for attr in self._attrs.values():
+ node = attr.cloneNode(1)
+ clone._attrs[node.name] = node
+ clone._attrsNS[(node.namespaceURI, node.localName)] = node
+ node.ownerElement = clone
+ return clone
+
+ def unlink(self):
+ for attr in self._attrs.values():
+ attr.unlink()
+ self._attrs = None
+ self._attrsNS = None
+ Node.unlink(self)
def getAttribute(self, attname):
return self._attrs[attname].value
@@ -296,7 +385,6 @@ class Element(Node):
attr = Attr(qualifiedName, namespaceURI, localname, prefix)
attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
self.setAttributeNode(attr)
- # FIXME: return original node if something changed.
def getAttributeNode(self, attrname):
return self._attrs.get(attrname)
@@ -305,12 +393,23 @@ class Element(Node):
return self._attrsNS[(namespaceURI, localName)]
def setAttributeNode(self, attr):
+ if attr.ownerElement not in (None, self):
+ raise ValueError, "attribute node already owned"
old = self._attrs.get(attr.name, None)
if old:
old.unlink()
self._attrs[attr.name] = attr
self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
- # FIXME: return old value if something changed
+
+ # This creates a circular reference, but Element.unlink()
+ # breaks the cycle since the references to the attribute
+ # dictionaries are tossed.
+ attr.ownerElement = self
+
+ if old is not attr:
+ # It might have already been part of this node, in which case
+ # it doesn't represent a change, and should not be returned.
+ return old
def removeAttribute(self, name):
attr = self._attrs[name]
@@ -334,16 +433,16 @@ class Element(Node):
def __repr__(self):
return "<DOM Element: %s at %s>" % (self.tagName, id(self))
- # undocumented
def writexml(self, writer):
writer.write("<" + self.tagName)
- a_names = self._get_attributes().keys()
+ attrs = self._get_attributes()
+ a_names = attrs.keys()
a_names.sort()
for a_name in a_names:
writer.write(" %s=\"" % a_name)
- _write_data(writer, self._get_attributes()[a_name].value)
+ _write_data(writer, attrs[a_name].value)
writer.write("\"")
if self.childNodes:
writer.write(">")
@@ -358,24 +457,24 @@ class Element(Node):
class Comment(Node):
nodeType = Node.COMMENT_NODE
+ nodeName = "#comment"
+ attributes = None
def __init__(self, data):
Node.__init__(self)
self.data = self.nodeValue = data
- self.nodeName = "#comment"
- self.attributes = None
def writexml(self, writer):
writer.write("<!--%s-->" % self.data)
class ProcessingInstruction(Node):
nodeType = Node.PROCESSING_INSTRUCTION_NODE
+ attributes = None
def __init__(self, target, data):
Node.__init__(self)
self.target = self.nodeName = target
self.data = self.nodeValue = data
- self.attributes = None
def writexml(self, writer):
writer.write("<?%s %s?>" % (self.target, self.data))
@@ -383,11 +482,11 @@ class ProcessingInstruction(Node):
class Text(Node):
nodeType = Node.TEXT_NODE
nodeName = "#text"
+ attributes = None
def __init__(self, data):
Node.__init__(self)
self.data = self.nodeValue = data
- self.attributes = None
def __repr__(self):
if len(self.data) > 10:
@@ -400,8 +499,7 @@ class Text(Node):
_write_data(writer, self.data)
def _nssplit(qualifiedName):
- import string
- fields = string.split(qualifiedName,':', 1)
+ fields = _string.split(qualifiedName, ':', 1)
if len(fields) == 2:
return fields
elif len(fields) == 1:
@@ -409,22 +507,18 @@ def _nssplit(qualifiedName):
class Document(Node):
nodeType = Node.DOCUMENT_NODE
+ nodeName = "#document"
+ nodeValue = None
+ attributes = None
documentElement = None
- def __init__(self):
- Node.__init__(self)
- self.attributes = None
- self.nodeName = "#document"
- self.nodeValue = None
-
def appendChild(self, node):
if node.nodeType == Node.ELEMENT_NODE:
if self.documentElement:
raise TypeError, "Two document elements disallowed"
else:
self.documentElement = node
- Node.appendChild(self, node)
- return node
+ return Node.appendChild(self, node)
createElement = Element
@@ -437,12 +531,14 @@ class Document(Node):
createAttribute = Attr
def createElementNS(self, namespaceURI, qualifiedName):
- prefix,localName = _nssplit(qualifiedName)
- return Element(qualifiedName, namespaceURI, prefix, localName)
+ prefix, localName = _nssplit(qualifiedName)
+ return self.createElement(qualifiedName, namespaceURI,
+ prefix, localName)
def createAttributeNS(self, namespaceURI, qualifiedName):
- prefix,localName = _nssplit(qualifiedName)
- return Attr(qualifiedName, namespaceURI, localName, prefix)
+ prefix, localName = _nssplit(qualifiedName)
+ return self.createAttribute(qualifiedName, namespaceURI,
+ localName, prefix)
def getElementsByTagNameNS(self, namespaceURI, localName):
_getElementsByTagNameNSHelper(self, namespaceURI, localName)
@@ -460,6 +556,13 @@ class Document(Node):
for node in self.childNodes:
node.writexml(writer)
+def _get_StringIO():
+ try:
+ from cStringIO import StringIO
+ except ImportError:
+ from StringIO import StringIO
+ return StringIO()
+
def _doparse(func, args, kwargs):
events = apply(func, args, kwargs)
toktype, rootNode = events.getEvent()
@@ -468,8 +571,10 @@ def _doparse(func, args, kwargs):
def parse(*args, **kwargs):
"Parse a file into a DOM by filename or file object"
+ from xml.dom import pulldom
return _doparse(pulldom.parse, args, kwargs)
def parseString(*args, **kwargs):
"Parse a file into a DOM from a string"
+ from xml.dom import pulldom
return _doparse(pulldom.parseString, args, kwargs)