diff options
Diffstat (limited to 'Lib/xml/dom/minidom.py')
-rw-r--r-- | Lib/xml/dom/minidom.py | 483 |
1 files changed, 246 insertions, 237 deletions
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 18d82ee..80771ad 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -1,9 +1,4 @@ -import pulldom -import string -from StringIO import StringIO -import types - -""" +"""\ minidom.py -- a lightweight DOM implementation based on SAX. parse( "foo.xml" ) @@ -19,6 +14,11 @@ Todo: * SAX 2 namespaces """ +import pulldom +import string +from StringIO import StringIO +import types + class Node: ELEMENT_NODE = 1 ATTRIBUTE_NODE = 2 @@ -33,229 +33,234 @@ class Node: DOCUMENT_FRAGMENT_NODE = 11 NOTATION_NODE = 12 - allnodes={} - _debug=0 - _makeParentNodes=1 - debug=None + allnodes = {} + _debug = 0 + _makeParentNodes = 1 + debug = None - def __init__( self ): - self.childNodes=[] + def __init__(self): + self.childNodes = [] if Node._debug: - index=repr( id( self ))+repr( self.__class__ ) - Node.allnodes[index]=repr( self.__dict__ ) - if Node.debug==None: - Node.debug=StringIO() + index = repr(id(self)) + repr(self.__class__) + Node.allnodes[index] = repr(self.__dict__) + if Node.debug is None: + Node.debug = StringIO() #open( "debug4.out", "w" ) - Node.debug.write( "create %s\n"%index ) + Node.debug.write("create %s\n" % index) - def __getattr__( self, key ): - if key[0:2]=="__": raise AttributeError + def __getattr__(self, key): + if key[0:2] == "__": + raise AttributeError # getattr should never call getattr! if self.__dict__.has_key("inGetAttr"): del self.inGetAttr raise AttributeError, key - prefix,attrname=key[:5],key[5:] - if prefix=="_get_": - self.inGetAttr=1 - if hasattr( self, attrname ): + prefix, attrname = key[:5], key[5:] + if prefix == "_get_": + self.inGetAttr = 1 + if hasattr(self, attrname): del self.inGetAttr return (lambda self=self, attrname=attrname: - getattr( self, attrname )) + getattr(self, attrname)) else: del self.inGetAttr raise AttributeError, key else: - self.inGetAttr=1 + self.inGetAttr = 1 try: - func = getattr( self, "_get_"+key ) + func = getattr(self, "_get_" + key) except AttributeError: raise AttributeError, key del self.inGetAttr return func() - def __nonzero__(self): return 1 + def __nonzero__(self): + return 1 - def toxml( self ): - writer=StringIO() - self.writexml( writer ) + def toxml(self): + writer = StringIO() + self.writexml(writer) return writer.getvalue() - def hasChildNodes( self ): - if self.childNodes: return 1 - else: return 0 + def hasChildNodes(self): + if self.childNodes: + return 1 + else: + return 0 - def _get_firstChild( self ): + def _get_firstChild(self): return self.childNodes[0] - def _get_lastChild( self ): + def _get_lastChild(self): return self.childNodes[-1] - def insertBefore( self, newChild, refChild): - index=self.childNodes.index( refChild ) - self.childNodes.insert( index, newChild ) + def insertBefore(self, newChild, refChild): + index = self.childNodes.index(refChild) + self.childNodes.insert(index, newChild) if self._makeParentNodes: - newChild.parentNode=self + newChild.parentNode = self - def appendChild( self, node ): - self.childNodes.append( node ) + def appendChild(self, node): + self.childNodes.append(node) return node - def replaceChild( self, newChild, oldChild ): - index=self.childNodes.index( oldChild ) - self.childNodes[index]=oldChild + def replaceChild(self, newChild, oldChild): + index = self.childNodes.index(oldChild) + self.childNodes[index] = oldChild - def removeChild( self, oldChild ): - index=self.childNodes.index( oldChild ) + def removeChild(self, oldChild): + index = self.childNodes.index(oldChild) del self.childNodes[index] - def cloneNode( self, deep ): + def cloneNode(self, deep): import new - clone=new.instance( self.__class__, self.__dict__ ) - clone.attributes=self.attributes.copy() + clone = new.instance(self.__class__, self.__dict__) + clone.attributes = self.attributes.copy() if not deep: - clone.childNodes=[] + clone.childNodes = [] else: - clone.childNodes=map( lambda x: x.cloneNode, self.childNodes ) + clone.childNodes = map(lambda x: x.cloneNode, self.childNodes) return clone - def unlink( self ): - self.parentNode=None + def unlink(self): + self.parentNode = None while self.childNodes: self.childNodes[-1].unlink() del self.childNodes[-1] # probably not most efficient! - self.childNodes=None + self.childNodes = None if self.attributes: for attr in self._attrs.values(): - self.removeAttributeNode( attr ) - assert not len( self._attrs ) - assert not len( self._attrsNS ) + self.removeAttributeNode(attr) + assert not len(self._attrs) + assert not len(self._attrsNS) if Node._debug: - index=repr( id( self ))+repr( self.__class__ ) - self.debug.write( "Deleting: %s\n" % index ) + index = repr(id(self)) + repr(self.__class__) + self.debug.write("Deleting: %s\n" % index) del Node.allnodes[index] -def _write_data( writer, data): +def _write_data(writer, data): "Writes datachars to writer." - data=string.replace(data,"&","&") - data=string.replace(data,"<","<") - data=string.replace(data,"\"",""") - data=string.replace(data,">",">") + data = string.replace(data, "&", "&") + data = string.replace(data, "<", "<") + data = string.replace(data, "\"", """) + data = string.replace(data, ">", ">") writer.write(data) -def _getElementsByTagNameHelper( parent, name, rc ): +def _getElementsByTagNameHelper(parent, name, rc): for node in parent.childNodes: - if node.nodeType==Node.ELEMENT_NODE and\ - (name=="*" or node.tagName==name): - rc.append( node ) - _getElementsByTagNameHelper( node, name, rc ) + if node.nodeType == Node.ELEMENT_NODE and \ + (name == "*" or node.tagName == name): + rc.append(node) + _getElementsByTagNameHelper(node, name, rc) return rc -def _getElementsByTagNameNSHelper( parent, nsURI, localName, rc ): +def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc): for node in parent.childNodes: - if (node.nodeType==Node.ELEMENT_NODE ): - if ((localName=="*" or node.tagName==localName) and - (nsURI=="*" or node.namespaceURI==nsURI)): - rc.append( node ) - _getElementsByTagNameNSHelper( node, name, rc ) + if node.nodeType == Node.ELEMENT_NODE: + if ((localName == "*" or node.tagName == localName) and + (nsURI == "*" or node.namespaceURI == nsURI)): + rc.append(node) + _getElementsByTagNameNSHelper(node, name, rc) class Attr(Node): - nodeType=Node.ATTRIBUTE_NODE - def __init__( self, qName, namespaceURI="", localName=None, -prefix=None ): + nodeType = Node.ATTRIBUTE_NODE + + def __init__(self, qName, namespaceURI="", localName=None, prefix=None): # skip setattr for performance - self.__dict__["localName"]=localName or qName + self.__dict__["localName"] = localName or qName self.__dict__["nodeName"] = self.__dict__["name"] = qName - self.__dict__["namespaceURI"]=namespaceURI - self.__dict__["prefix"]=prefix - self.attributes=None - Node.__init__( self ) + self.__dict__["namespaceURI"] = namespaceURI + self.__dict__["prefix"] = prefix + self.attributes = None + Node.__init__(self) # nodeValue and value are set elsewhere - def __setattr__( self, name, value ): - if name in ("value", "nodeValue" ): - self.__dict__["value"]=self.__dict__["nodeValue"]=value + def __setattr__(self, name, value): + if name in ("value", "nodeValue"): + self.__dict__["value"] = self.__dict__["nodeValue"] = value else: - self.__dict__[name]=value + self.__dict__[name] = value class AttributeList: """the attribute list is a transient interface to the underlying -dictionaries. mutations here will change the underlying element's -dictionary""" - def __init__( self, attrs, attrsNS ): - self._attrs=attrs - self._attrsNS=attrsNS - self.length=len( self._attrs.keys() ) - - def item( self, index ): + dictionaries. mutations here will change the underlying element's + dictionary""" + def __init__(self, attrs, attrsNS): + self._attrs = attrs + self._attrsNS = attrsNS + self.length = len(self._attrs.keys()) + + def item(self, index): try: return self[self.keys()[index]] except IndexError: return None - - def items( self ): - return map( lambda node: (node.tagName, node.value), - self._attrs.values() ) - def itemsNS( self ): - return map( lambda node: ((node.URI, node.localName), node.value), - self._attrs.values() ) + def items(self): + return map(lambda node: (node.tagName, node.value), + self._attrs.values()) + + def itemsNS(self): + return map(lambda node: ((node.URI, node.localName), node.value), + self._attrs.values()) - def keys( self ): + def keys(self): return self._attrs.keys() - def keysNS( self ): + def keysNS(self): return self._attrsNS.keys() - def values( self ): + def values(self): return self._attrs.values() - def __len__( self ): + def __len__(self): return self.length - def __cmp__( self, other ): - if self._attrs is getattr( other, "_attrs", None ): + def __cmp__(self, other): + if self._attrs is getattr(other, "_attrs", None): return 0 else: - return cmp( id( self ), id( other ) ) + return cmp(id(self), id(other)) #FIXME: is it appropriate to return .value? - def __getitem__( self, attname_or_tuple ): - if type( attname_or_tuple ) == types.TupleType: + def __getitem__(self, attname_or_tuple): + if type(attname_or_tuple) is types.TupleType: return self._attrsNS[attname_or_tuple] else: return self._attrs[attname_or_tuple] # same as set - def __setitem__( self, attname, value ): - if type( value ) == types.StringType: - node=Attr( attname ) + def __setitem__(self, attname, value): + if type(value) is types.StringType: + node = Attr(attname) node.value=value else: - assert isinstance( value, Attr ) or type( value )==types.StringType - node=value - old=self._attrs.get( attname, None) + assert isinstance(value, Attr) or type(value) is types.StringType + node = value + old = self._attrs.get(attname, None) if old: old.unlink() - self._attrs[node.name]=node - self._attrsNS[(node.namespaceURI,node.localName)]=node + self._attrs[node.name] = node + self._attrsNS[(node.namespaceURI, node.localName)] = node - def __delitem__( self, attname_or_tuple ): - node=self[attname_or_tuple] + def __delitem__(self, attname_or_tuple): + node = self[attname_or_tuple] node.unlink() del self._attrs[node.name] del self._attrsNS[(node.namespaceURI, node.localName)] - + class Element( Node ): - nodeType=Node.ELEMENT_NODE - def __init__( self, tagName, namespaceURI="", prefix="", - localName=None ): - Node.__init__( self ) + nodeType = Node.ELEMENT_NODE + + def __init__(self, tagName, namespaceURI="", prefix="", + localName=None): + Node.__init__(self) self.tagName = self.nodeName = tagName - self.localName=localName or tagName - self.prefix=prefix - self.namespaceURI=namespaceURI - self.nodeValue=None + self.localName = localName or tagName + self.prefix = prefix + self.namespaceURI = namespaceURI + self.nodeValue = None self._attrs={} # attributes are double-indexed: self._attrsNS={}# tagName -> Attribute @@ -264,191 +269,195 @@ class Element( Node ): # this is too tricky for now because of headaches # with namespaces. - def getAttribute( self, attname ): + def getAttribute(self, attname): return self._attrs[attname].value - def getAttributeNS( self, namespaceURI, localName ): + def getAttributeNS(self, namespaceURI, localName): return self._attrsNS[(namespaceURI, localName)].value - - def setAttribute( self, attname, value ): - attr=Attr( attname ) + + def setAttribute(self, attname, value): + attr = Attr(attname) # for performance - attr.__dict__["value"]=attr.__dict__["nodeValue"]=value - self.setAttributeNode( attr ) + attr.__dict__["value"] = attr.__dict__["nodeValue"] = value + self.setAttributeNode(attr) - def setAttributeNS( self, namespaceURI, qualifiedName, value ): - prefix,localname=_nssplit( qualifiedName ) + def setAttributeNS(self, namespaceURI, qualifiedName, value): + prefix, localname = _nssplit(qualifiedName) # for performance - attr = Attr( qualifiedName, namespaceURI, localname, prefix ) - attr.__dict__["value"]=attr.__dict__["nodeValue"]=value - self.setAttributeNode( attr ) + attr = Attr(qualifiedName, namespaceURI, localname, prefix) + attr.__dict__["value"] = attr.__dict__["nodeValue"] = value + self.setAttributeNode(attr) - def getAttributeNode( self, attrname ): - return self._attrs.get( attrname ) + def getAttributeNode(self, attrname): + return self._attrs.get(attrname) - def getAttributeNodeNS( self, namespaceURI, localName ): + def getAttributeNodeNS(self, namespaceURI, localName): return self._attrsNS[(namespaceURI, localName)] - def setAttributeNode( self, attr ): - old=self._attrs.get( attr.name, None) + def setAttributeNode(self, attr): + old = self._attrs.get(attr.name, None) if old: old.unlink() - self._attrs[attr.name]=attr - self._attrsNS[(attr.namespaceURI,attr.localName)]=attr + self._attrs[attr.name] = attr + self._attrsNS[(attr.namespaceURI, attr.localName)] = attr - def removeAttribute( self, name ): + def removeAttribute(self, name): attr = self._attrs[name] - self.removeAttributeNode( attr ) + self.removeAttributeNode(attr) - def removeAttributeNS( self, namespaceURI, localName ): + def removeAttributeNS(self, namespaceURI, localName): attr = self._attrsNS[(namespaceURI, localName)] - self.removeAttributeNode( attr ) + self.removeAttributeNode(attr) - def removeAttributeNode( self, node ): + def removeAttributeNode(self, node): node.unlink() del self._attrs[node.name] del self._attrsNS[(node.namespaceURI, node.localName)] - def getElementsByTagName( self, name ): - return _getElementsByTagNameHelper( self, name, [] ) + def getElementsByTagName(self, name): + return _getElementsByTagNameHelper(self, name, []) - def getElementsByTagNameNS(self,namespaceURI,localName): - _getElementsByTagNameNSHelper( self, namespaceURI, localName, [] ) + def getElementsByTagNameNS(self, namespaceURI, localName): + _getElementsByTagNameNSHelper(self, namespaceURI, localName, []) - def __repr__( self ): - return "<DOM Element:"+self.tagName+" at "+`id( self )` +" >" + def __repr__(self): + return "<DOM Element: %s at %s>" % (self.tagName, id(self)) def writexml(self, writer): - writer.write("<"+self.tagName) + writer.write("<" + self.tagName) - a_names=self._get_attributes().keys() + a_names = self._get_attributes().keys() a_names.sort() for a_name in a_names: - writer.write(" "+a_name+"=\"") + writer.write(" %s=\"" % a_name) _write_data(writer, self._get_attributes()[a_name]) writer.write("\"") if self.childNodes: writer.write(">") for node in self.childNodes: - node.writexml( writer ) - writer.write("</"+self.tagName+">") + node.writexml(writer) + writer.write("</%s>" % self.tagName) else: writer.write("/>") - def _get_attributes( self ): - return AttributeList( self._attrs, self._attrsNS ) + def _get_attributes(self): + return AttributeList(self._attrs, self._attrsNS) + +class Comment(Node): + nodeType = Node.COMMENT_NODE + + def __init__(self, data): + Node.__init__(self) + self.data = self.nodeValue = data + self.nodeName = "#comment" + self.attributes = None -class Comment( Node ): - nodeType=Node.COMMENT_NODE - def __init__(self, data ): - Node.__init__( self ) - self.data=self.nodeValue=data - self.nodeName="#comment" - self.attributes=None + def writexml(self, writer): + writer.write("<!--%s-->" % self.data) - def writexml( self, writer ): - writer.write( "<!--" + self.data + "-->" ) +class ProcessingInstruction(Node): + nodeType = Node.PROCESSING_INSTRUCTION_NODE -class ProcessingInstruction( Node ): - nodeType=Node.PROCESSING_INSTRUCTION_NODE - def __init__(self, target, data ): - Node.__init__( self ) + def __init__(self, target, data): + Node.__init__(self) self.target = self.nodeName = target self.data = self.nodeValue = data - self.attributes=None + self.attributes = None + + def writexml(self, writer): + writer.write("<?%s %s?>" % (self.target, self.data)) - def writexml( self, writer ): - writer.write( "<?" + self.target +" " + self.data+ "?>" ) +class Text(Node): + nodeType = Node.TEXT_NODE + nodeName = "#text" -class Text( Node ): - nodeType=Node.TEXT_NODE - nodeName="#text" - def __init__(self, data ): - Node.__init__( self ) + def __init__(self, data): + Node.__init__(self) self.data = self.nodeValue = data - self.attributes=None + self.attributes = None def __repr__(self): - if len( self.data )> 10: - dotdotdot="..." + if len(self.data) > 10: + dotdotdot = "..." else: - dotdotdot="" - return "<DOM Text node \"" + self.data[0:10] + dotdotdot+"\">" + dotdotdot = "" + return "<DOM Text node \"%s%s\">" % (self.data[0:10], dotdotdot) - def writexml( self, writer ): - _write_data( writer, self.data ) + def writexml(self, writer): + _write_data(writer, self.data) -def _nssplit( qualifiedName ): - fields = string.split(qualifiedName, ':') +def _nssplit(qualifiedName): + fields = qualifiedName.split(':', 1) if len(fields) == 2: return fields elif len(fields) == 1: - return( '', fields[0] ) - -class Document( Node ): - nodeType=Node.DOCUMENT_NODE - documentElement=None - def __init__( self ): - Node.__init__( self ) - self.attributes=None - self.nodeName="#document" - self.nodeValue=None - - def appendChild( self, node ): - if node.nodeType==Node.ELEMENT_NODE: + return ('', fields[0]) + +class Document(Node): + nodeType = Node.DOCUMENT_NODE + documentElement = None + + def __init__(self): + Node.__init__(self) + self.attributes = None + self.nodeName = "#document" + self.nodeValue = None + + def appendChild(self, node): + if node.nodeType == Node.ELEMENT_NODE: if self.documentElement: raise TypeError, "Two document elements disallowed" else: - self.documentElement=node - Node.appendChild( self, node ) + self.documentElement = node + Node.appendChild(self, node) return node - createElement=Element + createElement = Element - createTextNode=Text + createTextNode = Text - createComment=Comment + createComment = Comment - createProcessingInstruction=ProcessingInstruction + createProcessingInstruction = ProcessingInstruction - createAttribute=Attr + createAttribute = Attr def createElementNS(self, namespaceURI, qualifiedName): - prefix,localName=_nssplit( qualifiedName ) + prefix,localName = _nssplit(qualifiedName) return Element(qualifiedName, namespaceURI, prefix, localName) def createAttributeNS(self, namespaceURI, qualifiedName): - prefix,localName=_nssplit( qualifiedName ) + prefix,localName = _nssplit(qualifiedName) return Attr(namespaceURI, qualifiedName, localName, prefix) - def getElementsByTagNameNS(self,namespaceURI,localName): - _getElementsByTagNameNSHelper( self, namespaceURI, localName ) + def getElementsByTagNameNS(self, namespaceURI, localName): + _getElementsByTagNameNSHelper(self, namespaceURI, localName) - def unlink( self ): - self.documentElement=None - Node.unlink( self ) + def unlink(self): + self.documentElement = None + Node.unlink(self) - def getElementsByTagName( self, name ): - rc=[] - _getElementsByTagNameHelper( self, name, rc ) + def getElementsByTagName(self, name): + rc = [] + _getElementsByTagNameHelper(self, name, rc) return rc - def writexml( self, writer ): + def writexml(self, writer): for node in self.childNodes: - node.writexml( writer ) + node.writexml(writer) -def _doparse( func, args, kwargs ): - events=apply( func, args, kwargs ) - (toktype, rootNode)=events.getEvent() - events.expandNode( rootNode ) +def _doparse(func, args, kwargs): + events = apply(func, args, kwargs) + toktype, rootNode = events.getEvent() + events.expandNode(rootNode) return rootNode -def parse( *args, **kwargs ): +def parse(*args, **kwargs): "Parse a file into a DOM by filename or file object" - return _doparse( pulldom.parse, args, kwargs ) + return _doparse(pulldom.parse, args, kwargs) -def parseString( *args, **kwargs ): +def parseString(*args, **kwargs): "Parse a file into a DOM from a string" - return _doparse( pulldom.parseString, args, kwargs ) + return _doparse(pulldom.parseString, args, kwargs) |