diff options
Diffstat (limited to 'Lib/xml/dom/minidom.py')
-rw-r--r-- | Lib/xml/dom/minidom.py | 385 |
1 files changed, 385 insertions, 0 deletions
"""
minidom.py -- a lightweight DOM implementation based on SAX.

Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
   interface
 * SAX 2 namespaces
"""

# pulldom lives next to this module; fall back to the absolute package path
# when the implicit sibling import is not available.
try:
    import pulldom
except ImportError:
    from xml.dom import pulldom
import string
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO
import types


def _nodeKey(node):
    "Returns the string under which `node` is recorded in Node.allnodes."
    return repr(id(node)) + repr(node.__class__)


def _splitQualifiedName(qualifiedName, noPrefix):
    """Splits "prefix:local" into a (prefix, localName) pair.

    `noPrefix` is the prefix value reported when the name has no colon.
    Raises ValueError for names containing more than one colon.
    """
    fields = qualifiedName.split(":")
    if len(fields) == 2:
        return fields[0], fields[1]
    if len(fields) == 1:
        return noPrefix, fields[0]
    raise ValueError("malformed qualified name: " + repr(qualifiedName))


class Node:
    """Base class for all node types in this module.

    Besides the node-type constants it provides a `__getattr__` hook that
    makes `node.foo` and `node._get_foo()` interchangeable: reading a
    missing attribute `foo` calls `_get_foo()`, and asking for a missing
    `_get_foo` yields a callable returning `foo`.
    """
    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12

    # One string key per live node; entries are removed again by unlink().
    allnodes = []

    def __init__(self):
        self.childNodes = []
        Node.allnodes.append(_nodeKey(self))

    def __getattr__(self, key):
        # Never synthesize special methods.
        if key[0:2] == "__":
            raise AttributeError(key)
        state = self.__dict__
        # getattr should never call getattr!  The marker is set around the
        # nested lookups below; finding it here means we were re-entered.
        if "inGetAttr" in state:
            del state["inGetAttr"]
            raise AttributeError(key)

        if key[:5] == "_get_":
            attrname = key[5:]
            state["inGetAttr"] = 1
            try:
                found = hasattr(self, attrname)
            finally:
                # The nested call may already have cleared the marker.
                state.pop("inGetAttr", None)
            if not found:
                raise AttributeError(key)
            return (lambda self=self, attrname=attrname:
                    getattr(self, attrname))

        state["inGetAttr"] = 1
        try:
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                raise AttributeError(key)
        finally:
            state.pop("inGetAttr", None)
        return func()

    def __nonzero__(self):
        # A node is always true, whatever it contains.
        return 1

    __bool__ = __nonzero__

    def toxml(self):
        "Returns the output of writexml() as a string."
        writer = StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        "Returns 1 if the node has at least one child, otherwise 0."
        if self.childNodes:
            return 1
        return 0

    def insertBefore(self, newChild, refChild):
        "Inserts newChild in front of refChild (ValueError if not a child)."
        index = self.childNodes.index(refChild)
        self.childNodes.insert(index, newChild)

    def appendChild(self, node):
        "Adds node to the end of the child list."
        self.childNodes.append(node)

    def unlink(self):
        """Recursively drops the references held by this subtree.

        Children and attribute nodes are unlinked as well and the node is
        removed from Node.allnodes.  Calling it a second time is harmless.
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes.pop().unlink()
        self.childNodes = None
        if self.attributes:
            for attr in self.attributes.values():
                attr.unlink()
        self.attributes = None
        try:
            Node.allnodes.remove(_nodeKey(self))
        except ValueError:
            # Already unregistered by an earlier unlink().
            pass


def _write_data(writer, data):
    "Writes datachars to writer."
    # "&" must be escaped first so the entities added below stay intact.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace("\"", "&quot;")
    data = data.replace(">", "&gt;")
    writer.write(data)


def _closeElement(element):
    "Deletes parentNode on element and on everything in element.elements."
    # NOTE(review): nothing in this module defines an `elements` attribute;
    # presumably it is supplied by whatever builds the tree -- confirm.
    del element.parentNode
    for node in element.elements:
        _closeElement(node)


def _getElementsByTagNameHelper(parent, name, rc):
    "Appends descendant elements named `name` ('*' = any) to rc; returns rc."
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE and \
                (name == "*" or node.tagName == name):
            rc.append(node)
        _getElementsByTagNameHelper(node, name, rc)
    return rc


def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
    """Appends descendant elements matching (nsURI, localName) to rc.

    Either criterion may be "*" to match anything.  Returns rc.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            if ((localName == "*" or node.localName == localName) and
                    (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
    return rc


class Attr(Node):
    "An attribute node; `value` and `nodeValue` are kept identical."
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        assert qName
        # skip setattr for performance
        self.__dict__["nodeName"] = self.__dict__["name"] = qName
        self.__dict__["localName"] = localName or qName
        self.__dict__["prefix"] = prefix
        self.__dict__["namespaceURI"] = namespaceURI
        # nodeValue and value are set elsewhere
        self.attributes = None

    def __setattr__(self, name, value):
        if name in ("value", "nodeValue"):
            self.__dict__["value"] = self.__dict__["nodeValue"] = value
        else:
            self.__dict__[name] = value


class AttributeList:
    # the attribute list is a transient interface to the underlying
    # dictionaries; it shares them with the element rather than copying.
    # `length` is taken once, when the list is created.
    def __init__(self, attrs, attrsNS):
        self.__attrs = attrs
        self.__attrsNS = attrsNS
        self.length = len(self.__attrs)

    def item(self, index):
        "Returns the value of the index-th attribute, or None if out of range."
        try:
            return self[list(self.__attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        "Returns a list of (qualified name, value) pairs."
        return [(node.nodeName, node.value)
                for node in self.__attrs.values()]

    def itemsNS(self):
        "Returns a list of ((namespaceURI, localName), value) pairs."
        return [((node.namespaceURI, node.localName), node.value)
                for node in self.__attrs.values()]

    def keys(self):
        "Returns the qualified attribute names."
        return list(self.__attrs.keys())

    def keysNS(self):
        "Returns the (namespaceURI, localName) keys."
        return list(self.__attrsNS.keys())

    def values(self):
        "Returns the attribute nodes."
        return list(self.__attrs.values())

    def __len__(self):
        return self.length

    def __cmp__(self, other):
        # Two lists are equal when they wrap the same dictionary.
        if self.__attrs is other.__attrs:
            return 0
        mine, theirs = id(self), id(other)
        return (mine > theirs) - (mine < theirs)

    def __eq__(self, other):
        # __cmp__ is not consulted by Python 3; keep "==" working there.
        if not isinstance(other, AttributeList):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        if not isinstance(other, AttributeList):
            return NotImplemented
        return self.__cmp__(other) != 0

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        # A tuple is a (namespaceURI, localName) key, anything else a name.
        if isinstance(attname_or_tuple, tuple):
            return self.__attrsNS[attname_or_tuple].value
        return self.__attrs[attname_or_tuple].value

    def __setitem__(self, attname, value=None):
        raise TypeError("object does not support item assignment")


class Element(Node):
    "An element node with child nodes and doubly-indexed attributes."
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self.__attrs = {}    # attributes are double-indexed:
        self.__attrsNS = {}  #    tagName -> Attribute
                             #    URI,localName -> Attribute
        # in the future: consider lazy generation of attribute objects
        #                this is too tricky for now because of headaches
        #                with namespaces.

    def getAttribute(self, attname):
        "Returns the value of attribute attname (KeyError if absent)."
        return self.__attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        "Returns the value of the attribute keyed (namespaceURI, localName)."
        return self.__attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        "Creates or replaces the attribute attname with the given value."
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        "Creates or replaces a namespaced attribute with the given value."
        prefix, localName = _splitQualifiedName(qualifiedName, None)
        attr = Attr(qualifiedName, namespaceURI, prefix, localName)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNode(self, attr):
        "Stores attr under both its name and its (URI, localName) key."
        self.__attrs[attr.name] = attr
        self.__attrsNS[(attr.namespaceURI, attr.localName)] = attr

    def removeAttribute(self, name):
        "Removes the attribute called name (KeyError if absent)."
        attr = self.__attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        "Removes the attribute keyed (namespaceURI, localName)."
        attr = self.__attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        "Removes the attribute node from both indexes."
        del self.__attrs[node.name]
        del self.__attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        "Returns the descendant elements called name ('*' matches all)."
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        "Returns the descendant elements matching (namespaceURI, localName)."
        return _getElementsByTagNameNSHelper(self, namespaceURI,
                                             localName, [])

    def __repr__(self):
        return "<DOM Element:" + self.tagName + " at " + repr(id(self)) + " >"

    def writexml(self, writer):
        "Writes the element, its attributes (sorted by name) and children."
        writer.write("<" + self.tagName)

        attrs = self._get_attributes()
        for a_name in sorted(attrs.keys()):
            writer.write(" " + a_name + "=\"")
            _write_data(writer, attrs[a_name])
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</" + self.tagName + ">")
        else:
            writer.write("/>")

    def _get_attributes(self):
        # Backs the `attributes` attribute via Node.__getattr__.
        return AttributeList(self.__attrs, self.__attrsNS)


class Comment(Node):
    "A comment node; `data` is written out verbatim."
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.data = self.nodeValue = data
        self.nodeName = "#comment"
        self.attributes = None

    def writexml(self, writer):
        writer.write("<!--" + self.data + "-->")


class ProcessingInstruction(Node):
    "A processing instruction node with a target and data string."
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.target = self.nodeName = target
        self.data = self.nodeValue = data
        self.attributes = None

    def writexml(self, writer):
        writer.write("<?" + self.target + " " + self.data + "?>")


class Text(Node):
    "A text node; `data` is escaped when written."
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.data = self.nodeValue = data
        self.attributes = None

    def __repr__(self):
        # Show at most the first ten characters of the text.
        if len(self.data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return "<DOM Text node \"" + self.data[0:10] + dotdotdot + "\">"

    def writexml(self, writer):
        _write_data(writer, self.data)


class Document(Node):
    "The document node; also the factory for the other node types."
    nodeType = Node.DOCUMENT_NODE

    def __init__(self):
        Node.__init__(self)
        self.documentElement = None
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None

    # The simple factories are the node classes themselves.
    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        "Creates an Element; an unprefixed name gets the prefix ''."
        prefix, localName = _splitQualifiedName(qualifiedName, '')
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        "Creates an Attr; an unprefixed name gets the prefix None."
        prefix, localName = _splitQualifiedName(qualifiedName, None)
        return Attr(qualifiedName, namespaceURI, prefix, localName)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        "Returns the elements matching (namespaceURI, localName)."
        return _getElementsByTagNameNSHelper(self, namespaceURI,
                                             localName, [])

    def close(self):
        "Runs _closeElement on every node in self.elements."
        # NOTE(review): `elements` is not defined anywhere in this module;
        # presumably set by the code that builds the document -- confirm.
        for node in self.elements:
            _closeElement(node)

    def unlink(self):
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        "Returns the elements called name ('*' matches all)."
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        for node in self.childNodes:
            node.writexml(writer)


def _doparse(func, args, kwargs):
    "Calls func, takes its first event's node, expands and returns it."
    events = func(*args, **kwargs)
    (toktype, rootNode) = events.getEvent()
    events.expandNode(rootNode)
    return rootNode


def parse(*args, **kwargs):
    "Builds a tree via pulldom.parse; arguments are passed through."
    return _doparse(pulldom.parse, args, kwargs)


def parseString(*args, **kwargs):
    "Builds a tree via pulldom.parseString; arguments are passed through."
    return _doparse(pulldom.parseString, args, kwargs)