From 1f54902e05de94848fe728a24baf14dd605a65de Mon Sep 17 00:00:00 2001 From: Fred Drake Date: Sun, 24 Sep 2000 05:21:58 +0000 Subject: Conform to the Python style guide. --- Lib/xml/dom/minidom.py | 483 +++++++++++++++++++++++++------------------------ Lib/xml/dom/pulldom.py | 246 +++++++++++++------------ 2 files changed, 367 insertions(+), 362 deletions(-) diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 18d82ee..80771ad 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -1,9 +1,4 @@ -import pulldom -import string -from StringIO import StringIO -import types - -""" +"""\ minidom.py -- a lightweight DOM implementation based on SAX. parse( "foo.xml" ) @@ -19,6 +14,11 @@ Todo: * SAX 2 namespaces """ +import pulldom +import string +from StringIO import StringIO +import types + class Node: ELEMENT_NODE = 1 ATTRIBUTE_NODE = 2 @@ -33,229 +33,234 @@ class Node: DOCUMENT_FRAGMENT_NODE = 11 NOTATION_NODE = 12 - allnodes={} - _debug=0 - _makeParentNodes=1 - debug=None + allnodes = {} + _debug = 0 + _makeParentNodes = 1 + debug = None - def __init__( self ): - self.childNodes=[] + def __init__(self): + self.childNodes = [] if Node._debug: - index=repr( id( self ))+repr( self.__class__ ) - Node.allnodes[index]=repr( self.__dict__ ) - if Node.debug==None: - Node.debug=StringIO() + index = repr(id(self)) + repr(self.__class__) + Node.allnodes[index] = repr(self.__dict__) + if Node.debug is None: + Node.debug = StringIO() #open( "debug4.out", "w" ) - Node.debug.write( "create %s\n"%index ) + Node.debug.write("create %s\n" % index) - def __getattr__( self, key ): - if key[0:2]=="__": raise AttributeError + def __getattr__(self, key): + if key[0:2] == "__": + raise AttributeError # getattr should never call getattr! if self.__dict__.has_key("inGetAttr"): del self.inGetAttr raise AttributeError, key - prefix,attrname=key[:5],key[5:] - if prefix=="_get_": - self.inGetAttr=1 - if hasattr( self, attrname ): + prefix, attrname = key[:5], key[5:] + if prefix == "_get_": + self.inGetAttr = 1 + if hasattr(self, attrname): del self.inGetAttr return (lambda self=self, attrname=attrname: - getattr( self, attrname )) + getattr(self, attrname)) else: del self.inGetAttr raise AttributeError, key else: - self.inGetAttr=1 + self.inGetAttr = 1 try: - func = getattr( self, "_get_"+key ) + func = getattr(self, "_get_" + key) except AttributeError: raise AttributeError, key del self.inGetAttr return func() - def __nonzero__(self): return 1 + def __nonzero__(self): + return 1 - def toxml( self ): - writer=StringIO() - self.writexml( writer ) + def toxml(self): + writer = StringIO() + self.writexml(writer) return writer.getvalue() - def hasChildNodes( self ): - if self.childNodes: return 1 - else: return 0 + def hasChildNodes(self): + if self.childNodes: + return 1 + else: + return 0 - def _get_firstChild( self ): + def _get_firstChild(self): return self.childNodes[0] - def _get_lastChild( self ): + def _get_lastChild(self): return self.childNodes[-1] - def insertBefore( self, newChild, refChild): - index=self.childNodes.index( refChild ) - self.childNodes.insert( index, newChild ) + def insertBefore(self, newChild, refChild): + index = self.childNodes.index(refChild) + self.childNodes.insert(index, newChild) if self._makeParentNodes: - newChild.parentNode=self + newChild.parentNode = self - def appendChild( self, node ): - self.childNodes.append( node ) + def appendChild(self, node): + self.childNodes.append(node) return node - def replaceChild( self, newChild, oldChild ): - index=self.childNodes.index( oldChild ) - self.childNodes[index]=oldChild + def replaceChild(self, newChild, oldChild): + index = self.childNodes.index(oldChild) + self.childNodes[index] = oldChild - def removeChild( self, oldChild ): - index=self.childNodes.index( oldChild ) + def removeChild(self, oldChild): + index = self.childNodes.index(oldChild) del self.childNodes[index] - def cloneNode( self, deep ): + def cloneNode(self, deep): import new - clone=new.instance( self.__class__, self.__dict__ ) - clone.attributes=self.attributes.copy() + clone = new.instance(self.__class__, self.__dict__) + clone.attributes = self.attributes.copy() if not deep: - clone.childNodes=[] + clone.childNodes = [] else: - clone.childNodes=map( lambda x: x.cloneNode, self.childNodes ) + clone.childNodes = map(lambda x: x.cloneNode, self.childNodes) return clone - def unlink( self ): - self.parentNode=None + def unlink(self): + self.parentNode = None while self.childNodes: self.childNodes[-1].unlink() del self.childNodes[-1] # probably not most efficient! - self.childNodes=None + self.childNodes = None if self.attributes: for attr in self._attrs.values(): - self.removeAttributeNode( attr ) - assert not len( self._attrs ) - assert not len( self._attrsNS ) + self.removeAttributeNode(attr) + assert not len(self._attrs) + assert not len(self._attrsNS) if Node._debug: - index=repr( id( self ))+repr( self.__class__ ) - self.debug.write( "Deleting: %s\n" % index ) + index = repr(id(self)) + repr(self.__class__) + self.debug.write("Deleting: %s\n" % index) del Node.allnodes[index] -def _write_data( writer, data): +def _write_data(writer, data): "Writes datachars to writer." - data=string.replace(data,"&","&") - data=string.replace(data,"<","<") - data=string.replace(data,"\"",""") - data=string.replace(data,">",">") + data = string.replace(data, "&", "&") + data = string.replace(data, "<", "<") + data = string.replace(data, "\"", """) + data = string.replace(data, ">", ">") writer.write(data) -def _getElementsByTagNameHelper( parent, name, rc ): +def _getElementsByTagNameHelper(parent, name, rc): for node in parent.childNodes: - if node.nodeType==Node.ELEMENT_NODE and\ - (name=="*" or node.tagName==name): - rc.append( node ) - _getElementsByTagNameHelper( node, name, rc ) + if node.nodeType == Node.ELEMENT_NODE and \ + (name == "*" or node.tagName == name): + rc.append(node) + _getElementsByTagNameHelper(node, name, rc) return rc -def _getElementsByTagNameNSHelper( parent, nsURI, localName, rc ): +def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc): for node in parent.childNodes: - if (node.nodeType==Node.ELEMENT_NODE ): - if ((localName=="*" or node.tagName==localName) and - (nsURI=="*" or node.namespaceURI==nsURI)): - rc.append( node ) - _getElementsByTagNameNSHelper( node, name, rc ) + if node.nodeType == Node.ELEMENT_NODE: + if ((localName == "*" or node.tagName == localName) and + (nsURI == "*" or node.namespaceURI == nsURI)): + rc.append(node) + _getElementsByTagNameNSHelper(node, name, rc) class Attr(Node): - nodeType=Node.ATTRIBUTE_NODE - def __init__( self, qName, namespaceURI="", localName=None, -prefix=None ): + nodeType = Node.ATTRIBUTE_NODE + + def __init__(self, qName, namespaceURI="", localName=None, prefix=None): # skip setattr for performance - self.__dict__["localName"]=localName or qName + self.__dict__["localName"] = localName or qName self.__dict__["nodeName"] = self.__dict__["name"] = qName - self.__dict__["namespaceURI"]=namespaceURI - self.__dict__["prefix"]=prefix - self.attributes=None - Node.__init__( self ) + self.__dict__["namespaceURI"] = namespaceURI + self.__dict__["prefix"] = prefix + self.attributes = None + Node.__init__(self) # nodeValue and value are set elsewhere - def __setattr__( self, name, value ): - if name in ("value", "nodeValue" ): - self.__dict__["value"]=self.__dict__["nodeValue"]=value + def __setattr__(self, name, value): + if name in ("value", "nodeValue"): + self.__dict__["value"] = self.__dict__["nodeValue"] = value else: - self.__dict__[name]=value + self.__dict__[name] = value class AttributeList: """the attribute list is a transient interface to the underlying -dictionaries. mutations here will change the underlying element's -dictionary""" - def __init__( self, attrs, attrsNS ): - self._attrs=attrs - self._attrsNS=attrsNS - self.length=len( self._attrs.keys() ) - - def item( self, index ): + dictionaries. mutations here will change the underlying element's + dictionary""" + def __init__(self, attrs, attrsNS): + self._attrs = attrs + self._attrsNS = attrsNS + self.length = len(self._attrs.keys()) + + def item(self, index): try: return self[self.keys()[index]] except IndexError: return None - - def items( self ): - return map( lambda node: (node.tagName, node.value), - self._attrs.values() ) - def itemsNS( self ): - return map( lambda node: ((node.URI, node.localName), node.value), - self._attrs.values() ) + def items(self): + return map(lambda node: (node.tagName, node.value), + self._attrs.values()) + + def itemsNS(self): + return map(lambda node: ((node.URI, node.localName), node.value), + self._attrs.values()) - def keys( self ): + def keys(self): return self._attrs.keys() - def keysNS( self ): + def keysNS(self): return self._attrsNS.keys() - def values( self ): + def values(self): return self._attrs.values() - def __len__( self ): + def __len__(self): return self.length - def __cmp__( self, other ): - if self._attrs is getattr( other, "_attrs", None ): + def __cmp__(self, other): + if self._attrs is getattr(other, "_attrs", None): return 0 else: - return cmp( id( self ), id( other ) ) + return cmp(id(self), id(other)) #FIXME: is it appropriate to return .value? - def __getitem__( self, attname_or_tuple ): - if type( attname_or_tuple ) == types.TupleType: + def __getitem__(self, attname_or_tuple): + if type(attname_or_tuple) is types.TupleType: return self._attrsNS[attname_or_tuple] else: return self._attrs[attname_or_tuple] # same as set - def __setitem__( self, attname, value ): - if type( value ) == types.StringType: - node=Attr( attname ) + def __setitem__(self, attname, value): + if type(value) is types.StringType: + node = Attr(attname) node.value=value else: - assert isinstance( value, Attr ) or type( value )==types.StringType - node=value - old=self._attrs.get( attname, None) + assert isinstance(value, Attr) or type(value) is types.StringType + node = value + old = self._attrs.get(attname, None) if old: old.unlink() - self._attrs[node.name]=node - self._attrsNS[(node.namespaceURI,node.localName)]=node + self._attrs[node.name] = node + self._attrsNS[(node.namespaceURI, node.localName)] = node - def __delitem__( self, attname_or_tuple ): - node=self[attname_or_tuple] + def __delitem__(self, attname_or_tuple): + node = self[attname_or_tuple] node.unlink() del self._attrs[node.name] del self._attrsNS[(node.namespaceURI, node.localName)] - + class Element( Node ): - nodeType=Node.ELEMENT_NODE - def __init__( self, tagName, namespaceURI="", prefix="", - localName=None ): - Node.__init__( self ) + nodeType = Node.ELEMENT_NODE + + def __init__(self, tagName, namespaceURI="", prefix="", + localName=None): + Node.__init__(self) self.tagName = self.nodeName = tagName - self.localName=localName or tagName - self.prefix=prefix - self.namespaceURI=namespaceURI - self.nodeValue=None + self.localName = localName or tagName + self.prefix = prefix + self.namespaceURI = namespaceURI + self.nodeValue = None self._attrs={} # attributes are double-indexed: self._attrsNS={}# tagName -> Attribute @@ -264,191 +269,195 @@ class Element( Node ): # this is too tricky for now because of headaches # with namespaces. - def getAttribute( self, attname ): + def getAttribute(self, attname): return self._attrs[attname].value - def getAttributeNS( self, namespaceURI, localName ): + def getAttributeNS(self, namespaceURI, localName): return self._attrsNS[(namespaceURI, localName)].value - - def setAttribute( self, attname, value ): - attr=Attr( attname ) + + def setAttribute(self, attname, value): + attr = Attr(attname) # for performance - attr.__dict__["value"]=attr.__dict__["nodeValue"]=value - self.setAttributeNode( attr ) + attr.__dict__["value"] = attr.__dict__["nodeValue"] = value + self.setAttributeNode(attr) - def setAttributeNS( self, namespaceURI, qualifiedName, value ): - prefix,localname=_nssplit( qualifiedName ) + def setAttributeNS(self, namespaceURI, qualifiedName, value): + prefix, localname = _nssplit(qualifiedName) # for performance - attr = Attr( qualifiedName, namespaceURI, localname, prefix ) - attr.__dict__["value"]=attr.__dict__["nodeValue"]=value - self.setAttributeNode( attr ) + attr = Attr(qualifiedName, namespaceURI, localname, prefix) + attr.__dict__["value"] = attr.__dict__["nodeValue"] = value + self.setAttributeNode(attr) - def getAttributeNode( self, attrname ): - return self._attrs.get( attrname ) + def getAttributeNode(self, attrname): + return self._attrs.get(attrname) - def getAttributeNodeNS( self, namespaceURI, localName ): + def getAttributeNodeNS(self, namespaceURI, localName): return self._attrsNS[(namespaceURI, localName)] - def setAttributeNode( self, attr ): - old=self._attrs.get( attr.name, None) + def setAttributeNode(self, attr): + old = self._attrs.get(attr.name, None) if old: old.unlink() - self._attrs[attr.name]=attr - self._attrsNS[(attr.namespaceURI,attr.localName)]=attr + self._attrs[attr.name] = attr + self._attrsNS[(attr.namespaceURI, attr.localName)] = attr - def removeAttribute( self, name ): + def removeAttribute(self, name): attr = self._attrs[name] - self.removeAttributeNode( attr ) + self.removeAttributeNode(attr) - def removeAttributeNS( self, namespaceURI, localName ): + def removeAttributeNS(self, namespaceURI, localName): attr = self._attrsNS[(namespaceURI, localName)] - self.removeAttributeNode( attr ) + self.removeAttributeNode(attr) - def removeAttributeNode( self, node ): + def removeAttributeNode(self, node): node.unlink() del self._attrs[node.name] del self._attrsNS[(node.namespaceURI, node.localName)] - def getElementsByTagName( self, name ): - return _getElementsByTagNameHelper( self, name, [] ) + def getElementsByTagName(self, name): + return _getElementsByTagNameHelper(self, name, []) - def getElementsByTagNameNS(self,namespaceURI,localName): - _getElementsByTagNameNSHelper( self, namespaceURI, localName, [] ) + def getElementsByTagNameNS(self, namespaceURI, localName): + _getElementsByTagNameNSHelper(self, namespaceURI, localName, []) - def __repr__( self ): - return "" + def __repr__(self): + return "" % (self.tagName, id(self)) def writexml(self, writer): - writer.write("<"+self.tagName) + writer.write("<" + self.tagName) - a_names=self._get_attributes().keys() + a_names = self._get_attributes().keys() a_names.sort() for a_name in a_names: - writer.write(" "+a_name+"=\"") + writer.write(" %s=\"" % a_name) _write_data(writer, self._get_attributes()[a_name]) writer.write("\"") if self.childNodes: writer.write(">") for node in self.childNodes: - node.writexml( writer ) - writer.write("") + node.writexml(writer) + writer.write("" % self.tagName) else: writer.write("/>") - def _get_attributes( self ): - return AttributeList( self._attrs, self._attrsNS ) + def _get_attributes(self): + return AttributeList(self._attrs, self._attrsNS) + +class Comment(Node): + nodeType = Node.COMMENT_NODE + + def __init__(self, data): + Node.__init__(self) + self.data = self.nodeValue = data + self.nodeName = "#comment" + self.attributes = None -class Comment( Node ): - nodeType=Node.COMMENT_NODE - def __init__(self, data ): - Node.__init__( self ) - self.data=self.nodeValue=data - self.nodeName="#comment" - self.attributes=None + def writexml(self, writer): + writer.write("" % self.data) - def writexml( self, writer ): - writer.write( "" ) +class ProcessingInstruction(Node): + nodeType = Node.PROCESSING_INSTRUCTION_NODE -class ProcessingInstruction( Node ): - nodeType=Node.PROCESSING_INSTRUCTION_NODE - def __init__(self, target, data ): - Node.__init__( self ) + def __init__(self, target, data): + Node.__init__(self) self.target = self.nodeName = target self.data = self.nodeValue = data - self.attributes=None + self.attributes = None + + def writexml(self, writer): + writer.write("" % (self.target, self.data)) - def writexml( self, writer ): - writer.write( "" ) +class Text(Node): + nodeType = Node.TEXT_NODE + nodeName = "#text" -class Text( Node ): - nodeType=Node.TEXT_NODE - nodeName="#text" - def __init__(self, data ): - Node.__init__( self ) + def __init__(self, data): + Node.__init__(self) self.data = self.nodeValue = data - self.attributes=None + self.attributes = None def __repr__(self): - if len( self.data )> 10: - dotdotdot="..." + if len(self.data) > 10: + dotdotdot = "..." else: - dotdotdot="" - return "" + dotdotdot = "" + return "" % (self.data[0:10], dotdotdot) - def writexml( self, writer ): - _write_data( writer, self.data ) + def writexml(self, writer): + _write_data(writer, self.data) -def _nssplit( qualifiedName ): - fields = string.split(qualifiedName, ':') +def _nssplit(qualifiedName): + fields = qualifiedName.split(':', 1) if len(fields) == 2: return fields elif len(fields) == 1: - return( '', fields[0] ) - -class Document( Node ): - nodeType=Node.DOCUMENT_NODE - documentElement=None - def __init__( self ): - Node.__init__( self ) - self.attributes=None - self.nodeName="#document" - self.nodeValue=None - - def appendChild( self, node ): - if node.nodeType==Node.ELEMENT_NODE: + return ('', fields[0]) + +class Document(Node): + nodeType = Node.DOCUMENT_NODE + documentElement = None + + def __init__(self): + Node.__init__(self) + self.attributes = None + self.nodeName = "#document" + self.nodeValue = None + + def appendChild(self, node): + if node.nodeType == Node.ELEMENT_NODE: if self.documentElement: raise TypeError, "Two document elements disallowed" else: - self.documentElement=node - Node.appendChild( self, node ) + self.documentElement = node + Node.appendChild(self, node) return node - createElement=Element + createElement = Element - createTextNode=Text + createTextNode = Text - createComment=Comment + createComment = Comment - createProcessingInstruction=ProcessingInstruction + createProcessingInstruction = ProcessingInstruction - createAttribute=Attr + createAttribute = Attr def createElementNS(self, namespaceURI, qualifiedName): - prefix,localName=_nssplit( qualifiedName ) + prefix,localName = _nssplit(qualifiedName) return Element(qualifiedName, namespaceURI, prefix, localName) def createAttributeNS(self, namespaceURI, qualifiedName): - prefix,localName=_nssplit( qualifiedName ) + prefix,localName = _nssplit(qualifiedName) return Attr(namespaceURI, qualifiedName, localName, prefix) - def getElementsByTagNameNS(self,namespaceURI,localName): - _getElementsByTagNameNSHelper( self, namespaceURI, localName ) + def getElementsByTagNameNS(self, namespaceURI, localName): + _getElementsByTagNameNSHelper(self, namespaceURI, localName) - def unlink( self ): - self.documentElement=None - Node.unlink( self ) + def unlink(self): + self.documentElement = None + Node.unlink(self) - def getElementsByTagName( self, name ): - rc=[] - _getElementsByTagNameHelper( self, name, rc ) + def getElementsByTagName(self, name): + rc = [] + _getElementsByTagNameHelper(self, name, rc) return rc - def writexml( self, writer ): + def writexml(self, writer): for node in self.childNodes: - node.writexml( writer ) + node.writexml(writer) -def _doparse( func, args, kwargs ): - events=apply( func, args, kwargs ) - (toktype, rootNode)=events.getEvent() - events.expandNode( rootNode ) +def _doparse(func, args, kwargs): + events = apply(func, args, kwargs) + toktype, rootNode = events.getEvent() + events.expandNode(rootNode) return rootNode -def parse( *args, **kwargs ): +def parse(*args, **kwargs): "Parse a file into a DOM by filename or file object" - return _doparse( pulldom.parse, args, kwargs ) + return _doparse(pulldom.parse, args, kwargs) -def parseString( *args, **kwargs ): +def parseString(*args, **kwargs): "Parse a file into a DOM from a string" - return _doparse( pulldom.parseString, args, kwargs ) + return _doparse(pulldom.parseString, args, kwargs) diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py index ae8ea1c..011d46a 100644 --- a/Lib/xml/dom/pulldom.py +++ b/Lib/xml/dom/pulldom.py @@ -1,159 +1,157 @@ import minidom -import types -import string -import sys import xml.sax #todo: SAX2/namespace handling -START_ELEMENT="START_ELEMENT" -END_ELEMENT="END_ELEMENT" -COMMENT="COMMENT" -START_DOCUMENT="START_DOCUMENT" -END_DOCUMENT="END_DOCUMENT" -PROCESSING_INSTRUCTION="PROCESSING_INSTRUCTION" -IGNORABLE_WHITESPACE="IGNORABLE_WHITESPACE" -CHARACTERS="CHARACTERS" +START_ELEMENT = "START_ELEMENT" +END_ELEMENT = "END_ELEMENT" +COMMENT = "COMMENT" +START_DOCUMENT = "START_DOCUMENT" +END_DOCUMENT = "END_DOCUMENT" +PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION" +IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE" +CHARACTERS = "CHARACTERS" class PullDOM: - def __init__( self ): - self.firstEvent=[None,None] - self.lastEvent=self.firstEvent + def __init__(self): + self.firstEvent = [None, None] + self.lastEvent = self.firstEvent - def setDocumentLocator( self, locator ): pass + def setDocumentLocator(self, locator): pass - def startElement( self, name, tagName , attrs ): - if not hasattr( self, "curNode" ): + def startElement(self, name, tagName, attrs): + if not hasattr(self, "curNode"): # FIXME: hack! - self.startDocument( ) + self.startDocument() - node = self.document.createElement( tagName ) #FIXME namespaces! + node = self.document.createElement(tagName) #FIXME namespaces! for attr in attrs.keys(): - node.setAttribute( attr, attrs[attr] ) - - parent=self.curNode + node.setAttribute(attr, attrs[attr]) + + parent = self.curNode node.parentNode = parent if parent.childNodes: - node.previousSibling=parent.childNodes[-1] - node.previousSibling.nextSibling=node + node.previousSibling = parent.childNodes[-1] + node.previousSibling.nextSibling = node self.curNode = node # FIXME: do I have to screen namespace attributes - self.lastEvent[1]=[(START_ELEMENT, node), None ] - self.lastEvent=self.lastEvent[1] - #self.events.append( (START_ELEMENT, node) ) + self.lastEvent[1] = [(START_ELEMENT, node), None] + self.lastEvent = self.lastEvent[1] + #self.events.append((START_ELEMENT, node)) - def endElement( self, name, tagName ): + def endElement(self, name, tagName): node = self.curNode - self.lastEvent[1]=[(END_ELEMENT, node), None ] - self.lastEvent=self.lastEvent[1] - #self.events.append( (END_ELEMENT, node )) + self.lastEvent[1] = [(END_ELEMENT, node), None] + self.lastEvent = self.lastEvent[1] + #self.events.append((END_ELEMENT, node)) self.curNode = node.parentNode - def comment( self, s): - node = self.document.createComment ( s ) - parent=self.curNode - node.parentNode=parent + def comment(self, s): + node = self.document.createComment(s) + parent = self.curNode + node.parentNode = parent if parent.childNodes: - node.previousSibling=parent.childNodes[-1] - node.previousSibling.nextSibling=node - self.lastEvent[1]=[(COMMENT, node), None ] - self.lastEvent=self.lastEvent[1] - #self.events.append( (COMMENT, node )) - - def processingInstruction( self, target, data ): - node = self.document.createProcessingInstruction( target, data ) - #self.appendChild( node ) + node.previousSibling = parent.childNodes[-1] + node.previousSibling.nextSibling = node + self.lastEvent[1] = [(COMMENT, node), None] + self.lastEvent = self.lastEvent[1] + #self.events.append((COMMENT, node)) + + def processingInstruction(self, target, data): + node = self.document.createProcessingInstruction(target, data) + #self.appendChild(node) - parent=self.curNode - node.parentNode=parent + parent = self.curNode + node.parentNode = parent if parent.childNodes: - node.previousSibling=parent.childNodes[-1] - node.previousSibling.nextSibling=node - self.lastEvent[1]=[(PROCESSING_INSTRUCTION, node), None ] - self.lastEvent=self.lastEvent[1] - #self.events.append( (PROCESSING_INSTRUCTION, node) ) - - def ignorableWhitespace( self, chars ): - node = self.document.createTextNode( chars[start:start+length] ) - parent=self.curNode - node.parentNode=parent + node.previousSibling = parent.childNodes[-1] + node.previousSibling.nextSibling = node + self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None] + self.lastEvent = self.lastEvent[1] + #self.events.append((PROCESSING_INSTRUCTION, node)) + + def ignorableWhitespace(self, chars): + node = self.document.createTextNode(chars[start:start + length]) + parent = self.curNode + node.parentNode = parent if parent.childNodes: - node.previousSibling=parent.childNodes[-1] - node.previousSibling.nextSibling=node - self.lastEvent[1]=[(IGNORABLE_WHITESPACE, node), None ] - self.lastEvent=self.lastEvent[1] - #self.events.append( (IGNORABLE_WHITESPACE, node)) - - def characters( self, chars ): - node = self.document.createTextNode( chars ) - node.parentNode=self.curNode - self.lastEvent[1]=[(CHARACTERS, node), None ] - self.lastEvent=self.lastEvent[1] - - def startDocument( self ): + node.previousSibling = parent.childNodes[-1] + node.previousSibling.nextSibling = node + self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None] + self.lastEvent = self.lastEvent[1] + #self.events.append((IGNORABLE_WHITESPACE, node)) + + def characters(self, chars): + node = self.document.createTextNode(chars) + node.parentNode = self.curNode + self.lastEvent[1] = [(CHARACTERS, node), None] + self.lastEvent = self.lastEvent[1] + + def startDocument(self): node = self.curNode = self.document = minidom.Document() - node.parentNode=None - self.lastEvent[1]=[(START_DOCUMENT, node), None ] - self.lastEvent=self.lastEvent[1] - #self.events.append( (START_DOCUMENT, node) ) - - def endDocument( self ): - assert( not self.curNode.parentNode ) + node.parentNode = None + self.lastEvent[1] = [(START_DOCUMENT, node), None] + self.lastEvent = self.lastEvent[1] + #self.events.append((START_DOCUMENT, node)) + + def endDocument(self): + assert not self.curNode.parentNode for node in self.curNode.childNodes: - if node.nodeType==node.ELEMENT_NODE: - self.document.documentElement = node + if node.nodeType == node.ELEMENT_NODE: + self.document.documentElement = node #if not self.document.documentElement: - # raise Error, "No document element" + # raise Error, "No document element" - self.lastEvent[1]=[(END_DOCUMENT, node), None ] - #self.events.append( (END_DOCUMENT, self.curNode) ) + self.lastEvent[1] = [(END_DOCUMENT, node), None] + #self.events.append((END_DOCUMENT, self.curNode)) class ErrorHandler: - def warning( self, exception ): + def warning(self, exception): print exception - def error( self, exception ): + def error(self, exception): raise exception - def fatalError( self, exception ): + def fatalError(self, exception): raise exception class DOMEventStream: - def __init__( self, stream, parser, bufsize ): - self.stream=stream - self.parser=parser - self.bufsize=bufsize + def __init__(self, stream, parser, bufsize): + self.stream = stream + self.parser = parser + self.bufsize = bufsize self.reset() - def reset( self ): + def reset(self): self.pulldom = PullDOM() - self.parser.setContentHandler( self.pulldom ) + self.parser.setContentHandler(self.pulldom) - def __getitem__( self, pos ): - rc=self.getEvent() - if rc: return rc + def __getitem__(self, pos): + rc = self.getEvent() + if rc: + return rc raise IndexError - def expandNode( self, node ): - event=self.getEvent() + def expandNode(self, node): + event = self.getEvent() while event: - token,cur_node=event - if cur_node is node: return - - if token !=END_ELEMENT: - cur_node.parentNode.appendChild( cur_node ) - event=self.getEvent() - - def getEvent( self ): + token, cur_node = event + if cur_node is node: + return + if token != END_ELEMENT: + cur_node.parentNode.appendChild(cur_node) + event = self.getEvent() + + def getEvent(self): if not self.pulldom.firstEvent[1]: - self.pulldom.lastEvent=self.pulldom.firstEvent + self.pulldom.lastEvent = self.pulldom.firstEvent while not self.pulldom.firstEvent[1]: - buf=self.stream.read( self.bufsize ) + buf=self.stream.read(self.bufsize) if not buf: #FIXME: why doesn't Expat close work? #self.parser.close() return None - self.parser.feed( buf ) - rc=self.pulldom.firstEvent[1][0] - self.pulldom.firstEvent[1]=self.pulldom.firstEvent[1][1] + self.parser.feed(buf) + rc = self.pulldom.firstEvent[1][0] + self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] return rc # FIXME: sax2 @@ -168,27 +166,25 @@ class DOMEventStream: def _getParser(): return xml.sax.make_parser() -default_bufsize=(2**14)-20 +default_bufsize = (2 ** 14) - 20 + # FIXME: move into sax package for common usage -def parse( stream_or_string, parser=None, bufsize=default_bufsize ): - if type( stream_or_string ) == type( "" ): - stream=open( stream_or_string ) +def parse(stream_or_string, parser=None, bufsize=default_bufsize): + if type(stream_or_string) is type(""): + stream = open(stream_or_string) else: - stream=stream_or_string + stream = stream_or_string if not parser: - parser=_getParser() - return DOMEventStream( stream, parser, bufsize ) + parser = _getParser() + return DOMEventStream(stream, parser, bufsize) -def parseString( string, parser=None ): +def parseString(string, parser=None): try: - import cStringIO - stringio=cStringIO.StringIO + from cStringIO import StringIO except ImportError: - import StringIO - stringio=StringIO.StringIO + from StringIO import StringIO - bufsize=len( string ) - buf=stringio( string ) - parser=_getParser() - return DOMEventStream( buf, parser, bufsize ) - + bufsize = len(string) + buf = StringIO(string) + parser = _getParser() + return DOMEventStream(buf, parser, bufsize) -- cgit v0.12