diff options
Diffstat (limited to 'Lib/xml/dom/pulldom.py')
-rw-r--r-- | Lib/xml/dom/pulldom.py | 267 |
1 files changed, 267 insertions, 0 deletions
# Lib/xml/dom/pulldom.py (new file, blob 9c85646)
"""Pull-style DOM builder driven by SAX events.

A PullDOM content handler turns SAX callbacks into a queue of
(event, node) pairs; a DOMEventStream feeds a parser from a stream in
chunks and hands those pairs out one at a time.
"""

try:
    import minidom
except ImportError:
    # Not importable as a sibling module; use the packaged one.
    from xml.dom import minidom
import types
import string
import sys
import pyexpat
try:
    from xml.sax import ExpatParser
except ImportError:
    from xml.sax.expatreader import ExpatParser

#todo: SAX2/namespace handling

START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"


class PullDOM:
    """SAX content handler that queues (event, node) pairs.

    The queue is a singly linked list of two-item lists
    ``[payload, next_cell]``.  ``firstEvent`` is a dummy head cell whose
    second slot points at the oldest pending event; ``lastEvent`` is the
    cell new events are linked onto.
    """

    def __init__(self):
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent

    def _push(self, token, node):
        # Link a new cell onto the tail of the queue and make it the tail.
        self.lastEvent[1] = [(token, node), None]
        self.lastEvent = self.lastEvent[1]

    def _link(self, node):
        # Give `node` its parent and sibling pointers relative to the
        # current node.  The node is NOT added to parent.childNodes here;
        # DOMEventStream.expandNode does that on demand.
        parent = self.curNode
        node.parentNode = parent
        if parent.childNodes:
            node.previousSibling = parent.childNodes[-1]
            node.previousSibling.nextSibling = node

    def setDocumentLocator(self, locator):
        """Accept and ignore the locator."""
        pass

    def startElement(self, tagName, attrs):
        """Create an element for `tagName`, copy `attrs` onto it, make it
        the current node and queue a START_ELEMENT event."""
        if not hasattr(self, "curNode"):
            # FIXME: hack!
            self.startDocument()

        node = self.document.createElement(tagName)  #FIXME namespaces!
        for attr in attrs.keys():
            node.setAttribute(attr, attrs[attr])

        self._link(node)
        self.curNode = node
        # FIXME: do I have to screen namespace attributes
        self._push(START_ELEMENT, node)

    def endElement(self, name):
        """Queue END_ELEMENT for the current node and step back to its
        parent."""
        node = self.curNode
        self._push(END_ELEMENT, node)
        self.curNode = node.parentNode

    def comment(self, s):
        """Queue a COMMENT event carrying a new comment node."""
        node = self.document.createComment(s)
        self._link(node)
        self._push(COMMENT, node)

    def processingInstruction(self, target, data):
        """Queue a PROCESSING_INSTRUCTION event carrying a new PI node."""
        node = self.document.createProcessingInstruction(target, data)
        self._link(node)
        self._push(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        """Queue an IGNORABLE_WHITESPACE event carrying `chars` as text."""
        # The handler receives only `chars`; the former slice used
        # undefined `start`/`length` names and raised NameError.
        node = self.document.createTextNode(chars)
        self._link(node)
        self._push(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Queue a CHARACTERS event carrying `chars` as a text node.

        Only the parent pointer is set; sibling pointers are left alone.
        """
        node = self.document.createTextNode(chars)
        node.parentNode = self.curNode
        self._push(CHARACTERS, node)

    def startDocument(self):
        """Create the document, make it the current node and queue
        START_DOCUMENT."""
        node = self.curNode = self.document = minidom.Document()
        node.parentNode = None
        self._push(START_DOCUMENT, node)

    def endDocument(self):
        """Record the document element, if any, and queue END_DOCUMENT."""
        assert not self.curNode.parentNode
        for child in self.curNode.childNodes:
            if child.nodeType == child.ELEMENT_NODE:
                # NOTE(review): assumes documentElement is a plain writable
                # attribute on this minidom's Document -- confirm.
                self.document.documentElement = child
        #if not self.document.documentElement:
        #    raise Error, "No document element"

        # The event carries the document itself.  The loop variable used
        # here before was unbound for a childless document and otherwise
        # was merely the last child.
        self._push(END_DOCUMENT, self.document)


class ErrorHandler:
    """Error handler that prints warnings and raises everything else."""

    def warning(self, exception):
        # Call form with a single argument parses on Python 2 and 3.
        print(exception)

    def error(self, exception):
        raise exception

    def fatalError(self, exception):
        raise exception


class DOMEventStream:
    """Feeds `parser` from `stream` and yields PullDOM events.

    Supports the old-style sequence protocol, so it can be used directly
    in a ``for`` loop.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM as the parser's content handler."""
        self.pulldom = PullDOM()
        self.parser.setContentHandler(self.pulldom)

    def __getitem__(self, pos):
        # `pos` is ignored: events are always delivered in stream order.
        rc = self.getEvent()
        if rc:
            return rc
        raise IndexError

    def expandNode(self, node):
        """Consume events until the one whose node is `node`, appending
        every node seen on the way to its parent's childNodes."""
        event = self.getEvent()
        while event:
            token, cur_node = event
            if cur_node is node:
                return

            if token != END_ELEMENT:
                cur_node.parentNode.childNodes.append(cur_node)
            event = self.getEvent()
        if node.nodeType == minidom.Node.DOCUMENT_NODE:
            for child in node.childNodes:
                if child.nodeType == minidom.Node.ELEMENT_NODE:
                    node.documentElement = child

    def getEvent(self):
        """Return the next (event, node) pair, or None when the stream is
        exhausted."""
        if not self.pulldom.firstEvent[1]:
            # Queue drained: new events must link onto the head cell again.
            self.pulldom.lastEvent = self.pulldom.firstEvent
        while not self.pulldom.firstEvent[1]:
            buf = self.stream.read(self.bufsize)
            if not buf:
                #FIXME: why doesn't Expat close work?
                #self.parser.close()
                return None
            self.parser.feed(buf)
        rc = self.pulldom.firstEvent[1][0]
        self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
        return rc

# FIXME: sax2
#def _getParser( ):
 #   from xml.sax.saxexts import make_parser
    # expat doesn't report errors properly! Figure it out
 #   return make_parser()
 #   return make_parser("xml.sax.drivers.drv_xmllib")


def _getParser():
    return ExpatParser()

default_bufsize = (2 ** 14) - 20


# FIXME: move into sax package for common usage
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
    """Return a DOMEventStream over a file name or an open stream.

    A plain string is treated as a file name and opened; anything else is
    used as the stream.  A default parser is created when none is given.
    """
    if isinstance(stream_or_string, type("")):
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if not parser:
        parser = _getParser()
    return DOMEventStream(stream, parser, bufsize)


def parseString(string, parser=None):
    """Return a DOMEventStream over the XML text in `string`.

    The whole text is offered to the parser in a single chunk.  A default
    parser is created when none is given.
    """
    try:
        from cStringIO import StringIO
    except ImportError:
        try:
            from StringIO import StringIO
        except ImportError:
            from io import StringIO

    bufsize = len(string)
    # Keep the stream: it was previously built and thrown away, and an
    # undefined name was passed on instead.
    buf = StringIO(string)
    if not parser:
        parser = _getParser()
    return DOMEventStream(buf, parser, bufsize)


#FIXME: Use Lars' instead!!!
class SAX_expat:
    "SAX driver for the Pyexpat C module."

    def __init__(self):
        self.parser = pyexpat.ParserCreate()
        self.started = 0
        self.doc_handler = None
        self.err_handler = None

    def setDocumentHandler(self, handler):
        """Route the expat callbacks to `handler`."""
        self.parser.StartElementHandler = handler.startElement
        self.parser.EndElementHandler = handler.endElement
        # The handlers in this module name their text callback
        # `characters`.
        self.parser.CharacterDataHandler = handler.characters
        self.parser.ProcessingInstructionHandler = handler.processingInstruction
        self.doc_handler = handler

    def setErrorHandler(self, handler):
        self.err_handler = handler

    # --- Locator methods. Only usable after errors.

    def getLineNumber(self):
        return self.parser.ErrorLineNumber

    def getColumnNumber(self):
        return self.parser.ErrorColumnNumber

    # --- Internal

    def __report_error(self):
        msg = pyexpat.ErrorString(self.parser.ErrorCode)
        self.err_handler.fatalError(msg)

    # --- EXPERIMENTAL PYTHON SAX EXTENSIONS

    def get_parser_name(self):
        return "pyexpat"

    def get_parser_version(self):
        return "Unknown"

    def get_driver_version(self):
        # No version is defined for this driver anywhere in the module.
        return "Unknown"

    def is_validating(self):
        return 0

    def is_dtd_reading(self):
        return 0

    def reset(self):
        """Start over with a fresh expat parser, keeping the handlers."""
        self.parser = pyexpat.ParserCreate()
        # A new parser means a new document, so startDocument is due again.
        self.started = 0
        if self.doc_handler is not None:
            # The callbacks live on the document handler, not on the driver.
            self.setDocumentHandler(self.doc_handler)

    def feed(self, data):
        """Parse one chunk, signalling startDocument before the first."""
        if not self.started:
            self.doc_handler.startDocument()
            self.started = 1
        if not self.parser.Parse(data):
            self.__report_error()

    def close(self):
        """Finish parsing, signal endDocument and drop the parser."""
        if not self.parser.Parse("", 1):
            self.__report_error()
        self.doc_handler.endDocument()
        self.parser = None