diff options
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/xml/__init__.py | 4 | ||||
-rw-r--r-- | Lib/xml/dom/minidom.py | 183 | ||||
-rw-r--r-- | Lib/xml/dom/pulldom.py | 77 | ||||
-rw-r--r-- | Lib/xml/sax/__init__.py | 24 |
4 files changed, 144 insertions, 144 deletions
diff --git a/Lib/xml/__init__.py b/Lib/xml/__init__.py index a892772..7daa4ea 100644 --- a/Lib/xml/__init__.py +++ b/Lib/xml/__init__.py @@ -8,6 +8,6 @@ dom -- The W3C Document Object Model. This supports DOM Level 1 + parser -- Python wrappers for XML parsers (currently only supports Expat). sax -- The Simple API for XML, developed by XML-Dev, led by David - Megginson. This supports the SAX 2 API. - + Megginson and ported to Python by Lars Marius Garsholm. This + supports the SAX 2 API. """ diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 32d2d2b..0283fee 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -29,11 +29,19 @@ class Node: DOCUMENT_FRAGMENT_NODE = 11 NOTATION_NODE = 12 - allnodes=[] + allnodes={} + _debug=0 + _makeParentNodes=1 + debug=None def __init__( self ): self.childNodes=[] - Node.allnodes.append( repr( id( self ))+repr( self.__class__ )) + if Node._debug: + index=repr( id( self ))+repr( self.__class__ ) + Node.allnodes[index]=repr( self.__dict__ ) + if Node.debug==None: + Node.debug=open( "debug4.out", "w" ) + Node.debug.write( "create %s\n"%index ) def __getattr__( self, key ): if key[0:2]=="__": raise AttributeError @@ -72,12 +80,39 @@ class Node: if self.childNodes: return 1 else: return 0 + def _get_firstChild( self ): + return self.childNodes[0] + + def _get_lastChild( self ): + return self.childNodes[-1] + def insertBefore( self, newChild, refChild): index=self.childNodes.index( refChild ) self.childNodes.insert( index, newChild ) + if self._makeParentNodes: + newChild.parentNode=self def appendChild( self, node ): self.childNodes.append( node ) + return node + + def replaceChild( self, newChild, oldChild ): + index=self.childNodes.index( oldChild ) + self.childNodes[index]=oldChild + + def removeChild( self, oldChild ): + index=self.childNodes.index( oldChild ) + del self.childNodes[index] + + def cloneNode( self, deep ): + import new + clone=new.instance( self.__class__, self.__dict__ ) + clone.attributes=self.attributes.copy() + if not deep: + clone.childNodes=[] + else: + clone.childNodes=map( lambda x: x.cloneNode, self.childNodes ) + return clone def unlink( self ): self.parentNode=None @@ -86,11 +121,14 @@ class Node: del self.childNodes[-1] # probably not most efficient! self.childNodes=None if self.attributes: - for attr in self.attributes.values(): - attr.unlink() - self.attributes=None - index=Node.allnodes.index( repr( id( self ))+repr( self.__class__ )) - del Node.allnodes[index] + for attr in self._attrs.values(): + self.removeAttributeNode( attr ) + assert not len( self._attrs ) + assert not len( self._attrsNS ) + if Node._debug: + index=repr( id( self ))+repr( self.__class__ ) + self.debug.write( "Deleting: %s\n" % index ) + del Node.allnodes[index] def _write_data( writer, data): "Writes datachars to writer." @@ -100,11 +138,6 @@ def _write_data( writer, data): data=string.replace(data,">",">") writer.write(data) -def _closeElement( element ): - del element.parentNode - for node in element.elements: - _closeElement( node ) - def _getElementsByTagNameHelper( parent, name, rc ): for node in parent.childNodes: if node.nodeType==Node.ELEMENT_NODE and\ @@ -123,17 +156,16 @@ def _getElementsByTagNameNSHelper( parent, nsURI, localName, rc ): class Attr(Node): nodeType=Node.ATTRIBUTE_NODE - def __init__( self, qName, namespaceURI="", prefix="", - localName=None ): - Node.__init__( self ) - assert qName + def __init__( self, qName, namespaceURI="", localName=None, +prefix=None ): # skip setattr for performance - self.__dict__["nodeName"] = self.__dict__["name"] = qName self.__dict__["localName"]=localName or qName - self.__dict__["prefix"]=prefix + self.__dict__["nodeName"] = self.__dict__["name"] = qName self.__dict__["namespaceURI"]=namespaceURI - # nodeValue and value are set elsewhere + self.__dict__["prefix"]=prefix self.attributes=None + Node.__init__( self ) + # nodeValue and value are set elsewhere def __setattr__( self, name, value ): if name in ("value", "nodeValue" ): @@ -142,12 +174,13 @@ class Attr(Node): self.__dict__[name]=value class AttributeList: - # the attribute list is a transient interface to the underlying dictionaries - # mutations here will change the underlying element's dictionary + """the attribute list is a transient interface to the underlying +dictionaries. mutations here will change the underlying element's +dictionary""" def __init__( self, attrs, attrsNS ): - self.__attrs=attrs - self.__attrsNS=attrs - self.length=len( self.__attrs.keys() ) + self._attrs=attrs + self._attrsNS=attrsNS + self.length=len( self._attrs.keys() ) def item( self, index ): try: @@ -157,40 +190,46 @@ class AttributeList: def items( self ): return map( lambda node: (node.tagName, node.value), - self.__attrs.values() ) + self._attrs.values() ) def itemsNS( self ): return map( lambda node: ((node.URI, node.localName), node.value), - self.__attrs.values() ) + self._attrs.values() ) def keys( self ): - return self.__attrs.keys() + return self._attrs.keys() def keysNS( self ): - return self.__attrsNS.keys() + return self._attrsNS.keys() def values( self ): - return self.__attrs.values() + return self._attrs.values() def __len__( self ): return self.length def __cmp__( self, other ): - if self.__attrs is other.__attrs: + if self._attrs is getattr( other, "_attrs", None ): return 0 else: return cmp( id( self ), id( other ) ) #FIXME: is it appropriate to return .value? def __getitem__( self, attname_or_tuple ): - if type( attname_or_tuple ) == type( (1,2) ): - return self.__attrsNS[attname_or_tuple].value + if type( attname_or_tuple ) == type( () ): + return self._attrsNS[attname_or_tuple] else: - return self.__attrs[attname_or_tuple].value + return self._attrs[attname_or_tuple] def __setitem__( self, attname ): raise TypeError, "object does not support item assignment" - + + def __delitem__( self, attname_or_tuple ): + node=self[attname_or_tuple] + node.unlink() + del self._attrs[node.name] + del self._attrsNS[(node.namespaceURI, node.localName)] + class Element( Node ): nodeType=Node.ELEMENT_NODE def __init__( self, tagName, namespaceURI="", prefix="", @@ -202,18 +241,18 @@ class Element( Node ): self.namespaceURI=namespaceURI self.nodeValue=None - self.__attrs={} # attributes are double-indexed: - self.__attrsNS={}# tagName -> Attribute + self._attrs={} # attributes are double-indexed: + self._attrsNS={}# tagName -> Attribute # URI,localName -> Attribute # in the future: consider lazy generation of attribute objects # this is too tricky for now because of headaches # with namespaces. def getAttribute( self, attname ): - return self.__attrs[attname].value + return self._attrs[attname].value def getAttributeNS( self, namespaceURI, localName ): - return self.__attrsNS[(namespaceURI, localName)].value + return self._attrsNS[(namespaceURI, localName)].value def setAttribute( self, attname, value ): attr=Attr( attname ) @@ -222,26 +261,37 @@ class Element( Node ): self.setAttributeNode( attr ) def setAttributeNS( self, namespaceURI, qualifiedName, value ): - attr=createAttributeNS( namespaceURI, qualifiedName ) + prefix,localname=_nssplit( qualifiedName ) # for performance + attr = Attr( qualifiedName, namespaceURI, localname, prefix ) attr.__dict__["value"]=attr.__dict__["nodeValue"]=value self.setAttributeNode( attr ) + def getAttributeNode( self, attrname ): + return self._attrs.get( attrname ) + + def getAttributeNodeNS( self, namespaceURI, localName ): + return self._attrsNS[(namespaceURI, localName)] + def setAttributeNode( self, attr ): - self.__attrs[attr.name]=attr - self.__attrsNS[(attr.namespaceURI,attr.localName)]=attr + old=self._attrs.get( attr.name, None) + if old: + old.unlink() + self._attrs[attr.name]=attr + self._attrsNS[(attr.namespaceURI,attr.localName)]=attr def removeAttribute( self, name ): - attr = self.__attrs[name] + attr = self._attrs[name] self.removeAttributeNode( attr ) def removeAttributeNS( self, namespaceURI, localName ): - attr = self.__attrsNS[(uri, localName)] + attr = self._attrsNS[(namespaceURI, localName)] self.removeAttributeNode( attr ) def removeAttributeNode( self, node ): - del self.__attrs[node.name] - del self.__attrsNS[(node.namespaceURI, node.localName)] + node.unlink() + del self._attrs[node.name] + del self._attrsNS[(node.namespaceURI, node.localName)] def getElementsByTagName( self, name ): return _getElementsByTagNameHelper( self, name, [] ) @@ -271,7 +321,7 @@ class Element( Node ): writer.write("/>") def _get_attributes( self ): - return AttributeList( self.__attrs, self.__attrsNS ) + return AttributeList( self._attrs, self._attrsNS ) class Comment( Node ): nodeType=Node.COMMENT_NODE @@ -313,15 +363,30 @@ class Text( Node ): def writexml( self, writer ): _write_data( writer, self.data ) +def _nssplit( qualifiedName ): + fields = string.split(qualifiedName, ':') + if len(fields) == 2: + return fields + elif len(fields) == 1: + return( '', fields[0] ) + class Document( Node ): nodeType=Node.DOCUMENT_NODE + documentElement=None def __init__( self ): Node.__init__( self ) - self.documentElement=None self.attributes=None self.nodeName="#document" self.nodeValue=None + def appendChild( self, node ): + if node.nodeType==Node.ELEMENT_NODE and self.documentElement: + raise TypeError, "Two document elements disallowed" + else: + self.documentElement=node + Node.appendChild( self, node ) + return node + createElement=Element createTextNode=Text @@ -333,32 +398,16 @@ class Document( Node ): createAttribute=Attr def createElementNS(self, namespaceURI, qualifiedName): - fields = string.split(qualifiedName, ':') - if len(fields) == 2: - prefix = fields[0] - localName = fields[1] - elif len(fields) == 1: - prefix = '' - localName = fields[0] - return Element(self, qualifiedName, namespaceURI, prefix, localName) + prefix,localName=_nssplit( qualifiedName ) + return Element(qualifiedName, namespaceURI, prefix, localName) def createAttributeNS(self, namespaceURI, qualifiedName): - fields = string.split(qualifiedName,':') - if len(fields) == 2: - localName = fields[1] - prefix = fields[0] - elif len(fields) == 1: - localName = fields[0] - prefix = None - return Attr(qualifiedName, namespaceURI, prefix, localName) + prefix,localName=_nssplit( qualifiedName ) + return Attr(namespaceURI, qualifiedName, localName, prefix) def getElementsByTagNameNS(self,namespaceURI,localName): _getElementsByTagNameNSHelper( self, namespaceURI, localName ) - def close( self ): - for node in self.elements: - _closeElement( node ) - def unlink( self ): self.documentElement=None Node.unlink( self ) diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py index 9c85646..0c047f6 100644 --- a/Lib/xml/dom/pulldom.py +++ b/Lib/xml/dom/pulldom.py @@ -2,7 +2,6 @@ import minidom import types import string import sys -import pyexpat from xml.sax import ExpatParser #todo: SAX2/namespace handling @@ -140,12 +139,8 @@ class DOMEventStream: if cur_node is node: return if token !=END_ELEMENT: - cur_node.parentNode.childNodes.append( cur_node ) + cur_node.parentNode.appendChild( cur_node ) event=self.getEvent() - if node.nodeType==minidom.Node.DOCUMENT_NODE: - for child in node.childNodes: - if child.nodeType==minidom.Node.ELEMENT_NODE: - node.documentElement=child def getEvent( self ): if not self.pulldom.firstEvent[1]: @@ -193,75 +188,7 @@ def parseString( string, parser=None ): stringio=StringIO.StringIO bufsize=len( string ) - stringio( string ) + buf=stringio( string ) parser=_getParser() return DOMEventStream( buf, parser, bufsize ) -#FIXME: Use Lars' instead!!! -class SAX_expat: - "SAX driver for the Pyexpat C module." - - def __init__(self): - self.parser=pyexpat.ParserCreate() - self.started=0 - - def setDocumentHandler( self, handler ): - self.parser.StartElementHandler = handler.startElement - self.parser.EndElementHandler = handler.endElement - self.parser.CharacterDataHandler = handler.datachars - self.parser.ProcessingInstructionHandler = handler.processingInstruction - self.doc_handler=handler - - def setErrorHandler( self, handler ): - self.err_handler=handler - - # --- Locator methods. Only usable after errors. - - def getLineNumber(self): - return self.parser.ErrorLineNumber - - def getColumnNumber(self): - return self.parser.ErrorColumnNumber - - # --- Internal - - def __report_error(self): - msg=pyexpat.ErrorString(self.parser.ErrorCode) - self.err_handler.fatalError(msg) - - # --- EXPERIMENTAL PYTHON SAX EXTENSIONS - - def get_parser_name(self): - return "pyexpat" - - def get_parser_version(self): - return "Unknown" - - def get_driver_version(self): - return version - - def is_validating(self): - return 0 - - def is_dtd_reading(self): - return 0 - - def reset(self): - self.parser=pyexpat.ParserCreate() - self.parser.StartElementHandler = self.startElement - self.parser.EndElementHandler = self.endElement - self.parser.CharacterDataHandler = self.characters - self.parser.ProcessingInstructionHandler = self.processingInstruction - - def feed(self,data): - if not self.started: - self.doc_handler.startDocument() - self.started=1 - if not self.parser.Parse(data): - self.__report_error() - - def close(self): - if not self.parser.Parse("",1): - self.__report_error() - self.doc_handler.endDocument() - self.parser = None diff --git a/Lib/xml/sax/__init__.py b/Lib/xml/sax/__init__.py index 5d0fea5..324558d 100644 --- a/Lib/xml/sax/__init__.py +++ b/Lib/xml/sax/__init__.py @@ -23,3 +23,27 @@ from _exceptions import * from saxutils import * from _exceptions import SAXParseException import xmlreader + +def parse( filename_or_stream, handler, errorHandler=ErrorHandler() ): + parser=ExpatParser() + parser.setContentHandler( handler ) + parse.setErrorHandler( errorHandler ) + parser.parse( filename_or_stream ) + +# this may not work yet...Expat doesn't handle buffer inputs +def parseString( string, handler, errorHandler=ErrorHandler() ): + try: + import cStringIO + stringio=cStringIO.StringIO + except ImportError: + import StringIO + stringio=StringIO.StringIO + + bufsize=len( string ) + buf=stringio( string ) + + parser=ExpatParser() + parser.setContentHandler( handler ) + parse.setErrorHandler( errorHandler ) + parser.parse( buf ) + |