diff options
Diffstat (limited to 'Lib/xml/dom/pulldom.py')
-rw-r--r-- | Lib/xml/dom/pulldom.py | 63 |
1 files changed, 37 insertions, 26 deletions
diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py index 063f7e8..e674385 100644 --- a/Lib/xml/dom/pulldom.py +++ b/Lib/xml/dom/pulldom.py @@ -1,5 +1,5 @@ -import minidom -import xml.sax,xml.sax.handler +import xml.sax +import xml.sax.handler START_ELEMENT = "START_ELEMENT" END_ELEMENT = "END_ELEMENT" @@ -11,23 +11,28 @@ IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE" CHARACTERS = "CHARACTERS" class PullDOM(xml.sax.ContentHandler): - def __init__(self): + _locator = None + document = None + + def __init__(self, documentFactory=None): + self.documentFactory = documentFactory self.firstEvent = [None, None] self.lastEvent = self.firstEvent self._ns_contexts = [{}] # contains uri -> prefix dicts self._current_context = self._ns_contexts[-1] - def setDocumentLocator(self, locator): pass + def setDocumentLocator(self, locator): + self._locator = locator def startPrefixMapping(self, prefix, uri): self._ns_contexts.append(self._current_context.copy()) - self._current_context[uri] = prefix + self._current_context[uri] = prefix or '' def endPrefixMapping(self, prefix): - del self._ns_contexts[-1] + self._current_context = self._ns_contexts.pop() def startElementNS(self, name, tagName , attrs): - uri,localname = name + uri, localname = name if uri: # When using namespaces, the reader may or may not # provide us with the original name. If not, create @@ -50,8 +55,7 @@ class PullDOM(xml.sax.ContentHandler): attr.value = value node.setAttributeNode(attr) - parent = self.curNode - node.parentNode = parent + node.parentNode = self.curNode self.curNode = node self.lastEvent[1] = [(START_ELEMENT, node), None] @@ -63,7 +67,7 @@ class PullDOM(xml.sax.ContentHandler): self.lastEvent[1] = [(END_ELEMENT, node), None] self.lastEvent = self.lastEvent[1] #self.events.append((END_ELEMENT, node)) - self.curNode = node.parentNode + self.curNode = self.curNode.parentNode def startElement(self, name, attrs): node = self.document.createElement(name) @@ -73,8 +77,7 @@ class PullDOM(xml.sax.ContentHandler): attr.value = value node.setAttributeNode(attr) - parent = self.curNode - node.parentNode = parent + node.parentNode = self.curNode self.curNode = node self.lastEvent[1] = [(START_ELEMENT, node), None] @@ -106,7 +109,7 @@ class PullDOM(xml.sax.ContentHandler): #self.events.append((PROCESSING_INSTRUCTION, node)) def ignorableWhitespace(self, chars): - node = self.document.createTextNode(chars[start:start + length]) + node = self.document.createTextNode(chars) parent = self.curNode node.parentNode = parent self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None] @@ -121,20 +124,25 @@ class PullDOM(xml.sax.ContentHandler): self.lastEvent = self.lastEvent[1] def startDocument(self): - node = self.curNode = self.document = minidom.Document() - node.parentNode = None + publicId = systemId = None + if self._locator: + publicId = self._locator.getPublicId() + systemId = self._locator.getSystemId() + if self.documentFactory is None: + import xml.dom.minidom + self.documentFactory = xml.dom.minidom.Document.implementation + node = self.documentFactory.createDocument(None, publicId, systemId) + self.curNode = self.document = node self.lastEvent[1] = [(START_DOCUMENT, node), None] self.lastEvent = self.lastEvent[1] #self.events.append((START_DOCUMENT, node)) def endDocument(self): - assert not self.curNode.parentNode - for node in self.curNode.childNodes: - if node.nodeType == node.ELEMENT_NODE: - self.document.documentElement = node - #if not self.document.documentElement: - # raise Error, "No document element" - + assert self.curNode.parentNode is None, \ + "not all elements have been properly closed" + assert self.curNode.documentElement is not None, \ + "document does not contain a root element" + node = self.curNode.documentElement self.lastEvent[1] = [(END_DOCUMENT, node), None] #self.events.append((END_DOCUMENT, self.curNode)) @@ -156,7 +164,7 @@ class DOMEventStream: def reset(self): self.pulldom = PullDOM() # This content handler relies on namespace support - self.parser.setFeature(xml.sax.handler.feature_namespaces,1) + self.parser.setFeature(xml.sax.handler.feature_namespaces, 1) self.parser.setContentHandler(self.pulldom) def __getitem__(self, pos): @@ -179,7 +187,7 @@ class DOMEventStream: if not self.pulldom.firstEvent[1]: self.pulldom.lastEvent = self.pulldom.firstEvent while not self.pulldom.firstEvent[1]: - buf=self.stream.read(self.bufsize) + buf = self.stream.read(self.bufsize) if not buf: #FIXME: why doesn't Expat close work? #self.parser.close() @@ -214,10 +222,13 @@ class SAX2DOM(PullDOM): node = self.lastEvent[0][1] node.parentNode.appendChild(node) + default_bufsize = (2 ** 14) - 20 -def parse(stream_or_string, parser=None, bufsize=default_bufsize): - if type(stream_or_string) is type(""): +def parse(stream_or_string, parser=None, bufsize=None): + if bufsize is None: + bufsize = default_bufsize + if type(stream_or_string) in [type(""), type(u"")]: stream = open(stream_or_string) else: stream = stream_or_string |