summary | refs | log | tree | commit | diff | stats
path: root/Lib/xml/dom/pulldom.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/xml/dom/pulldom.py')
-rw-r--r-- Lib/xml/dom/pulldom.py 63
1 files changed, 37 insertions, 26 deletions
diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py
index 063f7e8..e674385 100644
--- a/Lib/xml/dom/pulldom.py
+++ b/Lib/xml/dom/pulldom.py
@@ -1,5 +1,5 @@
-import minidom
-import xml.sax,xml.sax.handler
+import xml.sax
+import xml.sax.handler
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
@@ -11,23 +11,28 @@ IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
class PullDOM(xml.sax.ContentHandler):
- def __init__(self):
+ _locator = None
+ document = None
+
+ def __init__(self, documentFactory=None):
+ self.documentFactory = documentFactory
self.firstEvent = [None, None]
self.lastEvent = self.firstEvent
self._ns_contexts = [{}] # contains uri -> prefix dicts
self._current_context = self._ns_contexts[-1]
- def setDocumentLocator(self, locator): pass
+ def setDocumentLocator(self, locator):
+ self._locator = locator
def startPrefixMapping(self, prefix, uri):
self._ns_contexts.append(self._current_context.copy())
- self._current_context[uri] = prefix
+ self._current_context[uri] = prefix or ''
def endPrefixMapping(self, prefix):
- del self._ns_contexts[-1]
+ self._current_context = self._ns_contexts.pop()
def startElementNS(self, name, tagName , attrs):
- uri,localname = name
+ uri, localname = name
if uri:
# When using namespaces, the reader may or may not
# provide us with the original name. If not, create
@@ -50,8 +55,7 @@ class PullDOM(xml.sax.ContentHandler):
attr.value = value
node.setAttributeNode(attr)
- parent = self.curNode
- node.parentNode = parent
+ node.parentNode = self.curNode
self.curNode = node
self.lastEvent[1] = [(START_ELEMENT, node), None]
@@ -63,7 +67,7 @@ class PullDOM(xml.sax.ContentHandler):
self.lastEvent[1] = [(END_ELEMENT, node), None]
self.lastEvent = self.lastEvent[1]
#self.events.append((END_ELEMENT, node))
- self.curNode = node.parentNode
+ self.curNode = self.curNode.parentNode
def startElement(self, name, attrs):
node = self.document.createElement(name)
@@ -73,8 +77,7 @@ class PullDOM(xml.sax.ContentHandler):
attr.value = value
node.setAttributeNode(attr)
- parent = self.curNode
- node.parentNode = parent
+ node.parentNode = self.curNode
self.curNode = node
self.lastEvent[1] = [(START_ELEMENT, node), None]
@@ -106,7 +109,7 @@ class PullDOM(xml.sax.ContentHandler):
#self.events.append((PROCESSING_INSTRUCTION, node))
def ignorableWhitespace(self, chars):
- node = self.document.createTextNode(chars[start:start + length])
+ node = self.document.createTextNode(chars)
parent = self.curNode
node.parentNode = parent
self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
@@ -121,20 +124,25 @@ class PullDOM(xml.sax.ContentHandler):
self.lastEvent = self.lastEvent[1]
def startDocument(self):
- node = self.curNode = self.document = minidom.Document()
- node.parentNode = None
+ publicId = systemId = None
+ if self._locator:
+ publicId = self._locator.getPublicId()
+ systemId = self._locator.getSystemId()
+ if self.documentFactory is None:
+ import xml.dom.minidom
+ self.documentFactory = xml.dom.minidom.Document.implementation
+ node = self.documentFactory.createDocument(None, publicId, systemId)
+ self.curNode = self.document = node
self.lastEvent[1] = [(START_DOCUMENT, node), None]
self.lastEvent = self.lastEvent[1]
#self.events.append((START_DOCUMENT, node))
def endDocument(self):
- assert not self.curNode.parentNode
- for node in self.curNode.childNodes:
- if node.nodeType == node.ELEMENT_NODE:
- self.document.documentElement = node
- #if not self.document.documentElement:
- # raise Error, "No document element"
-
+ assert self.curNode.parentNode is None, \
+ "not all elements have been properly closed"
+ assert self.curNode.documentElement is not None, \
+ "document does not contain a root element"
+ node = self.curNode.documentElement
self.lastEvent[1] = [(END_DOCUMENT, node), None]
#self.events.append((END_DOCUMENT, self.curNode))
@@ -156,7 +164,7 @@ class DOMEventStream:
def reset(self):
self.pulldom = PullDOM()
# This content handler relies on namespace support
- self.parser.setFeature(xml.sax.handler.feature_namespaces,1)
+ self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
self.parser.setContentHandler(self.pulldom)
def __getitem__(self, pos):
@@ -179,7 +187,7 @@ class DOMEventStream:
if not self.pulldom.firstEvent[1]:
self.pulldom.lastEvent = self.pulldom.firstEvent
while not self.pulldom.firstEvent[1]:
- buf=self.stream.read(self.bufsize)
+ buf = self.stream.read(self.bufsize)
if not buf:
#FIXME: why doesn't Expat close work?
#self.parser.close()
@@ -214,10 +222,13 @@ class SAX2DOM(PullDOM):
node = self.lastEvent[0][1]
node.parentNode.appendChild(node)
+
default_bufsize = (2 ** 14) - 20
-def parse(stream_or_string, parser=None, bufsize=default_bufsize):
- if type(stream_or_string) is type(""):
+def parse(stream_or_string, parser=None, bufsize=None):
+ if bufsize is None:
+ bufsize = default_bufsize
+ if type(stream_or_string) in [type(""), type(u"")]:
stream = open(stream_or_string)
else:
stream = stream_or_string