diff options
author | Fred Drake <fdrake@acm.org> | 2000-06-29 19:34:54 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 2000-06-29 19:34:54 (GMT) |
commit | 45cd9de2bb2faa96bb18eb11d20261d7d1b8c20e (patch) | |
tree | 4d411da21e2dd6b70b29e38212a9c9836060fef5 /Lib/xml | |
parent | 0d800e14811f06d10a0f251c59a7d84e8f73fbfb (diff) | |
download | cpython-45cd9de2bb2faa96bb18eb11d20261d7d1b8c20e.zip cpython-45cd9de2bb2faa96bb18eb11d20261d7d1b8c20e.tar.gz cpython-45cd9de2bb2faa96bb18eb11d20261d7d1b8c20e.tar.bz2 |
Paul Prescod <paul@prescod.net>:
SAX interfaces for Python.
Diffstat (limited to 'Lib/xml')
-rw-r--r-- | Lib/xml/sax/_exceptions.py | 108 | ||||
-rw-r--r-- | Lib/xml/sax/expatreader.py | 204 | ||||
-rw-r--r-- | Lib/xml/sax/handler.py | 270 | ||||
-rw-r--r-- | Lib/xml/sax/saxutils.py | 153 | ||||
-rw-r--r-- | Lib/xml/sax/xmlreader.py | 225 |
5 files changed, 960 insertions, 0 deletions
diff --git a/Lib/xml/sax/_exceptions.py b/Lib/xml/sax/_exceptions.py new file mode 100644 index 0000000..f05bd88 --- /dev/null +++ b/Lib/xml/sax/_exceptions.py @@ -0,0 +1,108 @@ +"""Different kinds of SAX Exceptions""" +import sys +if sys.platform[:4] == "java": + from java.lang import Exception + +# ===== SAXEXCEPTION ===== + +class SAXException(Exception): + """Encapsulate an XML error or warning. This class can contain + basic error or warning information from either the XML parser or + the application: you can subclass it to provide additional + functionality, or to add localization. Note that although you will + receive a SAXException as the argument to the handlers in the + ErrorHandler interface, you are not actually required to throw + the exception; instead, you can simply read the information in + it.""" + + def __init__(self, msg, exception = None): + """Creates an exception. The message is required, but the exception + is optional.""" + self._msg = msg + self._exception = exception + + def getMessage(self): + "Return a message for this exception." + return self._msg + + def getException(self): + "Return the embedded exception, or None if there was none." + return self._exception + + def __str__(self): + "Create a string representation of the exception." + return self._msg + + def __getitem__(self, ix): + """Avoids weird error messages if someone does exception[ix] by + mistake, since Exception has __getitem__ defined.""" + raise NameError("__getitem__") + + +# ===== SAXPARSEEXCEPTION ===== + +class SAXParseException(SAXException): + """Encapsulate an XML parse error or warning. + + This exception will include information for locating the error in + the original XML document. Note that although the application will + receive a SAXParseException as the argument to the handlers in the + ErrorHandler interface, the application is not actually required + to throw the exception; instead, it can simply read the + information in it and take a different action. 
+ + Since this exception is a subclass of SAXException, it inherits + the ability to wrap another exception.""" + + def __init__(self, msg, exception, locator): + "Creates the exception. The exception parameter is allowed to be None." + SAXException.__init__(self, msg, exception) + self._locator = locator + + def getColumnNumber(self): + """The column number of the end of the text where the exception + occurred.""" + return self._locator.getColumnNumber() + + def getLineNumber(self): + "The line number of the end of the text where the exception occurred." + return self._locator.getLineNumber() + + def getPublicId(self): + "Get the public identifier of the entity where the exception occurred." + return self._locator.getPublicId() + + def getSystemId(self): + "Get the system identifier of the entity where the exception occurred." + return self._locator.getSystemId() + + def __str__(self): + "Create a string representation of the exception." + return "%s at %s:%d:%d" % (self._msg, + self.getSystemId(), + self.getLineNumber(), + self.getColumnNumber()) + + +# ===== SAXNOTRECOGNIZEDEXCEPTION ===== + +class SAXNotRecognizedException(SAXException): + """Exception class for an unrecognized identifier. + + An XMLReader will raise this exception when it is confronted with an + unrecognized feature or property. SAX applications and extensions may + use this class for similar purposes.""" + + +# ===== SAXNOTSUPPORTEDEXCEPTION ===== + +class SAXNotSupportedException(SAXException): + """Exception class for an unsupported operation. + + An XMLReader will raise this exception when a service it cannot + perform is requested (specifically setting a state or value). SAX + applications and extensions may use this class for similar + purposes.""" + + + diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py new file mode 100644 index 0000000..b6816a4 --- /dev/null +++ b/Lib/xml/sax/expatreader.py @@ -0,0 +1,204 @@ +""" +SAX driver for the Pyexpat C module. 
This driver works with +pyexpat.__version__ == '1.5'. + +$Id$ +""" + +# Todo on driver: +# - make it support external entities (wait for pyexpat.c) +# - enable configuration between reset() and feed() calls +# - support lexical events? +# - proper inputsource handling +# - properties and features + +# Todo on pyexpat.c: +# - support XML_ExternalEntityParserCreate +# - exceptions in callouts from pyexpat to python code lose position info + +version = "0.20" + +from string import split + +from xml.sax import xmlreader +import pyexpat +import xml.sax + +# --- ExpatParser + +class ExpatParser( xmlreader.IncrementalParser, xmlreader.Locator ): + "SAX driver for the Pyexpat C module." + + def __init__(self, namespaceHandling=0, bufsize=2**16-20): + xmlreader.IncrementalParser.__init__(self, bufsize) + self._source = None + self._parser = None + self._namespaces = namespaceHandling + self._parsing = 0 + + # XMLReader methods + + def parse(self, stream_or_string ): + "Parse an XML document from a URL." + if type( stream_or_string ) == type( "" ): + stream=open( stream_or_string ) + else: + stream=stream_or_string + + self.reset() + self._cont_handler.setDocumentLocator(self) + try: + xmlreader.IncrementalParser.parse(self, stream) + except pyexpat.error: + error_code = self._parser.ErrorCode + raise xml.sax.SAXParseException(pyexpat.ErrorString(error_code), + None, self) + + self._cont_handler.endDocument() + + def prepareParser(self, filename=None): + self._source = filename + + if self._source != None: + self._parser.SetBase(self._source) + + def getFeature(self, name): + "Looks up and returns the state of a SAX2 feature." + raise SAXNotRecognizedException("Feature '%s' not recognized" % name) + + def setFeature(self, name, state): + "Sets the state of a SAX2 feature." + raise SAXNotRecognizedException("Feature '%s' not recognized" % name) + + def getProperty(self, name): + "Looks up and returns the value of a SAX2 property." 
+ raise SAXNotRecognizedException("Property '%s' not recognized" % name) + + def setProperty(self, name, value): + "Sets the value of a SAX2 property." + raise SAXNotRecognizedException("Property '%s' not recognized" % name) + + # IncrementalParser methods + + def feed(self, data): + if not self._parsing: + self._parsing=1 + self.reset() + self._cont_handler.startDocument() + # FIXME: error checking and endDocument() + self._parser.Parse(data, 0) + + def close(self): + if self._parsing: + self._cont_handler.endDocument() + self._parsing=0 + self._parser.Parse("", 1) + + def reset(self): + if self._namespaces: + self._parser = pyexpat.ParserCreate(None, " ") + self._parser.StartElementHandler = self.start_element_ns + self._parser.EndElementHandler = self.end_element_ns + else: + self._parser = pyexpat.ParserCreate() + self._parser.StartElementHandler = self._cont_handler.startElement + self._parser.EndElementHandler = self._cont_handler.endElement + + self._parser.ProcessingInstructionHandler = \ + self._cont_handler.processingInstruction + self._parser.CharacterDataHandler = self._cont_handler.characters + self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl + self._parser.NotationDeclHandler = self.notation_decl + self._parser.StartNamespaceDeclHandler = self.start_namespace_decl + self._parser.EndNamespaceDeclHandler = self.end_namespace_decl +# self._parser.CommentHandler = +# self._parser.StartCdataSectionHandler = +# self._parser.EndCdataSectionHandler = +# self._parser.DefaultHandler = +# self._parser.DefaultHandlerExpand = +# self._parser.NotStandaloneHandler = + self._parser.ExternalEntityRefHandler = self.external_entity_ref + + # Locator methods + + def getColumnNumber(self): + return self._parser.ErrorColumnNumber + + def getLineNumber(self): + return self._parser.ErrorLineNumber + + def getPublicId(self): + return self._source.getPublicId() + + def getSystemId(self): + return self._parser.GetBase() + + # internal methods + + # event 
handlers + + def start_element(self, name, attrs): + self._cont_handler.startElement(name, + xmlreader.AttributesImpl(attrs, attrs)) + + def end_element(self, name): + self._cont_handler.endElement(name) + + def start_element_ns(self, name, attrs): + pair = split(name) + if len(pair) == 1: + tup = (None, name, None) + else: + tup = pair+[None] # prefix is not implemented yet! + + self._cont_handler.startElement(tup, + xmlreader.AttributesImpl(attrs, None)) + + def end_element_ns(self, name): + pair = split(name) + if len(pair) == 1: + name = (None, name, None) + else: + name = pair+[None] # prefix is not implemented yet! + + self._cont_handler.endElement(name) + + def processing_instruction(self, target, data): + self._cont_handler.processingInstruction(target, data) + + def character_data(self, data): + self._cont_handler.characters(data) + + def start_namespace_decl(self, prefix, uri): + self._cont_handler.startPrefixMapping(prefix, uri) + + def end_namespace_decl(self, prefix): + self._cont_handler.endPrefixMapping(prefix) + + def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): + self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name) + + def notation_decl(self, name, base, sysid, pubid): + self._dtd_handler.notationDecl(name, pubid, sysid) + + def external_entity_ref(self, context, base, sysid, pubid): + assert 0 # not implemented + source = self._ent_handler.resolveEntity(pubid, sysid) + source = saxutils.prepare_input_source(source) + # FIXME: create new parser, stack self._source and self._parser + # FIXME: reuse code from self.parse(...) 
+ return 1 + +# --- + +def create_parser(*args, **kwargs): + return apply( ExpatParser, args, kwargs ) + +# --- + +if __name__ == "__main__": + import xml.sax + p = create_parser() + p.setContentHandler(xml.sax.XMLGenerator()) + p.setErrorHandler(xml.sax.ErrorHandler()) + p.parse("../../../hamlet.xml") diff --git a/Lib/xml/sax/handler.py b/Lib/xml/sax/handler.py new file mode 100644 index 0000000..cbbb959 --- /dev/null +++ b/Lib/xml/sax/handler.py @@ -0,0 +1,270 @@ +""" +This module contains the core classes of version 2.0 of SAX for Python. +This file provides only default classes with absolutely minimum +functionality, from which drivers and applications can be subclassed. + +Many of these classes are empty and are included only as documentation +of the interfaces. + +$Id$ +""" + +version = '2.0beta' +#============================================================================ +# +# HANDLER INTERFACES +# +#============================================================================ +# ===== ErrorHandler ===== +class ErrorHandler: + """Basic interface for SAX error handlers. If you create an object + that implements this interface, then register the object with your + Parser, the parser will call the methods in your object to report + all warnings and errors. There are three levels of errors + available: warnings, (possibly) recoverable errors, and + unrecoverable errors. All methods take a SAXParseException as the + only parameter.""" + + def error(self, exception): + "Handle a recoverable error." + raise exception + + def fatalError(self, exception): + "Handle a non-recoverable error." + raise exception + + def warning(self, exception): + "Handle a warning." + print exception + +# ===== CONTENTHANDLER ===== + +class ContentHandler: + """Interface for receiving logical document content events. + + This is the main callback interface in SAX, and the one most + important to applications. 
The order of events in this interface + mirrors the order of the information in the document.""" + + def __init__(self): + self._locator = None + + def setDocumentLocator(self, locator): + """Called by the parser to give the application a locator for + locating the origin of document events. + + SAX parsers are strongly encouraged (though not absolutely + required) to supply a locator: if it does so, it must supply + the locator to the application by invoking this method before + invoking any of the other methods in the DocumentHandler + interface. + + The locator allows the application to determine the end + position of any document-related event, even if the parser is + not reporting an error. Typically, the application will use + this information for reporting its own errors (such as + character content that does not match an application's + business rules). The information returned by the locator is + probably not sufficient for use with a search engine. + + Note that the locator will return correct information only + during the invocation of the events in this interface. The + application should not attempt to use it at any other time.""" + self._locator = locator + + def startDocument(self): + """Receive notification of the beginning of a document. + + The SAX parser will invoke this method only once, before any + other methods in this interface or in DTDHandler (except for + setDocumentLocator).""" + + def endDocument(self): + """Receive notification of the end of a document. + + The SAX parser will invoke this method only once, and it will + be the last method invoked during the parse. The parser shall + not invoke this method until it has either abandoned parsing + (because of an unrecoverable error) or reached the end of + input.""" + + def startPrefixMapping(self, prefix, uri): + """Begin the scope of a prefix-URI Namespace mapping. 
+ + The information from this event is not necessary for normal + Namespace processing: the SAX XML reader will automatically + replace prefixes for element and attribute names when the + http://xml.org/sax/features/namespaces feature is true (the + default). + + There are cases, however, when applications need to use + prefixes in character data or in attribute values, where they + cannot safely be expanded automatically; the + start/endPrefixMapping event supplies the information to the + application to expand prefixes in those contexts itself, if + necessary. + + Note that start/endPrefixMapping events are not guaranteed to + be properly nested relative to each-other: all + startPrefixMapping events will occur before the corresponding + startElement event, and all endPrefixMapping events will occur + after the corresponding endElement event, but their order is + not guaranteed.""" + + def endPrefixMapping(self, prefix): + """End the scope of a prefix-URI mapping. + + See startPrefixMapping for details. This event will always + occur after the corresponding endElement event, but the order + of endPrefixMapping events is not otherwise guaranteed.""" + + def startElement(self, name, attrs): + """Signals the start of an element. + + The name parameter contains the name of the element type as a + (uri ,localname) tuple, the qname parameter the raw XML 1.0 + name used in the source document, and the attrs parameter + holds an instance of the Attributes class containing the + attributes of the element.""" + + def endElement(self, name ): + """Signals the end of an element. + + The name parameter contains the name of the element type, just + as with the startElement event.""" + + def characters(self, content): + """Receive notification of character data. + + The Parser will call this method to report each chunk of + character data. 
SAX parsers may return all contiguous + character data in a single chunk, or they may split it into + several chunks; however, all of the characters in any single + event must come from the same external entity so that the + Locator provides useful information.""" + + def ignorableWhitespace(self ): + """Receive notification of ignorable whitespace in element content. + + Validating Parsers must use this method to report each chunk + of ignorable whitespace (see the W3C XML 1.0 recommendation, + section 2.10): non-validating parsers may also use this method + if they are capable of parsing and using content models. + + SAX parsers may return all contiguous whitespace in a single + chunk, or they may split it into several chunks; however, all + of the characters in any single event must come from the same + external entity, so that the Locator provides useful + information. + + The application must not attempt to read from the array + outside of the specified range.""" + + def processingInstruction(self, target, data): + """Receive notification of a processing instruction. + + The Parser will invoke this method once for each processing + instruction found: note that processing instructions may occur + before or after the main document element. + + A SAX parser should never report an XML declaration (XML 1.0, + section 2.8) or a text declaration (XML 1.0, section 4.3.1) + using this method.""" + + def skippedEntity(self, name): + """Receive notification of a skipped entity. + + The Parser will invoke this method once for each entity + skipped. Non-validating processors may skip entities if they + have not seen the declarations (because, for example, the + entity was declared in an external DTD subset). 
All processors + may skip external entities, depending on the values of the + http://xml.org/sax/features/external-general-entities and the + http://xml.org/sax/features/external-parameter-entities + properties.""" + +#============================================================================ +# +# CORE FEATURES +# +#============================================================================ + +feature_namespaces = "http://xml.org/sax/features/namespaces" +# true: Perform Namespace processing (default). +# false: Optionally do not perform Namespace processing +# (implies namespace-prefixes). +# access: (parsing) read-only; (not parsing) read/write + +feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes" +# true: Report the original prefixed names and attributes used for Namespace +# declarations. +# false: Do not report attributes used for Namespace declarations, and +# optionally do not report original prefixed names (default). +# access: (parsing) read-only; (not parsing) read/write + +feature_string_interning = "http://xml.org/sax/features/string-interning" +# true: All element names, prefixes, attribute names, Namespace URIs, and +# local names are interned using the built-in intern function. +# false: Names are not necessarily interned, although they may be (default). +# access: (parsing) read-only; (not parsing) read/write + +feature_validation = "http://xml.org/sax/features/validation" +# true: Report all validation errors (implies external-general-entities and +# external-parameter-entities). +# false: Do not report validation errors. +# access: (parsing) read-only; (not parsing) read/write + +feature_external_ges = "http://xml.org/sax/features/external-general-entities" +# true: Include all external general (text) entities. +# false: Do not include external general entities. 
+# access: (parsing) read-only; (not parsing) read/write + +feature_external_pes = "http://xml.org/sax/features/external-parameter-entities" +# true: Include all external parameter entities, including the external +# DTD subset. +# false: Do not include any external parameter entities, even the external +# DTD subset. +# access: (parsing) read-only; (not parsing) read/write + +all_features = [feature_namespaces, + feature_namespace_prefixes, + feature_string_interning, + feature_validation, + feature_external_ges, + feature_external_pes] + + +#============================================================================ +# +# CORE PROPERTIES +# +#============================================================================ + +property_lexical_handler = "http://xml.org/sax/properties/lexical-handler" +# data type: xml.sax.sax2lib.LexicalHandler +# description: An optional extension handler for lexical events like comments. +# access: read/write + +property_declaration_handler = "http://xml.org/sax/properties/declaration-handler" +# data type: xml.sax.sax2lib.DeclHandler +# description: An optional extension handler for DTD-related events other +# than notations and unparsed entities. +# access: read/write + +property_dom_node = "http://xml.org/sax/properties/dom-node" +# data type: org.w3c.dom.Node +# description: When parsing, the current DOM node being visited if this is +# a DOM iterator; when not parsing, the root DOM node for +# iteration. +# access: (parsing) read-only; (not parsing) read/write + +property_xml_string = "http://xml.org/sax/properties/xml-string" +# data type: String +# description: The literal string of characters that was the source for +# the current event. 
+# access: read-only + +all_properties = [property_lexical_handler, + property_dom_node, + property_declaration_handler, + property_xml_string] diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py new file mode 100644 index 0000000..eb0760e --- /dev/null +++ b/Lib/xml/sax/saxutils.py @@ -0,0 +1,153 @@ +""" +A library of useful helper classes to the sax classes, for the +convenience of application and driver writers. + +$Id$ +""" + +import types, string, sys, urllib +import handler + +def escape(data, entities = {}): + """Escape &, <, and > in a string of data. + You can escape other strings of data by passing a dictionary as + the optional entities parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. + """ + data = string.replace(data, "&", "&amp;") + data = string.replace(data, "<", "&lt;") + data = string.replace(data, ">", "&gt;") + for chars, entity in entities.items(): + data = string.replace(data, chars, entity) + return data + +class XMLGenerator(handler.ContentHandler): + + def __init__(self, out = sys.stdout): + handler.ContentHandler.__init__(self) + self._out = out + + # ContentHandler methods + + def startDocument(self): + self._out.write('<?xml version="1.0" encoding="iso-8859-1"?>\n') + + def startPrefixMapping(self, prefix, uri): + pass + + def endPrefixMapping(self, prefix): + pass + + def startElement(self, name, attrs): + if type(name)==type(()): + uri, localname, prefix=name + name="%s:%s"%(prefix,localname) + self._out.write('<' + name) + for (name, value) in attrs.items(): + self._out.write(' %s="%s"' % (name, escape(value))) + self._out.write('>') + + def endElement(self, name): + # FIXME: not namespace friendly yet + self._out.write('</%s>' % name) + + def characters(self, content): + self._out.write(escape(content)) + + def ignorableWhitespace(self, content): + self._out.write(content) + + def processingInstruction(self, target, data): + self._out.write('<?%s %s?>' % (target, data)) + 
+class XMLFilterBase: + """This class is designed to sit between an XMLReader and the + client application's event handlers. By default, it does nothing + but pass requests up to the reader and events on to the handlers + unmodified, but subclasses can override specific methods to modify + the event stream or the configuration requests as they pass + through.""" + + # ErrorHandler methods + + def error(self, exception): + self._err_handler.error(exception) + + def fatalError(self, exception): + self._err_handler.fatalError(exception) + + def warning(self, exception): + self._err_handler.warning(exception) + + # ContentHandler methods + + def setDocumentLocator(self, locator): + self._cont_handler.setDocumentLocator(locator) + + def startDocument(self): + self._cont_handler.startDocument() + + def endDocument(self): + self._cont_handler.endDocument() + + def startPrefixMapping(self, prefix, uri): + self._cont_handler.startPrefixMapping(prefix, uri) + + def endPrefixMapping(self, prefix): + self._cont_handler.endPrefixMapping(prefix) + + def startElement(self, name, attrs): + self._cont_handler.startElement(name, attrs) + + def endElement(self, name, qname): + self._cont_handler.endElement(name, qname) + + def characters(self, content): + self._cont_handler.characters(content) + + def ignorableWhitespace(self, chars, start, end): + self._cont_handler.ignorableWhitespace(chars, start, end) + + def processingInstruction(self, target, data): + self._cont_handler.processingInstruction(target, data) + + def skippedEntity(self, name): + self._cont_handler.skippedEntity(name) + + # DTDHandler methods + + def notationDecl(self, name, publicId, systemId): + self._dtd_handler.notationDecl(name, publicId, systemId) + + def unparsedEntityDecl(self, name, publicId, systemId, ndata): + self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata) + + # EntityResolver methods + + def resolveEntity(self, publicId, systemId): + self._ent_handler.resolveEntity(publicId, 
systemId) + + # XMLReader methods + + def parse(self, source): + self._parent.setContentHandler(self) + self._parent.setErrorHandler(self) + self._parent.setEntityResolver(self) + self._parent.setDTDHandler(self) + self._parent.parse(source) + + def setLocale(self, locale): + self._parent.setLocale(locale) + + def getFeature(self, name): + return self._parent.getFeature(name) + + def setFeature(self, name, state): + self._parent.setFeature(name, state) + + def getProperty(self, name): + return self._parent.getProperty(name) + + def setProperty(self, name, value): + self._parent.setProperty(name, value) + diff --git a/Lib/xml/sax/xmlreader.py b/Lib/xml/sax/xmlreader.py new file mode 100644 index 0000000..b66ef1c --- /dev/null +++ b/Lib/xml/sax/xmlreader.py @@ -0,0 +1,225 @@ +import handler + +"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers +should be based on this code. """ +# ===== XMLREADER ===== + +class XMLReader: + def __init__(self): + self._cont_handler = handler.ContentHandler() + #self._dtd_handler = handler.DTDHandler() + #self._ent_handler = handler.EntityResolver() + self._err_handler = handler.ErrorHandler() + + def parse(self, source): + "Parse an XML document from a system identifier or an InputSource." + raise NotImplementedError("This method must be implemented!") + + def getContentHandler(self): + "Returns the current ContentHandler." + return self._cont_handler + + def setContentHandler(self, handler): + "Registers a new object to receive document content events." + self._cont_handler = handler + + def getDTDHandler(self): + "Returns the current DTD handler." + return self._dtd_handler + + def setDTDHandler(self, handler): + "Register an object to receive basic DTD-related events." + self._dtd_handler = handler + + def getEntityResolver(self): + "Returns the current EntityResolver." + return self._ent_handler + + def setEntityResolver(self, resolver): + "Register an object to resolve external entities." 
+ self._ent_handler = resolver + + def getErrorHandler(self): + "Returns the current ErrorHandler." + return self._err_handler + + def setErrorHandler(self, handler): + "Register an object to receive error-message events." + self._err_handler = handler + + def setLocale(self, locale): + """Allow an application to set the locale for errors and warnings. + + SAX parsers are not required to provide localisation for errors + and warnings; if they cannot support the requested locale, + however, they must throw a SAX exception. Applications may + request a locale change in the middle of a parse.""" + raise SAXNotSupportedException("Locale support not implemented") + + def getFeature(self, name): + "Looks up and returns the state of a SAX2 feature." + raise SAXNotRecognizedException("Feature '%s' not recognized" % name) + + def setFeature(self, name, state): + "Sets the state of a SAX2 feature." + raise SAXNotRecognizedException("Feature '%s' not recognized" % name) + + def getProperty(self, name): + "Looks up and returns the value of a SAX2 property." + raise SAXNotRecognizedException("Property '%s' not recognized" % name) + + def setProperty(self, name, value): + "Sets the value of a SAX2 property." + raise SAXNotRecognizedException("Property '%s' not recognized" % name) + + +class IncrementalParser(XMLReader): + """This interface adds three extra methods to the XMLReader + interface that allow XML parsers to support incremental + parsing. Support for this interface is optional, since not all + underlying XML parsers support this functionality. + + When the parser is instantiated it is ready to begin accepting + data from the feed method immediately. After parsing has been + finished with a call to close the reset method must be called to + make the parser ready to accept new data, either from feed or + using the parse method. + + Note that these methods must _not_ be called during parsing, that + is, after parse has been called and before it returns. 
+ + By default, the class also implements the parse method of the XMLReader + interface using the feed, close and reset methods of the + IncrementalParser interface as a convenience to SAX 2.0 driver + writers.""" + def __init__(self, bufsize=2**16 ): + self._bufsize=bufsize + XMLReader.__init__( self ) + + def parse(self, source): + self.prepareParser(source) + #FIXME: do some type checking: could be already stream, URL or + # filename + inf=open( source ) + buffer = inf.read(self._bufsize) + while buffer != "": + self.feed(buffer) + buffer = inf.read(self._bufsize) + self.close() + self.reset() + + def feed(self, data): + """This method gives the raw XML data in the data parameter to + the parser and makes it parse the data, emitting the + corresponding events. It is allowed for XML constructs to be + split across several calls to feed. + + feed may raise SAXException.""" + raise NotImplementedError("This method must be implemented!") + def prepareParser(self, source): + """This method is called by the parse implementation to allow + the SAX 2.0 driver to prepare itself for parsing.""" + raise NotImplementedError("prepareParser must be overridden!") + + def close(self): + """This method is called when the entire XML document has been + passed to the parser through the feed method, to notify the + parser that there are no more data. This allows the parser to + do the final checks on the document and empty the internal + data buffer. + + The parser will not be ready to parse another document until + the reset method has been called. + + close may raise SAXException.""" + raise NotImplementedError("This method must be implemented!") + + def reset(self): + """This method is called after close has been called to reset + the parser so that it is ready to parse new documents. 
The + results of calling parse or feed after close without calling + reset are undefined.""" + raise NotImplementedError("This method must be implemented!") + +# ===== LOCATOR ===== +class Locator: + """Interface for associating a SAX event with a document + location. A locator object will return valid results only during + calls to DocumentHandler methods; at any other time, the + results are unpredictable.""" + + def getColumnNumber(self): + "Return the column number where the current event ends." + return -1 + + def getLineNumber(self): + "Return the line number where the current event ends." + return -1 + + def getPublicId(self): + "Return the public identifier for the current event." + return None + + def getSystemId(self): + "Return the system identifier for the current event." + return None + +# --- AttributesImpl +class AttributesImpl: + def __init__(self, attrs, rawnames): + self._attrs = attrs + self._rawnames = rawnames + + def getLength(self): + return len(self._attrs) + + def getType(self, name): + return "CDATA" + + def getValue(self, name): + return self._attrs[name] + + def getValueByQName(self, name): + return self._attrs[self._rawnames[name]] + + def getNameByQName(self, name): + return self._rawnames[name] + + def getNames(self): + return self._attrs.keys() + + def getQNames(self): + return self._rawnames.keys() + + def __len__(self): + return len(self._attrs) + + def __getitem__(self, name): + return self._attrs[name] + + def keys(self): + return self._attrs.keys() + + def has_key(self, name): + return self._attrs.has_key(name) + + def get(self, name, alternative=None): + return self._attrs.get(name, alternative) + + def copy(self): + return self.__class__(self._attrs, self._rawnames) + + def items(self): + return self._attrs.items() + + def values(self): + return self._attrs.values() + +def _test(): + XMLReader() + IncrementalParser() + Locator() + AttributesImpl() + +if __name__=="__main__": + _test() |