diff options
Diffstat (limited to 'Lib/xmlcore/sax/xmlreader.py')
-rw-r--r-- | Lib/xmlcore/sax/xmlreader.py | 381 |
1 files changed, 381 insertions, 0 deletions
diff --git a/Lib/xmlcore/sax/xmlreader.py b/Lib/xmlcore/sax/xmlreader.py new file mode 100644 index 0000000..9a2361e --- /dev/null +++ b/Lib/xmlcore/sax/xmlreader.py @@ -0,0 +1,381 @@ +"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers +should be based on this code. """ + +import handler + +from _exceptions import SAXNotSupportedException, SAXNotRecognizedException + + +# ===== XMLREADER ===== + +class XMLReader: + """Interface for reading an XML document using callbacks. + + XMLReader is the interface that an XML parser's SAX2 driver must + implement. This interface allows an application to set and query + features and properties in the parser, to register event handlers + for document processing, and to initiate a document parse. + + All SAX interfaces are assumed to be synchronous: the parse + methods must not return until parsing is complete, and readers + must wait for an event-handler callback to return before reporting + the next event.""" + + def __init__(self): + self._cont_handler = handler.ContentHandler() + self._dtd_handler = handler.DTDHandler() + self._ent_handler = handler.EntityResolver() + self._err_handler = handler.ErrorHandler() + + def parse(self, source): + "Parse an XML document from a system identifier or an InputSource." + raise NotImplementedError("This method must be implemented!") + + def getContentHandler(self): + "Returns the current ContentHandler." + return self._cont_handler + + def setContentHandler(self, handler): + "Registers a new object to receive document content events." + self._cont_handler = handler + + def getDTDHandler(self): + "Returns the current DTD handler." + return self._dtd_handler + + def setDTDHandler(self, handler): + "Register an object to receive basic DTD-related events." + self._dtd_handler = handler + + def getEntityResolver(self): + "Returns the current EntityResolver." + return self._ent_handler + + def setEntityResolver(self, resolver): + "Register an object to resolve external entities." + self._ent_handler = resolver + + def getErrorHandler(self): + "Returns the current ErrorHandler." + return self._err_handler + + def setErrorHandler(self, handler): + "Register an object to receive error-message events." + self._err_handler = handler + + def setLocale(self, locale): + """Allow an application to set the locale for errors and warnings. + + SAX parsers are not required to provide localization for errors + and warnings; if they cannot support the requested locale, + however, they must throw a SAX exception. Applications may + request a locale change in the middle of a parse.""" + raise SAXNotSupportedException("Locale support not implemented") + + def getFeature(self, name): + "Looks up and returns the state of a SAX2 feature." + raise SAXNotRecognizedException("Feature '%s' not recognized" % name) + + def setFeature(self, name, state): + "Sets the state of a SAX2 feature." + raise SAXNotRecognizedException("Feature '%s' not recognized" % name) + + def getProperty(self, name): + "Looks up and returns the value of a SAX2 property." + raise SAXNotRecognizedException("Property '%s' not recognized" % name) + + def setProperty(self, name, value): + "Sets the value of a SAX2 property." + raise SAXNotRecognizedException("Property '%s' not recognized" % name) + +class IncrementalParser(XMLReader): + """This interface adds three extra methods to the XMLReader + interface that allow XML parsers to support incremental + parsing. Support for this interface is optional, since not all + underlying XML parsers support this functionality. + + When the parser is instantiated it is ready to begin accepting + data from the feed method immediately. After parsing has been + finished with a call to close the reset method must be called to + make the parser ready to accept new data, either from feed or + using the parse method. + + Note that these methods must _not_ be called during parsing, that + is, after parse has been called and before it returns. + + By default, the class also implements the parse method of the XMLReader + interface using the feed, close and reset methods of the + IncrementalParser interface as a convenience to SAX 2.0 driver + writers.""" + + def __init__(self, bufsize=2**16): + self._bufsize = bufsize + XMLReader.__init__(self) + + def parse(self, source): + import saxutils + source = saxutils.prepare_input_source(source) + + self.prepareParser(source) + file = source.getByteStream() + buffer = file.read(self._bufsize) + while buffer != "": + self.feed(buffer) + buffer = file.read(self._bufsize) + self.close() + + def feed(self, data): + """This method gives the raw XML data in the data parameter to + the parser and makes it parse the data, emitting the + corresponding events. It is allowed for XML constructs to be + split across several calls to feed. + + feed may raise SAXException.""" + raise NotImplementedError("This method must be implemented!") + + def prepareParser(self, source): + """This method is called by the parse implementation to allow + the SAX 2.0 driver to prepare itself for parsing.""" + raise NotImplementedError("prepareParser must be overridden!") + + def close(self): + """This method is called when the entire XML document has been + passed to the parser through the feed method, to notify the + parser that there are no more data. This allows the parser to + do the final checks on the document and empty the internal + data buffer. + + The parser will not be ready to parse another document until + the reset method has been called. + + close may raise SAXException.""" + raise NotImplementedError("This method must be implemented!") + + def reset(self): + """This method is called after close has been called to reset + the parser so that it is ready to parse new documents. The + results of calling parse or feed after close without calling + reset are undefined.""" + raise NotImplementedError("This method must be implemented!") + +# ===== LOCATOR ===== + +class Locator: + """Interface for associating a SAX event with a document + location. A locator object will return valid results only during + calls to DocumentHandler methods; at any other time, the + results are unpredictable.""" + + def getColumnNumber(self): + "Return the column number where the current event ends." + return -1 + + def getLineNumber(self): + "Return the line number where the current event ends." + return -1 + + def getPublicId(self): + "Return the public identifier for the current event." + return None + + def getSystemId(self): + "Return the system identifier for the current event." + return None + +# ===== INPUTSOURCE ===== + +class InputSource: + """Encapsulation of the information needed by the XMLReader to + read entities. + + This class may include information about the public identifier, + system identifier, byte stream (possibly with character encoding + information) and/or the character stream of an entity. + + Applications will create objects of this class for use in the + XMLReader.parse method and for returning from + EntityResolver.resolveEntity. + + An InputSource belongs to the application, the XMLReader is not + allowed to modify InputSource objects passed to it from the + application, although it may make copies and modify those.""" + + def __init__(self, system_id = None): + self.__system_id = system_id + self.__public_id = None + self.__encoding = None + self.__bytefile = None + self.__charfile = None + + def setPublicId(self, public_id): + "Sets the public identifier of this InputSource." + self.__public_id = public_id + + def getPublicId(self): + "Returns the public identifier of this InputSource." + return self.__public_id + + def setSystemId(self, system_id): + "Sets the system identifier of this InputSource." + self.__system_id = system_id + + def getSystemId(self): + "Returns the system identifier of this InputSource." + return self.__system_id + + def setEncoding(self, encoding): + """Sets the character encoding of this InputSource. + + The encoding must be a string acceptable for an XML encoding + declaration (see section 4.3.3 of the XML recommendation). + + The encoding attribute of the InputSource is ignored if the + InputSource also contains a character stream.""" + self.__encoding = encoding + + def getEncoding(self): + "Get the character encoding of this InputSource." + return self.__encoding + + def setByteStream(self, bytefile): + """Set the byte stream (a Python file-like object which does + not perform byte-to-character conversion) for this input + source. + + The SAX parser will ignore this if there is also a character + stream specified, but it will use a byte stream in preference + to opening a URI connection itself. + + If the application knows the character encoding of the byte + stream, it should set it with the setEncoding method.""" + self.__bytefile = bytefile + + def getByteStream(self): + """Get the byte stream for this input source. + + The getEncoding method will return the character encoding for + this byte stream, or None if unknown.""" + return self.__bytefile + + def setCharacterStream(self, charfile): + """Set the character stream for this input source. (The stream + must be a Python 2.0 Unicode-wrapped file-like that performs + conversion to Unicode strings.) + + If there is a character stream specified, the SAX parser will + ignore any byte stream and will not attempt to open a URI + connection to the system identifier.""" + self.__charfile = charfile + + def getCharacterStream(self): + "Get the character stream for this input source." + return self.__charfile + +# ===== ATTRIBUTESIMPL ===== + +class AttributesImpl: + + def __init__(self, attrs): + """Non-NS-aware implementation. + + attrs should be of the form {name : value}.""" + self._attrs = attrs + + def getLength(self): + return len(self._attrs) + + def getType(self, name): + return "CDATA" + + def getValue(self, name): + return self._attrs[name] + + def getValueByQName(self, name): + return self._attrs[name] + + def getNameByQName(self, name): + if not self._attrs.has_key(name): + raise KeyError, name + return name + + def getQNameByName(self, name): + if not self._attrs.has_key(name): + raise KeyError, name + return name + + def getNames(self): + return self._attrs.keys() + + def getQNames(self): + return self._attrs.keys() + + def __len__(self): + return len(self._attrs) + + def __getitem__(self, name): + return self._attrs[name] + + def keys(self): + return self._attrs.keys() + + def has_key(self, name): + return self._attrs.has_key(name) + + def __contains__(self, name): + return self._attrs.has_key(name) + + def get(self, name, alternative=None): + return self._attrs.get(name, alternative) + + def copy(self): + return self.__class__(self._attrs) + + def items(self): + return self._attrs.items() + + def values(self): + return self._attrs.values() + +# ===== ATTRIBUTESNSIMPL ===== + +class AttributesNSImpl(AttributesImpl): + + def __init__(self, attrs, qnames): + """NS-aware implementation. + + attrs should be of the form {(ns_uri, lname): value, ...}. + qnames of the form {(ns_uri, lname): qname, ...}.""" + self._attrs = attrs + self._qnames = qnames + + def getValueByQName(self, name): + for (nsname, qname) in self._qnames.items(): + if qname == name: + return self._attrs[nsname] + + raise KeyError, name + + def getNameByQName(self, name): + for (nsname, qname) in self._qnames.items(): + if qname == name: + return nsname + + raise KeyError, name + + def getQNameByName(self, name): + return self._qnames[name] + + def getQNames(self): + return self._qnames.values() + + def copy(self): + return self.__class__(self._attrs, self._qnames) + + +def _test(): + XMLReader() + IncrementalParser() + Locator() + +if __name__ == "__main__": + _test() |