summary refs log tree commit diff stats
path: root/Lib/xml/sax/expatreader.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/xml/sax/expatreader.py')
-rw-r--r-- Lib/xml/sax/expatreader.py 414
1 files changed, 414 insertions, 0 deletions
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
new file mode 100644
index 0000000..bb9c294
--- /dev/null
+++ b/Lib/xml/sax/expatreader.py
@@ -0,0 +1,414 @@
+"""
+SAX driver for the pyexpat C module. This driver works with
+pyexpat.__version__ == '2.22'.
+"""
+
+version = "0.20"
+
+from xml.sax._exceptions import *
+from xml.sax.handler import feature_validation, feature_namespaces
+from xml.sax.handler import feature_namespace_prefixes
+from xml.sax.handler import feature_external_ges, feature_external_pes
+from xml.sax.handler import feature_string_interning
+from xml.sax.handler import property_xml_string, property_interning_dict
+
+# xml.parsers.expat does not raise ImportError in Jython
+import sys
+if sys.platform[:4] == "java":
+ raise SAXReaderNotAvailable("expat not available in Java", None)
+del sys
+
+try:
+ from xml.parsers import expat
+except ImportError:
+ raise SAXReaderNotAvailable("expat not supported", None)
+else:
+ if not hasattr(expat, "ParserCreate"):
+ raise SAXReaderNotAvailable("expat not supported", None)
+from xml.sax import xmlreader, saxutils, handler
+
+# Module-level aliases for the attribute container classes that the
+# start_element / start_element_ns handlers below instantiate.
+AttributesImpl = xmlreader.AttributesImpl
+AttributesNSImpl = xmlreader.AttributesNSImpl
+
+# If we're using a sufficiently recent version of Python, we can use
+# weak references to avoid cycles between the parser and content
+# handler, otherwise we'll just have to pretend.
+try:
+    import _weakref
+except ImportError:
+    # No weak reference support: the "proxy" is simply the object itself,
+    # i.e. an ordinary strong reference.
+    def _mkproxy(o):
+        return o
+else:
+    import weakref
+    _mkproxy = weakref.proxy
+    # Only _mkproxy is used from here on; remove the two module names
+    # from this module's namespace again.
+    del weakref, _weakref
+
+# --- ExpatLocator
+
+class ExpatLocator(xmlreader.Locator):
+ """Locator for use with the ExpatParser class.
+
+ This uses a weak reference to the parser object to avoid creating
+ a circular reference between the parser and the content handler.
+ """
+ def __init__(self, parser):
+ self._ref = _mkproxy(parser)
+
+ def getColumnNumber(self):
+ parser = self._ref
+ if parser._parser is None:
+ return None
+ return parser._parser.ErrorColumnNumber
+
+ def getLineNumber(self):
+ parser = self._ref
+ if parser._parser is None:
+ return 1
+ return parser._parser.ErrorLineNumber
+
+ def getPublicId(self):
+ parser = self._ref
+ if parser is None:
+ return None
+ return parser._source.getPublicId()
+
+ def getSystemId(self):
+ parser = self._ref
+ if parser is None:
+ return None
+ return parser._source.getSystemId()
+
+
+# --- ExpatParser
+
+class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
+ """SAX driver for the pyexpat C module."""
+
+ def __init__(self, namespaceHandling=0, bufsize=2**16-20):
+ xmlreader.IncrementalParser.__init__(self, bufsize)
+ self._source = xmlreader.InputSource()
+ self._parser = None
+ self._namespaces = namespaceHandling
+ self._lex_handler_prop = None
+ self._parsing = 0
+ self._entity_stack = []
+ self._external_ges = 1
+ self._interning = None
+
+ # XMLReader methods
+
+ def parse(self, source):
+ "Parse an XML document from a URL or an InputSource."
+ source = saxutils.prepare_input_source(source)
+
+ self._source = source
+ self.reset()
+ self._cont_handler.setDocumentLocator(ExpatLocator(self))
+ xmlreader.IncrementalParser.parse(self, source)
+
+ def prepareParser(self, source):
+ if source.getSystemId() != None:
+ self._parser.SetBase(source.getSystemId())
+
+ # Redefined setContentHandler to allow changing handlers during parsing
+
+ def setContentHandler(self, handler):
+ xmlreader.IncrementalParser.setContentHandler(self, handler)
+ if self._parsing:
+ self._reset_cont_handler()
+
+ def getFeature(self, name):
+ if name == feature_namespaces:
+ return self._namespaces
+ elif name == feature_string_interning:
+ return self._interning is not None
+ elif name in (feature_validation, feature_external_pes,
+ feature_namespace_prefixes):
+ return 0
+ elif name == feature_external_ges:
+ return self._external_ges
+ raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
+
+ def setFeature(self, name, state):
+ if self._parsing:
+ raise SAXNotSupportedException("Cannot set features while parsing")
+
+ if name == feature_namespaces:
+ self._namespaces = state
+ elif name == feature_external_ges:
+ self._external_ges = state
+ elif name == feature_string_interning:
+ if state:
+ if self._interning is None:
+ self._interning = {}
+ else:
+ self._interning = None
+ elif name == feature_validation:
+ if state:
+ raise SAXNotSupportedException(
+ "expat does not support validation")
+ elif name == feature_external_pes:
+ if state:
+ raise SAXNotSupportedException(
+ "expat does not read external parameter entities")
+ elif name == feature_namespace_prefixes:
+ if state:
+ raise SAXNotSupportedException(
+ "expat does not report namespace prefixes")
+ else:
+ raise SAXNotRecognizedException(
+ "Feature '%s' not recognized" % name)
+
+ def getProperty(self, name):
+ if name == handler.property_lexical_handler:
+ return self._lex_handler_prop
+ elif name == property_interning_dict:
+ return self._interning
+ elif name == property_xml_string:
+ if self._parser:
+ if hasattr(self._parser, "GetInputContext"):
+ return self._parser.GetInputContext()
+ else:
+ raise SAXNotRecognizedException(
+ "This version of expat does not support getting"
+ " the XML string")
+ else:
+ raise SAXNotSupportedException(
+ "XML string cannot be returned when not parsing")
+ raise SAXNotRecognizedException("Property '%s' not recognized" % name)
+
+ def setProperty(self, name, value):
+ if name == handler.property_lexical_handler:
+ self._lex_handler_prop = value
+ if self._parsing:
+ self._reset_lex_handler_prop()
+ elif name == property_interning_dict:
+ self._interning = value
+ elif name == property_xml_string:
+ raise SAXNotSupportedException("Property '%s' cannot be set" %
+ name)
+ else:
+ raise SAXNotRecognizedException("Property '%s' not recognized" %
+ name)
+
+ # IncrementalParser methods
+
+ def feed(self, data, isFinal = 0):
+ if not self._parsing:
+ self.reset()
+ self._parsing = 1
+ self._cont_handler.startDocument()
+
+ try:
+ # The isFinal parameter is internal to the expat reader.
+ # If it is set to true, expat will check validity of the entire
+ # document. When feeding chunks, they are not normally final -
+ # except when invoked from close.
+ self._parser.Parse(data, isFinal)
+ except expat.error, e:
+ exc = SAXParseException(expat.ErrorString(e.code), e, self)
+ # FIXME: when to invoke error()?
+ self._err_handler.fatalError(exc)
+
+ def close(self):
+ if self._entity_stack:
+ # If we are completing an external entity, do nothing here
+ return
+ self.feed("", isFinal = 1)
+ self._cont_handler.endDocument()
+ self._parsing = 0
+ # break cycle created by expat handlers pointing to our methods
+ self._parser = None
+
+ def _reset_cont_handler(self):
+ self._parser.ProcessingInstructionHandler = \
+ self._cont_handler.processingInstruction
+ self._parser.CharacterDataHandler = self._cont_handler.characters
+
+ def _reset_lex_handler_prop(self):
+ lex = self._lex_handler_prop
+ parser = self._parser
+ if lex is None:
+ parser.CommentHandler = None
+ parser.StartCdataSectionHandler = None
+ parser.EndCdataSectionHandler = None
+ parser.StartDoctypeDeclHandler = None
+ parser.EndDoctypeDeclHandler = None
+ else:
+ parser.CommentHandler = lex.comment
+ parser.StartCdataSectionHandler = lex.startCDATA
+ parser.EndCdataSectionHandler = lex.endCDATA
+ parser.StartDoctypeDeclHandler = self.start_doctype_decl
+ parser.EndDoctypeDeclHandler = lex.endDTD
+
+ def reset(self):
+ if self._namespaces:
+ self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
+ intern=self._interning)
+ self._parser.namespace_prefixes = 1
+ self._parser.StartElementHandler = self.start_element_ns
+ self._parser.EndElementHandler = self.end_element_ns
+ else:
+ self._parser = expat.ParserCreate(self._source.getEncoding(),
+ intern = self._interning)
+ self._parser.StartElementHandler = self.start_element
+ self._parser.EndElementHandler = self.end_element
+
+ self._reset_cont_handler()
+ self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
+ self._parser.NotationDeclHandler = self.notation_decl
+ self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
+ self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
+
+ self._decl_handler_prop = None
+ if self._lex_handler_prop:
+ self._reset_lex_handler_prop()
+# self._parser.DefaultHandler =
+# self._parser.DefaultHandlerExpand =
+# self._parser.NotStandaloneHandler =
+ self._parser.ExternalEntityRefHandler = self.external_entity_ref
+ try:
+ self._parser.SkippedEntityHandler = self.skipped_entity_handler
+ except AttributeError:
+ # This pyexpat does not support SkippedEntity
+ pass
+ self._parser.SetParamEntityParsing(
+ expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
+
+ self._parsing = 0
+ self._entity_stack = []
+
+ # Locator methods
+
+ def getColumnNumber(self):
+ if self._parser is None:
+ return None
+ return self._parser.ErrorColumnNumber
+
+ def getLineNumber(self):
+ if self._parser is None:
+ return 1
+ return self._parser.ErrorLineNumber
+
+ def getPublicId(self):
+ return self._source.getPublicId()
+
+ def getSystemId(self):
+ return self._source.getSystemId()
+
+ # event handlers
+ def start_element(self, name, attrs):
+ self._cont_handler.startElement(name, AttributesImpl(attrs))
+
+ def end_element(self, name):
+ self._cont_handler.endElement(name)
+
+ def start_element_ns(self, name, attrs):
+ pair = name.split()
+ if len(pair) == 1:
+ # no namespace
+ pair = (None, name)
+ elif len(pair) == 3:
+ pair = pair[0], pair[1]
+ else:
+ # default namespace
+ pair = tuple(pair)
+
+ newattrs = {}
+ qnames = {}
+ for (aname, value) in attrs.items():
+ parts = aname.split()
+ length = len(parts)
+ if length == 1:
+ # no namespace
+ qname = aname
+ apair = (None, aname)
+ elif length == 3:
+ qname = "%s:%s" % (parts[2], parts[1])
+ apair = parts[0], parts[1]
+ else:
+ # default namespace
+ qname = parts[1]
+ apair = tuple(parts)
+
+ newattrs[apair] = value
+ qnames[apair] = qname
+
+ self._cont_handler.startElementNS(pair, None,
+ AttributesNSImpl(newattrs, qnames))
+
+ def end_element_ns(self, name):
+ pair = name.split()
+ if len(pair) == 1:
+ pair = (None, name)
+ elif len(pair) == 3:
+ pair = pair[0], pair[1]
+ else:
+ pair = tuple(pair)
+
+ self._cont_handler.endElementNS(pair, None)
+
+ # this is not used (call directly to ContentHandler)
+ def processing_instruction(self, target, data):
+ self._cont_handler.processingInstruction(target, data)
+
+ # this is not used (call directly to ContentHandler)
+ def character_data(self, data):
+ self._cont_handler.characters(data)
+
+ def start_namespace_decl(self, prefix, uri):
+ self._cont_handler.startPrefixMapping(prefix, uri)
+
+ def end_namespace_decl(self, prefix):
+ self._cont_handler.endPrefixMapping(prefix)
+
+ def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
+ self._lex_handler_prop.startDTD(name, pubid, sysid)
+
+ def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
+ self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)
+
+ def notation_decl(self, name, base, sysid, pubid):
+ self._dtd_handler.notationDecl(name, pubid, sysid)
+
+ def external_entity_ref(self, context, base, sysid, pubid):
+ if not self._external_ges:
+ return 1
+
+ source = self._ent_handler.resolveEntity(pubid, sysid)
+ source = saxutils.prepare_input_source(source,
+ self._source.getSystemId() or
+ "")
+
+ self._entity_stack.append((self._parser, self._source))
+ self._parser = self._parser.ExternalEntityParserCreate(context)
+ self._source = source
+
+ try:
+ xmlreader.IncrementalParser.parse(self, source)
+ except:
+ return 0 # FIXME: save error info here?
+
+ (self._parser, self._source) = self._entity_stack[-1]
+ del self._entity_stack[-1]
+ return 1
+
+ def skipped_entity_handler(self, name, is_pe):
+ if is_pe:
+ # The SAX spec requires to report skipped PEs with a '%'
+ name = '%'+name
+ self._cont_handler.skippedEntity(name)
+
+# ---
+
+def create_parser(*args, **kwargs):
+ return ExpatParser(*args, **kwargs)
+
+# ---
+
+if __name__ == "__main__":
+ import xml.sax
+ p = create_parser()
+ p.setContentHandler(xml.sax.XMLGenerator())
+ p.setErrorHandler(xml.sax.ErrorHandler())
+ p.parse("../../../hamlet.xml")