summary | refs | log | tree | commit | diff | stats
path: root/Lib/xml/sax/expatreader.py
diff options
context:
space:
mode:
author: Fred Drake <fdrake@acm.org> 2005-12-14 06:20:35 (GMT)
committer: Fred Drake <fdrake@acm.org> 2005-12-14 06:20:35 (GMT)
commit: c6730e1772f52b99feaffb95cdb7774905340d52 (patch)
tree: 0f125cf464348edd71c41f35c4ecbff8f8fe6114 /Lib/xml/sax/expatreader.py
parent: cc117dbb9d925ade41ae3e43ff2292b83b5d9b32 (diff)
download: cpython-c6730e1772f52b99feaffb95cdb7774905340d52.zip
cpython-c6730e1772f52b99feaffb95cdb7774905340d52.tar.gz
cpython-c6730e1772f52b99feaffb95cdb7774905340d52.tar.bz2
move the xml package implementation to xmlcore, and adjust the tests to
test that package, not the xmlcore/PyXML switcheroo fiasco in the xml module/package
Diffstat (limited to 'Lib/xml/sax/expatreader.py')
-rw-r--r-- Lib/xml/sax/expatreader.py 414
1 files changed, 0 insertions, 414 deletions
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
deleted file mode 100644
index bb9c294..0000000
--- a/Lib/xml/sax/expatreader.py
+++ /dev/null
@@ -1,414 +0,0 @@
-"""
-SAX driver for the pyexpat C module. This driver works with
-pyexpat.__version__ == '2.22'.
-"""
-
-version = "0.20"
-
-from xml.sax._exceptions import *
-from xml.sax.handler import feature_validation, feature_namespaces
-from xml.sax.handler import feature_namespace_prefixes
-from xml.sax.handler import feature_external_ges, feature_external_pes
-from xml.sax.handler import feature_string_interning
-from xml.sax.handler import property_xml_string, property_interning_dict
-
-# xml.parsers.expat does not raise ImportError in Jython
-import sys
-if sys.platform[:4] == "java":
- raise SAXReaderNotAvailable("expat not available in Java", None)
-del sys
-
-try:
- from xml.parsers import expat
-except ImportError:
- raise SAXReaderNotAvailable("expat not supported", None)
-else:
- if not hasattr(expat, "ParserCreate"):
- raise SAXReaderNotAvailable("expat not supported", None)
-from xml.sax import xmlreader, saxutils, handler
-
-AttributesImpl = xmlreader.AttributesImpl
-AttributesNSImpl = xmlreader.AttributesNSImpl
-
-# If we're using a sufficiently recent version of Python, we can use
-# weak references to avoid cycles between the parser and content
-# handler, otherwise we'll just have to pretend.
-try:
- import _weakref
-except ImportError:
- def _mkproxy(o):
- return o
-else:
- import weakref
- _mkproxy = weakref.proxy
- del weakref, _weakref
-
-# --- ExpatLocator
-
-class ExpatLocator(xmlreader.Locator):
- """Locator for use with the ExpatParser class.
-
- This uses a weak reference to the parser object to avoid creating
- a circular reference between the parser and the content handler.
- """
- def __init__(self, parser):
- self._ref = _mkproxy(parser)
-
- def getColumnNumber(self):
- parser = self._ref
- if parser._parser is None:
- return None
- return parser._parser.ErrorColumnNumber
-
- def getLineNumber(self):
- parser = self._ref
- if parser._parser is None:
- return 1
- return parser._parser.ErrorLineNumber
-
- def getPublicId(self):
- parser = self._ref
- if parser is None:
- return None
- return parser._source.getPublicId()
-
- def getSystemId(self):
- parser = self._ref
- if parser is None:
- return None
- return parser._source.getSystemId()
-
-
-# --- ExpatParser
-
-class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
- """SAX driver for the pyexpat C module."""
-
- def __init__(self, namespaceHandling=0, bufsize=2**16-20):
- xmlreader.IncrementalParser.__init__(self, bufsize)
- self._source = xmlreader.InputSource()
- self._parser = None
- self._namespaces = namespaceHandling
- self._lex_handler_prop = None
- self._parsing = 0
- self._entity_stack = []
- self._external_ges = 1
- self._interning = None
-
- # XMLReader methods
-
- def parse(self, source):
- "Parse an XML document from a URL or an InputSource."
- source = saxutils.prepare_input_source(source)
-
- self._source = source
- self.reset()
- self._cont_handler.setDocumentLocator(ExpatLocator(self))
- xmlreader.IncrementalParser.parse(self, source)
-
- def prepareParser(self, source):
- if source.getSystemId() != None:
- self._parser.SetBase(source.getSystemId())
-
- # Redefined setContentHandler to allow changing handlers during parsing
-
- def setContentHandler(self, handler):
- xmlreader.IncrementalParser.setContentHandler(self, handler)
- if self._parsing:
- self._reset_cont_handler()
-
- def getFeature(self, name):
- if name == feature_namespaces:
- return self._namespaces
- elif name == feature_string_interning:
- return self._interning is not None
- elif name in (feature_validation, feature_external_pes,
- feature_namespace_prefixes):
- return 0
- elif name == feature_external_ges:
- return self._external_ges
- raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
-
- def setFeature(self, name, state):
- if self._parsing:
- raise SAXNotSupportedException("Cannot set features while parsing")
-
- if name == feature_namespaces:
- self._namespaces = state
- elif name == feature_external_ges:
- self._external_ges = state
- elif name == feature_string_interning:
- if state:
- if self._interning is None:
- self._interning = {}
- else:
- self._interning = None
- elif name == feature_validation:
- if state:
- raise SAXNotSupportedException(
- "expat does not support validation")
- elif name == feature_external_pes:
- if state:
- raise SAXNotSupportedException(
- "expat does not read external parameter entities")
- elif name == feature_namespace_prefixes:
- if state:
- raise SAXNotSupportedException(
- "expat does not report namespace prefixes")
- else:
- raise SAXNotRecognizedException(
- "Feature '%s' not recognized" % name)
-
- def getProperty(self, name):
- if name == handler.property_lexical_handler:
- return self._lex_handler_prop
- elif name == property_interning_dict:
- return self._interning
- elif name == property_xml_string:
- if self._parser:
- if hasattr(self._parser, "GetInputContext"):
- return self._parser.GetInputContext()
- else:
- raise SAXNotRecognizedException(
- "This version of expat does not support getting"
- " the XML string")
- else:
- raise SAXNotSupportedException(
- "XML string cannot be returned when not parsing")
- raise SAXNotRecognizedException("Property '%s' not recognized" % name)
-
- def setProperty(self, name, value):
- if name == handler.property_lexical_handler:
- self._lex_handler_prop = value
- if self._parsing:
- self._reset_lex_handler_prop()
- elif name == property_interning_dict:
- self._interning = value
- elif name == property_xml_string:
- raise SAXNotSupportedException("Property '%s' cannot be set" %
- name)
- else:
- raise SAXNotRecognizedException("Property '%s' not recognized" %
- name)
-
- # IncrementalParser methods
-
- def feed(self, data, isFinal = 0):
- if not self._parsing:
- self.reset()
- self._parsing = 1
- self._cont_handler.startDocument()
-
- try:
- # The isFinal parameter is internal to the expat reader.
- # If it is set to true, expat will check validity of the entire
- # document. When feeding chunks, they are not normally final -
- # except when invoked from close.
- self._parser.Parse(data, isFinal)
- except expat.error, e:
- exc = SAXParseException(expat.ErrorString(e.code), e, self)
- # FIXME: when to invoke error()?
- self._err_handler.fatalError(exc)
-
- def close(self):
- if self._entity_stack:
- # If we are completing an external entity, do nothing here
- return
- self.feed("", isFinal = 1)
- self._cont_handler.endDocument()
- self._parsing = 0
- # break cycle created by expat handlers pointing to our methods
- self._parser = None
-
- def _reset_cont_handler(self):
- self._parser.ProcessingInstructionHandler = \
- self._cont_handler.processingInstruction
- self._parser.CharacterDataHandler = self._cont_handler.characters
-
- def _reset_lex_handler_prop(self):
- lex = self._lex_handler_prop
- parser = self._parser
- if lex is None:
- parser.CommentHandler = None
- parser.StartCdataSectionHandler = None
- parser.EndCdataSectionHandler = None
- parser.StartDoctypeDeclHandler = None
- parser.EndDoctypeDeclHandler = None
- else:
- parser.CommentHandler = lex.comment
- parser.StartCdataSectionHandler = lex.startCDATA
- parser.EndCdataSectionHandler = lex.endCDATA
- parser.StartDoctypeDeclHandler = self.start_doctype_decl
- parser.EndDoctypeDeclHandler = lex.endDTD
-
- def reset(self):
- if self._namespaces:
- self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
- intern=self._interning)
- self._parser.namespace_prefixes = 1
- self._parser.StartElementHandler = self.start_element_ns
- self._parser.EndElementHandler = self.end_element_ns
- else:
- self._parser = expat.ParserCreate(self._source.getEncoding(),
- intern = self._interning)
- self._parser.StartElementHandler = self.start_element
- self._parser.EndElementHandler = self.end_element
-
- self._reset_cont_handler()
- self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
- self._parser.NotationDeclHandler = self.notation_decl
- self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
- self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
-
- self._decl_handler_prop = None
- if self._lex_handler_prop:
- self._reset_lex_handler_prop()
-# self._parser.DefaultHandler =
-# self._parser.DefaultHandlerExpand =
-# self._parser.NotStandaloneHandler =
- self._parser.ExternalEntityRefHandler = self.external_entity_ref
- try:
- self._parser.SkippedEntityHandler = self.skipped_entity_handler
- except AttributeError:
- # This pyexpat does not support SkippedEntity
- pass
- self._parser.SetParamEntityParsing(
- expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
-
- self._parsing = 0
- self._entity_stack = []
-
- # Locator methods
-
- def getColumnNumber(self):
- if self._parser is None:
- return None
- return self._parser.ErrorColumnNumber
-
- def getLineNumber(self):
- if self._parser is None:
- return 1
- return self._parser.ErrorLineNumber
-
- def getPublicId(self):
- return self._source.getPublicId()
-
- def getSystemId(self):
- return self._source.getSystemId()
-
- # event handlers
- def start_element(self, name, attrs):
- self._cont_handler.startElement(name, AttributesImpl(attrs))
-
- def end_element(self, name):
- self._cont_handler.endElement(name)
-
- def start_element_ns(self, name, attrs):
- pair = name.split()
- if len(pair) == 1:
- # no namespace
- pair = (None, name)
- elif len(pair) == 3:
- pair = pair[0], pair[1]
- else:
- # default namespace
- pair = tuple(pair)
-
- newattrs = {}
- qnames = {}
- for (aname, value) in attrs.items():
- parts = aname.split()
- length = len(parts)
- if length == 1:
- # no namespace
- qname = aname
- apair = (None, aname)
- elif length == 3:
- qname = "%s:%s" % (parts[2], parts[1])
- apair = parts[0], parts[1]
- else:
- # default namespace
- qname = parts[1]
- apair = tuple(parts)
-
- newattrs[apair] = value
- qnames[apair] = qname
-
- self._cont_handler.startElementNS(pair, None,
- AttributesNSImpl(newattrs, qnames))
-
- def end_element_ns(self, name):
- pair = name.split()
- if len(pair) == 1:
- pair = (None, name)
- elif len(pair) == 3:
- pair = pair[0], pair[1]
- else:
- pair = tuple(pair)
-
- self._cont_handler.endElementNS(pair, None)
-
- # this is not used (call directly to ContentHandler)
- def processing_instruction(self, target, data):
- self._cont_handler.processingInstruction(target, data)
-
- # this is not used (call directly to ContentHandler)
- def character_data(self, data):
- self._cont_handler.characters(data)
-
- def start_namespace_decl(self, prefix, uri):
- self._cont_handler.startPrefixMapping(prefix, uri)
-
- def end_namespace_decl(self, prefix):
- self._cont_handler.endPrefixMapping(prefix)
-
- def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
- self._lex_handler_prop.startDTD(name, pubid, sysid)
-
- def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
- self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)
-
- def notation_decl(self, name, base, sysid, pubid):
- self._dtd_handler.notationDecl(name, pubid, sysid)
-
- def external_entity_ref(self, context, base, sysid, pubid):
- if not self._external_ges:
- return 1
-
- source = self._ent_handler.resolveEntity(pubid, sysid)
- source = saxutils.prepare_input_source(source,
- self._source.getSystemId() or
- "")
-
- self._entity_stack.append((self._parser, self._source))
- self._parser = self._parser.ExternalEntityParserCreate(context)
- self._source = source
-
- try:
- xmlreader.IncrementalParser.parse(self, source)
- except:
- return 0 # FIXME: save error info here?
-
- (self._parser, self._source) = self._entity_stack[-1]
- del self._entity_stack[-1]
- return 1
-
- def skipped_entity_handler(self, name, is_pe):
- if is_pe:
- # The SAX spec requires to report skipped PEs with a '%'
- name = '%'+name
- self._cont_handler.skippedEntity(name)
-
-# ---
-
-def create_parser(*args, **kwargs):
- return ExpatParser(*args, **kwargs)
-
-# ---
-
-if __name__ == "__main__":
- import xml.sax
- p = create_parser()
- p.setContentHandler(xml.sax.XMLGenerator())
- p.setErrorHandler(xml.sax.ErrorHandler())
- p.parse("../../../hamlet.xml")