diff options
Diffstat (limited to 'Lib/xml/sax/saxutils.py')
-rw-r--r-- | Lib/xml/sax/saxutils.py | 299 |
1 files changed, 299 insertions, 0 deletions
# Reconstructed from: diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py
# (new file mode 100644, index 0000000..a496519)
"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""

import os
import urlparse
import urllib
import types
import handler
import xmlreader

# String types accepted as a bare system identifier by
# prepare_input_source(); UnicodeType is absent on builds without
# unicode support.
try:
    _StringTypes = [types.StringType, types.UnicodeType]
except AttributeError:
    _StringTypes = [types.StringType]

# See whether the xmlcharrefreplace error handler is
# supported
try:
    from codecs import xmlcharrefreplace_errors
    _error_handling = "xmlcharrefreplace"
    del xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"


def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary.

    Every key of d found in s is replaced by its value; the
    replacements are applied one after another in the dictionary's
    iteration order.
    """
    for key, value in d.items():
        s = s.replace(key, value)
    return s


def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    # NOTE: the default dictionary is only read, never mutated.

    # must do ampersand first, otherwise the ampersands introduced by
    # the other replacements would be escaped a second time
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data


def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last, so that e.g. "&amp;lt;" yields "&lt;"
    # rather than "<"
    return data.replace("&amp;", "&")


def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    # Work on a copy so the caller's dictionary (and the shared
    # default) is left untouched.
    entities = entities.copy()
    # Write whitespace as character references so it survives
    # attribute-value normalization when the output is parsed again.
    entities.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote characters occur: use double quotes and
            # escape the embedded ones.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data


class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives as XML text.

    out is the file-like object written to (sys.stdout when omitted).
    encoding is the name placed in the XML declaration and the codec
    used to encode non-str text before writing.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}]  # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping that
        # still have to be written on the next start tag
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write text to the output, encoding it unless it is a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Build the qualified name for a (namespace URI, local name) pair.

        Names without a namespace, and names whose namespace is mapped
        to the default (empty) prefix, are returned unqualified.  A
        namespace URI with no active prefix mapping raises KeyError.
        """
        uri, localname = name[0], name[1]
        if uri is None:
            return localname
        prefix = self._current_context[uri]
        if not prefix:
            return localname
        return prefix + ":" + localname

    # ContentHandler methods

    def startDocument(self):
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save the state before this mapping so endPrefixMapping can
        # restore it, then record the mapping in the live context.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        # The qname argument is not used; the qualified name is
        # rebuilt from the currently active prefix mappings.
        self._write('<' + self._qname(name))

        for (prefix, uri) in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                # default namespace declaration carries no prefix
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name),
                                    quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        self._write(content)

    def processingInstruction(self, target, data):
        self._write('<?%s %s?>' % (target, data))


class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    # NOTE(review): the _cont_handler, _err_handler, _dtd_handler and
    # _ent_handler attributes used below are not assigned in this
    # class; presumably xmlreader.XMLReader provides them -- confirm.

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register this filter as every kind of handler on the parent
        # so all of its events flow through the methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent

# --- Utility functions

def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may also be a string, which is used as the system
    identifier of a new InputSource, or an object with a read
    attribute, which becomes the byte stream of a new InputSource.
    """

    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        # Try the system id as a local file relative to the directory
        # of base first; otherwise resolve it as a URL against base.
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        # The opened stream is handed to the InputSource; it is not
        # closed here.
        source.setByteStream(f)

    return source