diff options
-rw-r--r-- | Demo/xml/elem_count.py | 36 | ||||
-rw-r--r-- | Demo/xml/roundtrip.py | 45 | ||||
-rw-r--r-- | Demo/xml/rss2html.py | 91 |
3 files changed, 172 insertions, 0 deletions
# ===========================================================================
# Demo/xml/elem_count.py  (new file, mode 100644)
#
# Counts the elements and attributes of the XML document named on the
# command line and prints a summary.
# ===========================================================================

import sys

from xml.sax import make_parser, handler


class FancyCounter(handler.ContentHandler):
    """SAX handler that tallies elements and attributes, in total and by name."""

    def __init__(self):
        # Initialise the base class as the other demo handlers do.
        handler.ContentHandler.__init__(self)
        self._elems = 0          # total number of start tags seen
        self._attrs = 0          # total number of attributes seen
        self._elem_types = {}    # element name -> occurrence count
        self._attr_types = {}    # attribute name -> occurrence count

    def startElement(self, name, attrs):
        """Record one element called *name* and each of its attributes."""
        self._elems = self._elems + 1
        self._attrs = self._attrs + len(attrs)
        self._elem_types[name] = self._elem_types.get(name, 0) + 1

        # A separate loop variable keeps the element name from being rebound.
        for attr_name in attrs.keys():
            self._attr_types[attr_name] = self._attr_types.get(attr_name, 0) + 1

    def endDocument(self):
        """Print the totals followed by the per-name tables to stdout."""
        # Each print() gets exactly one pre-formatted string, so the output is
        # the same whether print is a statement (Python 2) or a function.
        print("There were %s elements." % self._elems)
        print("There were %s attributes." % self._attrs)

        print("---ELEMENT TYPES")
        for pair in self._elem_types.items():
            print("%20s %d" % pair)

        print("---ATTRIBUTE TYPES")
        for pair in self._attr_types.items():
            print("%20s %d" % pair)


# Guarded so that importing this module does not start a parse.
if __name__ == "__main__":
    parser = make_parser()
    parser.setContentHandler(FancyCounter())
    parser.parse(sys.argv[1])


# ===========================================================================
# Demo/xml/roundtrip.py  (new file, mode 100644)
# ===========================================================================

"""
A simple demo that reads in an XML document and spits out an equivalent,
but not necessarily identical, document.
"""

import sys
import string

from xml.sax import saxutils, handler, make_parser

# --- The ContentHandler

class ContentGenerator(handler.ContentHandler):
    """SAX handler that re-serialises the events it receives as XML text."""

    def __init__(self, out=sys.stdout):
        """Write the regenerated document to *out* (any object with write())."""
        handler.ContentHandler.__init__(self)
        self._out = out

    # ContentHandler methods

    def startDocument(self):
        # NOTE(review): the declared encoding is fixed text; whether it matches
        # the bytes actually produced depends on how `out` encodes -- confirm.
        self._out.write('<?xml version="1.0" encoding="iso-8859-1"?>\n')

    def startElement(self, name, attrs):
        """Emit the start tag for *name* with all of its attributes."""
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            # quoteattr() escapes the value AND supplies the surrounding
            # quotes, so values containing '"' still yield well-formed XML.
            self._out.write(' %s=%s' % (attr_name, saxutils.quoteattr(value)))
        self._out.write('>')

    def endElement(self, name):
        self._out.write('</%s>' % name)

    def characters(self, content):
        # Character data must have &, < and > escaped on the way back out.
        self._out.write(saxutils.escape(content))

    def ignorableWhitespace(self, content):
        self._out.write(content)

    def processingInstruction(self, target, data):
        self._out.write('<?%s %s?>' % (target, data))

# --- The main program

# Guarded so that importing this module does not start a parse.
if __name__ == "__main__":
    parser = make_parser()
    parser.setContentHandler(ContentGenerator())
    parser.parse(sys.argv[1])


# ===========================================================================
# Demo/xml/rss2html.py  (new file, mode 100644)
#
# Converts the RSS document named on the command line to an HTML page.
# ===========================================================================

import sys

from xml.sax import make_parser, handler, saxutils

# --- Templates

# Page header; both %s slots receive the channel title.
top = \
"""
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
 <TITLE>%s</TITLE>
</HEAD>

<BODY>
<H1>%s</H1>
"""

# Page footer.
# NOTE(review): this always closes a <ul>, even when no item opened one.
bottom = \
"""
</ul>

<HR>
<ADDRESS>
Converted to HTML by sax_rss2html.py.
</ADDRESS>

</BODY>
</HTML>
"""

# --- The ContentHandler

class RSSHandler(handler.ContentHandler):
    """SAX handler that renders an RSS channel and its items as HTML."""

    def __init__(self, out=sys.stdout):
        """Write the generated HTML to *out* (any object with write())."""
        handler.ContentHandler.__init__(self)
        self._out = out

        self._text = ""           # character data of the current element
        self._parent = None       # last channel/image/item element opened
        self._list_started = 0    # set once the opening <ul> was written
        self._title = None        # title of the item being collected
        self._link = None         # link of the item being collected
        self._descr = ""          # description of the item being collected

    # ContentHandler methods

    def startElement(self, name, attrs):
        """Track the enclosing container and start a fresh text buffer."""
        # NOTE(review): _parent is only ever assigned here and is not restored
        # when the container closes.
        if name == "channel" or name == "image" or name == "item":
            self._parent = name

        self._text = ""

    def endElement(self, name):
        """Emit HTML for a finished channel field or a finished item."""
        if self._parent == "channel":
            if name == "title":
                # Feed text is external input: escape before embedding it.
                heading = saxutils.escape(self._text)
                self._out.write(top % (heading, heading))
            elif name == "description":
                self._out.write("<p>%s</p>\n" % saxutils.escape(self._text))

        elif self._parent == "item":
            if name == "title":
                self._title = self._text
            elif name == "link":
                self._link = self._text
            elif name == "description":
                self._descr = self._text
            elif name == "item":
                if not self._list_started:
                    self._out.write("<ul>\n")
                    self._list_started = 1

                # quoteattr() supplies the quotes around the href value; a
                # missing title or link is rendered as an empty string.
                self._out.write(' <li><a href=%s>%s</a> %s\n' %
                                (saxutils.quoteattr(self._link or ""),
                                 saxutils.escape(self._title or ""),
                                 saxutils.escape(self._descr)))

                # Reset the per-item state for the next item.
                self._title = None
                self._link = None
                self._descr = ""

        if name == "rss":
            self._out.write(bottom)

    def characters(self, content):
        # Character data may arrive in several chunks; accumulate them.
        self._text = self._text + content

# --- Main program

# Guarded so that importing this module does not start a parse.
if __name__ == "__main__":
    parser = make_parser()
    parser.setContentHandler(RSSHandler())
    parser.parse(sys.argv[1])