summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Demo/xml/elem_count.py36
-rw-r--r--Demo/xml/roundtrip.py45
-rw-r--r--Demo/xml/rss2html.py91
3 files changed, 172 insertions, 0 deletions
diff --git a/Demo/xml/elem_count.py b/Demo/xml/elem_count.py
new file mode 100644
index 0000000..4316199
--- /dev/null
+++ b/Demo/xml/elem_count.py
@@ -0,0 +1,36 @@
+import sys
+
+from xml.sax import make_parser, handler
+
+class FancyCounter(handler.ContentHandler):
+
+ def __init__(self):
+ self._elems = 0
+ self._attrs = 0
+ self._elem_types = {}
+ self._attr_types = {}
+
+ def startElement(self, name, attrs):
+ self._elems = self._elems + 1
+ self._attrs = self._attrs + len(attrs)
+ self._elem_types[name] = self._elem_types.get(name, 0) + 1
+
+ for name in attrs.keys():
+ self._attr_types[name] = self._attr_types.get(name, 0) + 1
+
+ def endDocument(self):
+ print "There were", self._elems, "elements."
+ print "There were", self._attrs, "attributes."
+
+ print "---ELEMENT TYPES"
+ for pair in self._elem_types.items():
+ print "%20s %d" % pair
+
+ print "---ATTRIBUTE TYPES"
+ for pair in self._attr_types.items():
+ print "%20s %d" % pair
+
+
+parser = make_parser()
+parser.setContentHandler(FancyCounter())
+parser.parse(sys.argv[1])
diff --git a/Demo/xml/roundtrip.py b/Demo/xml/roundtrip.py
new file mode 100644
index 0000000..e42bf1b
--- /dev/null
+++ b/Demo/xml/roundtrip.py
@@ -0,0 +1,45 @@
+"""
+A simple demo that reads in an XML document and spits out an equivalent,
+but not necessarily identical, document.
+"""
+
+import sys, string
+
+from xml.sax import saxutils, handler, make_parser
+
+# --- The ContentHandler
+
+class ContentGenerator(handler.ContentHandler):
+
+ def __init__(self, out = sys.stdout):
+ handler.ContentHandler.__init__(self)
+ self._out = out
+
+ # ContentHandler methods
+
+ def startDocument(self):
+ self._out.write('<?xml version="1.0" encoding="iso-8859-1"?>\n')
+
+ def startElement(self, name, attrs):
+ self._out.write('<' + name)
+ for (name, value) in attrs.items():
+ self._out.write(' %s="%s"' % (name, saxutils.escape(value)))
+ self._out.write('>')
+
+ def endElement(self, name):
+ self._out.write('</%s>' % name)
+
+ def characters(self, content):
+ self._out.write(saxutils.escape(content))
+
+ def ignorableWhitespace(self, content):
+ self._out.write(content)
+
+ def processingInstruction(self, target, data):
+ self._out.write('<?%s %s?>' % (target, data))
+
+# --- The main program
+
+parser = make_parser()
+parser.setContentHandler(ContentGenerator())
+parser.parse(sys.argv[1])
diff --git a/Demo/xml/rss2html.py b/Demo/xml/rss2html.py
new file mode 100644
index 0000000..df5d1c7
--- /dev/null
+++ b/Demo/xml/rss2html.py
@@ -0,0 +1,91 @@
+import sys
+
+from xml.sax import make_parser, handler
+
+# --- Templates
+
+top = \
+"""
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<HTML>
+<HEAD>
+ <TITLE>%s</TITLE>
+</HEAD>
+
+<BODY>
+<H1>%s</H1>
+"""
+
+bottom = \
+"""
+</ul>
+
+<HR>
+<ADDRESS>
+Converted to HTML by sax_rss2html.py.
+</ADDRESS>
+
+</BODY>
+</HTML>
+"""
+
+# --- The ContentHandler
+
+class RSSHandler(handler.ContentHandler):
+
+ def __init__(self, out = sys.stdout):
+ handler.ContentHandler.__init__(self)
+ self._out = out
+
+ self._text = ""
+ self._parent = None
+ self._list_started = 0
+ self._title = None
+ self._link = None
+ self._descr = ""
+
+ # ContentHandler methods
+
+ def startElement(self, name, attrs):
+ if name == "channel" or name == "image" or name == "item":
+ self._parent = name
+
+ self._text = ""
+
+ def endElement(self, name):
+ if self._parent == "channel":
+ if name == "title":
+ self._out.write(top % (self._text, self._text))
+ elif name == "description":
+ self._out.write("<p>%s</p>\n" % self._text)
+
+ elif self._parent == "item":
+ if name == "title":
+ self._title = self._text
+ elif name == "link":
+ self._link = self._text
+ elif name == "description":
+ self._descr = self._text
+ elif name == "item":
+ if not self._list_started:
+ self._out.write("<ul>\n")
+ self._list_started = 1
+
+ self._out.write(' <li><a href="%s">%s</a> %s\n' %
+ (self._link, self._title, self._descr))
+
+ self._title = None
+ self._link = None
+ self._descr = ""
+
+ if name == "rss":
+ self._out.write(bottom)
+
+ def characters(self, content):
+ self._text = self._text + content
+
+# --- Main program
+
+parser = make_parser()
+parser.setContentHandler(RSSHandler())
+parser.parse(sys.argv[1])