diff options
-rw-r--r-- | Demo/xml/elem_count.py | 26 | ||||
-rw-r--r-- | Demo/xml/roundtrip.py | 11 | ||||
-rw-r--r-- | Demo/xml/rss2html.py | 54 |
3 files changed, 52 insertions, 39 deletions
diff --git a/Demo/xml/elem_count.py b/Demo/xml/elem_count.py index 7b53189..f4e6ef5 100644 --- a/Demo/xml/elem_count.py +++ b/Demo/xml/elem_count.py @@ -1,4 +1,10 @@ +""" +A simple demo that reads in an XML document and displays the number of +elements and attributes as well as a tally of elements and attributes by name. +""" + import sys +from collections import defaultdict from xml.sax import make_parser, handler @@ -7,16 +13,16 @@ class FancyCounter(handler.ContentHandler): def __init__(self): self._elems = 0 self._attrs = 0 - self._elem_types = {} - self._attr_types = {} + self._elem_types = defaultdict(int) + self._attr_types = defaultdict(int) def startElement(self, name, attrs): - self._elems = self._elems + 1 - self._attrs = self._attrs + len(attrs) - self._elem_types[name] = self._elem_types.get(name, 0) + 1 + self._elems += 1 + self._attrs += len(attrs) + self._elem_types[name] += 1 for name in attrs.keys(): - self._attr_types[name] = self._attr_types.get(name, 0) + 1 + self._attr_types[name] += 1 def endDocument(self): print "There were", self._elems, "elements." @@ -30,7 +36,7 @@ class FancyCounter(handler.ContentHandler): for pair in self._attr_types.items(): print "%20s %d" % pair - -parser = make_parser() -parser.setContentHandler(FancyCounter()) -parser.parse(sys.argv[1]) +if __name__ == '__main__': + parser = make_parser() + parser.setContentHandler(FancyCounter()) + parser.parse(sys.argv[1]) diff --git a/Demo/xml/roundtrip.py b/Demo/xml/roundtrip.py index 8d7d437..801c009 100644 --- a/Demo/xml/roundtrip.py +++ b/Demo/xml/roundtrip.py @@ -3,7 +3,7 @@ A simple demo that reads in an XML document and spits out an equivalent, but not necessarily identical, document. """ -import sys, string +import sys from xml.sax import saxutils, handler, make_parser @@ -11,7 +11,7 @@ from xml.sax import saxutils, handler, make_parser class ContentGenerator(handler.ContentHandler): - def __init__(self, out = sys.stdout): + def __init__(self, out=sys.stdout): handler.ContentHandler.__init__(self) self._out = out @@ -40,6 +40,7 @@ class ContentGenerator(handler.ContentHandler): # --- The main program -parser = make_parser() -parser.setContentHandler(ContentGenerator()) -parser.parse(sys.argv[1]) +if __name__ == '__main__': + parser = make_parser() + parser.setContentHandler(ContentGenerator()) + parser.parse(sys.argv[1]) diff --git a/Demo/xml/rss2html.py b/Demo/xml/rss2html.py index 15c9891..f2067a9 100644 --- a/Demo/xml/rss2html.py +++ b/Demo/xml/rss2html.py @@ -1,45 +1,50 @@ +""" +A demo that reads in an RSS XML document and emits an HTML file containing +a list of the individual items in the feed. +""" + import sys +import codecs from xml.sax import make_parser, handler # --- Templates -top = \ -""" +top = """\ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> -<HTML> -<HEAD> - <TITLE>%s</TITLE> -</HEAD> - -<BODY> -<H1>%s</H1> +<html> +<head> + <title>%s</title> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +</head> + +<body> +<h1>%s</h1> """ -bottom = \ -""" +bottom = """ </ul> -<HR> -<ADDRESS> -Converted to HTML by sax_rss2html.py. -</ADDRESS> +<hr> +<address> +Converted to HTML by rss2html.py. +</address> -</BODY> -</HTML> +</body> +</html> """ # --- The ContentHandler class RSSHandler(handler.ContentHandler): - def __init__(self, out = sys.stdout): + def __init__(self, out=sys.stdout): handler.ContentHandler.__init__(self) - self._out = out + self._out = codecs.getwriter('utf-8')(out) self._text = "" self._parent = None - self._list_started = 0 + self._list_started = False self._title = None self._link = None self._descr = "" @@ -69,7 +74,7 @@ class RSSHandler(handler.ContentHandler): elif name == "item": if not self._list_started: self._out.write("<ul>\n") - self._list_started = 1 + self._list_started = True self._out.write(' <li><a href="%s">%s</a> %s\n' % (self._link, self._title, self._descr)) @@ -86,6 +91,7 @@ class RSSHandler(handler.ContentHandler): # --- Main program -parser = make_parser() -parser.setContentHandler(RSSHandler()) -parser.parse(sys.argv[1]) +if __name__ == '__main__': + parser = make_parser() + parser.setContentHandler(RSSHandler()) + parser.parse(sys.argv[1]) |