diff options
author | Fred Drake <fdrake@acm.org> | 2001-09-24 20:22:09 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 2001-09-24 20:22:09 (GMT) |
commit | 30c484916988862608e4efdfa8f8aa911e4cc0c3 (patch) | |
tree | e3301e878fc594cce276076cf39b13edf92f164b /Lib/test/test_sgmllib.py | |
parent | e822049efcfb3eafede8035609284656671aece4 (diff) | |
download | cpython-30c484916988862608e4efdfa8f8aa911e4cc0c3.zip cpython-30c484916988862608e4efdfa8f8aa911e4cc0c3.tar.gz cpython-30c484916988862608e4efdfa8f8aa911e4cc0c3.tar.bz2 |
Added several new tests to check the behavior with respect to doctype
declarations and weird markup. Early versions accepted and ignored such
markup, but recent versions raised an exception for it; the original
behavior has been restored and augmented (the user can decide what to do
if they care; the default is to ignore it, as was done in early versions).
Diffstat (limited to 'Lib/test/test_sgmllib.py')
-rw-r--r-- | Lib/test/test_sgmllib.py | 83 |
1 files changed, 77 insertions, 6 deletions
diff --git a/Lib/test/test_sgmllib.py b/Lib/test/test_sgmllib.py index a37696d..ff0af9e 100644 --- a/Lib/test/test_sgmllib.py +++ b/Lib/test/test_sgmllib.py @@ -54,6 +54,9 @@ class EventCollector(sgmllib.SGMLParser): def handle_pi(self, data): self.append(("pi", data)) + def unknown_decl(self, decl): + self.append(("unknown decl", decl)) + class CDATAEventCollector(EventCollector): def start_cdata(self, attrs): @@ -65,12 +68,24 @@ class SGMLParserTestCase(unittest.TestCase): collector = EventCollector - def check_events(self, source, expected_events): + def get_events(self, source): parser = self.collector() - for s in source: - parser.feed(s) - parser.close() - events = parser.get_events() + try: + for s in source: + parser.feed(s) + parser.close() + except: + #self.events = parser.events + raise + return parser.get_events() + + def check_events(self, source, expected_events): + try: + events = self.get_events(source) + except: + import sys + #print >>sys.stderr, pprint.pformat(self.events) + raise if events != expected_events: self.fail("received events did not match expected events\n" "Expected:\n" + pprint.pformat(expected_events) + @@ -87,6 +102,31 @@ class SGMLParserTestCase(unittest.TestCase): self.fail("expected SGMLParseError for %r\nReceived:\n%s" % (source, pprint.pformat(parser.get_events()))) + def test_doctype_decl_internal(self): + inside = """\ +DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN' + SYSTEM 'http://www.w3.org/TR/html401/strict.dtd' [ + <!ELEMENT html - O EMPTY> + <!ATTLIST html + version CDATA #IMPLIED + profile CDATA 'DublinCore'> + <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'> + <!ENTITY myEntity 'internal parsed entity'> + <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'> + <!ENTITY % paramEntity 'name|name|name'> + %paramEntity; + <!-- comment --> +]""" + self.check_events(["<!%s>" % inside], [ + ("decl", inside), + ]) + + def test_doctype_decl_external(self): + inside = "DOCTYPE 
html PUBLIC '-//W3C//DTD HTML 4.01//EN'" + self.check_events("<!%s>" % inside, [ + ("decl", inside), + ]) + def test_underscore_in_attrname(self): # SF bug #436621 """Make sure attribute names with underscores are accepted""" @@ -132,6 +172,16 @@ class SGMLParserTestCase(unittest.TestCase): ("endtag", "b"), ]) + def test_bare_ampersands(self): + self.check_events("this text & contains & ampersands &", [ + ("data", "this text & contains & ampersands &"), + ]) + + def test_bare_pointy_brackets(self): + self.check_events("this < text > contains < bare>pointy< brackets", [ + ("data", "this < text > contains < bare>pointy< brackets"), + ]) + def test_attr_syntax(self): output = [ ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")]) @@ -156,6 +206,14 @@ class SGMLParserTestCase(unittest.TestCase): ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]), ]) + def test_illegal_declarations(self): + s = 'abc<!spacer type="block" height="25">def' + self.check_events(s, [ + ("data", "abc"), + ("unknown decl", 'spacer type="block" height="25"'), + ("data", "def"), + ]) + def test_weird_starttags(self): self.check_events("<a<a>", [ ("starttag", "a", []), @@ -196,6 +254,14 @@ class SGMLParserTestCase(unittest.TestCase): ("endtag", "cdata"), ]) + def test_illegal_declarations(self): + s = 'abc<!spacer type="block" height="25">def' + self.check_events(s, [ + ("data", "abc"), + ("unknown decl", 'spacer type="block" height="25"'), + ("data", "def"), + ]) + # XXX These tests have been disabled by prefixing their names with # an underscore. The first two exercise outstanding bugs in the # sgmllib module, and the third exhibits questionable behavior @@ -240,4 +306,9 @@ class SGMLParserTestCase(unittest.TestCase): self.check_parse_error("<a foo=>") -test_support.run_unittest(SGMLParserTestCase) +def test_main(): + test_support.run_unittest(SGMLParserTestCase) + + +if __name__ == "__main__": + test_main() |