diff options
Diffstat (limited to 'Lib/test/test_pyexpat.py')
| -rw-r--r-- | Lib/test/test_pyexpat.py | 218 |
1 files changed, 176 insertions, 42 deletions
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 133c02b..27eecb8 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -2,11 +2,10 @@ # handler, are obscure and unhelpful. from io import BytesIO -import sys import unittest -import pyexpat from xml.parsers import expat +from xml.parsers.expat import errors from test.support import sortdict, run_unittest @@ -38,6 +37,7 @@ data = b'''\ <!-- comment data --> <!DOCTYPE quotations SYSTEM "quotations.dtd" [ <!ELEMENT root ANY> +<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED> <!NOTATION notation SYSTEM "notation.jpeg"> <!ENTITY acirc "â"> <!ENTITY external_entity SYSTEM "entity.file"> @@ -51,6 +51,7 @@ data = b'''\ </myns:subelement> <sub2><![CDATA[contents of CDATA section]]></sub2> &external_entity; +&skipped_entity; </root> ''' @@ -99,7 +100,7 @@ class ParseTest(unittest.TestCase): entityName, base, systemId, publicId, notationName = args self.out.append('Unparsed entity decl: %s' %(args,)) - def NotStandaloneHandler(self, userData): + def NotStandaloneHandler(self): self.out.append('Not standalone') return 1 @@ -108,6 +109,34 @@ class ParseTest(unittest.TestCase): self.out.append('External entity ref: %s' %(args[1:],)) return 1 + def StartDoctypeDeclHandler(self, *args): + self.out.append(('Start doctype', args)) + return 1 + + def EndDoctypeDeclHandler(self): + self.out.append("End doctype") + return 1 + + def EntityDeclHandler(self, *args): + self.out.append(('Entity declaration', args)) + return 1 + + def XmlDeclHandler(self, *args): + self.out.append(('XML declaration', args)) + return 1 + + def ElementDeclHandler(self, *args): + self.out.append(('Element declaration', args)) + return 1 + + def AttlistDeclHandler(self, *args): + self.out.append(('Attribute list declaration', args)) + return 1 + + def SkippedEntityHandler(self, *args): + self.out.append(("Skipped entity", args)) + return 1 + def DefaultHandler(self, userData): pass @@ -115,61 +144,86 @@ class ParseTest(unittest.TestCase): pass handler_names = [ - 'StartElementHandler', 'EndElementHandler', - 'CharacterDataHandler', 'ProcessingInstructionHandler', - 'UnparsedEntityDeclHandler', 'NotationDeclHandler', - 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler', - 'CommentHandler', 'StartCdataSectionHandler', - 'EndCdataSectionHandler', - 'DefaultHandler', 'DefaultHandlerExpand', - #'NotStandaloneHandler', - 'ExternalEntityRefHandler' + 'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler', + 'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler', + 'NotationDeclHandler', 'StartNamespaceDeclHandler', + 'EndNamespaceDeclHandler', 'CommentHandler', + 'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler', + 'DefaultHandlerExpand', 'NotStandaloneHandler', + 'ExternalEntityRefHandler', 'StartDoctypeDeclHandler', + 'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler', + 'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler', ] - def _verify_parse_output(self, op): - self.assertEqual(op[0], 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'') - self.assertEqual(op[1], "Comment: ' comment data '") - self.assertEqual(op[2], "Notation declared: ('notation', None, 'notation.jpeg', None)") - self.assertEqual(op[3], "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')") - self.assertEqual(op[4], "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}") - self.assertEqual(op[5], "NS decl: 'myns' 'http://www.python.org/namespace'") - self.assertEqual(op[6], "Start element: 'http://www.python.org/namespace!subelement' {}") - self.assertEqual(op[7], "Character data: 'Contents of subelements'") - self.assertEqual(op[8], "End element: 'http://www.python.org/namespace!subelement'") - self.assertEqual(op[9], "End of NS decl: 'myns'") - self.assertEqual(op[10], "Start element: 'sub2' {}") - self.assertEqual(op[11], 'Start of CDATA section') - self.assertEqual(op[12], "Character data: 'contents of CDATA section'") - self.assertEqual(op[13], 'End of CDATA section') - self.assertEqual(op[14], "End element: 'sub2'") - self.assertEqual(op[15], "External entity ref: (None, 'entity.file', None)") - self.assertEqual(op[16], "End element: 'root'") - + def _hookup_callbacks(self, parser, handler): + """ + Set each of the callbacks defined on handler and named in + self.handler_names on the given parser. + """ + for name in self.handler_names: + setattr(parser, name, getattr(handler, name)) + + def _verify_parse_output(self, operations): + expected_operations = [ + ('XML declaration', ('1.0', 'iso-8859-1', 0)), + 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'', + "Comment: ' comment data '", + "Not standalone", + ("Start doctype", ('quotations', 'quotations.dtd', None, 1)), + ('Element declaration', ('root', (2, 0, None, ()))), + ('Attribute list declaration', ('root', 'attr1', 'CDATA', None, + 1)), + ('Attribute list declaration', ('root', 'attr2', 'CDATA', None, + 0)), + "Notation declared: ('notation', None, 'notation.jpeg', None)", + ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)), + ('Entity declaration', ('external_entity', 0, None, None, + 'entity.file', None, None)), + "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')", + "Not standalone", + "End doctype", + "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}", + "NS decl: 'myns' 'http://www.python.org/namespace'", + "Start element: 'http://www.python.org/namespace!subelement' {}", + "Character data: 'Contents of subelements'", + "End element: 'http://www.python.org/namespace!subelement'", + "End of NS decl: 'myns'", + "Start element: 'sub2' {}", + 'Start of CDATA section', + "Character data: 'contents of CDATA section'", + 'End of CDATA section', + "End element: 'sub2'", + "External entity ref: (None, 'entity.file', None)", + ('Skipped entity', ('skipped_entity', 0)), + "End element: 'root'", + ] + for operation, expected_operation in zip(operations, expected_operations): + self.assertEqual(operation, expected_operation) def test_unicode(self): # Try the parse again, this time producing Unicode output out = self.Outputter() parser = expat.ParserCreate(namespace_separator='!') - for name in self.handler_names: - setattr(parser, name, getattr(out, name)) + self._hookup_callbacks(parser, out) parser.Parse(data, 1) - op = out.out - self._verify_parse_output(op) + operations = out.out + self._verify_parse_output(operations) + # Issue #6697. + self.assertRaises(AttributeError, getattr, parser, '\uD800') def test_parse_file(self): # Try parsing a file out = self.Outputter() parser = expat.ParserCreate(namespace_separator='!') - for name in self.handler_names: - setattr(parser, name, getattr(out, name)) + self._hookup_callbacks(parser, out) file = BytesIO(data) parser.ParseFile(file) - op = out.out - self._verify_parse_output(op) + operations = out.out + self._verify_parse_output(operations) class NamespaceSeparatorTest(unittest.TestCase): def test_legal(self): @@ -185,7 +239,7 @@ class NamespaceSeparatorTest(unittest.TestCase): self.fail() except TypeError as e: self.assertEqual(str(e), - 'ParserCreate() argument 2 must be string or None, not int') + 'ParserCreate() argument 2 must be str or None, not int') try: expat.ParserCreate(namespace_separator='too long') @@ -222,6 +276,25 @@ class InterningTest(unittest.TestCase): # L should have the same string repeated over and over. self.assertTrue(tag is entry) + def test_issue9402(self): + # create an ExternalEntityParserCreate with buffer text + class ExternalOutputter: + def __init__(self, parser): + self.parser = parser + self.parser_result = None + + def ExternalEntityRefHandler(self, context, base, sysId, pubId): + external_parser = self.parser.ExternalEntityParserCreate("") + self.parser_result = external_parser.Parse("", 1) + return 1 + + parser = expat.ParserCreate(namespace_separator='!') + parser.buffer_text = 1 + out = ExternalOutputter(parser) + parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler + parser.Parse(data, 1) + self.assertEqual(out.parser_result, 1) + class BufferTextTest(unittest.TestCase): def setUp(self): @@ -510,7 +583,7 @@ class ChardataBufferTest(unittest.TestCase): parser.Parse(xml2, 1) self.assertEqual(self.n, 4) -class MalformedInputText(unittest.TestCase): +class MalformedInputTest(unittest.TestCase): def test1(self): xml = "\0\r\n" parser = expat.ParserCreate() @@ -529,6 +602,65 @@ class MalformedInputText(unittest.TestCase): except expat.ExpatError as e: self.assertEqual(str(e), 'XML declaration not well-formed: line 1, column 14') +class ErrorMessageTest(unittest.TestCase): + def test_codes(self): + # verify mapping of errors.codes and errors.messages + self.assertEqual(errors.XML_ERROR_SYNTAX, + errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]]) + + def test_expaterror(self): + xml = '<' + parser = expat.ParserCreate() + try: + parser.Parse(xml, True) + self.fail() + except expat.ExpatError as e: + self.assertEqual(e.code, + errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN]) + + +class ForeignDTDTests(unittest.TestCase): + """ + Tests for the UseForeignDTD method of expat parser objects. + """ + def test_use_foreign_dtd(self): + """ + If UseForeignDTD is passed True and a document without an external + entity reference is parsed, ExternalEntityRefHandler is first called + with None for the public and system ids. + """ + handler_call_args = [] + def resolve_entity(context, base, system_id, public_id): + handler_call_args.append((public_id, system_id)) + return 1 + + parser = expat.ParserCreate() + parser.UseForeignDTD(True) + parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) + parser.ExternalEntityRefHandler = resolve_entity + parser.Parse("<?xml version='1.0'?><element/>") + self.assertEqual(handler_call_args, [(None, None)]) + + def test_ignore_use_foreign_dtd(self): + """ + If UseForeignDTD is passed True and a document with an external + entity reference is parsed, ExternalEntityRefHandler is called with + the public and system ids from the document. + """ + handler_call_args = [] + def resolve_entity(context, base, system_id, public_id): + handler_call_args.append((public_id, system_id)) + return 1 + + parser = expat.ParserCreate() + parser.UseForeignDTD(True) + parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) + parser.ExternalEntityRefHandler = resolve_entity + parser.Parse( + "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>") + self.assertEqual(handler_call_args, [("bar", "baz")]) + + def test_main(): run_unittest(SetAttributeTest, ParseTest, @@ -539,7 +671,9 @@ def test_main(): PositionTest, sf1296433Test, ChardataBufferTest, - MalformedInputText) + MalformedInputTest, + ErrorMessageTest, + ForeignDTDTests) if __name__ == "__main__": test_main() |
