diff options
Diffstat (limited to 'Lib/test/test_sax.py')
| -rw-r--r-- | Lib/test/test_sax.py | 226 |
1 files changed, 220 insertions, 6 deletions
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index cfa18f7..90f3016 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -10,16 +10,17 @@ except SAXReaderNotAvailable: # don't try to test this module if we cannot create a parser raise unittest.SkipTest("no XML parsers available") from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ - XMLFilterBase + XMLFilterBase, prepare_input_source from xml.sax.expatreader import create_parser from xml.sax.handler import feature_namespaces from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl from io import BytesIO, StringIO import codecs +import gc import os.path import shutil from test import support -from test.support import findfile, run_unittest +from test.support import findfile, run_unittest, TESTFN TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata") @@ -95,6 +96,126 @@ class XmlTestBase(unittest.TestCase): self.assertEqual(attrs["attr"], "val") self.assertEqual(attrs.getQNameByName("attr"), "attr") + +def xml_str(doc, encoding=None): + if encoding is None: + return doc + return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc) + +def xml_bytes(doc, encoding, decl_encoding=...): + if decl_encoding is ...: + decl_encoding = encoding + return xml_str(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace') + +def make_xml_file(doc, encoding, decl_encoding=...): + if decl_encoding is ...: + decl_encoding = encoding + with open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f: + f.write(xml_str(doc, decl_encoding)) + + +class ParseTest(unittest.TestCase): + data = '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>' + + def tearDown(self): + support.unlink(TESTFN) + + def check_parse(self, f): + from xml.sax import parse + result = StringIO() + parse(f, XMLGenerator(result, 'utf-8')) + self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8')) + + def test_parse_text(self): + encodings = ('us-ascii', 'iso-8859-1', 'utf-8', + 'utf-16', 'utf-16le', 'utf-16be') + for encoding in encodings: + self.check_parse(StringIO(xml_str(self.data, encoding))) + make_xml_file(self.data, encoding) + with open(TESTFN, 'r', encoding=encoding) as f: + self.check_parse(f) + self.check_parse(StringIO(self.data)) + make_xml_file(self.data, encoding, None) + with open(TESTFN, 'r', encoding=encoding) as f: + self.check_parse(f) + + def test_parse_bytes(self): + # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, + # UTF-16 is autodetected + encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') + for encoding in encodings: + self.check_parse(BytesIO(xml_bytes(self.data, encoding))) + make_xml_file(self.data, encoding) + self.check_parse(TESTFN) + with open(TESTFN, 'rb') as f: + self.check_parse(f) + self.check_parse(BytesIO(xml_bytes(self.data, encoding, None))) + make_xml_file(self.data, encoding, None) + self.check_parse(TESTFN) + with open(TESTFN, 'rb') as f: + self.check_parse(f) + # accept UTF-8 with BOM + self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))) + make_xml_file(self.data, 'utf-8-sig', 'utf-8') + self.check_parse(TESTFN) + with open(TESTFN, 'rb') as f: + self.check_parse(f) + self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', None))) + make_xml_file(self.data, 'utf-8-sig', None) + self.check_parse(TESTFN) + with open(TESTFN, 'rb') as f: + self.check_parse(f) + # accept data with declared encoding + self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1'))) + make_xml_file(self.data, 'iso-8859-1') + self.check_parse(TESTFN) + with open(TESTFN, 'rb') as f: + self.check_parse(f) + # fail on non-UTF-8 incompatible data without declared encoding + with self.assertRaises(SAXException): + self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None))) + make_xml_file(self.data, 'iso-8859-1', None) + with support.check_warnings(('unclosed file', ResourceWarning)): + # XXX Failed parser leaks an opened file. + with self.assertRaises(SAXException): + self.check_parse(TESTFN) + # Collect leaked file. + gc.collect() + with open(TESTFN, 'rb') as f: + with self.assertRaises(SAXException): + self.check_parse(f) + + def test_parse_InputSource(self): + # accept data without declared but with explicitly specified encoding + make_xml_file(self.data, 'iso-8859-1', None) + with open(TESTFN, 'rb') as f: + input = InputSource() + input.setByteStream(f) + input.setEncoding('iso-8859-1') + self.check_parse(input) + + def check_parseString(self, s): + from xml.sax import parseString + result = StringIO() + parseString(s, XMLGenerator(result, 'utf-8')) + self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8')) + + def test_parseString_bytes(self): + # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, + # UTF-16 is autodetected + encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') + for encoding in encodings: + self.check_parseString(xml_bytes(self.data, encoding)) + self.check_parseString(xml_bytes(self.data, encoding, None)) + # accept UTF-8 with BOM + self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8')) + self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None)) + # accept data with declared encoding + self.check_parseString(xml_bytes(self.data, 'iso-8859-1')) + # fail on non-UTF-8 incompatible data without declared encoding + with self.assertRaises(SAXException): + self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None)) + class MakeParserTest(unittest.TestCase): def test_make_parser2(self): # Creating parsers several times in a row should succeed. @@ -172,6 +293,60 @@ class SaxutilsTest(unittest.TestCase): p = make_parser(['xml.parsers.no_such_parser']) +class PrepareInputSourceTest(unittest.TestCase): + + def setUp(self): + self.file = support.TESTFN + with open(self.file, "w") as tmp: + tmp.write("This was read from a file.") + + def tearDown(self): + support.unlink(self.file) + + def make_byte_stream(self): + return BytesIO(b"This is a byte stream.") + + def checkContent(self, stream, content): + self.assertIsNotNone(stream) + self.assertEqual(stream.read(), content) + stream.close() + + + def test_byte_stream(self): + # If the source is an InputSource that does not have a character + # stream but does have a byte stream, use the byte stream. + src = InputSource(self.file) + src.setByteStream(self.make_byte_stream()) + prep = prepare_input_source(src) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This is a byte stream.") + + def test_system_id(self): + # If the source is an InputSource that has neither a character + # stream nor a byte stream, open the system ID. + src = InputSource(self.file) + prep = prepare_input_source(src) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This was read from a file.") + + def test_string(self): + # If the source is a string, use it as a system ID and open it. + prep = prepare_input_source(self.file) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This was read from a file.") + + def test_binary_file(self): + # If the source is a binary file-like object, use it as a byte + # stream. + prep = prepare_input_source(self.make_byte_stream()) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This is a byte stream.") + + # ===== XMLGenerator class XmlgenTest: @@ -622,7 +797,7 @@ class ExpatReaderTest(XmlTestBase): # ===== XMLReader support - def test_expat_file(self): + def test_expat_binary_file(self): parser = create_parser() result = BytesIO() xmlgen = XMLGenerator(result) @@ -633,8 +808,19 @@ class ExpatReaderTest(XmlTestBase): self.assertEqual(result.getvalue(), xml_test_out) + def test_expat_text_file(self): + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f: + parser.parse(f) + + self.assertEqual(result.getvalue(), xml_test_out) + @requires_nonascii_filenames - def test_expat_file_nonascii(self): + def test_expat_binary_file_nonascii(self): fname = support.TESTFN_UNICODE shutil.copyfile(TEST_XMLFILE, fname) self.addCleanup(support.unlink, fname) @@ -644,7 +830,31 @@ class ExpatReaderTest(XmlTestBase): xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) - parser.parse(open(fname)) + parser.parse(open(fname, 'rb')) + + self.assertEqual(result.getvalue(), xml_test_out) + + def test_expat_binary_file_bytes_name(self): + fname = os.fsencode(TEST_XMLFILE) + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + with open(fname, 'rb') as f: + parser.parse(f) + + self.assertEqual(result.getvalue(), xml_test_out) + + def test_expat_binary_file_int_name(self): + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + with open(TEST_XMLFILE, 'rb') as f: + with open(f.fileno(), 'rb', closefd=False) as f2: + parser.parse(f2) self.assertEqual(result.getvalue(), xml_test_out) @@ -802,7 +1012,7 @@ class ExpatReaderTest(XmlTestBase): self.assertEqual(result.getvalue(), xml_test_out) - def test_expat_inpsource_stream(self): + def test_expat_inpsource_byte_stream(self): parser = create_parser() result = BytesIO() xmlgen = XMLGenerator(result) @@ -916,6 +1126,8 @@ class ErrorReportingTest(unittest.TestCase): parser = create_parser() parser.setContentHandler(ContentHandler()) # do nothing self.assertRaises(SAXParseException, parser.parse, StringIO("<foo>")) + self.assertEqual(parser.getColumnNumber(), 5) + self.assertEqual(parser.getLineNumber(), 1) def test_sax_parse_exception_str(self): # pass various values from a locator to the SAXParseException to @@ -993,7 +1205,9 @@ class XmlReaderTest(XmlTestBase): def test_main(): run_unittest(MakeParserTest, + ParseTest, SaxutilsTest, + PrepareInputSourceTest, StringXmlgenTest, BytesXmlgenTest, WriterXmlgenTest, |
