diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2015-04-02 20:05:23 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2015-04-02 20:05:23 (GMT) |
commit | aff77f3b194c14f0e788ab3287f8337a03d15e21 (patch) | |
tree | 40a8af16f16e8a0027aec752c6b91149cbd9f4a3 | |
parent | e9d4dc192f84d28d03796f18612dc54506e2a74c (diff) | |
download | cpython-aff77f3b194c14f0e788ab3287f8337a03d15e21.zip cpython-aff77f3b194c14f0e788ab3287f8337a03d15e21.tar.gz cpython-aff77f3b194c14f0e788ab3287f8337a03d15e21.tar.bz2 |
Issue #10590: Added tests for xml.sax.parse() and xml.sax.parseString().
-rw-r--r-- | Lib/test/test_sax.py | 109 |
1 file changed, 108 insertions, 1 deletion
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index e314b7c..5019722 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -15,10 +15,11 @@ from xml.sax.handler import feature_namespaces from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl from cStringIO import StringIO import io +import gc import os.path import shutil import test.test_support as support -from test.test_support import findfile, run_unittest +from test.test_support import findfile, run_unittest, TESTFN import unittest TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") @@ -90,6 +91,111 @@ class XmlTestBase(unittest.TestCase): self.assertEqual(attrs["attr"], "val") self.assertEqual(attrs.getQNameByName("attr"), "attr") + +def xml_unicode(doc, encoding=None): + if encoding is None: + return doc + return u'<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc) + +def xml_bytes(doc, encoding, decl_encoding=Ellipsis): + if decl_encoding is Ellipsis: + decl_encoding = encoding + return xml_unicode(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace') + +def make_xml_file(doc, encoding, decl_encoding=Ellipsis): + if decl_encoding is Ellipsis: + decl_encoding = encoding + with io.open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f: + f.write(xml_unicode(doc, decl_encoding)) + + +class ParseTest(unittest.TestCase): + data = support.u(r'<money value="$\xa3\u20ac\U0001017b">' + r'$\xa3\u20ac\U0001017b</money>') + + def tearDown(self): + support.unlink(TESTFN) + + def check_parse(self, f): + from xml.sax import parse + result = StringIO() + parse(f, XMLGenerator(result, 'utf-8')) + self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8')) + + def test_parse_bytes(self): + # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, + # UTF-16 is autodetected + encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') + for encoding in encodings: + self.check_parse(io.BytesIO(xml_bytes(self.data, encoding))) + 
make_xml_file(self.data, encoding) + self.check_parse(TESTFN) + with io.open(TESTFN, 'rb') as f: + self.check_parse(f) + self.check_parse(io.BytesIO(xml_bytes(self.data, encoding, None))) + make_xml_file(self.data, encoding, None) + self.check_parse(TESTFN) + with io.open(TESTFN, 'rb') as f: + self.check_parse(f) + # accept UTF-8 with BOM + self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))) + make_xml_file(self.data, 'utf-8-sig', 'utf-8') + self.check_parse(TESTFN) + with io.open(TESTFN, 'rb') as f: + self.check_parse(f) + self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', None))) + make_xml_file(self.data, 'utf-8-sig', None) + self.check_parse(TESTFN) + with io.open(TESTFN, 'rb') as f: + self.check_parse(f) + # accept data with declared encoding + self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1'))) + make_xml_file(self.data, 'iso-8859-1') + self.check_parse(TESTFN) + with io.open(TESTFN, 'rb') as f: + self.check_parse(f) + # fail on non-UTF-8 incompatible data without declared encoding + with self.assertRaises(SAXException): + self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1', None))) + make_xml_file(self.data, 'iso-8859-1', None) + with self.assertRaises(SAXException): + self.check_parse(TESTFN) + with io.open(TESTFN, 'rb') as f: + with self.assertRaises(SAXException): + self.check_parse(f) + + def test_parse_InputSource(self): + # accept data without declared but with explicitly specified encoding + make_xml_file(self.data, 'iso-8859-1', None) + with io.open(TESTFN, 'rb') as f: + input = InputSource() + input.setByteStream(f) + input.setEncoding('iso-8859-1') + self.check_parse(input) + + def check_parseString(self, s): + from xml.sax import parseString + result = StringIO() + parseString(s, XMLGenerator(result, 'utf-8')) + self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8')) + + def test_parseString_bytes(self): + # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, + # UTF-16 is 
autodetected + encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') + for encoding in encodings: + self.check_parseString(xml_bytes(self.data, encoding)) + self.check_parseString(xml_bytes(self.data, encoding, None)) + # accept UTF-8 with BOM + self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8')) + self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None)) + # accept data with declared encoding + self.check_parseString(xml_bytes(self.data, 'iso-8859-1')) + # fail on non-UTF-8 incompatible data without declared encoding + with self.assertRaises(SAXException): + self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None)) + + class MakeParserTest(unittest.TestCase): def test_make_parser2(self): # Creating parsers several times in a row should succeed. @@ -949,6 +1055,7 @@ class XmlReaderTest(XmlTestBase): def test_main(): run_unittest(MakeParserTest, + ParseTest, SaxutilsTest, PrepareInputSourceTest, StringXmlgenTest, |