diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2022-04-27 16:16:20 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-27 16:16:20 (GMT) |
commit | f60b4c3d74f241775f80affe60dcba6448634fe3 (patch) | |
tree | e382c77f2b900101851632ac8fca544220e26a99 | |
parent | ad9f817eeb2d2d36834e7bad2264ad0c0de1d1c4 (diff) | |
download | cpython-f60b4c3d74f241775f80affe60dcba6448634fe3.zip cpython-f60b4c3d74f241775f80affe60dcba6448634fe3.tar.gz cpython-f60b4c3d74f241775f80affe60dcba6448634fe3.tar.bz2 |
gh-91810: Expand ElementTree.write() tests to use non-ASCII data (GH-91989)
-rw-r--r-- | Lib/test/test_xml_etree.py | 97 |
1 files changed, 80 insertions, 17 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 60a4150..db25eab 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -130,6 +130,9 @@ def checkwarnings(*filters, quiet=False): return newtest return decorator +def convlinesep(data): + return data.replace(b'\n', os.linesep.encode()) + class ModuleTest(unittest.TestCase): def test_sanity(self): @@ -3713,32 +3716,92 @@ class IOTest(unittest.TestCase): def test_write_to_filename(self): self.addCleanup(os_helper.unlink, TESTFN) - tree = ET.ElementTree(ET.XML('''<site />''')) + tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) tree.write(TESTFN) with open(TESTFN, 'rb') as f: - self.assertEqual(f.read(), b'''<site />''') + self.assertEqual(f.read(), b'''<site>&#248;</site>''') + + def test_write_to_filename_with_encoding(self): + self.addCleanup(os_helper.unlink, TESTFN) + tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) + tree.write(TESTFN, encoding='utf-8') + with open(TESTFN, 'rb') as f: + self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''') + + tree.write(TESTFN, encoding='ISO-8859-1') + with open(TESTFN, 'rb') as f: + self.assertEqual(f.read(), convlinesep( + b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n''' + b'''<site>\xf8</site>''')) + + def test_write_to_filename_as_unicode(self): + self.addCleanup(os_helper.unlink, TESTFN) + with open(TESTFN, 'w') as f: + encoding = f.encoding + os_helper.unlink(TESTFN) + + try: + '\xf8'.encode(encoding) + except UnicodeEncodeError: + self.skipTest(f'default file encoding {encoding} not supported') + + tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) + tree.write(TESTFN, encoding='unicode') + with open(TESTFN, 'rb') as f: + data = f.read() + expected = "<site>\xf8</site>".encode(encoding, 'xmlcharrefreplace') + self.assertEqual(data, expected) def test_write_to_text_file(self): self.addCleanup(os_helper.unlink, TESTFN) - tree = ET.ElementTree(ET.XML('''<site />''')) + tree = 
ET.ElementTree(ET.XML('''<site>\xf8</site>''')) with open(TESTFN, 'w', encoding='utf-8') as f: tree.write(f, encoding='unicode') self.assertFalse(f.closed) with open(TESTFN, 'rb') as f: - self.assertEqual(f.read(), b'''<site />''') + self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''') + + with open(TESTFN, 'w', encoding='ascii', errors='xmlcharrefreplace') as f: + tree.write(f, encoding='unicode') + self.assertFalse(f.closed) + with open(TESTFN, 'rb') as f: + self.assertEqual(f.read(), b'''<site>&#248;</site>''') + + with open(TESTFN, 'w', encoding='ISO-8859-1') as f: + tree.write(f, encoding='unicode') + self.assertFalse(f.closed) + with open(TESTFN, 'rb') as f: + self.assertEqual(f.read(), b'''<site>\xf8</site>''') def test_write_to_binary_file(self): self.addCleanup(os_helper.unlink, TESTFN) - tree = ET.ElementTree(ET.XML('''<site />''')) + tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) with open(TESTFN, 'wb') as f: tree.write(f) self.assertFalse(f.closed) with open(TESTFN, 'rb') as f: - self.assertEqual(f.read(), b'''<site />''') + self.assertEqual(f.read(), b'''<site>&#248;</site>''') + + def test_write_to_binary_file_with_encoding(self): + self.addCleanup(os_helper.unlink, TESTFN) + tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) + with open(TESTFN, 'wb') as f: + tree.write(f, encoding='utf-8') + self.assertFalse(f.closed) + with open(TESTFN, 'rb') as f: + self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''') + + with open(TESTFN, 'wb') as f: + tree.write(f, encoding='ISO-8859-1') + self.assertFalse(f.closed) + with open(TESTFN, 'rb') as f: + self.assertEqual(f.read(), + b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n''' + b'''<site>\xf8</site>''') def test_write_to_binary_file_with_bom(self): self.addCleanup(os_helper.unlink, TESTFN) - tree = ET.ElementTree(ET.XML('''<site />''')) + tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) # test BOM writing to buffered file with open(TESTFN, 'wb') as f: tree.write(f, encoding='utf-16') @@ 
-3746,7 +3809,7 @@ class IOTest(unittest.TestCase): with open(TESTFN, 'rb') as f: self.assertEqual(f.read(), '''<?xml version='1.0' encoding='utf-16'?>\n''' - '''<site />'''.encode("utf-16")) + '''<site>\xf8</site>'''.encode("utf-16")) # test BOM writing to non-buffered file with open(TESTFN, 'wb', buffering=0) as f: tree.write(f, encoding='utf-16') @@ -3754,7 +3817,7 @@ class IOTest(unittest.TestCase): with open(TESTFN, 'rb') as f: self.assertEqual(f.read(), '''<?xml version='1.0' encoding='utf-16'?>\n''' - '''<site />'''.encode("utf-16")) + '''<site>\xf8</site>'''.encode("utf-16")) def test_read_from_stringio(self): tree = ET.ElementTree() @@ -3763,10 +3826,10 @@ class IOTest(unittest.TestCase): self.assertEqual(tree.getroot().tag, 'site') def test_write_to_stringio(self): - tree = ET.ElementTree(ET.XML('''<site />''')) + tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) stream = io.StringIO() tree.write(stream, encoding='unicode') - self.assertEqual(stream.getvalue(), '''<site />''') + self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''') def test_read_from_bytesio(self): tree = ET.ElementTree() @@ -3775,10 +3838,10 @@ class IOTest(unittest.TestCase): self.assertEqual(tree.getroot().tag, 'site') def test_write_to_bytesio(self): - tree = ET.ElementTree(ET.XML('''<site />''')) + tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) raw = io.BytesIO() tree.write(raw) - self.assertEqual(raw.getvalue(), b'''<site />''') + self.assertEqual(raw.getvalue(), b'''<site>&#248;</site>''') class dummy: pass @@ -3792,12 +3855,12 @@ class IOTest(unittest.TestCase): self.assertEqual(tree.getroot().tag, 'site') def test_write_to_user_text_writer(self): - tree = ET.ElementTree(ET.XML('''<site />''')) + tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) stream = io.StringIO() writer = self.dummy() writer.write = stream.write tree.write(writer, encoding='unicode') - self.assertEqual(stream.getvalue(), '''<site />''') + self.assertEqual(stream.getvalue(), 
'''<site>\xf8</site>''') def test_read_from_user_binary_reader(self): raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''') @@ -3809,12 +3872,12 @@ class IOTest(unittest.TestCase): tree = ET.ElementTree() def test_write_to_user_binary_writer(self): - tree = ET.ElementTree(ET.XML('''<site />''')) + tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) raw = io.BytesIO() writer = self.dummy() writer.write = raw.write tree.write(writer) - self.assertEqual(raw.getvalue(), b'''<site />''') + self.assertEqual(raw.getvalue(), b'''<site>&#248;</site>''') def test_write_to_user_binary_writer_with_bom(self): tree = ET.ElementTree(ET.XML('''<site />''')) |