From 5f5da728aec9c4f74cc771fbf30037b64a447514 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 25 Sep 2017 01:43:56 -0700 Subject: bpo-31170: Write unit test for Expat 2.2.4 UTF-8 bug (#3570) (#3745) Non-regression tests for the Expat 2.2.3 UTF-8 decoder bug. (cherry picked from commit e6d9fcbb8d0c325e57df08ae8781aafedb71eca2) --- Lib/test/test_xml_etree.py | 31 ++++++++++++++++++++++++++++++ Lib/test/xmltestdata/expat224_utf8_bug.xml | 2 ++ 2 files changed, 33 insertions(+) create mode 100644 Lib/test/xmltestdata/expat224_utf8_bug.xml diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 55d7010..e466867 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -30,6 +30,7 @@ ET = None SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") +UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata") SAMPLE_XML = """\ @@ -1494,6 +1495,36 @@ class BugsTest(unittest.TestCase): ET.register_namespace('test10777', 'http://myuri/') ET.register_namespace('test10777', 'http://myuri/') + def check_expat224_utf8_bug(self, text): + xml = b'<a b="%s"/>' % text + root = ET.XML(xml) + self.assertEqual(root.get('b'), text.decode('utf-8')) + + def test_expat224_utf8_bug(self): + # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder. + # Check that Expat 2.2.4 fixed the bug. + # + # Test buffer bounds at odd and even positions. 
+ + text = b'\xc3\xa0' * 1024 + self.check_expat224_utf8_bug(text) + + text = b'x' + b'\xc3\xa0' * 1024 + self.check_expat224_utf8_bug(text) + + def test_expat224_utf8_bug_file(self): + with open(UTF8_BUG_XMLFILE, 'rb') as fp: + raw = fp.read() + root = ET.fromstring(raw) + xmlattr = root.get('b') + + # "Parse" manually the XML file to extract the value of the 'b' + # attribute of the <a b='xxx' /> XML element + text = raw.decode('utf-8').strip() + text = text.replace('\r\n', ' ') + text = text[6:-4] + self.assertEqual(root.get('b'), text) + # -------------------------------------------------------------------- diff --git a/Lib/test/xmltestdata/expat224_utf8_bug.xml b/Lib/test/xmltestdata/expat224_utf8_bug.xml new file mode 100644 index 0000000..d66a8e6 --- /dev/null +++ b/Lib/test/xmltestdata/expat224_utf8_bug.xml @@ -0,0 +1,2 @@ + -- cgit v0.12