diff options
author | Eli Bendersky <eliben@gmail.com> | 2013-05-25 12:25:48 (GMT) |
---|---|---|
committer | Eli Bendersky <eliben@gmail.com> | 2013-05-25 12:25:48 (GMT) |
commit | 6dc32b34ddfba0ddb990cbbb77cf8803879d20f9 (patch) | |
tree | 52b3529adfb9b0ccdf71bd5a34a1edb8bb50fdff /Lib/test | |
parent | 6b5a38c728bbca3273b6917308559cf22e298531 (diff) | |
download | cpython-6dc32b34ddfba0ddb990cbbb77cf8803879d20f9.zip cpython-6dc32b34ddfba0ddb990cbbb77cf8803879d20f9.tar.gz cpython-6dc32b34ddfba0ddb990cbbb77cf8803879d20f9.tar.bz2 |
Issue #13612: handle unknown encodings without a buffer overflow.
This affects pyexpat and _elementtree. PyExpat_CAPI now exposes a new
function - DefaultUnknownEncodingHandler.
Based on a patch by Serhiy Storchaka.
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_xml_etree.py | 92 |
1 file changed, 92 insertions, 0 deletions
```diff
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 746ca28..1722292 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -690,6 +690,98 @@ class ElementTreeTest(unittest.TestCase):
         check("cp437", '\u221a')
         check("mac-roman", '\u02da')
 
+        def xml(encoding):
+            return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
+        def bxml(encoding):
+            return xml(encoding).encode(encoding)
+        supported_encodings = [
+            'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
+            'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
+            'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
+            'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
+            'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
+            'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
+            'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1250',
+            'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 'cp1256',
+            'cp1257', 'cp1258',
+            'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
+            'mac-roman', 'mac-turkish',
+            'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
+            'iso2022-jp-3', 'iso2022-jp-ext',
+            'koi8-r', 'koi8-u',
+            'hz', 'ptcp154',
+        ]
+        for encoding in supported_encodings:
+            self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
+
+        unsupported_ascii_compatible_encodings = [
+            'big5', 'big5hkscs',
+            'cp932', 'cp949', 'cp950',
+            'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
+            'gb2312', 'gbk', 'gb18030',
+            'iso2022-kr', 'johab',
+            'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
+            'utf-7',
+        ]
+        for encoding in unsupported_ascii_compatible_encodings:
+            self.assertRaises(ValueError, ET.XML, bxml(encoding))
+
+        unsupported_ascii_incompatible_encodings = [
+            'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
+            'utf_32', 'utf_32_be', 'utf_32_le',
+        ]
+        for encoding in unsupported_ascii_incompatible_encodings:
+            self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
+
+        self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
+        self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
+
+        def xml(encoding):
+            return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
+        def bxml(encoding):
+            return xml(encoding).encode(encoding)
+        supported_encodings = [
+            'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
+            'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
+            'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
+            'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
+            'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
+            'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
+            'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1250',
+            'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 'cp1256',
+            'cp1257', 'cp1258',
+            'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
+            'mac-roman', 'mac-turkish',
+            'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
+            'iso2022-jp-3', 'iso2022-jp-ext',
+            'koi8-r', 'koi8-u',
+            'hz', 'ptcp154',
+        ]
+        for encoding in supported_encodings:
+            self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
+
+        unsupported_ascii_compatible_encodings = [
+            'big5', 'big5hkscs',
+            'cp932', 'cp949', 'cp950',
+            'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
+            'gb2312', 'gbk', 'gb18030',
+            'iso2022-kr', 'johab',
+            'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
+            'utf-7',
+        ]
+        for encoding in unsupported_ascii_compatible_encodings:
+            self.assertRaises(ValueError, ET.XML, bxml(encoding))
+
+        unsupported_ascii_incompatible_encodings = [
+            'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
+            'utf_32', 'utf_32_be', 'utf_32_le',
+        ]
+        for encoding in unsupported_ascii_incompatible_encodings:
+            self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
+
+        self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
+        self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
+
     def test_methods(self):
         # Test serialization methods.
 
```