diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2015-09-29 10:32:13 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2015-09-29 10:32:13 (GMT) |
commit | c3713e9706e51bbd30958c27d35e7fda764b0c4a (patch) | |
tree | 43a7def678412164cfe0fdcbc0ac1250d7d3ab10 /Lib/test/test_codecs.py | |
parent | 5fbeabcbb6ea1d4af91fea0bc96c3d01f47b728f (diff) | |
download | cpython-c3713e9706e51bbd30958c27d35e7fda764b0c4a.zip cpython-c3713e9706e51bbd30958c27d35e7fda764b0c4a.tar.gz cpython-c3713e9706e51bbd30958c27d35e7fda764b0c4a.tar.bz2 |
Optimize ascii/latin1+surrogateescape encoders
Issue #25227: Optimize ASCII and latin1 encoders with the ``surrogateescape``
error handler: the encoders are now up to 3 times as fast.
Initial patch written by Serhiy Storchaka.
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- | Lib/test/test_codecs.py | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index e0e3119..254c0c1 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -3060,7 +3060,31 @@ class CodePageTest(unittest.TestCase): class ASCIITest(unittest.TestCase): + def test_encode(self): + self.assertEqual('abc123'.encode('ascii'), b'abc123') + + def test_encode_error(self): + for data, error_handler, expected in ( + ('[\x80\xff\u20ac]', 'ignore', b'[]'), + ('[\x80\xff\u20ac]', 'replace', b'[???]'), + ('[\x80\xff\u20ac]', 'xmlcharrefreplace', b'[€ÿ€]'), + ('[\x80\xff\u20ac]', 'backslashreplace', b'[\\x80\\xff\\u20ac]'), + ('[\udc80\udcff]', 'surrogateescape', b'[\x80\xff]'), + ): + with self.subTest(data=data, error_handler=error_handler, + expected=expected): + self.assertEqual(data.encode('ascii', error_handler), + expected) + + def test_encode_surrogateescape_error(self): + with self.assertRaises(UnicodeEncodeError): + # the first character can be decoded, but not the second + '\udc80\xff'.encode('ascii', 'surrogateescape') + def test_decode(self): + self.assertEqual(b'abc'.decode('ascii'), 'abc') + + def test_decode_error(self): for data, error_handler, expected in ( (b'[\x80\xff]', 'ignore', '[]'), (b'[\x80\xff]', 'replace', '[\ufffd\ufffd]'), @@ -3073,5 +3097,41 @@ class ASCIITest(unittest.TestCase): expected) +class Latin1Test(unittest.TestCase): + def test_encode(self): + for data, expected in ( + ('abc', b'abc'), + ('\x80\xe9\xff', b'\x80\xe9\xff'), + ): + with self.subTest(data=data, expected=expected): + self.assertEqual(data.encode('latin1'), expected) + + def test_encode_errors(self): + for data, error_handler, expected in ( + ('[\u20ac\udc80]', 'ignore', b'[]'), + ('[\u20ac\udc80]', 'replace', b'[??]'), + ('[\u20ac\udc80]', 'backslashreplace', b'[\\u20ac\\udc80]'), + ('[\u20ac\udc80]', 'xmlcharrefreplace', b'[€�]'), + ('[\udc80\udcff]', 'surrogateescape', b'[\x80\xff]'), + ): + with self.subTest(data=data, error_handler=error_handler, + expected=expected): + self.assertEqual(data.encode('latin1', error_handler), + expected) + + def test_encode_surrogateescape_error(self): + with self.assertRaises(UnicodeEncodeError): + # the first character can be decoded, but not the second + '\udc80\u20ac'.encode('latin1', 'surrogateescape') + + def test_decode(self): + for data, expected in ( + (b'abc', 'abc'), + (b'[\x80\xff]', '[\x80\xff]'), + ): + with self.subTest(data=data, expected=expected): + self.assertEqual(data.decode('latin1'), expected) + + if __name__ == "__main__": unittest.main() |