diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-08-06 13:56:26 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-08-06 13:56:26 (GMT) |
commit | e822b034e766e03cd8fbe7ab52fbc2d46fff6d33 (patch) | |
tree | a2e79671034ae47c4a0d6d0d030aecda007c8123 /Lib/test | |
parent | 5ad3514822a80c094f1cfe47ae59450001043482 (diff) | |
download | cpython-e822b034e766e03cd8fbe7ab52fbc2d46fff6d33.zip cpython-e822b034e766e03cd8fbe7ab52fbc2d46fff6d33.tar.gz cpython-e822b034e766e03cd8fbe7ab52fbc2d46fff6d33.tar.bz2 |
Issue #15866: The xmlcharrefreplace error handler no longer produces two XML
entities for a non-BMP character on a narrow build.
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_codeccallbacks.py | 25 | ||||
-rw-r--r-- | Lib/test/test_unicode.py | 12 |
2 files changed, 34 insertions, 3 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index 61c2df2..ecaf997 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -66,15 +66,34 @@ class CodecCallbackTest(unittest.TestCase): # replace unencodable characters which numeric character entities. # For ascii, latin-1 and charmaps this is completely implemented # in C and should be reasonably fast. - s = u"\u30b9\u30d1\u30e2 \xe4nd eggs" + s = u"\u30b9\u30d1\u30e2 \xe4nd egg\u0161" self.assertEqual( s.encode("ascii", "xmlcharrefreplace"), - "&#12473;&#12497;&#12514; &#228;nd eggs" + "&#12473;&#12497;&#12514; &#228;nd egg&#353;" ) self.assertEqual( s.encode("latin-1", "xmlcharrefreplace"), - "&#12473;&#12497;&#12514; \xe4nd eggs" + "&#12473;&#12497;&#12514; \xe4nd egg&#353;" ) + self.assertEqual( + s.encode("iso-8859-15", "xmlcharrefreplace"), + "&#12473;&#12497;&#12514; \xe4nd egg\xa8" + ) + + def test_xmlcharrefreplace_with_surrogates(self): + tests = [(u'\U0001f49d', '&#128157;'), + (u'\ud83d', '&#55357;'), + (u'\udc9d', '&#56477;'), + (u'\ud83d\udc9d', '&#128157;' if len(u'\U0001f49d') > 1 else + '&#55357;&#56477;'), + ] + for encoding in ['ascii', 'latin1', 'iso-8859-15']: + for s, exp in tests: + self.assertEqual(s.encode(encoding, 'xmlcharrefreplace'), + exp, msg='%r.encode(%r)' % (s, encoding)) + self.assertEqual((s+'X').encode(encoding, 'xmlcharrefreplace'), + exp+'X', + msg='%r.encode(%r)' % (s + 'X', encoding)) def test_xmlcharnamereplace(self): # This time use a named character entity for unencodable diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index e44fe03..666cab8 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1658,6 +1658,18 @@ class UnicodeTest( self.assertEqual(unicode_encodedecimal(u"123\u20ac\u0660", "replace"), b'123?0') + def test_encode_decimal_with_surrogates(self): + from _testcapi import unicode_encodedecimal + tests = [(u'\U0001f49d', '&#128157;'), + (u'\ud83d', '&#55357;'), + (u'\udc9d', '&#56477;'), + (u'\ud83d\udc9d', '&#128157;' if len(u'\U0001f49d') > 1 else + '&#55357;&#56477;'), + ] + for s, exp in tests: + self.assertEqual( + unicode_encodedecimal(u"123" + s, 
"xmlcharrefreplace"), + '123' + exp) def test_main(): test_support.run_unittest(__name__) |