summary | refs | log | tree | commit | diff | stats
path: root/Lib/test
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2013-08-06 13:56:26 (GMT)
committer Serhiy Storchaka <storchaka@gmail.com> 2013-08-06 13:56:26 (GMT)
commit e822b034e766e03cd8fbe7ab52fbc2d46fff6d33 (patch)
tree a2e79671034ae47c4a0d6d0d030aecda007c8123 /Lib/test
parent 5ad3514822a80c094f1cfe47ae59450001043482 (diff)
download cpython-e822b034e766e03cd8fbe7ab52fbc2d46fff6d33.zip
cpython-e822b034e766e03cd8fbe7ab52fbc2d46fff6d33.tar.gz
cpython-e822b034e766e03cd8fbe7ab52fbc2d46fff6d33.tar.bz2
Issue #15866: The xmlcharrefreplace error handler no longer produces two XML
entities for a non-BMP character on a narrow build.
Diffstat (limited to 'Lib/test')
-rw-r--r-- Lib/test/test_codeccallbacks.py 25
-rw-r--r-- Lib/test/test_unicode.py 12
2 files changed, 34 insertions, 3 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 61c2df2..ecaf997 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -66,15 +66,34 @@ class CodecCallbackTest(unittest.TestCase):
# replace unencodable characters with numeric character entities.
# For ascii, latin-1 and charmaps this is completely implemented
# in C and should be reasonably fast.
- s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
+ s = u"\u30b9\u30d1\u30e2 \xe4nd egg\u0161"
self.assertEqual(
s.encode("ascii", "xmlcharrefreplace"),
- "&#12473;&#12497;&#12514; &#228;nd eggs"
+ "&#12473;&#12497;&#12514; &#228;nd egg&#353;"
)
self.assertEqual(
s.encode("latin-1", "xmlcharrefreplace"),
- "&#12473;&#12497;&#12514; \xe4nd eggs"
+ "&#12473;&#12497;&#12514; \xe4nd egg&#353;"
)
+ self.assertEqual(
+ s.encode("iso-8859-15", "xmlcharrefreplace"),
+ "&#12473;&#12497;&#12514; \xe4nd egg\xa8"
+ )
+
+ def test_xmlcharrefreplace_with_surrogates(self):
+ tests = [(u'\U0001f49d', '&#128157;'),
+ (u'\ud83d', '&#55357;'),
+ (u'\udc9d', '&#56477;'),
+ (u'\ud83d\udc9d', '&#128157;' if len(u'\U0001f49d') > 1 else
+ '&#55357;&#56477;'),
+ ]
+ for encoding in ['ascii', 'latin1', 'iso-8859-15']:
+ for s, exp in tests:
+ self.assertEqual(s.encode(encoding, 'xmlcharrefreplace'),
+ exp, msg='%r.encode(%r)' % (s, encoding))
+ self.assertEqual((s+'X').encode(encoding, 'xmlcharrefreplace'),
+ exp+'X',
+ msg='%r.encode(%r)' % (s + 'X', encoding))
def test_xmlcharnamereplace(self):
# This time use a named character entity for unencodable
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index e44fe03..666cab8 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1658,6 +1658,18 @@ class UnicodeTest(
self.assertEqual(unicode_encodedecimal(u"123\u20ac\u0660", "replace"),
b'123?0')
+ def test_encode_decimal_with_surrogates(self):
+ from _testcapi import unicode_encodedecimal
+ tests = [(u'\U0001f49d', '&#128157;'),
+ (u'\ud83d', '&#55357;'),
+ (u'\udc9d', '&#56477;'),
+ (u'\ud83d\udc9d', '&#128157;' if len(u'\U0001f49d') > 1 else
+ '&#55357;&#56477;'),
+ ]
+ for s, exp in tests:
+ self.assertEqual(
+ unicode_encodedecimal(u"123" + s, "xmlcharrefreplace"),
+ '123' + exp)
def test_main():
test_support.run_unittest(__name__)