#5127: Even on narrow unicode builds, the C functions that access the Unicode
Database (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others) now accept and
return characters from the full Unicode range (Py_UCS4).

The differences visible from Python code are few:
- unicodedata.numeric(), unicodedata.decimal() and unicodedata.digit() now
  return the correct value for large code points
- repr() may consider more characters as printable.
author: Amaury Forgeot d'Arc <amauryfa@gmail.com> 2010-08-18 20:44:58 (GMT)
committer: Amaury Forgeot d'Arc <amauryfa@gmail.com> 2010-08-18 20:44:58 (GMT)
commit: 324ac65cebf4b0141b946452a2604715f1ca7010 (patch)
tree: 4e7f6013c54da773ccd41db43738d9f2b62c05c8 /Lib
parent: 36e778ef02b74f8a965da1205813e685e3ffcff4 (diff)
download: cpython-324ac65cebf4b0141b946452a2604715f1ca7010.zip
cpython-324ac65cebf4b0141b946452a2604715f1ca7010.tar.gz
cpython-324ac65cebf4b0141b946452a2604715f1ca7010.tar.bz2
2 files changed, 10 insertions, 0 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index d7d30d2..ae5f53d 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1353,6 +1353,10 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertEqual(repr(s1()), '\\n')
         self.assertEqual(repr(s2()), '\\n')
 
+    def test_printable_repr(self):
+        self.assertEqual(repr('\U00010000'), "'%c'" % (0x10000,)) # printable
+        self.assertEqual(repr('\U00011000'), "'\\U00011000'")     # nonprintable
+
     def test_expandtabs_overflows_gracefully(self):
         # This test only affects 32-bit platforms because expandtabs can only take
         # an int as the max value, not a 64-bit C long.  If expandtabs is changed
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index b572261..bfbb0aa 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -294,6 +294,12 @@ class UnicodeMiscTest(UnicodeDatabaseTest):
                 self.assertEqual(len(lines), 1,
                                  r"\u%.4x should not be a linebreak" % i)
 
+    def test_UCS4(self):
+        # unicodedata should work with code points outside the BMP
+        # even on a narrow Unicode build
+        self.assertEqual(self.db.category(u"\U0001012A"), "No")
+        self.assertEqual(self.db.numeric(u"\U0001012A"), 9000)
+
 def test_main():
     test.support.run_unittest(
         UnicodeMiscTest,
author	Amaury Forgeot d'Arc <amauryfa@gmail.com>	2010-08-18 20:44:58 (GMT)
committer	Amaury Forgeot d'Arc <amauryfa@gmail.com>	2010-08-18 20:44:58 (GMT)
commit	324ac65cebf4b0141b946452a2604715f1ca7010 (patch)
tree	4e7f6013c54da773ccd41db43738d9f2b62c05c8 /Lib
parent	36e778ef02b74f8a965da1205813e685e3ffcff4 (diff)
download	cpython-324ac65cebf4b0141b946452a2604715f1ca7010.zip cpython-324ac65cebf4b0141b946452a2604715f1ca7010.tar.gz cpython-324ac65cebf4b0141b946452a2604715f1ca7010.tar.bz2