diff options
author | Amaury Forgeot d'Arc <amauryfa@gmail.com> | 2010-08-18 20:44:58 (GMT) |
---|---|---|
committer | Amaury Forgeot d'Arc <amauryfa@gmail.com> | 2010-08-18 20:44:58 (GMT) |
commit | 324ac65cebf4b0141b946452a2604715f1ca7010 (patch) | |
tree | 4e7f6013c54da773ccd41db43738d9f2b62c05c8 /Lib | |
parent | 36e778ef02b74f8a965da1205813e685e3ffcff4 (diff) | |
download | cpython-324ac65cebf4b0141b946452a2604715f1ca7010.zip cpython-324ac65cebf4b0141b946452a2604715f1ca7010.tar.gz cpython-324ac65cebf4b0141b946452a2604715f1ca7010.tar.bz2 |
#5127: Even on narrow unicode builds, the C functions that access the Unicode
Database (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others) now accept
and return characters from the full Unicode range (Py_UCS4).
The differences visible from Python code are few:
- unicodedata.numeric(), unicodedata.decimal() and unicodedata.digit()
now return the correct value for large code points
- repr() may consider more characters as printable.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_unicode.py | 4 | ||||
-rw-r--r-- | Lib/test/test_unicodedata.py | 6 |
2 files changed, 10 insertions, 0 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index d7d30d2..ae5f53d 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1353,6 +1353,10 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertEqual(repr(s1()), '\\n')
         self.assertEqual(repr(s2()), '\\n')
 
+    def test_printable_repr(self):
+        self.assertEqual(repr('\U00010000'), "'%c'" % (0x10000,)) # printable
+        self.assertEqual(repr('\U00011000'), "'\\U00011000'") # nonprintable
+
     def test_expandtabs_overflows_gracefully(self):
         # This test only affects 32-bit platforms because expandtabs can only take
         # an int as the max value, not a 64-bit C long. If expandtabs is changed
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index b572261..bfbb0aa 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -294,6 +294,12 @@ class UnicodeMiscTest(UnicodeDatabaseTest):
                 self.assertEqual(len(lines), 1,
                                  r"\u%.4x should not be a linebreak" % i)
 
+    def test_UCS4(self):
+        # unicodedata should work with code points outside the BMP
+        # even on a narrow Unicode build
+        self.assertEqual(self.db.category(u"\U0001012A"), "No")
+        self.assertEqual(self.db.numeric(u"\U0001012A"), 9000)
+
 def test_main():
     test.support.run_unittest(
         UnicodeMiscTest,