#5127: Even on narrow unicode builds, the C functions that access the Unicode
Database (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others) now accept and
return characters from the full Unicode range (Py_UCS4).

The differences visible from Python code are few:
- unicodedata.numeric(), unicodedata.decimal() and unicodedata.digit() now
  return the correct value for large code points
- repr() may consider more characters as printable.
author: Amaury Forgeot d'Arc <amauryfa@gmail.com> 2010-08-18 20:44:58 (GMT)
committer: Amaury Forgeot d'Arc <amauryfa@gmail.com> 2010-08-18 20:44:58 (GMT)
commit: 324ac65cebf4b0141b946452a2604715f1ca7010 (patch)
tree: 4e7f6013c54da773ccd41db43738d9f2b62c05c8 /Tools/unicode/makeunicodedata.py
parent: 36e778ef02b74f8a965da1205813e685e3ffcff4 (diff)
download: cpython-324ac65cebf4b0141b946452a2604715f1ca7010.zip
cpython-324ac65cebf4b0141b946452a2604715f1ca7010.tar.gz
cpython-324ac65cebf4b0141b946452a2604715f1ca7010.tar.bz2
1 file changed, 4 insertions, 37 deletions
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index f38b866..7266a91 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -28,7 +28,7 @@
 import sys
 
 SCRIPT = sys.argv[0]
-VERSION = "2.6"
+VERSION = "3.2"
 
 # The Unicode Database
 UNIDATA_VERSION = "5.2.0"
@@ -479,7 +479,7 @@ def makeunicodetype(unicode, trace):
     print('/* Returns the numeric value as double for Unicode characters', file=fp)
     print(' * having this property, -1.0 otherwise.', file=fp)
     print(' */', file=fp)
-    print('double _PyUnicode_ToNumeric(Py_UNICODE ch)', file=fp)
+    print('double _PyUnicode_ToNumeric(Py_UCS4 ch)', file=fp)
     print('{', file=fp)
     print('    switch (ch) {', file=fp)
     for value, codepoints in numeric_items:
@@ -488,21 +488,10 @@ def makeunicodetype(unicode, trace):
         parts = [repr(float(part)) for part in parts]
         value = '/'.join(parts)
 
-        haswide = False
-        hasnonewide = False
         codepoints.sort()
         for codepoint in codepoints:
-            if codepoint < 0x10000:
-                hasnonewide = True
-            if codepoint >= 0x10000 and not haswide:
-                print('#ifdef Py_UNICODE_WIDE', file=fp)
-                haswide = True
             print('    case 0x%04X:' % (codepoint,), file=fp)
-        if haswide and hasnonewide:
-            print('#endif', file=fp)
         print('        return (double) %s;' % (value,), file=fp)
-        if haswide and not hasnonewide:
-            print('#endif', file=fp)
     print('    }', file=fp)
     print('    return -1.0;', file=fp)
     print('}', file=fp)
@@ -512,27 +501,16 @@ def makeunicodetype(unicode, trace):
     print("/* Returns 1 for Unicode characters having the bidirectional", file=fp)
     print(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.", file=fp)
     print(" */", file=fp)
-    print('int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)', file=fp)
+    print('int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)', file=fp)
     print('{', file=fp)
     print('#ifdef WANT_WCTYPE_FUNCTIONS', file=fp)
     print('    return iswspace(ch);', file=fp)
     print('#else', file=fp)
     print('    switch (ch) {', file=fp)
 
-    haswide = False
-    hasnonewide = False
     for codepoint in sorted(spaces):
-        if codepoint < 0x10000:
-            hasnonewide = True
-        if codepoint >= 0x10000 and not haswide:
-            print('#ifdef Py_UNICODE_WIDE', file=fp)
-            haswide = True
         print('    case 0x%04X:' % (codepoint,), file=fp)
-    if haswide and hasnonewide:
-        print('#endif', file=fp)
     print('        return 1;', file=fp)
-    if haswide and not hasnonewide:
-        print('#endif', file=fp)
 
     print('    }', file=fp)
     print('    return 0;', file=fp)
@@ -545,23 +523,12 @@ def makeunicodetype(unicode, trace):
     print(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional", file=fp)
     print(" * type 'B', 0 otherwise.", file=fp)
     print(" */", file=fp)
-    print('int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)', file=fp)
+    print('int _PyUnicode_IsLinebreak(register const Py_UCS4 ch)', file=fp)
     print('{', file=fp)
     print('    switch (ch) {', file=fp)
-    haswide = False
-    hasnonewide = False
     for codepoint in sorted(linebreaks):
-        if codepoint < 0x10000:
-            hasnonewide = True
-        if codepoint >= 0x10000 and not haswide:
-            print('#ifdef Py_UNICODE_WIDE', file=fp)
-            haswide = True
         print('    case 0x%04X:' % (codepoint,), file=fp)
-    if haswide and hasnonewide:
-        print('#endif', file=fp)
     print('        return 1;', file=fp)
-    if haswide and not hasnonewide:
-        print('#endif', file=fp)
 
     print('    }', file=fp)
     print('    return 0;', file=fp)
author	Amaury Forgeot d'Arc <amauryfa@gmail.com>	2010-08-18 20:44:58 (GMT)
committer	Amaury Forgeot d'Arc <amauryfa@gmail.com>	2010-08-18 20:44:58 (GMT)
commit	324ac65cebf4b0141b946452a2604715f1ca7010 (patch)
tree	4e7f6013c54da773ccd41db43738d9f2b62c05c8 /Tools/unicode/makeunicodedata.py
parent	36e778ef02b74f8a965da1205813e685e3ffcff4 (diff)
download	cpython-324ac65cebf4b0141b946452a2604715f1ca7010.zip cpython-324ac65cebf4b0141b946452a2604715f1ca7010.tar.gz cpython-324ac65cebf4b0141b946452a2604715f1ca7010.tar.bz2