summary | refs | log | tree | commit | diff | stats
path: root/Tools/unicode/makeunicodedata.py
diff options
context:
space:
mode:
author: Amaury Forgeot d'Arc <amauryfa@gmail.com> 2010-08-18 20:44:58 (GMT)
committer: Amaury Forgeot d'Arc <amauryfa@gmail.com> 2010-08-18 20:44:58 (GMT)
commit: 324ac65cebf4b0141b946452a2604715f1ca7010 (patch)
tree: 4e7f6013c54da773ccd41db43738d9f2b62c05c8 /Tools/unicode/makeunicodedata.py
parent: 36e778ef02b74f8a965da1205813e685e3ffcff4 (diff)
download: cpython-324ac65cebf4b0141b946452a2604715f1ca7010.zip
cpython-324ac65cebf4b0141b946452a2604715f1ca7010.tar.gz
cpython-324ac65cebf4b0141b946452a2604715f1ca7010.tar.bz2
#5127: Even on narrow unicode builds, the C functions that access the Unicode
Database (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others) now accept and
return characters from the full Unicode range (Py_UCS4).

The differences visible from Python code are few:
- unicodedata.numeric(), unicodedata.decimal() and unicodedata.digit() now
  return the correct value for large code points
- repr() may consider more characters as printable.
Diffstat (limited to 'Tools/unicode/makeunicodedata.py')
-rw-r--r-- Tools/unicode/makeunicodedata.py 41
1 file changed, 4 insertions, 37 deletions
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index f38b866..7266a91 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -28,7 +28,7 @@
import sys
SCRIPT = sys.argv[0]
-VERSION = "2.6"
+VERSION = "3.2"
# The Unicode Database
UNIDATA_VERSION = "5.2.0"
@@ -479,7 +479,7 @@ def makeunicodetype(unicode, trace):
print('/* Returns the numeric value as double for Unicode characters', file=fp)
print(' * having this property, -1.0 otherwise.', file=fp)
print(' */', file=fp)
- print('double _PyUnicode_ToNumeric(Py_UNICODE ch)', file=fp)
+ print('double _PyUnicode_ToNumeric(Py_UCS4 ch)', file=fp)
print('{', file=fp)
print(' switch (ch) {', file=fp)
for value, codepoints in numeric_items:
@@ -488,21 +488,10 @@ def makeunicodetype(unicode, trace):
parts = [repr(float(part)) for part in parts]
value = '/'.join(parts)
- haswide = False
- hasnonewide = False
codepoints.sort()
for codepoint in codepoints:
- if codepoint < 0x10000:
- hasnonewide = True
- if codepoint >= 0x10000 and not haswide:
- print('#ifdef Py_UNICODE_WIDE', file=fp)
- haswide = True
print(' case 0x%04X:' % (codepoint,), file=fp)
- if haswide and hasnonewide:
- print('#endif', file=fp)
print(' return (double) %s;' % (value,), file=fp)
- if haswide and not hasnonewide:
- print('#endif', file=fp)
print(' }', file=fp)
print(' return -1.0;', file=fp)
print('}', file=fp)
@@ -512,27 +501,16 @@ def makeunicodetype(unicode, trace):
print("/* Returns 1 for Unicode characters having the bidirectional", file=fp)
print(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.", file=fp)
print(" */", file=fp)
- print('int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)', file=fp)
+ print('int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)', file=fp)
print('{', file=fp)
print('#ifdef WANT_WCTYPE_FUNCTIONS', file=fp)
print(' return iswspace(ch);', file=fp)
print('#else', file=fp)
print(' switch (ch) {', file=fp)
- haswide = False
- hasnonewide = False
for codepoint in sorted(spaces):
- if codepoint < 0x10000:
- hasnonewide = True
- if codepoint >= 0x10000 and not haswide:
- print('#ifdef Py_UNICODE_WIDE', file=fp)
- haswide = True
print(' case 0x%04X:' % (codepoint,), file=fp)
- if haswide and hasnonewide:
- print('#endif', file=fp)
print(' return 1;', file=fp)
- if haswide and not hasnonewide:
- print('#endif', file=fp)
print(' }', file=fp)
print(' return 0;', file=fp)
@@ -545,23 +523,12 @@ def makeunicodetype(unicode, trace):
print(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional", file=fp)
print(" * type 'B', 0 otherwise.", file=fp)
print(" */", file=fp)
- print('int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)', file=fp)
+ print('int _PyUnicode_IsLinebreak(register const Py_UCS4 ch)', file=fp)
print('{', file=fp)
print(' switch (ch) {', file=fp)
- haswide = False
- hasnonewide = False
for codepoint in sorted(linebreaks):
- if codepoint < 0x10000:
- hasnonewide = True
- if codepoint >= 0x10000 and not haswide:
- print('#ifdef Py_UNICODE_WIDE', file=fp)
- haswide = True
print(' case 0x%04X:' % (codepoint,), file=fp)
- if haswide and hasnonewide:
- print('#endif', file=fp)
print(' return 1;', file=fp)
- if haswide and not hasnonewide:
- print('#endif', file=fp)
print(' }', file=fp)
print(' return 0;', file=fp)