Diffstat (limited to 'Tools/unicode')
-rw-r--r--  Tools/unicode/comparecodecs.py   |  6
-rw-r--r--  Tools/unicode/gencjkcodecs.py    |  2
-rw-r--r--  Tools/unicode/gencodec.py        | 20
-rw-r--r--  Tools/unicode/makeunicodedata.py | 17
-rw-r--r--  Tools/unicode/mkstringprep.py    | 38
5 files changed, 37 insertions(+), 46 deletions(-)
diff --git a/Tools/unicode/comparecodecs.py b/Tools/unicode/comparecodecs.py
index c291be4..dade1ce 100644
--- a/Tools/unicode/comparecodecs.py
+++ b/Tools/unicode/comparecodecs.py
@@ -15,7 +15,7 @@ def compare_codecs(encoding1, encoding2):
mismatch = 0
# Check encoding
for i in range(sys.maxunicode):
- u = unichr(i)
+ u = chr(i)
try:
c1 = u.encode(encoding1)
except UnicodeError as reason:
@@ -34,11 +34,11 @@ def compare_codecs(encoding1, encoding2):
try:
u1 = c.decode(encoding1)
except UnicodeError:
- u1 = u'<undefined>'
+ u1 = '<undefined>'
try:
u2 = c.decode(encoding2)
except UnicodeError:
- u2 = u'<undefined>'
+ u2 = '<undefined>'
if u1 != u2:
print(' * decoding mismatch for 0x%04X: %-14r != %r' % \
(i, u1, u2))
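
The comparecodecs.py hunks are the purely mechanical renames of this port: Python 3's chr() covers the full code point range that Python 2 reserved for unichr(), and string literals are Unicode by default, so the u'' prefix goes away. A minimal standalone sketch of both idioms (not part of the tool itself):

    import sys

    for i in (0x41, 0x20AC, sys.maxunicode):
        u = chr(i)                 # Python 2 spelled this unichr(i)
        print('U+%06X -> %r' % (i, u))

    undefined = '<undefined>'      # Python 2: u'<undefined>'; u'' is implicit now
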
diff --git a/Tools/unicode/gencjkcodecs.py b/Tools/unicode/gencjkcodecs.py
index 975c19c..ebccfc7 100644
--- a/Tools/unicode/gencjkcodecs.py
+++ b/Tools/unicode/gencjkcodecs.py
@@ -55,7 +55,7 @@ def getregentry():
""")
def gencodecs(prefix):
- for loc, encodings in codecs.iteritems():
+ for loc, encodings in codecs.items():
for enc in encodings:
code = TEMPLATE.substitute(ENCODING=enc.upper(),
encoding=enc.lower(),
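
gencjkcodecs.py needs only the dict-iteration rename: Python 3 removed iteritems(), and items() returns a lazy view that iterates the same way. A hedged sketch with a stand-in locale-to-encodings table (the real one lives in the tool; the values here are illustrative):

    codecs = {'ja_JP': ('cp932', 'euc_jp'), 'ko_KR': ('cp949',)}

    for loc, encodings in codecs.items():   # Python 2: codecs.iteritems()
        for enc in encodings:
            print(loc, enc)
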
diff --git a/Tools/unicode/gencodec.py b/Tools/unicode/gencodec.py
index c63f559..c3846e9 100644
--- a/Tools/unicode/gencodec.py
+++ b/Tools/unicode/gencodec.py
@@ -32,7 +32,7 @@ import re, os, marshal, codecs
MAX_TABLE_SIZE = 8192
# Standard undefined Unicode code point
-UNI_UNDEFINED = unichr(0xFFFE)
+UNI_UNDEFINED = chr(0xFFFE)
mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)'
'\s+'
@@ -62,7 +62,7 @@ def parsecodes(codes,
l[i] = int(l[i],16)
except ValueError:
l[i] = None
- l = filter(lambda x: x is not None, l)
+ l = [x for x in l if x is not None]
if len(l) == 1:
return l[0]
else:
@@ -75,12 +75,12 @@ def readmap(filename):
f.close()
enc2uni = {}
identity = []
- unmapped = range(256)
+ unmapped = list(range(256))
# UTC mapping tables per convention don't include the identity
# mappings for code points 0x00 - 0x1F and 0x7F, unless these are
# explicitly mapped to different characters or undefined
- for i in range(32) + [127]:
+ for i in list(range(32)) + [127]:
identity.append(i)
unmapped.remove(i)
enc2uni[i] = (i, 'CONTROL CHARACTER')
@@ -138,7 +138,7 @@ def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):
l = []
append = l.append
- if map.has_key("IDENTITY"):
+ if "IDENTITY" in map:
append("%s = codecs.make_identity_dict(range(%d))" %
(varname, map["IDENTITY"]))
append("%s.update({" % varname)
@@ -150,8 +150,7 @@ def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):
splits = 0
identity = 0
- mappings = map.items()
- mappings.sort()
+ mappings = sorted(map.items())
i = 0
key_precision, value_precision = precisions
for mapkey, mapvalue in mappings:
@@ -199,11 +198,10 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2):
append('%s = (' % varname)
# Analyze map and create table dict
- mappings = map.items()
- mappings.sort()
+ mappings = sorted(map.items())
table = {}
maxkey = 0
- if map.has_key('IDENTITY'):
+ if 'IDENTITY' in map:
for key in range(256):
table[key] = (key, '')
maxkey = 255
@@ -237,7 +235,7 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2):
# 1-n mappings not supported
return None
else:
- mapchar = unichr(mapvalue)
+ mapchar = chr(mapvalue)
if mapcomment and comments:
append(' %r\t# %s -> %s' % (mapchar,
hexrepr(key, key_precision),
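
The gencodec.py edits bundle three more removals: dict.has_key() gives way to the in operator, items() is now a view so the old items-then-sort dance collapses into sorted(), and filter() returns an iterator, which the tool replaces with a list comprehension. A small sketch of all three on a toy mapping table (keys and comments are made up):

    # toy stand-in for a decoding map: byte -> (code point, comment)
    table = {0x80: (0x20AC, 'EURO SIGN'), 0x41: (0x41, '')}

    if 0x80 in table:                  # Python 2: table.has_key(0x80)
        pass

    mappings = sorted(table.items())   # Python 2: l = table.items(); l.sort()

    codes = [0x41, None, 0x80]
    codes = [x for x in codes if x is not None]   # Python 2: filter(lambda ...)
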
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index f080ca2..885e559 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -71,7 +71,7 @@ def maketables(trace=0):
EASTASIAN_WIDTH % version,
DERIVED_CORE_PROPERTIES % version)
- print(len(filter(None, unicode.table)), "characters")
+ print(len(list(filter(None, unicode.table))), "characters")
for version in old_versions:
print("--- Reading", UNICODE_DATA % ("-"+version), "...")
@@ -79,7 +79,7 @@ def maketables(trace=0):
COMPOSITION_EXCLUSIONS % ("-"+version),
EASTASIAN_WIDTH % ("-"+version),
DERIVED_CORE_PROPERTIES % ("-"+version))
- print(len(filter(None, old_unicode.table)), "characters")
+ print(len(list(filter(None, old_unicode.table))), "characters")
merge_old_version(version, unicode, old_unicode)
makeunicodename(unicode, trace)
@@ -152,8 +152,7 @@ def makeunicodedata(unicode, trace):
prefix = i
assert prefix < 256
# content
- decomp = [prefix + (len(decomp)<<8)] +\
- list(map(lambda s: int(s, 16), decomp))
+ decomp = [prefix + (len(decomp)<<8)] + [int(s, 16) for s in decomp]
# Collect NFC pairs
if not prefix and len(decomp) == 3 and \
char not in unicode.exclusions and \
@@ -466,7 +465,7 @@ def makeunicodename(unicode, trace):
if name and name[0] != "<":
names[char] = name + chr(0)
- print(len(filter(lambda n: n is not None, names)), "distinct names")
+ print(len(list(n for n in names if n is not None)), "distinct names")
# collect unique words from names (note that we differ between
# words inside a sentence, and words ending a sentence. the
@@ -740,7 +739,7 @@ class UnicodeData:
# public attributes
self.filename = filename
self.table = table
- self.chars = range(0x110000) # unicode 3.2
+ self.chars = list(range(0x110000)) # unicode 3.2
file = open(exclusions)
self.exclusions = {}
@@ -763,7 +762,7 @@ class UnicodeData:
s = s.split()[0].split(';')
if '..' in s[0]:
first, last = [int(c, 16) for c in s[0].split('..')]
- chars = range(first, last+1)
+ chars = list(range(first, last+1))
else:
chars = [int(s[0], 16)]
for char in chars:
@@ -785,7 +784,7 @@ class UnicodeData:
p = p.strip()
if ".." in r:
first, last = [int(c, 16) for c in r.split('..')]
- chars = range(first, last+1)
+ chars = list(range(first, last+1))
else:
chars = [int(r, 16)]
for char in chars:
@@ -796,7 +795,7 @@ class UnicodeData:
def uselatin1(self):
# restrict character range to ISO Latin 1
- self.chars = range(256)
+ self.chars = list(range(256))
# hash table tools
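
Most of the makeunicodedata.py churn traces back to one change: range() now returns a lazy sequence rather than a list, so any result that is mutated, concatenated, or kept around for indexing needs an explicit list(). The same fix drives the unmapped-table edit in gencodec.py above. A sketch of the pattern (values are illustrative):

    unmapped = list(range(256))           # Python 2: range(256) was already a list
    for i in list(range(32)) + [127]:     # ranges no longer concatenate with +
        unmapped.remove(i)                # remove() needs a real, mutable list

    first, last = 0x0600, 0x0605
    chars = list(range(first, last + 1))  # stored and indexed later, so listify
    print(len(unmapped), chars)
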
diff --git a/Tools/unicode/mkstringprep.py b/Tools/unicode/mkstringprep.py
index 83a5d8f..868f5cd 100644
--- a/Tools/unicode/mkstringprep.py
+++ b/Tools/unicode/mkstringprep.py
@@ -5,12 +5,12 @@ if sys.maxunicode == 65535:
def gen_category(cats):
for i in range(0, 0x110000):
- if unicodedata.category(unichr(i)) in cats:
+ if unicodedata.category(chr(i)) in cats:
yield(i)
def gen_bidirectional(cats):
for i in range(0, 0x110000):
- if unicodedata.bidirectional(unichr(i)) in cats:
+ if unicodedata.bidirectional(chr(i)) in cats:
yield(i)
def compact_set(l):
@@ -63,14 +63,14 @@ for l in data:
if m:
if m.group(1) == "Start":
if curname:
- raise "Double Start",(curname, l)
+ raise RuntimeError("Double Start", (curname, l))
curname = m.group(2)
table = {}
tables.append((curname, table))
continue
else:
if not curname:
- raise "End without start", l
+ raise RuntimeError("End without start", l)
curname = None
continue
if not curname:
@@ -87,7 +87,7 @@ for l in data:
try:
start, end = fields
except ValueError:
- raise "Unpacking problem", l
+ raise RuntimeError("Unpacking problem", l)
else:
start = end = fields[0]
start = int(start, 16)
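
These mkstringprep.py hunks retire Python 2 string exceptions: raise "msg", detail no longer parses in Python 3, so the port wraps the message and the offending input line in a RuntimeError instance. A self-contained sketch of the same error path (the function name and marker line are invented for illustration):

    def check_end_marker(curname, line):
        # Python 2 spelled this: raise "End without start", line
        if not curname:
            raise RuntimeError("End without start", line)

    try:
        check_end_marker(None, '----- End Table A.1 -----')
    except RuntimeError as exc:
        print('caught:', exc.args)
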
@@ -146,8 +146,7 @@ def in_table_a1(code):
name, table = tables[0]
del tables[0]
assert name == "B.1"
-table = table.keys()
-table.sort()
+table = sorted(table.keys())
print("""
b1_set = """ + compact_set(table) + """
def in_table_b1(code):
@@ -177,8 +176,7 @@ for k,v in table_b2.items():
if map(ord, unichr(k).lower()) != v:
b3_exceptions[k] = u"".join(map(unichr,v))
-b3 = b3_exceptions.items()
-b3.sort()
+b3 = sorted(b3_exceptions.items())
print("""
b3_exceptions = {""")
@@ -207,7 +205,7 @@ def map_table_b3(code):
def map_table_b2(a):
al = map_table_b3(a)
b = unicodedata.normalize("NFKC", al)
- bl = u"".join([map_table_b3(ch) for ch in b])
+ bl = "".join([map_table_b3(ch) for ch in b])
c = unicodedata.normalize("NFKC", bl)
if b != c:
return c
@@ -216,7 +214,7 @@ def map_table_b2(a):
specials = {}
for k,v in table_b2.items():
- if map(ord, map_table_b2(unichr(k))) != v:
+ if list(map(ord, map_table_b2(chr(k)))) != v:
specials[k] = v
# B.3 should not add any additional special cases
@@ -321,9 +319,9 @@ name, table = tables[0]
del tables[0]
assert name == "C.4"
-nonchar = set(range(0xFDD0,0xFDF0) +
- range(0xFFFE,0x110000,0x10000) +
- range(0xFFFF,0x110000,0x10000))
+nonchar = set(range(0xFDD0,0xFDF0))
+nonchar.update(range(0xFFFE,0x110000,0x10000))
+nonchar.update(range(0xFFFF,0x110000,0x10000))
table = set(table.keys())
assert table == nonchar
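
The C.4 hunk works around another consequence of lazy ranges: Python 2's range() + range() concatenated lists, which Python 3 rejects, so the noncharacter set is now assembled with set.update(). The rewritten construction runs on its own and can be sanity-checked against the 66 noncharacters Unicode defines:

    # noncharacters: U+FDD0..U+FDEF plus U+xFFFE/U+xFFFF on each of the 17 planes
    nonchar = set(range(0xFDD0, 0xFDF0))
    nonchar.update(range(0xFFFE, 0x110000, 0x10000))
    nonchar.update(range(0xFFFF, 0x110000, 0x10000))
    assert len(nonchar) == 32 + 17 + 17   # 66 in total
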
@@ -353,8 +351,7 @@ name, table = tables[0]
del tables[0]
assert name == "C.6"
-table = table.keys()
-table.sort()
+table = sorted(table.keys())
print("""
c6_set = """ + compact_set(table) + """
@@ -367,8 +364,7 @@ name, table = tables[0]
del tables[0]
assert name == "C.7"
-table = table.keys()
-table.sort()
+table = sorted(table.keys())
print("""
c7_set = """ + compact_set(table) + """
@@ -381,8 +377,7 @@ name, table = tables[0]
del tables[0]
assert name == "C.8"
-table = table.keys()
-table.sort()
+table = sorted(table.keys())
print("""
c8_set = """ + compact_set(table) + """
@@ -395,8 +390,7 @@ name, table = tables[0]
del tables[0]
assert name == "C.9"
-table = table.keys()
-table.sort()
+table = sorted(table.keys())
print("""
c9_set = """ + compact_set(table) + """