Diffstat (limited to 'Tools/unicode')
-rw-r--r--  Tools/unicode/comparecodecs.py   |  6
-rw-r--r--  Tools/unicode/gencjkcodecs.py    |  2
-rw-r--r--  Tools/unicode/gencodec.py        | 20
-rw-r--r--  Tools/unicode/makeunicodedata.py | 17
-rw-r--r--  Tools/unicode/mkstringprep.py    | 38
5 files changed, 37 insertions, 46 deletions
diff --git a/Tools/unicode/comparecodecs.py b/Tools/unicode/comparecodecs.py
index c291be4..dade1ce 100644
--- a/Tools/unicode/comparecodecs.py
+++ b/Tools/unicode/comparecodecs.py
@@ -15,7 +15,7 @@ def compare_codecs(encoding1, encoding2):
     mismatch = 0
     # Check encoding
     for i in range(sys.maxunicode):
-        u = unichr(i)
+        u = chr(i)
         try:
             c1 = u.encode(encoding1)
         except UnicodeError as reason:
@@ -34,11 +34,11 @@ def compare_codecs(encoding1, encoding2):
         try:
             u1 = c.decode(encoding1)
         except UnicodeError:
-            u1 = u'<undefined>'
+            u1 = '<undefined>'
         try:
             u2 = c.decode(encoding2)
         except UnicodeError:
-            u2 = u'<undefined>'
+            u2 = '<undefined>'
         if u1 != u2:
             print(' * decoding mismatch for 0x%04X: %-14r != %r' % \
                   (i, u1, u2))
diff --git a/Tools/unicode/gencjkcodecs.py b/Tools/unicode/gencjkcodecs.py
index 975c19c..ebccfc7 100644
--- a/Tools/unicode/gencjkcodecs.py
+++ b/Tools/unicode/gencjkcodecs.py
@@ -55,7 +55,7 @@ def getregentry():
 """)
 
 def gencodecs(prefix):
-    for loc, encodings in codecs.iteritems():
+    for loc, encodings in codecs.items():
         for enc in encodings:
             code = TEMPLATE.substitute(ENCODING=enc.upper(),
                                        encoding=enc.lower(),
diff --git a/Tools/unicode/gencodec.py b/Tools/unicode/gencodec.py
index c63f559..c3846e9 100644
--- a/Tools/unicode/gencodec.py
+++ b/Tools/unicode/gencodec.py
@@ -32,7 +32,7 @@ import re, os, marshal, codecs
 MAX_TABLE_SIZE = 8192
 
 # Standard undefined Unicode code point
-UNI_UNDEFINED = unichr(0xFFFE)
+UNI_UNDEFINED = chr(0xFFFE)
 
 mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)'
                    '\s+'
@@ -62,7 +62,7 @@ def parsecodes(codes,
             l[i] = int(l[i],16)
         except ValueError:
             l[i] = None
-    l = filter(lambda x: x is not None, l)
+    l = [x for x in l if x is not None]
     if len(l) == 1:
         return l[0]
     else:
@@ -75,12 +75,12 @@ def readmap(filename):
     f.close()
     enc2uni = {}
     identity = []
-    unmapped = range(256)
+    unmapped = list(range(256))
 
     # UTC mapping tables per convention don't include the identity
     # mappings for code points 0x00 - 0x1F and 0x7F, unless these are
     # explicitly mapped to different characters or undefined
-    for i in range(32) + [127]:
+    for i in list(range(32)) + [127]:
         identity.append(i)
         unmapped.remove(i)
         enc2uni[i] = (i, 'CONTROL CHARACTER')
@@ -138,7 +138,7 @@ def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):
 
     l = []
     append = l.append
-    if map.has_key("IDENTITY"):
+    if "IDENTITY" in map:
         append("%s = codecs.make_identity_dict(range(%d))" %
                (varname, map["IDENTITY"]))
         append("%s.update({" % varname)
@@ -150,8 +150,7 @@ def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):
         splits = 0
         identity = 0
 
-    mappings = map.items()
-    mappings.sort()
+    mappings = sorted(map.items())
     i = 0
     key_precision, value_precision = precisions
     for mapkey, mapvalue in mappings:
@@ -199,11 +198,10 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2):
     append('%s = (' % varname)
 
     # Analyze map and create table dict
-    mappings = map.items()
-    mappings.sort()
+    mappings = sorted(map.items())
     table = {}
     maxkey = 0
-    if map.has_key('IDENTITY'):
+    if 'IDENTITY' in map:
         for key in range(256):
             table[key] = (key, '')
         maxkey = 255
@@ -237,7 +235,7 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2):
             # 1-n mappings not supported
             return None
         else:
-            mapchar = unichr(mapvalue)
+            mapchar = chr(mapvalue)
         if mapcomment and comments:
             append('    %r\t# %s -> %s' % (mapchar,
                    hexrepr(key, key_precision),
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index f080ca2..885e559 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -71,7 +71,7 @@ def maketables(trace=0):
                           EASTASIAN_WIDTH % version,
                           DERIVED_CORE_PROPERTIES % version)
 
-    print(len(filter(None, unicode.table)), "characters")
+    print(len(list(filter(None, unicode.table))), "characters")
 
     for version in old_versions:
         print("--- Reading", UNICODE_DATA % ("-"+version), "...")
@@ -79,7 +79,7 @@ def maketables(trace=0):
                                   COMPOSITION_EXCLUSIONS % ("-"+version),
                                   EASTASIAN_WIDTH % ("-"+version),
                                   DERIVED_CORE_PROPERTIES % ("-"+version))
-        print(len(filter(None, old_unicode.table)), "characters")
+        print(len(list(filter(None, old_unicode.table))), "characters")
         merge_old_version(version, unicode, old_unicode)
 
     makeunicodename(unicode, trace)
@@ -152,8 +152,7 @@ def makeunicodedata(unicode, trace):
                     prefix = i
                 assert prefix < 256
                 # content
-                decomp = [prefix + (len(decomp)<<8)] +\
-                         list(map(lambda s: int(s, 16), decomp))
+                decomp = [prefix + (len(decomp)<<8)] + [int(s, 16) for s in decomp]
                 # Collect NFC pairs
                 if not prefix and len(decomp) == 3 and \
                    char not in unicode.exclusions and \
@@ -466,7 +465,7 @@ def makeunicodename(unicode, trace):
             if name and name[0] != "<":
                 names[char] = name + chr(0)
 
-    print(len(filter(lambda n: n is not None, names)), "distinct names")
+    print(len([n for n in names if n is not None]), "distinct names")
 
     # collect unique words from names (note that we differ between
     # words inside a sentence, and words ending a sentence.  the
@@ -740,7 +739,7 @@ class UnicodeData:
         # public attributes
         self.filename = filename
         self.table = table
-        self.chars = range(0x110000) # unicode 3.2
+        self.chars = list(range(0x110000)) # unicode 3.2
 
         file = open(exclusions)
         self.exclusions = {}
@@ -763,7 +762,7 @@ class UnicodeData:
             s = s.split()[0].split(';')
             if '..' in s[0]:
                 first, last = [int(c, 16) for c in s[0].split('..')]
-                chars = range(first, last+1)
+                chars = list(range(first, last+1))
             else:
                 chars = [int(s[0], 16)]
             for char in chars:
@@ -785,7 +784,7 @@ class UnicodeData:
             p = p.strip()
             if ".." in r:
                 first, last = [int(c, 16) for c in r.split('..')]
-                chars = range(first, last+1)
+                chars = list(range(first, last+1))
             else:
                 chars = [int(r, 16)]
             for char in chars:
@@ -796,7 +795,7 @@ class UnicodeData:
 
     def uselatin1(self):
         # restrict character range to ISO Latin 1
-        self.chars = range(256)
+        self.chars = list(range(256))
 
 # hash table tools
 
diff --git a/Tools/unicode/mkstringprep.py b/Tools/unicode/mkstringprep.py
index 83a5d8f..868f5cd 100644
--- a/Tools/unicode/mkstringprep.py
+++ b/Tools/unicode/mkstringprep.py
@@ -5,12 +5,12 @@ if sys.maxunicode == 65535:
 
 def gen_category(cats):
     for i in range(0, 0x110000):
-        if unicodedata.category(unichr(i)) in cats:
+        if unicodedata.category(chr(i)) in cats:
             yield(i)
 
 def gen_bidirectional(cats):
     for i in range(0, 0x110000):
-        if unicodedata.bidirectional(unichr(i)) in cats:
+        if unicodedata.bidirectional(chr(i)) in cats:
             yield(i)
 
 def compact_set(l):
@@ -63,14 +63,14 @@ for l in data:
     if m:
         if m.group(1) == "Start":
             if curname:
-                raise "Double Start",(curname, l)
+                raise RuntimeError("Double Start", (curname, l))
             curname = m.group(2)
             table = {}
             tables.append((curname, table))
             continue
         else:
             if not curname:
-                raise "End without start", l
+                raise RuntimeError("End without start", l)
             curname = None
             continue
     if not curname:
@@ -87,7 +87,7 @@ for l in data:
         try:
             start, end = fields
         except ValueError:
-            raise "Unpacking problem", l
+            raise RuntimeError("Unpacking problem", l)
     else:
         start = end = fields[0]
     start = int(start, 16)
@@ -146,8 +146,7 @@ def in_table_a1(code):
 name, table = tables[0]
 del tables[0]
 assert name == "B.1"
-table = table.keys()
-table.sort()
+table = sorted(table.keys())
 print("""
 b1_set = """ + compact_set(table) + """
 def in_table_b1(code):
@@ -177,8 +176,7 @@ for k,v in table_b2.items():
     if map(ord, unichr(k).lower()) != v:
         b3_exceptions[k] = u"".join(map(unichr,v))
 
-b3 = b3_exceptions.items()
-b3.sort()
+b3 = sorted(b3_exceptions.items())
 
 print("""
 b3_exceptions = {""")
@@ -207,7 +205,7 @@ def map_table_b3(code):
 def map_table_b2(a):
     al = map_table_b3(a)
     b = unicodedata.normalize("NFKC", al)
-    bl = u"".join([map_table_b3(ch) for ch in b])
+    bl = "".join([map_table_b3(ch) for ch in b])
     c = unicodedata.normalize("NFKC", bl)
     if b != c:
         return c
@@ -216,7 +214,7 @@ def map_table_b2(a):
 
 specials = {}
 for k,v in table_b2.items():
-    if map(ord, map_table_b2(unichr(k))) != v:
+    if list(map(ord, map_table_b2(chr(k)))) != v:
         specials[k] = v
 
 # B.3 should not add any additional special cases
@@ -321,9 +319,9 @@ name, table = tables[0]
 del tables[0]
 assert name == "C.4"
 
-nonchar = set(range(0xFDD0,0xFDF0) +
-              range(0xFFFE,0x110000,0x10000) +
-              range(0xFFFF,0x110000,0x10000))
+nonchar = set(range(0xFDD0,0xFDF0))
+nonchar.update(range(0xFFFE,0x110000,0x10000))
+nonchar.update(range(0xFFFF,0x110000,0x10000))
 
 table = set(table.keys())
 assert table == nonchar
@@ -353,8 +351,7 @@ name, table = tables[0]
 del tables[0]
 assert name == "C.6"
 
-table = table.keys()
-table.sort()
+table = sorted(table.keys())
 
 print("""
 c6_set = """ + compact_set(table) + """
@@ -367,8 +364,7 @@ name, table = tables[0]
 del tables[0]
 assert name == "C.7"
 
-table = table.keys()
-table.sort()
+table = sorted(table.keys())
 
 print("""
 c7_set = """ + compact_set(table) + """
@@ -381,8 +377,7 @@ name, table = tables[0]
 del tables[0]
 assert name == "C.8"
 
-table = table.keys()
-table.sort()
+table = sorted(table.keys())
 
 print("""
 c8_set = """ + compact_set(table) + """
@@ -395,8 +390,7 @@ name, table = tables[0]
 del tables[0]
 assert name == "C.9"
 
-table = table.keys()
-table.sort()
+table = sorted(table.keys())
 
 print("""
 c9_set = """ + compact_set(table) + """
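
For readers porting similar scripts, here is a short illustrative sketch (not part of the patch) of the Python 3 semantics behind the recurring rewrites in this diff: filter(), map(), and range() became lazy, dict.has_key() and iteritems() were removed in favour of "in" and items(), dict.keys() returns a view that cannot be sorted in place, and unichr()/u'' literals gave way to chr() and plain strings.

# Illustrative sketch only -- not part of the patch above. Each assertion
# demonstrates one Python 3 change that the patch accounts for.

table = {3: "c", 1: "a", 2: "b"}

# Py2: keys = table.keys(); keys.sort()  (keys() returned a list)
# Py3: keys() returns a view with no .sort(); build a sorted list instead.
keys = sorted(table.keys())
assert keys == [1, 2, 3]

# Py2: table.has_key(1), table.iteritems()
# Py3: the "in" operator and items() replace both.
assert 1 in table
assert sorted(table.items()) == [(1, "a"), (2, "b"), (3, "c")]

# Py2: len(filter(None, seq)) worked because filter() returned a list.
# Py3: filter() is lazy, so wrap it in list() before taking len().
assert len(list(filter(None, [0, "x", "", "y"]))) == 2

# Py2: range(32) + [127] worked because range() returned a list.
# Py3: range() is a lazy sequence; convert it explicitly.
assert list(range(3)) + [127] == [0, 1, 2, 127]

# Py2: unichr(i) and u'...' for text.
# Py3: chr() covers all code points and string literals are Unicode.
assert chr(0x41) == "A"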