diff options
Diffstat (limited to 'Tools/unicode/gencodec.py')
-rw-r--r-- | Tools/unicode/gencodec.py | 111 |
1 files changed, 54 insertions, 57 deletions
diff --git a/Tools/unicode/gencodec.py b/Tools/unicode/gencodec.py index 1e5aced..165c913 100644 --- a/Tools/unicode/gencodec.py +++ b/Tools/unicode/gencodec.py @@ -32,16 +32,13 @@ import re, os, marshal, codecs MAX_TABLE_SIZE = 8192 # Standard undefined Unicode code point -UNI_UNDEFINED = chr(0xFFFE) +UNI_UNDEFINED = unichr(0xFFFE) -# Placeholder for a missing code point -MISSING_CODE = -1 - -mapRE = re.compile(r'((?:0x[0-9a-fA-F]+\+?)+)' - r'\s+' - r'((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)' - r'\s*' - r'(#.+)?') +mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)' + '\s+' + '((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)' + '\s*' + '(#.+)?') def parsecodes(codes, len=len, range=range): @@ -55,7 +52,7 @@ def parsecodes(codes, len=len, range=range): """ if not codes: - return MISSING_CODE + return None l = codes.split('+') if len(l) == 1: return int(l[0],16) @@ -63,8 +60,8 @@ def parsecodes(codes, len=len, range=range): try: l[i] = int(l[i],16) except ValueError: - l[i] = MISSING_CODE - l = [x for x in l if x != MISSING_CODE] + l[i] = None + l = [x for x in l if x is not None] if len(l) == 1: return l[0] else: @@ -72,16 +69,17 @@ def parsecodes(codes, len=len, range=range): def readmap(filename): - with open(filename) as f: - lines = f.readlines() + f = open(filename,'r') + lines = f.readlines() + f.close() enc2uni = {} identity = [] - unmapped = list(range(256)) + unmapped = range(256) # UTC mapping tables per convention don't include the identity # mappings for code points 0x00 - 0x1F and 0x7F, unless these are # explicitly mapped to different characters or undefined - for i in list(range(32)) + [127]: + for i in range(32) + [127]: identity.append(i) unmapped.remove(i) enc2uni[i] = (i, 'CONTROL CHARACTER') @@ -101,7 +99,7 @@ def readmap(filename): comment = '' else: comment = comment[1:].strip() - if not isinstance(enc, tuple) and enc < 256: + if enc < 256: if enc in unmapped: unmapped.remove(enc) if enc == uni: @@ -115,7 +113,7 @@ def readmap(filename): # mappings to None for the rest if len(identity) >= len(unmapped): for enc in unmapped: - enc2uni[enc] = (MISSING_CODE, "") + enc2uni[enc] = (None, "") enc2uni['IDENTITY'] = 256 return enc2uni @@ -126,13 +124,13 @@ def hexrepr(t, precision=4): return 'None' try: len(t) - except TypeError: + except: return '0x%0*X' % (precision, t) try: return '(' + ', '.join(['0x%0*X' % (precision, item) for item in t]) + ')' - except TypeError as why: - print('* failed to convert %r: %s' % (t, why)) + except TypeError, why: + print '* failed to convert %r: %s' % (t, why) raise def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)): @@ -201,10 +199,11 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2): # Analyze map and create table dict mappings = sorted(map.items()) table = {} - maxkey = 255 + maxkey = 0 if 'IDENTITY' in map: for key in range(256): table[key] = (key, '') + maxkey = 255 del map['IDENTITY'] for mapkey, mapvalue in mappings: mapcomment = '' @@ -212,7 +211,7 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2): (mapkey, mapcomment) = mapkey if isinstance(mapvalue, tuple): (mapvalue, mapcomment) = mapvalue - if mapkey == MISSING_CODE: + if mapkey is None: continue table[mapkey] = (mapvalue, mapcomment) if mapkey > maxkey: @@ -222,31 +221,27 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2): return None # Create table code - maxchar = 0 for key in range(maxkey + 1): if key not in table: - mapvalue = MISSING_CODE + mapvalue = None mapcomment = 'UNDEFINED' else: mapvalue, mapcomment = table[key] - if mapvalue == MISSING_CODE: + if mapvalue is None: mapchar = UNI_UNDEFINED else: if isinstance(mapvalue, tuple): # 1-n mappings not supported return None else: - mapchar = chr(mapvalue) - maxchar = max(maxchar, ord(mapchar)) + mapchar = unichr(mapvalue) if mapcomment and comments: - append(' %a \t# %s -> %s' % (mapchar, + append(' %r\t# %s -> %s' % (mapchar, hexrepr(key, key_precision), mapcomment)) else: - append(' %a' % mapchar) + append(' %r' % mapchar) - if maxchar < 256: - append(' %a \t## Widen to UCS2 for optimization' % UNI_UNDEFINED) append(')') return l @@ -289,27 +284,27 @@ import codecs class Codec(codecs.Codec): - def encode(self, input, errors='strict'): - return codecs.charmap_encode(input, errors, encoding_%s) + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_%s) - def decode(self, input, errors='strict'): - return codecs.charmap_decode(input, errors, decoding_%s) + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_%s) ''' % (encodingname, name, suffix, suffix)] l.append('''\ class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input, self.errors, encoding_%s)[0] + return codecs.charmap_encode(input,self.errors,encoding_%s)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): - return codecs.charmap_decode(input, self.errors, decoding_%s)[0]''' % + return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' % (suffix, suffix)) l.append(''' -class StreamWriter(Codec, codecs.StreamWriter): +class StreamWriter(Codec,codecs.StreamWriter): pass -class StreamReader(Codec, codecs.StreamReader): +class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API @@ -342,7 +337,7 @@ def getregentry(): if decoding_table_code: l.append(''' ### Encoding table -encoding_table = codecs.charmap_build(decoding_table) +encoding_table=codecs.charmap_build(decoding_table) ''') else: l.append(''' @@ -358,16 +353,18 @@ encoding_table = codecs.charmap_build(decoding_table) def pymap(name,map,pyfile,encodingname,comments=1): code = codegen(name,map,encodingname,comments) - with open(pyfile,'w') as f: - f.write(code) + f = open(pyfile,'w') + f.write(code) + f.close() def marshalmap(name,map,marshalfile): d = {} for e,(u,c) in map.items(): d[e] = (u,c) - with open(marshalfile,'wb') as f: - marshal.dump(d,f) + f = open(marshalfile,'wb') + marshal.dump(d,f) + f.close() def convertdir(dir, dirprefix='', nameprefix='', comments=1): @@ -383,18 +380,18 @@ def convertdir(dir, dirprefix='', nameprefix='', comments=1): name = nameprefix + name codefile = name + '.py' marshalfile = name + '.mapping' - print('converting %s to %s and %s' % (mapname, + print 'converting %s to %s and %s' % (mapname, dirprefix + codefile, - dirprefix + marshalfile)) + dirprefix + marshalfile) try: map = readmap(os.path.join(dir,mapname)) if not map: - print('* map is empty; skipping') + print '* map is empty; skipping' else: pymap(mappathname, map, dirprefix + codefile,name,comments) marshalmap(mappathname, map, dirprefix + marshalfile) - except ValueError as why: - print('* conversion failed: %s' % why) + except ValueError, why: + print '* conversion failed: %s' % why raise def rewritepythondir(dir, dirprefix='', comments=1): @@ -405,17 +402,17 @@ def rewritepythondir(dir, dirprefix='', comments=1): continue name = mapname[:-len('.mapping')] codefile = name + '.py' - print('converting %s to %s' % (mapname, - dirprefix + codefile)) + print 'converting %s to %s' % (mapname, + dirprefix + codefile) try: - with open(os.path.join(dir, mapname), 'rb') as f: - map = marshal.load(f) + map = marshal.load(open(os.path.join(dir,mapname), + 'rb')) if not map: - print('* map is empty; skipping') + print '* map is empty; skipping' else: pymap(mapname, map, dirprefix + codefile,name,comments) - except ValueError as why: - print('* conversion failed: %s' % why) + except ValueError, why: + print '* conversion failed: %s' % why if __name__ == '__main__': |