summary refs log tree commit diff stats
path: root/Tools/unicode/gencodec.py
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/unicode/gencodec.py')
-rw-r--r-- Tools/unicode/gencodec.py 111
1 file changed, 54 insertions, 57 deletions
diff --git a/Tools/unicode/gencodec.py b/Tools/unicode/gencodec.py
index 1e5aced..165c913 100644
--- a/Tools/unicode/gencodec.py
+++ b/Tools/unicode/gencodec.py
@@ -32,16 +32,13 @@ import re, os, marshal, codecs
MAX_TABLE_SIZE = 8192
# Standard undefined Unicode code point
-UNI_UNDEFINED = chr(0xFFFE)
+UNI_UNDEFINED = unichr(0xFFFE)
-# Placeholder for a missing code point
-MISSING_CODE = -1
-
-mapRE = re.compile(r'((?:0x[0-9a-fA-F]+\+?)+)'
- r'\s+'
- r'((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
- r'\s*'
- r'(#.+)?')
+mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)'
+ '\s+'
+ '((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
+ '\s*'
+ '(#.+)?')
def parsecodes(codes, len=len, range=range):
@@ -55,7 +52,7 @@ def parsecodes(codes, len=len, range=range):
"""
if not codes:
- return MISSING_CODE
+ return None
l = codes.split('+')
if len(l) == 1:
return int(l[0],16)
@@ -63,8 +60,8 @@ def parsecodes(codes, len=len, range=range):
try:
l[i] = int(l[i],16)
except ValueError:
- l[i] = MISSING_CODE
- l = [x for x in l if x != MISSING_CODE]
+ l[i] = None
+ l = [x for x in l if x is not None]
if len(l) == 1:
return l[0]
else:
@@ -72,16 +69,17 @@ def parsecodes(codes, len=len, range=range):
def readmap(filename):
- with open(filename) as f:
- lines = f.readlines()
+ f = open(filename,'r')
+ lines = f.readlines()
+ f.close()
enc2uni = {}
identity = []
- unmapped = list(range(256))
+ unmapped = range(256)
# UTC mapping tables per convention don't include the identity
# mappings for code points 0x00 - 0x1F and 0x7F, unless these are
# explicitly mapped to different characters or undefined
- for i in list(range(32)) + [127]:
+ for i in range(32) + [127]:
identity.append(i)
unmapped.remove(i)
enc2uni[i] = (i, 'CONTROL CHARACTER')
@@ -101,7 +99,7 @@ def readmap(filename):
comment = ''
else:
comment = comment[1:].strip()
- if not isinstance(enc, tuple) and enc < 256:
+ if enc < 256:
if enc in unmapped:
unmapped.remove(enc)
if enc == uni:
@@ -115,7 +113,7 @@ def readmap(filename):
# mappings to None for the rest
if len(identity) >= len(unmapped):
for enc in unmapped:
- enc2uni[enc] = (MISSING_CODE, "")
+ enc2uni[enc] = (None, "")
enc2uni['IDENTITY'] = 256
return enc2uni
@@ -126,13 +124,13 @@ def hexrepr(t, precision=4):
return 'None'
try:
len(t)
- except TypeError:
+ except:
return '0x%0*X' % (precision, t)
try:
return '(' + ', '.join(['0x%0*X' % (precision, item)
for item in t]) + ')'
- except TypeError as why:
- print('* failed to convert %r: %s' % (t, why))
+ except TypeError, why:
+ print '* failed to convert %r: %s' % (t, why)
raise
def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):
@@ -201,10 +199,11 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2):
# Analyze map and create table dict
mappings = sorted(map.items())
table = {}
- maxkey = 255
+ maxkey = 0
if 'IDENTITY' in map:
for key in range(256):
table[key] = (key, '')
+ maxkey = 255
del map['IDENTITY']
for mapkey, mapvalue in mappings:
mapcomment = ''
@@ -212,7 +211,7 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2):
(mapkey, mapcomment) = mapkey
if isinstance(mapvalue, tuple):
(mapvalue, mapcomment) = mapvalue
- if mapkey == MISSING_CODE:
+ if mapkey is None:
continue
table[mapkey] = (mapvalue, mapcomment)
if mapkey > maxkey:
@@ -222,31 +221,27 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2):
return None
# Create table code
- maxchar = 0
for key in range(maxkey + 1):
if key not in table:
- mapvalue = MISSING_CODE
+ mapvalue = None
mapcomment = 'UNDEFINED'
else:
mapvalue, mapcomment = table[key]
- if mapvalue == MISSING_CODE:
+ if mapvalue is None:
mapchar = UNI_UNDEFINED
else:
if isinstance(mapvalue, tuple):
# 1-n mappings not supported
return None
else:
- mapchar = chr(mapvalue)
- maxchar = max(maxchar, ord(mapchar))
+ mapchar = unichr(mapvalue)
if mapcomment and comments:
- append(' %a \t# %s -> %s' % (mapchar,
+ append(' %r\t# %s -> %s' % (mapchar,
hexrepr(key, key_precision),
mapcomment))
else:
- append(' %a' % mapchar)
+ append(' %r' % mapchar)
- if maxchar < 256:
- append(' %a \t## Widen to UCS2 for optimization' % UNI_UNDEFINED)
append(')')
return l
@@ -289,27 +284,27 @@ import codecs
class Codec(codecs.Codec):
- def encode(self, input, errors='strict'):
- return codecs.charmap_encode(input, errors, encoding_%s)
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_%s)
- def decode(self, input, errors='strict'):
- return codecs.charmap_decode(input, errors, decoding_%s)
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_%s)
''' % (encodingname, name, suffix, suffix)]
l.append('''\
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
- return codecs.charmap_encode(input, self.errors, encoding_%s)[0]
+ return codecs.charmap_encode(input,self.errors,encoding_%s)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
- return codecs.charmap_decode(input, self.errors, decoding_%s)[0]''' %
+ return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' %
(suffix, suffix))
l.append('''
-class StreamWriter(Codec, codecs.StreamWriter):
+class StreamWriter(Codec,codecs.StreamWriter):
pass
-class StreamReader(Codec, codecs.StreamReader):
+class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
@@ -342,7 +337,7 @@ def getregentry():
if decoding_table_code:
l.append('''
### Encoding table
-encoding_table = codecs.charmap_build(decoding_table)
+encoding_table=codecs.charmap_build(decoding_table)
''')
else:
l.append('''
@@ -358,16 +353,18 @@ encoding_table = codecs.charmap_build(decoding_table)
def pymap(name,map,pyfile,encodingname,comments=1):
code = codegen(name,map,encodingname,comments)
- with open(pyfile,'w') as f:
- f.write(code)
+ f = open(pyfile,'w')
+ f.write(code)
+ f.close()
def marshalmap(name,map,marshalfile):
d = {}
for e,(u,c) in map.items():
d[e] = (u,c)
- with open(marshalfile,'wb') as f:
- marshal.dump(d,f)
+ f = open(marshalfile,'wb')
+ marshal.dump(d,f)
+ f.close()
def convertdir(dir, dirprefix='', nameprefix='', comments=1):
@@ -383,18 +380,18 @@ def convertdir(dir, dirprefix='', nameprefix='', comments=1):
name = nameprefix + name
codefile = name + '.py'
marshalfile = name + '.mapping'
- print('converting %s to %s and %s' % (mapname,
+ print 'converting %s to %s and %s' % (mapname,
dirprefix + codefile,
- dirprefix + marshalfile))
+ dirprefix + marshalfile)
try:
map = readmap(os.path.join(dir,mapname))
if not map:
- print('* map is empty; skipping')
+ print '* map is empty; skipping'
else:
pymap(mappathname, map, dirprefix + codefile,name,comments)
marshalmap(mappathname, map, dirprefix + marshalfile)
- except ValueError as why:
- print('* conversion failed: %s' % why)
+ except ValueError, why:
+ print '* conversion failed: %s' % why
raise
def rewritepythondir(dir, dirprefix='', comments=1):
@@ -405,17 +402,17 @@ def rewritepythondir(dir, dirprefix='', comments=1):
continue
name = mapname[:-len('.mapping')]
codefile = name + '.py'
- print('converting %s to %s' % (mapname,
- dirprefix + codefile))
+ print 'converting %s to %s' % (mapname,
+ dirprefix + codefile)
try:
- with open(os.path.join(dir, mapname), 'rb') as f:
- map = marshal.load(f)
+ map = marshal.load(open(os.path.join(dir,mapname),
+ 'rb'))
if not map:
- print('* map is empty; skipping')
+ print '* map is empty; skipping'
else:
pymap(mapname, map, dirprefix + codefile,name,comments)
- except ValueError as why:
- print('* conversion failed: %s' % why)
+ except ValueError, why:
+ print '* conversion failed: %s' % why
if __name__ == '__main__':