diff options
Diffstat (limited to 'Tools/unicode/genwincodec.py')
-rw-r--r-- | Tools/unicode/genwincodec.py | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/Tools/unicode/genwincodec.py b/Tools/unicode/genwincodec.py new file mode 100644 index 0000000..7a1ef5e --- /dev/null +++ b/Tools/unicode/genwincodec.py @@ -0,0 +1,61 @@ +"""This script generates a Python codec module from a Windows Code Page. + +It uses the function MultiByteToWideChar to generate a decoding table. +""" + +import ctypes +from ctypes import wintypes +from gencodec import codegen +import unicodedata + +def genwinmap(codepage): + MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar + MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD, + wintypes.LPCSTR, ctypes.c_int, + wintypes.LPWSTR, ctypes.c_int] + MultiByteToWideChar.restype = ctypes.c_int + + enc2uni = {} + + for i in list(range(32)) + [127]: + enc2uni[i] = (i, 'CONTROL CHARACTER') + + for i in range(256): + buf = ctypes.create_unicode_buffer(2) + ret = MultiByteToWideChar( + codepage, 0, + bytes([i]), 1, + buf, 2) + assert ret == 1, "invalid code page" + assert buf[1] == '\x00' + try: + name = unicodedata.name(buf[0]) + except ValueError: + try: + name = enc2uni[i][1] + except KeyError: + name = '' + + enc2uni[i] = (ord(buf[0]), name) + + return enc2uni + +def genwincodec(codepage): + import platform + map = genwinmap(codepage) + encodingname = 'cp%d' % codepage + code = codegen("", map, encodingname) + # Replace first lines with our own docstring + code = '''\ +"""Python Character Mapping Codec %s generated on Windows: +%s with the command: + python Tools/unicode/genwincodec.py %s +"""#" +''' % (encodingname, ' '.join(platform.win32_ver()), codepage + ) + code.split('"""#"', 1)[1] + + print(code) + +if __name__ == '__main__': + import sys + genwincodec(int(sys.argv[1])) |