diff options
author | Amaury Forgeot d'Arc <amauryfa@gmail.com> | 2009-07-13 20:01:11 (GMT) |
---|---|---|
committer | Amaury Forgeot d'Arc <amauryfa@gmail.com> | 2009-07-13 20:01:11 (GMT) |
commit | 70dda76cdec8e6a4c3e1fe112f8df6d28f37434e (patch) | |
tree | 00e210d3ec4bbcd5ab913ee12109e2ce6123727b /Tools | |
parent | f31fd0179ef1fb18718558612b1b9ddc6f2d28c1 (diff) | |
download | cpython-70dda76cdec8e6a4c3e1fe112f8df6d28f37434e.zip cpython-70dda76cdec8e6a4c3e1fe112f8df6d28f37434e.tar.gz cpython-70dda76cdec8e6a4c3e1fe112f8df6d28f37434e.tar.bz2 |
#1616979: Add the cp720 (Arabic DOS) encoding.
Since there is no official mapping file from unicode.org,
the codec file is generated on Windows with the new genwincodec.py script.
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/unicode/genwincodec.py | 61 | ||||
-rw-r--r-- | Tools/unicode/genwincodecs.bat | 7 |
2 files changed, 68 insertions, 0 deletions
diff --git a/Tools/unicode/genwincodec.py b/Tools/unicode/genwincodec.py new file mode 100644 index 0000000..32dcada --- /dev/null +++ b/Tools/unicode/genwincodec.py @@ -0,0 +1,61 @@ +"""This script generates a Python codec module from a Windows Code Page.
+
+It uses the function MultiByteToWideChar to generate a decoding table.
+"""
+
+import ctypes
+from ctypes import wintypes
+from gencodec import codegen
+import unicodedata
+
+def genwinmap(codepage):
+ MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
+ MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
+ wintypes.LPCSTR, ctypes.c_int,
+ wintypes.LPWSTR, ctypes.c_int]
+ MultiByteToWideChar.restype = ctypes.c_int
+
+ enc2uni = {}
+
+ for i in range(32) + [127]:
+ enc2uni[i] = (i, 'CONTROL CHARACTER')
+
+ for i in range(256):
+ buf = ctypes.create_unicode_buffer(2)
+ ret = MultiByteToWideChar(
+ codepage, 0,
+ chr(i), 1,
+ buf, 2)
+ assert ret == 1, "invalid code page"
+ assert buf[1] == '\x00'
+ try:
+ name = unicodedata.name(buf[0])
+ except ValueError:
+ try:
+ name = enc2uni[i][1]
+ except KeyError:
+ name = ''
+
+ enc2uni[i] = (ord(buf[0]), name)
+
+ return enc2uni
+
+def genwincodec(codepage):
+ import platform
+ map = genwinmap(codepage)
+ encodingname = 'cp%d' % codepage
+ code = codegen("", map, encodingname)
+ # Replace first lines with our own docstring
+ code = '''\
+"""Python Character Mapping Codec %s generated on Windows:
+%s with the command:
+ python Tools/unicode/genwincodec.py %s
+"""#"
+''' % (encodingname, ' '.join(platform.win32_ver()), codepage
+ ) + code.split('"""#"', 1)[1]
+
+ print code
+
+if __name__ == '__main__':
+ import sys
+ genwincodec(int(sys.argv[1]))
diff --git a/Tools/unicode/genwincodecs.bat b/Tools/unicode/genwincodecs.bat new file mode 100644 index 0000000..6a6a671 --- /dev/null +++ b/Tools/unicode/genwincodecs.bat @@ -0,0 +1,7 @@ +@rem Recreate some python charmap codecs from the Windows function
+@rem MultiByteToWideChar.
+
+@rem Work from the directory that contains this script.
+@cd /d %~dp0
+@rem Create the output directory only when it is missing, so that running
+@rem the script again does not print a "already exists" error.
+@if not exist build mkdir build
+@rem Arabic DOS code page
+c:\python26\python genwincodec.py 720 > build/cp720.py
|