summaryrefslogtreecommitdiffstats
path: root/Tools/unicode
diff options
context:
space:
mode:
authorAmaury Forgeot d'Arc <amauryfa@gmail.com>2009-07-13 20:38:21 (GMT)
committerAmaury Forgeot d'Arc <amauryfa@gmail.com>2009-07-13 20:38:21 (GMT)
commit8b84ea0aa4ee79d86367d0bce017da368a62338a (patch)
treea15b8b24f35f39d2fe056a1f2fcbb306484c6f64 /Tools/unicode
parent514ae0178d554f0c47e8d59666e54bcafb307ea3 (diff)
downloadcpython-8b84ea0aa4ee79d86367d0bce017da368a62338a.zip
cpython-8b84ea0aa4ee79d86367d0bce017da368a62338a.tar.gz
cpython-8b84ea0aa4ee79d86367d0bce017da368a62338a.tar.bz2
Merged revisions 74000-74001 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r74000 | amaury.forgeotdarc | 2009-07-13 22:01:11 +0200 (lun., 13 juil. 2009) | 4 lines

  #1616979: Add the cp720 (Arabic DOS) encoding.
  Since there is no official mapping file from unicode.org,
  the codec file is generated on Windows with the new genwincodec.py script.
........
  r74001 | amaury.forgeotdarc | 2009-07-13 22:03:21 +0200 (lun., 13 juil. 2009) | 2 lines

  NEWS entry for r74000.
........
Diffstat (limited to 'Tools/unicode')
-rw-r--r--Tools/unicode/gencodec.py4
-rw-r--r--Tools/unicode/genwincodec.py61
-rw-r--r--Tools/unicode/genwincodecs.bat7
3 files changed, 70 insertions, 2 deletions
diff --git a/Tools/unicode/gencodec.py b/Tools/unicode/gencodec.py
index c3846e9..198ae56 100644
--- a/Tools/unicode/gencodec.py
+++ b/Tools/unicode/gencodec.py
@@ -237,11 +237,11 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2):
else:
mapchar = chr(mapvalue)
if mapcomment and comments:
- append(' %r\t# %s -> %s' % (mapchar,
+ append(' %a \t# %s -> %s' % (mapchar,
hexrepr(key, key_precision),
mapcomment))
else:
- append(' %r' % mapchar)
+ append(' %a' % mapchar)
append(')')
return l
diff --git a/Tools/unicode/genwincodec.py b/Tools/unicode/genwincodec.py
new file mode 100644
index 0000000..17ba809
--- /dev/null
+++ b/Tools/unicode/genwincodec.py
@@ -0,0 +1,61 @@
+"""This script generates a Python codec module from a Windows Code Page.
+
+It uses the function MultiByteToWideChar to generate a decoding table.
+"""
+
+import ctypes
+from ctypes import wintypes
+from gencodec import codegen
+import unicodedata
+
def genwinmap(codepage):
    """Build the decoding map of a Windows code page, one byte at a time.

    Each of the 256 byte values is decoded on its own with the Win32
    function MultiByteToWideChar (reached through ctypes.windll), so this
    function only works on Windows.

    codepage is the numeric Windows code page identifier.

    Returns a dict mapping every byte value 0-255 to a tuple
    (unicode code point, character name).  The name is taken from
    unicodedata; for characters that have no name there, bytes 0-31 and
    127 are labelled 'CONTROL CHARACTER' and all others get ''.

    Raises ValueError if MultiByteToWideChar does not produce exactly one
    character for a byte.
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    enc2uni = {}
    for i in range(256):
        # A fresh zero-filled buffer per byte keeps the check on buf[1]
        # below meaningful.
        buf = ctypes.create_unicode_buffer(2)
        # The LPCSTR argument must be a bytes object holding the raw byte
        # value; chr(i) is a str and is not a valid byte string here.
        ret = MultiByteToWideChar(codepage, 0, bytes([i]), 1, buf, 2)
        if ret != 1:
            # Raised explicitly (not asserted) so the check survives -O.
            raise ValueError("invalid code page")
        # Exactly one character was reported, so the second slot must
        # still hold the initial NUL.
        assert buf[1] == '\x00'

        if i < 32 or i == 127:
            fallback = 'CONTROL CHARACTER'
        else:
            fallback = ''
        name = unicodedata.name(buf[0], fallback)

        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
+
def genwincodec(codepage):
    """Print the source of a Python charmap codec for a Windows code page.

    The module source is produced by gencodec.codegen from the decoding
    map returned by genwinmap(codepage), using 'cp<codepage>' as the
    encoding name.  Everything up to and including the first marker that
    closes the generated module docstring (three double quotes followed
    by '#' and one double quote) is replaced by a docstring recording the
    encoding name, the output of platform.win32_ver() and the command
    line used.  The result is written to standard output.
    """
    import platform

    decoding_map = genwinmap(codepage)
    codec_name = 'cp%d' % codepage
    generated = codegen("", decoding_map, codec_name)

    # Our own docstring, which takes the place of the generated one.
    windows_version = ' '.join(platform.win32_ver())
    header = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
  python Tools/unicode/genwincodec.py %s
"""#"
''' % (codec_name, windows_version, codepage)

    # Keep only what follows the generated docstring's closing marker.
    remainder = generated.split('"""#"', 1)[1]

    print(header + remainder)
+
+if __name__ == '__main__':
+ import sys
+ genwincodec(int(sys.argv[1]))
diff --git a/Tools/unicode/genwincodecs.bat b/Tools/unicode/genwincodecs.bat
new file mode 100644
index 0000000..ad45c6c
--- /dev/null
+++ b/Tools/unicode/genwincodecs.bat
@@ -0,0 +1,7 @@
@rem Recreate some python charmap codecs from the Windows function
@rem MultiByteToWideChar.

@rem Run from the directory containing this script so that genwincodec.py
@rem and the build directory are resolved next to it.
@cd /d "%~dp0"
@rem Create the output directory only when it is missing, so that rerunning
@rem the script does not report an "already exists" error.
@if not exist build mkdir build
@rem Arabic DOS code page
c:\python30\python genwincodec.py 720 > build/cp720.py