diff options
author | Hye-Shik Chang <hyeshik@gmail.com> | 2004-01-17 14:29:29 (GMT) |
---|---|---|
committer | Hye-Shik Chang <hyeshik@gmail.com> | 2004-01-17 14:29:29 (GMT) |
commit | 3e2a30692085d32ac63f72b35da39158a471fc68 (patch) | |
tree | 4cbe735f61eae87ac56a13ca6bd32113b98bd03d /Lib/encodings/aliases.py | |
parent | cd1f7430cb8f48de970021071d7683054c23b10f (diff) | |
download | cpython-3e2a30692085d32ac63f72b35da39158a471fc68.zip cpython-3e2a30692085d32ac63f72b35da39158a471fc68.tar.gz cpython-3e2a30692085d32ac63f72b35da39158a471fc68.tar.bz2 |
Add CJK codecs support as discussed on python-dev. (SF #873597)
Several style fixes are suggested by Martin v. Loewis and
Marc-Andre Lemburg. Thanks!
Diffstat (limited to 'Lib/encodings/aliases.py')
-rw-r--r-- | Lib/encodings/aliases.py | 109 |
1 files changed, 100 insertions, 9 deletions
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index b304f15..0594171 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -14,12 +14,6 @@ codecs. In addition to these, a few Python specific codec aliases have also been added. - About the CJK codec aliases: - - The codecs for these encodings are not distributed with the - Python core, but are included here for reference, since the - locale module relies on having these aliases available. - """ aliases = { @@ -41,6 +35,10 @@ aliases = { 'base64' : 'base64_codec', 'base_64' : 'base64_codec', + # big5 codec + 'big5_tw' : 'big5', + 'csbig5' : 'big5', + # bz2_codec codec 'bz2' : 'bz2_codec', @@ -168,9 +166,91 @@ aliases = { 'csibm869' : 'cp869', 'ibm869' : 'cp869', + # cp932 codec + '932' : 'cp932', + 'ms932' : 'cp932', + 'mskanji' : 'cp932', + 'ms_kanji' : 'cp932', + + # cp949 codec + '949' : 'cp949', + 'ms949' : 'cp949', + 'uhc' : 'cp949', + + # cp950 codec + '950' : 'cp950', + 'ms950' : 'cp950', + + # euc_jisx0213 codec + 'jisx0213' : 'euc_jisx0213', + 'eucjisx0213' : 'euc_jisx0213', + + # euc_jp codec + 'eucjp' : 'euc_jp', + 'ujis' : 'euc_jp', + 'u_jis' : 'euc_jp', + + # euc_kr codec + 'euckr' : 'euc_kr', + 'korean' : 'euc_kr', + 'ksc5601' : 'euc_kr', + 'ks_c_5601' : 'euc_kr', + 'ks_c_5601_1987' : 'euc_kr', + 'ksx1001' : 'euc_kr', + 'ks_x_1001' : 'euc_kr', + + # gb18030 codec + 'gb18030_2000' : 'gb18030', + + # gb2312 codec + 'chinese' : 'gb2312', + 'csiso58gb231280' : 'gb2312', + 'euc_cn' : 'gb2312', + 'euccn' : 'gb2312', + 'eucgb2312_cn' : 'gb2312', + 'gb2312_1980' : 'gb2312', + 'gb2312_80' : 'gb2312', + 'iso_ir_58' : 'gb2312', + + # gbk codec + '936' : 'gbk', + 'cp936' : 'gbk', + 'ms936' : 'gbk', + # hex_codec codec 'hex' : 'hex_codec', + # hz codec + 'hzgb' : 'hz', + 'hz_gb' : 'hz', + 'hz_gb_2312' : 'hz', + + # iso2022_jp codec + 'csiso2022jp' : 'iso2022_jp', + 'iso2022jp' : 'iso2022_jp', + 'iso_2022_jp' : 'iso2022_jp', + + # iso2022_jp_1 codec + 'iso2022jp_1' : 'iso2022_jp_1', + 'iso_2022_jp_1' : 'iso2022_jp_1', + + # iso2022_jp_2 codec + 'iso2022jp_2' : 'iso2022_jp_2', + 'iso_2022_jp_2' : 'iso2022_jp_2', + + # iso_3022_jp_3 codec + 'iso2022jp_3' : 'iso2022_jp_3', + 'iso_2022_jp_3' : 'iso2022_jp_3', + + # iso2022_jp_ext codec + 'iso2022jp_ext' : 'iso2022_jp_ext', + 'iso_2022_jp_ext' : 'iso2022_jp_ext', + + # iso2022_kr codec + 'csiso2022kr' : 'iso2022_kr', + 'iso2022kr' : 'iso2022_kr', + 'iso_2022_kr' : 'iso2022_kr', + # iso8859_10 codec 'csisolatin6' : 'iso8859_10', 'iso_8859_10' : 'iso8859_10', @@ -258,9 +338,9 @@ aliases = { 'l5' : 'iso8859_9', 'latin5' : 'iso8859_9', - # jis_7 codec - 'csiso2022jp' : 'jis_7', - 'iso_2022_jp' : 'jis_7', + # johab codec + 'cp1361' : 'johab', + 'ms1361' : 'johab', # koi8_r codec 'cskoi8r' : 'koi8_r', @@ -308,6 +388,17 @@ aliases = { # rot_13 codec 'rot13' : 'rot_13', + # shift_jis codec + 'csshiftjis' : 'shift_jis', + 'shiftjis' : 'shift_jis', + 'sjis' : 'shift_jis', + 's_jis' : 'shift_jis', + + # shift_jisx0213 codec + 'shiftjisx0213' : 'shift_jisx0213', + 'sjisx0213' : 'shift_jisx0213', + 's_jisx0213' : 'shift_jisx0213', + # tactis codec 'tis260' : 'tactis', |