diff options
-rw-r--r-- | Lib/locale.py | 99 | ||||
-rw-r--r-- | Misc/NEWS | 5 | ||||
-rwxr-xr-x | Tools/i18n/makelocalealias.py | 45 |
3 files changed, 148 insertions, 1 deletions
diff --git a/Lib/locale.py b/Lib/locale.py index fa24829..f7d4d7b 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -822,11 +822,15 @@ locale_alias = { 'a3': 'az_AZ.KOI8-C', 'a3_az': 'az_AZ.KOI8-C', 'a3_az.koic': 'az_AZ.KOI8-C', + 'aa_dj': 'aa_DJ.ISO8859-1', + 'aa_er': 'aa_ER.UTF-8', + 'aa_et': 'aa_ET.UTF-8', 'af': 'af_ZA.ISO8859-1', 'af_za': 'af_ZA.ISO8859-1', 'am': 'am_ET.UTF-8', 'am_et': 'am_ET.UTF-8', 'american': 'en_US.ISO8859-1', + 'an_es': 'an_ES.ISO8859-15', 'ar': 'ar_AA.ISO8859-6', 'ar_aa': 'ar_AA.ISO8859-6', 'ar_ae': 'ar_AE.ISO8859-6', @@ -850,6 +854,8 @@ locale_alias = { 'arabic': 'ar_AA.ISO8859-6', 'as': 'as_IN.UTF-8', 'as_in': 'as_IN.UTF-8', + 'ast_es': 'ast_ES.ISO8859-15', + 'ayc_pe': 'ayc_PE.UTF-8', 'az': 'az_AZ.ISO8859-9E', 'az_az': 'az_AZ.ISO8859-9E', 'az_az.iso88599e': 'az_AZ.ISO8859-9E', @@ -857,17 +863,25 @@ locale_alias = { 'be@latin': 'be_BY.UTF-8@latin', 'be_by': 'be_BY.CP1251', 'be_by@latin': 'be_BY.UTF-8@latin', + 'bem_zm': 'bem_ZM.UTF-8', + 'ber_dz': 'ber_DZ.UTF-8', + 'ber_ma': 'ber_MA.UTF-8', 'bg': 'bg_BG.CP1251', 'bg_bg': 'bg_BG.CP1251', + 'bho_in': 'bho_IN.UTF-8', + 'bn_bd': 'bn_BD.UTF-8', 'bn_in': 'bn_IN.UTF-8', + 'bo_cn': 'bo_CN.UTF-8', 'bo_in': 'bo_IN.UTF-8', 'bokmal': 'nb_NO.ISO8859-1', 'bokm\xe5l': 'nb_NO.ISO8859-1', 'br': 'br_FR.ISO8859-1', 'br_fr': 'br_FR.ISO8859-1', + 'brx_in': 'brx_IN.UTF-8', 'bs': 'bs_BA.ISO8859-2', 'bs_ba': 'bs_BA.ISO8859-2', 'bulgarian': 'bg_BG.CP1251', + 'byn_er': 'byn_ER.UTF-8', 'c': 'C', 'c-french': 'fr_CA.ISO8859-1', 'c.ascii': 'C', @@ -884,10 +898,13 @@ locale_alias = { 'cextend': 'en_US.ISO8859-1', 'chinese-s': 'zh_CN.eucCN', 'chinese-t': 'zh_TW.eucTW', + 'crh_ua': 'crh_UA.UTF-8', 'croatian': 'hr_HR.ISO8859-2', 'cs': 'cs_CZ.ISO8859-2', 'cs_cs': 'cs_CZ.ISO8859-2', 'cs_cz': 'cs_CZ.ISO8859-2', + 'csb_pl': 'csb_PL.UTF-8', + 'cv_ru': 'cv_RU.UTF-8', 'cy': 'cy_GB.ISO8859-1', 'cy_gb': 'cy_GB.ISO8859-1', 'cz': 'cs_CZ.ISO8859-2', @@ -904,23 +921,30 @@ locale_alias = { 'de_de': 'de_DE.ISO8859-1', 'de_lu': 'de_LU.ISO8859-1', 'deutsch': 'de_DE.ISO8859-1', + 'doi_in': 'doi_IN.UTF-8', 'dutch': 'nl_NL.ISO8859-1', 'dutch.iso88591': 'nl_BE.ISO8859-1', + 'dv_mv': 'dv_MV.UTF-8', + 'dz_bt': 'dz_BT.UTF-8', 'ee': 'ee_EE.ISO8859-4', 'ee_ee': 'ee_EE.ISO8859-4', 'eesti': 'et_EE.ISO8859-1', 'el': 'el_GR.ISO8859-7', + 'el_cy': 'el_CY.ISO8859-7', 'el_gr': 'el_GR.ISO8859-7', 'el_gr@euro': 'el_GR.ISO8859-15', 'en': 'en_US.ISO8859-1', + 'en_ag': 'en_AG.UTF-8', 'en_au': 'en_AU.ISO8859-1', 'en_be': 'en_BE.ISO8859-1', 'en_bw': 'en_BW.ISO8859-1', 'en_ca': 'en_CA.ISO8859-1', + 'en_dk': 'en_DK.ISO8859-1', 'en_gb': 'en_GB.ISO8859-1', 'en_hk': 'en_HK.ISO8859-1', 'en_ie': 'en_IE.ISO8859-1', 'en_in': 'en_IN.ISO8859-1', + 'en_ng': 'en_NG.UTF-8', 'en_nz': 'en_NZ.ISO8859-1', 'en_ph': 'en_PH.ISO8859-1', 'en_sg': 'en_SG.ISO8859-1', @@ -928,6 +952,7 @@ locale_alias = { 'en_us': 'en_US.ISO8859-1', 'en_us@euro@euro': 'en_US.ISO8859-15', 'en_za': 'en_ZA.ISO8859-1', + 'en_zm': 'en_ZM.UTF-8', 'en_zw': 'en_ZW.ISO8859-1', 'eng_gb': 'en_GB.ISO8859-1', 'english': 'en_EN.ISO8859-1', @@ -944,6 +969,7 @@ locale_alias = { 'es_cl': 'es_CL.ISO8859-1', 'es_co': 'es_CO.ISO8859-1', 'es_cr': 'es_CR.ISO8859-1', + 'es_cu': 'es_CU.UTF-8', 'es_do': 'es_DO.ISO8859-1', 'es_ec': 'es_EC.ISO8859-1', 'es_es': 'es_ES.ISO8859-1', @@ -967,8 +993,10 @@ locale_alias = { 'fa': 'fa_IR.UTF-8', 'fa_ir': 'fa_IR.UTF-8', 'fa_ir.isiri3342': 'fa_IR.ISIRI-3342', + 'ff_sn': 'ff_SN.UTF-8', 'fi': 'fi_FI.ISO8859-15', 'fi_fi': 'fi_FI.ISO8859-15', + 'fil_ph': 'fil_PH.UTF-8', 'finnish': 'fi_FI.ISO8859-1', 'fo': 'fo_FO.ISO8859-1', 'fo_fo': 'fo_FO.ISO8859-1', @@ -983,6 +1011,9 @@ locale_alias = { 'french': 'fr_FR.ISO8859-1', 'french.iso88591': 'fr_CH.ISO8859-1', 'french_france': 'fr_FR.ISO8859-1', + 'fur_it': 'fur_IT.UTF-8', + 'fy_de': 'fy_DE.UTF-8', + 'fy_nl': 'fy_NL.UTF-8', 'ga': 'ga_IE.ISO8859-1', 'ga_ie': 'ga_IE.ISO8859-1', 'galego': 'gl_ES.ISO8859-1', @@ -993,12 +1024,15 @@ locale_alias = { 'german': 'de_DE.ISO8859-1', 'german.iso88591': 'de_CH.ISO8859-1', 'german_germany': 'de_DE.ISO8859-1', + 'gez_er': 'gez_ER.UTF-8', + 'gez_et': 'gez_ET.UTF-8', 'gl': 'gl_ES.ISO8859-1', 'gl_es': 'gl_ES.ISO8859-1', 'greek': 'el_GR.ISO8859-7', 'gu_in': 'gu_IN.UTF-8', 'gv': 'gv_GB.ISO8859-1', 'gv_gb': 'gv_GB.ISO8859-1', + 'ha_ng': 'ha_NG.UTF-8', 'he': 'he_IL.ISO8859-8', 'he_il': 'he_IL.ISO8859-8', 'hebrew': 'he_IL.ISO8859-8', @@ -1010,12 +1044,19 @@ locale_alias = { 'hr': 'hr_HR.ISO8859-2', 'hr_hr': 'hr_HR.ISO8859-2', 'hrvatski': 'hr_HR.ISO8859-2', + 'hsb_de': 'hsb_DE.ISO8859-2', + 'ht_ht': 'ht_HT.UTF-8', 'hu': 'hu_HU.ISO8859-2', 'hu_hu': 'hu_HU.ISO8859-2', 'hungarian': 'hu_HU.ISO8859-2', + 'hy_am': 'hy_AM.UTF-8', + 'hy_am.armscii8': 'hy_AM.ARMSCII_8', + 'ia_fr': 'ia_FR.UTF-8', 'icelandic': 'is_IS.ISO8859-1', 'id': 'id_ID.ISO8859-1', 'id_id': 'id_ID.ISO8859-1', + 'ig_ng': 'ig_NG.UTF-8', + 'ik_ca': 'ik_CA.UTF-8', 'in': 'id_ID.ISO8859-1', 'in_id': 'id_ID.ISO8859-1', 'is': 'is_IS.ISO8859-1', @@ -1035,6 +1076,7 @@ locale_alias = { 'iu_ca.nunacom8': 'iu_CA.NUNACOM-8', 'iw': 'he_IL.ISO8859-8', 'iw_il': 'he_IL.ISO8859-8', + 'iw_il.utf8': 'iw_IL.UTF-8', 'ja': 'ja_JP.eucJP', 'ja_jp': 'ja_JP.eucJP', 'ja_jp.euc': 'ja_JP.eucJP', @@ -1050,6 +1092,7 @@ locale_alias = { 'ka_ge.georgianacademy': 'ka_GE.GEORGIAN-ACADEMY', 'ka_ge.georgianps': 'ka_GE.GEORGIAN-PS', 'ka_ge.georgianrs': 'ka_GE.GEORGIAN-ACADEMY', + 'kk_kz': 'kk_KZ.ptcp154', 'kl': 'kl_GL.ISO8859-1', 'kl_gl': 'kl_GL.ISO8859-1', 'km_kh': 'km_KH.UTF-8', @@ -1058,14 +1101,21 @@ locale_alias = { 'ko': 'ko_KR.eucKR', 'ko_kr': 'ko_KR.eucKR', 'ko_kr.euc': 'ko_KR.eucKR', + 'kok_in': 'kok_IN.UTF-8', 'korean': 'ko_KR.eucKR', 'korean.euc': 'ko_KR.eucKR', 'ks': 'ks_IN.UTF-8', 'ks_in': 'ks_IN.UTF-8', + 'ku_tr': 'ku_TR.ISO8859-9', 'kw': 'kw_GB.ISO8859-1', 'kw_gb': 'kw_GB.ISO8859-1', 'ky': 'ky_KG.UTF-8', 'ky_kg': 'ky_KG.UTF-8', + 'lb_lu': 'lb_LU.UTF-8', + 'lg_ug': 'lg_UG.ISO8859-10', + 'li_be': 'li_BE.UTF-8', + 'li_nl': 'li_NL.UTF-8', + 'lij_it': 'lij_IT.UTF-8', 'lithuanian': 'lt_LT.ISO8859-13', 'lo': 'lo_LA.MULELAO-1', 'lo_la': 'lo_LA.MULELAO-1', @@ -1076,24 +1126,37 @@ locale_alias = { 'lt_lt': 'lt_LT.ISO8859-13', 'lv': 'lv_LV.ISO8859-13', 'lv_lv': 'lv_LV.ISO8859-13', + 'mag_in': 'mag_IN.UTF-8', 'mai': 'mai_IN.UTF-8', 'mai_in': 'mai_IN.UTF-8', + 'mg_mg': 'mg_MG.ISO8859-15', + 'mhr_ru': 'mhr_RU.UTF-8', 'mi': 'mi_NZ.ISO8859-1', 'mi_nz': 'mi_NZ.ISO8859-1', 'mk': 'mk_MK.ISO8859-5', 'mk_mk': 'mk_MK.ISO8859-5', 'ml': 'ml_IN.UTF-8', 'ml_in': 'ml_IN.UTF-8', + 'mn_mn': 'mn_MN.UTF-8', + 'mni_in': 'mni_IN.UTF-8', 'mr': 'mr_IN.UTF-8', 'mr_in': 'mr_IN.UTF-8', 'ms': 'ms_MY.ISO8859-1', 'ms_my': 'ms_MY.ISO8859-1', 'mt': 'mt_MT.ISO8859-3', 'mt_mt': 'mt_MT.ISO8859-3', + 'my_mm': 'my_MM.UTF-8', + 'nan_tw@latin': 'nan_TW.UTF-8@latin', 'nb': 'nb_NO.ISO8859-1', 'nb_no': 'nb_NO.ISO8859-1', + 'nds_de': 'nds_DE.UTF-8', + 'nds_nl': 'nds_NL.UTF-8', 'ne_np': 'ne_NP.UTF-8', + 'nhn_mx': 'nhn_MX.UTF-8', + 'niu_nu': 'niu_NU.UTF-8', + 'niu_nz': 'niu_NZ.UTF-8', 'nl': 'nl_NL.ISO8859-1', + 'nl_aw': 'nl_AW.UTF-8', 'nl_be': 'nl_BE.ISO8859-1', 'nl_nl': 'nl_NL.ISO8859-1', 'nn': 'nn_NO.ISO8859-1', @@ -1113,10 +1176,15 @@ locale_alias = { 'nynorsk': 'nn_NO.ISO8859-1', 'oc': 'oc_FR.ISO8859-1', 'oc_fr': 'oc_FR.ISO8859-1', + 'om_et': 'om_ET.UTF-8', + 'om_ke': 'om_KE.ISO8859-1', 'or': 'or_IN.UTF-8', 'or_in': 'or_IN.UTF-8', + 'os_ru': 'os_RU.UTF-8', 'pa': 'pa_IN.UTF-8', 'pa_in': 'pa_IN.UTF-8', + 'pa_pk': 'pa_PK.UTF-8', + 'pap_an': 'pap_AN.UTF-8', 'pd': 'pd_US.ISO8859-1', 'pd_de': 'pd_DE.ISO8859-1', 'pd_us': 'pd_US.ISO8859-1', @@ -1131,6 +1199,7 @@ locale_alias = { 'posix-utf2': 'C', 'pp': 'pp_AN.ISO8859-1', 'pp_an': 'pp_AN.ISO8859-1', + 'ps_af': 'ps_AF.UTF-8', 'pt': 'pt_PT.ISO8859-1', 'pt_br': 'pt_BR.ISO8859-1', 'pt_pt': 'pt_PT.ISO8859-1', @@ -1144,6 +1213,9 @@ locale_alias = { 'russian': 'ru_RU.ISO8859-5', 'rw': 'rw_RW.ISO8859-1', 'rw_rw': 'rw_RW.ISO8859-1', + 'sa_in': 'sa_IN.UTF-8', + 'sat_in': 'sat_IN.UTF-8', + 'sc_it': 'sc_IT.UTF-8', 'sd': 'sd_IN.UTF-8', 'sd_in': 'sd_IN.UTF-8', 'se_no': 'se_NO.UTF-8', @@ -1154,8 +1226,10 @@ locale_alias = { 'sh_hr.iso88592': 'hr_HR.ISO8859-2', 'sh_sp': 'sr_CS.ISO8859-2', 'sh_yu': 'sr_RS.UTF-8@latin', + 'shs_ca': 'shs_CA.UTF-8', 'si': 'si_LK.UTF-8', 'si_lk': 'si_LK.UTF-8', + 'sid_et': 'sid_ET.UTF-8', 'sinhala': 'si_LK.UTF-8', 'sk': 'sk_SK.ISO8859-2', 'sk_sk': 'sk_SK.ISO8859-2', @@ -1165,12 +1239,17 @@ locale_alias = { 'slovak': 'sk_SK.ISO8859-2', 'slovene': 'sl_SI.ISO8859-2', 'slovenian': 'sl_SI.ISO8859-2', + 'so_dj': 'so_DJ.ISO8859-1', + 'so_et': 'so_ET.UTF-8', + 'so_ke': 'so_KE.ISO8859-1', + 'so_so': 'so_SO.ISO8859-1', 'sp': 'sr_CS.ISO8859-5', 'sp_yu': 'sr_CS.ISO8859-5', 'spanish': 'es_ES.ISO8859-1', 'spanish_spain': 'es_ES.ISO8859-1', 'sq': 'sq_AL.ISO8859-2', 'sq_al': 'sq_AL.ISO8859-2', + 'sq_mk': 'sq_MK.UTF-8', 'sr': 'sr_RS.UTF-8', 'sr@cyrillic': 'sr_RS.UTF-8', 'sr@latn': 'sr_CS.UTF-8@latin', @@ -1195,12 +1274,17 @@ locale_alias = { 'sv': 'sv_SE.ISO8859-1', 'sv_fi': 'sv_FI.ISO8859-1', 'sv_se': 'sv_SE.ISO8859-1', + 'sw_ke': 'sw_KE.UTF-8', + 'sw_tz': 'sw_TZ.UTF-8', 'swedish': 'sv_SE.ISO8859-1', + 'szl_pl': 'szl_PL.UTF-8', 'ta': 'ta_IN.TSCII-0', 'ta_in': 'ta_IN.TSCII-0', 'ta_in.tscii': 'ta_IN.TSCII-0', 'ta_in.tscii0': 'ta_IN.TSCII-0', + 'ta_lk': 'ta_LK.UTF-8', 'te': 'te_IN.UTF-8', + 'te_in': 'te_IN.UTF-8', 'tg': 'tg_TJ.KOI8-C', 'tg_tj': 'tg_TJ.KOI8-C', 'th': 'th_TH.ISO8859-11', @@ -1208,23 +1292,31 @@ locale_alias = { 'th_th.tactis': 'th_TH.TIS620', 'th_th.tis620': 'th_TH.TIS620', 'thai': 'th_TH.ISO8859-11', + 'ti_er': 'ti_ER.UTF-8', + 'ti_et': 'ti_ET.UTF-8', + 'tig_er': 'tig_ER.UTF-8', + 'tk_tm': 'tk_TM.UTF-8', 'tl': 'tl_PH.ISO8859-1', 'tl_ph': 'tl_PH.ISO8859-1', 'tn': 'tn_ZA.ISO8859-15', 'tn_za': 'tn_ZA.ISO8859-15', 'tr': 'tr_TR.ISO8859-9', + 'tr_cy': 'tr_CY.ISO8859-9', 'tr_tr': 'tr_TR.ISO8859-9', 'ts': 'ts_ZA.ISO8859-1', 'ts_za': 'ts_ZA.ISO8859-1', 'tt': 'tt_RU.TATAR-CYR', 'tt_ru': 'tt_RU.TATAR-CYR', 'tt_ru.tatarcyr': 'tt_RU.TATAR-CYR', + 'tt_ru@iqtelif': 'tt_RU.UTF-8@iqtelif', 'turkish': 'tr_TR.ISO8859-9', + 'ug_cn': 'ug_CN.UTF-8', 'uk': 'uk_UA.KOI8-U', 'uk_ua': 'uk_UA.KOI8-U', 'univ': 'en_US.utf', 'universal': 'en_US.utf', 'universal.utf8@ucs4': 'en_US.UTF-8', + 'unm_us': 'unm_US.UTF-8', 'ur': 'ur_PK.CP1256', 'ur_in': 'ur_IN.UTF-8', 'ur_pk': 'ur_PK.CP1256', @@ -1241,16 +1333,23 @@ locale_alias = { 'vi_vn.viscii111': 'vi_VN.VISCII', 'wa': 'wa_BE.ISO8859-1', 'wa_be': 'wa_BE.ISO8859-1', + 'wae_ch': 'wae_CH.UTF-8', + 'wal_et': 'wal_ET.UTF-8', + 'wo_sn': 'wo_SN.UTF-8', 'xh': 'xh_ZA.ISO8859-1', 'xh_za': 'xh_ZA.ISO8859-1', 'yi': 'yi_US.CP1255', 'yi_us': 'yi_US.CP1255', + 'yo_ng': 'yo_NG.UTF-8', + 'yue_hk': 'yue_HK.UTF-8', 'zh': 'zh_CN.eucCN', 'zh_cn': 'zh_CN.gb2312', 'zh_cn.big5': 'zh_TW.big5', 'zh_cn.euc': 'zh_CN.eucCN', 'zh_hk': 'zh_HK.big5hkscs', 'zh_hk.big5hk': 'zh_HK.big5hkscs', + 'zh_sg': 'zh_SG.GB2312', + 'zh_sg.gbk': 'zh_SG.GBK', 'zh_tw': 'zh_TW.big5', 'zh_tw.euc': 'zh_TW.eucTW', 'zh_tw.euctw': 'zh_TW.eucTW', @@ -156,6 +156,8 @@ Core and Builtins Library ------- +- Issue #20079: Added locales supported in glibc 2.18 to locale alias table. + - Issue #20218: Added convenience methods read_text/write_text and read_bytes/ write_bytes to pathlib.Path objects. @@ -1186,6 +1188,9 @@ Tests Tools/Demos ----------- +- Issue #20079: The makelocalealias.py script now can parse the SUPPORTED file + from glibc sources and supports command line options for source paths. + - Issue #22201: Command-line interface of the zipfile module now correctly extracts ZIP files with directory entries. Patch by Ryan Wilson. diff --git a/Tools/i18n/makelocalealias.py b/Tools/i18n/makelocalealias.py index 10887ce..ca69daa 100755 --- a/Tools/i18n/makelocalealias.py +++ b/Tools/i18n/makelocalealias.py @@ -8,6 +8,7 @@ """ import locale import sys +_locale = locale # Location of the alias file LOCALE_ALIAS = '/usr/share/X11/locale/locale.alias' @@ -51,6 +52,37 @@ def parse(filename): data[locale] = alias return data +def parse_glibc_supported(filename): + + with open(filename, encoding='latin1') as f: + lines = list(f) + data = {} + for line in lines: + line = line.strip() + if not line: + continue + if line[:1] == '#': + continue + if '/' not in line: + continue + line = line.rstrip('\\').rstrip() + alias, _, alias_encoding = line.partition('/') + # Lower-case locale + locale = alias.lower() + # Normalize encoding, if given + if '.' in locale: + lang, encoding = locale.split('.')[:2] + encoding = encoding.replace('-', '') + encoding = encoding.replace('_', '') + locale = lang + '.' + encoding + # Add an encoding to alias + alias, _, modifier = alias.partition('@') + alias = _locale._replace_encoding(alias, alias_encoding) + if modifier and not (modifier == 'euro' and alias_encoding == 'ISO-8859-15'): + alias += '@' + modifier + data[locale] = alias + return data + def pprint(data): items = sorted(data.items()) for k, v in items: @@ -92,8 +124,19 @@ def check(data): return errors if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--locale-alias', default=LOCALE_ALIAS, + help='location of the X11 alias file ' + '(default: %a)' % LOCALE_ALIAS) + parser.add_argument('--glibc-supported', + help='location of the glibc SUPPORTED locales file') + args = parser.parse_args() + data = locale.locale_alias.copy() - data.update(parse(LOCALE_ALIAS)) + if args.glibc_supported: + data.update(parse_glibc_supported(args.glibc_supported)) + data.update(parse(args.locale_alias)) data = optimize(data) print_differences(data, locale.locale_alias) print() |