diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2000-07-09 17:12:58 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2000-07-09 17:12:58 (GMT) |
commit | 6c86b99dc1e2d02cb4631df65f0726416a783087 (patch) | |
tree | 97ade424c0b4c0d33715d968d14260da2e531cc5 | |
parent | c70b4483d2c5042c68198dc7c4945ef3cfc95b27 (diff) | |
download | cpython-6c86b99dc1e2d02cb4631df65f0726416a783087.zip cpython-6c86b99dc1e2d02cb4631df65f0726416a783087.tar.gz cpython-6c86b99dc1e2d02cb4631df65f0726416a783087.tar.bz2 |
- Merged setlocale and set_locale. The internal setlocale
function is now overridden by a Python version which accepts
*either* a string (old behaviour) or a locale tuple.
- Renamed a few functions (for consistency):
get_locale => getlocale
get_default => getdefaultlocale (get_default is kept as a compatibility alias)
set_to_default => resetlocale (!)
- The _locale implementation module can now implement
an optional _getdefaultlocale function. If that function
isn't available, a POSIX-based approach is used (checking
LANG and other environment variables, as usual).
(patch #100765)
-rw-r--r-- | Lib/locale.py | 251 |
1 files changed, 159 insertions, 92 deletions
diff --git a/Lib/locale.py b/Lib/locale.py index 437755e..47f5c4e 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -11,15 +11,21 @@ """ -import string +import string, sys -### Load C lib locale APIs or use an emulation +# Try importing the _locale module. +# +# If this fails, fall back on a basic 'C' locale emulation. +# try: + from _locale import * except ImportError: + # Locale emulation + CHAR_MAX = 127 LC_ALL = 6 LC_COLLATE = 3 @@ -31,46 +37,46 @@ except ImportError: Error = ValueError def localeconv(): - """ localeconv() -> dict. + """ localeconv() -> dict. Returns numeric and monetary locale-specific parameters. """ # 'C' locale default values return {'grouping': [127], 'currency_symbol': '', 'n_sign_posn': 127, - 'p_cs_precedes': 127, - 'n_cs_precedes': 127, - 'mon_grouping': [], + 'p_cs_precedes': 127, + 'n_cs_precedes': 127, + 'mon_grouping': [], 'n_sep_by_space': 127, 'decimal_point': '.', 'negative_sign': '', 'positive_sign': '', - 'p_sep_by_space': 127, + 'p_sep_by_space': 127, 'int_curr_symbol': '', - 'p_sign_posn': 127, + 'p_sign_posn': 127, 'thousands_sep': '', - 'mon_thousands_sep': '', - 'frac_digits': 127, + 'mon_thousands_sep': '', + 'frac_digits': 127, 'mon_decimal_point': '', 'int_frac_digits': 127} - + def setlocale(category, value=None): - """ setlocale(integer,string=None) -> string. + """ setlocale(integer,string=None) -> string. Activates/queries locale processing. """ if value is not None and \ value is not 'C': - raise Error,'_locale emulation only supports "C" locale' + raise Error, '_locale emulation only supports "C" locale' return 'C' def strcoll(a,b): - """ strcoll(string,string) -> int. + """ strcoll(string,string) -> int. Compares two strings according to the locale. """ return cmp(a,b) def strxfrm(s): - """ strxfrm(string) -> string. + """ strxfrm(string) -> string. Returns a string that behaves for cmp locale-aware. 
""" return s @@ -86,7 +92,7 @@ def _group(s): if not grouping:return s result="" while s and grouping: - # if grouping is -1, we are done + # if grouping is -1, we are done if grouping[0]==CHAR_MAX: break # 0: re-use last group ad infinitum @@ -107,7 +113,7 @@ def _group(s): def format(f,val,grouping=0): """Formats a value in the same way that the % formatting would use, - but takes the current locale into account. + but takes the current locale into account. Grouping is applied if the third parameter is true.""" result = f % val fields = string.split(result, ".") @@ -118,8 +124,8 @@ def format(f,val,grouping=0): elif len(fields)==1: return fields[0] else: - raise Error,"Too many decimal points in result string" - + raise Error, "Too many decimal points in result string" + def str(val): """Convert float to integer, taking the locale into account.""" return format("%.12g",val) @@ -135,7 +141,7 @@ def atof(str,func=string.atof): dd = localeconv()['decimal_point'] if dd: s=string.split(str,dd) - str=string.join(s,'.') + str=string.join(s, '.') #finally, parse the string return func(str) @@ -144,17 +150,22 @@ def atoi(str): return atof(str,string.atoi) def _test(): - setlocale(LC_ALL,"") + setlocale(LC_ALL, "") #do grouping - s1=format("%d",123456789,1) - print s1,"is",atoi(s1) + s1=format("%d", 123456789,1) + print s1, "is", atoi(s1) #standard formatting s1=str(3.14) - print s1,"is",atof(s1) + print s1, "is", atof(s1) ### Locale name aliasing engine # Author: Marc-Andre Lemburg, mal@lemburg.com +# Various tweaks by Fredrik Lundh <effbot@telia.com> + +# store away the low-level version of setlocale (it's +# overridden below) +_setlocale = setlocale def normalize(localename): @@ -229,7 +240,7 @@ def _parse_localename(localename): elif code == 'C': return None, None else: - raise ValueError,'unknown locale: %s' % localename + raise ValueError, 'unknown locale: %s' % localename return l def _build_localename(localetuple): @@ -247,15 +258,15 @@ def 
_build_localename(localetuple): return language else: return language + '.' + encoding - -def get_default(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')): + +def getdefaultlocale(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')): """ Tries to determine the default locale settings and returns them as tuple (language code, encoding). According to POSIX, a program which has not called - setlocale(LC_ALL,"") runs using the portable 'C' locale. - Calling setlocale(LC_ALL,"") lets it use the default locale as + setlocale(LC_ALL, "") runs using the portable 'C' locale. + Calling setlocale(LC_ALL, "") lets it use the default locale as defined by the LANG variable. Since we don't want to interfere with the current locale setting we thus emulate the behaviour in the way described above. @@ -271,6 +282,17 @@ def get_default(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')): be determined. """ + try: + # check if it's supported by the _locale module + import _locale + code, encoding = _locale._getdefaultlocale() + if sys.platform == "win32" and code and code[:2] == "0x": + # map windows language identifier to language name + code = windows_locale.get(int(code, 0)) + return code, encoding + except (ImportError, NameError): + pass + # fall back on POSIX behaviour import os lookup = os.environ.get for variable in envvars: @@ -281,7 +303,10 @@ def get_default(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')): localename = 'C' return _parse_localename(localename) -def get_locale(category=LC_CTYPE): +# compatibility +get_default = getdefaultlocale + +def getlocale(category=LC_CTYPE): """ Returns the current setting for the given locale category as tuple (language code, encoding). @@ -294,34 +319,36 @@ def get_locale(category=LC_CTYPE): be determined. 
""" - localename = setlocale(category) + localename = _setlocale(category) if category == LC_ALL and ';' in localename: - raise TypeError,'category LC_ALL is not supported' + raise TypeError, 'category LC_ALL is not supported' return _parse_localename(localename) -def set_locale(localetuple, category=LC_ALL): +def setlocale(category, locale=None): - """ Set the locale according to the localetuple (language code, - encoding) as returned by get_locale() and get_default(). + """ Set the locale for the given category. The locale can be + a string, a locale tuple (language code, encoding), or None. - The given codes are passed through the locale aliasing engine - before being given to setlocale() for processing. + Locale tuples are converted to strings the locale aliasing + engine. Locale strings are passed directly to the C lib. - category may be given as one of the LC_* values. It defaults - to LC_ALL. + category may be given as one of the LC_* values. """ - setlocale(category, normalize(_build_localename(localetuple))) + if locale and type(locale) is not type(""): + # convert to string + locale = normalize(_build_localename(locale)) + return _setlocale(category, locale) -def set_to_default(category=LC_ALL): +def resetlocale(category=LC_ALL): """ Sets the locale for category to the default setting. The default setting is determined by calling - get_default(). category defaults to LC_ALL. - + getdefaultlocale(). category defaults to LC_ALL. + """ - setlocale(category, _build_localename(get_default())) + _setlocale(category, _build_localename(getdefaultlocale())) ### Database # @@ -329,47 +356,47 @@ def set_to_default(category=LC_ALL): # comes with X11 and then hand edited removing the explicit encoding # definitions and adding some more aliases. The file is usually # available as /usr/lib/X11/locale/locale.alias. -# +# # # The encoding_alias table maps lowercase encoding alias names to C # locale encoding names (case-sensitive). 
# encoding_alias = { - '437': 'C', - 'c': 'C', - 'iso8859': 'ISO8859-1', - '8859': 'ISO8859-1', - '88591': 'ISO8859-1', - 'ascii': 'ISO8859-1', - 'en': 'ISO8859-1', - 'iso88591': 'ISO8859-1', - 'iso_8859-1': 'ISO8859-1', - '885915': 'ISO8859-15', - 'iso885915': 'ISO8859-15', - 'iso_8859-15': 'ISO8859-15', - 'iso8859-2': 'ISO8859-2', - 'iso88592': 'ISO8859-2', - 'iso_8859-2': 'ISO8859-2', - 'iso88595': 'ISO8859-5', - 'iso88596': 'ISO8859-6', - 'iso88597': 'ISO8859-7', - 'iso88598': 'ISO8859-8', - 'iso88599': 'ISO8859-9', - 'iso-2022-jp': 'JIS7', - 'jis': 'JIS7', - 'jis7': 'JIS7', - 'sjis': 'SJIS', - 'tis620': 'TACTIS', - 'ajec': 'eucJP', - 'eucjp': 'eucJP', - 'ujis': 'eucJP', - 'utf-8': 'utf', - 'utf8': 'utf', - 'utf8@ucs4': 'utf', + '437': 'C', + 'c': 'C', + 'iso8859': 'ISO8859-1', + '8859': 'ISO8859-1', + '88591': 'ISO8859-1', + 'ascii': 'ISO8859-1', + 'en': 'ISO8859-1', + 'iso88591': 'ISO8859-1', + 'iso_8859-1': 'ISO8859-1', + '885915': 'ISO8859-15', + 'iso885915': 'ISO8859-15', + 'iso_8859-15': 'ISO8859-15', + 'iso8859-2': 'ISO8859-2', + 'iso88592': 'ISO8859-2', + 'iso_8859-2': 'ISO8859-2', + 'iso88595': 'ISO8859-5', + 'iso88596': 'ISO8859-6', + 'iso88597': 'ISO8859-7', + 'iso88598': 'ISO8859-8', + 'iso88599': 'ISO8859-9', + 'iso-2022-jp': 'JIS7', + 'jis': 'JIS7', + 'jis7': 'JIS7', + 'sjis': 'SJIS', + 'tis620': 'TACTIS', + 'ajec': 'eucJP', + 'eucjp': 'eucJP', + 'ujis': 'eucJP', + 'utf-8': 'utf', + 'utf8': 'utf', + 'utf8@ucs4': 'utf', } -# +# # The locale_alias table maps lowercase alias names to C locale names # (case-sensitive). Encodings are always separated from the locale # name using a dot ('.'); they should only be given in case the @@ -561,6 +588,46 @@ locale_alias = { 'zh_tw.euc': 'zh_TW.eucTW', } +# +# this maps windows language identifiers (as used on Windows 95 and +# earlier) to locale strings. +# +# NOTE: this mapping is incomplete. 
If your language is missing, send +# a note with the missing language identifier and the suggested locale +# code to Fredrik Lundh <effbot@telia.com>. Thanks /F + +windows_locale = { + 0x0404: "zh_TW", # Chinese (Taiwan) + 0x0804: "zh_CN", # Chinese (PRC) + 0x0406: "da_DK", # Danish + 0x0413: "nl_NL", # Dutch (Netherlands) + 0x0409: "en_US", # English (United States) + 0x0809: "en_UK", # English (United Kingdom) + 0x0c09: "en_AU", # English (Australian) + 0x1009: "en_CA", # English (Canadian) + 0x1409: "en_NZ", # English (New Zealand) + 0x1809: "en_IE", # English (Ireland) + 0x1c09: "en_ZA", # English (South Africa) + 0x040b: "fi_FI", # Finnish + 0x040c: "fr_FR", # French (Standard) + 0x080c: "fr_BE", # French (Belgian) + 0x0c0c: "fr_CA", # French (Canadian) + 0x100c: "fr_CH", # French (Switzerland) + 0x0407: "de_DE", # German (Standard) + 0x0408: "el_GR", # Greek + 0x040d: "iw_IL", # Hebrew + 0x040f: "is_IS", # Icelandic + 0x0410: "it_IT", # Italian (Standard) + 0x0411: "ja_JA", # Japanese + 0x0414: "no_NO", # Norwegian (Bokmal) + 0x0816: "pt_PT", # Portuguese (Standard) + 0x0c0a: "es_ES", # Spanish (Modern Sort) + 0x0441: "sw_KE", # Swahili (Kenya) + 0x041d: "sv_SE", # Swedish + 0x081d: "sv_FI", # Swedish (Finland) + 0x041f: "tr_TR", # Turkish +} + def _print_locale(): """ Test function. @@ -573,9 +640,9 @@ def _print_locale(): _init_categories() del categories['LC_ALL'] - print 'Locale defaults as determined by get_default():' + print 'Locale defaults as determined by getdefaultlocale():' print '-'*72 - lang, enc = get_default() + lang, enc = getdefaultlocale() print 'Language: ', lang or '(undefined)' print 'Encoding: ', enc or '(undefined)' print @@ -583,40 +650,40 @@ def _print_locale(): print 'Locale settings on startup:' print '-'*72 for name,category in categories.items(): - print name,'...' - lang, enc = get_locale(category) + print name, '...' 
+ lang, enc = getlocale(category) print ' Language: ', lang or '(undefined)' print ' Encoding: ', enc or '(undefined)' print - set_to_default() print - print 'Locale settings after calling set_to_default():' + print 'Locale settings after calling resetlocale():' print '-'*72 + resetlocale() for name,category in categories.items(): - print name,'...' - lang, enc = get_locale(category) + print name, '...' + lang, enc = getlocale(category) print ' Language: ', lang or '(undefined)' print ' Encoding: ', enc or '(undefined)' print - + try: - setlocale(LC_ALL,"") + setlocale(LC_ALL, "") except: print 'NOTE:' - print 'setlocale(LC_ALL,"") does not support the default locale' + print 'setlocale(LC_ALL, "") does not support the default locale' print 'given in the OS environment variables.' else: print - print 'Locale settings after calling setlocale(LC_ALL,""):' + print 'Locale settings after calling setlocale(LC_ALL, ""):' print '-'*72 for name,category in categories.items(): - print name,'...' - lang, enc = get_locale(category) + print name, '...' + lang, enc = getlocale(category) print ' Language: ', lang or '(undefined)' print ' Encoding: ', enc or '(undefined)' print - + ### if __name__=='__main__': |