diff options
author | Barry Warsaw <barry@python.org> | 2000-08-25 19:13:37 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2000-08-25 19:13:37 (GMT) |
commit | 95be23dc8626684caa268df7ff7749fcb89adddf (patch) | |
tree | fbd3c2fb75e40cbca441710aa8091b35dafebb4d /Lib/gettext.py | |
parent | 6f9876212d65978038439b585ba0fdaf243ed574 (diff) | |
download | cpython-95be23dc8626684caa268df7ff7749fcb89adddf.zip cpython-95be23dc8626684caa268df7ff7749fcb89adddf.tar.gz cpython-95be23dc8626684caa268df7ff7749fcb89adddf.tar.bz2 |
Initial revision of gettext support for Python. This will undergo
some changes.
Diffstat (limited to 'Lib/gettext.py')
-rw-r--r-- | Lib/gettext.py | 288 |
1 files changed, 288 insertions, 0 deletions
diff --git a/Lib/gettext.py b/Lib/gettext.py new file mode 100644 index 0000000..a7f89b2 --- /dev/null +++ b/Lib/gettext.py @@ -0,0 +1,288 @@ +"""Internationalization and localization support. + +This module provides internationalization (I18N) and localization (L10N) +support for your Python programs by providing an interface to the GNU gettext +message catalog library. + +I18N refers to the operation by which a program is made aware of multiple +languages. L10N refers to the adaptation of your program, once +internationalized, to the local language and cultural habits. In order to +provide multilingual messages for your Python programs, you need to take the +following steps: + + - prepare your program by specially marking translatable strings + - run a suite of tools over your marked program files to generate raw + messages catalogs + - create language specific translations of the message catalogs + - use this module so that message strings are properly translated + +In order to prepare your program for I18N, you need to look at all the strings +in your program. Any string that needs to be translated should be marked by +wrapping it in _('...') -- i.e. a call to the function `_'. For example: + + filename = 'mylog.txt' + message = _('writing a log message') + fp = open(filename, 'w') + fp.write(message) + fp.close() + +In this example, the string `writing a log message' is marked as a candidate +for translation, while the strings `mylog.txt' and `w' are not. + +The GNU gettext package provides a tool, called xgettext, that scans C and C++ +source code looking for these specially marked strings. xgettext generates +what are called `.pot' files, essentially structured human readable files +which contain every marked string in the source code. These .pot files are +copied and handed over to translators who write language-specific versions for +every supported language. + +For I18N Python programs however, xgettext won't work; it doesn't understand +the myriad of string types support by Python. The standard Python +distribution provides a tool called pygettext that does though (found in the +Tools/i18n directory). This is a command line script that supports a similar +interface as xgettext; see its documentation for details. Once you've used +pygettext to create your .pot files, you can use the standard GNU gettext +tools to generate your machine-readable .mo files, which are what's used by +this module. + +In the simple case, to use this module then, you need only add the following +bit of code to the main driver file of your application: + + import gettext + gettext.install() + +This sets everything up so that your _('...') function calls Just Work. In +other words, it installs `_' in the builtins namespace for convenience. You +can skip this step and do it manually by the equivalent code: + + import gettext + import __builtin__ + __builtin__['_'] = gettext.gettext + +Once you've done this, you probably want to call bindtextdomain() and +textdomain() to get the domain set up properly. Again, for convenience, you +can pass the domain and localedir to install to set everything up in one fell +swoop: + + import gettext + gettext.install('mydomain', '/my/locale/dir') + +If your program needs to support many languages at the same time, you will +want to create Translation objects explicitly, like so: + + import gettext + gettext.install() + + lang1 = gettext.Translations(open('/path/to/my/lang1/messages.mo')) + lang2 = gettext.Translations(open('/path/to/my/lang2/messages.mo')) + lang3 = gettext.Translations(open('/path/to/my/lang3/messages.mo')) + + gettext.set(lang1) + # all _() will now translate to language 1 + gettext.set(lang2) + # all _() will now translate to language 2 + +Currently, only GNU gettext format binary .mo files are supported. + +""" + +# This module represents the integration of work from the following authors: +# +# Martin von Loewis, who wrote the initial implementation of the underlying +# C-based libintlmodule (later renamed _gettext), along with a skeletal +# gettext.py implementation. +# +# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule, +# which also included a pure-Python implementation to read .mo files if +# intlmodule wasn't available. +# +# James Henstridge, who also wrote a gettext.py module, which has some +# interesting, but currently unsupported experimental features: the notion of +# a Catalog class and instances, and the ability to add to a catalog file via +# a Python API. +# +# Barry Warsaw integrated these modules, wrote the .install() API and code, +# and conformed all C and Python code to Python's coding standards. + +import os +import sys +import struct +from UserDict import UserDict + + + +# globals +_translations = {} +_current_translation = None +_current_domain = 'messages' + +# Domain to directory mapping, for use by bindtextdomain() +_localedirs = {} + + + +class GNUTranslations(UserDict): + # Magic number of .mo files + MAGIC = 0x950412de + + def __init__(self, fp): + if fp is None: + d = {} + else: + d = self._parse(fp) + UserDict.__init__(self, d) + + def _parse(self, fp): + """Override this method to support alternative .mo formats.""" + unpack = struct.unpack + filename = getattr(fp, 'name', '') + # Parse the .mo file header, which consists of 5 little endian 32 + # bit words. + catalog = {} + buf = fp.read() + magic, version, msgcount, masteridx, transidx = unpack( + '<5i', buf[:20]) + if magic <> self.MAGIC: + raise IOError(0, 'Bad magic number', filename) + # + # Now put all messages from the .mo file buffer into the catalog + # dictionary. + for i in xrange(0, msgcount): + mstart = unpack('<i', buf[masteridx+4:masteridx+8])[0] + mend = mstart + unpack('<i', buf[masteridx:masteridx+4])[0] + tstart = unpack('<i', buf[transidx+4:transidx+8])[0] + tend = tstart + unpack('<i', buf[transidx:transidx+4])[0] + if mend < len(buf) and tend < len(buf): + catalog[buf[mstart:mend]] = buf[tstart:tend] + else: + raise IOError(0, 'File is corrupt', filename) + # + # advance to next entry in the seek tables + masteridx = masteridx + 8 + transidx = transidx + 8 + return catalog + + + +# By default, use GNU gettext format .mo files +Translations = GNUTranslations + +# Locate a .mo file using the gettext strategy +def _find(localedir=None, languages=None, category=None, domain=None): + global _current_domain + global _localedirs + + # Get some reasonable defaults for arguments that were not supplied + if domain is None: + domain = _current_domain + if category is None: + category = 'LC_MESSAGES' + if localedir is None: + localedir = _localedirs.get( + domain, + # TBD: The default localedir is actually system dependent. I + # don't know of a good platform-consistent and portable way to + # default it, so instead, we'll just use sys.prefix. Most + # programs should be calling bindtextdomain() or such explicitly + # anyway. + os.path.join(sys.prefix, 'share', 'locale')) + if languages is None: + languages = [] + for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'): + val = os.environ.get(envar) + if val: + languages = val.split(':') + break + if 'C' not in languages: + languages.append('C') + # select a language + for lang in languages: + if lang == 'C': + break + mofile = os.path.join(localedir, lang, category, '%s.mo' % domain) + # see if it's in the cache + mo = _translations.get(mofile) + if mo: + return mo + fp = None + try: + try: + fp = open(mofile, 'rb') + t = Translations(fp) + _translations[mofile] = t + return t + except IOError: + pass + finally: + if fp: + fp.close() + return {} + + + +def bindtextdomain(domain=None, localedir=None): + """Bind domain to a file in the specified directory.""" + global _localedirs + if domain is None: + return None + if localedir is None: + return _localedirs.get(domain, _localedirs.get('C')) + _localedirs[domain] = localedir + return localedir + + +def textdomain(domain=None): + """Change or query the current global domain.""" + global _current_domain + if domain is None: + return _current_domain + else: + _current_domain = domain + return domain + + +def gettext(message): + """Return localized version of a message.""" + return _find().get(message, message) + + +def dgettext(domain, message): + """Like gettext(), but look up message in specified domain.""" + return _find(domain=domain).get(message, message) + + +def dcgettext(domain, message, category): + try: + from locale import LC_CTYPE, LC_TIME, LC_COLLATE + from locale import LC_MONETARY, LC_MESSAGES, LC_NUMERIC + except ImportError: + return message + categories = { + LC_CTYPE : 'LC_CTYPE', + LC_TIME : 'LC_TIME', + LC_COLLATE : 'LC_COLLATE', + LC_MONETARY : 'LC_MONETARY', + LC_MESSAGES : 'LC_MESSAGES', + LC_NUMERIC : 'LC_NUMERIC' + } + return _find(domain=domain, category=category).get(message, message) + + + +# A higher level API +def set(translation): + global _current_translation + _current_translation = translation + + +def get(): + global _current_translation + return _current_translation + + +def install(domain=None, localedir=None): + import __builtin__ + __builtin__.__dict__['_'] = gettext + if domain is not None: + bindtextdomain(domain, localedir) + textdomain(domain) |