"""Internationalization and localization support. This module provides internationalization (I18N) and localization (L10N) support for your Python programs by providing an interface to the GNU gettext message catalog library. I18N refers to the operation by which a program is made aware of multiple languages. L10N refers to the adaptation of your program, once internationalized, to the local language and cultural habits. In order to provide multilingual messages for your Python programs, you need to take the following steps: - prepare your program by specially marking translatable strings - run a suite of tools over your marked program files to generate raw message catalogs - create language specific translations of the message catalogs - use this module so that message strings are properly translated In order to prepare your program for I18N, you need to look at all the strings in your program. Any string that needs to be translated should be marked by wrapping it in _('...') -- i.e. a call to the function `_'. For example: filename = 'mylog.txt' message = _('writing a log message') fp = open(filename, 'w') fp.write(message) fp.close() In this example, the string `writing a log message' is marked as a candidate for translation, while the strings `mylog.txt' and `w' are not. The GNU gettext package provides a tool, called xgettext, that scans C and C++ source code looking for these specially marked strings. xgettext generates what are called `.pot' files, essentially structured human readable files which contain every marked string in the source code. These .pot files are copied and handed over to translators who write language-specific versions for every supported language. For I18N Python programs however, xgettext won't work; it doesn't understand the myriad of string types supported by Python. The standard Python distribution provides a tool called pygettext that does though (found in the Tools/i18n directory). 
This is a command line script that supports an interface similar to that of xgettext; see its documentation for details. Once you've used pygettext to create your .pot files, you can use the standard GNU gettext tools to generate your machine-readable .mo files, which are what's used by this module. In the simple case, to use this module then, you need only add the following bit of code to the main driver file of your application: import gettext gettext.install() This sets everything up so that your _('...') function calls Just Work. In other words, it installs `_' in the builtins namespace for convenience. You can skip this step and do it manually by the equivalent code: import gettext import __builtin__ __builtin__.__dict__['_'] = gettext.gettext Once you've done this, you probably want to call bindtextdomain() and textdomain() to get the domain set up properly. Again, for convenience, you can pass the domain and localedir to install to set everything up in one fell swoop: import gettext gettext.install('mydomain', '/my/locale/dir') If your program needs to support many languages at the same time, you will want to create Translation objects explicitly, like so: import gettext gettext.install() lang1 = gettext.Translations(open('/path/to/my/lang1/messages.mo')) lang2 = gettext.Translations(open('/path/to/my/lang2/messages.mo')) lang3 = gettext.Translations(open('/path/to/my/lang3/messages.mo')) gettext.set(lang1) # all _() will now translate to language 1 gettext.set(lang2) # all _() will now translate to language 2 Currently, only GNU gettext format binary .mo files are supported. """ # This module represents the integration of work, contributions, feedback, and # suggestions from the following people: # # Martin von Loewis, who wrote the initial implementation of the underlying # C-based libintlmodule (later renamed _gettext), along with a skeletal # gettext.py implementation. 
# # Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule, # which also included a pure-Python implementation to read .mo files if # intlmodule wasn't available. # # James Henstridge, who also wrote a gettext.py module, which has some # interesting, but currently unsupported experimental features: the notion of # a Catalog class and instances, and the ability to add to a catalog file via # a Python API. # # Barry Warsaw integrated these modules, wrote the .install() API and code, # and conformed all C and Python code to Python's coding standards. import os import sys import struct from UserDict import UserDict # globals _translations = {} _current_translation = None _current_domain = 'messages' # Domain to directory mapping, for use by bindtextdomain() _localedirs = {} def _expand_lang(locale): from locale import normalize locale = normalize(locale) COMPONENT_CODESET = 1 << 0 COMPONENT_TERRITORY = 1 << 1 COMPONENT_MODIFIER = 1 << 2 # split up the locale into its base components mask = 0 pos = locale.find('@') if pos >= 0: modifier = locale[pos:] locale = locale[:pos] mask |= COMPONENT_MODIFIER else: modifier = '' pos = locale.find('.') if pos >= 0: codeset = locale[pos:] locale = locale[:pos] mask |= COMPONENT_CODESET else: codeset = '' pos = locale.find('_') if pos >= 0: territory = locale[pos:] locale = locale[:pos] mask |= COMPONENT_TERRITORY else: territory = '' language = locale ret = [] for i in range(mask+1): if not (i & ~mask): # if all components for this combo exist ... 
val = language if i & COMPONENT_TERRITORY: val += territory if i & COMPONENT_CODESET: val += codeset if i & COMPONENT_MODIFIER: val += modifier ret.append(val) ret.reverse() return ret class GNUTranslations(UserDict): # Magic number of .mo files MAGIC = 0x950412de def __init__(self, fp): if fp is None: d = {} else: d = self._parse(fp) UserDict.__init__(self, d) def _parse(self, fp): """Override this method to support alternative .mo formats.""" unpack = struct.unpack filename = getattr(fp, 'name', '') # Parse the .mo file header, which consists of 5 little endian 32 # bit words. catalog = {} buf = fp.read() magic, version, msgcount, masteridx, transidx = unpack( '<5i', buf[:20]) if magic <> self.MAGIC: raise IOError(0, 'Bad magic number', filename) # # Now put all messages from the .mo file buffer into the catalog # dictionary. for i in xrange(0, msgcount): mstart = unpack('