diff options
-rw-r--r-- | Lib/gettext.py | 313 |
1 files changed, 140 insertions, 173 deletions
diff --git a/Lib/gettext.py b/Lib/gettext.py index c216089..647fe56 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -6,83 +6,7 @@ message catalog library. I18N refers to the operation by which a program is made aware of multiple languages. L10N refers to the adaptation of your program, once -internationalized, to the local language and cultural habits. In order to -provide multilingual messages for your Python programs, you need to take the -following steps: - - - prepare your program by specially marking translatable strings - - run a suite of tools over your marked program files to generate raw - messages catalogs - - create language specific translations of the message catalogs - - use this module so that message strings are properly translated - -In order to prepare your program for I18N, you need to look at all the strings -in your program. Any string that needs to be translated should be marked by -wrapping it in _('...') -- i.e. a call to the function `_'. For example: - - filename = 'mylog.txt' - message = _('writing a log message') - fp = open(filename, 'w') - fp.write(message) - fp.close() - -In this example, the string `writing a log message' is marked as a candidate -for translation, while the strings `mylog.txt' and `w' are not. - -The GNU gettext package provides a tool, called xgettext, that scans C and C++ -source code looking for these specially marked strings. xgettext generates -what are called `.pot' files, essentially structured human readable files -which contain every marked string in the source code. These .pot files are -copied and handed over to translators who write language-specific versions for -every supported language. - -For I18N Python programs however, xgettext won't work; it doesn't understand -the myriad of string types support by Python. The standard Python -distribution provides a tool called pygettext that does though (found in the -Tools/i18n directory). This is a command line script that supports a similar -interface as xgettext; see its documentation for details. Once you've used -pygettext to create your .pot files, you can use the standard GNU gettext -tools to generate your machine-readable .mo files, which are what's used by -this module. - -In the simple case, to use this module then, you need only add the following -bit of code to the main driver file of your application: - - import gettext - gettext.install() - -This sets everything up so that your _('...') function calls Just Work. In -other words, it installs `_' in the builtins namespace for convenience. You -can skip this step and do it manually by the equivalent code: - - import gettext - import __builtin__ - __builtin__['_'] = gettext.gettext - -Once you've done this, you probably want to call bindtextdomain() and -textdomain() to get the domain set up properly. Again, for convenience, you -can pass the domain and localedir to install to set everything up in one fell -swoop: - - import gettext - gettext.install('mydomain', '/my/locale/dir') - -If your program needs to support many languages at the same time, you will -want to create Translation objects explicitly, like so: - - import gettext - gettext.install() - - lang1 = gettext.Translations(open('/path/to/my/lang1/messages.mo')) - lang2 = gettext.Translations(open('/path/to/my/lang2/messages.mo')) - lang3 = gettext.Translations(open('/path/to/my/lang3/messages.mo')) - - gettext.set(lang1) - # all _() will now translate to language 1 - gettext.set(lang2) - # all _() will now translate to language 2 - -Currently, only GNU gettext format binary .mo files are supported. +internationalized, to the local language and cultural habits. """ @@ -104,21 +28,27 @@ Currently, only GNU gettext format binary .mo files are supported. # # Barry Warsaw integrated these modules, wrote the .install() API and code, # and conformed all C and Python code to Python's coding standards. +# +# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this +# module. +# +# TODO: +# - Lazy loading of .mo files. Currently the entire catalog is loaded into +# memory, but that's probably bad for large translated programs. Instead, +# the lexical sort of original strings in GNU .mo files should be exploited +# to do binary searches and lazy initializations. Or you might want to use +# the undocumented double-hash algorithm for .mo files with hash tables, but +# you'll need to study the GNU gettext code to do this. +# +# - Support Solaris .mo file formats. Unfortunately, we've been unable to +# find this format documented anywhere. import os import sys import struct -from UserDict import UserDict - - - -# globals -_translations = {} -_current_translation = None -_current_domain = 'messages' +from errno import ENOENT -# Domain to directory mapping, for use by bindtextdomain() -_localedirs = {} +_default_localedir = os.path.join(sys.prefix, 'share', 'locale') @@ -165,16 +95,37 @@ def _expand_lang(locale): -class GNUTranslations(UserDict): - # Magic number of .mo files - MAGIC = 0x950412de +class NullTranslations: + def __init__(self, fp=None): + self._info = {} + self._charset = None + if fp: + self._parse(fp) - def __init__(self, fp): - if fp is None: - d = {} - else: - d = self._parse(fp) - UserDict.__init__(self, d) + def _parse(self, fp): + pass + + def gettext(self, message): + return message + + def ugettext(self, message): + return unicode(message) + + def info(self): + return self._info + + def charset(self): + return self._charset + + def install(self, unicode=0): + import __builtin__ + __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext + + +class GNUTranslations(NullTranslations): + # Magic number of .mo files + LE_MAGIC = 0x950412de + BE_MAGIC = struct.unpack('>i', struct.pack('<i', LE_MAGIC))[0] def _parse(self, fp): """Override this method to support alternative .mo formats.""" @@ -182,51 +133,62 @@ class GNUTranslations(UserDict): filename = getattr(fp, 'name', '') # Parse the .mo file header, which consists of 5 little endian 32 # bit words. - catalog = {} + self._catalog = catalog = {} buf = fp.read() - magic, version, msgcount, masteridx, transidx = unpack( - '<5i', buf[:20]) - if magic <> self.MAGIC: + # Are we big endian or little endian? + magic = unpack('<i', buf[:4])[0] + if magic == self.LE_MAGIC: + version, msgcount, masteridx, transidx = unpack('<4i', buf[4:20]) + ii = '<ii' + elif magic == self.BE_MAGIC: + version, msgcount, masteridx, transidx = unpack('>4i', buf[4:20]) + ii = '>ii' + else: raise IOError(0, 'Bad magic number', filename) # # Now put all messages from the .mo file buffer into the catalog # dictionary. for i in xrange(0, msgcount): - mstart = unpack('<i', buf[masteridx+4:masteridx+8])[0] - mend = mstart + unpack('<i', buf[masteridx:masteridx+4])[0] - tstart = unpack('<i', buf[transidx+4:transidx+8])[0] - tend = tstart + unpack('<i', buf[transidx:transidx+4])[0] + mlen, moff = unpack(ii, buf[masteridx:masteridx+8]) + mend = moff + mlen + tlen, toff = unpack(ii, buf[transidx:transidx+8]) + tend = toff + tlen if mend < len(buf) and tend < len(buf): - catalog[buf[mstart:mend]] = buf[tstart:tend] + tmsg = buf[toff:tend] + catalog[buf[moff:mend]] = tmsg else: raise IOError(0, 'File is corrupt', filename) - # + # See if we're looking at GNU .mo conventions for metadata + if mlen == 0 and tmsg.lower().startswith('project-id-version:'): + # Catalog description + for item in tmsg.split('\n'): + item = item.strip() + if not item: + continue + k, v = item.split(':', 1) + k = k.strip().lower() + v = v.strip() + self._info[k] = v + if k == 'content-type': + self._charset = v.split('charset=')[1] # advance to next entry in the seek tables masteridx += 8 transidx += 8 - return catalog + def gettext(self, message): + return self._catalog.get(message, message) + + def ugettext(self, message): + tmsg = self._catalog.get(message, message) + return unicode(tmsg, self._charset) - -# By default, use GNU gettext format .mo files -Translations = GNUTranslations + # Locate a .mo file using the gettext strategy -def _find(localedir=None, languages=None, domain=None): - global _current_domain - global _localedirs +def find(domain, localedir=None, languages=None): # Get some reasonable defaults for arguments that were not supplied - if domain is None: - domain = _current_domain if localedir is None: - localedir = _localedirs.get( - domain, - # TBD: The default localedir is actually system dependent. I - # don't know of a good platform-consistent and portable way to - # default it, so instead, we'll just use sys.prefix. Most - # programs should be calling bindtextdomain() or such explicitly - # anyway. - os.path.join(sys.prefix, 'share', 'locale')) + localedir = _default_localedir if languages is None: languages = [] for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'): @@ -247,72 +209,77 @@ def _find(localedir=None, languages=None, domain=None): if lang == 'C': break mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain) - # see if it's in the cache - mo = _translations.get(mofile) - if mo: - return mo - fp = None - try: - try: - fp = open(mofile, 'rb') - t = Translations(fp) - _translations[mofile] = t - return t - except IOError: - pass - finally: - if fp: - fp.close() - return {} + if os.path.exists(mofile): + return mofile + return None -def bindtextdomain(domain=None, localedir=None): - """Bind domain to a file in the specified directory.""" - global _localedirs - if domain is None: - return None - if localedir is None: - return _localedirs.get(domain, _localedirs.get('C')) - _localedirs[domain] = localedir - return localedir +# a mapping between absolute .mo file path and Translation object +_translations = {} + +def translation(domain, localedir=None, languages=None, class_=None): + if class_ is None: + class_ = GNUTranslations + mofile = find(domain, localedir, languages) + if mofile is None: + raise IOError(ENOENT, 'No translation file found for domain', domain) + key = os.path.abspath(mofile) + # TBD: do we need to worry about the file pointer getting collected? + t = _translations.setdefault(key, class_(open(mofile, 'rb'))) + return t + + + +def install(domain, localedir=None, unicode=0): + translation(domain, localedir).install(unicode) + + + +# a mapping b/w domains and locale directories +_localedirs = {} +# current global domain, `messages' used for compatibility w/ GNU gettext +_current_domain = 'messages' def textdomain(domain=None): - """Change or query the current global domain.""" global _current_domain - if domain is None: - return _current_domain - else: + if domain is not None: _current_domain = domain - return domain + return _current_domain -def gettext(message): - """Return localized version of a message.""" - return _find().get(message, message) +def bindtextdomain(domain, localedir=None): + global _localedirs + if localedir is not None: + _localedirs[domain] = localedir + return _localedirs.get(domain, _default_localedir) def dgettext(domain, message): - """Like gettext(), but look up message in specified domain.""" - return _find(domain=domain).get(message, message) + try: + t = translation(domain, _localedirs.get(domain, None)) + except IOError: + return message + return t.gettext(message) + +def gettext(message): + return dgettext(_current_domain, message) - -# A higher level API -def set(translation): - global _current_translation - _current_translation = translation +# dcgettext() has been deemed unnecessary and is not implemented. -def get(): - global _current_translation - return _current_translation +# James Henstridge's Catalog constructor from GNOME gettext. Documented usage +# was: +# +# import gettext +# cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR) +# _ = cat.gettext +# print _('Hello World') +# The resulting catalog object currently don't support access through a +# dictionary API, which was supported (but apparently unused) in GNOME +# gettext. -def install(domain=None, localedir=None): - import __builtin__ - __builtin__.__dict__['_'] = gettext - if domain is not None: - bindtextdomain(domain, localedir) - textdomain(domain) +Catalog = translation |