# Source: Lib/gettext.py (blob c216089de96b7dcdc0e5034d436e27f7aece3938)
"""Internationalization and localization support.

This module provides internationalization (I18N) and localization (L10N)
support for your Python programs by providing an interface to the GNU gettext
message catalog library.

I18N refers to the operation by which a program is made aware of multiple
languages.  L10N refers to the adaptation of your program, once
internationalized, to the local language and cultural habits.  In order to
provide multilingual messages for your Python programs, you need to take the
following steps:

    - prepare your program by specially marking translatable strings
    - run a suite of tools over your marked program files to generate raw
      messages catalogs
    - create language specific translations of the message catalogs
    - use this module so that message strings are properly translated

In order to prepare your program for I18N, you need to look at all the strings
in your program.  Any string that needs to be translated should be marked by
wrapping it in _('...') -- i.e. a call to the function `_'.  For example:

    filename = 'mylog.txt'
    message = _('writing a log message')
    fp = open(filename, 'w')
    fp.write(message)
    fp.close()

In this example, the string `writing a log message' is marked as a candidate
for translation, while the strings `mylog.txt' and `w' are not.

The GNU gettext package provides a tool, called xgettext, that scans C and C++
source code looking for these specially marked strings.  xgettext generates
what are called `.pot' files, essentially structured human readable files
which contain every marked string in the source code.  These .pot files are
copied and handed over to translators who write language-specific versions for
every supported language.

For I18N Python programs however, xgettext won't work; it doesn't understand
the myriad of string types supported by Python.  The standard Python
distribution provides a tool called pygettext that does though (found in the
Tools/i18n directory).  This is a command line script that supports a similar
interface as xgettext; see its documentation for details.  Once you've used
pygettext to create your .pot files, you can use the standard GNU gettext
tools to generate your machine-readable .mo files, which are what's used by
this module.

In the simple case, to use this module then, you need only add the following
bit of code to the main driver file of your application:

    import gettext
    gettext.install()

This sets everything up so that your _('...') function calls Just Work.  In
other words, it installs `_' in the builtins namespace for convenience.  You
can skip this step and do it manually by the equivalent code:

    import gettext
    import __builtin__
    __builtin__.__dict__['_'] = gettext.gettext

Once you've done this, you probably want to call bindtextdomain() and
textdomain() to get the domain set up properly.  Again, for convenience, you
can pass the domain and localedir to install to set everything up in one fell
swoop:

    import gettext
    gettext.install('mydomain', '/my/locale/dir')

If your program needs to support many languages at the same time, you will
want to create Translations objects explicitly, like so:

    import gettext
    gettext.install()

    lang1 = gettext.Translations(open('/path/to/my/lang1/messages.mo', 'rb'))
    lang2 = gettext.Translations(open('/path/to/my/lang2/messages.mo', 'rb'))
    lang3 = gettext.Translations(open('/path/to/my/lang3/messages.mo', 'rb'))

    gettext.set(lang1)
    # all _() will now translate to language 1
    gettext.set(lang2)
    # all _() will now translate to language 2

Currently, only GNU gettext format binary .mo files are supported.

"""

# This module represents the integration of work, contributions, feedback, and
# suggestions from the following people:
#
# Martin von Loewis, who wrote the initial implementation of the underlying
# C-based libintlmodule (later renamed _gettext), along with a skeletal
# gettext.py implementation.
#
# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
# which also included a pure-Python implementation to read .mo files if
# intlmodule wasn't available.
#
# James Henstridge, who also wrote a gettext.py module, which has some
# interesting, but currently unsupported experimental features: the notion of
# a Catalog class and instances, and the ability to add to a catalog file via
# a Python API.
#
# Barry Warsaw integrated these modules, wrote the .install() API and code,
# and conformed all C and Python code to Python's coding standards.

import os
import sys
import struct
from UserDict import UserDict



# globals
# Cache of catalogs already loaded by _find(), keyed by .mo file path.
_translations = {}
# The object most recently passed to set(); get() returns it.
_current_translation = None
# Domain used by _find() when none is given; textdomain() changes it.
_current_domain = 'messages'

# Domain to directory mapping, for use by bindtextdomain()
_localedirs = {}



def _expand_lang(locale):
    from locale import normalize
    locale = normalize(locale)
    COMPONENT_CODESET   = 1 << 0
    COMPONENT_TERRITORY = 1 << 1
    COMPONENT_MODIFIER  = 1 << 2
    # split up the locale into its base components
    mask = 0
    pos = locale.find('@')
    if pos >= 0:
        modifier = locale[pos:]
        locale = locale[:pos]
        mask |= COMPONENT_MODIFIER
    else:
        modifier = ''
    pos = locale.find('.')
    if pos >= 0:
        codeset = locale[pos:]
        locale = locale[:pos]
        mask |= COMPONENT_CODESET
    else:
        codeset = ''
    pos = locale.find('_')
    if pos >= 0:
        territory = locale[pos:]
        locale = locale[:pos]
        mask |= COMPONENT_TERRITORY
    else:
        territory = ''
    language = locale
    ret = []
    for i in range(mask+1):
        if not (i & ~mask):  # if all components for this combo exist ...
            val = language
            if i & COMPONENT_TERRITORY: val += territory
            if i & COMPONENT_CODESET:   val += codeset
            if i & COMPONENT_MODIFIER:  val += modifier
            ret.append(val)
    ret.reverse()
    return ret



class GNUTranslations(UserDict):
    """Message catalog read from a GNU gettext binary .mo file.

    The instance behaves like a dictionary that maps each original message
    string to its translated string, exactly as the bytes appear in the
    file.
    """

    # Magic number of .mo files, as seen when the header is decoded in the
    # byte order the file was written in.
    MAGIC = 0x950412de
    # The same magic number as it appears when a big endian file is decoded
    # as little endian.
    BE_MAGIC = 0xde120495

    def __init__(self, fp):
        """Load the catalog from the open binary file fp.

        Passing None for fp creates an empty catalog.
        """
        if fp is None:
            d = {}
        else:
            d = self._parse(fp)
        UserDict.__init__(self, d)

    def _parse(self, fp):
        """Override this method to support alternative .mo formats.

        Reads all of fp and returns a dictionary mapping original strings
        to translated strings.  Raises IOError when the magic number is not
        recognized or when the header or the string tables point outside
        the file.
        """
        unpack = struct.unpack
        filename = getattr(fp, 'name', '')
        buf = fp.read()
        buflen = len(buf)
        # The first word is the magic number.  It must be read unsigned:
        # 0x950412de does not fit in a signed 32 bit integer, so a signed
        # read could never compare equal to MAGIC.  The value also tells us
        # the byte order of every other word in the file.
        if buflen < 4:
            raise IOError(0, 'Bad magic number', filename)
        magic = unpack('<I', buf[:4])[0]
        if magic == self.MAGIC:
            order = '<'
        elif magic == self.BE_MAGIC:
            order = '>'
        else:
            raise IOError(0, 'Bad magic number', filename)
        # The remaining header words used here are the message count and
        # the offsets of the original and translated string tables (the
        # version word at bytes 4-8 is not checked).
        if buflen < 20:
            raise IOError(0, 'File is corrupt', filename)
        msgcount, masteridx, transidx = unpack(order + '3I', buf[8:20])
        # Each table holds msgcount (length, offset) pairs of 8 bytes.
        # Checking both tables up front keeps the loop below from running
        # off the end of the buffer on a damaged file.
        if (masteridx + 8 * msgcount > buflen or
                transidx + 8 * msgcount > buflen):
            raise IOError(0, 'File is corrupt', filename)
        #
        # Now put all messages from the .mo file buffer into the catalog
        # dictionary.
        pair = order + '2I'
        catalog = {}
        for i in range(msgcount):
            mlen, mstart = unpack(pair, buf[masteridx:masteridx+8])
            tlen, tstart = unpack(pair, buf[transidx:transidx+8])
            mend = mstart + mlen
            tend = tstart + tlen
            if mend < buflen and tend < buflen:
                catalog[buf[mstart:mend]] = buf[tstart:tend]
            else:
                raise IOError(0, 'File is corrupt', filename)
            #
            # advance to next entry in the seek tables
            masteridx += 8
            transidx += 8
        return catalog



# By default, use GNU gettext format .mo files.  _find() builds catalogs
# through this name, so rebinding it selects a different catalog class.
Translations = GNUTranslations

# Locate a .mo file using the gettext strategy
def _find(localedir=None, languages=None, domain=None):
    """Return the catalog for the first usable language, or {} if none.

    localedir -- directory holding <lang>/LC_MESSAGES/<domain>.mo files.
                 Defaults to the directory registered for the domain in
                 _localedirs, or <sys.prefix>/share/locale.
    languages -- list of locale names in order of preference.  Defaults to
                 the colon separated value of the first non-empty variable
                 among LANGUAGE, LC_ALL, LC_MESSAGES and LANG, followed by
                 'C'.
    domain    -- message domain.  Defaults to the current global domain.

    Every language is expanded with _expand_lang() and the resulting names
    are tried in order.  The search stops at 'C', which stands for "no
    translation".  Catalogs are cached in _translations by file path; files
    that are missing or fail to parse with IOError are skipped.
    """
    # Get some reasonable defaults for arguments that were not supplied
    if domain is None:
        domain = _current_domain
    if localedir is None:
        localedir = _localedirs.get(
            domain,
            # TBD: The default localedir is actually system dependent.  I
            # don't know of a good platform-consistent and portable way to
            # default it, so instead, we'll just use sys.prefix.  Most
            # programs should be calling bindtextdomain() or such explicitly
            # anyway.
            os.path.join(sys.prefix, 'share', 'locale'))
    if languages is None:
        languages = []
        for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
            val = os.environ.get(envar)
            if val:
                languages = val.split(':')
                break
        if 'C' not in languages:
            languages.append('C')
    # Now normalize and expand the languages.  A list is used rather than a
    # dictionary so that duplicates are dropped without losing the order of
    # preference; the order decides which catalog wins and whether 'C' is
    # reached before the real candidates.
    candidates = []
    for lang in languages:
        for nelang in _expand_lang(lang):
            if nelang not in candidates:
                candidates.append(nelang)
    # select a language
    for lang in candidates:
        if lang == 'C':
            break
        mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
        # See if it's in the cache.  Compare against None: a catalog with no
        # entries is false, and must still count as a cache hit.
        mo = _translations.get(mofile)
        if mo is not None:
            return mo
        fp = None
        try:
            try:
                fp = open(mofile, 'rb')
                t = Translations(fp)
                _translations[mofile] = t
                return t
            except IOError:
                # missing or unreadable catalog: try the next candidate
                pass
        finally:
            if fp is not None:
                fp.close()
    return {}



def bindtextdomain(domain=None, localedir=None):
    """Set or query the locale directory registered for a domain.

    With both arguments, records localedir for domain and returns it.  With
    only domain, returns the directory recorded for it, falling back to the
    entry for 'C' (None if neither exists).  Without a domain, returns None.
    """
    if domain is None:
        return None
    if localedir is not None:
        _localedirs[domain] = localedir
        return localedir
    fallback = _localedirs.get('C')
    return _localedirs.get(domain, fallback)


def textdomain(domain=None):
    """Set the current global domain if one is given, then return it."""
    global _current_domain
    if domain is not None:
        _current_domain = domain
    return _current_domain


def gettext(message):
    """Return localized version of a message.

    When a translation has been selected with set(), the message is looked
    up there; otherwise the catalog is located with _find() for the current
    domain.  The message itself is returned when no translation is found.
    """
    # Honor set(): the module documentation promises that after
    # gettext.set(t) every _() call translates through t.
    if _current_translation is not None:
        return _current_translation.get(message, message)
    return _find().get(message, message)


def dgettext(domain, message):
    """Translate message using the catalog of the given domain.

    The message itself is returned when the catalog has no entry for it.
    """
    catalog = _find(domain=domain)
    return catalog.get(message, message)



# A higher level API
def set(translation):
    """Record translation as the module-wide current translation.

    The stored object is what get() returns afterwards.
    """
    global _current_translation
    _current_translation = translation


def get():
    """Return the object last stored with set() (None initially)."""
    return _current_translation


def install(domain=None, localedir=None):
    """Publish gettext() as `_' in the builtins namespace.

    When a domain is given, it is also passed with localedir to
    bindtextdomain() and made the current domain with textdomain().
    """
    import __builtin__
    setattr(__builtin__, '_', gettext)
    if domain is None:
        return
    bindtextdomain(domain, localedir)
    textdomain(domain)