diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2002-11-21 21:45:32 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2002-11-21 21:45:32 (GMT) |
commit | d899605e30eef8e77f70184eac15fad1bf770586 (patch) | |
tree | c82d9d42d30135449d87fa7f761827bb4154c5f3 /Lib/gettext.py | |
parent | 21b60147e9d4e6c895b581429d292cb60bcdb5e7 (diff) | |
download | cpython-d899605e30eef8e77f70184eac15fad1bf770586.zip cpython-d899605e30eef8e77f70184eac15fad1bf770586.tar.gz cpython-d899605e30eef8e77f70184eac15fad1bf770586.tar.bz2 |
Patch #633547: Support plural forms. Do TODOs in test suite.
Diffstat (limited to 'Lib/gettext.py')
-rw-r--r-- | Lib/gettext.py | 140 |
1 files changed, 135 insertions, 5 deletions
diff --git a/Lib/gettext.py b/Lib/gettext.py index f7649e6..2be677b 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -32,6 +32,8 @@ internationalized, to the local language and cultural habits. # Francois Pinard and Marc-Andre Lemburg also contributed valuably to this # module. # +# J. David Ibanez implemented plural forms. +# # TODO: # - Lazy loading of .mo files. Currently the entire catalog is loaded into # memory, but that's probably bad for large translated programs. Instead, @@ -43,18 +45,76 @@ internationalized, to the local language and cultural habits. # - Support Solaris .mo file formats. Unfortunately, we've been unable to # find this format documented anywhere. -import os -import sys -import struct -import copy + +import copy, os, re, struct, sys from errno import ENOENT + __all__ = ["bindtextdomain","textdomain","gettext","dgettext", "find","translation","install","Catalog"] _default_localedir = os.path.join(sys.prefix, 'share', 'locale') +def test(condition, true, false): + """ + Implements the C expression: + + condition ? true : false + + Required to correctly interpret plural forms. + """ + if condition: + return true + else: + return false + + +def c2py(plural): + """ + Gets a C expression as used in PO files for plural forms and + returns a Python lambda function that implements an equivalent + expression. + """ + # Security check, allow only the "n" identifier + from StringIO import StringIO + import token, tokenize + tokens = tokenize.generate_tokens(StringIO(plural).readline) + danger = [ x for x in tokens if x[0] == token.NAME and x[1] != 'n' ] + if danger: + raise ValueError, 'dangerous expression' + + # Replace some C operators by their Python equivalents + plural = plural.replace('&&', ' and ') + plural = plural.replace('||', ' or ') + + expr = re.compile(r'\![^=]') + plural = expr.sub(' not ', plural) + + # Regular expression and replacement function used to transform + # "a?b:c" to "test(a,b,c)". + expr = re.compile(r'(.*?)\?(.*?):(.*)') + def repl(x): + return "test(%s, %s, %s)" % (x.group(1), x.group(2), + expr.sub(repl, x.group(3))) + + # Code to transform the plural expression, taking care of parentheses + stack = [''] + for c in plural: + if c == '(': + stack.append('') + elif c == ')': + if len(stack) == 0: + raise ValueError, 'unbalanced parenthesis in plural form' + s = expr.sub(repl, stack.pop()) + stack[-1] += '(%s)' % s + else: + stack[-1] += c + plural = expr.sub(repl, stack.pop()) + + return eval('lambda n: int(%s)' % plural) + + def _expand_lang(locale): from locale import normalize @@ -121,11 +181,27 @@ class NullTranslations: return self._fallback.gettext(message) return message + def ngettext(self, msgid1, msgid2, n): + if self._fallback: + return self._fallback.ngettext(msgid1, msgid2, n) + if n == 1: + return msgid1 + else: + return msgid2 + def ugettext(self, message): if self._fallback: return self._fallback.ugettext(message) return unicode(message) + def ungettext(self, msgid1, msgid2, n): + if self._fallback: + return self._fallback.ungettext(msgid1, msgid2, n) + if n == 1: + return unicode(msgid1) + else: + return unicode(msgid2) + def info(self): return self._info @@ -169,8 +245,16 @@ class GNUTranslations(NullTranslations): tlen, toff = unpack(ii, buf[transidx:transidx+8]) tend = toff + tlen if mend < buflen and tend < buflen: + msg = buf[moff:mend] tmsg = buf[toff:tend] - catalog[buf[moff:mend]] = tmsg + if msg.find('\x00') >= 0: + # Plural forms + msgid1, msgid2 = msg.split('\x00') + tmsg = tmsg.split('\x00') + for i in range(len(tmsg)): + catalog[(msgid1, i)] = tmsg[i] + else: + catalog[msg] = tmsg else: raise IOError(0, 'File is corrupt', filename) # See if we're looking at GNU .mo conventions for metadata @@ -186,6 +270,12 @@ class GNUTranslations(NullTranslations): self._info[k] = v if k == 'content-type': self._charset = v.split('charset=')[1] + elif k == 'plural-forms': + v = v.split(';') +## nplurals = v[0].split('nplurals=')[1] +## nplurals = int(nplurals.strip()) + plural = v[1].split('plural=')[1] + self.plural = c2py(plural) # advance to next entry in the seek tables masteridx += 8 transidx += 8 @@ -198,6 +288,19 @@ class GNUTranslations(NullTranslations): return self._fallback.gettext(message) return message + + def ngettext(self, msgid1, msgid2, n): + try: + return self._catalog[(msgid1, self.plural(n))] + except KeyError: + if self._fallback: + return self._fallback.ngettext(msgid1, msgid2, n) + if n == 1: + return msgid1 + else: + return msgid2 + + def ugettext(self, message): try: tmsg = self._catalog[message] @@ -208,6 +311,18 @@ class GNUTranslations(NullTranslations): return unicode(tmsg, self._charset) + def ungettext(self, msgid1, msgid2, n): + try: + tmsg = self._catalog[(msgid1, self.plural(n))] + except KeyError: + if self._fallback: + return self._fallback.ungettext(msgid1, msgid2, n) + if n == 1: + tmsg = msgid1 + else: + tmsg = msgid2 + return unicode(tmsg, self._charset) + # Locate a .mo file using the gettext strategy def find(domain, localedir=None, languages=None, all=0): @@ -311,10 +426,25 @@ def dgettext(domain, message): return t.gettext(message) +def dngettext(domain, msgid1, msgid2, n): + try: + t = translation(domain, _localedirs.get(domain, None)) + except IOError: + if n == 1: + return msgid1 + else: + return msgid2 + return t.ngettext(msgid1, msgid2, n) + + def gettext(message): return dgettext(_current_domain, message) +def ngettext(msgid1, msgid2, n): + return dngettext(_current_domain, msgid1, msgid2, n) + + # dcgettext() has been deemed unnecessary and is not implemented. # James Henstridge's Catalog constructor from GNOME gettext. Documented usage |