summary | refs | log | tree | commit | diff | stats
path: root/Lib/gettext.py
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de> 2002-11-21 21:45:32 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2002-11-21 21:45:32 (GMT)
commit: d899605e30eef8e77f70184eac15fad1bf770586 (patch)
tree: c82d9d42d30135449d87fa7f761827bb4154c5f3 /Lib/gettext.py
parent: 21b60147e9d4e6c895b581429d292cb60bcdb5e7 (diff)
download: cpython-d899605e30eef8e77f70184eac15fad1bf770586.zip
cpython-d899605e30eef8e77f70184eac15fad1bf770586.tar.gz
cpython-d899605e30eef8e77f70184eac15fad1bf770586.tar.bz2
Patch #633547: Support plural forms. Do TODOs in test suite.
Diffstat (limited to 'Lib/gettext.py')
-rw-r--r-- Lib/gettext.py 140
1 files changed, 135 insertions, 5 deletions
diff --git a/Lib/gettext.py b/Lib/gettext.py
index f7649e6..2be677b 100644
--- a/Lib/gettext.py
+++ b/Lib/gettext.py
@@ -32,6 +32,8 @@ internationalized, to the local language and cultural habits.
# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
# module.
#
+# J. David Ibanez implemented plural forms.
+#
# TODO:
# - Lazy loading of .mo files. Currently the entire catalog is loaded into
# memory, but that's probably bad for large translated programs. Instead,
@@ -43,18 +45,76 @@ internationalized, to the local language and cultural habits.
# - Support Solaris .mo file formats. Unfortunately, we've been unable to
# find this format documented anywhere.
-import os
-import sys
-import struct
-import copy
+
+import copy, os, re, struct, sys
from errno import ENOENT
+
__all__ = ["bindtextdomain","textdomain","gettext","dgettext",
"find","translation","install","Catalog"]
_default_localedir = os.path.join(sys.prefix, 'share', 'locale')
+def test(condition, true, false):
+    """
+    Python stand-in for the C conditional expression:
+
+        condition ? true : false
+
+    c2py() rewrites "a?b:c" into a call to this function.  Both branch
+    values are ordinary arguments, so they are evaluated before the call.
+    """
+    if not condition:
+        return false
+    return true
+
+
+def c2py(plural):
+    """
+    Take the C expression used in a PO file's plural-forms header and
+    return a Python function of n that evaluates the same expression.
+    Raises ValueError for unsafe names or an unmatched ")".
+    """
+    # Security check: the text reaches eval(), so allow only the "n" identifier
+    from StringIO import StringIO
+    import token, tokenize
+    tokens = tokenize.generate_tokens(StringIO(plural).readline)
+    danger = [ x for x in tokens if x[0] == token.NAME and x[1] != 'n' ]
+    if danger:
+        raise ValueError, 'dangerous expression'
+
+    # Replace some C operators by their Python equivalents
+    plural = plural.replace('&&', ' and ')
+    plural = plural.replace('||', ' or ')
+
+    # "!x" becomes " not x": keep the character after "!", leave "!=" alone
+    expr = re.compile(r'\!([^=])')
+    plural = expr.sub(' not \\1', plural)
+
+    # Rewrite "a?b:c" as "test(a,b,c)", recursing into the "c" branch
+    expr = re.compile(r'(.*?)\?(.*?):(.*)')
+    def repl(x):
+        return "test(%s, %s, %s)" % (x.group(1), x.group(2),
+                                     expr.sub(repl, x.group(3)))
+
+    # Code to transform the plural expression, taking care of parentheses
+    stack = ['']
+    for c in plural:
+        if c == '(':
+            stack.append('')
+        elif c == ')':
+            if len(stack) == 1:  # only the outermost level is open: nothing to close
+                raise ValueError, 'unbalanced parenthesis in plural form'
+            s = expr.sub(repl, stack.pop())
+            stack[-1] += '(%s)' % s
+        else:
+            stack[-1] += c
+    plural = expr.sub(repl, stack.pop())
+
+    return eval('lambda n: int(%s)' % plural)
+
+
def _expand_lang(locale):
from locale import normalize
@@ -121,11 +181,27 @@ class NullTranslations:
return self._fallback.gettext(message)
return message
+    def ngettext(self, msgid1, msgid2, n):
+        """Return msgid1 when n == 1, else msgid2, unless a fallback is set."""
+        if self._fallback:
+            return self._fallback.ngettext(msgid1, msgid2, n)
+        if n != 1:
+            return msgid2
+        return msgid1
+
def ugettext(self, message):
if self._fallback:
return self._fallback.ugettext(message)
return unicode(message)
+    def ungettext(self, msgid1, msgid2, n):
+        """Like ngettext(), but the chosen msgid is passed through unicode()."""
+        if self._fallback:
+            return self._fallback.ungettext(msgid1, msgid2, n)
+        if n == 1:
+            chosen = msgid1
+        else:
+            chosen = msgid2
+        return unicode(chosen)
+
def info(self):
return self._info
@@ -169,8 +245,16 @@ class GNUTranslations(NullTranslations):
tlen, toff = unpack(ii, buf[transidx:transidx+8])
tend = toff + tlen
if mend < buflen and tend < buflen:
+ msg = buf[moff:mend]
tmsg = buf[toff:tend]
- catalog[buf[moff:mend]] = tmsg
+ if msg.find('\x00') >= 0:
+ # Plural forms
+ msgid1, msgid2 = msg.split('\x00')
+ tmsg = tmsg.split('\x00')
+ for i in range(len(tmsg)):
+ catalog[(msgid1, i)] = tmsg[i]
+ else:
+ catalog[msg] = tmsg
else:
raise IOError(0, 'File is corrupt', filename)
# See if we're looking at GNU .mo conventions for metadata
@@ -186,6 +270,12 @@ class GNUTranslations(NullTranslations):
self._info[k] = v
if k == 'content-type':
self._charset = v.split('charset=')[1]
+ elif k == 'plural-forms':
+ v = v.split(';')
+## nplurals = v[0].split('nplurals=')[1]
+## nplurals = int(nplurals.strip())
+ plural = v[1].split('plural=')[1]
+ self.plural = c2py(plural)
# advance to next entry in the seek tables
masteridx += 8
transidx += 8
@@ -198,6 +288,19 @@ class GNUTranslations(NullTranslations):
return self._fallback.gettext(message)
return message
+
+    def ngettext(self, msgid1, msgid2, n):
+        """
+        Return the translation of msgid1 in the plural form selected by n.
+
+        Plural entries are stored in the catalog under (msgid1, index).
+        When no entry matches, the fallback translator is asked; without
+        a fallback, msgid1 is returned for n == 1 and msgid2 otherwise.
+        """
+        # self.plural is only assigned when the catalog carried a
+        # plural-forms header.  Without one, use the n != 1 rule instead
+        # of letting an AttributeError escape to the caller.
+        plural = getattr(self, 'plural', None)
+        if plural is None:
+            index = int(n != 1)
+        else:
+            index = plural(n)
+        try:
+            return self._catalog[(msgid1, index)]
+        except KeyError:
+            if self._fallback:
+                return self._fallback.ngettext(msgid1, msgid2, n)
+            if n == 1:
+                return msgid1
+            else:
+                return msgid2
+
+
def ugettext(self, message):
try:
tmsg = self._catalog[message]
@@ -208,6 +311,18 @@ class GNUTranslations(NullTranslations):
return unicode(tmsg, self._charset)
+    def ungettext(self, msgid1, msgid2, n):
+        """
+        Unicode variant of ngettext().
+
+        Looks up (msgid1, index) in the catalog and decodes the result
+        with the catalog charset.  When no entry matches, the fallback
+        translator is asked; without a fallback, msgid1 (n == 1) or
+        msgid2 is decoded and returned instead.
+        """
+        # self.plural is only assigned when the catalog carried a
+        # plural-forms header.  Without one, use the n != 1 rule instead
+        # of letting an AttributeError escape to the caller.
+        plural = getattr(self, 'plural', None)
+        if plural is None:
+            index = int(n != 1)
+        else:
+            index = plural(n)
+        try:
+            tmsg = self._catalog[(msgid1, index)]
+        except KeyError:
+            if self._fallback:
+                return self._fallback.ungettext(msgid1, msgid2, n)
+            if n == 1:
+                tmsg = msgid1
+            else:
+                tmsg = msgid2
+        return unicode(tmsg, self._charset)
+
# Locate a .mo file using the gettext strategy
def find(domain, localedir=None, languages=None, all=0):
@@ -311,10 +426,25 @@ def dgettext(domain, message):
return t.gettext(message)
+def dngettext(domain, msgid1, msgid2, n):
+    """Plural-aware lookup of msgid1/msgid2 for the count n in domain."""
+    try:
+        catalog = translation(domain, _localedirs.get(domain, None))
+    except IOError:
+        # translation() raised IOError: apply the n == 1 rule to the msgids.
+        if n != 1:
+            return msgid2
+        return msgid1
+    else:
+        return catalog.ngettext(msgid1, msgid2, n)
+
+
def gettext(message):
return dgettext(_current_domain, message)
+def ngettext(msgid1, msgid2, n):
+    """Plural-aware lookup in the current domain; delegates to dngettext()."""
+    domain = _current_domain
+    return dngettext(domain, msgid1, msgid2, n)
+
+
# dcgettext() has been deemed unnecessary and is not implemented.
# James Henstridge's Catalog constructor from GNOME gettext. Documented usage