summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Barry Warsaw <barry@python.org> 2003-04-24 18:13:39 (GMT)
committer: Barry Warsaw <barry@python.org> 2003-04-24 18:13:39 (GMT)
commit: c4acc2bd32f14c22f9cd7a95021184809e096ebe (patch)
tree: e51f109f210992aa53a0ca66b37cdb16c9fcd915
parent: edb155fda199f7fa481d0020e911a6eec0705dfa (diff)
download: cpython-c4acc2bd32f14c22f9cd7a95021184809e096ebe.zip
cpython-c4acc2bd32f14c22f9cd7a95021184809e096ebe.tar.gz
cpython-c4acc2bd32f14c22f9cd7a95021184809e096ebe.tar.bz2
GNUTranslations:
__init__(): Removed since we no longer need the coerce flag. Message ids and strings are now always coerced to Unicode, /if/ the catalog specified a charset parameter. gettext(), ngettext(): Since the message strings are Unicode strings in the catalog, coerce back to encoded 8-bit strings on return. ugettext(), ungettext(): Coerce the message ids to Unicode when there's no entry for the id in the catalog. Minor code cleanups; use booleans where appropriate.
-rw-r--r-- Lib/gettext.py 66
1 files changed, 33 insertions, 33 deletions
diff --git a/Lib/gettext.py b/Lib/gettext.py
index aa43409..bc6779f 100644
--- a/Lib/gettext.py
+++ b/Lib/gettext.py
@@ -73,17 +73,15 @@ def test(condition, true, false):
def c2py(plural):
- """
- Gets a C expression as used in PO files for plural forms and
- returns a Python lambda function that implements an equivalent
- expression.
+ """Gets a C expression as used in PO files for plural forms and returns a
+ Python lambda function that implements an equivalent expression.
"""
# Security check, allow only the "n" identifier
from StringIO import StringIO
import token, tokenize
tokens = tokenize.generate_tokens(StringIO(plural).readline)
try:
- danger = [ x for x in tokens if x[0] == token.NAME and x[1] != 'n' ]
+ danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
except tokenize.TokenError:
raise ValueError, \
'plural forms expression error, maybe unbalanced parenthesis'
@@ -218,7 +216,7 @@ class NullTranslations:
def charset(self):
return self._charset
- def install(self, unicode=0):
+ def install(self, unicode=False):
import __builtin__
__builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
@@ -228,12 +226,6 @@ class GNUTranslations(NullTranslations):
LE_MAGIC = 0x950412deL
BE_MAGIC = 0xde120495L
- def __init__(self, fp=None, coerce=False):
- # Set this attribute before calling the base class constructor, since
- # the latter calls _parse() which depends on self._coerce.
- self._coerce = coerce
- NullTranslations.__init__(self, fp)
-
def _parse(self, fp):
"""Override this method to support alternative .mo formats."""
unpack = struct.unpack
@@ -281,21 +273,28 @@ class GNUTranslations(NullTranslations):
self._charset = v.split('charset=')[1]
elif k == 'plural-forms':
v = v.split(';')
-## nplurals = v[0].split('nplurals=')[1]
-## nplurals = int(nplurals.strip())
plural = v[1].split('plural=')[1]
self.plural = c2py(plural)
+ # Note: we unconditionally convert both msgids and msgstrs to
+ # Unicode using the character encoding specified in the charset
+ # parameter of the Content-Type header. The gettext documentation
+ # strongly encourages msgids to be us-ascii, but some applications
+ # require alternative encodings (e.g. Zope's ZCML and ZPT). For
+ # traditional gettext applications, the msgid conversion will
+ # cause no problems since us-ascii should always be a subset of
+ # the charset encoding. We may want to fall back to 8-bit msgids
+ # if the Unicode conversion fails.
if msg.find('\x00') >= 0:
# Plural forms
msgid1, msgid2 = msg.split('\x00')
tmsg = tmsg.split('\x00')
- if self._coerce:
+ if self._charset:
msgid1 = unicode(msgid1, self._charset)
tmsg = [unicode(x, self._charset) for x in tmsg]
for i in range(len(tmsg)):
catalog[(msgid1, i)] = tmsg[i]
else:
- if self._coerce:
+ if self._charset:
msg = unicode(msg, self._charset)
tmsg = unicode(tmsg, self._charset)
catalog[msg] = tmsg
@@ -304,16 +303,23 @@ class GNUTranslations(NullTranslations):
transidx += 8
def gettext(self, message):
- try:
- return self._catalog[message]
- except KeyError:
+ missing = object()
+ tmsg = self._catalog.get(message, missing)
+ if tmsg is missing:
if self._fallback:
return self._fallback.gettext(message)
return message
+ # Encode the Unicode tmsg back to an 8-bit string, if possible
+ if self._charset:
+ return tmsg.encode(self._charset)
+ return tmsg
def ngettext(self, msgid1, msgid2, n):
try:
- return self._catalog[(msgid1, self.plural(n))]
+ tmsg = self._catalog[(msgid1, self.plural(n))]
+ if self._charset:
+ return tmsg.encode(self._charset)
+ return tmsg
except KeyError:
if self._fallback:
return self._fallback.ngettext(msgid1, msgid2, n)
@@ -328,10 +334,7 @@ class GNUTranslations(NullTranslations):
if tmsg is missing:
if self._fallback:
return self._fallback.ugettext(message)
- tmsg = message
- if not self._coerce:
- return unicode(tmsg, self._charset)
- # The msgstr is already coerced to Unicode
+ return unicode(message)
return tmsg
def ungettext(self, msgid1, msgid2, n):
@@ -341,12 +344,9 @@ class GNUTranslations(NullTranslations):
if self._fallback:
return self._fallback.ungettext(msgid1, msgid2, n)
if n == 1:
- tmsg = msgid1
+ tmsg = unicode(msgid1)
else:
- tmsg = msgid2
- if not self._coerce:
- return unicode(tmsg, self._charset)
- # The msgstr is already coerced to Unicode
+ tmsg = unicode(msgid2)
return tmsg
@@ -392,11 +392,11 @@ def find(domain, localedir=None, languages=None, all=0):
_translations = {}
def translation(domain, localedir=None, languages=None,
- class_=None, fallback=0):
+ class_=None, fallback=False):
if class_ is None:
class_ = GNUTranslations
mofiles = find(domain, localedir, languages, all=1)
- if len(mofiles)==0:
+ if not mofiles:
if fallback:
return NullTranslations()
raise IOError(ENOENT, 'No translation file found for domain', domain)
@@ -419,8 +419,8 @@ def translation(domain, localedir=None, languages=None,
return result
-def install(domain, localedir=None, unicode=0):
- translation(domain, localedir, fallback=1).install(unicode)
+def install(domain, localedir=None, unicode=False):
+ translation(domain, localedir, fallback=True).install(unicode)