diff options
| author | Barry Warsaw <barry@python.org> | 2003-04-24 18:13:39 (GMT) | 
|---|---|---|
| committer | Barry Warsaw <barry@python.org> | 2003-04-24 18:13:39 (GMT) | 
| commit | c4acc2bd32f14c22f9cd7a95021184809e096ebe (patch) | |
| tree | e51f109f210992aa53a0ca66b37cdb16c9fcd915 /Lib/gettext.py | |
| parent | edb155fda199f7fa481d0020e911a6eec0705dfa (diff) | |
| download | cpython-c4acc2bd32f14c22f9cd7a95021184809e096ebe.zip cpython-c4acc2bd32f14c22f9cd7a95021184809e096ebe.tar.gz cpython-c4acc2bd32f14c22f9cd7a95021184809e096ebe.tar.bz2  | |
GNUTranslations:
    __init__(): Removed since we no longer need the coerce flag.
    Message ids and strings are now always coerced to Unicode, /if/
    the catalog specifies a charset parameter.
    gettext(), ngettext(): Since the message strings are now stored as
    Unicode in the catalog, encode them back to 8-bit strings on return.
    ugettext(), ungettext(): Coerce the message ids to Unicode when
    there's no entry for the id in the catalog.
Minor code cleanups; use booleans where appropriate.
Diffstat (limited to 'Lib/gettext.py')
| -rw-r--r-- | Lib/gettext.py | 66 | 
1 files changed, 33 insertions, 33 deletions
diff --git a/Lib/gettext.py b/Lib/gettext.py index aa43409..bc6779f 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -73,17 +73,15 @@ def test(condition, true, false):  def c2py(plural): -    """ -    Gets a C expression as used in PO files for plural forms and -    returns a Python lambda function that implements an equivalent -    expression. +    """Gets a C expression as used in PO files for plural forms and returns a +    Python lambda function that implements an equivalent expression.      """      # Security check, allow only the "n" identifier      from StringIO import StringIO      import token, tokenize      tokens = tokenize.generate_tokens(StringIO(plural).readline)      try: -        danger = [ x for x in tokens if x[0] == token.NAME and x[1] != 'n' ] +        danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']      except tokenize.TokenError:          raise ValueError, \                'plural forms expression error, maybe unbalanced parenthesis' @@ -218,7 +216,7 @@ class NullTranslations:      def charset(self):          return self._charset -    def install(self, unicode=0): +    def install(self, unicode=False):          import __builtin__          __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext @@ -228,12 +226,6 @@ class GNUTranslations(NullTranslations):      LE_MAGIC = 0x950412deL      BE_MAGIC = 0xde120495L -    def __init__(self, fp=None, coerce=False): -        # Set this attribute before calling the base class constructor, since -        # the latter calls _parse() which depends on self._coerce. 
-        self._coerce = coerce -        NullTranslations.__init__(self, fp) -      def _parse(self, fp):          """Override this method to support alternative .mo formats."""          unpack = struct.unpack @@ -281,21 +273,28 @@ class GNUTranslations(NullTranslations):                          self._charset = v.split('charset=')[1]                      elif k == 'plural-forms':                          v = v.split(';') -##                        nplurals = v[0].split('nplurals=')[1] -##                        nplurals = int(nplurals.strip())                          plural = v[1].split('plural=')[1]                          self.plural = c2py(plural) +            # Note: we unconditionally convert both msgids and msgstrs to +            # Unicode using the character encoding specified in the charset +            # parameter of the Content-Type header.  The gettext documentation +            # strongly encourages msgids to be us-ascii, but some appliations +            # require alternative encodings (e.g. Zope's ZCML and ZPT).  For +            # traditional gettext applications, the msgid conversion will +            # cause no problems since us-ascii should always be a subset of +            # the charset encoding.  We may want to fall back to 8-bit msgids +            # if the Unicode conversion fails.              
if msg.find('\x00') >= 0:                  # Plural forms                  msgid1, msgid2 = msg.split('\x00')                  tmsg = tmsg.split('\x00') -                if self._coerce: +                if self._charset:                      msgid1 = unicode(msgid1, self._charset)                      tmsg = [unicode(x, self._charset) for x in tmsg]                  for i in range(len(tmsg)):                      catalog[(msgid1, i)] = tmsg[i]              else: -                if self._coerce: +                if self._charset:                      msg = unicode(msg, self._charset)                      tmsg = unicode(tmsg, self._charset)                  catalog[msg] = tmsg @@ -304,16 +303,23 @@ class GNUTranslations(NullTranslations):              transidx += 8      def gettext(self, message): -        try: -            return self._catalog[message] -        except KeyError: +        missing = object() +        tmsg = self._catalog.get(message, missing) +        if tmsg is missing:              if self._fallback:                  return self._fallback.gettext(message)              return message +        # Encode the Unicode tmsg back to an 8-bit string, if possible +        if self._charset: +            return tmsg.encode(self._charset) +        return tmsg      def ngettext(self, msgid1, msgid2, n):          try: -            return self._catalog[(msgid1, self.plural(n))] +            tmsg = self._catalog[(msgid1, self.plural(n))] +            if self._charset: +                return tmsg.encode(self._charset) +            return tmsg          except KeyError:              if self._fallback:                  return self._fallback.ngettext(msgid1, msgid2, n) @@ -328,10 +334,7 @@ class GNUTranslations(NullTranslations):          if tmsg is missing:              if self._fallback:                  return self._fallback.ugettext(message) -            tmsg = message -        if not self._coerce: -            return unicode(tmsg, self._charset) -        # The 
msgstr is already coerced to Unicode +            return unicode(message)          return tmsg      def ungettext(self, msgid1, msgid2, n): @@ -341,12 +344,9 @@ class GNUTranslations(NullTranslations):              if self._fallback:                  return self._fallback.ungettext(msgid1, msgid2, n)              if n == 1: -                tmsg = msgid1 +                tmsg = unicode(msgid1)              else: -                tmsg = msgid2 -        if not self._coerce: -            return unicode(tmsg, self._charset) -        # The msgstr is already coerced to Unicode +                tmsg = unicode(msgid2)          return tmsg @@ -392,11 +392,11 @@ def find(domain, localedir=None, languages=None, all=0):  _translations = {}  def translation(domain, localedir=None, languages=None, -                class_=None, fallback=0): +                class_=None, fallback=False):      if class_ is None:          class_ = GNUTranslations      mofiles = find(domain, localedir, languages, all=1) -    if len(mofiles)==0: +    if not mofiles:          if fallback:              return NullTranslations()          raise IOError(ENOENT, 'No translation file found for domain', domain) @@ -419,8 +419,8 @@ def translation(domain, localedir=None, languages=None,      return result -def install(domain, localedir=None, unicode=0): -    translation(domain, localedir, fallback=1).install(unicode) +def install(domain, localedir=None, unicode=False): +    translation(domain, localedir, fallback=True).install(unicode)  | 
