summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2017-06-20 14:13:29 (GMT)
committer GitHub <noreply@github.com> 2017-06-20 14:13:29 (GMT)
commit 26cb4657bcc9a7adffa95798ececb588dddfeadb (patch)
tree e1ebdeb6ee1f428a739a50933ffef3e6ee141aa8
parent 8457706ee308a621103e9b9c760ca9da3cc4e7c0 (diff)
download cpython-26cb4657bcc9a7adffa95798ececb588dddfeadb.zip
cpython-26cb4657bcc9a7adffa95798ececb588dddfeadb.tar.gz
cpython-26cb4657bcc9a7adffa95798ececb588dddfeadb.tar.bz2
bpo-29755: Fixed the lgettext() family of functions in the gettext module. (#2266)
They now always return bytes. Updated the gettext documentation.
-rw-r--r-- Doc/library/gettext.rst 161
-rw-r--r-- Lib/gettext.py 40
-rw-r--r-- Lib/test/test_gettext.py 132
-rw-r--r-- Misc/NEWS 3
4 files changed, 229 insertions, 107 deletions
diff --git a/Doc/library/gettext.rst b/Doc/library/gettext.rst
index 3a87bf5..053d9d3 100644
--- a/Doc/library/gettext.rst
+++ b/Doc/library/gettext.rst
@@ -48,9 +48,10 @@ class-based API instead.
.. function:: bind_textdomain_codeset(domain, codeset=None)
- Bind the *domain* to *codeset*, changing the encoding of strings returned by the
- :func:`gettext` family of functions. If *codeset* is omitted, then the current
- binding is returned.
+ Bind the *domain* to *codeset*, changing the encoding of byte strings
+ returned by the :func:`lgettext`, :func:`ldgettext`, :func:`lngettext`
+ and :func:`ldngettext` functions.
+ If *codeset* is omitted, then the current binding is returned.
.. function:: textdomain(domain=None)
@@ -67,28 +68,14 @@ class-based API instead.
:func:`_` in the local namespace (see examples below).
-.. function:: lgettext(message)
-
- Equivalent to :func:`gettext`, but the translation is returned in the
- preferred system encoding, if no other encoding was explicitly set with
- :func:`bind_textdomain_codeset`.
-
-
.. function:: dgettext(domain, message)
- Like :func:`gettext`, but look the message up in the specified *domain*.
-
-
-.. function:: ldgettext(domain, message)
-
- Equivalent to :func:`dgettext`, but the translation is returned in the
- preferred system encoding, if no other encoding was explicitly set with
- :func:`bind_textdomain_codeset`.
+ Like :func:`.gettext`, but look the message up in the specified *domain*.
.. function:: ngettext(singular, plural, n)
- Like :func:`gettext`, but consider plural forms. If a translation is found,
+ Like :func:`.gettext`, but consider plural forms. If a translation is found,
apply the plural formula to *n*, and return the resulting message (some
languages have more than two plural forms). If no translation is found, return
*singular* if *n* is 1; return *plural* otherwise.
@@ -101,24 +88,33 @@ class-based API instead.
formulas for a variety of languages.
-.. function:: lngettext(singular, plural, n)
-
- Equivalent to :func:`ngettext`, but the translation is returned in the
- preferred system encoding, if no other encoding was explicitly set with
- :func:`bind_textdomain_codeset`.
-
-
.. function:: dngettext(domain, singular, plural, n)
Like :func:`ngettext`, but look the message up in the specified *domain*.
+.. function:: lgettext(message)
+.. function:: ldgettext(domain, message)
+.. function:: lngettext(singular, plural, n)
.. function:: ldngettext(domain, singular, plural, n)
- Equivalent to :func:`dngettext`, but the translation is returned in the
- preferred system encoding, if no other encoding was explicitly set with
+ Equivalent to the corresponding functions without the ``l`` prefix
+ (:func:`.gettext`, :func:`dgettext`, :func:`ngettext` and :func:`dngettext`),
+ but the translation is returned as a byte string encoded in the preferred
+ system encoding if no other encoding was explicitly set with
:func:`bind_textdomain_codeset`.
+ .. warning::
+
+ These functions should be avoided in Python 3, because they return
+ encoded bytes. It's much better to use alternatives which return
+ Unicode strings instead, since most Python applications will want to
+ manipulate human readable text as strings instead of bytes. Further,
+ it's possible that you may get unexpected Unicode-related exceptions
+ if there are encoding problems with the translated strings. It is
+ possible that the ``l*()`` functions will be deprecated in future Python
+ versions due to their inherent problems and limitations.
+
Note that GNU :program:`gettext` also defines a :func:`dcgettext` method, but
this was deemed not useful and so it is currently unimplemented.
@@ -179,8 +175,9 @@ class can also install themselves in the built-in namespace as the function
names are cached. The actual class instantiated is either *class_* if
provided, otherwise :class:`GNUTranslations`. The class's constructor must
take a single :term:`file object` argument. If provided, *codeset* will change
- the charset used to encode translated strings in the :meth:`lgettext` and
- :meth:`lngettext` methods.
+ the charset used to encode translated strings in the
+ :meth:`~NullTranslations.lgettext` and :meth:`~NullTranslations.lngettext`
+ methods.
If multiple files are found, later files are used as fallbacks for earlier ones.
To allow setting the fallback, :func:`copy.copy` is used to clone each
@@ -250,26 +247,29 @@ are the methods of :class:`NullTranslations`:
.. method:: gettext(message)
- If a fallback has been set, forward :meth:`gettext` to the fallback.
- Otherwise, return the translated message. Overridden in derived classes.
-
-
- .. method:: lgettext(message)
-
- If a fallback has been set, forward :meth:`lgettext` to the fallback.
- Otherwise, return the translated message. Overridden in derived classes.
+ If a fallback has been set, forward :meth:`.gettext` to the fallback.
+ Otherwise, return *message*. Overridden in derived classes.
.. method:: ngettext(singular, plural, n)
If a fallback has been set, forward :meth:`ngettext` to the fallback.
- Otherwise, return the translated message. Overridden in derived classes.
+ Otherwise, return *singular* if *n* is 1; return *plural* otherwise.
+ Overridden in derived classes.
+ .. method:: lgettext(message)
.. method:: lngettext(singular, plural, n)
- If a fallback has been set, forward :meth:`lngettext` to the fallback.
- Otherwise, return the translated message. Overridden in derived classes.
+ Equivalent to :meth:`.gettext` and :meth:`ngettext`, but the translation
+ is returned as a byte string encoded in the preferred system encoding
+ if no encoding was explicitly set with :meth:`set_output_charset`.
+ Overridden in derived classes.
+
+ .. warning::
+
+ These methods should be avoided in Python 3. See the warning for the
+ :func:`lgettext` function.
.. method:: info()
@@ -279,32 +279,28 @@ are the methods of :class:`NullTranslations`:
.. method:: charset()
- Return the "protected" :attr:`_charset` variable, which is the encoding of
- the message catalog file.
+ Return the encoding of the message catalog file.
.. method:: output_charset()
- Return the "protected" :attr:`_output_charset` variable, which defines the
- encoding used to return translated messages in :meth:`lgettext` and
- :meth:`lngettext`.
+ Return the encoding used to return translated messages in :meth:`.lgettext`
+ and :meth:`.lngettext`.
.. method:: set_output_charset(charset)
- Change the "protected" :attr:`_output_charset` variable, which defines the
- encoding used to return translated messages.
+ Change the encoding used to return translated messages.
.. method:: install(names=None)
- This method installs :meth:`self.gettext` into the built-in namespace,
+ This method installs :meth:`.gettext` into the built-in namespace,
binding it to ``_``.
If the *names* parameter is given, it must be a sequence containing the
names of functions you want to install in the builtins namespace in
- addition to :func:`_`. Supported names are ``'gettext'`` (bound to
- :meth:`self.gettext`), ``'ngettext'`` (bound to :meth:`self.ngettext`),
+ addition to :func:`_`. Supported names are ``'gettext'``, ``'ngettext'``,
``'lgettext'`` and ``'lngettext'``.
Note that this is only one way, albeit the most convenient way, to make
@@ -349,49 +345,52 @@ If the :file:`.mo` file's magic number is invalid, the major version number is
unexpected, or if other problems occur while reading the file, instantiating a
:class:`GNUTranslations` class can raise :exc:`OSError`.
-The following methods are overridden from the base class implementation:
-
+.. class:: GNUTranslations
-.. method:: GNUTranslations.gettext(message)
+ The following methods are overridden from the base class implementation:
- Look up the *message* id in the catalog and return the corresponding message
- string, as a Unicode string. If there is no entry in the catalog for the
- *message* id, and a fallback has been set, the look up is forwarded to the
- fallback's :meth:`gettext` method. Otherwise, the *message* id is returned.
+ .. method:: gettext(message)
+ Look up the *message* id in the catalog and return the corresponding message
+ string, as a Unicode string. If there is no entry in the catalog for the
+ *message* id, and a fallback has been set, the look up is forwarded to the
+ fallback's :meth:`~NullTranslations.gettext` method. Otherwise, the
+ *message* id is returned.
-.. method:: GNUTranslations.lgettext(message)
- Equivalent to :meth:`gettext`, but the translation is returned as a
- bytestring encoded in the selected output charset, or in the preferred system
- encoding if no encoding was explicitly set with :meth:`set_output_charset`.
+ .. method:: ngettext(singular, plural, n)
+ Do a plural-forms lookup of a message id. *singular* is used as the message id
+ for purposes of lookup in the catalog, while *n* is used to determine which
+ plural form to use. The returned message string is a Unicode string.
-.. method:: GNUTranslations.ngettext(singular, plural, n)
+ If the message id is not found in the catalog, and a fallback is specified,
+ the request is forwarded to the fallback's :meth:`~NullTranslations.ngettext`
+ method. Otherwise, when *n* is 1 *singular* is returned, and *plural* is
+ returned in all other cases.
- Do a plural-forms lookup of a message id. *singular* is used as the message id
- for purposes of lookup in the catalog, while *n* is used to determine which
- plural form to use. The returned message string is a Unicode string.
+ Here is an example::
- If the message id is not found in the catalog, and a fallback is specified, the
- request is forwarded to the fallback's :meth:`ngettext` method. Otherwise, when
- *n* is 1 *singular* is returned, and *plural* is returned in all other cases.
+ n = len(os.listdir('.'))
+ cat = GNUTranslations(somefile)
+ message = cat.ngettext(
+ 'There is %(num)d file in this directory',
+ 'There are %(num)d files in this directory',
+ n) % {'num': n}
- Here is an example::
- n = len(os.listdir('.'))
- cat = GNUTranslations(somefile)
- message = cat.ngettext(
- 'There is %(num)d file in this directory',
- 'There are %(num)d files in this directory',
- n) % {'num': n}
+ .. method:: lgettext(message)
+ .. method:: lngettext(singular, plural, n)
+ Equivalent to :meth:`.gettext` and :meth:`.ngettext`, but the translation
+ is returned as a byte string encoded in the preferred system encoding
+ if no encoding was explicitly set with
+ :meth:`~NullTranslations.set_output_charset`.
-.. method:: GNUTranslations.lngettext(singular, plural, n)
+ .. warning::
- Equivalent to :meth:`gettext`, but the translation is returned as a
- bytestring encoded in the selected output charset, or in the preferred system
- encoding if no encoding was explicitly set with :meth:`set_output_charset`.
+ These methods should be avoided in Python 3. See the warning for the
+ :func:`lgettext` function.
Solaris message catalog support
@@ -509,7 +508,7 @@ module::
import gettext
t = gettext.translation('spam', '/usr/share/locale')
- _ = t.lgettext
+ _ = t.gettext
Localizing your application
diff --git a/Lib/gettext.py b/Lib/gettext.py
index 08d051b..5ad7ff6 100644
--- a/Lib/gettext.py
+++ b/Lib/gettext.py
@@ -279,7 +279,9 @@ class NullTranslations:
def lgettext(self, message):
if self._fallback:
return self._fallback.lgettext(message)
- return message
+ if self._output_charset:
+ return message.encode(self._output_charset)
+ return message.encode(locale.getpreferredencoding())
def ngettext(self, msgid1, msgid2, n):
if self._fallback:
@@ -293,9 +295,12 @@ class NullTranslations:
if self._fallback:
return self._fallback.lngettext(msgid1, msgid2, n)
if n == 1:
- return msgid1
+ tmsg = msgid1
else:
- return msgid2
+ tmsg = msgid2
+ if self._output_charset:
+ return tmsg.encode(self._output_charset)
+ return tmsg.encode(locale.getpreferredencoding())
def info(self):
return self._info
@@ -377,7 +382,7 @@ class GNUTranslations(NullTranslations):
if mlen == 0:
# Catalog description
lastk = None
- for b_item in tmsg.split('\n'.encode("ascii")):
+ for b_item in tmsg.split(b'\n'):
item = b_item.decode().strip()
if not item:
continue
@@ -425,7 +430,7 @@ class GNUTranslations(NullTranslations):
if tmsg is missing:
if self._fallback:
return self._fallback.lgettext(message)
- return message
+ tmsg = message
if self._output_charset:
return tmsg.encode(self._output_charset)
return tmsg.encode(locale.getpreferredencoding())
@@ -433,16 +438,16 @@ class GNUTranslations(NullTranslations):
def lngettext(self, msgid1, msgid2, n):
try:
tmsg = self._catalog[(msgid1, self.plural(n))]
- if self._output_charset:
- return tmsg.encode(self._output_charset)
- return tmsg.encode(locale.getpreferredencoding())
except KeyError:
if self._fallback:
return self._fallback.lngettext(msgid1, msgid2, n)
if n == 1:
- return msgid1
+ tmsg = msgid1
else:
- return msgid2
+ tmsg = msgid2
+ if self._output_charset:
+ return tmsg.encode(self._output_charset)
+ return tmsg.encode(locale.getpreferredencoding())
def gettext(self, message):
missing = object()
@@ -582,11 +587,11 @@ def dgettext(domain, message):
return t.gettext(message)
def ldgettext(domain, message):
+ codeset = _localecodesets.get(domain)
try:
- t = translation(domain, _localedirs.get(domain, None),
- codeset=_localecodesets.get(domain))
+ t = translation(domain, _localedirs.get(domain, None), codeset=codeset)
except OSError:
- return message
+ return message.encode(codeset or locale.getpreferredencoding())
return t.lgettext(message)
def dngettext(domain, msgid1, msgid2, n):
@@ -601,14 +606,15 @@ def dngettext(domain, msgid1, msgid2, n):
return t.ngettext(msgid1, msgid2, n)
def ldngettext(domain, msgid1, msgid2, n):
+ codeset = _localecodesets.get(domain)
try:
- t = translation(domain, _localedirs.get(domain, None),
- codeset=_localecodesets.get(domain))
+ t = translation(domain, _localedirs.get(domain, None), codeset=codeset)
except OSError:
if n == 1:
- return msgid1
+ tmsg = msgid1
else:
- return msgid2
+ tmsg = msgid2
+ return tmsg.encode(codeset or locale.getpreferredencoding())
return t.lngettext(msgid1, msgid2, n)
def gettext(message):
diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py
index 7bfe747..b5ed05e 100644
--- a/Lib/test/test_gettext.py
+++ b/Lib/test/test_gettext.py
@@ -1,6 +1,7 @@
import os
import base64
import gettext
+import locale
import unittest
from test import support
@@ -455,6 +456,122 @@ class PluralFormsTestCase(GettextBaseTest):
self.assertRaises(TypeError, f, object())
+class LGettextTestCase(GettextBaseTest):
+ def setUp(self):
+ GettextBaseTest.setUp(self)
+ self.mofile = MOFILE
+
+ def test_lgettext(self):
+ lgettext = gettext.lgettext
+ ldgettext = gettext.ldgettext
+ self.assertEqual(lgettext('mullusk'), b'bacon')
+ self.assertEqual(lgettext('spam'), b'spam')
+ self.assertEqual(ldgettext('gettext', 'mullusk'), b'bacon')
+ self.assertEqual(ldgettext('gettext', 'spam'), b'spam')
+
+ def test_lgettext_2(self):
+ with open(self.mofile, 'rb') as fp:
+ t = gettext.GNUTranslations(fp)
+ lgettext = t.lgettext
+ self.assertEqual(lgettext('mullusk'), b'bacon')
+ self.assertEqual(lgettext('spam'), b'spam')
+
+ def test_lgettext_bind_textdomain_codeset(self):
+ lgettext = gettext.lgettext
+ ldgettext = gettext.ldgettext
+ saved_codeset = gettext.bind_textdomain_codeset('gettext')
+ try:
+ gettext.bind_textdomain_codeset('gettext', 'utf-16')
+ self.assertEqual(lgettext('mullusk'), 'bacon'.encode('utf-16'))
+ self.assertEqual(lgettext('spam'), 'spam'.encode('utf-16'))
+ self.assertEqual(ldgettext('gettext', 'mullusk'), 'bacon'.encode('utf-16'))
+ self.assertEqual(ldgettext('gettext', 'spam'), 'spam'.encode('utf-16'))
+ finally:
+ del gettext._localecodesets['gettext']
+ gettext.bind_textdomain_codeset('gettext', saved_codeset)
+
+ def test_lgettext_output_encoding(self):
+ with open(self.mofile, 'rb') as fp:
+ t = gettext.GNUTranslations(fp)
+ lgettext = t.lgettext
+ t.set_output_charset('utf-16')
+ self.assertEqual(lgettext('mullusk'), 'bacon'.encode('utf-16'))
+ self.assertEqual(lgettext('spam'), 'spam'.encode('utf-16'))
+
+ def test_lngettext(self):
+ lngettext = gettext.lngettext
+ ldngettext = gettext.ldngettext
+ x = lngettext('There is %s file', 'There are %s files', 1)
+ self.assertEqual(x, b'Hay %s fichero')
+ x = lngettext('There is %s file', 'There are %s files', 2)
+ self.assertEqual(x, b'Hay %s ficheros')
+ x = lngettext('There is %s directory', 'There are %s directories', 1)
+ self.assertEqual(x, b'There is %s directory')
+ x = lngettext('There is %s directory', 'There are %s directories', 2)
+ self.assertEqual(x, b'There are %s directories')
+ x = ldngettext('gettext', 'There is %s file', 'There are %s files', 1)
+ self.assertEqual(x, b'Hay %s fichero')
+ x = ldngettext('gettext', 'There is %s file', 'There are %s files', 2)
+ self.assertEqual(x, b'Hay %s ficheros')
+ x = ldngettext('gettext', 'There is %s directory', 'There are %s directories', 1)
+ self.assertEqual(x, b'There is %s directory')
+ x = ldngettext('gettext', 'There is %s directory', 'There are %s directories', 2)
+ self.assertEqual(x, b'There are %s directories')
+
+ def test_lngettext_2(self):
+ with open(self.mofile, 'rb') as fp:
+ t = gettext.GNUTranslations(fp)
+ lngettext = t.lngettext
+ x = lngettext('There is %s file', 'There are %s files', 1)
+ self.assertEqual(x, b'Hay %s fichero')
+ x = lngettext('There is %s file', 'There are %s files', 2)
+ self.assertEqual(x, b'Hay %s ficheros')
+ x = lngettext('There is %s directory', 'There are %s directories', 1)
+ self.assertEqual(x, b'There is %s directory')
+ x = lngettext('There is %s directory', 'There are %s directories', 2)
+ self.assertEqual(x, b'There are %s directories')
+
+ def test_lngettext_bind_textdomain_codeset(self):
+ lngettext = gettext.lngettext
+ ldngettext = gettext.ldngettext
+ saved_codeset = gettext.bind_textdomain_codeset('gettext')
+ try:
+ gettext.bind_textdomain_codeset('gettext', 'utf-16')
+ x = lngettext('There is %s file', 'There are %s files', 1)
+ self.assertEqual(x, 'Hay %s fichero'.encode('utf-16'))
+ x = lngettext('There is %s file', 'There are %s files', 2)
+ self.assertEqual(x, 'Hay %s ficheros'.encode('utf-16'))
+ x = lngettext('There is %s directory', 'There are %s directories', 1)
+ self.assertEqual(x, 'There is %s directory'.encode('utf-16'))
+ x = lngettext('There is %s directory', 'There are %s directories', 2)
+ self.assertEqual(x, 'There are %s directories'.encode('utf-16'))
+ x = ldngettext('gettext', 'There is %s file', 'There are %s files', 1)
+ self.assertEqual(x, 'Hay %s fichero'.encode('utf-16'))
+ x = ldngettext('gettext', 'There is %s file', 'There are %s files', 2)
+ self.assertEqual(x, 'Hay %s ficheros'.encode('utf-16'))
+ x = ldngettext('gettext', 'There is %s directory', 'There are %s directories', 1)
+ self.assertEqual(x, 'There is %s directory'.encode('utf-16'))
+ x = ldngettext('gettext', 'There is %s directory', 'There are %s directories', 2)
+ self.assertEqual(x, 'There are %s directories'.encode('utf-16'))
+ finally:
+ del gettext._localecodesets['gettext']
+ gettext.bind_textdomain_codeset('gettext', saved_codeset)
+
+ def test_lngettext_output_encoding(self):
+ with open(self.mofile, 'rb') as fp:
+ t = gettext.GNUTranslations(fp)
+ lngettext = t.lngettext
+ t.set_output_charset('utf-16')
+ x = lngettext('There is %s file', 'There are %s files', 1)
+ self.assertEqual(x, 'Hay %s fichero'.encode('utf-16'))
+ x = lngettext('There is %s file', 'There are %s files', 2)
+ self.assertEqual(x, 'Hay %s ficheros'.encode('utf-16'))
+ x = lngettext('There is %s directory', 'There are %s directories', 1)
+ self.assertEqual(x, 'There is %s directory'.encode('utf-16'))
+ x = lngettext('There is %s directory', 'There are %s directories', 2)
+ self.assertEqual(x, 'There are %s directories'.encode('utf-16'))
+
+
class GNUTranslationParsingTest(GettextBaseTest):
def test_plural_form_error_issue17898(self):
with open(MOFILE, 'wb') as fp:
@@ -472,13 +589,10 @@ class UnicodeTranslationsTest(GettextBaseTest):
self._ = self.t.gettext
def test_unicode_msgid(self):
- unless = self.assertTrue
- unless(isinstance(self._(''), str))
- unless(isinstance(self._(''), str))
+ self.assertIsInstance(self._(''), str)
def test_unicode_msgstr(self):
- eq = self.assertEqual
- eq(self._('ab\xde'), '\xa4yz')
+ self.assertEqual(self._('ab\xde'), '\xa4yz')
class WeirdMetadataTest(GettextBaseTest):
@@ -547,7 +661,7 @@ if __name__ == '__main__':
# The original version was automatically generated from the sources with
# pygettext. Later it was manually modified to add plural forms support.
-'''
+b'''
# Dummy translation for the Python test_gettext.py module.
# Copyright (C) 2001 Python Software Foundation
# Barry Warsaw <barry@python.org>, 2000.
@@ -607,7 +721,7 @@ msgstr[1] "Hay %s ficheros"
# Here's the second example po file example, used to generate the UMO_DATA
# containing utf-8 encoded Unicode strings
-'''
+b'''
# Dummy translation for the Python test_gettext.py module.
# Copyright (C) 2001 Python Software Foundation
# Barry Warsaw <barry@python.org>, 2000.
@@ -630,7 +744,7 @@ msgstr "\xc2\xa4yz"
# Here's the third example po file, used to generate MMO_DATA
-'''
+b'''
msgid ""
msgstr ""
"Project-Id-Version: No Project 0.0\n"
@@ -649,7 +763,7 @@ msgstr ""
# messages.po, used for bug 17898
#
-'''
+b'''
# test file for http://bugs.python.org/issue17898
msgid ""
msgstr ""
diff --git a/Misc/NEWS b/Misc/NEWS
index 3af74ff..88b1e3e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -368,6 +368,9 @@ Extension Modules
Library
-------
+- bpo-29755: Fixed the lgettext() family of functions in the gettext module.
+ They now always return bytes.
+
- [Security] bpo-30500: Fix urllib.parse.splithost() to correctly parse
fragments. For example, ``splithost('//127.0.0.1#@evil.com/')`` now
correctly returns the ``127.0.0.1`` host, instead of treating ``@evil.com``