summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2016-11-08 19:17:46 (GMT)
committer Serhiy Storchaka <storchaka@gmail.com> 2016-11-08 19:17:46 (GMT)
commit 07bcf05fcf3fd1d4001e8e3489162e6d67638285 (patch)
tree ee55a562d4ac5d1ff722e3ac13a750e762ed71a9
parent d751040b1a4e35fd3b01fc919cd8f9374ed714fd (diff)
download cpython-07bcf05fcf3fd1d4001e8e3489162e6d67638285.zip
cpython-07bcf05fcf3fd1d4001e8e3489162e6d67638285.tar.gz
cpython-07bcf05fcf3fd1d4001e8e3489162e6d67638285.tar.bz2
Issue #28563: Fixed possible DoS and arbitrary code execution when handling
plural form selections in the gettext module. The expression parser now supports the exact syntax supported by GNU gettext.
-rw-r--r-- Lib/gettext.py 172
-rw-r--r-- Lib/test/test_gettext.py 85
-rw-r--r-- Misc/NEWS 4
3 files changed, 216 insertions, 45 deletions
diff --git a/Lib/gettext.py b/Lib/gettext.py
index e43f044..1dadbc7 100644
--- a/Lib/gettext.py
+++ b/Lib/gettext.py
@@ -57,55 +57,139 @@ __all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
_default_localedir = os.path.join(sys.base_prefix, 'share', 'locale')
+# Expression parsing for plural form selection.
+#
+# The gettext library supports a small subset of C syntax. The only
+# incompatible difference is that integer literals starting with zero are
+# decimal.
+#
+# https://www.gnu.org/software/gettext/manual/gettext.html#Plural-forms
+# http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-runtime/intl/plural.y
+
+_token_pattern = re.compile(r"""
+ (?P<WHITESPACES>[ \t]+) | # spaces and horizontal tabs
+ (?P<NUMBER>[0-9]+\b) | # decimal integer
+ (?P<NAME>n\b) | # only n is allowed
+ (?P<PARENTHESIS>[()]) |
+ (?P<OPERATOR>[-*/%+?:]|[><!]=?|==|&&|\|\|) | # !, *, /, %, +, -, <, >,
+ # <=, >=, ==, !=, &&, ||,
+ # ? :
+ # unary and bitwise ops
+ # not allowed
+ (?P<INVALID>\w+|.) # invalid token
+ """, re.VERBOSE|re.DOTALL)
+
+def _tokenize(plural):
+ for mo in re.finditer(_token_pattern, plural):
+ kind = mo.lastgroup
+ if kind == 'WHITESPACES':
+ continue
+ value = mo.group(kind)
+ if kind == 'INVALID':
+ raise ValueError('invalid token in plural form: %s' % value)
+ yield value
+ yield ''
+
+def _error(value):
+ if value:
+ return ValueError('unexpected token in plural form: %s' % value)
+ else:
+ return ValueError('unexpected end of plural form')
+
+_binary_ops = (
+ ('||',),
+ ('&&',),
+ ('==', '!='),
+ ('<', '>', '<=', '>='),
+ ('+', '-'),
+ ('*', '/', '%'),
+)
+_binary_ops = {op: i for i, ops in enumerate(_binary_ops, 1) for op in ops}
+_c2py_ops = {'||': 'or', '&&': 'and', '/': '//'}
+
+def _parse(tokens, priority=-1):
+ result = ''
+ nexttok = next(tokens)
+ while nexttok == '!':
+ result += 'not '
+ nexttok = next(tokens)
+
+ if nexttok == '(':
+ sub, nexttok = _parse(tokens)
+ result = '%s(%s)' % (result, sub)
+ if nexttok != ')':
+ raise ValueError('unbalanced parenthesis in plural form')
+ elif nexttok == 'n':
+ result = '%s%s' % (result, nexttok)
+ else:
+ try:
+ value = int(nexttok, 10)
+ except ValueError:
+ raise _error(nexttok) from None
+ result = '%s%d' % (result, value)
+ nexttok = next(tokens)
+
+ j = 100
+ while nexttok in _binary_ops:
+ i = _binary_ops[nexttok]
+ if i < priority:
+ break
+ # Break chained comparisons
+ if i in (3, 4) and j in (3, 4): # '==', '!=', '<', '>', '<=', '>='
+ result = '(%s)' % result
+ # Replace some C operators by their Python equivalents
+ op = _c2py_ops.get(nexttok, nexttok)
+ right, nexttok = _parse(tokens, i + 1)
+ result = '%s %s %s' % (result, op, right)
+ j = i
+ if j == priority == 4: # '<', '>', '<=', '>='
+ result = '(%s)' % result
+
+ if nexttok == '?' and priority <= 0:
+ if_true, nexttok = _parse(tokens, 0)
+ if nexttok != ':':
+ raise _error(nexttok)
+ if_false, nexttok = _parse(tokens)
+ result = '%s if %s else %s' % (if_true, result, if_false)
+ if priority == 0:
+ result = '(%s)' % result
+
+ return result, nexttok
def c2py(plural):
"""Gets a C expression as used in PO files for plural forms and returns a
- Python lambda function that implements an equivalent expression.
+ Python function that implements an equivalent expression.
"""
- # Security check, allow only the "n" identifier
- import token, tokenize
- tokens = tokenize.generate_tokens(io.StringIO(plural).readline)
- try:
- danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
- except tokenize.TokenError:
- raise ValueError('plural forms expression error, maybe unbalanced parenthesis')
- else:
- if danger:
- raise ValueError('plural forms expression could be dangerous')
-
- # Replace some C operators by their Python equivalents
- plural = plural.replace('&&', ' and ')
- plural = plural.replace('||', ' or ')
-
- expr = re.compile(r'\!([^=])')
- plural = expr.sub(' not \\1', plural)
-
- # Regular expression and replacement function used to transform
- # "a?b:c" to "b if a else c".
- expr = re.compile(r'(.*?)\?(.*?):(.*)')
- def repl(x):
- return "(%s if %s else %s)" % (x.group(2), x.group(1),
- expr.sub(repl, x.group(3)))
-
- # Code to transform the plural expression, taking care of parentheses
- stack = ['']
- for c in plural:
- if c == '(':
- stack.append('')
- elif c == ')':
- if len(stack) == 1:
- # Actually, we never reach this code, because unbalanced
- # parentheses get caught in the security check at the
- # beginning.
- raise ValueError('unbalanced parenthesis in plural form')
- s = expr.sub(repl, stack.pop())
- stack[-1] += '(%s)' % s
- else:
- stack[-1] += c
- plural = expr.sub(repl, stack.pop())
-
- return eval('lambda n: int(%s)' % plural)
+ if len(plural) > 1000:
+ raise ValueError('plural form expression is too long')
+ try:
+ result, nexttok = _parse(_tokenize(plural))
+ if nexttok:
+ raise _error(nexttok)
+
+ depth = 0
+ for c in result:
+ if c == '(':
+ depth += 1
+ if depth > 20:
+ # Python compiler limit is about 90.
+ # The most complex example has 2.
+ raise ValueError('plural form expression is too complex')
+ elif c == ')':
+ depth -= 1
+
+ ns = {}
+ exec('''if True:
+ def func(n):
+ if not isinstance(n, int):
+ raise ValueError('Plural value must be an integer.')
+ return int(%s)
+ ''' % result, ns)
+ return ns['func']
+ except RuntimeError:
+ # Recursion error can be raised in _parse() or exec().
+ raise ValueError('plural form expression is too complex')
def _expand_lang(loc):
diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py
index 5456948..f8df622 100644
--- a/Lib/test/test_gettext.py
+++ b/Lib/test/test_gettext.py
@@ -230,7 +230,9 @@ class PluralFormsTestCase(GettextBaseTest):
x = t.ngettext('There is %s file', 'There are %s files', 2)
eq(x, 'Hay %s ficheros')
- def test_hu(self):
+ # Examples from http://www.gnu.org/software/gettext/manual/gettext.html
+
+ def test_ja(self):
eq = self.assertEqual
f = gettext.c2py('0')
s = ''.join([ str(f(x)) for x in range(200) ])
@@ -248,6 +250,12 @@ class PluralFormsTestCase(GettextBaseTest):
s = ''.join([ str(f(x)) for x in range(200) ])
eq(s, "00111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111")
+ def test_lv(self):
+ eq = self.assertEqual
+ f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2')
+ s = ''.join([ str(f(x)) for x in range(200) ])
+ eq(s, "20111111111111111111101111111110111111111011111111101111111110111111111011111111101111111110111111111011111111111111111110111111111011111111101111111110111111111011111111101111111110111111111011111111")
+
def test_gd(self):
eq = self.assertEqual
f = gettext.c2py('n==1 ? 0 : n==2 ? 1 : 2')
@@ -261,6 +269,12 @@ class PluralFormsTestCase(GettextBaseTest):
s = ''.join([ str(f(x)) for x in range(200) ])
eq(s, "20122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222")
+ def test_ro(self):
+ eq = self.assertEqual
+ f = gettext.c2py('n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2')
+ s = ''.join([ str(f(x)) for x in range(200) ])
+ eq(s, "10111111111111111111222222222222222222222222222222222222222222222222222222222222222222222222222222222111111111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222")
+
def test_lt(self):
eq = self.assertEqual
f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2')
@@ -273,6 +287,12 @@ class PluralFormsTestCase(GettextBaseTest):
s = ''.join([ str(f(x)) for x in range(200) ])
eq(s, "20111222222222222222201112222220111222222011122222201112222220111222222011122222201112222220111222222011122222222222222220111222222011122222201112222220111222222011122222201112222220111222222011122222")
+ def test_cs(self):
+ eq = self.assertEqual
+ f = gettext.c2py('(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2')
+ s = ''.join([ str(f(x)) for x in range(200) ])
+ eq(s, "20111222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222")
+
def test_pl(self):
eq = self.assertEqual
f = gettext.c2py('n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2')
@@ -285,10 +305,73 @@ class PluralFormsTestCase(GettextBaseTest):
s = ''.join([ str(f(x)) for x in range(200) ])
eq(s, "30122333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333012233333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333")
+ def test_ar(self):
+ eq = self.assertEqual
+ f = gettext.c2py('n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5')
+ s = ''.join([ str(f(x)) for x in range(200) ])
+ eq(s, "01233333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444445553333333344444444444444444444444444444444444444444444444444444444444444444444444444444444444444444")
+
def test_security(self):
raises = self.assertRaises
# Test for a dangerous expression
raises(ValueError, gettext.c2py, "os.chmod('/etc/passwd',0777)")
+ # issue28563
+ raises(ValueError, gettext.c2py, '"(eval(foo) && ""')
+ raises(ValueError, gettext.c2py, 'f"{os.system(\'sh\')}"')
+ # Maximum recursion depth exceeded during compilation
+ raises(ValueError, gettext.c2py, 'n+'*10000 + 'n')
+ self.assertEqual(gettext.c2py('n+'*100 + 'n')(1), 101)
+ # MemoryError during compilation
+ raises(ValueError, gettext.c2py, '('*100 + 'n' + ')'*100)
+ # Maximum recursion depth exceeded in C to Python translator
+ raises(ValueError, gettext.c2py, '('*10000 + 'n' + ')'*10000)
+ self.assertEqual(gettext.c2py('('*20 + 'n' + ')'*20)(1), 1)
+
+ def test_chained_comparison(self):
+ # C doesn't chain comparison as Python so 2 == 2 == 2 gets different results
+ f = gettext.c2py('n == n == n')
+ self.assertEqual(''.join(str(f(x)) for x in range(3)), '010')
+ f = gettext.c2py('1 < n == n')
+ self.assertEqual(''.join(str(f(x)) for x in range(3)), '100')
+ f = gettext.c2py('n == n < 2')
+ self.assertEqual(''.join(str(f(x)) for x in range(3)), '010')
+ f = gettext.c2py('0 < n < 2')
+ self.assertEqual(''.join(str(f(x)) for x in range(3)), '111')
+
+ def test_decimal_number(self):
+ self.assertEqual(gettext.c2py('0123')(1), 123)
+
+ def test_invalid_syntax(self):
+ invalid_expressions = [
+ 'x>1', '(n>1', 'n>1)', '42**42**42', '0xa', '1.0', '1e2',
+ 'n>0x1', '+n', '-n', 'n()', 'n(1)', '1+', 'nn', 'n n',
+ ]
+ for expr in invalid_expressions:
+ with self.assertRaises(ValueError):
+ gettext.c2py(expr)
+
+ def test_nested_condition_operator(self):
+ self.assertEqual(gettext.c2py('n?1?2:3:4')(0), 4)
+ self.assertEqual(gettext.c2py('n?1?2:3:4')(1), 2)
+ self.assertEqual(gettext.c2py('n?1:3?4:5')(0), 4)
+ self.assertEqual(gettext.c2py('n?1:3?4:5')(1), 1)
+
+ def test_division(self):
+ f = gettext.c2py('2/n*3')
+ self.assertEqual(f(1), 6)
+ self.assertEqual(f(2), 3)
+ self.assertEqual(f(3), 0)
+ self.assertEqual(f(-1), -6)
+ self.assertRaises(ZeroDivisionError, f, 0)
+
+ def test_plural_number(self):
+ f = gettext.c2py('1')
+ self.assertEqual(f(1), 1)
+ self.assertRaises(ValueError, f, 1.0)
+ self.assertRaises(ValueError, f, '1')
+ self.assertRaises(ValueError, f, [])
+ self.assertRaises(ValueError, f, object())
+
class UnicodeTranslationsTest(GettextBaseTest):
diff --git a/Misc/NEWS b/Misc/NEWS
index 731cd0f..c55dc4a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -32,6 +32,10 @@ Core and Builtins
Library
-------
+- Issue #28563: Fixed possible DoS and arbitrary code execution when handling
+ plural form selections in the gettext module. The expression parser now
+ supports the exact syntax supported by GNU gettext.
+
- Issue #27783: Fix possible usage of uninitialized memory in operator.methodcaller.
- Issue #27774: Fix possible Py_DECREF on unowned object in _sre.