From 6156560e4b40ae81304d80b5a932fd90e6b4ba80 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 20 Nov 2015 21:56:21 +0200 Subject: Issue #25678: Copy buffer objects to null-terminated strings. Avoid buffer overreads when int(), long(), float(), and compile() are passed buffer objects. Similar code is removed from the complex() constructor, where it was not reachable. Patch backported from issue #24802 by Eryk Sun. --- Lib/test/test_compile.py | 13 +++++++++++++ Lib/test/test_float.py | 30 ++++++++++++++++++++++++++++++ Lib/test/test_int.py | 25 +++++++++++++++++++++---- Misc/NEWS | 4 ++++ Objects/abstract.c | 25 +++++++++++++++++++++---- Objects/complexobject.c | 2 +- Objects/floatobject.c | 11 ++++++++++- Python/bltinmodule.c | 21 ++++++++++++++++----- 8 files changed, 116 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index c166ff1..6a6ef0e 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -571,6 +571,19 @@ if 1: test_support.rmtree(tmpd) self.assertIn(b"Non-ASCII", err) + def test_null_terminated(self): + # The source code is null-terminated internally, but bytes-like + # objects are accepted, which might not be null-terminated. 
+ with self.assertRaisesRegexp(TypeError, "without null bytes"): + compile(u"123\x00", "", "eval") + with self.assertRaisesRegexp(TypeError, "without null bytes"): + compile(buffer("123\x00"), "", "eval") + code = compile(buffer("123\x00", 1, 2), "", "eval") + self.assertEqual(eval(code), 23) + code = compile(buffer("1234", 1, 2), "", "eval") + self.assertEqual(eval(code), 23) + code = compile(buffer("$23$", 1, 2), "", "eval") + self.assertEqual(eval(code), 23) class TestStackSize(unittest.TestCase): # These tests check that the computed stack size for a code object diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 5bf1d31..4224306 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -53,6 +53,36 @@ class GeneralFloatCases(unittest.TestCase): float('.' + '1'*1000) float(unicode('.' + '1'*1000)) + def test_non_numeric_input_types(self): + # Test possible non-numeric types for the argument x, including + # subclasses of the explicitly documented accepted types. 
+ class CustomStr(str): pass + class CustomByteArray(bytearray): pass + factories = [str, bytearray, CustomStr, CustomByteArray, buffer] + + if test_support.have_unicode: + class CustomUnicode(unicode): pass + factories += [unicode, CustomUnicode] + + for f in factories: + x = f(" 3.14 ") + msg = 'x has value %s and type %s' % (x, type(x).__name__) + try: + self.assertEqual(float(x), 3.14, msg=msg) + except TypeError, err: + raise AssertionError('For %s got TypeError: %s' % + (type(x).__name__, err)) + errmsg = "could not convert" + with self.assertRaisesRegexp(ValueError, errmsg, msg=msg): + float(f('A' * 0x10)) + + def test_float_buffer(self): + self.assertEqual(float(buffer('12.3', 1, 3)), 2.3) + self.assertEqual(float(buffer('12.3\x00', 1, 3)), 2.3) + self.assertEqual(float(buffer('12.3 ', 1, 3)), 2.3) + self.assertEqual(float(buffer('12.3A', 1, 3)), 2.3) + self.assertEqual(float(buffer('12.34', 1, 3)), 2.3) + def check_conversion_to_int(self, x): """Check that int(x) has the correct value and type, for a float x.""" n = int(x) diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index 365f9a2..2ca6cf2 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -340,20 +340,37 @@ class IntTestCases(IntLongCommonTests, unittest.TestCase): # Test possible valid non-numeric types for x, including subclasses # of the allowed built-in types. 
class CustomStr(str): pass - values = ['100', CustomStr('100')] + class CustomByteArray(bytearray): pass + factories = [str, bytearray, CustomStr, CustomByteArray, buffer] if have_unicode: class CustomUnicode(unicode): pass - values += [unicode('100'), CustomUnicode(unicode('100'))] + factories += [unicode, CustomUnicode] - for x in values: + for f in factories: + x = f('100') msg = 'x has value %s and type %s' % (x, type(x).__name__) try: self.assertEqual(int(x), 100, msg=msg) - self.assertEqual(int(x, 2), 4, msg=msg) + if isinstance(x, basestring): + self.assertEqual(int(x, 2), 4, msg=msg) except TypeError, err: raise AssertionError('For %s got TypeError: %s' % (type(x).__name__, err)) + if not isinstance(x, basestring): + errmsg = "can't convert non-string" + with self.assertRaisesRegexp(TypeError, errmsg, msg=msg): + int(x, 2) + errmsg = 'invalid literal' + with self.assertRaisesRegexp(ValueError, errmsg, msg=msg): + int(f('A' * 0x10)) + + def test_int_buffer(self): + self.assertEqual(int(buffer('123', 1, 2)), 23) + self.assertEqual(int(buffer('123\x00', 1, 2)), 23) + self.assertEqual(int(buffer('123 ', 1, 2)), 23) + self.assertEqual(int(buffer('123A', 1, 2)), 23) + self.assertEqual(int(buffer('1234', 1, 2)), 23) def test_error_on_string_float_for_x(self): self.assertRaises(ValueError, int, '1.2') diff --git a/Misc/NEWS b/Misc/NEWS index 1075daa..439d9fb 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ What's New in Python 2.7.11? Core and Builtins ----------------- +- Issue #25678: Avoid buffer overreads when int(), long(), float(), and + compile() are passed buffer objects. These objects are not necessarily + terminated by a null byte, but the functions assumed they were. + - Issue #25388: Fixed tokenizer hang when processing undecodable source code with a null byte. 
diff --git a/Objects/abstract.c b/Objects/abstract.c index ad7889e..3a50b73 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1666,8 +1666,17 @@ PyNumber_Int(PyObject *o) PyUnicode_GET_SIZE(o), 10); #endif - if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) - return int_from_string((char*)buffer, buffer_len); + if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) { + PyObject *result, *str; + + /* Copy to NUL-terminated buffer. */ + str = PyString_FromStringAndSize((const char *)buffer, buffer_len); + if (str == NULL) + return NULL; + result = int_from_string(PyString_AS_STRING(str), buffer_len); + Py_DECREF(str); + return result; + } return type_error("int() argument must be a string or a " "number, not '%.200s'", o); @@ -1765,9 +1774,17 @@ PyNumber_Long(PyObject *o) PyUnicode_GET_SIZE(o), 10); #endif - if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) - return long_from_string(buffer, buffer_len); + if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) { + PyObject *result, *str; + /* Copy to NUL-terminated buffer. 
*/ + str = PyString_FromStringAndSize((const char *)buffer, buffer_len); + if (str == NULL) + return NULL; + result = long_from_string(PyString_AS_STRING(str), buffer_len); + Py_DECREF(str); + return result; + } return type_error("long() argument must be a string or a " "number, not '%.200s'", o); } diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 2659a23..9e97d1b 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -1000,7 +1000,7 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) len = strlen(s); } #endif - else if (PyObject_AsCharBuffer(v, &s, &len)) { + else { PyErr_SetString(PyExc_TypeError, "complex() arg is not a string"); return NULL; diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 0ce7f6c..1143fab 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -180,6 +180,7 @@ PyFloat_FromString(PyObject *v, char **pend) char *s_buffer = NULL; #endif Py_ssize_t len; + PyObject *str = NULL; PyObject *result = NULL; if (pend) @@ -202,7 +203,14 @@ PyFloat_FromString(PyObject *v, char **pend) len = strlen(s); } #endif - else if (PyObject_AsCharBuffer(v, &s, &len)) { + else if (!PyObject_AsCharBuffer(v, &s, &len)) { + /* Copy to NUL-terminated buffer. 
*/ + str = PyString_FromStringAndSize(s, len); + if (str == NULL) + return NULL; + s = PyString_AS_STRING(str); + } + else { PyErr_SetString(PyExc_TypeError, "float() argument must be a string or a number"); return NULL; @@ -233,6 +241,7 @@ PyFloat_FromString(PyObject *v, char **pend) if (s_buffer) PyMem_FREE(s_buffer); #endif + Py_XDECREF(str); return result; } diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index f052574..d99b676 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -538,18 +538,29 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) } return result; } - + if (PyString_Check(cmd)) { + str = PyString_AS_STRING(cmd); + length = PyString_GET_SIZE(cmd); + } #ifdef Py_USING_UNICODE - if (PyUnicode_Check(cmd)) { + else if (PyUnicode_Check(cmd)) { tmp = PyUnicode_AsUTF8String(cmd); if (tmp == NULL) return NULL; - cmd = tmp; cf.cf_flags |= PyCF_SOURCE_IS_UTF8; + str = PyString_AS_STRING(tmp); + length = PyString_GET_SIZE(tmp); } #endif - - if (PyObject_AsReadBuffer(cmd, (const void **)&str, &length)) + else if (!PyObject_AsReadBuffer(cmd, (const void **)&str, &length)) { + /* Copy to NUL-terminated buffer. */ + tmp = PyString_FromStringAndSize(str, length); + if (tmp == NULL) + return NULL; + str = PyString_AS_STRING(tmp); + length = PyString_GET_SIZE(tmp); + } + else goto cleanup; if ((size_t)length != strlen(str)) { PyErr_SetString(PyExc_TypeError, -- cgit v0.12