From eeb896c4116dd763efea45cb3c1b53257128f4e4 Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Sat, 7 Nov 2015 02:32:21 +0000 Subject: Issue #24802: Copy bytes-like objects to null-terminated buffers if necessary This avoids possible buffer overreads when int(), float(), compile(), exec() and eval() are passed bytes-like objects. Similar code is removed from the complex() constructor, where it was not reachable. Patch by John Leitch, Serhiy Storchaka and Martin Panter. --- Lib/test/test_compile.py | 21 +++++++++++++++++++++ Lib/test/test_float.py | 38 ++++++++++++++++++++++++++++++++++++-- Lib/test/test_int.py | 44 ++++++++++++++++++++++++++++++++++---------- Misc/NEWS | 4 ++++ Objects/abstract.c | 22 ++++++++++++++++++++-- Objects/complexobject.c | 7 ------- Objects/floatobject.c | 15 +++++++++++++++ Python/bltinmodule.c | 48 ++++++++++++++++++++++++++++++++---------------- 8 files changed, 162 insertions(+), 37 deletions(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index cff3c9e..2affcc9 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -530,6 +530,27 @@ if 1: check_limit("a", "[0]") check_limit("a", "*a") + def test_null_terminated(self): + # The source code is null-terminated internally, but bytes-like + # objects are accepted, which could be not terminated. + # Exception changed from TypeError to ValueError in 3.5 + with self.assertRaisesRegex(Exception, "cannot contain null"): + compile("123\x00", "", "eval") + with self.assertRaisesRegex(Exception, "cannot contain null"): + compile(memoryview(b"123\x00"), "", "eval") + code = compile(memoryview(b"123\x00")[1:-1], "", "eval") + self.assertEqual(eval(code), 23) + code = compile(memoryview(b"1234")[1:-1], "", "eval") + self.assertEqual(eval(code), 23) + code = compile(memoryview(b"$23$")[1:-1], "", "eval") + self.assertEqual(eval(code), 23) + + # Also test when eval() and exec() do the compilation step + self.assertEqual(eval(memoryview(b"1234")[1:-1]), 23) + namespace = dict() + exec(memoryview(b"ax = 123")[1:-1], namespace) + self.assertEqual(namespace['x'], 12) + class TestStackSize(unittest.TestCase): # These tests check that the computed stack size for a code object diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index e87aab0..504f39c 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -31,7 +31,6 @@ class GeneralFloatCases(unittest.TestCase): self.assertEqual(float(3.14), 3.14) self.assertEqual(float(314), 314.0) self.assertEqual(float(" 3.14 "), 3.14) - self.assertEqual(float(b" 3.14 "), 3.14) self.assertRaises(ValueError, float, " 0x3.1 ") self.assertRaises(ValueError, float, " -0x3.p-1 ") self.assertRaises(ValueError, float, " +0x3.p-1 ") @@ -43,7 +42,6 @@ class GeneralFloatCases(unittest.TestCase): self.assertRaises(ValueError, float, "+.inf") self.assertRaises(ValueError, float, ".") self.assertRaises(ValueError, float, "-.") - self.assertRaises(ValueError, float, b"-") self.assertRaises(TypeError, float, {}) self.assertRaisesRegex(TypeError, "not 'dict'", float, {}) # Lone surrogate @@ -57,6 +55,42 @@ class GeneralFloatCases(unittest.TestCase): float(b'.' + b'1'*1000) float('.' + '1'*1000) + def test_non_numeric_input_types(self): + # Test possible non-numeric types for the argument x, including + # subclasses of the explicitly documented accepted types. + class CustomStr(str): pass + class CustomBytes(bytes): pass + class CustomByteArray(bytearray): pass + + factories = [ + bytes, + bytearray, + lambda b: CustomStr(b.decode()), + CustomBytes, + CustomByteArray, + memoryview, + ] + try: + from array import array + except ImportError: + pass + else: + factories.append(lambda b: array('B', b)) + + for f in factories: + x = f(b" 3.14 ") + with self.subTest(type(x)): + self.assertEqual(float(x), 3.14) + with self.assertRaisesRegex(ValueError, "could not convert"): + float(f(b'A' * 0x10)) + + def test_float_memoryview(self): + self.assertEqual(float(memoryview(b'12.3')[1:4]), 2.3) + self.assertEqual(float(memoryview(b'12.3\x00')[1:4]), 2.3) + self.assertEqual(float(memoryview(b'12.3 ')[1:4]), 2.3) + self.assertEqual(float(memoryview(b'12.3A')[1:4]), 2.3) + self.assertEqual(float(memoryview(b'12.34')[1:4]), 2.3) + def test_error_message(self): testlist = ('\xbd', '123\xbd', ' 123 456 ') for s in testlist: diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index e94602e..ab3917f 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -276,16 +276,40 @@ class IntTestCases(unittest.TestCase): class CustomBytes(bytes): pass class CustomByteArray(bytearray): pass - values = [b'100', - bytearray(b'100'), - CustomStr('100'), - CustomBytes(b'100'), - CustomByteArray(b'100')] - - for x in values: - msg = 'x has type %s' % type(x).__name__ - self.assertEqual(int(x), 100, msg=msg) - self.assertEqual(int(x, 2), 4, msg=msg) + factories = [ + bytes, + bytearray, + lambda b: CustomStr(b.decode()), + CustomBytes, + CustomByteArray, + memoryview, + ] + try: + from array import array + except ImportError: + pass + else: + factories.append(lambda b: array('B', b)) + + for f in factories: + x = f(b'100') + with self.subTest(type(x)): + self.assertEqual(int(x), 100) + if isinstance(x, (str, bytes, bytearray)): + self.assertEqual(int(x, 2), 4) + else: + msg = "can't convert non-string" + with self.assertRaisesRegex(TypeError, msg): + int(x, 2) + with self.assertRaisesRegex(ValueError, 'invalid literal'): + int(f(b'A' * 0x10)) + + def test_int_memoryview(self): + self.assertEqual(int(memoryview(b'123')[1:3]), 23) + self.assertEqual(int(memoryview(b'123\x00')[1:3]), 23) + self.assertEqual(int(memoryview(b'123 ')[1:3]), 23) + self.assertEqual(int(memoryview(b'123A')[1:3]), 23) + self.assertEqual(int(memoryview(b'1234')[1:3]), 23) def test_string_float(self): self.assertRaises(ValueError, int, '1.2') diff --git a/Misc/NEWS b/Misc/NEWS index d477370..a0c1a6d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ Release date: tba Core and Builtins ----------------- +- Issue #24802: Avoid buffer overreads when int(), float(), compile(), exec() + and eval() are passed bytes-like objects. These objects are not + necessarily terminated by a null byte, but the functions assumed they were. + - Issue #24402: Fix input() to prompt to the redirected stdout when sys.stdout.fileno() fails. diff --git a/Objects/abstract.c b/Objects/abstract.c index a20a84c..5e96138 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1264,12 +1264,30 @@ PyNumber_Long(PyObject *o) /* The below check is done in PyLong_FromUnicode(). */ return PyLong_FromUnicodeObject(o, 10); - if (PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) == 0) { + if (PyBytes_Check(o)) /* need to do extra error checking that PyLong_FromString() * doesn't do. In particular int('9\x005') must raise an * exception, not truncate at the null. */ - PyObject *result = _PyLong_FromBytes(view.buf, view.len, 10); + return _PyLong_FromBytes(PyBytes_AS_STRING(o), + PyBytes_GET_SIZE(o), 10); + + if (PyByteArray_Check(o)) + return _PyLong_FromBytes(PyByteArray_AS_STRING(o), + PyByteArray_GET_SIZE(o), 10); + + if (PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) == 0) { + PyObject *result, *bytes; + + /* Copy to NUL-terminated buffer. */ + bytes = PyBytes_FromStringAndSize((const char *)view.buf, view.len); + if (bytes == NULL) { + PyBuffer_Release(&view); + return NULL; + } + result = _PyLong_FromBytes(PyBytes_AS_STRING(bytes), + PyBytes_GET_SIZE(bytes), 10); + Py_DECREF(bytes); PyBuffer_Release(&view); return result; } diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 7f4cdd9..7aaaeab 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -767,7 +767,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) int got_bracket=0; PyObject *s_buffer = NULL; Py_ssize_t len; - Py_buffer view = {NULL, NULL}; if (PyUnicode_Check(v)) { s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v); @@ -777,10 +776,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) if (s == NULL) goto error; } - else if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) == 0) { - s = (const char *)view.buf; - len = view.len; - } else { PyErr_Format(PyExc_TypeError, "complex() argument must be a string or a number, not '%.200s'", @@ -895,7 +890,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) if (s-start != len) goto parse_error; - PyBuffer_Release(&view); Py_XDECREF(s_buffer); return complex_subtype_from_doubles(type, x, y); @@ -903,7 +897,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) PyErr_SetString(PyExc_ValueError, "complex() arg is a malformed string"); error: - PyBuffer_Release(&view); Py_XDECREF(s_buffer); return NULL; } diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 1dca947..9c1b714 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -144,9 +144,24 @@ PyFloat_FromString(PyObject *v) return NULL; } } + else if (PyBytes_Check(v)) { + s = PyBytes_AS_STRING(v); + len = PyBytes_GET_SIZE(v); + } + else if (PyByteArray_Check(v)) { + s = PyByteArray_AS_STRING(v); + len = PyByteArray_GET_SIZE(v); + } else if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) == 0) { s = (const char *)view.buf; len = view.len; + /* Copy to NUL-terminated buffer. */ + s_buffer = PyBytes_FromStringAndSize(s, len); + if (s_buffer == NULL) { + PyBuffer_Release(&view); + return NULL; + } + s = PyBytes_AS_STRING(s_buffer); } else { PyErr_Format(PyExc_TypeError, diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index aed93e5..53009e3 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -560,20 +560,37 @@ Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff."); static const char * -source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, Py_buffer *view) +source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, PyObject **cmd_copy) { const char *str; Py_ssize_t size; + Py_buffer view; + *cmd_copy = NULL; if (PyUnicode_Check(cmd)) { cf->cf_flags |= PyCF_IGNORE_COOKIE; str = PyUnicode_AsUTF8AndSize(cmd, &size); if (str == NULL) return NULL; } - else if (PyObject_GetBuffer(cmd, view, PyBUF_SIMPLE) == 0) { - str = (const char *)view->buf; - size = view->len; + else if (PyBytes_Check(cmd)) { + str = PyBytes_AS_STRING(cmd); + size = PyBytes_GET_SIZE(cmd); + } + else if (PyByteArray_Check(cmd)) { + str = PyByteArray_AS_STRING(cmd); + size = PyByteArray_GET_SIZE(cmd); + } + else if (PyObject_GetBuffer(cmd, &view, PyBUF_SIMPLE) == 0) { + /* Copy to NUL-terminated buffer. */ + *cmd_copy = PyBytes_FromStringAndSize( + (const char *)view.buf, view.len); + PyBuffer_Release(&view); + if (*cmd_copy == NULL) { + return NULL; + } + str = PyBytes_AS_STRING(*cmd_copy); + size = PyBytes_GET_SIZE(*cmd_copy); } else { PyErr_Format(PyExc_TypeError, @@ -585,7 +602,7 @@ source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompil if (strlen(str) != size) { PyErr_SetString(PyExc_TypeError, "source code string cannot contain null bytes"); - PyBuffer_Release(view); + Py_CLEAR(*cmd_copy); return NULL; } return str; @@ -594,7 +611,7 @@ source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompil static PyObject * builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) { - Py_buffer view = {NULL, NULL}; + PyObject *cmd_copy; const char *str; PyObject *filename; char *startstr; @@ -681,12 +698,12 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) goto finally; } - str = source_as_string(cmd, "compile", "string, bytes or AST", &cf, &view); + str = source_as_string(cmd, "compile", "string, bytes or AST", &cf, &cmd_copy); if (str == NULL) goto error; result = Py_CompileStringObject(str, filename, start[mode], &cf, optimize); - PyBuffer_Release(&view); + Py_XDECREF(cmd_copy); goto finally; error: @@ -754,9 +771,8 @@ Return the tuple ((x-x%y)/y, x%y). Invariant: div*y + mod == x."); static PyObject * builtin_eval(PyObject *self, PyObject *args) { - PyObject *cmd, *result, *tmp = NULL; + PyObject *cmd, *result, *cmd_copy; PyObject *globals = Py_None, *locals = Py_None; - Py_buffer view = {NULL, NULL}; const char *str; PyCompilerFlags cf; @@ -806,7 +822,7 @@ builtin_eval(PyObject *self, PyObject *args) } cf.cf_flags = PyCF_SOURCE_IS_UTF8; - str = source_as_string(cmd, "eval", "string, bytes or code", &cf, &view); + str = source_as_string(cmd, "eval", "string, bytes or code", &cf, &cmd_copy); if (str == NULL) return NULL; @@ -815,8 +831,7 @@ builtin_eval(PyObject *self, PyObject *args) (void)PyEval_MergeCompilerFlags(&cf); result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf); - PyBuffer_Release(&view); - Py_XDECREF(tmp); + Py_XDECREF(cmd_copy); return result; } @@ -882,12 +897,13 @@ builtin_exec(PyObject *self, PyObject *args) v = PyEval_EvalCode(prog, globals, locals); } else { - Py_buffer view = {NULL, NULL}; + PyObject *prog_copy; const char *str; PyCompilerFlags cf; cf.cf_flags = PyCF_SOURCE_IS_UTF8; str = source_as_string(prog, "exec", - "string, bytes or code", &cf, &view); + "string, bytes or code", &cf, + &prog_copy); if (str == NULL) return NULL; if (PyEval_MergeCompilerFlags(&cf)) @@ -895,7 +911,7 @@ builtin_exec(PyObject *self, PyObject *args) locals, &cf); else v = PyRun_String(str, Py_file_input, globals, locals); - PyBuffer_Release(&view); + Py_XDECREF(prog_copy); } if (v == NULL) return NULL; -- cgit v0.12