From e7d8be80ba634fa15ece6f503c33592e0d333361 Mon Sep 17 00:00:00 2001 From: Neal Norwitz Date: Thu, 31 Jul 2008 17:17:14 +0000 Subject: Security patches from Apple: prevent int overflow when allocating memory --- Lib/test/seq_tests.py | 12 ++--- Lib/test/test_bigmem.py | 114 +++++++++++++++++++++++++++++++++++++++++++++-- Lib/test/test_strop.py | 19 ++++++++ Lib/test/test_support.py | 25 +++++++++++ Misc/NEWS | 2 + Modules/gcmodule.c | 7 ++- Modules/mmapmodule.c | 2 +- Modules/stropmodule.c | 15 +++++++ Objects/bufferobject.c | 4 ++ Objects/longobject.c | 2 + Objects/stringobject.c | 19 +++++++- Objects/tupleobject.c | 5 ++- Objects/unicodeobject.c | 61 +++++++++++++++++++------ 13 files changed, 258 insertions(+), 29 deletions(-) diff --git a/Lib/test/seq_tests.py b/Lib/test/seq_tests.py index c273a3f..14303aa 100644 --- a/Lib/test/seq_tests.py +++ b/Lib/test/seq_tests.py @@ -306,11 +306,13 @@ class CommonTest(unittest.TestCase): self.assertEqual(id(s), id(s*1)) def test_bigrepeat(self): - x = self.type2test([0]) - x *= 2**16 - self.assertRaises(MemoryError, x.__mul__, 2**16) - if hasattr(x, '__imul__'): - self.assertRaises(MemoryError, x.__imul__, 2**16) + import sys + if sys.maxint <= 2147483647: + x = self.type2test([0]) + x *= 2**16 + self.assertRaises(MemoryError, x.__mul__, 2**16) + if hasattr(x, '__imul__'): + self.assertRaises(MemoryError, x.__imul__, 2**16) def test_subscript(self): a = self.type2test([10, 11]) diff --git a/Lib/test/test_bigmem.py b/Lib/test/test_bigmem.py index d4fc6eb..360bcb2 100644 --- a/Lib/test/test_bigmem.py +++ b/Lib/test/test_bigmem.py @@ -1,5 +1,5 @@ from test import test_support -from test.test_support import bigmemtest, _1G, _2G +from test.test_support import bigmemtest, _1G, _2G, _4G, precisionbigmemtest import unittest import operator @@ -54,6 +54,22 @@ class StrTest(unittest.TestCase): self.assertEquals(s[lpadsize:-rpadsize], SUBSTR) self.assertEquals(s.strip(), SUBSTR.strip()) + @precisionbigmemtest(size=_2G - 1, memuse=1) + def test_center_unicode(self, size): + SUBSTR = u' abc def ghi' + try: + s = SUBSTR.center(size) + except OverflowError: + pass # acceptable on 32-bit + else: + self.assertEquals(len(s), size) + lpadsize = rpadsize = (len(s) - len(SUBSTR)) // 2 + if len(s) % 2: + lpadsize += 1 + self.assertEquals(s[lpadsize:-rpadsize], SUBSTR) + self.assertEquals(s.strip(), SUBSTR.strip()) + del s + @bigmemtest(minsize=_2G, memuse=2) def test_count(self, size): SUBSTR = ' abc def ghi' @@ -70,10 +86,51 @@ class StrTest(unittest.TestCase): s = '.' * size self.assertEquals(len(s.decode('utf-8')), size) + def basic_encode_test(self, size, enc, c=u'.', expectedsize=None): + if expectedsize is None: + expectedsize = size + + s = c * size + self.assertEquals(len(s.encode(enc)), expectedsize) + @bigmemtest(minsize=_2G + 2, memuse=3) def test_encode(self, size): - s = u'.' * size - self.assertEquals(len(s.encode('utf-8')), size) + return self.basic_encode_test(size, 'utf-8') + + @precisionbigmemtest(size=_4G / 6 + 2, memuse=2) + def test_encode_raw_unicode_escape(self, size): + try: + return self.basic_encode_test(size, 'raw_unicode_escape') + except MemoryError: + pass # acceptable on 32-bit + + @precisionbigmemtest(size=_4G / 5 + 70, memuse=3) + def test_encode_utf7(self, size): + try: + return self.basic_encode_test(size, 'utf7') + except MemoryError: + pass # acceptable on 32-bit + + @precisionbigmemtest(size=_4G / 4 + 5, memuse=6) + def test_encode_utf32(self, size): + try: + return self.basic_encode_test(size, 'utf32', expectedsize=4*size+4) + except MemoryError: + pass # acceptable on 32-bit + + @precisionbigmemtest(size=_2G-1, memuse=2) + def test_decodeascii(self, size): + return self.basic_encode_test(size, 'ascii', c='A') + + @precisionbigmemtest(size=_4G / 5, memuse=6+2) + def test_unicode_repr_oflw(self, size): + try: + s = u"\uAAAA"*size + r = repr(s) + except MemoryError: + pass # acceptable on 32-bit + else: + self.failUnless(s == eval(r)) @bigmemtest(minsize=_2G, memuse=2) def test_endswith(self, size): @@ -459,6 +516,11 @@ class StrTest(unittest.TestCase): self.assertEquals(s.count('\\'), size) self.assertEquals(s.count('0'), size * 2) + @bigmemtest(minsize=2**32 / 5, memuse=6+2) + def test_unicode_repr(self, size): + s = u"\uAAAA" * size + self.failUnless(len(repr(s)) > size) + # This test is meaningful even with size < 2G, as long as the # doubled string is > 2G (but it tests more if both are > 2G :) @bigmemtest(minsize=_1G + 2, memuse=3) @@ -642,6 +704,35 @@ class TupleTest(unittest.TestCase): def test_repeat_large(self, size): return self.basic_test_repeat(size) + @bigmemtest(minsize=_1G - 1, memuse=12) + def test_repeat_large_2(self, size): + return self.basic_test_repeat(size) + + @precisionbigmemtest(size=_1G - 1, memuse=9) + def test_from_2G_generator(self, size): + try: + t = tuple(xrange(size)) + except MemoryError: + pass # acceptable on 32-bit + else: + count = 0 + for item in t: + self.assertEquals(item, count) + count += 1 + self.assertEquals(count, size) + + @precisionbigmemtest(size=_1G - 25, memuse=9) + def test_from_almost_2G_generator(self, size): + try: + t = tuple(xrange(size)) + count = 0 + for item in t: + self.assertEquals(item, count) + count += 1 + self.assertEquals(count, size) + except MemoryError: + pass # acceptable, expected on 32-bit + # Like test_concat, split in two. def basic_test_repr(self, size): t = (0,) * size @@ -957,8 +1048,23 @@ class ListTest(unittest.TestCase): self.assertEquals(l[:10], [1] * 10) self.assertEquals(l[-10:], [5] * 10) +class BufferTest(unittest.TestCase): + + @precisionbigmemtest(size=_1G, memuse=4) + def test_repeat(self, size): + try: + b = buffer("AAAA")*size + except MemoryError: + pass # acceptable on 32-bit + else: + count = 0 + for c in b: + self.assertEquals(c, 'A') + count += 1 + self.assertEquals(count, size*4) + def test_main(): - test_support.run_unittest(StrTest, TupleTest, ListTest) + test_support.run_unittest(StrTest, TupleTest, ListTest, BufferTest) if __name__ == '__main__': if len(sys.argv) > 1: diff --git a/Lib/test/test_strop.py b/Lib/test/test_strop.py index 2ac7986..7cda83f 100644 --- a/Lib/test/test_strop.py +++ b/Lib/test/test_strop.py @@ -115,6 +115,25 @@ class StropFunctionTestCase(unittest.TestCase): strop.uppercase strop.whitespace + @test_support.precisionbigmemtest(size=test_support._2G - 1, memuse=5) + def test_stropjoin_huge_list(self, size): + a = "A" * size + try: + r = strop.join([a, a], a) + except OverflowError: + pass + else: + self.assertEquals(len(r), len(a) * 3) + + @test_support.precisionbigmemtest(size=test_support._2G - 1, memuse=1) + def test_stropjoin_huge_tup(self, size): + a = "A" * size + try: + r = strop.join((a, a), a) + except OverflowError: + pass # acceptable on 32-bit + else: + self.assertEquals(len(r), len(a) * 3) transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py index 1aa21d3..adcbdd1 100644 --- a/Lib/test/test_support.py +++ b/Lib/test/test_support.py @@ -67,6 +67,7 @@ verbose = 1 # Flag set to 0 by regrtest.py use_resources = None # Flag set to [] by regrtest.py max_memuse = 0 # Disable bigmem tests (they will still be run with # small sizes, to make sure they work.) +real_max_memuse = 0 # _original_stdout is meant to hold stdout at the time regrtest began. # This may be "the real" stdout, or IDLE's emulation of stdout, or whatever. @@ -611,12 +612,14 @@ def run_with_locale(catstr, *locales): _1M = 1024*1024 _1G = 1024 * _1M _2G = 2 * _1G +_4G = 4 * _1G MAX_Py_ssize_t = sys.maxsize def set_memlimit(limit): import re global max_memuse + global real_max_memuse sizes = { 'k': 1024, 'm': _1M, @@ -628,6 +631,7 @@ def set_memlimit(limit): if m is None: raise ValueError('Invalid memory limit %r' % (limit,)) memlimit = int(float(m.group(1)) * sizes[m.group(3).lower()]) + real_max_memuse = memlimit if memlimit > MAX_Py_ssize_t: memlimit = MAX_Py_ssize_t if memlimit < _2G - 1: @@ -673,6 +677,27 @@ def bigmemtest(minsize, memuse, overhead=5*_1M): return wrapper return decorator +def precisionbigmemtest(size, memuse, overhead=5*_1M): + def decorator(f): + def wrapper(self): + if not real_max_memuse: + maxsize = 5147 + else: + maxsize = size + + if real_max_memuse and real_max_memuse < maxsize * memuse: + if verbose: + sys.stderr.write("Skipping %s because of memory " + "constraint\n" % (f.__name__,)) + return + + return f(self, maxsize) + wrapper.size = size + wrapper.memuse = memuse + wrapper.overhead = overhead + return wrapper + return decorator + def bigaddrspacetest(f): """Decorator for tests that fill the address space.""" def wrapper(self): diff --git a/Misc/NEWS b/Misc/NEWS index e93b5fe..723d073 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,8 @@ What's New in Python 2.6 beta 3? Core and Builtins ----------------- +- Apply security patches from Apple. + - Issue #2542: Now that issubclass() may call arbitrary code, ensure that PyErr_ExceptionMatches returns 0 when an exception occurs there. diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index b8f9c31..6f12972 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -1342,7 +1342,10 @@ PyObject * _PyObject_GC_Malloc(size_t basicsize) { PyObject *op; - PyGC_Head *g = (PyGC_Head *)PyObject_MALLOC( + PyGC_Head *g; + if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) + return PyErr_NoMemory(); + g = (PyGC_Head *)PyObject_MALLOC( sizeof(PyGC_Head) + basicsize); if (g == NULL) return PyErr_NoMemory(); @@ -1385,6 +1388,8 @@ _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) { const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems); PyGC_Head *g = AS_GC(op); + if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) + return (PyVarObject *)PyErr_NoMemory(); g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize); if (g == NULL) return (PyVarObject *)PyErr_NoMemory(); diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index c71d840..08b5a96 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -239,7 +239,7 @@ mmap_read_method(mmap_object *self, return(NULL); /* silently 'adjust' out-of-range requests */ - if ((self->pos + num_bytes) > self->size) { + if (num_bytes > self->size - self->pos) { num_bytes -= (self->pos+num_bytes) - self->size; } result = Py_BuildValue("s#", self->data+self->pos, num_bytes); diff --git a/Modules/stropmodule.c b/Modules/stropmodule.c index bc60959..2d88474 100644 --- a/Modules/stropmodule.c +++ b/Modules/stropmodule.c @@ -216,6 +216,13 @@ strop_joinfields(PyObject *self, PyObject *args) return NULL; } slen = PyString_GET_SIZE(item); + if (slen > PY_SSIZE_T_MAX - reslen || + seplen > PY_SSIZE_T_MAX - reslen - seplen) { + PyErr_SetString(PyExc_OverflowError, + "input too long"); + Py_DECREF(res); + return NULL; + } while (reslen + slen + seplen >= sz) { if (_PyString_Resize(&res, sz * 2) < 0) return NULL; @@ -253,6 +260,14 @@ strop_joinfields(PyObject *self, PyObject *args) return NULL; } slen = PyString_GET_SIZE(item); + if (slen > PY_SSIZE_T_MAX - reslen || + seplen > PY_SSIZE_T_MAX - reslen - seplen) { + PyErr_SetString(PyExc_OverflowError, + "input too long"); + Py_DECREF(res); + Py_XDECREF(item); + return NULL; + } while (reslen + slen + seplen >= sz) { if (_PyString_Resize(&res, sz * 2) < 0) { Py_DECREF(item); diff --git a/Objects/bufferobject.c b/Objects/bufferobject.c index 3bd8c6b..9a5c39f 100644 --- a/Objects/bufferobject.c +++ b/Objects/bufferobject.c @@ -431,6 +431,10 @@ buffer_repeat(PyBufferObject *self, Py_ssize_t count) count = 0; if (!get_buf(self, &ptr, &size, ANY_BUFFER)) return NULL; + if (count > PY_SSIZE_T_MAX / size) { + PyErr_SetString(PyExc_MemoryError, "result too large"); + return NULL; + } ob = PyString_FromStringAndSize(NULL, size * count); if ( ob == NULL ) return NULL; diff --git a/Objects/longobject.c b/Objects/longobject.c index 2c228bb..228376a 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -70,6 +70,8 @@ _PyLong_New(Py_ssize_t size) return NULL; } /* coverity[ampersand_in_size] */ + /* XXX(nnorwitz): This can overflow -- + PyObject_NEW_VAR / _PyObject_VAR_SIZE need to detect overflow */ return PyObject_NEW_VAR(PyLongObject, &PyLong_Type, size); } diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 793cc88..0d2ceb1 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -74,6 +74,11 @@ PyString_FromStringAndSize(const char *str, Py_ssize_t size) return (PyObject *)op; } + if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) { + PyErr_SetString(PyExc_OverflowError, "string is too large"); + return NULL; + } + /* Inline PyObject_NewVar */ op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size); if (op == NULL) @@ -109,7 +114,7 @@ PyString_FromString(const char *str) assert(str != NULL); size = strlen(str); - if (size > PY_SSIZE_T_MAX) { + if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) { PyErr_SetString(PyExc_OverflowError, "string is too long for a Python string"); return NULL; @@ -977,13 +982,23 @@ string_concat(register PyStringObject *a, register PyObject *bb) return (PyObject *)a; } size = Py_SIZE(a) + Py_SIZE(b); - if (size < 0) { + /* Check that string sizes are not negative, to prevent an + overflow in cases where we are passed incorrectly-created + strings with negative lengths (due to a bug in other code). + */ + if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 || + Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) { PyErr_SetString(PyExc_OverflowError, "strings are too large to concat"); return NULL; } /* Inline PyObject_NewVar */ + if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) { + PyErr_SetString(PyExc_OverflowError, + "strings are too large to concat"); + return NULL; + } op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size); if (op == NULL) return PyErr_NoMemory(); diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 79d7553..348ae8c 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -60,11 +60,12 @@ PyTuple_New(register Py_ssize_t size) Py_ssize_t nbytes = size * sizeof(PyObject *); /* Check for overflow */ if (nbytes / sizeof(PyObject *) != (size_t)size || - (nbytes += sizeof(PyTupleObject) - sizeof(PyObject *)) - <= 0) + (nbytes > PY_SSIZE_T_MAX - sizeof(PyTupleObject) - sizeof(PyObject *))) { return PyErr_NoMemory(); } + nbytes += sizeof(PyTupleObject) - sizeof(PyObject *); + op = PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size); if (op == NULL) return NULL; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 16d97c3..7abf984 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -299,6 +299,11 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize_t length) return unicode_empty; } + /* Ensure we won't overflow the size. */ + if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) { + return (PyUnicodeObject *)PyErr_NoMemory(); + } + /* Unicode freelist & memory allocation */ if (free_list) { unicode = free_list; @@ -1651,6 +1656,9 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, char * out; char * start; + if (cbAllocated / 5 != size) + return PyErr_NoMemory(); + if (size == 0) return PyString_FromStringAndSize(NULL, 0); @@ -2245,8 +2253,9 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s, { PyObject *v; unsigned char *p; + Py_ssize_t nsize, bytesize; #ifndef Py_UNICODE_WIDE - int i, pairs; + Py_ssize_t i, pairs; #else const int pairs = 0; #endif @@ -2274,8 +2283,11 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s, 0xDC00 <= s[i+1] && s[i+1] <= 0xDFFF) pairs++; #endif - v = PyString_FromStringAndSize(NULL, - 4 * (size - pairs + (byteorder == 0))); + nsize = (size - pairs + (byteorder == 0)); + bytesize = nsize * 4; + if (bytesize / 4 != nsize) + return PyErr_NoMemory(); + v = PyString_FromStringAndSize(NULL, bytesize); if (v == NULL) return NULL; @@ -2515,8 +2527,9 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s, { PyObject *v; unsigned char *p; + Py_ssize_t nsize, bytesize; #ifdef Py_UNICODE_WIDE - int i, pairs; + Py_ssize_t i, pairs; #else const int pairs = 0; #endif @@ -2539,8 +2552,15 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s, if (s[i] >= 0x10000) pairs++; #endif - v = PyString_FromStringAndSize(NULL, - 2 * (size + pairs + (byteorder == 0))); + /* 2 * (size + pairs + (byteorder == 0)) */ + if (size > PY_SSIZE_T_MAX || + size > PY_SSIZE_T_MAX - pairs - (byteorder == 0)) + return PyErr_NoMemory(); + nsize = size + pairs + (byteorder == 0); + bytesize = nsize * 2; + if (bytesize / 2 != nsize) + return PyErr_NoMemory(); + v = PyString_FromStringAndSize(NULL, bytesize); if (v == NULL) return NULL; @@ -2868,6 +2888,11 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, char *p; static const char *hexdigit = "0123456789abcdef"; +#ifdef Py_UNICODE_WIDE + const Py_ssize_t expandsize = 10; +#else + const Py_ssize_t expandsize = 6; +#endif /* XXX(nnorwitz): rather than over-allocating, it would be better to choose a different scheme. Perhaps scan the @@ -2887,13 +2912,12 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, escape. */ + if (size > (PY_SSIZE_T_MAX - 2 - 1) / expandsize) + return PyErr_NoMemory(); + repr = PyString_FromStringAndSize(NULL, 2 -#ifdef Py_UNICODE_WIDE - + 10*size -#else - + 6*size -#endif + + expandsize*size + 1); if (repr == NULL) return NULL; @@ -3146,12 +3170,16 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, char *q; static const char *hexdigit = "0123456789abcdef"; - #ifdef Py_UNICODE_WIDE - repr = PyString_FromStringAndSize(NULL, 10 * size); + const Py_ssize_t expandsize = 10; #else - repr = PyString_FromStringAndSize(NULL, 6 * size); + const Py_ssize_t expandsize = 6; #endif + + if (size > PY_SSIZE_T_MAX / expandsize) + return PyErr_NoMemory(); + + repr = PyString_FromStringAndSize(NULL, expandsize * size); if (repr == NULL) return NULL; if (size == 0) @@ -5574,6 +5602,11 @@ PyUnicodeObject *pad(PyUnicodeObject *self, return self; } + if (left > PY_SSIZE_T_MAX - self->length || + right > PY_SSIZE_T_MAX - (left + self->length)) { + PyErr_SetString(PyExc_OverflowError, "padded string is too long"); + return NULL; + } u = _PyUnicode_New(left + self->length + right); if (u) { if (left) -- cgit v0.12