From b2d0f342404de73905e4febb8c202f41fc0f654f Mon Sep 17 00:00:00 2001 From: Armin Rigo Date: Wed, 4 Oct 2006 10:13:32 +0000 Subject: A review of overflow-detecting code in the 2.4 branch. * unified the way intobject, longobject and mystrtoul handle values around -sys.maxint-1. * in general, trying to entierely avoid overflows in any computation involving signed ints or longs is extremely involved. Fixed a few simple cases where a compiler might be too clever (but that's all guesswork). * more overflow checks against bad data in marshal.c. --- Lib/test/test_builtin.py | 11 +++++++++-- Lib/test/test_long.py | 10 ++++++++-- Misc/NEWS | 5 +++-- Objects/abstract.c | 8 ++++---- Objects/fileobject.c | 7 +++++-- Objects/intobject.c | 24 ++++++++++++++--------- Objects/listobject.c | 14 ++++++++------ Objects/longobject.c | 50 +++++++++++++++++++++++++++++++++--------------- Objects/stringobject.c | 2 +- Objects/unicodeobject.c | 2 ++ Python/marshal.c | 42 +++++++++++++++++++++++++++------------- Python/mystrtoul.c | 32 +++++++++++++++++++------------ 12 files changed, 139 insertions(+), 68 deletions(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index f8a5047..12abc79 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -113,6 +113,11 @@ class BuiltinTest(unittest.TestCase): # str self.assertRaises(TypeError, abs, 'a') + def test_neg(self): + x = -sys.maxint-1 + self.assert_(isinstance(x, int)) + self.assertEqual(-x, sys.maxint+1) + def test_apply(self): def f0(*args): self.assertEqual(args, ()) @@ -574,9 +579,11 @@ class BuiltinTest(unittest.TestCase): pass s = repr(-1-sys.maxint) - self.assertEqual(int(s)+1, -sys.maxint) + x = int(s) + self.assertEqual(x+1, -sys.maxint) + self.assert_(isinstance(x, int)) # should return long - int(s[1:]) + self.assertEqual(int(s[1:]), sys.maxint+1) # should return long x = int(1e100) diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index 74ae7c6..c0ecc2b 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -253,16 +253,22 @@ def test_misc(maxdigits=MAXDIGITS): "long(-sys.maxint-1) != -sys.maxint-1") # long -> int should not fail for hugepos_aslong or hugeneg_aslong + x = int(hugepos_aslong) try: - check(int(hugepos_aslong) == hugepos, + check(x == hugepos, "converting sys.maxint to long and back to int fails") except OverflowError: raise TestFailed, "int(long(sys.maxint)) overflowed!" + if not isinstance(x, int): + raise TestFailed("int(long(sys.maxint)) should have returned int") + x = int(hugeneg_aslong) try: - check(int(hugeneg_aslong) == hugeneg, + check(x == hugeneg, "converting -sys.maxint-1 to long and back to int fails") except OverflowError: raise TestFailed, "int(long(-sys.maxint-1)) overflowed!" + if not isinstance(x, int): + raise TestFailed("int(long(-sys.maxint-1)) should have returned int") # but long -> int should overflow for hugepos+1 and hugeneg-1 x = hugepos_aslong + 1 diff --git a/Misc/NEWS b/Misc/NEWS index e505a32..096030a6 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,8 +12,9 @@ What's New in Python 2.4.4c1? Core and builtins ----------------- -- Integer negation and absolute value were fixed to not rely - on undefined behaviour of the C compiler anymore. +- A number of places, including integer negation and absolute value, + were fixed to not rely on undefined behaviour of the C compiler + anymore. - Patch #1567691: super() and new.instancemethod() now don't accept keyword arguments any more (previously they accepted them, but didn't diff --git a/Objects/abstract.c b/Objects/abstract.c index 159af34..7d9f9b6 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1591,12 +1591,12 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation) if (cmp > 0) { switch (operation) { case PY_ITERSEARCH_COUNT: - ++n; - if (n <= 0) { + if (n == INT_MAX) { PyErr_SetString(PyExc_OverflowError, "count exceeds C int size"); goto Fail; } + ++n; break; case PY_ITERSEARCH_INDEX: @@ -1617,9 +1617,9 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation) } if (operation == PY_ITERSEARCH_INDEX) { - ++n; - if (n <= 0) + if (n == INT_MAX) wrapped = 1; + ++n; } } diff --git a/Objects/fileobject.c b/Objects/fileobject.c index a66846c..0fcad12 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -908,6 +908,7 @@ getline_via_fgets(FILE *fp) size_t nfree; /* # of free buffer slots; pvend-pvfree */ size_t total_v_size; /* total # of slots in buffer */ size_t increment; /* amount to increment the buffer */ + size_t prev_v_size; /* Optimize for normal case: avoid _PyString_Resize if at all * possible via first reading into stack buffer "buf". @@ -1020,8 +1021,10 @@ getline_via_fgets(FILE *fp) /* expand buffer and try again */ assert(*(pvend-1) == '\0'); increment = total_v_size >> 2; /* mild exponential growth */ + prev_v_size = total_v_size; total_v_size += increment; - if (total_v_size > INT_MAX) { + /* check for overflow */ + if (total_v_size <= prev_v_size || total_v_size > INT_MAX) { PyErr_SetString(PyExc_OverflowError, "line is longer than a Python string can hold"); Py_DECREF(v); @@ -1030,7 +1033,7 @@ getline_via_fgets(FILE *fp) if (_PyString_Resize(&v, (int)total_v_size) < 0) return NULL; /* overwrite the trailing null byte */ - pvfree = BUF(v) + (total_v_size - increment - 1); + pvfree = BUF(v) + (prev_v_size - 1); } if (BUF(v) + total_v_size != p) _PyString_Resize(&v, p - BUF(v)); diff --git a/Objects/intobject.c b/Objects/intobject.c index 579fe7d..5949672 100644 --- a/Objects/intobject.c +++ b/Objects/intobject.c @@ -460,6 +460,17 @@ int_mul(PyObject *v, PyObject *w) } } +/* Integer overflow checking for unary negation: on a 2's-complement + * box, -x overflows iff x is the most negative long. In this case we + * get -x == x. However, -x is undefined (by C) if x /is/ the most + * negative long (it's a signed overflow case), and some compilers care. + * So we cast x to unsigned long first. However, then other compilers + * warn about applying unary minus to an unsigned operand. Hence the + * weird "0-". + */ +#define UNARY_NEG_WOULD_OVERFLOW(x) \ + ((x) < 0 && (unsigned long)(x) == 0-(unsigned long)(x)) + /* Return type of i_divmod */ enum divmod_result { DIVMOD_OK, /* Correct result */ @@ -478,14 +489,8 @@ i_divmod(register long x, register long y, "integer division or modulo by zero"); return DIVMOD_ERROR; } - /* (-sys.maxint-1)/-1 is the only overflow case. x is the most - * negative long iff x < 0 and, on a 2's-complement box, x == -x. - * However, -x is undefined (by C) if x /is/ the most negative long - * (it's a signed overflow case), and some compilers care. So we cast - * x to unsigned long first. However, then other compilers warn about - * applying unary minus to an unsigned operand. Hence the weird "0-". - */ - if (y == -1 && x < 0 && (unsigned long)x == 0-(unsigned long)x) + /* (-sys.maxint-1)/-1 is the only overflow case. */ + if (y == -1 && UNARY_NEG_WOULD_OVERFLOW(x)) return DIVMOD_OVERFLOW; xdivy = x / y; xmody = x - xdivy * y; @@ -676,7 +681,8 @@ int_neg(PyIntObject *v) { register long a; a = v->ob_ival; - if (a < 0 && (unsigned long)a == 0-(unsigned long)a) { + /* check for overflow */ + if (UNARY_NEG_WOULD_OVERFLOW(a)) { PyObject *o = PyLong_FromLong(a); if (o != NULL) { PyObject *result = PyNumber_Negative(o); diff --git a/Objects/listobject.c b/Objects/listobject.c index 40d4620..b496997 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -860,7 +860,8 @@ list_inplace_concat(PyListObject *self, PyObject *other) static PyObject * listpop(PyListObject *self, PyObject *args) { - int i = -1; + long ii = -1; + int i; PyObject *v, *arg = NULL; int status; @@ -868,8 +869,8 @@ listpop(PyListObject *self, PyObject *args) return NULL; if (arg != NULL) { if (PyInt_Check(arg)) - i = (int)(PyInt_AS_LONG((PyIntObject*) arg)); - else if (!PyArg_ParseTuple(args, "|i:pop", &i)) + ii = PyInt_AS_LONG((PyIntObject*) arg); + else if (!PyArg_ParseTuple(args, "|l:pop", &ii)) return NULL; } if (self->ob_size == 0) { @@ -877,12 +878,13 @@ listpop(PyListObject *self, PyObject *args) PyErr_SetString(PyExc_IndexError, "pop from empty list"); return NULL; } - if (i < 0) - i += self->ob_size; - if (i < 0 || i >= self->ob_size) { + if (ii < 0) + ii += self->ob_size; + if (ii < 0 || ii >= self->ob_size) { PyErr_SetString(PyExc_IndexError, "pop index out of range"); return NULL; } + i = (int)ii; v = self->ob_item[i]; if (i == self->ob_size - 1) { status = list_resize(self, self->ob_size - 1); diff --git a/Objects/longobject.c b/Objects/longobject.c index f4a9a42..5847feb 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -188,6 +188,17 @@ PyLong_FromDouble(double dval) return (PyObject *)v; } +/* Checking for overflow in PyLong_AsLong is a PITA since C doesn't define + * anything about what happens when a signed integer operation overflows, + * and some compilers think they're doing you a favor by being "clever" + * then. The bit pattern for the largest postive signed long is + * (unsigned long)LONG_MAX, and for the smallest negative signed long + * it is abs(LONG_MIN), which we could write -(unsigned long)LONG_MIN. + * However, some other compilers warn about applying unary minus to an + * unsigned operand. Hence the weird "0-". + */ +#define Py_ABS_LONG_MIN (0-(unsigned long)LONG_MIN) + /* Get a C long int from a long int object. Returns -1 and sets an error condition if overflow occurs. */ @@ -219,14 +230,16 @@ PyLong_AsLong(PyObject *vv) if ((x >> SHIFT) != prev) goto overflow; } - /* Haven't lost any bits, but if the sign bit is set we're in - * trouble *unless* this is the min negative number. So, - * trouble iff sign bit set && (positive || some bit set other - * than the sign bit). + /* Haven't lost any bits, but casting to long requires extra care + * (see comment above). */ - if ((long)x < 0 && (sign > 0 || (x << 1) != 0)) - goto overflow; - return (long)x * sign; + if (x <= (unsigned long)LONG_MAX) { + return (long)x * sign; + } + else if (sign < 0 && x == Py_ABS_LONG_MIN) { + return LONG_MIN; + } + /* else overflow */ overflow: PyErr_SetString(PyExc_OverflowError, @@ -1042,7 +1055,7 @@ long_format(PyObject *aa, int base, int addL) { register PyLongObject *a = (PyLongObject *)aa; PyStringObject *str; - int i; + int i, j, sz; int size_a; char *p; int bits; @@ -1062,11 +1075,18 @@ long_format(PyObject *aa, int base, int addL) ++bits; i >>= 1; } - i = 5 + (addL ? 1 : 0) + (size_a*SHIFT + bits-1) / bits; - str = (PyStringObject *) PyString_FromStringAndSize((char *)0, i); + i = 5 + (addL ? 1 : 0); + j = size_a*SHIFT + bits-1; + sz = i + j / bits; + if (j / SHIFT < size_a || sz < i) { + PyErr_SetString(PyExc_OverflowError, + "long is too large to format"); + return NULL; + } + str = (PyStringObject *) PyString_FromStringAndSize((char *)0, sz); if (str == NULL) return NULL; - p = PyString_AS_STRING(str) + i; + p = PyString_AS_STRING(str) + sz; *p = '\0'; if (addL) *--p = 'L'; @@ -1224,14 +1244,14 @@ long_from_binary_base(char **str, int base) ++p; } *str = p; - n = (p - start) * bits_per_char; - if (n / bits_per_char != p - start) { + /* n <- # of Python digits needed, = ceiling(n/SHIFT). */ + n = (p - start) * bits_per_char + SHIFT - 1; + if (n / bits_per_char < p - start) { PyErr_SetString(PyExc_ValueError, "long string too large to convert"); return NULL; } - /* n <- # of Python digits needed, = ceiling(n/SHIFT). */ - n = (n + SHIFT - 1) / SHIFT; + n = n / SHIFT; z = _PyLong_New(n); if (z == NULL) return NULL; diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 7749167..e7ce15f 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -799,7 +799,7 @@ PyString_Repr(PyObject *obj, int smartquotes) register PyStringObject* op = (PyStringObject*) obj; size_t newsize = 2 + 4 * op->ob_size; PyObject *v; - if (newsize > INT_MAX) { + if (newsize > INT_MAX || newsize / 4 != op->ob_size) { PyErr_SetString(PyExc_OverflowError, "string is too large to make repr"); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d65355a..ddfeee2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2316,6 +2316,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s, PyObject *exc = NULL; unimax = PyUnicode_GetMax(); + /* XXX overflow detection missing */ v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE); if (v == NULL) goto onError; @@ -2931,6 +2932,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, int needed = (targetsize - extrachars) + \ (targetsize << 2); extrachars += needed; + /* XXX overflow detection missing */ if (_PyUnicode_Resize(&v, PyUnicode_GET_SIZE(v) + needed) < 0) { Py_DECREF(x); diff --git a/Python/marshal.c b/Python/marshal.c index df88bcb..4f4d6b3 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -451,6 +451,11 @@ r_object(RFILE *p) int size; PyLongObject *ob; n = r_long(p); + if (n < -INT_MAX || n > INT_MAX) { + PyErr_SetString(PyExc_ValueError, + "bad marshal data"); + return NULL; + } size = n<0 ? -n : n; ob = _PyLong_New(size); if (ob == NULL) @@ -518,7 +523,7 @@ r_object(RFILE *p) case TYPE_INTERNED: case TYPE_STRING: n = r_long(p); - if (n < 0) { + if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); return NULL; } @@ -539,6 +544,10 @@ r_object(RFILE *p) case TYPE_STRINGREF: n = r_long(p); + if (n < 0 || n >= PyList_GET_SIZE(p->strings)) { + PyErr_SetString(PyExc_ValueError, "bad marshal data"); + return NULL; + } v = PyList_GET_ITEM(p->strings, n); Py_INCREF(v); return v; @@ -549,7 +558,7 @@ r_object(RFILE *p) char *buffer; n = r_long(p); - if (n < 0) { + if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); return NULL; } @@ -570,7 +579,7 @@ r_object(RFILE *p) case TYPE_TUPLE: n = r_long(p); - if (n < 0) { + if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); return NULL; } @@ -593,7 +602,7 @@ r_object(RFILE *p) case TYPE_LIST: n = r_long(p); - if (n < 0) { + if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); return NULL; } @@ -643,10 +652,11 @@ r_object(RFILE *p) return NULL; } else { - int argcount = r_long(p); - int nlocals = r_long(p); - int stacksize = r_long(p); - int flags = r_long(p); + /* XXX ignore long->int overflows for now */ + int argcount = (int)r_long(p); + int nlocals = (int)r_long(p); + int stacksize = (int)r_long(p); + int flags = (int)r_long(p); PyObject *code = r_object(p); PyObject *consts = r_object(p); PyObject *names = r_object(p); @@ -655,7 +665,7 @@ r_object(RFILE *p) PyObject *cellvars = r_object(p); PyObject *filename = r_object(p); PyObject *name = r_object(p); - int firstlineno = r_long(p); + int firstlineno = (int)r_long(p); PyObject *lnotab = r_object(p); if (!PyErr_Occurred()) { @@ -821,10 +831,16 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) wf.strings = (version > 0) ? PyDict_New() : NULL; w_object(x, &wf); Py_XDECREF(wf.strings); - if (wf.str != NULL) - _PyString_Resize(&wf.str, - (int) (wf.ptr - - PyString_AS_STRING((PyStringObject *)wf.str))); + if (wf.str != NULL) { + char *base = PyString_AS_STRING((PyStringObject *)wf.str); + if (wf.ptr - base > INT_MAX) { + Py_DECREF(wf.str); + PyErr_SetString(PyExc_OverflowError, + "too much marshall data for a string"); + return NULL; + } + _PyString_Resize(&wf.str, (int)(wf.ptr - base)); + } if (wf.error) { Py_XDECREF(wf.str); PyErr_SetString(PyExc_ValueError, diff --git a/Python/mystrtoul.c b/Python/mystrtoul.c index 8e60c0c..d9b3a4d 100644 --- a/Python/mystrtoul.c +++ b/Python/mystrtoul.c @@ -122,30 +122,38 @@ PyOS_strtoul(register char *str, char **ptr, int base) return result; } +/* Checking for overflow in PyOS_strtol is a PITA; see comments + * about Py_ABS_LONG_MIN in longobject.c. + */ +#define Py_ABS_LONG_MIN (0-(unsigned long)LONG_MIN) + long PyOS_strtol(char *str, char **ptr, int base) { long result; + unsigned long uresult; char sign; - + while (*str && isspace(Py_CHARMASK(*str))) str++; - + sign = *str; if (sign == '+' || sign == '-') str++; - - result = (long) PyOS_strtoul(str, ptr, base); - - /* Signal overflow if the result appears negative, - except for the largest negative integer */ - if (result < 0 && !(sign == '-' && result == -result)) { + + uresult = PyOS_strtoul(str, ptr, base); + + if (uresult <= (unsigned long)LONG_MAX) { + result = (long)uresult; + if (sign == '-') + result = -result; + } + else if (sign == '-' && uresult == Py_ABS_LONG_MIN) { + result = LONG_MIN; + } + else { errno = ERANGE; result = 0x7fffffff; } - - if (sign == '-') - result = -result; - return result; } -- cgit v0.12