From b2d0f342404de73905e4febb8c202f41fc0f654f Mon Sep 17 00:00:00 2001
From: Armin Rigo <arigo@tunes.org>
Date: Wed, 4 Oct 2006 10:13:32 +0000
Subject: A review of overflow-detecting code in the 2.4 branch.

* unified the way intobject, longobject and mystrtoul handle
  values around -sys.maxint-1.

* in general, trying to entierely avoid overflows in any computation
  involving signed ints or longs is extremely involved.  Fixed a few
  simple cases where a compiler might be too clever (but that's all
  guesswork).

* more overflow checks against bad data in marshal.c.
---
 Lib/test/test_builtin.py | 11 +++++++++--
 Lib/test/test_long.py    | 10 ++++++++--
 Misc/NEWS                |  5 +++--
 Objects/abstract.c       |  8 ++++----
 Objects/fileobject.c     |  7 +++++--
 Objects/intobject.c      | 24 ++++++++++++++---------
 Objects/listobject.c     | 14 ++++++++------
 Objects/longobject.c     | 50 +++++++++++++++++++++++++++++++++---------------
 Objects/stringobject.c   |  2 +-
 Objects/unicodeobject.c  |  2 ++
 Python/marshal.c         | 42 +++++++++++++++++++++++++++-------------
 Python/mystrtoul.c       | 32 +++++++++++++++++++------------
 12 files changed, 139 insertions(+), 68 deletions(-)

diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index f8a5047..12abc79 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -113,6 +113,11 @@ class BuiltinTest(unittest.TestCase):
         # str
         self.assertRaises(TypeError, abs, 'a')
 
+    def test_neg(self):
+        x = -sys.maxint-1
+        self.assert_(isinstance(x, int))
+        self.assertEqual(-x, sys.maxint+1)
+
     def test_apply(self):
         def f0(*args):
             self.assertEqual(args, ())
@@ -574,9 +579,11 @@ class BuiltinTest(unittest.TestCase):
                         pass
 
         s = repr(-1-sys.maxint)
-        self.assertEqual(int(s)+1, -sys.maxint)
+        x = int(s)
+        self.assertEqual(x+1, -sys.maxint)
+        self.assert_(isinstance(x, int))
         # should return long
-        int(s[1:])
+        self.assertEqual(int(s[1:]), sys.maxint+1)
 
         # should return long
         x = int(1e100)
diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py
index 74ae7c6..c0ecc2b 100644
--- a/Lib/test/test_long.py
+++ b/Lib/test/test_long.py
@@ -253,16 +253,22 @@ def test_misc(maxdigits=MAXDIGITS):
         "long(-sys.maxint-1) != -sys.maxint-1")
 
     # long -> int should not fail for hugepos_aslong or hugeneg_aslong
+    x = int(hugepos_aslong)
     try:
-        check(int(hugepos_aslong) == hugepos,
+        check(x == hugepos,
               "converting sys.maxint to long and back to int fails")
     except OverflowError:
         raise TestFailed, "int(long(sys.maxint)) overflowed!"
+    if not isinstance(x, int):
+        raise TestFailed("int(long(sys.maxint)) should have returned int")
+    x = int(hugeneg_aslong)
     try:
-        check(int(hugeneg_aslong) == hugeneg,
+        check(x == hugeneg,
               "converting -sys.maxint-1 to long and back to int fails")
     except OverflowError:
         raise TestFailed, "int(long(-sys.maxint-1)) overflowed!"
+    if not isinstance(x, int):
+        raise TestFailed("int(long(-sys.maxint-1)) should have returned int")
 
     # but long -> int should overflow for hugepos+1 and hugeneg-1
     x = hugepos_aslong + 1
diff --git a/Misc/NEWS b/Misc/NEWS
index e505a32..096030a6 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,8 +12,9 @@ What's New in Python 2.4.4c1?
 Core and builtins
 -----------------
 
-- Integer negation and absolute value were fixed to not rely
-  on undefined behaviour of the C compiler anymore.
+- A number of places, including integer negation and absolute value,
+  were fixed to not rely on undefined behaviour of the C compiler
+  anymore.
 
 - Patch #1567691: super() and new.instancemethod() now don't accept
   keyword arguments any more (previously they accepted them, but didn't
diff --git a/Objects/abstract.c b/Objects/abstract.c
index 159af34..7d9f9b6 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -1591,12 +1591,12 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation)
 		if (cmp > 0) {
 			switch (operation) {
 			case PY_ITERSEARCH_COUNT:
-				++n;
-				if (n <= 0) {
+				if (n == INT_MAX) {
 					PyErr_SetString(PyExc_OverflowError,
 				                "count exceeds C int size");
 					goto Fail;
 				}
+				++n;
 				break;
 
 			case PY_ITERSEARCH_INDEX:
@@ -1617,9 +1617,9 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation)
 		}
 
 		if (operation == PY_ITERSEARCH_INDEX) {
-			++n;
-			if (n <= 0)
+			if (n == INT_MAX)
 				wrapped = 1;
+			++n;
 		}
 	}
 
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index a66846c..0fcad12 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -908,6 +908,7 @@ getline_via_fgets(FILE *fp)
 	size_t nfree;	/* # of free buffer slots; pvend-pvfree */
 	size_t total_v_size;  /* total # of slots in buffer */
 	size_t increment;	/* amount to increment the buffer */
+	size_t prev_v_size;
 
 	/* Optimize for normal case:  avoid _PyString_Resize if at all
 	 * possible via first reading into stack buffer "buf".
@@ -1020,8 +1021,10 @@ getline_via_fgets(FILE *fp)
 		/* expand buffer and try again */
 		assert(*(pvend-1) == '\0');
 		increment = total_v_size >> 2;	/* mild exponential growth */
+		prev_v_size = total_v_size;
 		total_v_size += increment;
-		if (total_v_size > INT_MAX) {
+		/* check for overflow */
+		if (total_v_size <= prev_v_size || total_v_size > INT_MAX) {
 			PyErr_SetString(PyExc_OverflowError,
 			    "line is longer than a Python string can hold");
 			Py_DECREF(v);
@@ -1030,7 +1033,7 @@ getline_via_fgets(FILE *fp)
 		if (_PyString_Resize(&v, (int)total_v_size) < 0)
 			return NULL;
 		/* overwrite the trailing null byte */
-		pvfree = BUF(v) + (total_v_size - increment - 1);
+		pvfree = BUF(v) + (prev_v_size - 1);
 	}
 	if (BUF(v) + total_v_size != p)
 		_PyString_Resize(&v, p - BUF(v));
diff --git a/Objects/intobject.c b/Objects/intobject.c
index 579fe7d..5949672 100644
--- a/Objects/intobject.c
+++ b/Objects/intobject.c
@@ -460,6 +460,17 @@ int_mul(PyObject *v, PyObject *w)
 	}
 }
 
+/* Integer overflow checking for unary negation: on a 2's-complement
+ * box, -x overflows iff x is the most negative long.  In this case we
+ * get -x == x.  However, -x is undefined (by C) if x /is/ the most
+ * negative long (it's a signed overflow case), and some compilers care.
+ * So we cast x to unsigned long first.  However, then other compilers
+ * warn about applying unary minus to an unsigned operand.  Hence the
+ * weird "0-".
+ */
+#define UNARY_NEG_WOULD_OVERFLOW(x)	\
+	((x) < 0 && (unsigned long)(x) == 0-(unsigned long)(x))
+
 /* Return type of i_divmod */
 enum divmod_result {
 	DIVMOD_OK,		/* Correct result */
@@ -478,14 +489,8 @@ i_divmod(register long x, register long y,
 				"integer division or modulo by zero");
 		return DIVMOD_ERROR;
 	}
-	/* (-sys.maxint-1)/-1 is the only overflow case.  x is the most
-	 * negative long iff x < 0 and, on a 2's-complement box, x == -x.
-	 * However, -x is undefined (by C) if x /is/ the most negative long
-	 * (it's a signed overflow case), and some compilers care.  So we cast
-	 * x to unsigned long first.  However, then other compilers warn about
-	 * applying unary minus to an unsigned operand.  Hence the weird "0-".
-	 */
-	if (y == -1 && x < 0 && (unsigned long)x == 0-(unsigned long)x)
+	/* (-sys.maxint-1)/-1 is the only overflow case. */
+	if (y == -1 && UNARY_NEG_WOULD_OVERFLOW(x))
 		return DIVMOD_OVERFLOW;
 	xdivy = x / y;
 	xmody = x - xdivy * y;
@@ -676,7 +681,8 @@ int_neg(PyIntObject *v)
 {
 	register long a;
 	a = v->ob_ival;
-	if (a < 0 && (unsigned long)a == 0-(unsigned long)a) {
+        /* check for overflow */
+	if (UNARY_NEG_WOULD_OVERFLOW(a)) {
 		PyObject *o = PyLong_FromLong(a);
 		if (o != NULL) {
 			PyObject *result = PyNumber_Negative(o);
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 40d4620..b496997 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -860,7 +860,8 @@ list_inplace_concat(PyListObject *self, PyObject *other)
 static PyObject *
 listpop(PyListObject *self, PyObject *args)
 {
-	int i = -1;
+	long ii = -1;
+	int i;
 	PyObject *v, *arg = NULL;
 	int status;
 
@@ -868,8 +869,8 @@ listpop(PyListObject *self, PyObject *args)
 		return NULL;
 	if (arg != NULL) {
 		if (PyInt_Check(arg))
-			i = (int)(PyInt_AS_LONG((PyIntObject*) arg));
-		else if (!PyArg_ParseTuple(args, "|i:pop", &i))
+			ii = PyInt_AS_LONG((PyIntObject*) arg);
+		else if (!PyArg_ParseTuple(args, "|l:pop", &ii))
    			return NULL;
 	}
 	if (self->ob_size == 0) {
@@ -877,12 +878,13 @@ listpop(PyListObject *self, PyObject *args)
 		PyErr_SetString(PyExc_IndexError, "pop from empty list");
 		return NULL;
 	}
-	if (i < 0)
-		i += self->ob_size;
-	if (i < 0 || i >= self->ob_size) {
+	if (ii < 0)
+		ii += self->ob_size;
+	if (ii < 0 || ii >= self->ob_size) {
 		PyErr_SetString(PyExc_IndexError, "pop index out of range");
 		return NULL;
 	}
+	i = (int)ii;
 	v = self->ob_item[i];
 	if (i == self->ob_size - 1) {
 		status = list_resize(self, self->ob_size - 1);
diff --git a/Objects/longobject.c b/Objects/longobject.c
index f4a9a42..5847feb 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -188,6 +188,17 @@ PyLong_FromDouble(double dval)
 	return (PyObject *)v;
 }
 
+/* Checking for overflow in PyLong_AsLong is a PITA since C doesn't define
+ * anything about what happens when a signed integer operation overflows,
+ * and some compilers think they're doing you a favor by being "clever"
+ * then.  The bit pattern for the largest postive signed long is
+ * (unsigned long)LONG_MAX, and for the smallest negative signed long
+ * it is abs(LONG_MIN), which we could write -(unsigned long)LONG_MIN.
+ * However, some other compilers warn about applying unary minus to an
+ * unsigned operand.  Hence the weird "0-".
+ */
+#define Py_ABS_LONG_MIN		(0-(unsigned long)LONG_MIN)
+
 /* Get a C long int from a long int object.
    Returns -1 and sets an error condition if overflow occurs. */
 
@@ -219,14 +230,16 @@ PyLong_AsLong(PyObject *vv)
 		if ((x >> SHIFT) != prev)
 			goto overflow;
 	}
-	/* Haven't lost any bits, but if the sign bit is set we're in
-	 * trouble *unless* this is the min negative number.  So,
-	 * trouble iff sign bit set && (positive || some bit set other
-	 * than the sign bit).
+	/* Haven't lost any bits, but casting to long requires extra care
+	 * (see comment above).
 	 */
-	if ((long)x < 0 && (sign > 0 || (x << 1) != 0))
-		goto overflow;
-	return (long)x * sign;
+	if (x <= (unsigned long)LONG_MAX) {
+		return (long)x * sign;
+	}
+	else if (sign < 0 && x == Py_ABS_LONG_MIN) {
+		return LONG_MIN;
+	}
+	/* else overflow */
 
  overflow:
 	PyErr_SetString(PyExc_OverflowError,
@@ -1042,7 +1055,7 @@ long_format(PyObject *aa, int base, int addL)
 {
 	register PyLongObject *a = (PyLongObject *)aa;
 	PyStringObject *str;
-	int i;
+	int i, j, sz;
 	int size_a;
 	char *p;
 	int bits;
@@ -1062,11 +1075,18 @@ long_format(PyObject *aa, int base, int addL)
 		++bits;
 		i >>= 1;
 	}
-	i = 5 + (addL ? 1 : 0) + (size_a*SHIFT + bits-1) / bits;
-	str = (PyStringObject *) PyString_FromStringAndSize((char *)0, i);
+	i = 5 + (addL ? 1 : 0);
+	j = size_a*SHIFT + bits-1;
+	sz = i + j / bits;
+	if (j / SHIFT < size_a || sz < i) {
+		PyErr_SetString(PyExc_OverflowError,
+				"long is too large to format");
+		return NULL;
+	}
+	str = (PyStringObject *) PyString_FromStringAndSize((char *)0, sz);
 	if (str == NULL)
 		return NULL;
-	p = PyString_AS_STRING(str) + i;
+	p = PyString_AS_STRING(str) + sz;
 	*p = '\0';
         if (addL)
                 *--p = 'L';
@@ -1224,14 +1244,14 @@ long_from_binary_base(char **str, int base)
 		++p;
 	}
 	*str = p;
-	n = (p - start) * bits_per_char;
-	if (n / bits_per_char != p - start) {
+	/* n <- # of Python digits needed, = ceiling(n/SHIFT). */
+	n = (p - start) * bits_per_char + SHIFT - 1;
+	if (n / bits_per_char < p - start) {
 		PyErr_SetString(PyExc_ValueError,
 				"long string too large to convert");
 		return NULL;
 	}
-	/* n <- # of Python digits needed, = ceiling(n/SHIFT). */
-	n = (n + SHIFT - 1) / SHIFT;
+	n = n / SHIFT;
 	z = _PyLong_New(n);
 	if (z == NULL)
 		return NULL;
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 7749167..e7ce15f 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -799,7 +799,7 @@ PyString_Repr(PyObject *obj, int smartquotes)
 	register PyStringObject* op = (PyStringObject*) obj;
 	size_t newsize = 2 + 4 * op->ob_size;
 	PyObject *v;
-	if (newsize > INT_MAX) {
+	if (newsize > INT_MAX || newsize / 4 != op->ob_size) {
 		PyErr_SetString(PyExc_OverflowError,
 			"string is too large to make repr");
 	}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d65355a..ddfeee2 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2316,6 +2316,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
     PyObject *exc = NULL;
 
     unimax = PyUnicode_GetMax();
+    /* XXX overflow detection missing */
     v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE);
     if (v == NULL)
 	goto onError;
@@ -2931,6 +2932,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
 		    int needed = (targetsize - extrachars) + \
 			         (targetsize << 2);
 		    extrachars += needed;
+                    /* XXX overflow detection missing */
 		    if (_PyUnicode_Resize(&v,
 					 PyUnicode_GET_SIZE(v) + needed) < 0) {
 			Py_DECREF(x);
diff --git a/Python/marshal.c b/Python/marshal.c
index df88bcb..4f4d6b3 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -451,6 +451,11 @@ r_object(RFILE *p)
 			int size;
 			PyLongObject *ob;
 			n = r_long(p);
+			if (n < -INT_MAX || n > INT_MAX) {
+				PyErr_SetString(PyExc_ValueError,
+						"bad marshal data");
+				return NULL;
+			}
 			size = n<0 ? -n : n;
 			ob = _PyLong_New(size);
 			if (ob == NULL)
@@ -518,7 +523,7 @@ r_object(RFILE *p)
 	case TYPE_INTERNED:
 	case TYPE_STRING:
 		n = r_long(p);
-		if (n < 0) {
+		if (n < 0 || n > INT_MAX) {
 			PyErr_SetString(PyExc_ValueError, "bad marshal data");
 			return NULL;
 		}
@@ -539,6 +544,10 @@ r_object(RFILE *p)
 
 	case TYPE_STRINGREF:
 		n = r_long(p);
+		if (n < 0 || n >= PyList_GET_SIZE(p->strings)) {
+			PyErr_SetString(PyExc_ValueError, "bad marshal data");
+			return NULL;
+		}
 		v = PyList_GET_ITEM(p->strings, n);
 		Py_INCREF(v);
 		return v;
@@ -549,7 +558,7 @@ r_object(RFILE *p)
 		char *buffer;
 
 		n = r_long(p);
-		if (n < 0) {
+		if (n < 0 || n > INT_MAX) {
 			PyErr_SetString(PyExc_ValueError, "bad marshal data");
 			return NULL;
 		}
@@ -570,7 +579,7 @@ r_object(RFILE *p)
 
 	case TYPE_TUPLE:
 		n = r_long(p);
-		if (n < 0) {
+		if (n < 0 || n > INT_MAX) {
 			PyErr_SetString(PyExc_ValueError, "bad marshal data");
 			return NULL;
 		}
@@ -593,7 +602,7 @@ r_object(RFILE *p)
 
 	case TYPE_LIST:
 		n = r_long(p);
-		if (n < 0) {
+		if (n < 0 || n > INT_MAX) {
 			PyErr_SetString(PyExc_ValueError, "bad marshal data");
 			return NULL;
 		}
@@ -643,10 +652,11 @@ r_object(RFILE *p)
 			return NULL;
 		}
 		else {
-			int argcount = r_long(p);
-			int nlocals = r_long(p);
-			int stacksize = r_long(p);
-			int flags = r_long(p);
+			/* XXX ignore long->int overflows for now */
+			int argcount = (int)r_long(p);
+			int nlocals = (int)r_long(p);
+			int stacksize = (int)r_long(p);
+			int flags = (int)r_long(p);
 			PyObject *code = r_object(p);
 			PyObject *consts = r_object(p);
 			PyObject *names = r_object(p);
@@ -655,7 +665,7 @@ r_object(RFILE *p)
 			PyObject *cellvars = r_object(p);
 			PyObject *filename = r_object(p);
 			PyObject *name = r_object(p);
-			int firstlineno = r_long(p);
+			int firstlineno = (int)r_long(p);
 			PyObject *lnotab = r_object(p);
 
 			if (!PyErr_Occurred()) {
@@ -821,10 +831,16 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
 	wf.strings = (version > 0) ? PyDict_New() : NULL;
 	w_object(x, &wf);
 	Py_XDECREF(wf.strings);
-	if (wf.str != NULL)
-		_PyString_Resize(&wf.str,
-		    (int) (wf.ptr -
-			   PyString_AS_STRING((PyStringObject *)wf.str)));
+	if (wf.str != NULL) {
+		char *base = PyString_AS_STRING((PyStringObject *)wf.str);
+		if (wf.ptr - base > INT_MAX) {
+			Py_DECREF(wf.str);
+			PyErr_SetString(PyExc_OverflowError,
+					"too much marshall data for a string");
+			return NULL;
+		}
+		_PyString_Resize(&wf.str, (int)(wf.ptr - base));
+	}
 	if (wf.error) {
 		Py_XDECREF(wf.str);
 		PyErr_SetString(PyExc_ValueError,
diff --git a/Python/mystrtoul.c b/Python/mystrtoul.c
index 8e60c0c..d9b3a4d 100644
--- a/Python/mystrtoul.c
+++ b/Python/mystrtoul.c
@@ -122,30 +122,38 @@ PyOS_strtoul(register char *str, char **ptr, int base)
     return result;
 }
 
+/* Checking for overflow in PyOS_strtol is a PITA; see comments
+ * about Py_ABS_LONG_MIN in longobject.c.
+ */
+#define Py_ABS_LONG_MIN		(0-(unsigned long)LONG_MIN)
+
 long
 PyOS_strtol(char *str, char **ptr, int base)
 {
 	long result;
+	unsigned long uresult;
 	char sign;
-	
+
 	while (*str && isspace(Py_CHARMASK(*str)))
 		str++;
-	
+
 	sign = *str;
 	if (sign == '+' || sign == '-')
 		str++;
-	
-	result = (long) PyOS_strtoul(str, ptr, base);
-	
-	/* Signal overflow if the result appears negative,
-	   except for the largest negative integer */
-	if (result < 0 && !(sign == '-' && result == -result)) {
+
+	uresult = PyOS_strtoul(str, ptr, base);
+
+	if (uresult <= (unsigned long)LONG_MAX) {
+		result = (long)uresult;
+		if (sign == '-')
+			result = -result;
+	}
+	else if (sign == '-' && uresult == Py_ABS_LONG_MIN) {
+		result = LONG_MIN;
+	}
+	else {
 		errno = ERANGE;
 		result = 0x7fffffff;
 	}
-	
-	if (sign == '-')
-		result = -result;
-	
 	return result;
 }
-- 
cgit v0.12