From 903f1e0c40cd25489cdf8856ccb1bb8932c819f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Walter=20D=C3=B6rwald?= Date: Tue, 4 Feb 2003 16:28:00 +0000 Subject: filterstring() and filterunicode() in Python/bltinmodule.c blindly assumed that tp_as_sequence->sq_item always returns a str or unicode object. This might fail with str or unicode subclasses. This patch checks whether the object returned from __getitem__ is a str/unicode object and raises a TypeError if not (and the filter function returned true). Furthermore the result for __getitem__ can be more than one character long, so checks for enough memory have to be done. --- Lib/test/test_builtin.py | 21 +++++++++++++ Python/bltinmodule.c | 78 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 91 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 55ea8d2..6e13050 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -367,6 +367,16 @@ class BuiltinTest(unittest.TestCase): raise ValueError self.assertRaises(ValueError, filter, lambda x: x >="3", badstr("1234")) + class badstr2(str): + def __getitem__(self, index): + return 42 + self.assertRaises(TypeError, filter, lambda x: x >=42, badstr2("1234")) + + class weirdstr(str): + def __getitem__(self, index): + return weirdstr(2*str.__getitem__(self, index)) + self.assertEqual(filter(lambda x: x>="33", weirdstr("1234")), "3344") + if have_unicode: # test bltinmodule.c::filterunicode() self.assertEqual(filter(None, unicode("12")), unicode("12")) @@ -374,6 +384,17 @@ class BuiltinTest(unittest.TestCase): self.assertRaises(TypeError, filter, 42, unicode("12")) self.assertRaises(ValueError, filter, lambda x: x >="3", badstr(unicode("1234"))) + class badunicode(unicode): + def __getitem__(self, index): + return 42 + self.assertRaises(TypeError, filter, lambda x: x >=42, badunicode("1234")) + + class weirdunicode(unicode): + def __getitem__(self, index): + return weirdunicode(2*unicode.__getitem__(self, index)) + self.assertEqual( + filter(lambda x: x>=unicode("33"), weirdunicode("1234")), unicode("3344")) + def test_float(self): self.assertEqual(float(3.14), 3.14) self.assertEqual(float(314), 314.0) diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 466fab9..c273012 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -1892,6 +1892,7 @@ filterstring(PyObject *func, PyObject *strobj) PyObject *result; register int i, j; int len = PyString_Size(strobj); + int outlen = len; if (func == Py_None) { /* No character is ever false -- share input string */ @@ -1921,13 +1922,43 @@ filterstring(PyObject *func, PyObject *strobj) } ok = PyObject_IsTrue(good); Py_DECREF(good); - if (ok) - PyString_AS_STRING((PyStringObject *)result)[j++] = - PyString_AS_STRING((PyStringObject *)item)[0]; + if (ok) { + int reslen; + if (!PyString_Check(item)) { + PyErr_SetString(PyExc_TypeError, "can't filter str to str:" + " __getitem__ returned different type"); + Py_DECREF(item); + goto Fail_1; + } + reslen = PyString_GET_SIZE(item); + if (reslen == 1) { + PyString_AS_STRING(result)[j++] = + PyString_AS_STRING(item)[0]; + } else { + /* do we need more space? */ + int need = j + reslen + len-i-1; + if (need > outlen) { + /* overallocate, to avoid reallocations */ + if (need<2*outlen) + need = 2*outlen; + if (_PyString_Resize(&result, need)) { + Py_DECREF(item); + return NULL; + } + outlen = need; + } + memcpy( + PyString_AS_STRING(result) + j, + PyString_AS_STRING(item), + reslen + ); + j += reslen; + } + } Py_DECREF(item); } - if (j < len) + if (j < outlen) _PyString_Resize(&result, j); return result; @@ -1946,6 +1977,7 @@ filterunicode(PyObject *func, PyObject *strobj) PyObject *result; register int i, j; int len = PyUnicode_GetSize(strobj); + int outlen = len; if (func == Py_None) { /* No character is ever false -- share input string */ @@ -1975,13 +2007,43 @@ filterunicode(PyObject *func, PyObject *strobj) } ok = PyObject_IsTrue(good); Py_DECREF(good); - if (ok) - PyUnicode_AS_UNICODE((PyStringObject *)result)[j++] = - PyUnicode_AS_UNICODE((PyStringObject *)item)[0]; + if (ok) { + int reslen; + if (!PyUnicode_Check(item)) { + PyErr_SetString(PyExc_TypeError, "can't filter unicode to unicode:" + " __getitem__ returned different type"); + Py_DECREF(item); + goto Fail_1; + } + reslen = PyUnicode_GET_SIZE(item); + if (reslen == 1) { + PyUnicode_AS_UNICODE(result)[j++] = + PyUnicode_AS_UNICODE(item)[0]; + } else { + /* do we need more space? */ + int need = j + reslen + len-i-1; + if (need > outlen) { + /* overallocate, to avoid reallocations */ + if (need<2*outlen) + need = 2*outlen; + if (PyUnicode_Resize(&result, need)) { + Py_DECREF(item); + return NULL; + } + outlen = need; + } + memcpy( + PyUnicode_AS_UNICODE(result) + j, + PyUnicode_AS_UNICODE(item), + reslen*sizeof(Py_UNICODE) + ); + j += reslen; + } + } Py_DECREF(item); } - if (j < len) + if (j < outlen) PyUnicode_Resize(&result, j); return result; -- cgit v0.12