diff options
author | Walter Dörwald <walter@livinglogic.de> | 2003-02-04 16:28:00 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2003-02-04 16:28:00 (GMT) |
commit | 903f1e0c40cd25489cdf8856ccb1bb8932c819f8 (patch) | |
tree | 8cf90251718137f576117a39922165165ab8a5f8 | |
parent | 57ba55b77deb297dfada56e3e15cb91697665b06 (diff) | |
download | cpython-903f1e0c40cd25489cdf8856ccb1bb8932c819f8.zip cpython-903f1e0c40cd25489cdf8856ccb1bb8932c819f8.tar.gz cpython-903f1e0c40cd25489cdf8856ccb1bb8932c819f8.tar.bz2 |
filterstring() and filterunicode() in Python/bltinmodule.c
blindly assumed that tp_as_sequence->sq_item always returns
a str or unicode object. This assumption can fail for str or unicode
subclasses that override __getitem__.
This patch checks whether the object returned from __getitem__
is a str/unicode object and, if it is not and the filter function
returned true for it, raises a TypeError.
Furthermore, the result of __getitem__ can be more than one
character long, so the result buffer has to be checked for enough
space and resized when necessary.
-rw-r--r-- | Lib/test/test_builtin.py | 21 | ||||
-rw-r--r-- | Python/bltinmodule.c | 78 |
2 files changed, 91 insertions, 8 deletions
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 55ea8d2..6e13050 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -367,6 +367,16 @@ class BuiltinTest(unittest.TestCase): raise ValueError self.assertRaises(ValueError, filter, lambda x: x >="3", badstr("1234")) + class badstr2(str): + def __getitem__(self, index): + return 42 + self.assertRaises(TypeError, filter, lambda x: x >=42, badstr2("1234")) + + class weirdstr(str): + def __getitem__(self, index): + return weirdstr(2*str.__getitem__(self, index)) + self.assertEqual(filter(lambda x: x>="33", weirdstr("1234")), "3344") + if have_unicode: # test bltinmodule.c::filterunicode() self.assertEqual(filter(None, unicode("12")), unicode("12")) @@ -374,6 +384,17 @@ class BuiltinTest(unittest.TestCase): self.assertRaises(TypeError, filter, 42, unicode("12")) self.assertRaises(ValueError, filter, lambda x: x >="3", badstr(unicode("1234"))) + class badunicode(unicode): + def __getitem__(self, index): + return 42 + self.assertRaises(TypeError, filter, lambda x: x >=42, badunicode("1234")) + + class weirdunicode(unicode): + def __getitem__(self, index): + return weirdunicode(2*unicode.__getitem__(self, index)) + self.assertEqual( + filter(lambda x: x>=unicode("33"), weirdunicode("1234")), unicode("3344")) + def test_float(self): self.assertEqual(float(3.14), 3.14) self.assertEqual(float(314), 314.0) diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 466fab9..c273012 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -1892,6 +1892,7 @@ filterstring(PyObject *func, PyObject *strobj) PyObject *result; register int i, j; int len = PyString_Size(strobj); + int outlen = len; if (func == Py_None) { /* No character is ever false -- share input string */ @@ -1921,13 +1922,43 @@ filterstring(PyObject *func, PyObject *strobj) } ok = PyObject_IsTrue(good); Py_DECREF(good); - if (ok) - PyString_AS_STRING((PyStringObject *)result)[j++] = - 
PyString_AS_STRING((PyStringObject *)item)[0]; + if (ok) { + int reslen; + if (!PyString_Check(item)) { + PyErr_SetString(PyExc_TypeError, "can't filter str to str:" + " __getitem__ returned different type"); + Py_DECREF(item); + goto Fail_1; + } + reslen = PyString_GET_SIZE(item); + if (reslen == 1) { + PyString_AS_STRING(result)[j++] = + PyString_AS_STRING(item)[0]; + } else { + /* do we need more space? */ + int need = j + reslen + len-i-1; + if (need > outlen) { + /* overallocate, to avoid reallocations */ + if (need<2*outlen) + need = 2*outlen; + if (_PyString_Resize(&result, need)) { + Py_DECREF(item); + return NULL; + } + outlen = need; + } + memcpy( + PyString_AS_STRING(result) + j, + PyString_AS_STRING(item), + reslen + ); + j += reslen; + } + } Py_DECREF(item); } - if (j < len) + if (j < outlen) _PyString_Resize(&result, j); return result; @@ -1946,6 +1977,7 @@ filterunicode(PyObject *func, PyObject *strobj) PyObject *result; register int i, j; int len = PyUnicode_GetSize(strobj); + int outlen = len; if (func == Py_None) { /* No character is ever false -- share input string */ @@ -1975,13 +2007,43 @@ filterunicode(PyObject *func, PyObject *strobj) } ok = PyObject_IsTrue(good); Py_DECREF(good); - if (ok) - PyUnicode_AS_UNICODE((PyStringObject *)result)[j++] = - PyUnicode_AS_UNICODE((PyStringObject *)item)[0]; + if (ok) { + int reslen; + if (!PyUnicode_Check(item)) { + PyErr_SetString(PyExc_TypeError, "can't filter unicode to unicode:" + " __getitem__ returned different type"); + Py_DECREF(item); + goto Fail_1; + } + reslen = PyUnicode_GET_SIZE(item); + if (reslen == 1) { + PyUnicode_AS_UNICODE(result)[j++] = + PyUnicode_AS_UNICODE(item)[0]; + } else { + /* do we need more space? 
*/ + int need = j + reslen + len-i-1; + if (need > outlen) { + /* overallocate, to avoid reallocations */ + if (need<2*outlen) + need = 2*outlen; + if (PyUnicode_Resize(&result, need)) { + Py_DECREF(item); + return NULL; + } + outlen = need; + } + memcpy( + PyUnicode_AS_UNICODE(result) + j, + PyUnicode_AS_UNICODE(item), + reslen*sizeof(Py_UNICODE) + ); + j += reslen; + } + } Py_DECREF(item); } - if (j < len) + if (j < outlen) PyUnicode_Resize(&result, j); return result; |