Change filterstring() and filterunicode(): If the object is not a real str or unicode but an instance of a subclass, construct the output via looping over __getitem__. This guarantees that the result is the same for function==None and function==lambda x:x. This doesn't happen for tuples, because filtertuple() uses PyTuple_GetItem(). (This was discussed on SF bug #665835.)
author: Walter Dörwald <walter@livinglogic.de> 2003-02-10 13:19:13 (GMT)
committer: Walter Dörwald <walter@livinglogic.de> 2003-02-10 13:19:13 (GMT)
commit: 1918f7755e03900224c5a53cca9fc0088c3186d3 (patch)
tree: 0c2ac8d1d8373699f5d431c88c4960f9bc6849ee
parent: b4bb64e2882297f4759e5d4e6758100d8e9f3273 (diff)
download: cpython-1918f7755e03900224c5a53cca9fc0088c3186d3.zip
cpython-1918f7755e03900224c5a53cca9fc0088c3186d3.tar.gz
cpython-1918f7755e03900224c5a53cca9fc0088c3186d3.tar.bz2
2 files changed, 75 insertions, 56 deletions
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 047e93e..9af3233 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -418,26 +418,40 @@ class BuiltinTest(unittest.TestCase):
 
     def test_filter_subclasses(self):
         # test, that filter() never returns tuple, str or unicode subclasses
+        # and that the result always go's through __getitem__
+        # FIXME: For tuple currently it doesn't go through __getitem__
         funcs = (None, lambda x: True)
         class tuple2(tuple):
-            pass
+            def __getitem__(self, index):
+                return 2*tuple.__getitem__(self, index)
         class str2(str):
-            pass
+            def __getitem__(self, index):
+                return 2*str.__getitem__(self, index)
         inputs = {
-            tuple2: [(), (1,2,3)],
-            str2:   ["", "123"]
+            tuple2: {(): (), (1, 2, 3): (1, 2, 3)}, # FIXME
+            str2:   {"": "", "123": "112233"}
         }
         if have_unicode:
             class unicode2(unicode):
-                pass
-            inputs[unicode2] = [unicode(), unicode("123")]
-
-        for func in funcs:
-            for (cls, inps) in inputs.iteritems():
-                for inp in inps:
-                    out = filter(func, cls(inp))
-                    self.assertEqual(inp, out)
-                    self.assert_(not isinstance(out, cls))
+                def __getitem__(self, index):
+                    return 2*unicode.__getitem__(self, index)
+            inputs[unicode2] = {
+                unicode(): unicode(),
+                unicode("123"): unicode("112233")
+            }
+
+        for (cls, inps) in inputs.iteritems():
+            for (inp, exp) in inps.iteritems():
+                 # make sure the output goes through __getitem__
+                 # even if func is None
+                 self.assertEqual(
+                     filter(funcs[0], cls(inp)),
+                     filter(funcs[1], cls(inp))
+                 )
+                 for func in funcs:
+                    outp = filter(func, cls(inp))
+                    self.assertEqual(outp, exp)
+                    self.assert_(not isinstance(outp, cls))
 
     def test_float(self):
         self.assertEqual(float(3.14), 3.14)
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 0b43905..19af6f7 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -1934,40 +1934,43 @@ filterstring(PyObject *func, PyObject *strobj)
 	int outlen = len;
 
 	if (func == Py_None) {
-		/* No character is ever false -- share input string
-		 * (if it's not a subclass) */
-		if (PyString_CheckExact(strobj))
+		/* If it's a real string we can return the original,
+		 * as no character is ever false and __getitem__
+		 * does return this character. If it's a subclass
+		 * we must go through the __getitem__ loop */
+		if (PyString_CheckExact(strobj)) {
 			Py_INCREF(strobj);
-		else
-			strobj = PyString_FromStringAndSize(
-				PyString_AS_STRING(strobj),
-				len
-			);
-		return strobj;
+			return strobj;
+		}
 	}
 	if ((result = PyString_FromStringAndSize(NULL, len)) == NULL)
 		return NULL;
 
 	for (i = j = 0; i < len; ++i) {
-		PyObject *item, *arg, *good;
+		PyObject *item;
 		int ok;
 
 		item = (*strobj->ob_type->tp_as_sequence->sq_item)(strobj, i);
 		if (item == NULL)
 			goto Fail_1;
-		arg = Py_BuildValue("(O)", item);
-		if (arg == NULL) {
-			Py_DECREF(item);
-			goto Fail_1;
-		}
-		good = PyEval_CallObject(func, arg);
-		Py_DECREF(arg);
-		if (good == NULL) {
-			Py_DECREF(item);
-			goto Fail_1;
+		if (func==Py_None) {
+			ok = 1;
+		} else {
+			PyObject *arg, *good;
+			arg = Py_BuildValue("(O)", item);
+			if (arg == NULL) {
+				Py_DECREF(item);
+				goto Fail_1;
+			}
+			good = PyEval_CallObject(func, arg);
+			Py_DECREF(arg);
+			if (good == NULL) {
+				Py_DECREF(item);
+				goto Fail_1;
+			}
+			ok = PyObject_IsTrue(good);
+			Py_DECREF(good);
 		}
-		ok = PyObject_IsTrue(good);
-		Py_DECREF(good);
 		if (ok) {
 			int reslen;
 			if (!PyString_Check(item)) {
@@ -2026,16 +2029,14 @@ filterunicode(PyObject *func, PyObject *strobj)
 	int outlen = len;
 
 	if (func == Py_None) {
-		/* No character is ever false -- share input string
-		 * (it if's not a subclass) */
-		if (PyUnicode_CheckExact(strobj))
+		/* If it's a real string we can return the original,
+		 * as no character is ever false and __getitem__
+		 * does return this character. If it's a subclass
+		 * we must go through the __getitem__ loop */
+		if (PyUnicode_CheckExact(strobj)) {
 			Py_INCREF(strobj);
-		else
-			strobj = PyUnicode_FromUnicode(
-				PyUnicode_AS_UNICODE(strobj),
-				len
-			);
-		return strobj;
+			return strobj;
+		}
 	}
 	if ((result = PyUnicode_FromUnicode(NULL, len)) == NULL)
 		return NULL;
@@ -2047,19 +2048,23 @@ filterunicode(PyObject *func, PyObject *strobj)
 		item = (*strobj->ob_type->tp_as_sequence->sq_item)(strobj, i);
 		if (item == NULL)
 			goto Fail_1;
-		arg = Py_BuildValue("(O)", item);
-		if (arg == NULL) {
-			Py_DECREF(item);
-			goto Fail_1;
-		}
-		good = PyEval_CallObject(func, arg);
-		Py_DECREF(arg);
-		if (good == NULL) {
-			Py_DECREF(item);
-			goto Fail_1;
+		if (func == Py_None) {
+			ok = 1;
+		} else {
+			arg = Py_BuildValue("(O)", item);
+			if (arg == NULL) {
+				Py_DECREF(item);
+				goto Fail_1;
+			}
+			good = PyEval_CallObject(func, arg);
+			Py_DECREF(arg);
+			if (good == NULL) {
+				Py_DECREF(item);
+				goto Fail_1;
+			}
+			ok = PyObject_IsTrue(good);
+			Py_DECREF(good);
 		}
-		ok = PyObject_IsTrue(good);
-		Py_DECREF(good);
 		if (ok) {
 			int reslen;
 			if (!PyUnicode_Check(item)) {
author	Walter Dörwald <walter@livinglogic.de>	2003-02-10 13:19:13 (GMT)
committer	Walter Dörwald <walter@livinglogic.de>	2003-02-10 13:19:13 (GMT)
commit	1918f7755e03900224c5a53cca9fc0088c3186d3 (patch)
tree	0c2ac8d1d8373699f5d431c88c4960f9bc6849ee
parent	b4bb64e2882297f4759e5d4e6758100d8e9f3273 (diff)
download	cpython-1918f7755e03900224c5a53cca9fc0088c3186d3.zip cpython-1918f7755e03900224c5a53cca9fc0088c3186d3.tar.gz cpython-1918f7755e03900224c5a53cca9fc0088c3186d3.tar.bz2