diff options
-rw-r--r-- | Lib/test/test_iter.py | 41 | ||||
-rw-r--r-- | Misc/NEWS | 2 | ||||
-rw-r--r-- | Objects/stringobject.c | 9 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 26 |
4 files changed, 65 insertions, 13 deletions
diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index bfe032f..073ffb4 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -431,4 +431,45 @@ class TestCase(unittest.TestCase): d = {"one": 1, "two": 2, "three": 3} self.assertEqual(reduce(add, d), "".join(d.keys())) + def test_unicode_join_endcase(self): + + # This class inserts a Unicode object into its argument's natural + # iteration, in the 3rd position. + class OhPhooey: + def __init__(self, seq): + self.it = iter(seq) + self.i = 0 + + def __iter__(self): + return self + + def next(self): + i = self.i + self.i = i+1 + if i == 2: + return u"fooled you!" + return self.it.next() + + f = open(TESTFN, "w") + try: + f.write("a\n" + "b\n" + "c\n") + finally: + f.close() + + f = open(TESTFN, "r") + # Nasty: string.join(s) can't know whether unicode.join() is needed + # until it's seen all of s's elements. But in this case, f's + # iterator cannot be restarted. So what we're testing here is + # whether string.join() can manage to remember everything it's seen + # and pass that on to unicode.join(). + try: + got = " - ".join(OhPhooey(f)) + self.assertEqual(got, u"a\n - b\n - fooled you! - c\n") + finally: + f.close() + try: + unlink(TESTFN) + except OSError: + pass + run_unittest(TestCase) @@ -25,7 +25,7 @@ Core reduce() string.join() tuple() - XXX TODO unicode.join() + unicode.join() XXX TODO zip() XXX TODO 'x in y' diff --git a/Objects/stringobject.c b/Objects/stringobject.c index b905679..87d7c195 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -861,8 +861,15 @@ string_join(PyStringObject *self, PyObject *args) item = PySequence_Fast_GET_ITEM(seq, i); if (!PyString_Check(item)){ if (PyUnicode_Check(item)) { + /* Defer to Unicode join. + * CAUTION: There's no gurantee that the + * original sequence can be iterated over + * again, so we must pass seq here. + */ + PyObject *result; + result = PyUnicode_Join((PyObject *)self, seq); Py_DECREF(seq); - return PyUnicode_Join((PyObject *)self, orig); + return result; } PyErr_Format(PyExc_TypeError, "sequence item %i: expected string," diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e52d628..5da4d2f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2724,10 +2724,11 @@ PyObject *PyUnicode_Join(PyObject *separator, int seqlen = 0; int sz = 100; int i; + PyObject *it; - seqlen = PySequence_Size(seq); - if (seqlen < 0 && PyErr_Occurred()) - return NULL; + it = PyObject_GetIter(seq); + if (it == NULL) + return NULL; if (separator == NULL) { Py_UNICODE blank = ' '; @@ -2737,7 +2738,7 @@ PyObject *PyUnicode_Join(PyObject *separator, else { separator = PyUnicode_FromObject(separator); if (separator == NULL) - return NULL; + goto onError; sep = PyUnicode_AS_UNICODE(separator); seplen = PyUnicode_GET_SIZE(separator); } @@ -2748,13 +2749,14 @@ PyObject *PyUnicode_Join(PyObject *separator, p = PyUnicode_AS_UNICODE(res); reslen = 0; - for (i = 0; i < seqlen; i++) { + for (i = 0; ; ++i) { int itemlen; - PyObject *item; - - item = PySequence_GetItem(seq, i); - if (item == NULL) - goto onError; + PyObject *item = PyIter_Next(it); + if (item == NULL) { + if (PyErr_Occurred()) + goto onError; + break; + } if (!PyUnicode_Check(item)) { PyObject *v; v = PyUnicode_FromObject(item); @@ -2784,11 +2786,13 @@ PyObject *PyUnicode_Join(PyObject *separator, goto onError; Py_XDECREF(separator); + Py_DECREF(it); return (PyObject *)res; onError: Py_XDECREF(separator); - Py_DECREF(res); + Py_XDECREF(res); + Py_DECREF(it); return NULL; } |