summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/lib/libitertools.tex27
-rw-r--r--Lib/test/test_itertools.py54
-rw-r--r--Misc/NEWS2
-rw-r--r--Modules/itertoolsmodule.c234
4 files changed, 317 insertions, 0 deletions
diff --git a/Doc/lib/libitertools.tex b/Doc/lib/libitertools.tex
index 59fbd98..ac6028b 100644
--- a/Doc/lib/libitertools.tex
+++ b/Doc/lib/libitertools.tex
@@ -302,6 +302,33 @@ by functions or loops that truncate the stream.
don't care about trailing, unmatched values from the longer iterables.
\end{funcdesc}
+\begin{funcdesc}{izip_longest}{*iterables\optional{, fillvalue}}
+ Make an iterator that aggregates elements from each of the iterables.
+ If the iterables are of uneven length, missing values are filled in
+ with \var{fillvalue}. Iteration continues until the longest iterable
+ is exhausted. Equivalent to:
+
+ \begin{verbatim}
+ def izip_longest(*args, **kwds):
+ fillvalue = kwds.get('fillvalue')
+ def sentinel(counter = ([fillvalue]*(len(args)-1)).pop):
+ yield counter() # yields the fillvalue, or raises IndexError
+ fillers = repeat(fillvalue)
+ iters = [chain(it, sentinel(), fillers) for it in args]
+ try:
+ for tup in izip(*iters):
+ yield tup
+ except IndexError:
+ pass
+ \end{verbatim}
+
+ If one of the iterables is potentially infinite, then the
+ \function{izip_longest()} function should be wrapped with something
+ that limits the number of calls (for example \function{islice()} or
+ \function{take()}). If not specified, \var{fillvalue} defaults to
+ \code{None}.
+ \versionadded{2.6}
+\end{funcdesc}
+
\begin{funcdesc}{repeat}{object\optional{, times}}
Make an iterator that returns \var{object} over and over again.
Runs indefinitely unless the \var{times} argument is specified.
diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py
index c965d4c..93fdab7 100644
--- a/Lib/test/test_itertools.py
+++ b/Lib/test/test_itertools.py
@@ -198,6 +198,51 @@ class TestBasicOps(unittest.TestCase):
ids = map(id, list(izip('abc', 'def')))
self.assertEqual(len(dict.fromkeys(ids)), len(ids))
+ def test_iziplongest(self):
+ # Basic behaviour of izip_longest(): results are compared against
+ # map(None, *args), whose output serves as the None-padded reference.
+ for args in [
+ ['abc', range(6)],
+ [range(6), 'abc'],
+ [range(1000), range(2000,2100), range(3000,3050)],
+ [range(1000), range(0), range(3000,3050), range(1200), range(1500)],
+ [range(1000), range(0), range(3000,3050), range(1200), range(1500), range(0)],
+ ]:
+ target = map(None, *args)
+ self.assertEqual(list(izip_longest(*args)), target)
+ self.assertEqual(list(izip_longest(*args, **{})), target)
+ target = [tuple((e is None and 'X' or e) for e in t) for t in target] # Replace None fills with 'X'
+ self.assertEqual(list(izip_longest(*args, **dict(fillvalue='X'))), target)
+
+ self.assertEqual(take(3,izip_longest('abcdef', count())), zip('abcdef', range(3))) # take 3 from infinite input
+
+ # Degenerate cases: no arguments, one empty iterable, a single iterable.
+ self.assertEqual(list(izip_longest()), zip())
+ self.assertEqual(list(izip_longest([])), zip([]))
+ self.assertEqual(list(izip_longest('abcdef')), zip('abcdef'))
+
+ self.assertEqual(list(izip_longest('abc', 'defg', **{})), map(None, 'abc', 'defg')) # empty keyword dict
+ # Non-iterable arguments must be rejected with TypeError when the
+ # izip_longest object is created.
+ self.assertRaises(TypeError, izip_longest, 3)
+ self.assertRaises(TypeError, izip_longest, range(3), 3)
+
+ # Unknown or extra keyword arguments must raise TypeError.
+ for stmt in [
+ "izip_longest('abc', fv=1)",
+ "izip_longest('abc', fillvalue=1, bogus_keyword=None)",
+ ]:
+ try:
+ eval(stmt, globals(), locals())
+ except TypeError:
+ pass
+ else:
+ # NOTE(review): the message says 'Type'; presumably 'TypeError'
+ # was intended -- confirm before changing the string.
+ self.fail('Did not raise Type in: ' + stmt)
+
+ # Check tuple re-use (implementation detail)
+ self.assertEqual([tuple(list(pair)) for pair in izip_longest('abc', 'def')],
+ zip('abc', 'def'))
+ self.assertEqual([pair for pair in izip_longest('abc', 'def')],
+ zip('abc', 'def'))
+ # When each tuple is dropped before the next one is requested, every
+ # yielded tuple must have the same id (the result tuple is recycled).
+ ids = map(id, izip_longest('abc', 'def'))
+ self.assertEqual(min(ids), max(ids))
+ # When list() keeps all tuples alive, each must be a distinct object.
+ ids = map(id, list(izip_longest('abc', 'def')))
+ self.assertEqual(len(dict.fromkeys(ids)), len(ids))
+
+
def test_repeat(self):
self.assertEqual(zip(xrange(3),repeat('a')),
[(0, 'a'), (1, 'a'), (2, 'a')])
@@ -611,6 +656,15 @@ class TestVariousIteratorArgs(unittest.TestCase):
self.assertRaises(TypeError, list, izip(N(s)))
self.assertRaises(ZeroDivisionError, list, izip(E(s)))
+ def test_iziplongest(self):
+ # Feed izip_longest() through the iterator/sequence wrappers G, I, Ig,
+ # S, L and R (presumably defined elsewhere in this test module -- not
+ # visible here) and check the output matches zip() on the same inputs.
+ for s in ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5)):
+ for g in (G, I, Ig, S, L, R):
+ self.assertEqual(list(izip_longest(g(s))), zip(g(s)))
+ self.assertEqual(list(izip_longest(g(s), g(s))), zip(g(s), g(s)))
+ # X(s): TypeError expected when the izip_longest object is created.
+ self.assertRaises(TypeError, izip_longest, X(s))
+ # N(s): TypeError expected once the result is iterated.
+ self.assertRaises(TypeError, list, izip_longest(N(s)))
+ # E(s): an error raised by the input must propagate unchanged.
+ self.assertRaises(ZeroDivisionError, list, izip_longest(E(s)))
+
+
def test_imap(self):
for s in (range(10), range(0), range(100), (7,11), xrange(20,50,5)):
for g in (G, I, Ig, S, L, R):
diff --git a/Misc/NEWS b/Misc/NEWS
index 72d5832..d7eab9c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -127,6 +127,8 @@ Library
- Added heapq.merge() for merging sorted input streams.
+- Added itertools.izip_longest().
+
- Have the encoding package's search function dynamically import using absolute
import semantics.
diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c
index 70f787f..1c91a19 100644
--- a/Modules/itertoolsmodule.c
+++ b/Modules/itertoolsmodule.c
@@ -2472,6 +2472,238 @@ static PyTypeObject repeat_type = {
PyObject_GC_Del, /* tp_free */
};
+/* iziplongest object ************************************************************/
+
+#include "Python.h"
+
+/* Per-instance state of an izip_longest iterator. */
+typedef struct {
+ PyObject_HEAD
+ Py_ssize_t tuplesize; /* number of inputs; also the length of each result tuple */
+ Py_ssize_t numactive; /* inputs not yet replaced by filler; starts at tuplesize */
+ PyObject *ittuple; /* tuple of iterators */
+ PyObject *result; /* result tuple, recycled by next() when nobody else references it */
+ PyObject *fillvalue; /* value substituted for inputs that have run out */
+ PyObject *filler; /* repeat(fillvalue); swapped into ittuple for an exhausted input */
+} iziplongestobject;
+
+static PyTypeObject iziplongest_type;
+
+/* tp_new for izip_longest: izip_longest(*iterables [, fillvalue=obj]).
+ *
+ * args holds the iterables; kwds may hold the single keyword "fillvalue"
+ * (default None).  Returns a new iziplongestobject, or NULL with an
+ * exception set: TypeError for an unexpected keyword or a non-iterable
+ * argument, otherwise whatever the failing allocation or call raised.
+ */
+static PyObject *
+izip_longest_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    iziplongestobject *lz;
+    Py_ssize_t i;
+    PyObject *ittuple;              /* tuple of iterators */
+    PyObject *result;
+    PyObject *fillvalue = Py_None;  /* borrowed until stored on lz */
+    PyObject *filler;
+    Py_ssize_t tuplesize;
+
+    /* "fillvalue" is the only keyword accepted; anything else, or anything
+       in addition to it, is rejected. */
+    if (kwds != NULL && PyDict_CheckExact(kwds) && PyDict_Size(kwds) > 0) {
+        fillvalue = PyDict_GetItemString(kwds, "fillvalue");
+        if (fillvalue == NULL || PyDict_Size(kwds) > 1) {
+            PyErr_SetString(PyExc_TypeError,
+                "izip_longest() got an unexpected keyword argument");
+            return NULL;
+        }
+    }
+
+    /* args must be a tuple */
+    assert(PyTuple_Check(args));
+    /* The loop below already uses PyTuple_GET_ITEM on args, so take the
+       size the same way instead of through the generic sequence protocol
+       (whose -1 error return was never checked). */
+    tuplesize = PyTuple_GET_SIZE(args);
+
+    /* obtain iterators */
+    ittuple = PyTuple_New(tuplesize);
+    if (ittuple == NULL)
+        return NULL;
+    for (i = 0; i < tuplesize; ++i) {
+        PyObject *item = PyTuple_GET_ITEM(args, i);
+        PyObject *it = PyObject_GetIter(item);
+        if (it == NULL) {
+            /* Replace the generic TypeError with one naming the argument. */
+            if (PyErr_ExceptionMatches(PyExc_TypeError))
+                PyErr_Format(PyExc_TypeError,
+                    "izip_longest argument #%zd must support iteration",
+                    i+1);
+            Py_DECREF(ittuple);
+            return NULL;
+        }
+        PyTuple_SET_ITEM(ittuple, i, it);
+    }
+
+    /* repeat(fillvalue): stands in for any input that runs out early. */
+    filler = PyObject_CallFunctionObjArgs((PyObject *)(&repeat_type), fillvalue, NULL);
+    if (filler == NULL) {
+        Py_DECREF(ittuple);
+        return NULL;
+    }
+
+    /* create a result holder */
+    result = PyTuple_New(tuplesize);
+    if (result == NULL) {
+        Py_DECREF(ittuple);
+        Py_DECREF(filler);
+        return NULL;
+    }
+    for (i = 0; i < tuplesize; i++) {
+        Py_INCREF(Py_None);
+        PyTuple_SET_ITEM(result, i, Py_None);
+    }
+
+    /* create iziplongestobject structure */
+    lz = (iziplongestobject *)type->tp_alloc(type, 0);
+    if (lz == NULL) {
+        Py_DECREF(ittuple);
+        Py_DECREF(filler);
+        Py_DECREF(result);
+        return NULL;
+    }
+    lz->ittuple = ittuple;
+    lz->tuplesize = tuplesize;
+    lz->numactive = tuplesize;
+    lz->result = result;
+    Py_INCREF(fillvalue);
+    lz->fillvalue = fillvalue;
+    /* filler is already a new reference owned by this function; hand it
+       over as-is.  An extra Py_INCREF here would never be balanced by the
+       single Py_XDECREF in dealloc and would leak the repeat object. */
+    lz->filler = filler;
+    return (PyObject *)lz;
+}
+
+/* tp_dealloc: release every reference held by the object, then free it. */
+static void
+izip_longest_dealloc(iziplongestobject *lz)
+{
+ /* Stop GC tracking before any member is released. */
+ PyObject_GC_UnTrack(lz);
+ Py_XDECREF(lz->ittuple);
+ Py_XDECREF(lz->result);
+ Py_XDECREF(lz->fillvalue);
+ Py_XDECREF(lz->filler);
+ lz->ob_type->tp_free(lz);
+}
+
+/* tp_traverse: visit each PyObject member for the cyclic garbage collector.
+   Returns 0 after all members have been visited. */
+static int
+izip_longest_traverse(iziplongestobject *lz, visitproc visit, void *arg)
+{
+ Py_VISIT(lz->ittuple);
+ Py_VISIT(lz->result);
+ Py_VISIT(lz->fillvalue);
+ Py_VISIT(lz->filler);
+ return 0;
+}
+
+/* Produce the value for slot i of the next result tuple.
+ *
+ * Returns a new reference: either the next item of the i-th iterator or,
+ * if that iterator has just run out while other inputs are still live,
+ * the fill value (the iterator is then replaced by lz->filler so later
+ * rounds keep yielding the fill value).
+ *
+ * Returns NULL when iteration must stop: the last live input ran out
+ * (no exception left set), or the iterator raised something other than
+ * StopIteration (that exception is left set for the caller).
+ */
+static PyObject *
+izip_longest_fetch(iziplongestobject *lz, Py_ssize_t i)
+{
+    PyObject *it = PyTuple_GET_ITEM(lz->ittuple, i);
+    PyObject *item;
+
+    assert(PyIter_Check(it));
+    item = (*it->ob_type->tp_iternext)(it);
+    if (item != NULL)
+        return item;
+
+    /* tp_iternext signals exhaustion with NULL, with or without
+       StopIteration set.  Any other pending exception is a real error and
+       must not be papered over with a fill value. */
+    if (PyErr_Occurred()) {
+        if (!PyErr_ExceptionMatches(PyExc_StopIteration))
+            return NULL;
+        PyErr_Clear();
+    }
+
+    if (lz->numactive <= 1)
+        return NULL;    /* the last live input is exhausted */
+
+    /* Swap the dead iterator for the shared filler before dropping it, so
+       the tuple never holds a dangling pointer. */
+    Py_INCREF(lz->filler);
+    PyTuple_SET_ITEM(lz->ittuple, i, lz->filler);
+    Py_DECREF(it);
+    lz->numactive -= 1;
+
+    Py_INCREF(lz->fillvalue);
+    return lz->fillvalue;
+}
+
+/* tp_iternext: return the next tuple, padding exhausted inputs with the
+ * fill value.  Returns NULL without an exception when every input is
+ * exhausted (or there are no inputs), and NULL with the exception set when
+ * an input iterator fails.
+ */
+static PyObject *
+izip_longest_next(iziplongestobject *lz)
+{
+    Py_ssize_t i;
+    Py_ssize_t tuplesize = lz->tuplesize;
+    PyObject *result = lz->result;
+    PyObject *item;
+    PyObject *olditem;
+
+    if (tuplesize == 0)
+        return NULL;
+
+    if (result->ob_refcnt == 1) {
+        /* Nobody but us references the cached tuple, so it can be
+           refilled in place instead of allocating a new one. */
+        Py_INCREF(result);
+        for (i = 0; i < tuplesize; i++) {
+            item = izip_longest_fetch(lz, i);
+            if (item == NULL) {
+                Py_DECREF(result);
+                return NULL;
+            }
+            olditem = PyTuple_GET_ITEM(result, i);
+            PyTuple_SET_ITEM(result, i, item);
+            Py_DECREF(olditem);
+        }
+    } else {
+        /* The previous tuple is still in use by the caller; build a
+           fresh one. */
+        result = PyTuple_New(tuplesize);
+        if (result == NULL)
+            return NULL;
+        for (i = 0; i < tuplesize; i++) {
+            item = izip_longest_fetch(lz, i);
+            if (item == NULL) {
+                Py_DECREF(result);
+                return NULL;
+            }
+            PyTuple_SET_ITEM(result, i, item);
+        }
+    }
+    return result;
+}
+
+/* Docstring for the izip_longest type; installed through the tp_doc slot
+   of iziplongest_type. */
+PyDoc_STRVAR(izip_longest_doc,
+"izip_longest(iter1 [,iter2 [...]], [fillvalue=None]) --> izip_longest object\n\
+\n\
+Return an izip_longest object whose .next() method returns a tuple where\n\
+the i-th element comes from the i-th iterable argument. The .next()\n\
+method continues until the longest iterable in the argument sequence\n\
+is exhausted and then it raises StopIteration. When the shorter iterables\n\
+are exhausted, the fillvalue is substituted in their place. The fillvalue\n\
+defaults to None or can be specified by a keyword argument.\n\
+");
+
+/* Type object for itertools.izip_longest.  The slots are positional, so
+   their order must not change.  The type participates in cyclic GC
+   (Py_TPFLAGS_HAVE_GC with a traverse hook), may be subclassed
+   (Py_TPFLAGS_BASETYPE), and is its own iterator (tp_iter is
+   PyObject_SelfIter). */
+static PyTypeObject iziplongest_type = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /* ob_size */
+ "itertools.izip_longest", /* tp_name */
+ sizeof(iziplongestobject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)izip_longest_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+ Py_TPFLAGS_BASETYPE, /* tp_flags */
+ izip_longest_doc, /* tp_doc */
+ (traverseproc)izip_longest_traverse, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)izip_longest_next, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ izip_longest_new, /* tp_new */
+ PyObject_GC_Del, /* tp_free */
+};
/* module level code ********************************************************/
@@ -2485,6 +2717,7 @@ repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times\n\
\n\
Iterators terminating on the shortest input sequence:\n\
izip(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\
+izip_longest(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\
ifilter(pred, seq) --> elements of seq where pred(elem) is True\n\
ifilterfalse(pred, seq) --> elements of seq where pred(elem) is False\n\
islice(seq, [start,] stop [, step]) --> elements from\n\
@@ -2522,6 +2755,7 @@ inititertools(void)
&ifilterfalse_type,
&count_type,
&izip_type,
+ &iziplongest_type,
&repeat_type,
&groupby_type,
NULL