diff options
author | Raymond Hettinger <python@rcn.com> | 2009-01-25 21:04:14 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2009-01-25 21:04:14 (GMT) |
commit | 2bcb8e9b0d19bf60b38553dcb1d9a6c4cb86a8a1 (patch) | |
tree | 11bc79db1c0995c2a116f55abf8d704b4234c2aa | |
parent | 1af3b5a9a42217959868d9510d80c5a805af5fca (diff) | |
download | cpython-2bcb8e9b0d19bf60b38553dcb1d9a6c4cb86a8a1.zip cpython-2bcb8e9b0d19bf60b38553dcb1d9a6c4cb86a8a1.tar.gz cpython-2bcb8e9b0d19bf60b38553dcb1d9a6c4cb86a8a1.tar.bz2 |
Promote compress() from a recipe to being a regular itertool.
-rw-r--r-- | Doc/library/collections.rst | 2 | ||||
-rw-r--r-- | Doc/library/itertools.rst | 18 | ||||
-rw-r--r-- | Lib/test/test_itertools.py | 40 | ||||
-rw-r--r-- | Misc/NEWS | 2 | ||||
-rw-r--r-- | Modules/itertoolsmodule.c | 157 |
5 files changed, 206 insertions, 13 deletions
diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 51fdf89..0dc0ae4 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -298,7 +298,7 @@ counts less than one:: Section 4.6.3, Exercise 19*\. * To enumerate all distinct multisets of a given size over a given set of - elements, see the :func:`combinations_with_replacement` function in the + elements, see :func:`combinations_with_replacement` in the :ref:`itertools-recipes` for itertools:: map(Counter, combinations_with_replacement('ABC', 2)) --> AA AB AC BB BC CC diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index aef3f6a..903284e 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -139,6 +139,20 @@ loops that truncate the stream. .. versionadded:: 2.6 +.. function:: compress(data, selectors) + + Make an iterator that filters elements from *data* returning only those that + have a corresponding element in *selectors* that evaluates to ``True``. + Stops when either the *data* or *selectors* iterables have been exhausted. + Equivalent to:: + + def compress(data, selectors): + # compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F + return (d for d, s in izip(data, selectors) if s) + + .. versionadded:: 2.7 + + .. function:: count([n]) Make an iterator that returns consecutive integers starting with *n*. If not @@ -679,10 +693,6 @@ which incur interpreter overhead. for n in xrange(2**len(pairs)): yield set(x for m, x in pairs if m&n) - def compress(data, selectors): - "compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F" - return (d for d, s in izip(data, selectors) if s) - def combinations_with_replacement(iterable, r): "combinations_with_replacement('ABC', 2) --> AA AB AC BB BC CC" # number items returned: (n+r-1)! / r! / (n-1)! diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 2182cb9..a62bad2 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -191,6 +191,21 @@ class TestBasicOps(unittest.TestCase): self.assertEqual(len(set(map(id, permutations('abcde', 3)))), 1) self.assertNotEqual(len(set(map(id, list(permutations('abcde', 3))))), 1) + def test_compress(self): + self.assertEqual(list(compress('ABCDEF', [1,0,1,0,1,1])), list('ACEF')) + self.assertEqual(list(compress('ABCDEF', [0,0,0,0,0,0])), list('')) + self.assertEqual(list(compress('ABCDEF', [1,1,1,1,1,1])), list('ABCDEF')) + self.assertEqual(list(compress('ABCDEF', [1,0,1])), list('AC')) + self.assertEqual(list(compress('ABC', [0,1,1,1,1,1])), list('BC')) + n = 10000 + data = chain.from_iterable(repeat(range(6), n)) + selectors = chain.from_iterable(repeat((0, 1))) + self.assertEqual(list(compress(data, selectors)), [1,3,5] * n) + self.assertRaises(TypeError, compress, None, range(6)) # 1st arg not iterable + self.assertRaises(TypeError, compress, range(6), None) # 2nd arg not iterable + self.assertRaises(TypeError, compress, range(6)) # too few args + self.assertRaises(TypeError, compress, range(6), None) # too many args + def test_count(self): self.assertEqual(zip('abc',count()), [('a', 0), ('b', 1), ('c', 2)]) self.assertEqual(zip('abc',count(3)), [('a', 3), ('b', 4), ('c', 5)]) @@ -701,6 +716,9 @@ class TestExamples(unittest.TestCase): self.assertEqual(list(combinations(range(4), 3)), [(0,1,2), (0,1,3), (0,2,3), (1,2,3)]) + def test_compress(self): + self.assertEqual(list(compress('ABCDEF', [1,0,1,0,1,1])), list('ACEF')) + def test_count(self): self.assertEqual(list(islice(count(10), 5)), [10, 11, 12, 13, 14]) @@ -781,6 +799,10 @@ class TestGC(unittest.TestCase): a = [] self.makecycle(combinations([1,2,a,3], 3), a) + def test_compress(self): + a = [] + self.makecycle(compress('ABCDEF', [1,0,1,0,1,0]), a) + def test_cycle(self): a = [] self.makecycle(cycle([a]*2), a) @@ -934,6 +956,15 @@ class TestVariousIteratorArgs(unittest.TestCase): self.assertRaises(TypeError, list, chain(N(s))) self.assertRaises(ZeroDivisionError, list, chain(E(s))) + def test_compress(self): + for s in ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5)): + n = len(s) + for g in (G, I, Ig, S, L, R): + self.assertEqual(list(compress(g(s), repeat(1))), list(g(s))) + self.assertRaises(TypeError, compress, X(s), repeat(1)) + self.assertRaises(TypeError, list, compress(N(s), repeat(1))) + self.assertRaises(ZeroDivisionError, list, compress(E(s), repeat(1))) + def test_product(self): for s in ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5)): self.assertRaises(TypeError, product, X(s)) @@ -1125,7 +1156,7 @@ class SubclassWithKwargsTest(unittest.TestCase): def test_keywords_in_subclass(self): # count is not subclassable... for cls in (repeat, izip, ifilter, ifilterfalse, chain, imap, - starmap, islice, takewhile, dropwhile, cycle): + starmap, islice, takewhile, dropwhile, cycle, compress): class Subclass(cls): def __init__(self, newarg=None, *args): cls.__init__(self, *args) @@ -1262,10 +1293,6 @@ Samuele ... for n in xrange(2**len(pairs)): ... yield set(x for m, x in pairs if m&n) ->>> def compress(data, selectors): -... "compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F" -... return (d for d, s in izip(data, selectors) if s) - >>> def combinations_with_replacement(iterable, r): ... "combinations_with_replacement('ABC', 3) --> AA AB AC BB BC CC" ... pool = tuple(iterable) @@ -1361,9 +1388,6 @@ perform as purported. >>> map(sorted, powerset('ab')) [[], ['a'], ['b'], ['a', 'b']] ->>> list(compress('abcdef', [1,0,1,0,1,1])) -['a', 'c', 'e', 'f'] - >>> list(combinations_with_replacement('abc', 2)) [('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'b'), ('b', 'c'), ('c', 'c')] @@ -147,6 +147,8 @@ Library - Issue #4863: distutils.mwerkscompiler has been removed. +- Added a new function: itertools.compress(). + - Fix and properly document the multiprocessing module's logging support, expose the internal levels and provide proper usage examples. diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 9d6dbd9..f66d052 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -2506,6 +2506,162 @@ static PyTypeObject permutations_type = { }; +/* compress object ************************************************************/ + +/* Equivalent to: + + def compress(data, selectors): + "compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F" + return (d for d, s in izip(data, selectors) if s) +*/ + +typedef struct { + PyObject_HEAD + PyObject *data; + PyObject *selectors; +} compressobject; + +static PyTypeObject compress_type; + +static PyObject * +compress_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *seq1, *seq2; + PyObject *data=NULL, *selectors=NULL; + compressobject *lz; + + if (type == &compress_type && !_PyArg_NoKeywords("compress()", kwds)) + return NULL; + + if (!PyArg_UnpackTuple(args, "compress", 2, 2, &seq1, &seq2)) + return NULL; + + data = PyObject_GetIter(seq1); + if (data == NULL) + goto fail; + selectors = PyObject_GetIter(seq2); + if (selectors == NULL) + goto fail; + + /* create compressobject structure */ + lz = (compressobject *)type->tp_alloc(type, 0); + if (lz == NULL) + goto fail; + lz->data = data; + lz->selectors = selectors; + return (PyObject *)lz; + +fail: + Py_XDECREF(data); + Py_XDECREF(selectors); + return NULL; +} + +static void +compress_dealloc(compressobject *lz) +{ + PyObject_GC_UnTrack(lz); + Py_XDECREF(lz->data); + Py_XDECREF(lz->selectors); + Py_TYPE(lz)->tp_free(lz); +} + +static int +compress_traverse(compressobject *lz, visitproc visit, void *arg) +{ + Py_VISIT(lz->data); + Py_VISIT(lz->selectors); + return 0; +} + +static PyObject * +compress_next(compressobject *lz) +{ + PyObject *data = lz->data, *selectors = lz->selectors; + PyObject *datum, *selector; + PyObject *(*datanext)(PyObject *) = *Py_TYPE(data)->tp_iternext; + PyObject *(*selectornext)(PyObject *) = *Py_TYPE(selectors)->tp_iternext; + int ok; + + while (1) { + /* Steps: get datum, get selector, evaluate selector. + Order is important (to match the pure python version + in terms of which input gets a chance to raise an + exception first). + */ + + datum = datanext(data); + if (datum == NULL) + return NULL; + + selector = selectornext(selectors); + if (selector == NULL) { + Py_DECREF(datum); + return NULL; + } + + ok = PyObject_IsTrue(selector); + Py_DECREF(selector); + if (ok == 1) + return datum; + Py_DECREF(datum); + if (ok == -1) + return NULL; + } +} + +PyDoc_STRVAR(compress_doc, +"compress(data sequence, selector sequence) --> iterator over selected data\n\ +\n\ +Return data elements corresponding to true selector elements.\n\ +Forms a shorter iterator from selected data elements using the\n\ +selectors to choose the data elements."); + +static PyTypeObject compress_type = { + PyVarObject_HEAD_INIT(NULL, 0) + "itertools.compress", /* tp_name */ + sizeof(compressobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)compress_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE, /* tp_flags */ + compress_doc, /* tp_doc */ + (traverseproc)compress_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)compress_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + compress_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; + + /* ifilter object ************************************************************/ typedef struct { @@ -3552,6 +3708,7 @@ inititertools(void) &starmap_type, &imap_type, &chain_type, + &compress_type, &ifilter_type, &ifilterfalse_type, &count_type, |