diff options
author | Raymond Hettinger <python@rcn.com> | 2003-12-06 16:23:06 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2003-12-06 16:23:06 (GMT) |
commit | d25c1c635164daa5c300342ac99c0810fd9b575c (patch) | |
tree | df412ba3ffaa8fee35e2e12f96aab0beecdaaec0 /Modules/itertoolsmodule.c | |
parent | b8d5f245b7077d869121835ed72656ac14962ef0 (diff) | |
download | cpython-d25c1c635164daa5c300342ac99c0810fd9b575c.zip cpython-d25c1c635164daa5c300342ac99c0810fd9b575c.tar.gz cpython-d25c1c635164daa5c300342ac99c0810fd9b575c.tar.bz2 |
Implement itertools.groupby()
Original idea by Guido van Rossum.
Idea for skipable inner iterators by Raymond Hettinger.
Idea for argument order and identity function default by Alex Martelli.
Implementation by Hye-Shik Chang (with tweaks by Raymond Hettinger).
Diffstat (limited to 'Modules/itertoolsmodule.c')
-rw-r--r-- | Modules/itertoolsmodule.c | 322 |
1 files changed, 321 insertions, 1 deletions
diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index a341a66..387133c 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -7,6 +7,323 @@ All rights reserved. */ + +/* groupby object ***********************************************************/ + +typedef struct { + PyObject_HEAD + PyObject *it; + PyObject *keyfunc; + PyObject *tgtkey; + PyObject *currkey; + PyObject *currvalue; +} groupbyobject; + +static PyTypeObject groupby_type; +static PyObject *_grouper_create(groupbyobject *, PyObject *); + +static PyObject * +groupby_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + static char *kwargs[] = {"iterable", "key", NULL}; + groupbyobject *gbo; + PyObject *it, *keyfunc = Py_None; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:groupby", kwargs, + &it, &keyfunc)) + return NULL; + + gbo = (groupbyobject *)type->tp_alloc(type, 0); + if (gbo == NULL) + return NULL; + gbo->tgtkey = NULL; + gbo->currkey = NULL; + gbo->currvalue = NULL; + gbo->keyfunc = keyfunc; + Py_INCREF(keyfunc); + gbo->it = PyObject_GetIter(it); + if (gbo->it == NULL) { + Py_DECREF(gbo); + return NULL; + } + return (PyObject *)gbo; +} + +static void +groupby_dealloc(groupbyobject *gbo) +{ + PyObject_GC_UnTrack(gbo); + Py_XDECREF(gbo->it); + Py_XDECREF(gbo->keyfunc); + Py_XDECREF(gbo->tgtkey); + Py_XDECREF(gbo->currkey); + Py_XDECREF(gbo->currvalue); + gbo->ob_type->tp_free(gbo); +} + +static int +groupby_traverse(groupbyobject *gbo, visitproc visit, void *arg) +{ + int err; + + if (gbo->it) { + err = visit(gbo->it, arg); + if (err) + return err; + } + if (gbo->keyfunc) { + err = visit(gbo->keyfunc, arg); + if (err) + return err; + } + if (gbo->tgtkey) { + err = visit(gbo->tgtkey, arg); + if (err) + return err; + } + if (gbo->currkey) { + err = visit(gbo->currkey, arg); + if (err) + return err; + } + if (gbo->currvalue) { + err = visit(gbo->currvalue, arg); + if (err) + return err; + } + return 0; +} + +static PyObject * +groupby_next(groupbyobject *gbo) +{ + PyObject *newvalue, *newkey, *r, *grouper; + + /* skip to next iteration group */ + for (;;) { + if (gbo->currkey == NULL) + /* pass */; + else if (gbo->tgtkey == NULL) + break; + else { + int rcmp; + + rcmp = PyObject_RichCompareBool(gbo->tgtkey, + gbo->currkey, Py_EQ); + if (rcmp == -1) + return NULL; + else if (rcmp == 0) + break; + } + + newvalue = PyIter_Next(gbo->it); + if (newvalue == NULL) + return NULL; + + if (gbo->keyfunc == Py_None) { + newkey = newvalue; + Py_INCREF(newvalue); + } else { + newkey = PyObject_CallFunctionObjArgs(gbo->keyfunc, + newvalue, NULL); + if (newkey == NULL) { + Py_DECREF(newvalue); + return NULL; + } + } + + Py_XDECREF(gbo->currkey); + gbo->currkey = newkey; + Py_XDECREF(gbo->currvalue); + gbo->currvalue = newvalue; + } + + Py_XDECREF(gbo->tgtkey); + gbo->tgtkey = gbo->currkey; + Py_INCREF(gbo->currkey); + + grouper = _grouper_create(gbo, gbo->tgtkey); + if (grouper == NULL) + return NULL; + + r = PyTuple_Pack(2, gbo->currkey, grouper); + Py_DECREF(grouper); + return r; +} + +PyDoc_STRVAR(groupby_doc, +"groupby(iterable[, keyfunc]) -> create an iterator which returns\n\ +(key, sub-iterator) grouped by each value of key(value).\n"); + +static PyTypeObject groupby_type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "itertools.groupby", /* tp_name */ + sizeof(groupbyobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)groupby_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE, /* tp_flags */ + groupby_doc, /* tp_doc */ + (traverseproc)groupby_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)groupby_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + groupby_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; + + +/* _grouper object (internal) ************************************************/ + +typedef struct { + PyObject_HEAD + PyObject *parent; + PyObject *tgtkey; +} _grouperobject; + +static PyTypeObject _grouper_type; + +static PyObject * +_grouper_create(groupbyobject *parent, PyObject *tgtkey) +{ + _grouperobject *igo; + + igo = PyObject_New(_grouperobject, &_grouper_type); + if (igo == NULL) + return NULL; + igo->parent = (PyObject *)parent; + Py_INCREF(parent); + igo->tgtkey = tgtkey; + Py_INCREF(tgtkey); + + return (PyObject *)igo; +} + +static void +_grouper_dealloc(_grouperobject *igo) +{ + Py_DECREF(igo->parent); + Py_DECREF(igo->tgtkey); + PyObject_Del(igo); +} + +static PyObject * +_grouper_next(_grouperobject *igo) +{ + groupbyobject *gbo = (groupbyobject *)igo->parent; + PyObject *newvalue, *newkey, *r; + int rcmp; + + if (gbo->currvalue == NULL) { + newvalue = PyIter_Next(gbo->it); + if (newvalue == NULL) + return NULL; + + if (gbo->keyfunc == Py_None) { + newkey = newvalue; + Py_INCREF(newvalue); + } else { + newkey = PyObject_CallFunctionObjArgs(gbo->keyfunc, + newvalue, NULL); + if (newkey == NULL) { + Py_DECREF(newvalue); + return NULL; + } + } + + assert(gbo->currkey == NULL); + gbo->currkey = newkey; + gbo->currvalue = newvalue; + } + + assert(gbo->currkey != NULL); + rcmp = PyObject_RichCompareBool(igo->tgtkey, gbo->currkey, Py_EQ); + if (rcmp <= 0) + /* got any error or current group is end */ + return NULL; + + r = gbo->currvalue; + gbo->currvalue = NULL; + Py_DECREF(gbo->currkey); + gbo->currkey = NULL; + + return r; +} + +static PyTypeObject _grouper_type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "itertools._grouper", /* tp_name */ + sizeof(_grouperobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)_grouper_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)_grouper_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + PyObject_Del, /* tp_free */ +}; + + + /* tee object and with supporting function and objects ***************/ /* The teedataobject pre-allocates space for LINKCELLS number of objects. @@ -2103,6 +2420,7 @@ tee(it, n=2) --> (it1, it2 , ... itn) splits one iterator into n\n\ chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n\ takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\ dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails\n\ +groupby(iterable[, keyfunc]) --> sub-iterators grouped by value of keyfunc(v)\n\ "); @@ -2130,6 +2448,7 @@ inititertools(void) &count_type, &izip_type, &repeat_type, + &groupby_type, NULL }; @@ -2148,5 +2467,6 @@ inititertools(void) return; if (PyType_Ready(&tee_type) < 0) return; - + if (PyType_Ready(&_grouper_type) < 0) + return; } |