From 59d1d2b434e8cf79e8b1321f148254c68f56c1f7 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 20 Apr 2001 19:13:02 +0000 Subject: Iterators phase 1. This comprises: new slot tp_iter in type object, plus new flag Py_TPFLAGS_HAVE_ITER new C API PyObject_GetIter(), calls tp_iter new builtin iter(), with two forms: iter(obj), and iter(function, sentinel) new internal object types iterobject and calliterobject new exception StopIteration new opcodes for "for" loops, GET_ITER and FOR_ITER (also supported by dis.py) new magic number for .pyc files new special method for instances: __iter__() returns an iterator iteration over dictionaries: "for x in dict" iterates over the keys iteration over files: "for x in file" iterates over lines TODO: documentation test suite decide whether to use a different way to spell iter(function, sentinel) decide whether "for key in dict" is a good idea use iterators in map/filter/reduce, min/max, and elsewhere (in/not in?) speed tuning (make next() a slot tp_next???) 
--- Include/Python.h | 1 + Include/abstract.h | 5 +++ Include/object.h | 14 +++++-- Include/opcode.h | 2 + Include/pyerrors.h | 1 + Lib/dis.py | 2 + Makefile.pre.in | 2 + Objects/abstract.c | 17 ++++++++ Objects/classobject.c | 31 ++++++++++++++- Objects/dictobject.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++ Objects/stringobject.c | 2 + Python/bltinmodule.c | 27 +++++++++++++ Python/ceval.c | 41 ++++++++++++++++++++ Python/compile.c | 24 +++--------- Python/exceptions.c | 7 ++++ Python/import.c | 2 +- 16 files changed, 256 insertions(+), 25 deletions(-) diff --git a/Include/Python.h b/Include/Python.h index b771c8c..2393872 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -82,6 +82,7 @@ #include "traceback.h" #include "sliceobject.h" #include "cellobject.h" +#include "iterobject.h" #include "codecs.h" #include "pyerrors.h" diff --git a/Include/abstract.h b/Include/abstract.h index e0765e5..c56c887 100644 --- a/Include/abstract.h +++ b/Include/abstract.h @@ -470,6 +470,11 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/ */ + DL_IMPORT(PyObject *) PyObject_GetIter(PyObject *); + /* Takes an object and returns an iterator for it. + This is typically a new iterator but if the argument + is an iterator, this returns itself. 
*/ + /* Number Protocol:*/ DL_IMPORT(int) PyNumber_Check(PyObject *o); diff --git a/Include/object.h b/Include/object.h index 80669da..4a53835 100644 --- a/Include/object.h +++ b/Include/object.h @@ -200,6 +200,7 @@ typedef int (*cmpfunc)(PyObject *, PyObject *); typedef PyObject *(*reprfunc)(PyObject *); typedef long (*hashfunc)(PyObject *); typedef PyObject *(*richcmpfunc) (PyObject *, PyObject *, int); +typedef PyObject *(*getiterfunc) (PyObject *); typedef struct _typeobject { PyObject_VAR_HEAD @@ -249,8 +250,11 @@ typedef struct _typeobject { /* weak reference enabler */ long tp_weaklistoffset; + /* Iterators */ + getiterfunc tp_iter; + #ifdef COUNT_ALLOCS - /* these must be last */ + /* these must be last and never explicitly initialized */ int tp_alloc; int tp_free; int tp_maxalloc; @@ -342,20 +346,22 @@ given type object has a specified feature. /* PyNumberMethods do their own coercion */ #define Py_TPFLAGS_CHECKTYPES (1L<<4) +/* tp_richcompare is defined */ #define Py_TPFLAGS_HAVE_RICHCOMPARE (1L<<5) /* Objects which are weakly referencable if their tp_weaklistoffset is >0 */ -/* XXX Should this have the same value as Py_TPFLAGS_HAVE_RICHCOMPARE? - * These both indicate a feature that appeared in the same alpha release. 
- */ #define Py_TPFLAGS_HAVE_WEAKREFS (1L<<6) +/* tp_iter is defined */ +#define Py_TPFLAGS_HAVE_ITER (1L<<7) + #define Py_TPFLAGS_DEFAULT ( \ Py_TPFLAGS_HAVE_GETCHARBUFFER | \ Py_TPFLAGS_HAVE_SEQUENCE_IN | \ Py_TPFLAGS_HAVE_INPLACEOPS | \ Py_TPFLAGS_HAVE_RICHCOMPARE | \ Py_TPFLAGS_HAVE_WEAKREFS | \ + Py_TPFLAGS_HAVE_ITER | \ 0) #define PyType_HasFeature(t,f) (((t)->tp_flags & (f)) != 0) diff --git a/Include/opcode.h b/Include/opcode.h index 546ad08..0cace8e 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -53,6 +53,7 @@ extern "C" { #define BINARY_XOR 65 #define BINARY_OR 66 #define INPLACE_POWER 67 +#define GET_ITER 68 #define PRINT_EXPR 70 #define PRINT_ITEM 71 @@ -80,6 +81,7 @@ extern "C" { #define STORE_NAME 90 /* Index in name list */ #define DELETE_NAME 91 /* "" */ #define UNPACK_SEQUENCE 92 /* Number of sequence items */ +#define FOR_ITER 93 #define STORE_ATTR 95 /* Index in name list */ #define DELETE_ATTR 96 /* "" */ diff --git a/Include/pyerrors.h b/Include/pyerrors.h index 622af2d..3c57e2f 100644 --- a/Include/pyerrors.h +++ b/Include/pyerrors.h @@ -24,6 +24,7 @@ DL_IMPORT(void) PyErr_NormalizeException(PyObject**, PyObject**, PyObject**); /* Predefined exceptions */ extern DL_IMPORT(PyObject *) PyExc_Exception; +extern DL_IMPORT(PyObject *) PyExc_StopIteration; extern DL_IMPORT(PyObject *) PyExc_StandardError; extern DL_IMPORT(PyObject *) PyExc_ArithmeticError; extern DL_IMPORT(PyObject *) PyExc_LookupError; diff --git a/Lib/dis.py b/Lib/dis.py index dc30257..fb97369 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -205,6 +205,7 @@ def_op('BINARY_AND', 64) def_op('BINARY_XOR', 65) def_op('BINARY_OR', 66) def_op('INPLACE_POWER', 67) +def_op('GET_ITER', 68) def_op('PRINT_EXPR', 70) def_op('PRINT_ITEM', 71) @@ -232,6 +233,7 @@ HAVE_ARGUMENT = 90 # Opcodes from here have an argument: name_op('STORE_NAME', 90) # Index in name list name_op('DELETE_NAME', 91) # "" def_op('UNPACK_SEQUENCE', 92) # Number of tuple items +def_op('FOR_ITER', 93) 
name_op('STORE_ATTR', 95) # Index in name list name_op('DELETE_ATTR', 96) # "" diff --git a/Makefile.pre.in b/Makefile.pre.in index f2f3336..103a36b 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -238,6 +238,7 @@ OBJECT_OBJS= \ Objects/frameobject.o \ Objects/funcobject.o \ Objects/intobject.o \ + Objects/iterobject.o \ Objects/listobject.o \ Objects/longobject.o \ Objects/dictobject.o \ @@ -433,6 +434,7 @@ PYTHON_HEADERS= \ Include/bufferobject.h \ Include/tupleobject.h \ Include/listobject.h \ + Include/iterobject.h \ Include/dictobject.h \ Include/methodobject.h \ Include/moduleobject.h \ diff --git a/Objects/abstract.c b/Objects/abstract.c index 3ccac71..8a6df76 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1738,3 +1738,20 @@ PyObject_IsSubclass(PyObject *derived, PyObject *cls) return retval; } + +PyObject * +PyObject_GetIter(PyObject *o) +{ + PyTypeObject *t = o->ob_type; + getiterfunc f = NULL; + if (PyType_HasFeature(t, Py_TPFLAGS_HAVE_ITER)) + f = t->tp_iter; + if (f == NULL) { + if (PySequence_Check(o)) + return PyIter_New(o); + PyErr_SetString(PyExc_TypeError, "iter() of non-sequence"); + return NULL; + } + else + return (*f)(o); +} diff --git a/Objects/classobject.c b/Objects/classobject.c index fa71c4e..742e472 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -848,7 +848,7 @@ instance_traverse(PyInstanceObject *o, visitproc visit, void *arg) return 0; } -static PyObject *getitemstr, *setitemstr, *delitemstr, *lenstr; +static PyObject *getitemstr, *setitemstr, *delitemstr, *lenstr, *iterstr; static int instance_length(PyInstanceObject *inst) @@ -1712,6 +1712,32 @@ instance_richcompare(PyObject *v, PyObject *w, int op) } +/* Get the iterator */ +static PyObject * +instance_getiter(PyInstanceObject *self) +{ + PyObject *func; + + if (iterstr == NULL) + iterstr = PyString_InternFromString("__iter__"); + if (getitemstr == NULL) + getitemstr = PyString_InternFromString("__getitem__"); + + if ((func = 
instance_getattr(self, iterstr)) != NULL) { + PyObject *res = PyEval_CallObject(func, (PyObject *)NULL); + Py_DECREF(func); + return res; + } + PyErr_Clear(); + if ((func = instance_getattr(self, getitemstr)) == NULL) { + PyErr_SetString(PyExc_TypeError, "iter() of non-sequence"); + return NULL; + } + Py_DECREF(func); + return PyIter_New((PyObject *)self); +} + + static PyNumberMethods instance_as_number = { (binaryfunc)instance_add, /* nb_add */ (binaryfunc)instance_sub, /* nb_subtract */ @@ -1775,7 +1801,8 @@ PyTypeObject PyInstance_Type = { (traverseproc)instance_traverse, /* tp_traverse */ 0, /* tp_clear */ instance_richcompare, /* tp_richcompare */ - offsetof(PyInstanceObject, in_weakreflist) /* tp_weaklistoffset */ + offsetof(PyInstanceObject, in_weakreflist), /* tp_weaklistoffset */ + (getiterfunc)instance_getiter, /* tp_iter */ }; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 95d5b71..17b6a04 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1324,6 +1324,8 @@ static PySequenceMethods dict_as_sequence = { 0, /* sq_inplace_repeat */ }; +staticforward PyObject *dictiter_new(dictobject *); + PyTypeObject PyDict_Type = { PyObject_HEAD_INIT(&PyType_Type) 0, @@ -1350,6 +1352,8 @@ PyTypeObject PyDict_Type = { (traverseproc)dict_traverse, /* tp_traverse */ (inquiry)dict_tp_clear, /* tp_clear */ 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + (getiterfunc)dictiter_new, /* tp_iter */ }; /* For backward compatibility with old dictionary interface */ @@ -1392,3 +1396,102 @@ PyDict_DelItemString(PyObject *v, char *key) Py_DECREF(kv); return err; } + +/* Dictionary iterator type */ + +extern PyTypeObject PyDictIter_Type; /* Forward */ + +typedef struct { + PyObject_HEAD + dictobject *di_dict; + int di_size; + int di_pos; +} dictiterobject; + +static PyObject * +dictiter_new(dictobject *dict) +{ + dictiterobject *di; + di = PyObject_NEW(dictiterobject, &PyDictIter_Type); + if (di == NULL) + return NULL; + Py_INCREF(dict); + 
di->di_dict = dict; + di->di_size = dict->ma_size; + di->di_pos = 0; + return (PyObject *)di; +} + +static void +dictiter_dealloc(dictiterobject *di) +{ + Py_DECREF(di->di_dict); + PyObject_DEL(di); +} + +static PyObject * +dictiter_next(dictiterobject *di, PyObject *args) +{ + PyObject *key; + if (di->di_size != di->di_dict->ma_size) { + PyErr_SetString(PyExc_RuntimeError, + "dictionary changed size during iteration"); + return NULL; + } + if (PyDict_Next((PyObject *)(di->di_dict), &di->di_pos, &key, NULL)) { + Py_INCREF(key); + return key; + } + PyErr_SetObject(PyExc_StopIteration, Py_None); + return NULL; +} + +static PyObject * +dictiter_getiter(PyObject *it) +{ + Py_INCREF(it); + return it; +} + +static PyMethodDef dictiter_methods[] = { + {"next", (PyCFunction)dictiter_next, METH_VARARGS, + "it.next() -- get the next value, or raise StopIteration"}, + {NULL, NULL} /* sentinel */ +}; + +static PyObject * +dictiter_getattr(dictiterobject *it, char *name) +{ + return Py_FindMethod(dictiter_methods, (PyObject *)it, name); +} + +PyTypeObject PyDictIter_Type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, /* ob_size */ + "dictionary-iterator", /* tp_name */ + sizeof(dictiterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)dictiter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + (getattrfunc)dictiter_getattr, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + (getiterfunc)dictiter_getiter, /* tp_iter */ +}; diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 1701b2f..861cade 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ 
-3232,6 +3232,8 @@ PyString_Fini(void) void _Py_ReleaseInternedStrings(void) { if (interned) { + fprintf(stderr, "releasing interned strings\n"); + PyDict_Clear(interned); Py_DECREF(interned); interned = NULL; } diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 576447c..7e8f555 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -1312,6 +1312,32 @@ Convert a string or number to a floating point number, if possible."; static PyObject * +builtin_iter(PyObject *self, PyObject *args) +{ + PyObject *v, *w = NULL; + + if (!PyArg_ParseTuple(args, "O|O:iter", &v, &w)) + return NULL; + if (w == NULL) + return PyObject_GetIter(v); + if (!PyCallable_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "iter(v, w): v must be callable"); + return NULL; + } + return PyCallIter_New(v, w); +} + +static char iter_doc[] = +"iter(collection) -> iterator\n\ +iter(callable, sentinel) -> iterator\n\ +\n\ +Get an iterator from an object. In the first form, the argument must\n\ +supply its own iterator, or be a sequence.\n\ +In the second form, the callable is called until it returns the sentinel."; + + +static PyObject * builtin_len(PyObject *self, PyObject *args) { PyObject *v; @@ -2148,6 +2174,7 @@ static PyMethodDef builtin_methods[] = { {"int", builtin_int, 1, int_doc}, {"isinstance", builtin_isinstance, 1, isinstance_doc}, {"issubclass", builtin_issubclass, 1, issubclass_doc}, + {"iter", builtin_iter, 1, iter_doc}, {"len", builtin_len, 1, len_doc}, {"list", builtin_list, 1, list_doc}, {"locals", builtin_locals, 1, locals_doc}, diff --git a/Python/ceval.c b/Python/ceval.c index b6686b6..4f4a646 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -381,6 +381,7 @@ eval_code2(PyCodeObject *co, PyObject *globals, PyObject *locals, /* Make it easier to find out where we are with a debugger */ char *filename = PyString_AsString(co->co_filename); #endif + static PyObject *nextstr; /* Code access macros */ @@ -416,6 +417,11 @@ eval_code2(PyCodeObject *co, PyObject 
*globals, PyObject *locals, GETLOCAL(i) = value; } while (0) /* Start of code */ + if (nextstr == NULL) { + nextstr = PyString_InternFromString("next"); + if (nextstr == NULL) + return NULL; + } #ifdef USE_STACKCHECK if (tstate->recursion_depth%10 == 0 && PyOS_CheckStack()) { @@ -1875,6 +1881,41 @@ eval_code2(PyCodeObject *co, PyObject *globals, PyObject *locals, JUMPTO(oparg); continue; + case GET_ITER: + /* before: [obj]; after [getiter(obj)] */ + v = POP(); + x = PyObject_GetIter(v); + Py_DECREF(v); + if (x != NULL) { + w = x; + x = PyObject_GetAttr(w, nextstr); + Py_DECREF(w); + if (x != NULL) { + PUSH(x); + continue; + } + } + break; + + case FOR_ITER: + /* before: [iter]; after: [iter, iter()] *or* [] */ + v = TOP(); + x = PyObject_CallObject(v, NULL); + if (x == NULL) { + if (PyErr_ExceptionMatches( + PyExc_StopIteration)) + { + PyErr_Clear(); + x = v = POP(); + Py_DECREF(v); + JUMPBY(oparg); + continue; + } + break; + } + PUSH(x); + continue; + case FOR_LOOP: /* for v in s: ... On entry: stack contains s, i. 
diff --git a/Python/compile.c b/Python/compile.c index 0939f05..cb85ce3 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -1232,21 +1232,15 @@ parsestrplus(node *n) static void com_list_for(struct compiling *c, node *n, node *e, char *t) { - PyObject *v; int anchor = 0; int save_begin = c->c_begin; /* list_iter: for v in expr [list_iter] */ com_node(c, CHILD(n, 3)); /* expr */ - v = PyInt_FromLong(0L); - if (v == NULL) - c->c_errors++; - com_addoparg(c, LOAD_CONST, com_addconst(c, v)); - com_push(c, 1); - Py_XDECREF(v); + com_addbyte(c, GET_ITER); c->c_begin = c->c_nexti; com_addoparg(c, SET_LINENO, n->n_lineno); - com_addfwref(c, FOR_LOOP, &anchor); + com_addfwref(c, FOR_ITER, &anchor); com_push(c, 1); com_assign(c, CHILD(n, 1), OP_ASSIGN, NULL); c->c_loops++; @@ -1255,7 +1249,7 @@ com_list_for(struct compiling *c, node *n, node *e, char *t) com_addoparg(c, JUMP_ABSOLUTE, c->c_begin); c->c_begin = save_begin; com_backpatch(c, anchor); - com_pop(c, 2); /* FOR_LOOP has popped these */ + com_pop(c, 1); /* FOR_ITER has popped this */ } static void @@ -2873,7 +2867,6 @@ com_while_stmt(struct compiling *c, node *n) static void com_for_stmt(struct compiling *c, node *n) { - PyObject *v; int break_anchor = 0; int anchor = 0; int save_begin = c->c_begin; @@ -2882,15 +2875,10 @@ com_for_stmt(struct compiling *c, node *n) com_addfwref(c, SETUP_LOOP, &break_anchor); block_push(c, SETUP_LOOP); com_node(c, CHILD(n, 3)); - v = PyInt_FromLong(0L); - if (v == NULL) - c->c_errors++; - com_addoparg(c, LOAD_CONST, com_addconst(c, v)); - com_push(c, 1); - Py_XDECREF(v); + com_addbyte(c, GET_ITER); c->c_begin = c->c_nexti; com_addoparg(c, SET_LINENO, n->n_lineno); - com_addfwref(c, FOR_LOOP, &anchor); + com_addfwref(c, FOR_ITER, &anchor); com_push(c, 1); com_assign(c, CHILD(n, 1), OP_ASSIGN, NULL); c->c_loops++; @@ -2899,7 +2887,7 @@ com_for_stmt(struct compiling *c, node *n) com_addoparg(c, JUMP_ABSOLUTE, c->c_begin); c->c_begin = save_begin; com_backpatch(c, anchor); - 
com_pop(c, 2); /* FOR_LOOP has popped these */ + com_pop(c, 1); /* FOR_ITER has popped this */ com_addbyte(c, POP_BLOCK); block_pop(c, SETUP_LOOP); if (NCH(n) > 8) diff --git a/Python/exceptions.c b/Python/exceptions.c index ad8021e..214d8e5 100644 --- a/Python/exceptions.c +++ b/Python/exceptions.c @@ -52,6 +52,7 @@ recommended that user defined class based exceptions be derived from the\n\ Exception\n\ |\n\ +-- SystemExit\n\ + +-- StopIteration\n\ +-- StandardError\n\ | |\n\ | +-- KeyboardInterrupt\n\ @@ -369,6 +370,9 @@ StandardError__doc__[] = "Base class for all standard Python exceptions."; static char TypeError__doc__[] = "Inappropriate argument type."; +static char +StopIteration__doc__[] = "Signal the end from iterator.next()."; + static char @@ -924,6 +928,7 @@ static PyMethodDef functions[] = { /* Global C API defined exceptions */ PyObject *PyExc_Exception; +PyObject *PyExc_StopIteration; PyObject *PyExc_StandardError; PyObject *PyExc_ArithmeticError; PyObject *PyExc_LookupError; @@ -985,6 +990,8 @@ static struct { * The first three classes MUST appear in exactly this order */ {"Exception", &PyExc_Exception}, + {"StopIteration", &PyExc_StopIteration, &PyExc_Exception, + StopIteration__doc__}, {"StandardError", &PyExc_StandardError, &PyExc_Exception, StandardError__doc__}, {"TypeError", &PyExc_TypeError, 0, TypeError__doc__}, diff --git a/Python/import.c b/Python/import.c index bde49ce..2c15d10 100644 --- a/Python/import.c +++ b/Python/import.c @@ -43,7 +43,7 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *); /* XXX Perhaps the magic number should be frozen and a version field added to the .pyc file header? */ /* New way to come up with the magic number: (YEAR-1995), MONTH, DAY */ -#define MAGIC (60202 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (60420 | ((long)'\r'<<16) | ((long)'\n'<<24)) /* Magic word as global; note that _PyImport_Init() can change the value of this global to accommodate for alterations of how the -- cgit v0.12