diff options
author | Jeremy Hylton <jeremy@alum.mit.edu> | 2005-10-20 19:59:25 (GMT) |
---|---|---|
committer | Jeremy Hylton <jeremy@alum.mit.edu> | 2005-10-20 19:59:25 (GMT) |
commit | 3e0055f8c65c407e74ce476b8e2b1fb889723514 (patch) | |
tree | 169cce8c87033e15364b57de947073e6e9c34d59 /Objects/codeobject.c | |
parent | 2cb94aba122b86dcda87d437eb36a860d14393d5 (diff) | |
download | cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.zip cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.tar.gz cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.tar.bz2 |
Merge ast-branch to head
This change implements a new bytecode compiler, based on a
transformation of the parse tree to an abstract syntax defined in
Parser/Python.asdl.
The compiler implementation is not complete, but it is in stable
enough shape to run the entire test suite excepting two disabled
tests.
Diffstat (limited to 'Objects/codeobject.c')
-rw-r--r-- | Objects/codeobject.c | 453 |
1 files changed, 453 insertions, 0 deletions
diff --git a/Objects/codeobject.c b/Objects/codeobject.c new file mode 100644 index 0000000..c5ddfd5 --- /dev/null +++ b/Objects/codeobject.c @@ -0,0 +1,453 @@ +#include "Python.h" +#include "code.h" +#include "structmember.h" + +#define NAME_CHARS \ + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" + +/* all_name_chars(s): true iff all chars in s are valid NAME_CHARS */ + +static int +all_name_chars(unsigned char *s) +{ + static char ok_name_char[256]; + static unsigned char *name_chars = (unsigned char *)NAME_CHARS; + + if (ok_name_char[*name_chars] == 0) { + unsigned char *p; + for (p = name_chars; *p; p++) + ok_name_char[*p] = 1; + } + while (*s) { + if (ok_name_char[*s++] == 0) + return 0; + } + return 1; +} + +static void +intern_strings(PyObject *tuple) +{ + int i; + + for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { + PyObject *v = PyTuple_GET_ITEM(tuple, i); + if (v == NULL || !PyString_CheckExact(v)) { + Py_FatalError("non-string found in code slot"); + } + PyString_InternInPlace(&PyTuple_GET_ITEM(tuple, i)); + } +} + + +PyCodeObject * +PyCode_New(int argcount, int nlocals, int stacksize, int flags, + PyObject *code, PyObject *consts, PyObject *names, + PyObject *varnames, PyObject *freevars, PyObject *cellvars, + PyObject *filename, PyObject *name, int firstlineno, + PyObject *lnotab) +{ + PyCodeObject *co; + int i; + /* Check argument types */ + if (argcount < 0 || nlocals < 0 || + code == NULL || + consts == NULL || !PyTuple_Check(consts) || + names == NULL || !PyTuple_Check(names) || + varnames == NULL || !PyTuple_Check(varnames) || + freevars == NULL || !PyTuple_Check(freevars) || + cellvars == NULL || !PyTuple_Check(cellvars) || + name == NULL || !PyString_Check(name) || + filename == NULL || !PyString_Check(filename) || + lnotab == NULL || !PyString_Check(lnotab) || + !PyObject_CheckReadBuffer(code)) { + PyErr_BadInternalCall(); + return NULL; + } + intern_strings(names); + intern_strings(varnames); + intern_strings(freevars); + intern_strings(cellvars); + /* Intern selected string constants */ + for (i = PyTuple_Size(consts); --i >= 0; ) { + PyObject *v = PyTuple_GetItem(consts, i); + if (!PyString_Check(v)) + continue; + if (!all_name_chars((unsigned char *)PyString_AS_STRING(v))) + continue; + PyString_InternInPlace(&PyTuple_GET_ITEM(consts, i)); + } + co = PyObject_NEW(PyCodeObject, &PyCode_Type); + if (co != NULL) { + co->co_argcount = argcount; + co->co_nlocals = nlocals; + co->co_stacksize = stacksize; + co->co_flags = flags; + Py_INCREF(code); + co->co_code = code; + Py_INCREF(consts); + co->co_consts = consts; + Py_INCREF(names); + co->co_names = names; + Py_INCREF(varnames); + co->co_varnames = varnames; + Py_INCREF(freevars); + co->co_freevars = freevars; + Py_INCREF(cellvars); + co->co_cellvars = cellvars; + Py_INCREF(filename); + co->co_filename = filename; + Py_INCREF(name); + co->co_name = name; + co->co_firstlineno = firstlineno; + Py_INCREF(lnotab); + co->co_lnotab = lnotab; + } + return co; +} + + +#define OFF(x) offsetof(PyCodeObject, x) + +static PyMemberDef code_memberlist[] = { + {"co_argcount", T_INT, OFF(co_argcount), READONLY}, + {"co_nlocals", T_INT, OFF(co_nlocals), READONLY}, + {"co_stacksize",T_INT, OFF(co_stacksize), READONLY}, + {"co_flags", T_INT, OFF(co_flags), READONLY}, + {"co_code", T_OBJECT, OFF(co_code), READONLY}, + {"co_consts", T_OBJECT, OFF(co_consts), READONLY}, + {"co_names", T_OBJECT, OFF(co_names), READONLY}, + {"co_varnames", T_OBJECT, OFF(co_varnames), READONLY}, + {"co_freevars", T_OBJECT, OFF(co_freevars), READONLY}, + {"co_cellvars", T_OBJECT, OFF(co_cellvars), READONLY}, + {"co_filename", T_OBJECT, OFF(co_filename), READONLY}, + {"co_name", T_OBJECT, OFF(co_name), READONLY}, + {"co_firstlineno", T_INT, OFF(co_firstlineno), READONLY}, + {"co_lnotab", T_OBJECT, OFF(co_lnotab), READONLY}, + {NULL} /* Sentinel */ +}; + +/* Helper for code_new: return a shallow copy of a tuple that is + guaranteed to contain exact strings, by converting string subclasses + to exact strings and complaining if a non-string is found. */ +static PyObject* +validate_and_copy_tuple(PyObject *tup) +{ + PyObject *newtuple; + PyObject *item; + int i, len; + + len = PyTuple_GET_SIZE(tup); + newtuple = PyTuple_New(len); + if (newtuple == NULL) + return NULL; + + for (i = 0; i < len; i++) { + item = PyTuple_GET_ITEM(tup, i); + if (PyString_CheckExact(item)) { + Py_INCREF(item); + } + else if (!PyString_Check(item)) { + PyErr_Format( + PyExc_TypeError, + "name tuples must contain only " + "strings, not '%.500s'", + item->ob_type->tp_name); + Py_DECREF(newtuple); + return NULL; + } + else { + item = PyString_FromStringAndSize( + PyString_AS_STRING(item), + PyString_GET_SIZE(item)); + if (item == NULL) { + Py_DECREF(newtuple); + return NULL; + } + } + PyTuple_SET_ITEM(newtuple, i, item); + } + + return newtuple; +} + +PyDoc_STRVAR(code_doc, +"code(argcount, nlocals, stacksize, flags, codestring, constants, names,\n\ + varnames, filename, name, firstlineno, lnotab[, freevars[, cellvars]])\n\ +\n\ +Create a code object. Not for the faint of heart."); + +static PyObject * +code_new(PyTypeObject *type, PyObject *args, PyObject *kw) +{ + int argcount; + int nlocals; + int stacksize; + int flags; + PyObject *co = NULL; + PyObject *code; + PyObject *consts; + PyObject *names, *ournames = NULL; + PyObject *varnames, *ourvarnames = NULL; + PyObject *freevars = NULL, *ourfreevars = NULL; + PyObject *cellvars = NULL, *ourcellvars = NULL; + PyObject *filename; + PyObject *name; + int firstlineno; + PyObject *lnotab; + + if (!PyArg_ParseTuple(args, "iiiiSO!O!O!SSiS|O!O!:code", + &argcount, &nlocals, &stacksize, &flags, + &code, + &PyTuple_Type, &consts, + &PyTuple_Type, &names, + &PyTuple_Type, &varnames, + &filename, &name, + &firstlineno, &lnotab, + &PyTuple_Type, &freevars, + &PyTuple_Type, &cellvars)) + return NULL; + + if (argcount < 0) { + PyErr_SetString( + PyExc_ValueError, + "code: argcount must not be negative"); + goto cleanup; + } + + if (nlocals < 0) { + PyErr_SetString( + PyExc_ValueError, + "code: nlocals must not be negative"); + goto cleanup; + } + + ournames = validate_and_copy_tuple(names); + if (ournames == NULL) + goto cleanup; + ourvarnames = validate_and_copy_tuple(varnames); + if (ourvarnames == NULL) + goto cleanup; + if (freevars) + ourfreevars = validate_and_copy_tuple(freevars); + else + ourfreevars = PyTuple_New(0); + if (ourfreevars == NULL) + goto cleanup; + if (cellvars) + ourcellvars = validate_and_copy_tuple(cellvars); + else + ourcellvars = PyTuple_New(0); + if (ourcellvars == NULL) + goto cleanup; + + co = (PyObject *)PyCode_New(argcount, nlocals, stacksize, flags, + code, consts, ournames, ourvarnames, + ourfreevars, ourcellvars, filename, + name, firstlineno, lnotab); + cleanup: + Py_XDECREF(ournames); + Py_XDECREF(ourvarnames); + Py_XDECREF(ourfreevars); + Py_XDECREF(ourcellvars); + return co; +} + +static void +code_dealloc(PyCodeObject *co) +{ + Py_XDECREF(co->co_code); + Py_XDECREF(co->co_consts); + Py_XDECREF(co->co_names); + Py_XDECREF(co->co_varnames); + Py_XDECREF(co->co_freevars); + Py_XDECREF(co->co_cellvars); + Py_XDECREF(co->co_filename); + Py_XDECREF(co->co_name); + Py_XDECREF(co->co_lnotab); + PyObject_DEL(co); +} + +static PyObject * +code_repr(PyCodeObject *co) +{ + char buf[500]; + int lineno = -1; + char *filename = "???"; + char *name = "???"; + + if (co->co_firstlineno != 0) + lineno = co->co_firstlineno; + if (co->co_filename && PyString_Check(co->co_filename)) + filename = PyString_AS_STRING(co->co_filename); + if (co->co_name && PyString_Check(co->co_name)) + name = PyString_AS_STRING(co->co_name); + PyOS_snprintf(buf, sizeof(buf), + "<code object %.100s at %p, file \"%.300s\", line %d>", + name, co, filename, lineno); + return PyString_FromString(buf); +} + +static int +code_compare(PyCodeObject *co, PyCodeObject *cp) +{ + int cmp; + cmp = PyObject_Compare(co->co_name, cp->co_name); + if (cmp) return cmp; + cmp = co->co_argcount - cp->co_argcount; + if (cmp) goto normalize; + cmp = co->co_nlocals - cp->co_nlocals; + if (cmp) goto normalize; + cmp = co->co_flags - cp->co_flags; + if (cmp) goto normalize; + cmp = co->co_firstlineno - cp->co_firstlineno; + if (cmp) goto normalize; + cmp = PyObject_Compare(co->co_code, cp->co_code); + if (cmp) return cmp; + cmp = PyObject_Compare(co->co_consts, cp->co_consts); + if (cmp) return cmp; + cmp = PyObject_Compare(co->co_names, cp->co_names); + if (cmp) return cmp; + cmp = PyObject_Compare(co->co_varnames, cp->co_varnames); + if (cmp) return cmp; + cmp = PyObject_Compare(co->co_freevars, cp->co_freevars); + if (cmp) return cmp; + cmp = PyObject_Compare(co->co_cellvars, cp->co_cellvars); + return cmp; + + normalize: + if (cmp > 0) + return 1; + else if (cmp < 0) + return -1; + else + return 0; +} + +static long +code_hash(PyCodeObject *co) +{ + long h, h0, h1, h2, h3, h4, h5, h6; + h0 = PyObject_Hash(co->co_name); + if (h0 == -1) return -1; + h1 = PyObject_Hash(co->co_code); + if (h1 == -1) return -1; + h2 = PyObject_Hash(co->co_consts); + if (h2 == -1) return -1; + h3 = PyObject_Hash(co->co_names); + if (h3 == -1) return -1; + h4 = PyObject_Hash(co->co_varnames); + if (h4 == -1) return -1; + h5 = PyObject_Hash(co->co_freevars); + if (h5 == -1) return -1; + h6 = PyObject_Hash(co->co_cellvars); + if (h6 == -1) return -1; + h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^ h6 ^ + co->co_argcount ^ co->co_nlocals ^ co->co_flags; + if (h == -1) h = -2; + return h; +} + +/* XXX code objects need to participate in GC? */ + +PyTypeObject PyCode_Type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, + "code", + sizeof(PyCodeObject), + 0, + (destructor)code_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + (cmpfunc)code_compare, /* tp_compare */ + (reprfunc)code_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)code_hash, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + code_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + code_memberlist, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + code_new, /* tp_new */ +}; + +/* All about c_lnotab. + +c_lnotab is an array of unsigned bytes disguised as a Python string. In -O +mode, SET_LINENO opcodes aren't generated, and bytecode offsets are mapped +to source code line #s (when needed for tracebacks) via c_lnotab instead. +The array is conceptually a list of + (bytecode offset increment, line number increment) +pairs. The details are important and delicate, best illustrated by example: + + byte code offset source code line number + 0 1 + 6 2 + 50 7 + 350 307 + 361 308 + +The first trick is that these numbers aren't stored, only the increments +from one row to the next (this doesn't really work, but it's a start): + + 0, 1, 6, 1, 44, 5, 300, 300, 11, 1 + +The second trick is that an unsigned byte can't hold negative values, or +values larger than 255, so (a) there's a deep assumption that byte code +offsets and their corresponding line #s both increase monotonically, and (b) +if at least one column jumps by more than 255 from one row to the next, more +than one pair is written to the table. In case #b, there's no way to know +from looking at the table later how many were written. That's the delicate +part. A user of c_lnotab desiring to find the source line number +corresponding to a bytecode address A should do something like this + + lineno = addr = 0 + for addr_incr, line_incr in c_lnotab: + addr += addr_incr + if addr > A: + return lineno + lineno += line_incr + +In order for this to work, when the addr field increments by more than 255, +the line # increment in each pair generated must be 0 until the remaining addr +increment is < 256. So, in the example above, com_set_lineno should not (as +was actually done until 2.2) expand 300, 300 to 255, 255, 45, 45, but to +255, 0, 45, 255, 0, 45. +*/ + +int +PyCode_Addr2Line(PyCodeObject *co, int addrq) +{ + int size = PyString_Size(co->co_lnotab) / 2; + unsigned char *p = (unsigned char*)PyString_AsString(co->co_lnotab); + int line = co->co_firstlineno; + int addr = 0; + while (--size >= 0) { + addr += *p++; + if (addr > addrq) + break; + line += *p++; + } + return line; +} |