diff options
-rw-r--r-- | Doc/c-api/code.rst | 50 | ||||
-rw-r--r-- | Doc/c-api/concrete.rst | 1 | ||||
-rw-r--r-- | Doc/c-api/reflection.rst | 5 | ||||
-rw-r--r-- | Doc/library/sys.rst | 8 | ||||
-rw-r--r-- | Include/code.h | 23 | ||||
-rw-r--r-- | Include/frameobject.h | 10 | ||||
-rw-r--r-- | Lib/test/test_code.py | 16 | ||||
-rw-r--r-- | Misc/NEWS | 9 | ||||
-rw-r--r-- | Modules/_ctypes/callbacks.c | 34 | ||||
-rw-r--r-- | Modules/_testcapimodule.c | 16 | ||||
-rw-r--r-- | Modules/pyexpat.c | 44 | ||||
-rw-r--r-- | Objects/codeobject.c | 189 | ||||
-rw-r--r-- | Objects/frameobject.c | 26 | ||||
-rw-r--r-- | Objects/lnotab_notes.txt | 124 | ||||
-rw-r--r-- | Python/_warnings.c | 2 | ||||
-rw-r--r-- | Python/ceval.c | 27 | ||||
-rw-r--r-- | Python/compile.c | 54 | ||||
-rw-r--r-- | Python/traceback.c | 3 |
18 files changed, 335 insertions, 306 deletions
diff --git a/Doc/c-api/code.rst b/Doc/c-api/code.rst new file mode 100644 index 0000000..c6ca8c5 --- /dev/null +++ b/Doc/c-api/code.rst @@ -0,0 +1,50 @@ +.. highlightlang:: c + +.. _codeobjects: + +Code Objects +------------ + +.. sectionauthor:: Jeffrey Yasskin <jyasskin@gmail.com> + + +.. index:: + object: code + +Code objects are a low-level detail of the CPython implementation. +Each one represents a chunk of executable code that hasn't yet been +bound into a function. + +.. ctype:: PyCodeObject + + The C structure of the objects used to describe code objects. The + fields of this type are subject to change at any time. + + +.. cvar:: PyTypeObject PyCode_Type + + This is an instance of :ctype:`PyTypeObject` representing the Python + :class:`code` type. + + +.. cfunction:: int PyCode_Check(PyObject *co) + + Return true if *co* is a :class:`code` object + +.. cfunction:: int PyCode_GetNumFree(PyObject *co) + + Return the number of free variables in *co*. + +.. cfunction:: PyCodeObject *PyCode_New(int argcount, int nlocals, int stacksize, int flags, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *freevars, PyObject *cellvars, PyObject *filename, PyObject *name, int firstlineno, PyObject *lnotab) + + Return a new code object. If you need a dummy code object to + create a frame, use :cfunc:`PyCode_NewEmpty` instead. Calling + :cfunc:`PyCode_New` directly can bind you to a precise Python + version since the definition of the bytecode changes often. + + +.. cfunction:: int PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno) + + Return a new empty code object with the specified filename, + function name, and first line number. It is illegal to + :keyword:`exec` or :func:`eval` the resulting code object. diff --git a/Doc/c-api/concrete.rst b/Doc/c-api/concrete.rst index 4a4c142..8096054 100644 --- a/Doc/c-api/concrete.rst +++ b/Doc/c-api/concrete.rst @@ -105,3 +105,4 @@ Other Objects cell.rst gen.rst datetime.rst + code.rst diff --git a/Doc/c-api/reflection.rst b/Doc/c-api/reflection.rst index 822c593..3996c1f 100644 --- a/Doc/c-api/reflection.rst +++ b/Doc/c-api/reflection.rst @@ -29,6 +29,11 @@ Reflection currently executing. +.. cfunction:: int PyFrame_GetLineNumber(PyFrameObject *frame) + + Return the line number that *frame* is currently executing. + + .. cfunction:: int PyEval_GetRestricted() If there is a current frame and it is executing in restricted mode, return true, diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index ba22dc3..c4e3923 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -720,9 +720,11 @@ always available. specifies the local trace function. ``'line'`` - The interpreter is about to execute a new line of code (sometimes multiple - line events on one line exist). The local trace function is called; *arg* - is ``None``; the return value specifies the new local trace function. + The interpreter is about to execute a new line of code or re-execute the + condition of a loop. The local trace function is called; *arg* is + ``None``; the return value specifies the new local trace function. See + :file:`Objects/lnotab_notes.txt` for a detailed explanation of how this + works. ``'return'`` A function (or other code block) is about to return. The local trace diff --git a/Include/code.h b/Include/code.h index efaf3c4..c93d861 100644 --- a/Include/code.h +++ b/Include/code.h @@ -24,7 +24,8 @@ typedef struct { PyObject *co_filename; /* unicode (where it was loaded from) */ PyObject *co_name; /* unicode (name, for reference) */ int co_firstlineno; /* first source line number */ - PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) */ + PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) See + Objects/lnotab_notes.txt for details. */ void *co_zombieframe; /* for optimization only (see frameobject.c) */ } PyCodeObject; @@ -72,6 +73,14 @@ PyAPI_FUNC(PyCodeObject *) PyCode_New( PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, int, PyObject *); /* same as struct above */ + +/* Creates a new empty code object with the specified source location. */ +PyAPI_FUNC(PyCodeObject *) +PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno); + +/* Return the line number associated with the specified bytecode index + in this code object. If you just need the line number of a frame, + use PyFrame_GetLineNumber() instead. */ PyAPI_FUNC(int) PyCode_Addr2Line(PyCodeObject *, int); /* for internal use only */ @@ -80,15 +89,11 @@ typedef struct _addr_pair { int ap_upper; } PyAddrPair; -/* Check whether lasti (an instruction offset) falls outside bounds - and whether it is a line number that should be traced. Returns - a line number if it should be traced or -1 if the line should not. - - If lasti is not within bounds, updates bounds. +/* Update *bounds to describe the first and one-past-the-last instructions in the + same line as lasti. Return the number of that line. */ - -PyAPI_FUNC(int) PyCode_CheckLineNumber(PyCodeObject* co, - int lasti, PyAddrPair *bounds); +PyAPI_FUNC(int) _PyCode_CheckLineNumber(PyCodeObject* co, + int lasti, PyAddrPair *bounds); PyAPI_FUNC(PyObject*) PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, PyObject *lineno_obj); diff --git a/Include/frameobject.h b/Include/frameobject.h index 65ebd2a..9440973 100644 --- a/Include/frameobject.h +++ b/Include/frameobject.h @@ -38,8 +38,11 @@ typedef struct _frame { PyThreadState *f_tstate; int f_lasti; /* Last instruction if called */ - /* As of 2.3 f_lineno is only valid when tracing is active (i.e. when - f_trace is set) -- at other times use PyCode_Addr2Line instead. */ + /* Call PyFrame_GetLineNumber() instead of reading this field + directly. As of 2.3 f_lineno is only valid when tracing is + active (i.e. when f_trace is set). At other times we use + PyCode_Addr2Line to calculate the line from the current + bytecode index. */ int f_lineno; /* Current line number */ int f_iblock; /* index in f_blockstack */ PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */ @@ -75,6 +78,9 @@ PyAPI_FUNC(void) PyFrame_FastToLocals(PyFrameObject *); PyAPI_FUNC(int) PyFrame_ClearFreeList(void); +/* Return the line of code the frame is currently executing. */ +PyAPI_FUNC(int) PyFrame_GetLineNumber(PyFrameObject *); + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index c8fa2a1..53e787a 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -102,6 +102,9 @@ consts: ('None',) """ +import unittest +import _testcapi + def consts(t): """Yield a doctest-safe sequence of object reprs.""" for elt in t: @@ -118,10 +121,21 @@ def dump(co): print("%s: %s" % (attr, getattr(co, "co_" + attr))) print("consts:", tuple(consts(co.co_consts))) + +class CodeTest(unittest.TestCase): + + def test_newempty(self): + co = _testcapi.code_newempty("filename", "funcname", 15) + self.assertEquals(co.co_filename, "filename") + self.assertEquals(co.co_name, "funcname") + self.assertEquals(co.co_firstlineno, 15) + + def test_main(verbose=None): - from test.support import run_doctest + from test.support import run_doctest, run_unittest from test import test_code run_doctest(test_code, verbose) + run_unittest(CodeTest) if __name__ == '__main__': @@ -40,6 +40,15 @@ C-API - The code flags for old __future__ features are now available again. +- Issue #5954: Add a PyFrame_GetLineNumber() function to replace most uses of + PyCode_Addr2Line(). + +- Issue #5959: Add a PyCode_NewEmpty() function to create a new empty code + object at a specified file, function, and line number. + +- Issue #1419652: Change the first argument to PyImport_AppendInittab() to + ``const char *`` as the string is stored beyond the call. + Library ------- diff --git a/Modules/_ctypes/callbacks.c b/Modules/_ctypes/callbacks.c index 328b10f..6e44080 100644 --- a/Modules/_ctypes/callbacks.c +++ b/Modules/_ctypes/callbacks.c @@ -94,41 +94,13 @@ PrintError(char *msg, ...) /* after code that pyrex generates */ void _ctypes_add_traceback(char *funcname, char *filename, int lineno) { - PyObject *py_srcfile = 0; - PyObject *py_funcname = 0; PyObject *py_globals = 0; - PyObject *empty_tuple = 0; - PyObject *empty_string = 0; PyCodeObject *py_code = 0; PyFrameObject *py_frame = 0; - py_srcfile = PyUnicode_DecodeFSDefault(filename); - if (!py_srcfile) goto bad; - py_funcname = PyUnicode_FromString(funcname); - if (!py_funcname) goto bad; py_globals = PyDict_New(); if (!py_globals) goto bad; - empty_tuple = PyTuple_New(0); - if (!empty_tuple) goto bad; - empty_string = PyBytes_FromString(""); - if (!empty_string) goto bad; - py_code = PyCode_New( - 0, /*int argcount,*/ - 0, /*int kwonlyargcount,*/ - 0, /*int nlocals,*/ - 0, /*int stacksize,*/ - 0, /*int flags,*/ - empty_string, /*PyObject *code,*/ - empty_tuple, /*PyObject *consts,*/ - empty_tuple, /*PyObject *names,*/ - empty_tuple, /*PyObject *varnames,*/ - empty_tuple, /*PyObject *freevars,*/ - empty_tuple, /*PyObject *cellvars,*/ - py_srcfile, /*PyObject *filename,*/ - py_funcname, /*PyObject *name,*/ - lineno, /*int firstlineno,*/ - empty_string /*PyObject *lnotab*/ - ); + py_code = PyCode_NewEmpty(filename, funcname, lineno); if (!py_code) goto bad; py_frame = PyFrame_New( PyThreadState_Get(), /*PyThreadState *tstate,*/ @@ -141,10 +113,6 @@ void _ctypes_add_traceback(char *funcname, char *filename, int lineno) PyTraceBack_Here(py_frame); bad: Py_XDECREF(py_globals); - Py_XDECREF(py_srcfile); - Py_XDECREF(py_funcname); - Py_XDECREF(empty_tuple); - Py_XDECREF(empty_string); Py_XDECREF(py_code); Py_XDECREF(py_frame); } diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 5532e07..8dc09ac 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1446,6 +1446,21 @@ argparsing(PyObject *o, PyObject *args) Py_RETURN_NONE; } +/* To test that the result of PyCode_NewEmpty has the right members. */ +static PyObject * +code_newempty(PyObject *self, PyObject *args) +{ + const char *filename; + const char *funcname; + int firstlineno; + + if (!PyArg_ParseTuple(args, "ssi:code_newempty", + &filename, &funcname, &firstlineno)) + return NULL; + + return (PyObject *)PyCode_NewEmpty(filename, funcname, firstlineno); +} + static PyMethodDef TestMethods[] = { {"raise_exception", raise_exception, METH_VARARGS}, {"raise_memoryerror", (PyCFunction)raise_memoryerror, METH_NOARGS}, @@ -1498,6 +1513,7 @@ static PyMethodDef TestMethods[] = { {"traceback_print", traceback_print, METH_VARARGS}, {"exception_print", exception_print, METH_VARARGS}, {"argparsing", argparsing, METH_VARARGS}, + {"code_newempty", code_newempty, METH_VARARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 1b9ffca..4b5652e 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -209,53 +209,11 @@ flag_error(xmlparseobject *self) static PyCodeObject* getcode(enum HandlerTypes slot, char* func_name, int lineno) { - PyObject *code = NULL; - PyObject *name = NULL; - PyObject *nulltuple = NULL; - PyObject *filename = NULL; - if (handler_info[slot].tb_code == NULL) { - code = PyBytes_FromString(""); - if (code == NULL) - goto failed; - name = PyUnicode_FromString(func_name); - if (name == NULL) - goto failed; - nulltuple = PyTuple_New(0); - if (nulltuple == NULL) - goto failed; - filename = PyUnicode_DecodeFSDefault(__FILE__); handler_info[slot].tb_code = - PyCode_New(0, /* argcount */ - 0, /* kwonlyargcount */ - 0, /* nlocals */ - 0, /* stacksize */ - 0, /* flags */ - code, /* code */ - nulltuple, /* consts */ - nulltuple, /* names */ - nulltuple, /* varnames */ -#if PYTHON_API_VERSION >= 1010 - nulltuple, /* freevars */ - nulltuple, /* cellvars */ -#endif - filename, /* filename */ - name, /* name */ - lineno, /* firstlineno */ - code /* lnotab */ - ); - if (handler_info[slot].tb_code == NULL) - goto failed; - Py_DECREF(code); - Py_DECREF(nulltuple); - Py_DECREF(filename); - Py_DECREF(name); + PyCode_NewEmpty(__FILE__, func_name, lineno); } return handler_info[slot].tb_code; - failed: - Py_XDECREF(code); - Py_XDECREF(name); - return NULL; } #ifdef FIX_TRACE diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 832b4e9..a772c56 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -54,7 +54,7 @@ PyCode_New(int argcount, int kwonlyargcount, Py_ssize_t i; /* Check argument types */ - if (argcount < 0 || nlocals < 0 || + if (argcount < 0 || kwonlyargcount < 0 || nlocals < 0 || code == NULL || consts == NULL || !PyTuple_Check(consts) || names == NULL || !PyTuple_Check(names) || @@ -112,6 +112,53 @@ PyCode_New(int argcount, int kwonlyargcount, return co; } +PyCodeObject * +PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno) +{ + static PyObject *emptystring = NULL; + static PyObject *nulltuple = NULL; + PyObject *filename_ob = NULL; + PyObject *funcname_ob = NULL; + PyCodeObject *result = NULL; + if (emptystring == NULL) { + emptystring = PyBytes_FromString(""); + if (emptystring == NULL) + goto failed; + } + if (nulltuple == NULL) { + nulltuple = PyTuple_New(0); + if (nulltuple == NULL) + goto failed; + } + funcname_ob = PyUnicode_FromString(funcname); + if (funcname_ob == NULL) + goto failed; + filename_ob = PyUnicode_DecodeFSDefault(filename); + if (filename_ob == NULL) + goto failed; + + result = PyCode_New(0, /* argcount */ + 0, /* kwonlyargcount */ + 0, /* nlocals */ + 0, /* stacksize */ + 0, /* flags */ + emptystring, /* code */ + nulltuple, /* consts */ + nulltuple, /* names */ + nulltuple, /* varnames */ + nulltuple, /* freevars */ + nulltuple, /* cellvars */ + filename_ob, /* filename */ + funcname_ob, /* name */ + firstlineno, /* firstlineno */ + emptystring /* lnotab */ + ); + +failed: + Py_XDECREF(funcname_ob); + Py_XDECREF(filename_ob); + return result; +} #define OFF(x) offsetof(PyCodeObject, x) @@ -431,48 +478,8 @@ PyTypeObject PyCode_Type = { code_new, /* tp_new */ }; -/* All about c_lnotab. - -c_lnotab is an array of unsigned bytes disguised as a Python string. In -O -mode, SET_LINENO opcodes aren't generated, and bytecode offsets are mapped -to source code line #s (when needed for tracebacks) via c_lnotab instead. -The array is conceptually a list of - (bytecode offset increment, line number increment) -pairs. The details are important and delicate, best illustrated by example: - - byte code offset source code line number - 0 1 - 6 2 - 50 7 - 350 307 - 361 308 - -The first trick is that these numbers aren't stored, only the increments -from one row to the next (this doesn't really work, but it's a start): - - 0, 1, 6, 1, 44, 5, 300, 300, 11, 1 - -The second trick is that an unsigned byte can't hold negative values, or -values larger than 255, so (a) there's a deep assumption that byte code -offsets and their corresponding line #s both increase monotonically, and (b) -if at least one column jumps by more than 255 from one row to the next, more -than one pair is written to the table. In case #b, there's no way to know -from looking at the table later how many were written. That's the delicate -part. A user of c_lnotab desiring to find the source line number -corresponding to a bytecode address A should do something like this - - lineno = addr = 0 - for addr_incr, line_incr in c_lnotab: - addr += addr_incr - if addr > A: - return lineno - lineno += line_incr - -In order for this to work, when the addr field increments by more than 255, -the line # increment in each pair generated must be 0 until the remaining addr -increment is < 256. So, in the example above, com_set_lineno should not (as -was actually done until 2.2) expand 300, 300 to 255, 255, 45, 45, but to -255, 0, 45, 255, 0, 45. +/* Use co_lnotab to compute the line number from a bytecode index, addrq. See + lnotab_notes.txt for the details of the lnotab representation. */ int @@ -491,85 +498,10 @@ PyCode_Addr2Line(PyCodeObject *co, int addrq) return line; } -/* - Check whether the current instruction is at the start of a line. - - */ - - /* The theory of SET_LINENO-less tracing. - - In a nutshell, we use the co_lnotab field of the code object - to tell when execution has moved onto a different line. - - As mentioned above, the basic idea is so set things up so - that - - *instr_lb <= frame->f_lasti < *instr_ub - - is true so long as execution does not change lines. - - This is all fairly simple. Digging the information out of - co_lnotab takes some work, but is conceptually clear. - - Somewhat harder to explain is why we don't *always* call the - line trace function when the above test fails. - - Consider this code: - - 1: def f(a): - 2: if a: - 3: print 1 - 4: else: - 5: print 2 - - which compiles to this: - - 2 0 LOAD_FAST 0 (a) - 3 JUMP_IF_FALSE 9 (to 15) - 6 POP_TOP - - 3 7 LOAD_CONST 1 (1) - 10 PRINT_ITEM - 11 PRINT_NEWLINE - 12 JUMP_FORWARD 6 (to 21) - >> 15 POP_TOP - - 5 16 LOAD_CONST 2 (2) - 19 PRINT_ITEM - 20 PRINT_NEWLINE - >> 21 LOAD_CONST 0 (None) - 24 RETURN_VALUE - - If 'a' is false, execution will jump to instruction at offset - 15 and the co_lnotab will claim that execution has moved to - line 3. This is at best misleading. In this case we could - associate the POP_TOP with line 4, but that doesn't make - sense in all cases (I think). - - What we do is only call the line trace function if the co_lnotab - indicates we have jumped to the *start* of a line, i.e. if the - current instruction offset matches the offset given for the - start of a line by the co_lnotab. - - This also takes care of the situation where 'a' is true. - Execution will jump from instruction offset 12 to offset 21. - Then the co_lnotab would imply that execution has moved to line - 5, which is again misleading. - - Why do we set f_lineno when tracing? Well, consider the code - above when 'a' is true. If stepping through this with 'n' in - pdb, you would stop at line 1 with a "call" type event, then - line events on lines 2 and 3, then a "return" type event -- but - you would be shown line 5 during this event. This is a change - from the behaviour in 2.2 and before, and I've found it - confusing in practice. By setting and using f_lineno when - tracing, one can report a line number different from that - suggested by f_lasti on this one occasion where it's desirable. - */ - - -int -PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds) +/* Update *bounds to describe the first and one-past-the-last instructions in + the same line as lasti. Return the number of that line. */ +int +_PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds) { int size, addr, line; unsigned char* p; @@ -586,11 +518,9 @@ PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds) instr_lb -- if we stored the matching value of p somwhere we could skip the first while loop. */ - /* see comments in compile.c for the description of + /* See lnotab_notes.txt for the description of co_lnotab. A point to remember: increments to p - should come in pairs -- although we don't care about - the line increments here, treating them as byte - increments gets confusing, to say the least. */ + come in (addr, line) pairs. */ bounds->ap_lower = 0; while (size > 0) { @@ -603,13 +533,6 @@ PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds) --size; } - /* If lasti and addr don't match exactly, we don't want to - change the lineno slot on the frame or execute a trace - function. Return -1 instead. - */ - if (addr != lasti) - line = -1; - if (size > 0) { while (--size >= 0) { addr += *p++; diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 664dc2b..26abd5f 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -31,17 +31,19 @@ frame_getlocals(PyFrameObject *f, void *closure) return f->f_locals; } -static PyObject * -frame_getlineno(PyFrameObject *f, void *closure) +int +PyFrame_GetLineNumber(PyFrameObject *f) { - int lineno; - if (f->f_trace) - lineno = f->f_lineno; + return f->f_lineno; else - lineno = PyCode_Addr2Line(f->f_code, f->f_lasti); + return PyCode_Addr2Line(f->f_code, f->f_lasti); +} - return PyLong_FromLong(lineno); +static PyObject * +frame_getlineno(PyFrameObject *f, void *closure) +{ + return PyLong_FromLong(PyFrame_GetLineNumber(f)); } /* Setter for f_lineno - you can set f_lineno from within a trace function in @@ -345,16 +347,14 @@ frame_gettrace(PyFrameObject *f, void *closure) static int frame_settrace(PyFrameObject *f, PyObject* v, void *closure) { - /* We rely on f_lineno being accurate when f_trace is set. */ + PyObject* old_value; - PyObject* old_value = f->f_trace; + /* We rely on f_lineno being accurate when f_trace is set. */ + f->f_lineno = PyFrame_GetLineNumber(f); + old_value = f->f_trace; Py_XINCREF(v); f->f_trace = v; - - if (v != NULL) - f->f_lineno = PyCode_Addr2Line(f->f_code, f->f_lasti); - Py_XDECREF(old_value); return 0; diff --git a/Objects/lnotab_notes.txt b/Objects/lnotab_notes.txt new file mode 100644 index 0000000..d247edd --- /dev/null +++ b/Objects/lnotab_notes.txt @@ -0,0 +1,124 @@ +All about co_lnotab, the line number table. + +Code objects store a field named co_lnotab. This is an array of unsigned bytes +disguised as a Python string. It is used to map bytecode offsets to source code +line #s for tracebacks and to identify line number boundaries for line tracing. + +The array is conceptually a compressed list of + (bytecode offset increment, line number increment) +pairs. The details are important and delicate, best illustrated by example: + + byte code offset source code line number + 0 1 + 6 2 + 50 7 + 350 307 + 361 308 + +Instead of storing these numbers literally, we compress the list by storing only +the increments from one row to the next. Conceptually, the stored list might +look like: + + 0, 1, 6, 1, 44, 5, 300, 300, 11, 1 + +The above doesn't really work, but it's a start. Note that an unsigned byte +can't hold negative values, or values larger than 255, and the above example +contains two such values. So we make two tweaks: + + (a) there's a deep assumption that byte code offsets and their corresponding + line #s both increase monotonically, and + (b) if at least one column jumps by more than 255 from one row to the next, + more than one pair is written to the table. In case #b, there's no way to know + from looking at the table later how many were written. That's the delicate + part. A user of co_lnotab desiring to find the source line number + corresponding to a bytecode address A should do something like this + + lineno = addr = 0 + for addr_incr, line_incr in co_lnotab: + addr += addr_incr + if addr > A: + return lineno + lineno += line_incr + +(In C, this is implemented by PyCode_Addr2Line().) In order for this to work, +when the addr field increments by more than 255, the line # increment in each +pair generated must be 0 until the remaining addr increment is < 256. So, in +the example above, assemble_lnotab in compile.c should not (as was actually done +until 2.2) expand 300, 300 to + 255, 255, 45, 45, +but to + 255, 0, 45, 255, 0, 45. + +The above is sufficient to reconstruct line numbers for tracebacks, but not for +line tracing. Tracing is handled by PyCode_CheckLineNumber() in codeobject.c +and maybe_call_line_trace() in ceval.c. + +*** Tracing *** + +To a first approximation, we want to call the tracing function when the line +number of the current instruction changes. Re-computing the current line for +every instruction is a little slow, though, so each time we compute the line +number we save the bytecode indices where it's valid: + + *instr_lb <= frame->f_lasti < *instr_ub + +is true so long as execution does not change lines. That is, *instr_lb holds +the first bytecode index of the current line, and *instr_ub holds the first +bytecode index of the next line. As long as the above expression is true, +maybe_call_line_trace() does not need to call PyCode_CheckLineNumber(). Note +that the same line may appear multiple times in the lnotab, either because the +bytecode jumped more than 255 indices between line number changes or because +the compiler inserted the same line twice. Even in that case, *instr_ub holds +the first index of the next line. + +However, we don't *always* want to call the line trace function when the above +test fails. + +Consider this code: + +1: def f(a): +2: while a: +3: print 1, +4: break +5: else: +6: print 2, + +which compiles to this: + + 2 0 SETUP_LOOP 19 (to 22) + >> 3 LOAD_FAST 0 (a) + 6 POP_JUMP_IF_FALSE 17 + + 3 9 LOAD_CONST 1 (1) + 12 PRINT_ITEM + + 4 13 BREAK_LOOP + 14 JUMP_ABSOLUTE 3 + >> 17 POP_BLOCK + + 6 18 LOAD_CONST 2 (2) + 21 PRINT_ITEM + >> 22 LOAD_CONST 0 (None) + 25 RETURN_VALUE + +If 'a' is false, execution will jump to the POP_BLOCK instruction at offset 17 +and the co_lnotab will claim that execution has moved to line 4, which is wrong. +In this case, we could instead associate the POP_BLOCK with line 5, but that +would break jumps around loops without else clauses. + +We fix this by only calling the line trace function for a forward jump if the +co_lnotab indicates we have jumped to the *start* of a line, i.e. if the current +instruction offset matches the offset given for the start of a line by the +co_lnotab. For backward jumps, however, we always call the line trace function, +which lets a debugger stop on every evaluation of a loop guard (which usually +won't be the first opcode in a line). + +Why do we set f_lineno when tracing, and only just before calling the trace +function? Well, consider the code above when 'a' is true. If stepping through +this with 'n' in pdb, you would stop at line 1 with a "call" type event, then +line events on lines 2, 3, and 4, then a "return" type event -- but because the +code for the return actually falls in the range of the "line 6" opcodes, you +would be shown line 6 during this event. This is a change from the behaviour in +2.2 and before, and I've found it confusing in practice. By setting and using +f_lineno when tracing, one can report a line number different from that +suggested by f_lasti on this one occasion where it's desirable. diff --git a/Python/_warnings.c b/Python/_warnings.c index ef7f373..35a840e 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -462,7 +462,7 @@ setup_context(Py_ssize_t stack_level, PyObject **filename, int *lineno, } else { globals = f->f_globals; - *lineno = PyCode_Addr2Line(f->f_code, f->f_lasti); + *lineno = PyFrame_GetLineNumber(f); } *module = NULL; diff --git a/Python/ceval.c b/Python/ceval.c index 6141c13..403acd2 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2761,7 +2761,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) default: fprintf(stderr, "XXX lineno: %d, opcode: %d\n", - PyCode_Addr2Line(f->f_code, f->f_lasti), + PyFrame_GetLineNumber(f), opcode); PyErr_SetString(PyExc_SystemError, "unknown opcode"); why = WHY_EXCEPTION; @@ -3522,33 +3522,30 @@ _PyEval_CallTracing(PyObject *func, PyObject *args) return result; } +/* See Objects/lnotab_notes.txt for a description of how tracing works. */ static int maybe_call_line_trace(Py_tracefunc func, PyObject *obj, PyFrameObject *frame, int *instr_lb, int *instr_ub, int *instr_prev) { int result = 0; + int line = frame->f_lineno; /* If the last instruction executed isn't in the current - instruction window, reset the window. If the last - instruction happens to fall at the start of a line or if it - represents a jump backwards, call the trace function. + instruction window, reset the window. */ - if ((frame->f_lasti < *instr_lb || frame->f_lasti >= *instr_ub)) { - int line; + if (frame->f_lasti < *instr_lb || frame->f_lasti >= *instr_ub) { PyAddrPair bounds; - - line = PyCode_CheckLineNumber(frame->f_code, frame->f_lasti, - &bounds); - if (line >= 0) { - frame->f_lineno = line; - result = call_trace(func, obj, frame, - PyTrace_LINE, Py_None); - } + line = _PyCode_CheckLineNumber(frame->f_code, frame->f_lasti, + &bounds); *instr_lb = bounds.ap_lower; *instr_ub = bounds.ap_upper; } - else if (frame->f_lasti <= *instr_prev) { + /* If the last instruction falls at the start of a line or if + it represents a jump backwards, update the frame's line + number and call the trace function. */ + if (frame->f_lasti == *instr_lb || frame->f_lasti < *instr_prev) { + frame->f_lineno = line; result = call_trace(func, obj, frame, PyTrace_LINE, Py_None); } *instr_prev = frame->f_lasti; diff --git a/Python/compile.c b/Python/compile.c index 787dfe3..24975b6 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -1748,9 +1748,6 @@ compiler_for(struct compiler *c, stmt_ty s) VISIT(c, expr, s->v.For.iter); ADDOP(c, GET_ITER); compiler_use_next_block(c, start); - /* for expressions must be traced on each iteration, - so we need to set an extra line number. */ - c->u->u_lineno_set = 0; ADDOP_JREL(c, FOR_ITER, cleanup); VISIT(c, expr, s->v.For.target); VISIT_SEQ(c, stmt, s->v.For.body); @@ -1796,9 +1793,6 @@ compiler_while(struct compiler *c, stmt_ty s) if (!compiler_push_fblock(c, LOOP, loop)) return 0; if (constant == -1) { - /* while expressions must be traced on each iteration, - so we need to set an extra line number. */ - c->u->u_lineno_set = 0; VISIT(c, expr, s->v.While.test); ADDOP_JABS(c, POP_JUMP_IF_FALSE, anchor); } @@ -3654,51 +3648,9 @@ blocksize(basicblock *b) return size; } -/* All about a_lnotab. - -c_lnotab is an array of unsigned bytes disguised as a Python string. -It is used to map bytecode offsets to source code line #s (when needed -for tracebacks). - -The array is conceptually a list of - (bytecode offset increment, line number increment) -pairs. The details are important and delicate, best illustrated by example: - - byte code offset source code line number - 0 1 - 6 2 - 50 7 - 350 307 - 361 308 - -The first trick is that these numbers aren't stored, only the increments -from one row to the next (this doesn't really work, but it's a start): - - 0, 1, 6, 1, 44, 5, 300, 300, 11, 1 - -The second trick is that an unsigned byte can't hold negative values, or -values larger than 255, so (a) there's a deep assumption that byte code -offsets and their corresponding line #s both increase monotonically, and (b) -if at least one column jumps by more than 255 from one row to the next, more -than one pair is written to the table. In case #b, there's no way to know -from looking at the table later how many were written. That's the delicate -part. A user of c_lnotab desiring to find the source line number -corresponding to a bytecode address A should do something like this - - lineno = addr = 0 - for addr_incr, line_incr in c_lnotab: - addr += addr_incr - if addr > A: - return lineno - lineno += line_incr - -In order for this to work, when the addr field increments by more than 255, -the line # increment in each pair generated must be 0 until the remaining addr -increment is < 256. So, in the example above, assemble_lnotab (it used -to be called com_set_lineno) should not (as was actually done until 2.2) -expand 300, 300 to 255, 255, 45, 45, - but to 255, 0, 45, 255, 0, 45. -*/ +/* Appends a pair to the end of the line number table, a_lnotab, representing + the instruction's bytecode offset and line number. See + Objects/lnotab_notes.txt for the description of the line number table. */ static int assemble_lnotab(struct assembler *a, struct instr *i) diff --git a/Python/traceback.c b/Python/traceback.c index e77b1b2..1de2df6 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -114,8 +114,7 @@ newtracebackobject(PyTracebackObject *next, PyFrameObject *frame) Py_XINCREF(frame); tb->tb_frame = frame; tb->tb_lasti = frame->f_lasti; - tb->tb_lineno = PyCode_Addr2Line(frame->f_code, - frame->f_lasti); + tb->tb_lineno = PyFrame_GetLineNumber(frame); PyObject_GC_Track(tb); } return tb; |