diff options
author | Mark Shannon <mark@hotpy.org> | 2021-04-13 10:08:14 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-13 10:08:14 (GMT) |
commit | 9e7b2076fb4380987ad0262c4c0ca900b06475ad (patch) | |
tree | d7a4902fa70e0014849fd77f07d89f39bb2e7494 | |
parent | c2b7a66b91cdb96fbfdb160f96797208ddc5e436 (diff) | |
download | cpython-9e7b2076fb4380987ad0262c4c0ca900b06475ad.zip cpython-9e7b2076fb4380987ad0262c4c0ca900b06475ad.tar.gz cpython-9e7b2076fb4380987ad0262c4c0ca900b06475ad.tar.bz2 |
bpo-43760: Speed up check for tracing in interpreter dispatch (#25276)
* Remove redundant tracing_possible field from interpreter state.
* Move 'use_tracing' from tstate onto C stack, for fastest possible checking in dispatch logic.
* Add comments stressing the importance stack discipline when dealing with CFrames.
* Add NEWS
-rw-r--r-- | Include/cpython/pystate.h | 22 | ||||
-rw-r--r-- | Include/internal/pycore_interp.h | 6 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst | 2 | ||||
-rw-r--r-- | Python/ceval.c | 55 | ||||
-rw-r--r-- | Python/pystate.c | 3 | ||||
-rw-r--r-- | Python/sysmodule.c | 8 |
6 files changed, 61 insertions, 35 deletions
diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index cfaee89..e3ccc54 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -29,6 +29,21 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *); #define PyTrace_OPCODE 7 +typedef struct _cframe { + /* This struct will be threaded through the C stack + * allowing fast access to per-thread state that needs + * to be accessed quickly by the interpreter, but can + * be modified outside of the interpreter. + * + * WARNING: This makes data on the C stack accessible from + * heap objects. Care must be taken to maintain stack + * discipline and make sure that instances of this struct cannot + * accessed outside of their lifetime. + */ + int use_tracing; + struct _cframe *previous; +} CFrame; + typedef struct _err_stackitem { /* This struct represents an entry on the exception stack, which is a * per-coroutine state. (Coroutine in the computer science sense, @@ -61,7 +76,10 @@ struct _ts { This is to prevent the actual trace/profile code from being recorded in the trace/profile. */ int tracing; - int use_tracing; + + /* Pointer to current CFrame in the C stack frame of the currently, + * or most recently, executing _PyEval_EvalFrameDefault. */ + CFrame *cframe; Py_tracefunc c_profilefunc; Py_tracefunc c_tracefunc; @@ -129,6 +147,8 @@ struct _ts { /* Unique thread state id. */ uint64_t id; + CFrame root_cframe; + /* XXX signal handlers should also be here */ }; diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index fa0e26f..dbdd109 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -33,12 +33,6 @@ struct _pending_calls { struct _ceval_state { int recursion_limit; - /* Records whether tracing is on for any thread. Counts the number - of threads for which tstate->c_tracefunc is non-NULL, so if the - value is 0, we know we don't have to check this thread's - c_tracefunc. This speeds up the if statement in - _PyEval_EvalFrameDefault() after fast_next_opcode. */ - int tracing_possible; /* This single variable consolidates all requests to break out of the fast path in the eval loop. */ _Py_atomic_int eval_breaker; diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst b/Misc/NEWS.d/next/Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst new file mode 100644 index 0000000..e661210 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-04-08-12-20-29.bpo-43760.tBIsD8.rst @@ -0,0 +1,2 @@ +Move the flag for checking whether tracing is enabled to the C stack, from the heap. +Should speed up dispatch in the interpreter. diff --git a/Python/ceval.c b/Python/ceval.c index c121160..53b596b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -37,6 +37,7 @@ typedef struct { PyCodeObject *code; // The code object for the bounds. May be NULL. int instr_prev; // Only valid if code != NULL. PyCodeAddressRange bounds; // Only valid if code != NULL. + CFrame cframe; } PyTraceInfo; @@ -1109,8 +1110,6 @@ fail: static int do_raise(PyThreadState *tstate, PyObject *exc, PyObject *cause); static int unpack_iterable(PyThreadState *, PyObject *, int, int, PyObject **); -#define _Py_TracingPossible(ceval) ((ceval)->tracing_possible) - PyObject * PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals) @@ -1307,7 +1306,7 @@ eval_frame_handle_pending(PyThreadState *tstate) #define DISPATCH() \ { \ - if (_Py_TracingPossible(ceval2) OR_DTRACE_LINE OR_LLTRACE) { \ + if (trace_info.cframe.use_tracing OR_DTRACE_LINE OR_LLTRACE) { \ goto tracing_dispatch; \ } \ f->f_lasti = INSTR_OFFSET(); \ @@ -1595,8 +1594,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) int oparg; /* Current opcode argument, if any */ PyObject **fastlocals, **freevars; PyObject *retval = NULL; /* Return value */ - struct _ceval_state * const ceval2 = &tstate->interp->ceval; - _Py_atomic_int * const eval_breaker = &ceval2->eval_breaker; + _Py_atomic_int * const eval_breaker = &tstate->interp->ceval.eval_breaker; PyCodeObject *co; const _Py_CODEUNIT *first_instr; @@ -1616,11 +1614,20 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) /* Mark trace_info as uninitialized */ trace_info.code = NULL; + /* WARNING: Because the CFrame lives on the C stack, + * but can be accessed from a heap allocated object (tstate) + * strict stack discipline must be maintained. + */ + CFrame *prev_cframe = tstate->cframe; + trace_info.cframe.use_tracing = prev_cframe->use_tracing; + trace_info.cframe.previous = prev_cframe; + tstate->cframe = &trace_info.cframe; + /* push frame */ tstate->frame = f; co = f->f_code; - if (tstate->use_tracing) { + if (trace_info.cframe.use_tracing) { if (tstate->c_tracefunc != NULL) { /* tstate->c_tracefunc, if defined, is a function that will be called on *every* entry @@ -1782,7 +1789,7 @@ main_loop: /* line-by-line tracing support */ - if (_Py_TracingPossible(ceval2) && + if (trace_info.cframe.use_tracing && tstate->c_tracefunc != NULL && !tstate->tracing) { int err; /* see maybe_call_line_trace() @@ -4543,7 +4550,7 @@ exception_unwind: PUSH(val); PUSH(exc); JUMPTO(handler); - if (_Py_TracingPossible(ceval2)) { + if (trace_info.cframe.use_tracing) { trace_info.instr_prev = INT_MAX; } /* Resume normal execution */ @@ -4567,7 +4574,7 @@ exception_unwind: f->f_stackdepth = 0; f->f_state = FRAME_RAISED; exiting: - if (tstate->use_tracing) { + if (trace_info.cframe.use_tracing) { if (tstate->c_tracefunc) { if (call_trace_protected(tstate->c_tracefunc, tstate->c_traceobj, tstate, f, &trace_info, PyTrace_RETURN, retval)) { @@ -4584,6 +4591,10 @@ exiting: /* pop frame */ exit_eval_frame: + /* Restore previous cframe */ + tstate->cframe = trace_info.cframe.previous; + tstate->cframe->use_tracing = trace_info.cframe.use_tracing; + if (PyDTrace_FUNCTION_RETURN_ENABLED()) dtrace_function_return(f); _Py_LeaveRecursiveCall(tstate); @@ -5507,7 +5518,7 @@ call_trace(Py_tracefunc func, PyObject *obj, if (tstate->tracing) return 0; tstate->tracing++; - tstate->use_tracing = 0; + tstate->cframe->use_tracing = 0; if (frame->f_lasti < 0) { frame->f_lineno = frame->f_code->co_firstlineno; } @@ -5517,7 +5528,7 @@ call_trace(Py_tracefunc func, PyObject *obj, } result = func(obj, frame, what, arg); frame->f_lineno = 0; - tstate->use_tracing = ((tstate->c_tracefunc != NULL) + tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL)); tstate->tracing--; return result; @@ -5528,15 +5539,15 @@ _PyEval_CallTracing(PyObject *func, PyObject *args) { PyThreadState *tstate = _PyThreadState_GET(); int save_tracing = tstate->tracing; - int save_use_tracing = tstate->use_tracing; + int save_use_tracing = tstate->cframe->use_tracing; PyObject *result; tstate->tracing = 0; - tstate->use_tracing = ((tstate->c_tracefunc != NULL) + tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL)); result = PyObject_Call(func, args, NULL); tstate->tracing = save_tracing; - tstate->use_tracing = save_use_tracing; + tstate->cframe->use_tracing = save_use_tracing; return result; } @@ -5590,7 +5601,7 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) tstate->c_profilefunc = NULL; tstate->c_profileobj = NULL; /* Must make sure that tracing is not ignored if 'profileobj' is freed */ - tstate->use_tracing = tstate->c_tracefunc != NULL; + tstate->cframe->use_tracing = tstate->c_tracefunc != NULL; Py_XDECREF(profileobj); Py_XINCREF(arg); @@ -5598,7 +5609,7 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) tstate->c_profilefunc = func; /* Flag that tracing or profiling is turned on */ - tstate->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL); + tstate->cframe->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL); return 0; } @@ -5626,14 +5637,12 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) return -1; } - struct _ceval_state *ceval2 = &tstate->interp->ceval; PyObject *traceobj = tstate->c_traceobj; - ceval2->tracing_possible += (func != NULL) - (tstate->c_tracefunc != NULL); tstate->c_tracefunc = NULL; tstate->c_traceobj = NULL; /* Must make sure that profiling is not ignored if 'traceobj' is freed */ - tstate->use_tracing = (tstate->c_profilefunc != NULL); + tstate->cframe->use_tracing = (tstate->c_profilefunc != NULL); Py_XDECREF(traceobj); Py_XINCREF(arg); @@ -5641,7 +5650,7 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) tstate->c_tracefunc = func; /* Flag that tracing or profiling is turned on */ - tstate->use_tracing = ((func != NULL) + tstate->cframe->use_tracing = ((func != NULL) || (tstate->c_profilefunc != NULL)); return 0; @@ -5836,7 +5845,7 @@ PyEval_GetFuncDesc(PyObject *func) } #define C_TRACE(x, call) \ -if (tstate->use_tracing && tstate->c_profilefunc) { \ +if (trace_info->cframe.use_tracing && tstate->c_profilefunc) { \ if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \ tstate, tstate->frame, trace_info, \ PyTrace_C_CALL, func)) { \ @@ -5917,7 +5926,7 @@ call_function(PyThreadState *tstate, Py_ssize_t nargs = oparg - nkwargs; PyObject **stack = (*pp_stack) - nargs - nkwargs; - if (tstate->use_tracing) { + if (trace_info->cframe.use_tracing) { x = trace_call_function(tstate, trace_info, func, stack, nargs, kwnames); } else { @@ -5950,7 +5959,7 @@ do_call_core(PyThreadState *tstate, } else if (Py_IS_TYPE(func, &PyMethodDescr_Type)) { Py_ssize_t nargs = PyTuple_GET_SIZE(callargs); - if (nargs > 0 && tstate->use_tracing) { + if (nargs > 0 && trace_info->cframe.use_tracing) { /* We need to create a temporary bound method as argument for profiling. diff --git a/Python/pystate.c b/Python/pystate.c index c8b2530..436f874 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -624,7 +624,8 @@ new_threadstate(PyInterpreterState *interp, int init) tstate->recursion_headroom = 0; tstate->stackcheck_counter = 0; tstate->tracing = 0; - tstate->use_tracing = 0; + tstate->root_cframe.use_tracing = 0; + tstate->cframe = &tstate->root_cframe; tstate->gilstate_counter = 0; tstate->async_exc = NULL; tstate->thread_id = PyThread_get_thread_ident(); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 54d70ef..a36d90f 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -252,7 +252,7 @@ sys_audit_tstate(PyThreadState *ts, const char *event, /* Disallow tracing in hooks unless explicitly enabled */ ts->tracing++; - ts->use_tracing = 0; + ts->cframe->use_tracing = 0; while ((hook = PyIter_Next(hooks)) != NULL) { _Py_IDENTIFIER(__cantrace__); PyObject *o; @@ -265,14 +265,14 @@ sys_audit_tstate(PyThreadState *ts, const char *event, break; } if (canTrace) { - ts->use_tracing = (ts->c_tracefunc || ts->c_profilefunc); + ts->cframe->use_tracing = (ts->c_tracefunc || ts->c_profilefunc); ts->tracing--; } PyObject* args[2] = {eventName, eventArgs}; o = _PyObject_FastCallTstate(ts, hook, args, 2); if (canTrace) { ts->tracing++; - ts->use_tracing = 0; + ts->cframe->use_tracing = 0; } if (!o) { break; @@ -280,7 +280,7 @@ sys_audit_tstate(PyThreadState *ts, const char *event, Py_DECREF(o); Py_CLEAR(hook); } - ts->use_tracing = (ts->c_tracefunc || ts->c_profilefunc); + ts->cframe->use_tracing = (ts->c_tracefunc || ts->c_profilefunc); ts->tracing--; if (_PyErr_Occurred(ts)) { goto exit; |