| author | Guido van Rossum <guido@python.org> | 2023-01-18 18:41:07 (GMT) |
| --- | --- | --- |
| committer | GitHub <noreply@github.com> | 2023-01-18 18:41:07 (GMT) |
| commit | 1f0d0a432cf431882b432eeba8315f84f818da6b (patch) | |
| tree | c4ea6660f3c06b814479de050f0c08aec7a159c2 | |
| parent | d65f48507045c87000c65dc2c4fa727f483caad6 (diff) | |
GH-98831: Move assorted macros from ceval.h to a new header (#101116)
| Mode | File | Lines changed |
| --- | --- | --- |
| -rw-r--r-- | Makefile.pre.in | 8 |
| -rw-r--r-- | Python/bytecodes.c | 43 |
| -rw-r--r-- | Python/ceval.c | 349 |
| -rw-r--r-- | Python/ceval_macros.h | 349 |

4 files changed, 364 insertions, 385 deletions
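The change is mostly mechanical: the macro block that used to sit above the evaluation loop in Python/ceval.c becomes the new Python/ceval_macros.h, both ceval.c and the bytecodes.c DSL file include it, and the Makefile dependency list for Python/ceval.o is updated. One piece worth calling out before the diff is the USE_COMPUTED_GOTOS selection logic that the new header now owns. The snippet below is a standalone sketch, not part of the commit: the `main()` harness and the `printf` are added purely for illustration of how that preprocessor block resolves depending on whether the build defines HAVE_COMPUTED_GOTOS.

```c
/* Standalone sketch: the compiler-feature selection block carried into the
 * new header, wrapped in a tiny harness so it can be compiled on its own.
 * Build with `cc demo.c` or `cc -DHAVE_COMPUTED_GOTOS demo.c` to see the two
 * outcomes; the file name and the harness are illustrative only. */
#include <stdio.h>

#ifdef HAVE_COMPUTED_GOTOS
    #ifndef USE_COMPUTED_GOTOS
    #define USE_COMPUTED_GOTOS 1
    #endif
#else
    #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
    #error "Computed gotos are not supported on this compiler."
    #endif
    #undef USE_COMPUTED_GOTOS
    #define USE_COMPUTED_GOTOS 0
#endif

int main(void)
{
    /* bytecodes.c pre-defines USE_COMPUTED_GOTOS to 0 before including the
     * header, so it always gets the switch-based TARGET()/DISPATCH_GOTO(). */
    printf("USE_COMPUTED_GOTOS = %d\n", USE_COMPUTED_GOTOS);
    return 0;
}
```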
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 8c7a17b..d98f986 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -1466,8 +1466,12 @@ regen-cases:
 		-o $(srcdir)/Python/opcode_metadata.h.new
 	$(UPDATE_FILE) $(srcdir)/Python/opcode_metadata.h $(srcdir)/Python/opcode_metadata.h.new
 
-Python/ceval.o: $(srcdir)/Python/opcode_targets.h $(srcdir)/Python/condvar.h $(srcdir)/Python/generated_cases.c.h
-
+Python/ceval.o: \
+		$(srcdir)/Python/ceval_macros.h \
+		$(srcdir)/Python/condvar.h \
+		$(srcdir)/Python/generated_cases.c.h \
+		$(srcdir)/Python/opcode_metadata.h \
+		$(srcdir)/Python/opcode_targets.h
 
 Python/frozen.o: $(FROZEN_FILES_OUT)
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 344e9bb..5d5929f 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -34,41 +34,14 @@
 #include "setobject.h"
 #include "structmember.h" // struct PyMemberDef, T_OFFSET_EX
 
-void _PyFloat_ExactDealloc(PyObject *);
-void _PyUnicode_ExactDealloc(PyObject *);
-
-/* Stack effect macros
- * These will be mostly replaced by stack effect descriptions,
- * but the tooling need to recognize them.
- */
-#define SET_TOP(v) (stack_pointer[-1] = (v))
-#define SET_SECOND(v) (stack_pointer[-2] = (v))
-#define PEEK(n) (stack_pointer[-(n)])
-#define POKE(n, v) (stack_pointer[-(n)] = (v))
-#define PUSH(val) (*(stack_pointer++) = (val))
-#define POP() (*(--stack_pointer))
-#define TOP() PEEK(1)
-#define SECOND() PEEK(2)
-#define STACK_GROW(n) (stack_pointer += (n))
-#define STACK_SHRINK(n) (stack_pointer -= (n))
-#define EMPTY() 1
-#define STACK_LEVEL() 2
-
-/* Local variable macros */
-#define GETLOCAL(i) (frame->localsplus[i])
-#define SETLOCAL(i, val) \
-do { \
-    PyObject *_tmp = frame->localsplus[i]; \
-    frame->localsplus[i] = (val); \
-    Py_XDECREF(_tmp); \
-} while (0)
+#define USE_COMPUTED_GOTOS 0
+#include "ceval_macros.h"
 
 /* Flow control macros */
 #define DEOPT_IF(cond, instname) ((void)0)
 #define ERROR_IF(cond, labelname) ((void)0)
-#define JUMPBY(offset) ((void)0)
 #define GO_TO_INSTRUCTION(instname) ((void)0)
-#define DISPATCH_SAME_OPARG() ((void)0)
+#define PREDICT(opname) ((void)0)
 
 #define inst(name, ...) case name:
 #define op(name, ...) /* NAME is ignored */
@@ -76,16 +49,14 @@ do { \
 #define super(name) static int SUPER_##name
 #define family(name, ...) static int family_##name
 
-#define NAME_ERROR_MSG \
-    "name '%.200s' is not defined"
-
 // Dummy variables for stack effects.
 static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub;
 static PyObject *container, *start, *stop, *v, *lhs, *rhs;
-static PyObject *list, *tuple, *dict, *owner;
+static PyObject *list, *tuple, *dict, *owner, *set, *str, *tup, *map, *keys;
 static PyObject *exit_func, *lasti, *val, *retval, *obj, *iter;
 static PyObject *aiter, *awaitable, *iterable, *w, *exc_value, *bc;
 static PyObject *orig, *excs, *update, *b, *fromlist, *level, *from;
+static PyObject **pieces, **values;
 static size_t jump;
 // Dummy variables for cache effects
 static uint16_t invert, counter, index, hint;
@@ -456,7 +427,7 @@ dummy_func(
         PREDICT(JUMP_BACKWARD);
     }
 
-    inst(SET_ADD, (set, unused[oparg-1], v -- set, unused[oparg-1])) {
+    inst(SET_ADD, (set, unused[oparg-1], v -- set, unused[oparg-1])) {
         int err = PySet_Add(set, v);
         Py_DECREF(v);
         ERROR_IF(err, error);
@@ -3336,8 +3307,10 @@ dummy_func(
 // END BYTECODES //
 
     }
+dispatch_opcode:
 error:
 exception_unwind:
+exit_unwind:
 handle_eval_breaker:
 resume_frame:
 resume_with_error:
diff --git a/Python/ceval.c b/Python/ceval.c
index ecbe2f9..a97313c 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -215,8 +215,6 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
 static void
 _PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
 
-#define NAME_ERROR_MSG \
-    "name '%.200s' is not defined"
 #define UNBOUNDLOCAL_ERROR_MSG \
     "cannot access local variable '%s' where it is not associated with a value"
 #define UNBOUNDFREE_ERROR_MSG \
@@ -600,352 +598,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
     return _PyEval_EvalFrame(tstate, f->f_frame, throwflag);
 }
 
-
-/* Computed GOTOs, or
-       the-optimization-commonly-but-improperly-known-as-"threaded code"
-   using gcc's labels-as-values extension
-   (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).
-
-   The traditional bytecode evaluation loop uses a "switch" statement, which
-   decent compilers will optimize as a single indirect branch instruction
-   combined with a lookup table of jump addresses. However, since the
-   indirect jump instruction is shared by all opcodes, the CPU will have a
-   hard time making the right prediction for where to jump next (actually,
-   it will be always wrong except in the uncommon case of a sequence of
-   several identical opcodes).
-
-   "Threaded code" in contrast, uses an explicit jump table and an explicit
-   indirect jump instruction at the end of each opcode. Since the jump
-   instruction is at a different address for each opcode, the CPU will make a
-   separate prediction for each of these instructions, which is equivalent to
-   predicting the second opcode of each opcode pair. These predictions have
-   a much better chance to turn out valid, especially in small bytecode loops.
-
-   A mispredicted branch on a modern CPU flushes the whole pipeline and
-   can cost several CPU cycles (depending on the pipeline depth),
-   and potentially many more instructions (depending on the pipeline width).
-   A correctly predicted branch, however, is nearly free.
-
-   At the time of this writing, the "threaded code" version is up to 15-20%
-   faster than the normal "switch" version, depending on the compiler and the
-   CPU architecture.
-
-   NOTE: care must be taken that the compiler doesn't try to "optimize" the
-   indirect jumps by sharing them between all opcodes. Such optimizations
-   can be disabled on gcc by using the -fno-gcse flag (or possibly
-   -fno-crossjumping).
-*/
-
-/* Use macros rather than inline functions, to make it as clear as possible
- * to the C compiler that the tracing check is a simple test then branch.
- * We want to be sure that the compiler knows this before it generates
- * the CFG.
- */
-
-#ifdef WITH_DTRACE
-#define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
-#else
-#define OR_DTRACE_LINE
-#endif
-
-#ifdef HAVE_COMPUTED_GOTOS
-    #ifndef USE_COMPUTED_GOTOS
-    #define USE_COMPUTED_GOTOS 1
-    #endif
-#else
-    #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
-    #error "Computed gotos are not supported on this compiler."
-    #endif
-    #undef USE_COMPUTED_GOTOS
-    #define USE_COMPUTED_GOTOS 0
-#endif
-
-#ifdef Py_STATS
-#define INSTRUCTION_START(op) \
-    do { \
-        frame->prev_instr = next_instr++; \
-        OPCODE_EXE_INC(op); \
-        if (_py_stats) _py_stats->opcode_stats[lastopcode].pair_count[op]++; \
-        lastopcode = op; \
-    } while (0)
-#else
-#define INSTRUCTION_START(op) (frame->prev_instr = next_instr++)
-#endif
-
-#if USE_COMPUTED_GOTOS
-# define TARGET(op) TARGET_##op: INSTRUCTION_START(op);
-# define DISPATCH_GOTO() goto *opcode_targets[opcode]
-#else
-# define TARGET(op) case op: TARGET_##op: INSTRUCTION_START(op);
-# define DISPATCH_GOTO() goto dispatch_opcode
-#endif
-
-/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
-#ifdef LLTRACE
-#define PRE_DISPATCH_GOTO() if (lltrace) { \
-    lltrace_instruction(frame, stack_pointer, next_instr); }
-#else
-#define PRE_DISPATCH_GOTO() ((void)0)
-#endif
-
-
-/* Do interpreter dispatch accounting for tracing and instrumentation */
-#define DISPATCH() \
-    { \
-        NEXTOPARG(); \
-        PRE_DISPATCH_GOTO(); \
-        assert(cframe.use_tracing == 0 || cframe.use_tracing == 255); \
-        opcode |= cframe.use_tracing OR_DTRACE_LINE; \
-        DISPATCH_GOTO(); \
-    }
-
-#define DISPATCH_SAME_OPARG() \
-    { \
-        opcode = _Py_OPCODE(*next_instr); \
-        PRE_DISPATCH_GOTO(); \
-        opcode |= cframe.use_tracing OR_DTRACE_LINE; \
-        DISPATCH_GOTO(); \
-    }
-
-#define DISPATCH_INLINED(NEW_FRAME) \
-    do { \
-        _PyFrame_SetStackPointer(frame, stack_pointer); \
-        frame->prev_instr = next_instr - 1; \
-        (NEW_FRAME)->previous = frame; \
-        frame = cframe.current_frame = (NEW_FRAME); \
-        CALL_STAT_INC(inlined_py_calls); \
-        goto start_frame; \
-    } while (0)
-
-#define CHECK_EVAL_BREAKER() \
-    _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
-    if (_Py_atomic_load_relaxed_int32(eval_breaker)) { \
-        goto handle_eval_breaker; \
-    }
-
-
-/* Tuple access macros */
-
-#ifndef Py_DEBUG
-#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
-#else
-static inline PyObject *
-GETITEM(PyObject *v, Py_ssize_t i) {
-    assert(PyTuple_Check(v));
-    assert(i >= 0);
-    assert(i < PyTuple_GET_SIZE(v));
-    return PyTuple_GET_ITEM(v, i);
-}
-#endif
-
-/* Code access macros */
-
-/* The integer overflow is checked by an assertion below. */
-#define INSTR_OFFSET() ((int)(next_instr - _PyCode_CODE(frame->f_code)))
-#define NEXTOPARG() do { \
-        _Py_CODEUNIT word = *next_instr; \
-        opcode = _Py_OPCODE(word); \
-        oparg = _Py_OPARG(word); \
-    } while (0)
-#define JUMPTO(x) (next_instr = _PyCode_CODE(frame->f_code) + (x))
-#define JUMPBY(x) (next_instr += (x))
-
-/* OpCode prediction macros
-    Some opcodes tend to come in pairs thus making it possible to
-    predict the second code when the first is run. For example,
-    COMPARE_OP is often followed by POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE.
-
-    Verifying the prediction costs a single high-speed test of a register
-    variable against a constant. If the pairing was good, then the
-    processor's own internal branch predication has a high likelihood of
-    success, resulting in a nearly zero-overhead transition to the
-    next opcode. A successful prediction saves a trip through the eval-loop
-    including its unpredictable switch-case branch. Combined with the
-    processor's internal branch prediction, a successful PREDICT has the
-    effect of making the two opcodes run as if they were a single new opcode
-    with the bodies combined.
-
-    If collecting opcode statistics, your choices are to either keep the
-    predictions turned-on and interpret the results as if some opcodes
-    had been combined or turn-off predictions so that the opcode frequency
-    counter updates for both opcodes.
-
-    Opcode prediction is disabled with threaded code, since the latter allows
-    the CPU to record separate branch prediction information for each
-    opcode.
-
-*/
-
-#define PREDICT_ID(op) PRED_##op
-
-#if USE_COMPUTED_GOTOS
-#define PREDICT(op) if (0) goto PREDICT_ID(op)
-#else
-#define PREDICT(op) \
-    do { \
-        _Py_CODEUNIT word = *next_instr; \
-        opcode = _Py_OPCODE(word) | cframe.use_tracing OR_DTRACE_LINE; \
-        if (opcode == op) { \
-            oparg = _Py_OPARG(word); \
-            INSTRUCTION_START(op); \
-            goto PREDICT_ID(op); \
-        } \
-    } while(0)
-#endif
-#define PREDICTED(op) PREDICT_ID(op):
-
-
-/* Stack manipulation macros */
-
-/* The stack can grow at most MAXINT deep, as co_nlocals and
-   co_stacksize are ints. */
-#define STACK_LEVEL() ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
-#define STACK_SIZE() (frame->f_code->co_stacksize)
-#define EMPTY() (STACK_LEVEL() == 0)
-#define TOP() (stack_pointer[-1])
-#define SECOND() (stack_pointer[-2])
-#define THIRD() (stack_pointer[-3])
-#define FOURTH() (stack_pointer[-4])
-#define PEEK(n) (stack_pointer[-(n)])
-#define POKE(n, v) (stack_pointer[-(n)] = (v))
-#define SET_TOP(v) (stack_pointer[-1] = (v))
-#define SET_SECOND(v) (stack_pointer[-2] = (v))
-#define BASIC_STACKADJ(n) (stack_pointer += n)
-#define BASIC_PUSH(v) (*stack_pointer++ = (v))
-#define BASIC_POP() (*--stack_pointer)
-
-#ifdef Py_DEBUG
-#define PUSH(v) do { \
-        BASIC_PUSH(v); \
-        assert(STACK_LEVEL() <= STACK_SIZE()); \
-    } while (0)
-#define POP() (assert(STACK_LEVEL() > 0), BASIC_POP())
-#define STACK_GROW(n) do { \
-        assert(n >= 0); \
-        BASIC_STACKADJ(n); \
-        assert(STACK_LEVEL() <= STACK_SIZE()); \
-    } while (0)
-#define STACK_SHRINK(n) do { \
-        assert(n >= 0); \
-        assert(STACK_LEVEL() >= n); \
-        BASIC_STACKADJ(-(n)); \
-    } while (0)
-#else
-#define PUSH(v) BASIC_PUSH(v)
-#define POP() BASIC_POP()
-#define STACK_GROW(n) BASIC_STACKADJ(n)
-#define STACK_SHRINK(n) BASIC_STACKADJ(-(n))
-#endif
-
-/* Local variable macros */
-
-#define GETLOCAL(i) (frame->localsplus[i])
-
-/* The SETLOCAL() macro must not DECREF the local variable in-place and
-   then store the new value; it must copy the old value to a temporary
-   value, then store the new value, and then DECREF the temporary value.
-   This is because it is possible that during the DECREF the frame is
-   accessed by other code (e.g. a __del__ method or gc.collect()) and the
-   variable would be pointing to already-freed memory. */
-#define SETLOCAL(i, value) do { PyObject *tmp = GETLOCAL(i); \
-                                GETLOCAL(i) = value; \
-                                Py_XDECREF(tmp); } while (0)
-
-#define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op)
-
-#ifdef Py_STATS
-#define UPDATE_MISS_STATS(INSTNAME) \
-    do { \
-        STAT_INC(opcode, miss); \
-        STAT_INC((INSTNAME), miss); \
-        /* The counter is always the first cache entry: */ \
-        if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) { \
-            STAT_INC((INSTNAME), deopt); \
-        } \
-        else { \
-            /* This is about to be (incorrectly) incremented: */ \
-            STAT_DEC((INSTNAME), deferred); \
-        } \
-    } while (0)
-#else
-#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
-#endif
-
-#define DEOPT_IF(COND, INSTNAME) \
-    if ((COND)) { \
-        /* This is only a single jump on release builds! */ \
-        UPDATE_MISS_STATS((INSTNAME)); \
-        assert(_PyOpcode_Deopt[opcode] == (INSTNAME)); \
-        GO_TO_INSTRUCTION(INSTNAME); \
-    }
-
-
-#define GLOBALS() frame->f_globals
-#define BUILTINS() frame->f_builtins
-#define LOCALS() frame->f_locals
-
-/* Shared opcode macros */
-
-#define TRACE_FUNCTION_EXIT() \
-    if (cframe.use_tracing) { \
-        if (trace_function_exit(tstate, frame, retval)) { \
-            Py_DECREF(retval); \
-            goto exit_unwind; \
-        } \
-    }
-
-#define DTRACE_FUNCTION_EXIT() \
-    if (PyDTrace_FUNCTION_RETURN_ENABLED()) { \
-        dtrace_function_return(frame); \
-    }
-
-#define TRACE_FUNCTION_UNWIND() \
-    if (cframe.use_tracing) { \
-        /* Since we are already unwinding, \
-         * we don't care if this raises */ \
-        trace_function_exit(tstate, frame, NULL); \
-    }
-
-#define TRACE_FUNCTION_ENTRY() \
-    if (cframe.use_tracing) { \
-        _PyFrame_SetStackPointer(frame, stack_pointer); \
-        int err = trace_function_entry(tstate, frame); \
-        stack_pointer = _PyFrame_GetStackPointer(frame); \
-        if (err) { \
-            goto error; \
-        } \
-    }
-
-#define TRACE_FUNCTION_THROW_ENTRY() \
-    if (cframe.use_tracing) { \
-        assert(frame->stacktop >= 0); \
-        if (trace_function_entry(tstate, frame)) { \
-            goto exit_unwind; \
-        } \
-    }
-
-#define DTRACE_FUNCTION_ENTRY() \
-    if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
-        dtrace_function_entry(frame); \
-    }
-
-#define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \
-    (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == 0)
-
-#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \
-    (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1))
-
-#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
-    do { \
-        assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \
-        (COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS); \
-    } while (0);
-
-#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \
-    do { \
-        assert(!ADAPTIVE_COUNTER_IS_MAX((COUNTER))); \
-        (COUNTER) += (1 << ADAPTIVE_BACKOFF_BITS); \
-    } while (0);
+#include "ceval_macros.h"
 
 static int
 trace_function_entry(PyThreadState *tstate, _PyInterpreterFrame *frame)
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h
new file mode 100644
index 0000000..d7a8f0b
--- /dev/null
+++ b/Python/ceval_macros.h
@@ -0,0 +1,349 @@
+// Macros needed by ceval.c and bytecodes.c
+
+/* Computed GOTOs, or
+       the-optimization-commonly-but-improperly-known-as-"threaded code"
+   using gcc's labels-as-values extension
+   (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).
+
+   The traditional bytecode evaluation loop uses a "switch" statement, which
+   decent compilers will optimize as a single indirect branch instruction
+   combined with a lookup table of jump addresses. However, since the
+   indirect jump instruction is shared by all opcodes, the CPU will have a
+   hard time making the right prediction for where to jump next (actually,
+   it will be always wrong except in the uncommon case of a sequence of
+   several identical opcodes).
+
+   "Threaded code" in contrast, uses an explicit jump table and an explicit
+   indirect jump instruction at the end of each opcode. Since the jump
+   instruction is at a different address for each opcode, the CPU will make a
+   separate prediction for each of these instructions, which is equivalent to
+   predicting the second opcode of each opcode pair. These predictions have
+   a much better chance to turn out valid, especially in small bytecode loops.
+
+   A mispredicted branch on a modern CPU flushes the whole pipeline and
+   can cost several CPU cycles (depending on the pipeline depth),
+   and potentially many more instructions (depending on the pipeline width).
+   A correctly predicted branch, however, is nearly free.
+
+   At the time of this writing, the "threaded code" version is up to 15-20%
+   faster than the normal "switch" version, depending on the compiler and the
+   CPU architecture.
+
+   NOTE: care must be taken that the compiler doesn't try to "optimize" the
+   indirect jumps by sharing them between all opcodes. Such optimizations
+   can be disabled on gcc by using the -fno-gcse flag (or possibly
+   -fno-crossjumping).
+*/
+
+/* Use macros rather than inline functions, to make it as clear as possible
+ * to the C compiler that the tracing check is a simple test then branch.
+ * We want to be sure that the compiler knows this before it generates
+ * the CFG.
+ */
+
+#ifdef WITH_DTRACE
+#define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
+#else
+#define OR_DTRACE_LINE
+#endif
+
+#ifdef HAVE_COMPUTED_GOTOS
+    #ifndef USE_COMPUTED_GOTOS
+    #define USE_COMPUTED_GOTOS 1
+    #endif
+#else
+    #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
+    #error "Computed gotos are not supported on this compiler."
+    #endif
+    #undef USE_COMPUTED_GOTOS
+    #define USE_COMPUTED_GOTOS 0
+#endif
+
+#ifdef Py_STATS
+#define INSTRUCTION_START(op) \
+    do { \
+        frame->prev_instr = next_instr++; \
+        OPCODE_EXE_INC(op); \
+        if (_py_stats) _py_stats->opcode_stats[lastopcode].pair_count[op]++; \
+        lastopcode = op; \
+    } while (0)
+#else
+#define INSTRUCTION_START(op) (frame->prev_instr = next_instr++)
+#endif
+
+#if USE_COMPUTED_GOTOS
+# define TARGET(op) TARGET_##op: INSTRUCTION_START(op);
+# define DISPATCH_GOTO() goto *opcode_targets[opcode]
+#else
+# define TARGET(op) case op: TARGET_##op: INSTRUCTION_START(op);
+# define DISPATCH_GOTO() goto dispatch_opcode
+#endif
+
+/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
+#ifdef LLTRACE
+#define PRE_DISPATCH_GOTO() if (lltrace) { \
+    lltrace_instruction(frame, stack_pointer, next_instr); }
+#else
+#define PRE_DISPATCH_GOTO() ((void)0)
+#endif
+
+
+/* Do interpreter dispatch accounting for tracing and instrumentation */
+#define DISPATCH() \
+    { \
+        NEXTOPARG(); \
+        PRE_DISPATCH_GOTO(); \
+        assert(cframe.use_tracing == 0 || cframe.use_tracing == 255); \
+        opcode |= cframe.use_tracing OR_DTRACE_LINE; \
+        DISPATCH_GOTO(); \
+    }
+
+#define DISPATCH_SAME_OPARG() \
+    { \
+        opcode = _Py_OPCODE(*next_instr); \
+        PRE_DISPATCH_GOTO(); \
+        opcode |= cframe.use_tracing OR_DTRACE_LINE; \
+        DISPATCH_GOTO(); \
+    }
+
+#define DISPATCH_INLINED(NEW_FRAME) \
+    do { \
+        _PyFrame_SetStackPointer(frame, stack_pointer); \
+        frame->prev_instr = next_instr - 1; \
+        (NEW_FRAME)->previous = frame; \
+        frame = cframe.current_frame = (NEW_FRAME); \
+        CALL_STAT_INC(inlined_py_calls); \
+        goto start_frame; \
+    } while (0)
+
+#define CHECK_EVAL_BREAKER() \
+    _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
+    if (_Py_atomic_load_relaxed_int32(eval_breaker)) { \
+        goto handle_eval_breaker; \
+    }
+
+
+/* Tuple access macros */
+
+#ifndef Py_DEBUG
+#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
+#else
+static inline PyObject *
+GETITEM(PyObject *v, Py_ssize_t i) {
+    assert(PyTuple_Check(v));
+    assert(i >= 0);
+    assert(i < PyTuple_GET_SIZE(v));
+    return PyTuple_GET_ITEM(v, i);
+}
+#endif
+
+/* Code access macros */
+
+/* The integer overflow is checked by an assertion below. */
+#define INSTR_OFFSET() ((int)(next_instr - _PyCode_CODE(frame->f_code)))
+#define NEXTOPARG() do { \
+        _Py_CODEUNIT word = *next_instr; \
+        opcode = _Py_OPCODE(word); \
+        oparg = _Py_OPARG(word); \
+    } while (0)
+#define JUMPTO(x) (next_instr = _PyCode_CODE(frame->f_code) + (x))
+#define JUMPBY(x) (next_instr += (x))
+
+/* OpCode prediction macros
+    Some opcodes tend to come in pairs thus making it possible to
+    predict the second code when the first is run. For example,
+    COMPARE_OP is often followed by POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE.
+
+    Verifying the prediction costs a single high-speed test of a register
+    variable against a constant. If the pairing was good, then the
+    processor's own internal branch predication has a high likelihood of
+    success, resulting in a nearly zero-overhead transition to the
+    next opcode. A successful prediction saves a trip through the eval-loop
+    including its unpredictable switch-case branch. Combined with the
+    processor's internal branch prediction, a successful PREDICT has the
+    effect of making the two opcodes run as if they were a single new opcode
+    with the bodies combined.
+
+    If collecting opcode statistics, your choices are to either keep the
+    predictions turned-on and interpret the results as if some opcodes
+    had been combined or turn-off predictions so that the opcode frequency
+    counter updates for both opcodes.
+
+    Opcode prediction is disabled with threaded code, since the latter allows
+    the CPU to record separate branch prediction information for each
+    opcode.
+
+*/
+
+#define PREDICT_ID(op) PRED_##op
+
+#if USE_COMPUTED_GOTOS
+#define PREDICT(op) if (0) goto PREDICT_ID(op)
+#else
+#define PREDICT(op) \
+    do { \
+        _Py_CODEUNIT word = *next_instr; \
+        opcode = _Py_OPCODE(word) | cframe.use_tracing OR_DTRACE_LINE; \
+        if (opcode == op) { \
+            oparg = _Py_OPARG(word); \
+            INSTRUCTION_START(op); \
+            goto PREDICT_ID(op); \
+        } \
+    } while(0)
+#endif
+#define PREDICTED(op) PREDICT_ID(op):
+
+
+/* Stack manipulation macros */
+
+/* The stack can grow at most MAXINT deep, as co_nlocals and
+   co_stacksize are ints. */
+#define STACK_LEVEL() ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
+#define STACK_SIZE() (frame->f_code->co_stacksize)
+#define EMPTY() (STACK_LEVEL() == 0)
+#define TOP() (stack_pointer[-1])
+#define SECOND() (stack_pointer[-2])
+#define THIRD() (stack_pointer[-3])
+#define FOURTH() (stack_pointer[-4])
+#define PEEK(n) (stack_pointer[-(n)])
+#define POKE(n, v) (stack_pointer[-(n)] = (v))
+#define SET_TOP(v) (stack_pointer[-1] = (v))
+#define SET_SECOND(v) (stack_pointer[-2] = (v))
+#define BASIC_STACKADJ(n) (stack_pointer += n)
+#define BASIC_PUSH(v) (*stack_pointer++ = (v))
+#define BASIC_POP() (*--stack_pointer)
+
+#ifdef Py_DEBUG
+#define PUSH(v) do { \
+        BASIC_PUSH(v); \
+        assert(STACK_LEVEL() <= STACK_SIZE()); \
+    } while (0)
+#define POP() (assert(STACK_LEVEL() > 0), BASIC_POP())
+#define STACK_GROW(n) do { \
+        assert(n >= 0); \
+        BASIC_STACKADJ(n); \
+        assert(STACK_LEVEL() <= STACK_SIZE()); \
+    } while (0)
+#define STACK_SHRINK(n) do { \
+        assert(n >= 0); \
+        assert(STACK_LEVEL() >= n); \
+        BASIC_STACKADJ(-(n)); \
+    } while (0)
+#else
+#define PUSH(v) BASIC_PUSH(v)
+#define POP() BASIC_POP()
+#define STACK_GROW(n) BASIC_STACKADJ(n)
+#define STACK_SHRINK(n) BASIC_STACKADJ(-(n))
+#endif
+
+/* Local variable macros */
+
+#define GETLOCAL(i) (frame->localsplus[i])
+
+/* The SETLOCAL() macro must not DECREF the local variable in-place and
+   then store the new value; it must copy the old value to a temporary
+   value, then store the new value, and then DECREF the temporary value.
+   This is because it is possible that during the DECREF the frame is
+   accessed by other code (e.g. a __del__ method or gc.collect()) and the
+   variable would be pointing to already-freed memory. */
+#define SETLOCAL(i, value) do { PyObject *tmp = GETLOCAL(i); \
+                                GETLOCAL(i) = value; \
+                                Py_XDECREF(tmp); } while (0)
+
+#define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op)
+
+#ifdef Py_STATS
+#define UPDATE_MISS_STATS(INSTNAME) \
+    do { \
+        STAT_INC(opcode, miss); \
+        STAT_INC((INSTNAME), miss); \
+        /* The counter is always the first cache entry: */ \
+        if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) { \
+            STAT_INC((INSTNAME), deopt); \
+        } \
+        else { \
+            /* This is about to be (incorrectly) incremented: */ \
+            STAT_DEC((INSTNAME), deferred); \
+        } \
+    } while (0)
+#else
+#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
+#endif
+
+#define DEOPT_IF(COND, INSTNAME) \
+    if ((COND)) { \
+        /* This is only a single jump on release builds! */ \
+        UPDATE_MISS_STATS((INSTNAME)); \
+        assert(_PyOpcode_Deopt[opcode] == (INSTNAME)); \
+        GO_TO_INSTRUCTION(INSTNAME); \
+    }
+
+
+#define GLOBALS() frame->f_globals
+#define BUILTINS() frame->f_builtins
+#define LOCALS() frame->f_locals
+
+/* Shared opcode macros */
+
+#define TRACE_FUNCTION_EXIT() \
+    if (cframe.use_tracing) { \
+        if (trace_function_exit(tstate, frame, retval)) { \
+            Py_DECREF(retval); \
+            goto exit_unwind; \
+        } \
+    }
+
+#define DTRACE_FUNCTION_EXIT() \
+    if (PyDTrace_FUNCTION_RETURN_ENABLED()) { \
+        dtrace_function_return(frame); \
+    }
+
+#define TRACE_FUNCTION_UNWIND() \
+    if (cframe.use_tracing) { \
+        /* Since we are already unwinding, \
+         * we don't care if this raises */ \
+        trace_function_exit(tstate, frame, NULL); \
+    }
+
+#define TRACE_FUNCTION_ENTRY() \
+    if (cframe.use_tracing) { \
+        _PyFrame_SetStackPointer(frame, stack_pointer); \
+        int err = trace_function_entry(tstate, frame); \
+        stack_pointer = _PyFrame_GetStackPointer(frame); \
+        if (err) { \
+            goto error; \
+        } \
+    }
+
+#define TRACE_FUNCTION_THROW_ENTRY() \
+    if (cframe.use_tracing) { \
+        assert(frame->stacktop >= 0); \
+        if (trace_function_entry(tstate, frame)) { \
+            goto exit_unwind; \
+        } \
+    }
+
+#define DTRACE_FUNCTION_ENTRY() \
+    if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
+        dtrace_function_entry(frame); \
+    }
+
+#define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \
+    (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == 0)
+
+#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \
+    (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1))
+
+#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
+    do { \
+        assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \
+        (COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS); \
+    } while (0);
+
+#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \
+    do { \
+        assert(!ADAPTIVE_COUNTER_IS_MAX((COUNTER))); \
+        (COUNTER) += (1 << ADAPTIVE_BACKOFF_BITS); \
+    } while (0);
+
+#define NAME_ERROR_MSG "name '%.200s' is not defined"
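The block comment carried over into ceval_macros.h explains why the interpreter prefers computed gotos over a plain switch. As a hedged illustration of that technique only (a toy interpreter, not CPython's loop; the opcode names and the run() harness are invented for the example, and it needs the gcc/clang labels-as-values extension), a dispatch loop in this style looks roughly like:

```c
/* Toy "threaded code" dispatch in the style the comment describes.
 * Requires gcc or clang (labels-as-values); not CPython code. */
#include <stdio.h>

enum { OP_INCR, OP_DECR, OP_HALT };

static int
run(const unsigned char *pc)
{
    /* One jump-table slot per opcode, holding the address of its handler. */
    static void *targets[] = { &&TARGET_INCR, &&TARGET_DECR, &&TARGET_HALT };
    int acc = 0;

    /* Each handler ends in its own indirect jump, so the branch predictor
     * tracks each opcode's likely successor separately instead of funnelling
     * every transition through one shared switch jump. */
#define DISPATCH() goto *targets[*pc++]

    DISPATCH();

TARGET_INCR:
    acc++;
    DISPATCH();
TARGET_DECR:
    acc--;
    DISPATCH();
TARGET_HALT:
#undef DISPATCH
    return acc;
}

int main(void)
{
    const unsigned char program[] = { OP_INCR, OP_INCR, OP_DECR, OP_INCR, OP_HALT };
    printf("result = %d\n", run(program));  /* prints 2 */
    return 0;
}
```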
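The ADAPTIVE_COUNTER_* macros at the bottom of the header implement the specializing interpreter's backoff counters: only the bits above ADAPTIVE_BACKOFF_BITS count, so each decrement subtracts 1 << ADAPTIVE_BACKOFF_BITS. A minimal, self-contained sketch of that arithmetic follows; the 4-bit backoff width and the demo values are assumptions made for the illustration, not taken from this commit, and the macros are simplified copies rather than the ones in the header.

```c
/* Self-contained sketch of the backoff-counter arithmetic. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ADAPTIVE_BACKOFF_BITS 4  /* width assumed for the demo */

#define COUNTER_IS_ZERO(c) (((c) >> ADAPTIVE_BACKOFF_BITS) == 0)
#define DECREMENT_COUNTER(c) \
    do { \
        assert(!COUNTER_IS_ZERO(c)); \
        (c) -= (1 << ADAPTIVE_BACKOFF_BITS); \
    } while (0)

int main(void)
{
    uint16_t counter = 3 << ADAPTIVE_BACKOFF_BITS;  /* three decrements until "zero" */
    int steps = 0;
    while (!COUNTER_IS_ZERO(counter)) {
        DECREMENT_COUNTER(counter);
        steps++;
    }
    printf("counter hit zero after %d decrements\n", steps);  /* prints 3 */
    return 0;
}
```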
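The comment above SETLOCAL() explains why the old value must be stashed before it is released: the DECREF can run arbitrary code (a __del__ method, gc.collect()) that looks at the frame again. Below is a toy model of that ordering in plain C with a fake reference count rather than the CPython API; every name in it is invented for the illustration.

```c
/* Toy model of the SETLOCAL() ordering rule; not CPython code. */
#include <stdio.h>
#include <stdlib.h>

typedef struct { int refcnt; const char *name; } Obj;

static Obj *slot;  /* stands in for frame->localsplus[i] */

static void decref(Obj *o)
{
    if (o != NULL && --o->refcnt == 0) {
        /* A real finalizer could run arbitrary code here and inspect the
         * frame; because the slot was updated first, it never sees a
         * dangling pointer to the object being freed. */
        printf("finalizing %s; slot now holds %s\n", o->name, slot->name);
        free(o);
    }
}

/* Same shape as SETLOCAL(): stash the old value, store the new one, release last. */
#define SET_SLOT(val) do { Obj *tmp = slot; slot = (val); decref(tmp); } while (0)

int main(void)
{
    Obj *first = malloc(sizeof(Obj));
    Obj *second = malloc(sizeof(Obj));
    *first = (Obj){ .refcnt = 1, .name = "first" };
    *second = (Obj){ .refcnt = 1, .name = "second" };

    slot = first;
    SET_SLOT(second);   /* frees "first", which observes the already-updated slot */
    decref(slot);       /* clean up "second" */
    return 0;
}
```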