From 496f711ae9cf8d67deb17e52f10b5b3ae39646f7 Mon Sep 17 00:00:00 2001 From: mig Date: Fri, 11 Jan 2013 15:37:10 +0000 Subject: testing a cheaper(?) INST_START_COMMAND --- generic/tclExecute.c | 97 ++++++++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 303bafd..ae9d0c7 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -2309,6 +2309,18 @@ TEBCresume( * reduces total obj size. */ + if (*pc == INST_START_CMD) { + iPtr->cmdCount += TclGetUInt4AtPtr(pc+5); + if (checkInterp) { + checkInterp = 0; + if ((codePtr->compileEpoch != iPtr->compileEpoch) + || (codePtr->nsEpoch != iPtr->varFramePtr->nsPtr->resolverEpoch)) { + goto instStartCmdFailed; + } + } + pc += 9; + } + if (*pc == INST_LOAD_SCALAR1) { goto instLoadScalar1; } else if (*pc == INST_PUSH1) { @@ -2499,57 +2511,8 @@ TEBCresume( */ pc++; -#if !TCL_COMPILE_DEBUG - if (*pc == INST_START_CMD) { - TCL_DTRACE_INST_NEXT(); - goto instStartCmdPeephole; - } -#endif NEXT_INST_F(0, 0, 0); - case INST_START_CMD: -#if !TCL_COMPILE_DEBUG - instStartCmdPeephole: -#endif - /* - * Remark that if the interpreter is marked for deletion its - * compileEpoch is modified, so that the epoch check also verifies - * that the interp is not deleted. If no outside call has been made - * since the last check, it is safe to omit the check. - */ - - iPtr->cmdCount += TclGetUInt4AtPtr(pc+5); - if (!checkInterp) { - goto instStartCmdOK; - } else if (((codePtr->compileEpoch == iPtr->compileEpoch) - && (codePtr->nsEpoch == iPtr->varFramePtr->nsPtr->resolverEpoch)) - || (codePtr->flags & TCL_BYTECODE_PRECOMPILED)) { - checkInterp = 0; - instStartCmdOK: - NEXT_INST_F(9, 0, 0); - } else { - const char *bytes; - - length = 0; - - /* - * We used to switch to direct eval; for NRE-awareness we now - * compile and eval the command so that this evaluation does not - * add a new TEBC instance. [Bug 2910748] - */ - - if (TclInterpReady(interp) == TCL_ERROR) { - goto gotError; - } - - codePtr->flags |= TCL_BYTECODE_RECOMPILE; - bytes = GetSrcInfoForPc(pc, codePtr, &length, NULL); - opnd = TclGetUInt4AtPtr(pc+1); - pc += (opnd-1); - PUSH_OBJECT(Tcl_NewStringObj(bytes, length)); - goto instEvalStk; - } - case INST_NOP: pc += 1; goto cleanup0; @@ -7102,6 +7065,42 @@ TEBCresume( TclStackFree(interp, TD); /* free my stack */ return result; + + /* + * INST_START_CMD failure case removed where it doesn't bother that much + */ + /* case INST_START_CMD: + * + * Remark that if the interpreter is marked for deletion its + * compileEpoch is modified, so that the epoch check also verifies + * that the interp is not deleted. If no outside call has been made + * since the last check, it is safe to omit the check. + */ + + instStartCmdFailed: + { + const char *bytes; + + length = 0; + + /* + * We used to switch to direct eval; for NRE-awareness we now + * compile and eval the command so that this evaluation does not + * add a new TEBC instance. [Bug 2910748] + */ + + if (TclInterpReady(interp) == TCL_ERROR) { + goto gotError; + } + + codePtr->flags |= TCL_BYTECODE_RECOMPILE; + bytes = GetSrcInfoForPc(pc, codePtr, &length, NULL); + opnd = TclGetUInt4AtPtr(pc+1); + pc += (opnd-1); + PUSH_OBJECT(Tcl_NewStringObj(bytes, length)); + goto instEvalStk; + NEXT_INST_F(9, 0, 0); + } } #undef codePtr -- cgit v0.12 From f531d3de422a79dcc477d10d83f2badbbc27e8ea Mon Sep 17 00:00:00 2001 From: mig Date: Fri, 11 Jan 2013 18:05:50 +0000 Subject: fix for consecutive ISC (produced by [while 1 {...}) --- generic/tclExecute.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index ae9d0c7..bc755e8 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -2300,16 +2300,10 @@ TEBCresume( TCL_DTRACE_INST_NEXT(); - /* - * These two instructions account for 26% of all instructions (according - * to measurements on tclbench by Ben Vitale - * [http://www.cs.toronto.edu/syslab/pubs/tcl2005-vitale-zaleski.pdf] - * Resolving them before the switch reduces the cost of branch - * mispredictions, seems to improve runtime by 5% to 15%, and (amazingly!) - * reduces total obj size. - */ - - if (*pc == INST_START_CMD) { + while (*pc == INST_START_CMD) { +#ifdef TCL_COMPILE_STATS + iPtr->stats.instructionCount[*pc]++; +#endif iPtr->cmdCount += TclGetUInt4AtPtr(pc+5); if (checkInterp) { checkInterp = 0; @@ -2321,6 +2315,15 @@ TEBCresume( pc += 9; } + /* + * These two instructions account for 26% of all instructions (according + * to measurements on tclbench by Ben Vitale + * [http://www.cs.toronto.edu/syslab/pubs/tcl2005-vitale-zaleski.pdf] + * Resolving them before the switch reduces the cost of branch + * mispredictions, seems to improve runtime by 5% to 15%, and (amazingly!) + * reduces total obj size. + */ + if (*pc == INST_LOAD_SCALAR1) { goto instLoadScalar1; } else if (*pc == INST_PUSH1) { @@ -2503,19 +2506,10 @@ TEBCresume( TRACE_WITH_OBJ(("=> discarding "), OBJ_AT_TOS); objPtr = POP_OBJECT(); TclDecrRefCount(objPtr); - - /* - * Runtime peephole optimisation: an INST_POP is scheduled at the end - * of most commands. If the next instruction is an INST_START_CMD, - * fall through to it. - */ - - pc++; - NEXT_INST_F(0, 0, 0); + NEXT_INST_F(1, 0, 0); case INST_NOP: - pc += 1; - goto cleanup0; + NEXT_INST_F(1, 0, 0); case INST_DUP: objResultPtr = OBJ_AT_TOS; @@ -7081,6 +7075,7 @@ TEBCresume( { const char *bytes; + checkInterp = 1; length = 0; /* -- cgit v0.12 From e5fc72423c12d157618f81231cc5ae12e0e8fc76 Mon Sep 17 00:00:00 2001 From: mig Date: Fri, 11 Jan 2013 21:16:07 +0000 Subject: better comments --- generic/tclExecute.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index bc755e8..1ed8949 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -2301,6 +2301,10 @@ TEBCresume( TCL_DTRACE_INST_NEXT(); while (*pc == INST_START_CMD) { + /* + * Peephole: do not run INST_START_CMD, just skip it + */ + #ifdef TCL_COMPILE_STATS iPtr->stats.instructionCount[*pc]++; #endif @@ -7062,13 +7066,13 @@ TEBCresume( /* * INST_START_CMD failure case removed where it doesn't bother that much - */ - /* case INST_START_CMD: * * Remark that if the interpreter is marked for deletion its * compileEpoch is modified, so that the epoch check also verifies * that the interp is not deleted. If no outside call has been made * since the last check, it is safe to omit the check. + + * case INST_START_CMD: */ instStartCmdFailed: -- cgit v0.12 From 6e7718395efb2bf299224e5188b32da47efe0883 Mon Sep 17 00:00:00 2001 From: mig Date: Sat, 12 Jan 2013 10:14:06 +0000 Subject: even better ... or so I hope: also inlining INST_PUSH1 in the peephole, checking for ISC after LOAD1 and PUSH1 --- generic/tclExecute.c | 93 ++++++++++++++++++++++------------------------------ 1 file changed, 40 insertions(+), 53 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 1ed8949..4d758f6 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -2250,23 +2250,6 @@ TEBCresume( } cleanup0: -#ifdef TCL_COMPILE_DEBUG - /* - * Skip the stack depth check if an expansion is in progress. - */ - - CHECK_STACK(); - if (traceInstructions) { - fprintf(stdout, "%2d: %2d ", iPtr->numLevels, (int) CURR_DEPTH); - TclPrintInstruction(codePtr, pc); - fflush(stdout); - } -#endif /* TCL_COMPILE_DEBUG */ - -#ifdef TCL_COMPILE_STATS - iPtr->stats.instructionCount[*pc]++; -#endif - /* * Check for asynchronous handlers [Bug 746722]; we do the check every * ASYNC_CHECK_COUNT_MASK instruction, of the form (2**n-1). @@ -2298,16 +2281,51 @@ TEBCresume( CACHE_STACK_INFO(); } + /* + * These two instructions account for 26% of all instructions (according + * to measurements on tclbench by Ben Vitale + * [http://www.cs.toronto.edu/syslab/pubs/tcl2005-vitale-zaleski.pdf] + * Resolving them before the switch reduces the cost of branch + * mispredictions, seems to improve runtime by 5% to 15%, and (amazingly!) + * reduces total obj size. + */ + + peepholeStart: +#ifdef TCL_COMPILE_STATS + iPtr->stats.instructionCount[*pc]++; +#endif + +#ifdef TCL_COMPILE_DEBUG + /* + * Skip the stack depth check if an expansion is in progress. + */ + + CHECK_STACK(); + if (traceInstructions) { + fprintf(stdout, "%2d: %2d ", iPtr->numLevels, (int) CURR_DEPTH); + TclPrintInstruction(codePtr, pc); + fflush(stdout); + } +#endif /* TCL_COMPILE_DEBUG */ + TCL_DTRACE_INST_NEXT(); + + if (*pc == INST_LOAD_SCALAR1) { + goto instLoadScalar1; + } - while (*pc == INST_START_CMD) { + if (*pc == INST_PUSH1) { + PUSH_OBJECT(codePtr->objArrayPtr[TclGetUInt1AtPtr(pc+1)]); + TRACE_WITH_OBJ(("%u => ", TclGetInt1AtPtr(pc+1)), OBJ_AT_TOS); + pc += 2; + goto peepholeStart; + } + + if (*pc == INST_START_CMD) { /* * Peephole: do not run INST_START_CMD, just skip it */ -#ifdef TCL_COMPILE_STATS - iPtr->stats.instructionCount[*pc]++; -#endif iPtr->cmdCount += TclGetUInt4AtPtr(pc+5); if (checkInterp) { checkInterp = 0; @@ -2317,23 +2335,9 @@ TEBCresume( } } pc += 9; + goto peepholeStart; } - /* - * These two instructions account for 26% of all instructions (according - * to measurements on tclbench by Ben Vitale - * [http://www.cs.toronto.edu/syslab/pubs/tcl2005-vitale-zaleski.pdf] - * Resolving them before the switch reduces the cost of branch - * mispredictions, seems to improve runtime by 5% to 15%, and (amazingly!) - * reduces total obj size. - */ - - if (*pc == INST_LOAD_SCALAR1) { - goto instLoadScalar1; - } else if (*pc == INST_PUSH1) { - goto instPush1Peephole; - } - switch (*pc) { case INST_SYNTAX: case INST_RETURN_IMM: { @@ -2484,23 +2488,6 @@ TEBCresume( (void) POP_OBJECT(); goto abnormalReturn; - case INST_PUSH1: - instPush1Peephole: - PUSH_OBJECT(codePtr->objArrayPtr[TclGetUInt1AtPtr(pc+1)]); - TRACE_WITH_OBJ(("%u => ", TclGetInt1AtPtr(pc+1)), OBJ_AT_TOS); - pc += 2; -#if !TCL_COMPILE_DEBUG - /* - * Runtime peephole optimisation: check if we are pushing again. - */ - - if (*pc == INST_PUSH1) { - TCL_DTRACE_INST_NEXT(); - goto instPush1Peephole; - } -#endif - NEXT_INST_F(0, 0, 0); - case INST_PUSH4: objResultPtr = codePtr->objArrayPtr[TclGetUInt4AtPtr(pc+1)]; TRACE_WITH_OBJ(("%u => ", TclGetUInt4AtPtr(pc+1)), objResultPtr); -- cgit v0.12 From ab85720d9820b140486e1517a6bff19cfacffd32 Mon Sep 17 00:00:00 2001 From: mig Date: Sat, 12 Jan 2013 10:49:25 +0000 Subject: discouraging the compiler from re-reading *pc in the peephole loop --- generic/tclExecute.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 4d758f6..5bf0e79 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -2084,7 +2084,8 @@ TEBCresume( Tcl_Obj **tosPtr; /* Cached pointer to top of evaluation * stack. */ const unsigned char *pc; /* The current program counter. */ - + unsigned char inst; /* The currently running instruction */ + /* * Transfer variables - needed only between opcodes, but not while * executing an instruction. @@ -2290,6 +2291,8 @@ TEBCresume( * reduces total obj size. */ + inst = *pc; + peepholeStart: #ifdef TCL_COMPILE_STATS iPtr->stats.instructionCount[*pc]++; @@ -2310,18 +2313,18 @@ TEBCresume( TCL_DTRACE_INST_NEXT(); - if (*pc == INST_LOAD_SCALAR1) { + if (inst == INST_LOAD_SCALAR1) { goto instLoadScalar1; } - if (*pc == INST_PUSH1) { + if (inst == INST_PUSH1) { PUSH_OBJECT(codePtr->objArrayPtr[TclGetUInt1AtPtr(pc+1)]); TRACE_WITH_OBJ(("%u => ", TclGetInt1AtPtr(pc+1)), OBJ_AT_TOS); - pc += 2; + inst = *(pc += 2); goto peepholeStart; } - if (*pc == INST_START_CMD) { + if (inst == INST_START_CMD) { /* * Peephole: do not run INST_START_CMD, just skip it */ @@ -2334,11 +2337,11 @@ TEBCresume( goto instStartCmdFailed; } } - pc += 9; + inst = *(pc += 9); goto peepholeStart; } - switch (*pc) { + switch (inst) { case INST_SYNTAX: case INST_RETURN_IMM: { int code = TclGetInt4AtPtr(pc+1); -- cgit v0.12 From 71ccd57a94c21d7e36abe8550f656e6f082a2907 Mon Sep 17 00:00:00 2001 From: mig Date: Sat, 12 Jan 2013 10:53:23 +0000 Subject: discouraging the compiler from re-reading *pc in the peephole loop, part2 (any diff?) --- generic/tclExecute.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 5bf0e79..628dfe7 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -2315,16 +2315,12 @@ TEBCresume( if (inst == INST_LOAD_SCALAR1) { goto instLoadScalar1; - } - - if (inst == INST_PUSH1) { + } else if (inst == INST_PUSH1) { PUSH_OBJECT(codePtr->objArrayPtr[TclGetUInt1AtPtr(pc+1)]); TRACE_WITH_OBJ(("%u => ", TclGetInt1AtPtr(pc+1)), OBJ_AT_TOS); inst = *(pc += 2); goto peepholeStart; - } - - if (inst == INST_START_CMD) { + } else if (inst == INST_START_CMD) { /* * Peephole: do not run INST_START_CMD, just skip it */ -- cgit v0.12 -- cgit v0.12