author     mig <mig>    2013-01-12 10:14:06 (GMT)
committer  mig <mig>    2013-01-12 10:14:06 (GMT)
commit     6e7718395efb2bf299224e5188b32da47efe0883 (patch)
tree       ecc8b1a68231dc3e468b73404813afdcf0ee3b85 /generic/tclExecute.c
parent     e8ffff0c7bd7b4a82bb2d50631dc13b6a081636d (diff)
even better ... or so I hope: also inlining INST_PUSH1 in the peephole, checking for ISC after LOAD1 and PUSH1
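For readers new to TEBCresume's dispatch loop, the idea behind this commit can be illustrated with a toy interpreter: the hottest opcodes are tested and fully handled at a "peephole" label placed before the main switch, and they jump straight back to that label after executing, so only the rarer opcodes pay for the switch dispatch. (In the real code the label is peepholeStart, the hot opcodes are INST_PUSH1 and INST_LOAD_SCALAR1, and "ISC" in the commit message is INST_START_CMD, which the peephole also skips.) The sketch below is a hypothetical, self-contained illustration of that pattern, not Tcl's actual code: the opcodes, the run() function, and the plain int stack are all invented for the example.

    /*
     * Minimal standalone sketch of the dispatch pattern used in this commit:
     * the most frequent opcodes are handled at a "peephole" label before the
     * main switch and loop straight back to it, so the switch's costly,
     * poorly predicted dispatch branch is skipped for them entirely.
     * NOT Tcl's TEBCresume(); opcodes and stack layout are invented.
     */
    #include <stdio.h>

    enum { OP_PUSH1, OP_LOAD1, OP_ADD, OP_DONE };

    static int
    run(const unsigned char *pc, const int *consts, const int *locals)
    {
        int stack[32];
        int *tos = stack;               /* one past the top of the stack */

      peephole:
        /* Resolve the most frequent instructions before the switch. */
        if (*pc == OP_PUSH1) {
            *tos++ = consts[pc[1]];     /* push a constant; 2-byte instruction */
            pc += 2;
            goto peephole;              /* re-check the next opcode immediately */
        }
        if (*pc == OP_LOAD1) {
            *tos++ = locals[pc[1]];     /* push a local variable */
            pc += 2;
            goto peephole;
        }

        switch (*pc) {
        case OP_ADD:
            tos[-2] += tos[-1];
            tos--;
            pc += 1;
            goto peephole;
        case OP_DONE:
            return tos[-1];
        }
        return -1;                      /* unreachable for well-formed code */
    }

    int
    main(void)
    {
        /* Compute consts[0] + locals[1]: PUSH1 0, LOAD1 1, ADD, DONE. */
        const unsigned char code[] = { OP_PUSH1, 0, OP_LOAD1, 1, OP_ADD, OP_DONE };
        const int consts[] = { 40 };
        const int locals[] = { 0, 2 };

        printf("%d\n", run(code, consts, locals));  /* prints 42 */
        return 0;
    }

Because the cheap instructions re-check *pc as soon as they finish, a run of consecutive pushes and loads never reaches the switch at all; the diff below generalizes what the old #if !TCL_COMPILE_DEBUG "check if we are pushing again" peephole did for INST_PUSH1 alone, and moves the per-instruction stats and debug tracing up to the peephole label so the inlined paths are still counted and traced.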
Diffstat (limited to 'generic/tclExecute.c')
-rw-r--r--    generic/tclExecute.c    93
1 file changed, 40 insertions(+), 53 deletions(-)
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 1ed8949..4d758f6 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -2250,23 +2250,6 @@ TEBCresume(
}
cleanup0:
-#ifdef TCL_COMPILE_DEBUG
- /*
- * Skip the stack depth check if an expansion is in progress.
- */
-
- CHECK_STACK();
- if (traceInstructions) {
- fprintf(stdout, "%2d: %2d ", iPtr->numLevels, (int) CURR_DEPTH);
- TclPrintInstruction(codePtr, pc);
- fflush(stdout);
- }
-#endif /* TCL_COMPILE_DEBUG */
-
-#ifdef TCL_COMPILE_STATS
- iPtr->stats.instructionCount[*pc]++;
-#endif
-
/*
* Check for asynchronous handlers [Bug 746722]; we do the check every
* ASYNC_CHECK_COUNT_MASK instruction, of the form (2**n-1).
@@ -2298,16 +2281,51 @@ TEBCresume(
CACHE_STACK_INFO();
}
+ /*
+ * These two instructions account for 26% of all instructions (according
+ * to measurements on tclbench by Ben Vitale
+ * [http://www.cs.toronto.edu/syslab/pubs/tcl2005-vitale-zaleski.pdf]
+ * Resolving them before the switch reduces the cost of branch
+ * mispredictions, seems to improve runtime by 5% to 15%, and (amazingly!)
+ * reduces total obj size.
+ */
+
+ peepholeStart:
+#ifdef TCL_COMPILE_STATS
+ iPtr->stats.instructionCount[*pc]++;
+#endif
+
+#ifdef TCL_COMPILE_DEBUG
+ /*
+ * Skip the stack depth check if an expansion is in progress.
+ */
+
+ CHECK_STACK();
+ if (traceInstructions) {
+ fprintf(stdout, "%2d: %2d ", iPtr->numLevels, (int) CURR_DEPTH);
+ TclPrintInstruction(codePtr, pc);
+ fflush(stdout);
+ }
+#endif /* TCL_COMPILE_DEBUG */
+
TCL_DTRACE_INST_NEXT();
+
+ if (*pc == INST_LOAD_SCALAR1) {
+ goto instLoadScalar1;
+ }
- while (*pc == INST_START_CMD) {
+ if (*pc == INST_PUSH1) {
+ PUSH_OBJECT(codePtr->objArrayPtr[TclGetUInt1AtPtr(pc+1)]);
+ TRACE_WITH_OBJ(("%u => ", TclGetInt1AtPtr(pc+1)), OBJ_AT_TOS);
+ pc += 2;
+ goto peepholeStart;
+ }
+
+ if (*pc == INST_START_CMD) {
/*
* Peephole: do not run INST_START_CMD, just skip it
*/
-#ifdef TCL_COMPILE_STATS
- iPtr->stats.instructionCount[*pc]++;
-#endif
iPtr->cmdCount += TclGetUInt4AtPtr(pc+5);
if (checkInterp) {
checkInterp = 0;
@@ -2317,23 +2335,9 @@ TEBCresume(
}
}
pc += 9;
+ goto peepholeStart;
}
- /*
- * These two instructions account for 26% of all instructions (according
- * to measurements on tclbench by Ben Vitale
- * [http://www.cs.toronto.edu/syslab/pubs/tcl2005-vitale-zaleski.pdf]
- * Resolving them before the switch reduces the cost of branch
- * mispredictions, seems to improve runtime by 5% to 15%, and (amazingly!)
- * reduces total obj size.
- */
-
- if (*pc == INST_LOAD_SCALAR1) {
- goto instLoadScalar1;
- } else if (*pc == INST_PUSH1) {
- goto instPush1Peephole;
- }
-
switch (*pc) {
case INST_SYNTAX:
case INST_RETURN_IMM: {
@@ -2484,23 +2488,6 @@ TEBCresume(
(void) POP_OBJECT();
goto abnormalReturn;
- case INST_PUSH1:
- instPush1Peephole:
- PUSH_OBJECT(codePtr->objArrayPtr[TclGetUInt1AtPtr(pc+1)]);
- TRACE_WITH_OBJ(("%u => ", TclGetInt1AtPtr(pc+1)), OBJ_AT_TOS);
- pc += 2;
-#if !TCL_COMPILE_DEBUG
- /*
- * Runtime peephole optimisation: check if we are pushing again.
- */
-
- if (*pc == INST_PUSH1) {
- TCL_DTRACE_INST_NEXT();
- goto instPush1Peephole;
- }
-#endif
- NEXT_INST_F(0, 0, 0);
-
case INST_PUSH4:
objResultPtr = codePtr->objArrayPtr[TclGetUInt4AtPtr(pc+1)];
TRACE_WITH_OBJ(("%u => ", TclGetUInt4AtPtr(pc+1)), objResultPtr);