summaryrefslogtreecommitdiffstats
path: root/generic/tclExecute.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclExecute.c')
-rw-r--r--generic/tclExecute.c18
1 files changed, 17 insertions, 1 deletions
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 43bd58a..5b3185b 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -12,7 +12,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclExecute.c,v 1.314 2007/08/14 21:04:28 msofer Exp $
+ * RCS: @(#) $Id: tclExecute.c,v 1.315 2007/08/16 20:39:34 msofer Exp $
*/
#include "tclInt.h"
@@ -1640,6 +1640,21 @@ TclExecuteByteCode(
}
}
+ /*
+ * These two instructions account for 26% of all instructions (according
+ * to measurements on tclbench by Ben Vitale
+ * [http://www.cs.toronto.edu/syslab/pubs/tcl2005-vitale-zaleski.pdf]
+ * Resolving them before the switch reduces the cost of branch
+ * mispredictions, seems to improve runtime by 5% to 15%, and (amazingly!)
+ * reduces total obj size.
+ */
+
+ if (*pc == INST_LOAD_SCALAR1) {
+ goto instLoadScalar1;
+ } else if (*pc == INST_PUSH1) {
+ goto instPush1Peephole;
+ }
+
switch (*pc) {
case INST_RETURN_IMM: {
int code = TclGetInt4AtPtr(pc+1);
@@ -2257,6 +2272,7 @@ TclExecuteByteCode(
Tcl_Obj *objPtr;
case INST_LOAD_SCALAR1:
+ instLoadScalar1:
opnd = TclGetUInt1AtPtr(pc+1);
varPtr = &(compiledLocals[opnd]);
while (TclIsVarLink(varPtr)) {