From f82f46df1628c6703ad0ff2b94d83c6c9a46c56f Mon Sep 17 00:00:00 2001 From: Kevin B Kenny Date: Wed, 21 Oct 2015 23:30:41 +0000 Subject: Micro-optimization: remove double checked lock from TclGetAllocCache in favour of initialization in TclInitSubsystems --- generic/tclEvent.c | 3 +++ generic/tclInt.h | 3 +++ unix/tclUnixThrd.c | 27 ++++++++++----------------- win/tclWinThrd.c | 33 +++++++++++++++------------------ 4 files changed, 31 insertions(+), 35 deletions(-) diff --git a/generic/tclEvent.c b/generic/tclEvent.c index 8305410..d62850b 100644 --- a/generic/tclEvent.c +++ b/generic/tclEvent.c @@ -1043,6 +1043,9 @@ TclInitSubsystems(void) #if USE_TCLALLOC TclInitAlloc(); /* Process wide mutex init */ #endif +#if defined(TCL_THREADS) && defined(USE_THREAD_ALLOC) + TclpInitThreadAlloc(); +#endif #ifdef TCL_MEM_DEBUG TclInitDbCkalloc(); /* Process wide mutex init */ #endif diff --git a/generic/tclInt.h b/generic/tclInt.h index 356d250..d4baed4 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3146,6 +3146,9 @@ MODULE_SCOPE int TclpLoadMemory(Tcl_Interp *interp, void *buffer, Tcl_FSUnloadFileProc **unloadProcPtr, int flags); #endif MODULE_SCOPE void TclInitThreadStorage(void); +#if defined(TCL_THREADS) && defined(USE_THREAD_ALLOC) +MODULE_SCOPE void TclpInitThreadAlloc(void); +#endif MODULE_SCOPE void TclFinalizeThreadDataThread(void); MODULE_SCOPE void TclFinalizeThreadStorage(void); #ifdef TCL_WIDE_CLICKS diff --git a/unix/tclUnixThrd.c b/unix/tclUnixThrd.c index ea03332..0f4a8a3 100644 --- a/unix/tclUnixThrd.c +++ b/unix/tclUnixThrd.c @@ -680,7 +680,6 @@ TclpInetNtoa( */ #ifdef USE_THREAD_ALLOC -static volatile int initialized = 0; static pthread_key_t key; typedef struct allocMutex { @@ -727,29 +726,23 @@ TclpFreeAllocCache( TclFreeAllocCache(ptr); pthread_setspecific(key, NULL); - - } else if (initialized) { - /* - * Called by us in TclFinalizeThreadAlloc() during the library - * finalization initiated from Tcl_Finalize() - */ - + + } else { pthread_key_delete(key); - initialized = 0; } } +void +TclpInitThreadAlloc(void) +{ + pthread_mutex_lock(allocLockPtr); + pthread_key_create(&key, TclpFreeAllocCache); + pthread_mutex_unlock(allocLockPtr); +} + void * TclpGetAllocCache(void) { - if (!initialized) { - pthread_mutex_lock(allocLockPtr); - if (!initialized) { - pthread_key_create(&key, TclpFreeAllocCache); - initialized = 1; - } - pthread_mutex_unlock(allocLockPtr); - } return pthread_getspecific(key); } diff --git a/win/tclWinThrd.c b/win/tclWinThrd.c index 1c9d483..fac8ab3 100644 --- a/win/tclWinThrd.c +++ b/win/tclWinThrd.c @@ -122,7 +122,6 @@ typedef struct WinCondition { */ #ifdef USE_THREAD_ALLOC -static int once; static DWORD tlsKey; typedef struct allocMutex { @@ -971,24 +970,24 @@ TclpFreeAllocMutex( free(lockPtr); } +void +TclInitThreadAlloc(void) +{ + /* + * We need to make sure that TclpFreeAllocCache is called on each + * thread that calls this, but only on threads that call this. + */ + + tlsKey = TlsAlloc(); + if (tlsKey == TLS_OUT_OF_INDEXES) { + Tcl_Panic("could not allocate thread local storage"); + } +} + void * TclpGetAllocCache(void) { void *result; - - if (!once) { - /* - * We need to make sure that TclpFreeAllocCache is called on each - * thread that calls this, but only on threads that call this. - */ - - tlsKey = TlsAlloc(); - once = 1; - if (tlsKey == TLS_OUT_OF_INDEXES) { - Tcl_Panic("could not allocate thread local storage"); - } - } - result = TlsGetValue(tlsKey); if ((result == NULL) && (GetLastError() != NO_ERROR)) { Tcl_Panic("TlsGetValue failed from TclpGetAllocCache"); @@ -1024,7 +1023,7 @@ TclpFreeAllocCache( if (!success) { Tcl_Panic("TlsSetValue failed from TclpFreeAllocCache"); } - } else if (once) { + } else { /* * Called by us in TclFinalizeThreadAlloc() during the library * finalization initiated from Tcl_Finalize() @@ -1034,9 +1033,7 @@ TclpFreeAllocCache( if (!success) { Tcl_Panic("TlsFree failed from TclpFreeAllocCache"); } - once = 0; /* reset for next time. */ } - } #endif /* USE_THREAD_ALLOC */ -- cgit v0.12 From ea342f5111aeaaf3b3da7e7e75df24f55a0f3e7d Mon Sep 17 00:00:00 2001 From: dkf Date: Thu, 22 Oct 2015 01:00:28 +0000 Subject: Turn off NRE asserts by default. About a 5% speedup on [clock format]. --- generic/tclBasic.c | 3 --- generic/tclExecute.c | 3 --- generic/tclInt.h | 4 +++- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/generic/tclBasic.c b/generic/tclBasic.c index a09bf10..5c5bc64 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -22,10 +22,7 @@ #include "tclCompile.h" #include "tommath.h" #include - -#if NRE_ENABLE_ASSERTS #include -#endif #define INTERP_STACK_INITIAL_SIZE 2000 #define CORO_STACK_INITIAL_SIZE 200 diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 7f65262..b10af65 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -20,10 +20,7 @@ #include "tclOOInt.h" #include "tommath.h" #include - -#if NRE_ENABLE_ASSERTS #include -#endif /* * Hack to determine whether we may expect IEEE floating point. The hack is diff --git a/generic/tclInt.h b/generic/tclInt.h index d4baed4..50eb370 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4799,7 +4799,9 @@ void Tcl_Panic(const char *, ...) __attribute__((analyzer_noreturn)); */ #define NRE_USE_SMALL_ALLOC 1 /* Only turn off for debugging purposes. */ -#define NRE_ENABLE_ASSERTS 1 +#ifndef NRE_ENABLE_ASSERTS +#define NRE_ENABLE_ASSERTS 0 +#endif /* * This is the main data struct for representing NR commands. It is designed -- cgit v0.12 From ca9cf2ba57b9245e21d8bd908ffdbea32ed3d7cd Mon Sep 17 00:00:00 2001 From: Kevin B Kenny Date: Thu, 22 Oct 2015 14:07:01 +0000 Subject: fix typo in micro-optimization TclpInitThreadAlloc --- win/tclWinThrd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/win/tclWinThrd.c b/win/tclWinThrd.c index fac8ab3..a2fc226 100644 --- a/win/tclWinThrd.c +++ b/win/tclWinThrd.c @@ -971,7 +971,7 @@ TclpFreeAllocMutex( } void -TclInitThreadAlloc(void) +TclpInitThreadAlloc(void) { /* * We need to make sure that TclpFreeAllocCache is called on each -- cgit v0.12 From f6c021c559b57cc973581cb328496b13e5f3c952 Mon Sep 17 00:00:00 2001 From: dkf Date: Fri, 23 Oct 2015 21:52:10 +0000 Subject: Knock perhaps 1% off execution time: guard on TclAsyncReady more efficient when decrementing to zero. --- generic/tclExecute.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index b10af65..f6dfc46 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -34,14 +34,14 @@ #endif /* - * A mask (should be 2**n-1) that is used to work out when the bytecode engine - * should call Tcl_AsyncReady() to see whether there is a signal that needs - * handling. + * A counter that is used to work out when the bytecode engine should call + * Tcl_AsyncReady() to see whether there is a signal that needs handling, and + * other expensive periodic operations. */ -#ifndef ASYNC_CHECK_COUNT_MASK -# define ASYNC_CHECK_COUNT_MASK 63 -#endif /* !ASYNC_CHECK_COUNT_MASK */ +#ifndef ASYNC_CHECK_COUNT +# define ASYNC_CHECK_COUNT 64 +#endif /* !ASYNC_CHECK_COUNT */ /* * Boolean flag indicating whether the Tcl bytecode interpreter has been @@ -2116,7 +2116,8 @@ TEBCresume( * sporadically: no special need for speed. */ - int instructionCount = 0; /* Counter that is used to work out when to + int instructionCount = ASYNC_CHECK_COUNT; + /* Counter that is used to work out when to * call Tcl_AsyncReady() */ const char *curInstName; #ifdef TCL_COMPILE_DEBUG @@ -2315,10 +2316,11 @@ TEBCresume( /* * Check for asynchronous handlers [Bug 746722]; we do the check every - * ASYNC_CHECK_COUNT_MASK instruction, of the form (2**n-1). + * ASYNC_CHECK_COUNT instructions. */ - if ((instructionCount++ & ASYNC_CHECK_COUNT_MASK) == 0) { + if (!(--instructionCount)) { + instructionCount = ASYNC_CHECK_COUNT; DECACHE_STACK_INFO(); if (TclAsyncReady(iPtr)) { result = Tcl_AsyncInvoke(interp, result); -- cgit v0.12 From 4174e5c5cc258660142a1912dd563a05272d50ba Mon Sep 17 00:00:00 2001 From: Kevin B Kenny Date: Sat, 24 Oct 2015 02:37:02 +0000 Subject: merge changes from pspjuth that optimize conversion from unichar to utf and add optimized versions for reading a word from byte codes. --- generic/tclCompile.h | 31 +++++++++++++++++++++++++++++++ generic/tclInt.h | 1 + generic/tclStringObj.c | 4 ++-- generic/tclUtf.c | 17 ++++++++--------- 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/generic/tclCompile.h b/generic/tclCompile.h index b89346d..f9fddf3 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -1516,6 +1516,37 @@ MODULE_SCOPE int TclPushProcCallFrame(ClientData clientData, (*((p)+3))) /* + * Override TclGetUInt4AtPtr or TclGetInt4AtPtr macros if + * a known better version exists. + */ +#ifdef WORDS_BIGENDIAN +#define OVERRIDE_INT4(i) (i) +#elif defined(__GNUC__) && (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3)) +#define OVERRIDE_INT4(i) __builtin_bswap32(i) +#elif defined(_MSC_VER) && _MSC_VER>=1300 +#define OVERRIDE_INT4(i) _byteswap_ulong(i) +#endif + +#ifdef OVERRIDE_INT4 +#undef TclGetUInt4AtPtr +static inline unsigned int +TclGetUInt4AtPtr(const unsigned char *p) +{ + uint32_t i; + memcpy(&i,p,4); + return OVERRIDE_INT4(i); +} +#undef TclGetInt4AtPtr +static inline signed int +TclGetInt4AtPtr(const unsigned char *p) +{ + int32_t i; + memcpy(&i,p,4); + return OVERRIDE_INT4(i); +} +#endif /* OVERRIDE_INT4 */ + +/* * Macros used to compute the minimum and maximum of two integers. The ANSI C * "prototypes" for these macros are: * diff --git a/generic/tclInt.h b/generic/tclInt.h index 50eb370..6c39558 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3133,6 +3133,7 @@ MODULE_SCOPE int TclTrimLeft(const char *bytes, int numBytes, MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes, const char *trim, int numTrim); MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); +MODULE_SCOPE int TclUtfCount(Tcl_UniChar ch); MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); MODULE_SCOPE int TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr, diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 8d70d20..1974079 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -3097,7 +3097,7 @@ ExtendStringRepWithUnicode( */ int i, origLength, size = 0; - char *dst, buf[TCL_UTF_MAX]; + char *dst; String *stringPtr = GET_STRING(objPtr); if (numChars < 0) { @@ -3123,7 +3123,7 @@ ExtendStringRepWithUnicode( } for (i = 0; i < numChars && size >= 0; i++) { - size += Tcl_UniCharToUtf((int) unicode[i], buf); + size += TclUtfCount(unicode[i]); } if (size < 0) { Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); diff --git a/generic/tclUtf.c b/generic/tclUtf.c index b878149..f585c03 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -89,12 +89,11 @@ static const unsigned char totalBytes[256] = { * Functions used only in this module. */ -static int UtfCount(int ch); /* *--------------------------------------------------------------------------- * - * UtfCount -- + * TclUtfCount -- * * Find the number of bytes in the Utf character "ch". * @@ -107,9 +106,9 @@ static int UtfCount(int ch); *--------------------------------------------------------------------------- */ -INLINE static int -UtfCount( - int ch) /* The Tcl_UniChar whose size is returned. */ +INLINE int +TclUtfCount( + Tcl_UniChar ch) /* The Tcl_UniChar whose size is returned. */ { if ((ch > 0) && (ch < UNICODE_SELF)) { return 1; @@ -829,7 +828,7 @@ Tcl_UtfToUpper( * char to dst if its size is <= the original char. */ - if (bytes < UtfCount(upChar)) { + if (bytes < TclUtfCount(upChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { @@ -882,7 +881,7 @@ Tcl_UtfToLower( * char to dst if its size is <= the original char. */ - if (bytes < UtfCount(lowChar)) { + if (bytes < TclUtfCount(lowChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { @@ -932,7 +931,7 @@ Tcl_UtfToTitle( bytes = TclUtfToUniChar(src, &ch); titleChar = Tcl_UniCharToTitle(ch); - if (bytes < UtfCount(titleChar)) { + if (bytes < TclUtfCount(titleChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { @@ -944,7 +943,7 @@ Tcl_UtfToTitle( bytes = TclUtfToUniChar(src, &ch); lowChar = Tcl_UniCharToLower(ch); - if (bytes < UtfCount(lowChar)) { + if (bytes < TclUtfCount(lowChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { -- cgit v0.12 From 017fb881aae30f42cae7170c25ca3e32fd71ce33 Mon Sep 17 00:00:00 2001 From: dkf Date: Sat, 24 Oct 2015 18:59:50 +0000 Subject: Make the async-interrupt tests work more reliably. --- generic/tclExecute.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index f6dfc46..853c0d6 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -2116,9 +2116,14 @@ TEBCresume( * sporadically: no special need for speed. */ - int instructionCount = ASYNC_CHECK_COUNT; + unsigned interruptCounter = 1; /* Counter that is used to work out when to - * call Tcl_AsyncReady() */ + * call Tcl_AsyncReady(). This must be 1 + * initially so that we call the async-check + * stanza early, otherwise there are command + * sequences that can make the interpreter + * busy-loop without an opportunity to + * recognise an interrupt. */ const char *curInstName; #ifdef TCL_COMPILE_DEBUG int traceInstructions; /* Whether we are doing instruction-level @@ -2319,8 +2324,8 @@ TEBCresume( * ASYNC_CHECK_COUNT instructions. */ - if (!(--instructionCount)) { - instructionCount = ASYNC_CHECK_COUNT; + if ((--interruptCounter) == 0) { + interruptCounter = ASYNC_CHECK_COUNT; DECACHE_STACK_INFO(); if (TclAsyncReady(iPtr)) { result = Tcl_AsyncInvoke(interp, result); -- cgit v0.12 From 0436970d3017c381bd9a70a3f7991680d0484736 Mon Sep 17 00:00:00 2001 From: dkf Date: Sat, 24 Oct 2015 19:00:21 +0000 Subject: Added one of the micro-opt test cases. --- tools/microoptimization/generalbytecode.tcl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 tools/microoptimization/generalbytecode.tcl diff --git a/tools/microoptimization/generalbytecode.tcl b/tools/microoptimization/generalbytecode.tcl new file mode 100644 index 0000000..9c0a545 --- /dev/null +++ b/tools/microoptimization/generalbytecode.tcl @@ -0,0 +1,14 @@ +apply {{} { + for {set i 0} {$i < 100} {incr i} { + apply {{} { + set a {} + set b {} + for {set i 0} {$i < 1000} {incr i} { + lappend a $i + dict set b $i [expr {$i*$i}] + } + return [string length $a],[string length $b] + }} + } +}} + -- cgit v0.12 From 8ac915184090c5219ee491442969c426c734cfeb Mon Sep 17 00:00:00 2001 From: Miguel Sofer Date: Sat, 24 Oct 2015 21:52:12 +0000 Subject: (by drh) micro-opt of INST_LOAD_SCALAR1 (the hottest instruction) in the non-varLink (most frequent) case --- generic/tclExecute.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 853c0d6..4292479 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -3202,20 +3202,28 @@ TEBCresume( case INST_LOAD_SCALAR1: instLoadScalar1: + /* + * micro-optimization by drh: eliminate a compare-and-jump on the + * hottest path (no var link), at the cost of adding a few comparisons + * in the less frequent cases (var links: upvar, global, + * variable). We used to follow links first (causing a C&J in the + * non-link case), now we check for direct-readability first + */ + opnd = TclGetUInt1AtPtr(pc+1); varPtr = LOCAL(opnd); - while (TclIsVarLink(varPtr)) { - varPtr = varPtr->value.linkPtr; - } - TRACE(("%u => ", opnd)); - if (TclIsVarDirectReadable(varPtr)) { - /* - * No errors, no traces: just get the value. - */ - - objResultPtr = varPtr->value.objPtr; - TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); - NEXT_INST_F(2, 0, 1); + while (1) { + if (TclIsVarDirectReadable(varPtr)) { + TRACE(("%u => ", opnd)); + objResultPtr = varPtr->value.objPtr; + TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); + NEXT_INST_F(2, 0, 1); + } + if (TclIsVarLink(varPtr)) { + varPtr = varPtr->value.linkPtr; + continue; + } + break; } pcAdjustment = 2; cleanup = 0; -- cgit v0.12 From 8493e90ff204597cf5a0277be39455380a5ec36c Mon Sep 17 00:00:00 2001 From: dkf Date: Sun, 25 Oct 2015 00:20:42 +0000 Subject: Added another micro-opt case: [clock]. --- tools/microoptimization/clockformatscan.tcl | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 tools/microoptimization/clockformatscan.tcl diff --git a/tools/microoptimization/clockformatscan.tcl b/tools/microoptimization/clockformatscan.tcl new file mode 100644 index 0000000..844562e --- /dev/null +++ b/tools/microoptimization/clockformatscan.tcl @@ -0,0 +1,5 @@ +apply {{} { + for {set i 0} {$i < 5000} {incr i} { + clock scan [clock format $i -format %T] -format %T + } +}} -- cgit v0.12 From 66718c29e87bb9d9a3e521373c60d5c812c04736 Mon Sep 17 00:00:00 2001 From: dkf Date: Sun, 25 Oct 2015 00:57:53 +0000 Subject: Added drh's analysis wrapper script so that we don't need to keep private copies. --- tools/cgannotate.tcl | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tools/cgannotate.tcl diff --git a/tools/cgannotate.tcl b/tools/cgannotate.tcl new file mode 100644 index 0000000..f8b9b7d --- /dev/null +++ b/tools/cgannotate.tcl @@ -0,0 +1,24 @@ +#!/usr/bin/tclsh +# +# A wrapper around cg_annotate that sets appropriate command-line options +# and rearranges the output so that annotated files occur in a consistent +# sorted order. +# + +set in [open |[list cg_annotate --show=Ir --auto=yes --context=40 {*}$argv] r] +set dest ! +set out(!) {} +while {![eof $in]} { + set line [string map {\t { }} [gets $in]] + if {[regexp {^-- Auto-annotated source: (.*)} $line all name]} { + set dest $name + } elseif {[regexp {^-- line \d+ ------} $line]} { + set line [lreplace $line 2 2 {#}] + } elseif {[regexp {^The following files chosen for } $line]} { + set dest ! + } + append out($dest) $line\n +} +foreach x [lsort [array names out]] { + puts $out($x) +} -- cgit v0.12 From e1b162e92dca41682e812418307abf40d181084c Mon Sep 17 00:00:00 2001 From: dkf Date: Sun, 25 Oct 2015 10:57:26 +0000 Subject: Allow tuning of the number of iterations in the micro-opt loops. --- tools/microoptimization/clockformatscan.tcl | 6 +++--- tools/microoptimization/generalbytecode.tcl | 13 ++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tools/microoptimization/clockformatscan.tcl b/tools/microoptimization/clockformatscan.tcl index 844562e..c7cec46 100644 --- a/tools/microoptimization/clockformatscan.tcl +++ b/tools/microoptimization/clockformatscan.tcl @@ -1,5 +1,5 @@ -apply {{} { - for {set i 0} {$i < 5000} {incr i} { +apply {{{limit 5000}} { + for {set i 0} {$i < $limit} {incr i} { clock scan [clock format $i -format %T] -format %T } -}} +}} {*}$argv diff --git a/tools/microoptimization/generalbytecode.tcl b/tools/microoptimization/generalbytecode.tcl index 9c0a545..2471943 100644 --- a/tools/microoptimization/generalbytecode.tcl +++ b/tools/microoptimization/generalbytecode.tcl @@ -1,14 +1,13 @@ -apply {{} { - for {set i 0} {$i < 100} {incr i} { - apply {{} { +apply {{{limit1 100} {limit2 1000}} { + for {set i 0} {$i < $limit1} {incr i} { + apply {limit2 { set a {} set b {} - for {set i 0} {$i < 1000} {incr i} { + for {set i 0} {$i < $limit2} {incr i} { lappend a $i dict set b $i [expr {$i*$i}] } return [string length $a],[string length $b] - }} + }} $limit2 } -}} - +}} {*}$argv -- cgit v0.12 From 2b958cda3b6aa3bef63d9968e436fb617d84d89e Mon Sep 17 00:00:00 2001 From: pspjuth Date: Sun, 25 Oct 2015 18:13:18 +0000 Subject: Avoid isnan call to make double-using code faster. --- generic/tclInt.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index 6c39558..03eaffc 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4649,7 +4649,13 @@ MODULE_SCOPE Tcl_PackageInitProc Procbodytest_SafeInit; # ifdef NO_ISNAN # define TclIsNaN(d) ((d) != (d)) # else -# define TclIsNaN(d) (isnan(d)) +/* + * This is called a lot for double-using code and isnan() is a noticable + * slowdown, so we stay with the comparison operation here. It should only + * make a difference for signalling NaN and those should not happen anyway. + */ +# define TclIsNaN(d) ((d) != (d)) +/*# define TclIsNaN(d) (isnan(d))*/ # endif #endif -- cgit v0.12 From 82c7f039877d529c4f448ccf6b5e2033022f13eb Mon Sep 17 00:00:00 2001 From: dkf Date: Sun, 25 Oct 2015 21:23:08 +0000 Subject: Higher-level microoptimizations for [clock format]. --- library/clock.tcl | 248 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 172 insertions(+), 76 deletions(-) diff --git a/library/clock.tcl b/library/clock.tcl index 8e4b657..01759dd 100755 --- a/library/clock.tcl +++ b/library/clock.tcl @@ -647,7 +647,7 @@ proc ::tcl::clock::Initialize {} { # that renders the given format } ::tcl::clock::Initialize - + #---------------------------------------------------------------------- # # clock format -- @@ -696,7 +696,7 @@ proc ::tcl::clock::format { args } { return [$procName $clockval $timezone] } - + #---------------------------------------------------------------------- # # ParseClockFormatFormat -- @@ -735,7 +735,7 @@ proc ::tcl::clock::ParseClockFormatFormat {procName format locale} { proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { set didLocaleEra 0 set didLocaleNumerals 0 - set preFormatCode \ + set prefixCode \ [string map [list @GREGORIAN_CHANGE_DATE@ \ [mc GREGORIAN_CHANGE_DATE]] \ { @@ -744,9 +744,11 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { $TZData($timezone) \ @GREGORIAN_CHANGE_DATE@] }] + set preFormatCode {} set formatString {} set substituents {} set state {} + set fields {} set format [LocalizeFormat $locale $format] @@ -772,8 +774,10 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { [list @DAYS_OF_WEEK_ABBREV@ \ [list [mc DAYS_OF_WEEK_ABBREV]]] \ { [lindex @DAYS_OF_WEEK_ABBREV@ \ - [expr {[dict get $date dayOfWeek] \ - % 7}]]}] + [expr {$dayOfWeek % 7}]]}] + dict set fields dayOfWeek { + set dayOfWeek [dict get $date dayOfWeek] + } } A { # Day of week, spelt out. append formatString %s @@ -782,8 +786,10 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { [list @DAYS_OF_WEEK_FULL@ \ [list [mc DAYS_OF_WEEK_FULL]]] \ { [lindex @DAYS_OF_WEEK_FULL@ \ - [expr {[dict get $date dayOfWeek] \ - % 7}]]}] + [expr {$dayOfWeek % 7}]]}] + dict set fields dayOfWeek { + set dayOfWeek [dict get $date dayOfWeek] + } } b - h { # Name of month, abbreviated. append formatString %s @@ -792,7 +798,10 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { [list @MONTHS_ABBREV@ \ [list [mc MONTHS_ABBREV]]] \ { [lindex @MONTHS_ABBREV@ \ - [expr {[dict get $date month]-1}]]}] + [expr {$month-1}]]}] + dict set fields month { + set month [dict get $date month] + } } B { # Name of month, spelt out append formatString %s @@ -801,20 +810,31 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { [list @MONTHS_FULL@ \ [list [mc MONTHS_FULL]]] \ { [lindex @MONTHS_FULL@ \ - [expr {[dict get $date month]-1}]]}] + [expr {$month-1}]]}] + dict set fields month { + set month [dict get $date month] + } } C { # Century number append formatString %02d - append substituents \ - { [expr {[dict get $date year] / 100}]} + append substituents { [expr {$year / 100}]} + dict set fields year { + set year [dict get $date year] + } } d { # Day of month, with leading zero append formatString %02d - append substituents { [dict get $date dayOfMonth]} + append substituents { $dayOfMonth} + dict set fields dayOfMonth { + set dayOfMonth [dict get $date dayOfMonth] + } } e { # Day of month, without leading zero append formatString %2d - append substituents { [dict get $date dayOfMonth]} + append substituents { $dayOfMonth} + dict set fields dayOfMonth { + set dayOfMonth [dict get $date dayOfMonth] + } } E { # Format group in a locale-dependent # alternative era @@ -840,33 +860,45 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { g { # Two-digit year relative to ISO8601 # week number append formatString %02d - append substituents \ - { [expr { [dict get $date iso8601Year] % 100 }]} + append substituents { [expr { $iso8601Year % 100 }]} + dict set fields iso8601Year { + set iso8601Year [dict get $date iso8601Year] + } } G { # Four-digit year relative to ISO8601 # week number append formatString %02d - append substituents { [dict get $date iso8601Year]} + append substituents { $iso8601Year} + dict set fields iso8601Year { + set iso8601Year [dict get $date iso8601Year] + } } H { # Hour in the 24-hour day, leading zero append formatString %02d append substituents \ - { [expr { [dict get $date localSeconds] \ - / 3600 % 24}]} + { [expr { $localSeconds / 3600 % 24}]} + dict set fields localSeconds { + set localSeconds [dict get $date localSeconds] + } } I { # Hour AM/PM, with leading zero append formatString %02d append substituents \ - { [expr { ( ( ( [dict get $date localSeconds] \ - % 86400 ) \ + { [expr { ( ( ($localSeconds % 86400) \ + 86400 \ - 3600 ) \ / 3600 ) \ % 12 + 1 }] } + dict set fields localSeconds { + set localSeconds [dict get $date localSeconds] + } } j { # Day of year (001-366) append formatString %03d - append substituents { [dict get $date dayOfYear]} + append substituents { $dayOfYear} + dict set fields dayOfYear { + set dayOfYear [dict get $date dayOfYear] + } } J { # Julian Day Number append formatString %07ld @@ -875,37 +907,47 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { k { # Hour (0-23), no leading zero append formatString %2d append substituents \ - { [expr { [dict get $date localSeconds] - / 3600 - % 24 }]} + { [expr { $localSeconds / 3600 % 24 }]} + dict set fields localSeconds { + set localSeconds [dict get $date localSeconds] + } } l { # Hour (12-11), no leading zero append formatString %2d append substituents \ - { [expr { ( ( ( [dict get $date localSeconds] - % 86400 ) + { [expr { ( ( ( $localSeconds % 86400 ) + 86400 - 3600 ) / 3600 ) % 12 + 1 }]} + dict set fields localSeconds { + set localSeconds [dict get $date localSeconds] + } } m { # Month number, leading zero append formatString %02d - append substituents { [dict get $date month]} + append substituents { $month} + dict set fields month { + set month [dict get $date month] + } } M { # Minute of the hour, leading zero append formatString %02d append substituents \ - { [expr { [dict get $date localSeconds] - / 60 - % 60 }]} + { [expr { $localSeconds / 60 % 60 }]} + dict set fields localSeconds { + set localSeconds [dict get $date localSeconds] + } } n { # A literal newline append formatString \n } N { # Month number, no leading zero append formatString %2d - append substituents { [dict get $date month]} + append substituents { $month} + dict set fields month { + set month [dict get $date month] + } } O { # A format group in the locale's # alternative numerals @@ -924,9 +966,11 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { [list set AM [string toupper [mc AM]]] \n \ [list set PM [string toupper [mc PM]]] \n append substituents \ - { [expr {(([dict get $date localSeconds] - % 86400) < 43200) ? + { [expr {(($localSeconds % 86400) < 43200) ? $AM : $PM}]} + dict set fields localSeconds { + set localSeconds [dict get $date localSeconds] + } } P { # Localized 'AM' or 'PM' indicator append formatString %s @@ -934,10 +978,11 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { [list set am [mc AM]] \n \ [list set pm [mc PM]] \n append substituents \ - { [expr {(([dict get $date localSeconds] - % 86400) < 43200) ? + { [expr {(($localSeconds % 86400) < 43200) ? $am : $pm}]} - + dict set fields localSeconds { + set localSeconds [dict get $date localSeconds] + } } Q { # Hi, Jeff! append formatString %s @@ -945,70 +990,95 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { } s { # Seconds from the Posix Epoch append formatString %s - append substituents { [dict get $date seconds]} + append substituents { $seconds} + dict set fields seconds { + set seconds [dict get $date seconds] + } } S { # Second of the minute, with # leading zero append formatString %02d - append substituents \ - { [expr { [dict get $date localSeconds] - % 60 }]} + append substituents { [expr { $localSeconds % 60 }]} + dict set fields localSeconds { + set localSeconds [dict get $date localSeconds] + } } t { # A literal tab character append formatString \t } u { # Day of the week (1-Monday, 7-Sunday) append formatString %1d - append substituents { [dict get $date dayOfWeek]} + append substituents { $dayOfWeek } + dict set fields dayOfWeek { + set dayOfWeek [dict get $date dayOfWeek] + } } U { # Week of the year (00-53). The # first Sunday of the year is the # first day of week 01 append formatString %02d append preFormatCode { - set dow [dict get $date dayOfWeek] + set dow $dayOfWeek if { $dow == 7 } { set dow 0 } incr dow set UweekNumber \ - [expr { ( [dict get $date dayOfYear] - - $dow + 7 ) - / 7 }] + [expr { ( $dayOfYear - $dow + 7 ) / 7 }] } append substituents { $UweekNumber} + dict set fields dayOfYear { + set dayOfYear [dict get $date dayOfYear] + } + dict set fields dayOfWeek { + set dayOfWeek [dict get $date dayOfWeek] + } } V { # The ISO8601 week number append formatString %02d - append substituents { [dict get $date iso8601Week]} + append substituents { $iso8601Week} + dict set fields iso8601Week { + set iso8601Week [dict get $date iso8601Week] + } } w { # Day of the week (0-Sunday, # 6-Saturday) append formatString %1d append substituents \ - { [expr { [dict get $date dayOfWeek] % 7 }]} + { [expr { $dayOfWeek % 7 }]} + dict set fields dayOfWeek { + set dayOfWeek [dict get $date dayOfWeek] + } } W { # Week of the year (00-53). The first # Monday of the year is the first day # of week 01. append preFormatCode { set WweekNumber \ - [expr { ( [dict get $date dayOfYear] - - [dict get $date dayOfWeek] - + 7 ) - / 7 }] + [expr { ($dayOfYear - $dayOfWeek + 7) / 7 }] } append formatString %02d append substituents { $WweekNumber} + dict set fields dayOfWeek { + set dayOfWeek [dict get $date dayOfWeek] + } + dict set fields dayOfMonth { + set dayOfMonth [dict get $date dayOfMonth] + } } y { # The two-digit year of the century append formatString %02d - append substituents \ - { [expr { [dict get $date year] % 100 }]} + append substituents { [expr { $year % 100 }]} + dict set fields year { + set year [dict get $date year] + } } Y { # The four-digit year append formatString %04d - append substituents { [dict get $date year]} + append substituents { $year} + dict set fields year { + set year [dict get $date year] + } } z { # The time zone as hours and minutes # east (+) or west (-) of Greenwich @@ -1018,7 +1088,10 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { } Z { # The name of the time zone append formatString %s - append substituents { [dict get $date tzName]} + append substituents { $tzName} + dict set fields tzName { + set tzName [dict get $date tzName] + } } % { # A literal percent character append formatString %% @@ -1068,72 +1141,91 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { # numerals append formatString %s append substituents \ - { [lindex $localeNumerals \ - [dict get $date dayOfMonth]]} + { [lindex $localeNumerals $dayOfMonth]} + dict set fields dayOfMonth { + set dayOfMonth [dict get $date dayOfMonth] + } } H - k { # Hour of the day in alternative # numerals append formatString %s append substituents \ { [lindex $localeNumerals \ - [expr { [dict get $date localSeconds] - / 3600 - % 24 }]]} + [expr { $localSeconds / 3600 % 24 }]]} + dict set fields localSeconds { + set localSeconds [dict get $date localSeconds] + } } I - l { # Hour (12-11) AM/PM in alternative # numerals append formatString %s append substituents \ { [lindex $localeNumerals \ - [expr { ( ( ( [dict get $date localSeconds] - % 86400 ) + [expr { ( ( ( $localSeconds % 86400 ) + 86400 - 3600 ) / 3600 ) % 12 + 1 }]]} + dict set fields localSeconds { + set localSeconds [dict get $date localSeconds] + } } m { # Month number in alternative numerals append formatString %s append substituents \ - { [lindex $localeNumerals [dict get $date month]]} + { [lindex $localeNumerals $month]} + dict set fields month { + set month [dict get $date month] + } } M { # Minute of the hour in alternative # numerals append formatString %s append substituents \ { [lindex $localeNumerals \ - [expr { [dict get $date localSeconds] - / 60 - % 60 }]]} + [expr { $localSeconds / 60 % 60 }]]} + dict set fields localSeconds { + set localSeconds [dict get $date localSeconds] + } } S { # Second of the minute in alternative # numerals append formatString %s append substituents \ { [lindex $localeNumerals \ - [expr { [dict get $date localSeconds] - % 60 }]]} + [expr { $localSeconds % 60 }]]} + dict set fields localSeconds { + set localSeconds [dict get $date localSeconds] + } } u { # Day of the week (Monday=1,Sunday=7) # in alternative numerals append formatString %s append substituents \ - { [lindex $localeNumerals \ - [dict get $date dayOfWeek]]} + { [lindex $localeNumerals $dayOfWeek]} + dict set fields dayOfWeek { + set dayOfWeek [dict get $date dayOfWeek] } + } w { # Day of the week (Sunday=0,Saturday=6) # in alternative numerals append formatString %s append substituents \ { [lindex $localeNumerals \ - [expr { [dict get $date dayOfWeek] % 7 }]]} + [expr { $dayOfWeek % 7 }]]} + dict set fields dayOfWeek { + set dayOfWeek [dict get $date dayOfWeek] + } } y { # Year of the century in alternative # numerals append formatString %s append substituents \ { [lindex $localeNumerals \ - [expr { [dict get $date year] % 100 }]]} + [expr { $year % 100 }]]} + dict set fields year { + set year [dict get $date year] + } } default { # Unknown format group append formatString %%O $char @@ -1157,7 +1249,11 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { } } + set extractCode [join [dict values $fields] ";"] + proc $procName {clockval timezone} " + $prefixCode + $extractCode $preFormatCode return \[::format [list $formatString] $substituents\] " @@ -1166,7 +1262,7 @@ proc ::tcl::clock::ParseClockFormatFormat2 {format locale procName} { return $procName } - + #---------------------------------------------------------------------- # # clock scan -- @@ -1931,7 +2027,7 @@ proc ::tcl::clock::ParseClockScanFormat {formatString locale} { # Build the procedure set procBody {} - append procBody "variable ::tcl::clock::TZData" \n + append procBody "variable TZData" \n append procBody "if \{ !\[ regexp -nocase [list $re] \$string ->" for { set i 1 } { $i <= $captureCount } { incr i } { append procBody " " field $i @@ -1958,7 +2054,7 @@ proc ::tcl::clock::ParseClockScanFormat {formatString locale} { } } append procBody { - ::tcl::clock::SetupTimeZone $timeZone + SetupTimeZone $timeZone } } @@ -1991,7 +2087,7 @@ proc ::tcl::clock::ParseClockScanFormat {formatString locale} { # Finally, convert the date to local time append procBody { - set date [::tcl::clock::ConvertLocalToUTC $date[set date {}] \ + set date [ConvertLocalToUTC $date[set date {}] \ $TZData($timeZone) $changeover] } } -- cgit v0.12 From a94ede13beafd4d4c3ab5e4013882e35f9d03ad8 Mon Sep 17 00:00:00 2001 From: dkf Date: Mon, 26 Oct 2015 07:22:46 +0000 Subject: Added some TclOO microbenches. --- tools/microoptimization/oocreate.tcl | 13 +++++++++++++ tools/microoptimization/oodispatch.tcl | 11 +++++++++++ 2 files changed, 24 insertions(+) create mode 100644 tools/microoptimization/oocreate.tcl create mode 100644 tools/microoptimization/oodispatch.tcl diff --git a/tools/microoptimization/oocreate.tcl b/tools/microoptimization/oocreate.tcl new file mode 100644 index 0000000..0a5d3dc --- /dev/null +++ b/tools/microoptimization/oocreate.tcl @@ -0,0 +1,13 @@ +oo::class create foo { + method bar {} { + return abc + } +} +apply {{{iter 10000}} { + for {set i 0} {$i < $iter} {incr i} { + set obj1 [foo new] + set obj2 [foo create inst] + $obj1 destroy + $obj2 destroy + } +}} {*}$argv diff --git a/tools/microoptimization/oodispatch.tcl b/tools/microoptimization/oodispatch.tcl new file mode 100644 index 0000000..041c8e3 --- /dev/null +++ b/tools/microoptimization/oodispatch.tcl @@ -0,0 +1,11 @@ +oo::class create foo { + method bar {} { + return abc + } +} +foo create inst +apply {{{iter 100000}} { + for {set i 0} {$i < $iter} {incr i} { + inst bar + } +}} {*}$argv -- cgit v0.12 From 21d397d48716466a3fbefaf0bacd0bef7ead69ea Mon Sep 17 00:00:00 2001 From: dkf Date: Tue, 27 Oct 2015 22:09:57 +0000 Subject: =?UTF-8?q?Chisel=201%=20from=20the=20TclOO=20object=20creation=20?= =?UTF-8?q?=CE=BCbenchmark.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generic/tcl.h | 3 +++ generic/tclOOCall.c | 43 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index a08edde..d778e19 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -144,6 +144,7 @@ extern "C" { #if defined(__GNUC__) && (__GNUC__ > 2) # define TCL_FORMAT_PRINTF(a,b) __attribute__ ((__format__ (__printf__, a, b))) # define TCL_NORETURN __attribute__ ((noreturn)) +# define TCL_NOINLINE __attribute__ ((noinline)) # if defined(BUILD_tcl) || defined(BUILD_tk) # define TCL_NORETURN1 __attribute__ ((noreturn)) # else @@ -153,8 +154,10 @@ extern "C" { # define TCL_FORMAT_PRINTF(a,b) # if defined(_MSC_VER) && (_MSC_VER >= 1310) # define TCL_NORETURN _declspec(noreturn) +# define TCL_NOINLINE __declspec(noinline) # else # define TCL_NORETURN /* nothing */ +# define TCL_NOINLINE /* nothing */ # endif # define TCL_NORETURN1 /* nothing */ #endif diff --git a/generic/tclOOCall.c b/generic/tclOOCall.c index facf90d..fa16a6e 100644 --- a/generic/tclOOCall.c +++ b/generic/tclOOCall.c @@ -726,6 +726,14 @@ AddSimpleChainToCallContext( * ---------------------------------------------------------------------- */ +static TCL_NOINLINE void +AddMethodToCallChainCore( + Method *const mPtr, + struct ChainBuilder *const cbPtr, + Tcl_HashTable *const doneFilters, + Class *const filterDecl, + int flags); + static inline void AddMethodToCallChain( Method *const mPtr, /* Actual method implementation to add to call @@ -748,20 +756,43 @@ AddMethodToCallChain( * looking to add things from a mixin and have * not passed a mixin. */ { - register CallChain *callPtr = cbPtr->callChainPtr; - int i; - /* - * Return if this is just an entry used to record whether this is a public + * Check if this is just an entry used to record whether this is a public * method. If so, there's nothing real to call and so nothing to add to * the call chain. * * This is also where we enforce mixin-consistency. */ - if (mPtr == NULL || mPtr->typePtr == NULL || !MIXIN_CONSISTENT(flags)) { - return; + if (mPtr && mPtr->typePtr && MIXIN_CONSISTENT(flags)) { + AddMethodToCallChainCore(mPtr, cbPtr, doneFilters, filterDecl, flags); } +} + +static TCL_NOINLINE void +AddMethodToCallChainCore( + Method *const mPtr, /* Actual method implementation to add to call + * chain (or NULL, a no-op). */ + struct ChainBuilder *const cbPtr, + /* The call chain to add the method + * implementation to. */ + Tcl_HashTable *const doneFilters, + /* Where to record what filters have been + * processed. If NULL, not processing filters. + * Note that this function does not update + * this hashtable. */ + Class *const filterDecl, /* The class that declared the filter. If + * NULL, either the filter was declared by the + * object or this isn't a filter. */ + int flags) /* Used to check if we're mixin-consistent + * only. Mixin-consistent means that either + * we're looking to add things from a mixin + * and we have passed a mixin, or we're not + * looking to add things from a mixin and have + * not passed a mixin. */ +{ + register CallChain *callPtr = cbPtr->callChainPtr; + int i; /* * Enforce real private method handling here. We will skip adding this -- cgit v0.12