From d6befe3b0ecd8da7c936827f0c6c2e3fb41b6496 Mon Sep 17 00:00:00 2001
From: hobbs <hobbs>
Date: Tue, 23 Apr 2002 17:03:34 +0000
Subject:         * generic/tclAlloc.c:         * generic/tclInt.h:         *
 generic/tclThreadAlloc.c (new):         * unix/Makefile.in:         *
 unix/tclUnixThrd.c:         * win/Makefile.in:         * win/tclWinInt.h:    
     * win/tclWinThrd.c: added new threaded allocator contributed by        
 AOL that significantly reduces lock contention when multiple         threads
 are in use.  Only Windows and Unix implementations are         ready, and the
 Windows one may need work.  It is only used by         default on Unix for
 now, and requires that USE_THREAD_ALLOC be         defined (--enable-threads
 on Unix will define this).

---
 ChangeLog                |  14 +
 generic/tclAlloc.c       |  10 +-
 generic/tclInt.h         |  32 +-
 generic/tclThreadAlloc.c | 914 +++++++++++++++++++++++++++++++++++++++++++++++
 unix/configure           |   2 +-
 unix/tcl.m4              |   2 +-
 win/Makefile.in          |   3 +-
 win/tclWinInt.h          |   9 +-
 win/tclWinThrd.c         |  78 +++-
 9 files changed, 1056 insertions(+), 8 deletions(-)
 create mode 100755 generic/tclThreadAlloc.c
diff --git a/ChangeLog b/ChangeLog
index 4c93b52..38f66ee 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -12,6 +12,20 @@
 
 2002-04-22  Jeff Hobbs  <jeffh@ActiveState.com>
 
+	* generic/tclAlloc.c:
+	* generic/tclInt.h:
+	* generic/tclThreadAlloc.c (new):
+	* unix/Makefile.in:
+	* unix/tclUnixThrd.c:
+	* win/Makefile.in:
+	* win/tclWinInt.h:
+	* win/tclWinThrd.c: added new threaded allocator contributed by
+	AOL that significantly reduces lock contention when multiple
+	threads are in use.  Only Windows and Unix implementations are
+	ready, and the Windows one may need work.  It is only used by
+	default on Unix for now, and requires that USE_THREAD_ALLOC be
+	defined (--enable-threads on Unix will define this).
+
 	* generic/tclIOUtil.c (Tcl_FSRegister, Tcl_FSUnregister):
 	corrected calling of Tcl_ConditionWait to ensure that there would
 	be a condition to wait upon.
diff --git a/generic/tclAlloc.c b/generic/tclAlloc.c
index 099da71..fe64c97 100644
--- a/generic/tclAlloc.c
+++ b/generic/tclAlloc.c
@@ -15,9 +15,16 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclAlloc.c,v 1.15 2002/02/27 07:08:28 hobbs Exp $
+ * RCS: @(#) $Id: tclAlloc.c,v 1.16 2002/04/23 17:03:34 hobbs Exp $
  */
 
+/*
+ * Windows and Unix use an alternative allocator when building with threads
+ * that has significantly reduced lock contention.
+ */
+
+#if !defined(TCL_THREADS) || !defined(USE_THREAD_ALLOC)
+
 #include "tclInt.h"
 #include "tclPort.h"
 
@@ -721,3 +728,4 @@ TclpRealloc(cp, nbytes)
 }
 
 #endif /* !USE_TCLALLOC */
+#endif /* !TCL_THREADS */
diff --git a/generic/tclInt.h b/generic/tclInt.h
index a2dc4ac..f87eac9 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -12,7 +12,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclInt.h,v 1.84 2002/03/06 15:04:58 msofer Exp $
+ * RCS: @(#) $Id: tclInt.h,v 1.85 2002/04/23 17:03:34 hobbs Exp $
  */
 
 #ifndef _TCLINT
@@ -2244,6 +2244,36 @@ EXTERN int	TclCompileWhileCmd _ANSI_ARGS_((Tcl_Interp *interp,
 	TclIncrObjsFreed(); \
     }
 
+#elif defined(TCL_THREADS) && defined(USE_THREAD_ALLOC)
+
+/*
+ * The TCL_THREADS mode is like the regular mode but allocates Tcl_Obj's
+ * from per-thread caches.
+ */
+
+EXTERN Tcl_Obj *TclThreadAllocObj _ANSI_ARGS_((void));
+EXTERN void TclThreadFreeObj _ANSI_ARGS_((Tcl_Obj *));
+
+#  define TclNewObj(objPtr) \
+       (objPtr) = TclThreadAllocObj(); \
+       (objPtr)->refCount = 0; \
+       (objPtr)->bytes    = tclEmptyStringRep; \
+       (objPtr)->length   = 0; \
+       (objPtr)->typePtr  = NULL
+
+#  define TclDecrRefCount(objPtr) \
+       if (--(objPtr)->refCount <= 0) { \
+           if (((objPtr)->bytes != NULL) \
+                   && ((objPtr)->bytes != tclEmptyStringRep)) { \
+               ckfree((char *) (objPtr)->bytes); \
+           } \
+           if (((objPtr)->typePtr != NULL) \
+                   && ((objPtr)->typePtr->freeIntRepProc != NULL)) { \
+               (objPtr)->typePtr->freeIntRepProc(objPtr); \
+           } \
+           TclThreadFreeObj((objPtr)); \
+       }
+
 #else /* not TCL_MEM_DEBUG */
 
 #ifdef TCL_THREADS
diff --git a/generic/tclThreadAlloc.c b/generic/tclThreadAlloc.c
new file mode 100755
index 0000000..5a8a925
--- /dev/null
+++ b/generic/tclThreadAlloc.c
@@ -0,0 +1,914 @@
+/*
+ * tclThreadAlloc.c --
+ *
+ *	This is a very fast storage allocator for used with threads (designed
+ *	avoid lock contention).  The basic strategy is to allocate memory in
+ *  	fixed size blocks from block caches.
+ * 
+ * The Initial Developer of the Original Code is America Online, Inc.
+ * Portions created by AOL are Copyright (C) 1999 America Online, Inc.
+ *
+ * See the file "license.terms" for information on usage and redistribution
+ * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+ *
+ * RCS: @(#) $Id: tclThreadAlloc.c,v 1.1 2002/04/23 17:03:34 hobbs Exp $ */
+
+#if defined(TCL_THREADS) && defined(USE_THREAD_ALLOC)
+
+#include "tclInt.h"
+
+#ifdef WIN32
+#include "tclWinInt.h"
+#else
+extern Tcl_Mutex *TclpNewAllocMutex(void);
+extern void *TclpGetAllocCache(void);
+extern void TclpSetAllocCache(void *);
+#endif
+
+/*
+ * If range checking is enabled, an additional byte will be allocated
+ * to store the magic number at the end of the requested memory.
+ */
+
+#ifndef RCHECK
+#ifdef  NDEBUG
+#define RCHECK		0
+#else
+#define RCHECK		1
+#endif
+#endif
+
+/*
+ * The following define the number of Tcl_Obj's to allocate/move
+ * at a time and the high water mark to prune a per-thread cache.
+ * On a 32 bit system, sizeof(Tcl_Obj) = 24 so 800 * 24 = ~16k.
+ */
+ 
+#define NOBJALLOC	 800
+#define NOBJHIGH	1200
+
+/*
+ * The following defines the number of buckets in the bucket
+ * cache and those block sizes from (1<<4) to (1<<(3+NBUCKETS))
+ */
+
+#define NBUCKETS	  11
+#define MAXALLOC	  16284
+
+/*
+ * The following union stores accounting information for
+ * each block including two small magic numbers and
+ * a bucket number when in use or a next pointer when
+ * free.  The original requested size (not including
+ * the Block overhead) is also maintained.
+ */
+ 
+typedef struct Block {
+    union {
+    	struct Block *next;	  /* Next in free list. */
+    	struct {
+	    unsigned char magic1; /* First magic number. */
+	    unsigned char bucket; /* Bucket block allocated from. */
+	    unsigned char unused; /* Padding. */
+	    unsigned char magic2; /* Second magic number. */
+        } b_s;
+    } b_u;
+    size_t b_reqsize;		  /* Requested allocation size. */
+} Block;
+#define b_next		b_u.next
+#define b_bucket	b_u.b_s.bucket
+#define b_magic1	b_u.b_s.magic1
+#define b_magic2	b_u.b_s.magic2
+#define MAGIC		0xef
+
+/*
+ * The following structure defines a bucket of blocks with
+ * various accounting and statistics information.
+ */
+
+typedef struct Bucket {
+    Block *firstPtr;
+    int nfree;
+    int nget;
+    int nput;
+    int nwait;
+    int nlock;
+    int nrequest;
+} Bucket;
+
+/*
+ * The following structure defines a cache of buckets and objs.
+ */
+
+typedef struct Cache {
+    struct Cache  *nextPtr;
+    Tcl_ThreadId   owner;
+    Tcl_Obj       *firstObjPtr;
+    int            nobjs;
+    int	           nsysalloc;
+    Bucket         buckets[NBUCKETS];
+} Cache;
+
+/*
+ * The following array specifies various per-bucket 
+ * limits and locks.  The values are statically initialized
+ * to avoid calculating them repeatedly.
+ */
+
+struct binfo {
+    size_t blocksize;	/* Bucket blocksize. */
+    int maxblocks;	/* Max blocks before move to share. */
+    int nmove;		/* Num blocks to move to share. */
+    Tcl_Mutex *lockPtr; /* Share bucket lock. */
+} binfo[NBUCKETS] = {
+    {   16, 1024, 512, NULL},
+    {   32,  512, 256, NULL},
+    {   64,  256, 128, NULL},
+    {  128,  128,  64, NULL},
+    {  256,   64,  32, NULL},
+    {  512,   32,  16, NULL},
+    { 1024,   16,   8, NULL},
+    { 2048,    8,   4, NULL},
+    { 4096,    4,   2, NULL},
+    { 8192,    2,   1, NULL},
+    {16284,    1,   1, NULL},
+};
+
+/*
+ * Static functions defined in this file.
+ */
+
+static void LockBucket(Cache *cachePtr, int bucket);
+static void UnlockBucket(Cache *cachePtr, int bucket);
+static void PutBlocks(Cache *cachePtr, int bucket, int nmove);
+static int  GetBlocks(Cache *cachePtr, int bucket);
+static Block *Ptr2Block(char *ptr);
+static char *Block2Ptr(Block *blockPtr, int bucket, unsigned int reqsize);
+static void MoveObjs(Cache *fromPtr, Cache *toPtr, int nmove);
+
+/*
+ * Local variables defined in this file and initialized at
+ * startup.
+ */
+
+static Tcl_Mutex *listLockPtr;
+static Tcl_Mutex *objLockPtr;
+static Cache     sharedCache;
+static Cache    *sharedPtr = &sharedCache;
+static Cache    *firstCachePtr = &sharedCache;
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ *  GetCache ---
+ *
+ *	Gets per-thread memory cache, allocating it if necessary.
+ *
+ * Results:
+ *	Pointer to cache.
+ *
+ * Side effects:
+ *  	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static Cache *
+GetCache(void)
+{
+    Cache *cachePtr;
+
+    /*
+     * Check for first-time initialization.
+     */
+
+    if (listLockPtr == NULL) {
+	Tcl_Mutex *initLockPtr;
+    	int i;
+
+	initLockPtr = Tcl_GetAllocMutex();
+	Tcl_MutexLock(initLockPtr);
+	if (listLockPtr == NULL) {
+	    listLockPtr = TclpNewAllocMutex();
+	    objLockPtr = TclpNewAllocMutex();
+	    for (i = 0; i < NBUCKETS; ++i) {
+	        binfo[i].lockPtr = TclpNewAllocMutex();
+	    }
+	}
+	Tcl_MutexUnlock(initLockPtr);
+    }
+
+    /*
+     * Get this thread's cache, allocating if necessary.
+     */
+
+    cachePtr = TclpGetAllocCache();
+    if (cachePtr == NULL) {
+    	cachePtr = calloc(1, sizeof(Cache));
+    	if (cachePtr == NULL) {
+	    panic("alloc: could not allocate new cache");
+    	}
+    	Tcl_MutexLock(listLockPtr);
+    	cachePtr->nextPtr = firstCachePtr;
+    	firstCachePtr = cachePtr;
+    	Tcl_MutexUnlock(listLockPtr);
+    	cachePtr->owner = Tcl_GetCurrentThread();
+	TclpSetAllocCache(cachePtr);
+    }
+    return cachePtr;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ *  TclFreeAllocCache --
+ *
+ *	Flush and delete a cache, removing from list of caches.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+TclFreeAllocCache(void *arg)
+{
+    Cache *cachePtr = arg;
+    Cache **nextPtrPtr;
+    register int   bucket;
+
+    /*
+     * Flush blocks.
+     */
+
+    for (bucket = 0; bucket < NBUCKETS; ++bucket) {
+	if (cachePtr->buckets[bucket].nfree > 0) {
+	    PutBlocks(cachePtr, bucket, cachePtr->buckets[bucket].nfree);
+	}
+    }
+
+    /*
+     * Flush objs.
+     */
+
+    if (cachePtr->nobjs > 0) {
+    	Tcl_MutexLock(objLockPtr);
+    	MoveObjs(cachePtr, sharedPtr, cachePtr->nobjs);
+    	Tcl_MutexUnlock(objLockPtr);
+    }
+
+    /*
+     * Remove from pool list.
+     */
+
+    Tcl_MutexLock(listLockPtr);
+    nextPtrPtr = &firstCachePtr;
+    while (*nextPtrPtr != cachePtr) {
+	nextPtrPtr = &(*nextPtrPtr)->nextPtr;
+    }
+    *nextPtrPtr = cachePtr->nextPtr;
+    cachePtr->nextPtr = NULL;
+    Tcl_MutexUnlock(listLockPtr);
+#ifdef WIN32
+    TlsFree((DWORD) cachePtr);
+#else
+    free(cachePtr);
+#endif
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ *  TclpAlloc --
+ *
+ *	Allocate memory.
+ *
+ * Results:
+ *	Pointer to memory just beyond Block pointer.
+ *
+ * Side effects:
+ *	May allocate more blocks for a bucket.
+ *
+ *----------------------------------------------------------------------
+ */
+
+char *
+TclpAlloc(unsigned int reqsize)
+{
+    Cache          *cachePtr = GetCache();
+    Block         *blockPtr;
+    register int   bucket;
+    size_t  	   size;
+    
+    /*
+     * Increment the requested size to include room for 
+     * the Block structure.  Call malloc() directly if the
+     * required amount is greater than the largest block,
+     * otherwise pop the smallest block large enough,
+     * allocating more blocks if necessary.
+     */
+
+    blockPtr = NULL;     
+    size = reqsize + sizeof(Block);
+#if RCHECK
+    ++size;
+#endif
+    if (size > MAXALLOC) {
+	bucket = NBUCKETS;
+    	blockPtr = malloc(size);
+	if (blockPtr != NULL) {
+	    cachePtr->nsysalloc += reqsize;
+	}
+    } else {
+    	bucket = 0;
+    	while (binfo[bucket].blocksize < size) {
+    	    ++bucket;
+    	}
+    	if (cachePtr->buckets[bucket].nfree || GetBlocks(cachePtr, bucket)) {
+	    blockPtr = cachePtr->buckets[bucket].firstPtr;
+	    cachePtr->buckets[bucket].firstPtr = blockPtr->b_next;
+	    --cachePtr->buckets[bucket].nfree;
+    	    ++cachePtr->buckets[bucket].nget;
+	    cachePtr->buckets[bucket].nrequest += reqsize;
+	}
+    }
+    if (blockPtr == NULL) {
+    	return NULL;
+    }
+    return Block2Ptr(blockPtr, bucket, reqsize);
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ *  TclpFree --
+ *
+ *	Return blocks to the thread block cache.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	May move blocks to shared cache.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+TclpFree(char *ptr)
+{
+    Cache  *cachePtr = GetCache();
+    Block *blockPtr;
+    int bucket;
+ 
+    /*
+     * Get the block back from the user pointer and
+     * call system free directly for large blocks.
+     * Otherwise, push the block back on the bucket and
+     * move blocks to the shared cache if there are now
+     * too many free.
+     */
+
+    blockPtr = Ptr2Block(ptr);
+    bucket = blockPtr->b_bucket;
+    if (bucket == NBUCKETS) {
+	cachePtr->nsysalloc -= blockPtr->b_reqsize;
+	free(blockPtr);
+    } else {
+	cachePtr->buckets[bucket].nrequest -= blockPtr->b_reqsize;
+	blockPtr->b_next = cachePtr->buckets[bucket].firstPtr;
+	cachePtr->buckets[bucket].firstPtr = blockPtr;
+	++cachePtr->buckets[bucket].nfree;
+	++cachePtr->buckets[bucket].nput;
+    	if (cachePtr != sharedPtr &&
+	    cachePtr->buckets[bucket].nfree > binfo[bucket].maxblocks) {
+	    PutBlocks(cachePtr, bucket, binfo[bucket].nmove);
+	}
+    }
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ *  TclpRealloc --
+ *
+ *	Re-allocate memory to a larger or smaller size.
+ *
+ * Results:
+ *	Pointer to memory just beyond Block pointer.
+ *
+ * Side effects:
+ *	Previous memory, if any, may be freed.
+ *
+ *----------------------------------------------------------------------
+ */
+
+char *
+TclpRealloc(char *ptr, unsigned int reqsize)
+{
+    Cache *cachePtr = GetCache();
+    Block *blockPtr;
+    void *new;
+    size_t size, min;
+    int bucket;
+
+    /*
+     * If the block is not a system block and fits in place,
+     * simply return the existing pointer.  Otherwise, if the block
+     * is a system block and the new size would also require a system
+     * block, call realloc() directly.
+     */
+
+    blockPtr = Ptr2Block(ptr);
+    size = reqsize + sizeof(Block);
+#if RCHECK
+    ++size;
+#endif
+    bucket = blockPtr->b_bucket;
+    if (bucket != NBUCKETS) {
+	if (bucket > 0) {
+	    min = binfo[bucket-1].blocksize;
+	} else {
+	    min = 0;
+	}
+	if (size > min && size <= binfo[bucket].blocksize) {
+	    cachePtr->buckets[bucket].nrequest -= blockPtr->b_reqsize;
+	    cachePtr->buckets[bucket].nrequest += reqsize;
+	    return Block2Ptr(blockPtr, bucket, reqsize);
+	}
+    } else if (size > MAXALLOC) {
+	cachePtr->nsysalloc -= blockPtr->b_reqsize;
+	cachePtr->nsysalloc += reqsize;
+	blockPtr = realloc(blockPtr, size);
+	if (blockPtr == NULL) {
+	    return NULL;
+	}
+	return Block2Ptr(blockPtr, NBUCKETS, reqsize);
+    }
+
+    /*
+     * Finally, perform an expensive malloc/copy/free.
+     */
+
+    new = TclpAlloc(reqsize);
+    if (new != NULL) {
+	if (reqsize > blockPtr->b_reqsize) {
+	    reqsize = blockPtr->b_reqsize;
+	}
+    	memcpy(new, ptr, reqsize);
+    	TclpFree(ptr);
+    }
+    return new;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclThreadAllocObj --
+ *
+ *	Allocate a Tcl_Obj from the per-thread cache.
+ *
+ * Results:
+ *	Pointer to uninitialized Tcl_Obj.
+ *
+ * Side effects:
+ *	May move Tcl_Obj's from shared cached or allocate new Tcl_Obj's
+ *  	if list is empty.
+ *
+ *----------------------------------------------------------------------
+ */
+
+Tcl_Obj *
+TclThreadAllocObj(void)
+{
+    register Cache *cachePtr = GetCache();
+    register int nmove;
+    register Tcl_Obj *objPtr;
+    Tcl_Obj *newObjsPtr;
+
+    /*
+     * Get this thread's obj list structure and move
+     * or allocate new objs if necessary.
+     */
+     
+    if (cachePtr->nobjs == 0) {
+    	Tcl_MutexLock(objLockPtr);
+	nmove = sharedPtr->nobjs;
+	if (nmove > 0) {
+	    if (nmove > NOBJALLOC) {
+		nmove = NOBJALLOC;
+	    }
+	    MoveObjs(sharedPtr, cachePtr, nmove);
+	}
+    	Tcl_MutexUnlock(objLockPtr);
+	if (cachePtr->nobjs == 0) {
+	    cachePtr->nobjs = nmove = NOBJALLOC;
+	    newObjsPtr = malloc(sizeof(Tcl_Obj) * nmove);
+	    if (newObjsPtr == NULL) {
+		panic("alloc: could not allocate %d new objects", nmove);
+	    }
+	    while (--nmove >= 0) {
+		objPtr = &newObjsPtr[nmove];
+		objPtr->internalRep.otherValuePtr = cachePtr->firstObjPtr;
+		cachePtr->firstObjPtr = objPtr;
+	    }
+	}
+    }
+
+    /*
+     * Pop the first object.
+     */
+
+    objPtr = cachePtr->firstObjPtr;
+    cachePtr->firstObjPtr = objPtr->internalRep.otherValuePtr;
+    --cachePtr->nobjs;
+    return objPtr;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclThreadFreeObj --
+ *
+ *	Return a free Tcl_Obj to the per-thread cache.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	May move free Tcl_Obj's to shared list upon hitting high
+ *  	water mark.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+TclThreadFreeObj(Tcl_Obj *objPtr)
+{
+    Cache *cachePtr = GetCache();
+
+    /*
+     * Get this thread's list and push on the free Tcl_Obj.
+     */
+     
+    objPtr->internalRep.otherValuePtr = cachePtr->firstObjPtr;
+    cachePtr->firstObjPtr = objPtr;
+    
+    /*
+     * If the number of free objects has exceeded the high
+     * water mark, move some blocks to the shared list.
+     */
+     
+    ++cachePtr->nobjs;
+    if (cachePtr->nobjs > NOBJHIGH) {
+	Tcl_MutexLock(objLockPtr);
+	MoveObjs(cachePtr, sharedPtr, NOBJALLOC);
+	Tcl_MutexUnlock(objLockPtr);
+    }
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_GetMemoryInfo --
+ *
+ *	Return a list-of-lists of memory stats.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *  	List appended to given dstring.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+Tcl_GetMemoryInfo(Tcl_DString *dsPtr)
+{
+    Cache *cachePtr;
+    char buf[200];
+    int n;
+
+    Tcl_MutexLock(listLockPtr);
+    cachePtr = firstCachePtr;
+    while (cachePtr != NULL) {
+	Tcl_DStringStartSublist(dsPtr);
+	if (cachePtr == sharedPtr) {
+    	    Tcl_DStringAppendElement(dsPtr, "shared");
+	} else {
+	    sprintf(buf, "thread%d", (int) cachePtr->owner);
+    	    Tcl_DStringAppendElement(dsPtr, buf);
+	}
+	for (n = 0; n < NBUCKETS; ++n) {
+    	    sprintf(buf, "%d %d %d %d %d %d %d",
+		(int) binfo[n].blocksize,
+		cachePtr->buckets[n].nfree,
+		cachePtr->buckets[n].nget,
+		cachePtr->buckets[n].nput,
+		cachePtr->buckets[n].nrequest,
+		cachePtr->buckets[n].nlock,
+		cachePtr->buckets[n].nwait);
+	    Tcl_DStringAppendElement(dsPtr, buf);
+	}
+	Tcl_DStringEndSublist(dsPtr);
+	    cachePtr = cachePtr->nextPtr;
+    }
+    Tcl_MutexUnlock(listLockPtr);
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * MoveObjs --
+ *
+ *	Move Tcl_Obj's between caches.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *  	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+MoveObjs(Cache *fromPtr, Cache *toPtr, int nmove)
+{
+    register Tcl_Obj *objPtr;
+
+    toPtr->nobjs += nmove;
+    fromPtr->nobjs -= nmove;
+    while (--nmove >= 0) {
+	objPtr = fromPtr->firstObjPtr;
+	fromPtr->firstObjPtr = objPtr->internalRep.otherValuePtr;
+	objPtr->internalRep.otherValuePtr = toPtr->firstObjPtr;
+	toPtr->firstObjPtr = objPtr;
+    }
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ *  Block2Ptr, Ptr2Block --
+ *
+ *	Convert between internal blocks and user pointers.
+ *
+ * Results:
+ *	User pointer or internal block.
+ *
+ * Side effects:
+ *	Invalid blocks will abort the server.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static char *
+Block2Ptr(Block *blockPtr, int bucket, unsigned int reqsize) 
+{
+    register void *ptr;
+
+    blockPtr->b_magic1 = blockPtr->b_magic2 = MAGIC;
+    blockPtr->b_bucket = bucket;
+    blockPtr->b_reqsize = reqsize;
+    ptr = ((void *) (blockPtr + 1));
+#if RCHECK
+    ((unsigned char *)(ptr))[reqsize] = MAGIC;
+#endif
+    return (char *) ptr;
+}
+
+static Block *
+Ptr2Block(char *ptr)
+{
+    register Block *blockPtr;
+
+    blockPtr = (((Block *) ptr) - 1);
+    if (blockPtr->b_magic1 != MAGIC
+#if RCHECK
+	|| ((unsigned char *) ptr)[blockPtr->b_reqsize] != MAGIC
+#endif
+	|| blockPtr->b_magic2 != MAGIC) {
+	panic("alloc: invalid block: %p: %x %x %x\n",
+	    blockPtr, blockPtr->b_magic1, blockPtr->b_magic2,
+	    ((unsigned char *) ptr)[blockPtr->b_reqsize]);
+    }
+    return blockPtr;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ *  LockBucket, UnlockBucket --
+ *
+ *	Set/unset the lock to access a bucket in the shared cache.
+ *
+ * Results:
+ *  	None.
+ *
+ * Side effects:
+ *	Lock activity and contention are monitored globally and on
+ *  	a per-cache basis.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+LockBucket(Cache *cachePtr, int bucket)
+{
+#if 0
+    if (Tcl_MutexTryLock(binfo[bucket].lockPtr) != TCL_OK) {
+	Tcl_MutexLock(binfo[bucket].lockPtr);
+    	++cachePtr->buckets[bucket].nwait;
+    	++sharedPtr->buckets[bucket].nwait;
+    }
+#else
+    Tcl_MutexLock(binfo[bucket].lockPtr);
+#endif
+    ++cachePtr->buckets[bucket].nlock;
+    ++sharedPtr->buckets[bucket].nlock;
+}
+
+
+static void
+UnlockBucket(Cache *cachePtr, int bucket)
+{
+    Tcl_MutexUnlock(binfo[bucket].lockPtr);
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ *  PutBlocks --
+ *
+ *	Return unused blocks to the shared cache.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+PutBlocks(Cache *cachePtr, int bucket, int nmove)
+{
+    register Block *lastPtr, *firstPtr;
+    register int n = nmove;
+
+    /*
+     * Before acquiring the lock, walk the block list to find
+     * the last block to be moved.
+     */
+
+    firstPtr = lastPtr = cachePtr->buckets[bucket].firstPtr;
+    while (--n > 0) {
+	lastPtr = lastPtr->b_next;
+    }
+    cachePtr->buckets[bucket].firstPtr = lastPtr->b_next;
+    cachePtr->buckets[bucket].nfree -= nmove;
+
+    /*
+     * Aquire the lock and place the list of blocks at the front
+     * of the shared cache bucket.
+     */
+
+    LockBucket(cachePtr, bucket);
+    lastPtr->b_next = sharedPtr->buckets[bucket].firstPtr;
+    sharedPtr->buckets[bucket].firstPtr = firstPtr;
+    sharedPtr->buckets[bucket].nfree += nmove;
+    UnlockBucket(cachePtr, bucket);
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ *  GetBlocks --
+ *
+ *	Get more blocks for a bucket.
+ *
+ * Results:
+ *	1 if blocks where allocated, 0 otherwise.
+ *
+ * Side effects:
+ *	Cache may be filled with available blocks.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+GetBlocks(Cache *cachePtr, int bucket)
+{
+    register Block *blockPtr;
+    register int n;
+    register size_t size;
+
+    /*
+     * First, atttempt to move blocks from the shared cache.  Note
+     * the potentially dirty read of nfree before acquiring the lock
+     * which is a slight performance enhancement.  The value is
+     * verified after the lock is actually acquired.
+     */
+     
+    if (cachePtr != sharedPtr && sharedPtr->buckets[bucket].nfree > 0) {
+	LockBucket(cachePtr, bucket);
+	if (sharedPtr->buckets[bucket].nfree > 0) {
+
+	    /*
+	     * Either move the entire list or walk the list to find
+	     * the last block to move.
+	     */
+
+	    n = binfo[bucket].nmove;
+	    if (n >= sharedPtr->buckets[bucket].nfree) {
+		cachePtr->buckets[bucket].firstPtr =
+		    sharedPtr->buckets[bucket].firstPtr;
+		cachePtr->buckets[bucket].nfree =
+		    sharedPtr->buckets[bucket].nfree;
+		sharedPtr->buckets[bucket].firstPtr = NULL;
+		sharedPtr->buckets[bucket].nfree = 0;
+	    } else {
+		blockPtr = sharedPtr->buckets[bucket].firstPtr;
+		cachePtr->buckets[bucket].firstPtr = blockPtr;
+		sharedPtr->buckets[bucket].nfree -= n;
+		cachePtr->buckets[bucket].nfree = n;
+		while (--n > 0) {
+    		    blockPtr = blockPtr->b_next;
+		}
+		sharedPtr->buckets[bucket].firstPtr = blockPtr->b_next;
+		blockPtr->b_next = NULL;
+	    }
+	}
+	UnlockBucket(cachePtr, bucket);
+    }
+    
+    if (cachePtr->buckets[bucket].nfree == 0) {
+
+	/*
+	 * If no blocks could be moved from shared, first look for a
+	 * larger block in this cache to split up.
+	 */
+
+    	blockPtr = NULL;
+	n = NBUCKETS;
+	size = 0; /* lint */
+	while (--n > bucket) {
+    	    if (cachePtr->buckets[n].nfree > 0) {
+		size = binfo[n].blocksize;
+		blockPtr = cachePtr->buckets[n].firstPtr;
+		cachePtr->buckets[n].firstPtr = blockPtr->b_next;
+		--cachePtr->buckets[n].nfree;
+		break;
+	    }
+	}
+
+	/*
+	 * Otherwise, allocate a big new block directly.
+	 */
+
+	if (blockPtr == NULL) {
+	    size = MAXALLOC;
+	    blockPtr = malloc(size);
+	    if (blockPtr == NULL) {
+		return 0;
+	    }
+	}
+
+	/*
+	 * Split the larger block into smaller blocks for this bucket.
+	 */
+
+	n = size / binfo[bucket].blocksize;
+	cachePtr->buckets[bucket].nfree = n;
+	cachePtr->buckets[bucket].firstPtr = blockPtr;
+	while (--n > 0) {
+	    blockPtr->b_next = (Block *) 
+		((char *) blockPtr + binfo[bucket].blocksize);
+	    blockPtr = blockPtr->b_next;
+	}
+	blockPtr->b_next = NULL;
+    }
+    return 1;
+}
+
+#endif /* TCL_THREADS */
diff --git a/unix/configure b/unix/configure
index bd25049..04c7076 100755
--- a/unix/configure
+++ b/unix/configure
@@ -5890,7 +5890,7 @@ fi
 	    DL_OBJS="tclLoadDl.o"
 	    DL_LIBS=""
 	    LDFLAGS="-export-dynamic"
-	    LD_SEARCH_FLAGS=""
+	    LD_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}'
 	    if test "${TCL_THREADS}" = "1" ; then
 		EXTRA_CFLAGS="-pthread"
 	    	LDFLAGS="$LDFLAGS -pthread"
diff --git a/unix/tcl.m4 b/unix/tcl.m4
index 3b51c8d..0a87a22 100644
--- a/unix/tcl.m4
+++ b/unix/tcl.m4
@@ -1022,7 +1022,7 @@ dnl AC_CHECK_TOOL(AR, ar, :)
 	    DL_OBJS="tclLoadDl.o"
 	    DL_LIBS=""
 	    LDFLAGS="-export-dynamic"
-	    LD_SEARCH_FLAGS=""
+	    LD_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}'
 	    if test "${TCL_THREADS}" = "1" ; then
 		EXTRA_CFLAGS="-pthread"
 	    	LDFLAGS="$LDFLAGS -pthread"
diff --git a/win/Makefile.in b/win/Makefile.in
index 56082fc..0c76143 100644
--- a/win/Makefile.in
+++ b/win/Makefile.in
@@ -5,7 +5,7 @@
 # "autoconf" program (constructs like "@foo@" will get replaced in the
 # actual Makefile.
 #
-# RCS: @(#) $Id: Makefile.in,v 1.60 2002/04/22 22:42:37 hobbs Exp $
+# RCS: @(#) $Id: Makefile.in,v 1.61 2002/04/23 17:03:35 hobbs Exp $
 
 VERSION = @TCL_VERSION@
 
@@ -258,6 +258,7 @@ GENERIC_OBJS = \
 	tclStubInit.$(OBJEXT) \
 	tclStubLib.$(OBJEXT) \
 	tclThread.$(OBJEXT) \
+	tclThreadAlloc.$(OBJEXT) \
 	tclThreadJoin.$(OBJEXT) \
 	tclTimer.$(OBJEXT) \
 	tclUtf.$(OBJEXT) \
diff --git a/win/tclWinInt.h b/win/tclWinInt.h
index 3ed0143..f0e8e42 100644
--- a/win/tclWinInt.h
+++ b/win/tclWinInt.h
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclWinInt.h,v 1.13 2002/03/22 23:37:39 hobbs Exp $
+ * RCS: @(#) $Id: tclWinInt.h,v 1.14 2002/04/23 17:03:35 hobbs Exp $
  */
 
 #ifndef _TCLWININT
@@ -102,6 +102,13 @@ EXTERN TclWinProcs *tclWinProcs;
  */
 
 EXTERN void		TclWinInit(HINSTANCE hInst);
+#if defined(TCL_THREADS) && defined(USE_THREAD_ALLOC)
+EXTERN void		TclWinFreeAllocCache(void);
+EXTERN void		TclFreeAllocCache(void *);
+EXTERN Tcl_Mutex	*TclpNewAllocMutex(void);
+EXTERN void		*TclpGetAllocCache(void);
+EXTERN void		TclpSetAllocCache(void *);
+#endif /* TCL_THREADS */
 
 #include "tclIntPlatDecls.h"
 
diff --git a/win/tclWinThrd.c b/win/tclWinThrd.c
index e25b145..10a886c 100644
--- a/win/tclWinThrd.c
+++ b/win/tclWinThrd.c
@@ -9,7 +9,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclWinThrd.c,v 1.18 2001/09/07 18:57:20 mdejong Exp $
+ * RCS: @(#) $Id: tclWinThrd.c,v 1.19 2002/04/23 17:03:35 hobbs Exp $
  */
 
 #include "tclWinInt.h"
@@ -367,7 +367,12 @@ Tcl_Mutex *
 Tcl_GetAllocMutex()
 {
 #ifdef TCL_THREADS
-    InitializeCriticalSection(&allocLock);
+    static int once = 0;
+
+    if (!once) {
+	InitializeCriticalSection(&allocLock);
+	once = 1;
+    }
     return &allocLockPtr;
 #else
     return NULL;
@@ -628,6 +633,14 @@ TclpFinalizeThreadData(keyPtr)
 {
     VOID *result;
     DWORD *indexPtr;
+#ifdef USE_THREAD_ALLOC
+    static int once = 0;
+
+    if (!once) {
+	once = 1;
+	TclWinFreeAllocCache();
+    }
+#endif
 
     if (*keyPtr != NULL) {
 	indexPtr = *(DWORD **)keyPtr;
@@ -970,4 +983,65 @@ TclpFinalizeCondition(condPtr)
 	*condPtr = NULL;
     }
 }
+
+/*
+ * Additions by AOL for specialized thread memory allocator.
+ */
+#ifdef USE_THREAD_ALLOC
+static DWORD key;
+
+Tcl_Mutex *
+TclpNewAllocMutex(void)
+{
+    struct lock {
+        Tcl_Mutex        tlock;
+        CRITICAL_SECTION wlock;
+    } *lockPtr;
+
+    lockPtr = malloc(sizeof(struct lock));
+    if (lockPtr == NULL) {
+	panic("could not allocate lock");
+    }
+    lockPtr->tlock = (Tcl_Mutex) &lockPtr->wlock;
+    InitializeCriticalSection(&lockPtr->wlock);
+    return &lockPtr->tlock;
+}
+
+void *
+TclpGetAllocCache(void)
+{
+    static int once = 0;
+
+    if (!once) {
+	/*
+	 * We need to make sure that TclWinFreeAllocCache is called
+	 * on each thread that calls this, but only on threads that
+	 * call this.
+	 */
+    	key = TlsAlloc();
+	once = 1;
+	if (key == TLS_OUT_OF_INDEXES) {
+	    panic("could not allocate thread local storage");
+	}
+    }
+    return TlsGetValue(key);
+}
+
+void
+TclpSetAllocCache(void *ptr)
+{
+    TlsSetValue(key, ptr);
+}
+
+void
+TclWinFreeAllocCache(void)
+{
+    void *ptr;
+
+    ptr = TlsGetValue(key);
+    TlsSetValue(key, NULL);
+    TclFreeAllocCache(ptr);
+}
+
+#endif /* USE_THREAD_ALLOC */
 #endif /* TCL_THREADS */
-- 
cgit v0.12