From 9fb7d8647cc45add433c331104f2f0cc144947b1 Mon Sep 17 00:00:00 2001 From: dkf Date: Thu, 29 Nov 2001 15:38:40 +0000 Subject: * generic/tclBinary.c: Added caching scheme to reduce number of object allocations when doing scans of large repetitive binary strings. --- ChangeLog | 9 ++++ generic/tclBinary.c | 133 ++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 123 insertions(+), 19 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7e6ff11..50ababd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2001-11-29 Donal K. Fellows + + * generic/tclBinary.c (BINARY_SCAN_MAX_CACHE, Tcl_BinaryObjCmd, + ScanNumber): Added caching scheme to reduce number of object + allocations when doing scans of large repetitive binary strings. + See comments in file for reasoning behind implementation. + Suggested by Miguel Sofer in Patch #429916, but independently + implemented. + 2001-11-28 Donal K. Fellows * doc/regsub.n, doc/regexp.n: Converted dangling references to diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 9fd9f4d..54f8ff0 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclBinary.c,v 1.8 2001/08/23 14:22:49 dkf Exp $ + * RCS: @(#) $Id: tclBinary.c,v 1.9 2001/11/29 15:38:40 dkf Exp $ */ #include @@ -26,6 +26,26 @@ #define BINARY_NOCOUNT -2 /* No count was specified in format. */ /* + * The following defines the maximum number of different (integer) + * numbers placed in the object cache by 'binary scan' before it bails + * out and switches back to Plan A (creating a new object for each + * value.) Theoretically, it would be possible to keep the cache + * about for the values that are already in it, but that makes the + * code slower in practise when overflow happens, and makes little + * odds the rest of the time (as measured on my machine.) It is also + * slower (on the sample I tried at least) to grow the cache to hold + * all items we might want to put in it; presumably the extra cost of + * managing the memory for the enlarged table outweighs the benefit + * from allocating fewer objects. This is probably because as the + * number of objects increases, the likelihood of reuse of any + * particular one drops, and there is very little gain from larger + * maximum cache sizes (the value below is chosen to allow caching to + * work in full with conversion of bytes.) - DKF + */ + +#define BINARY_SCAN_MAX_CACHE 260 + +/* * Prototypes for local procedures defined in this file: */ @@ -36,7 +56,8 @@ static int FormatNumber _ANSI_ARGS_((Tcl_Interp *interp, int type, static void FreeByteArrayInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); static int GetFormatSpec _ANSI_ARGS_((char **formatPtr, char *cmdPtr, int *countPtr)); -static Tcl_Obj * ScanNumber _ANSI_ARGS_((unsigned char *buffer, int type)); +static Tcl_Obj * ScanNumber _ANSI_ARGS_((unsigned char *buffer, + int type, Tcl_HashTable **numberCachePtr)); static int SetByteArrayFromAny _ANSI_ARGS_((Tcl_Interp *interp, Tcl_Obj *objPtr)); static void UpdateStringOfByteArray _ANSI_ARGS_((Tcl_Obj *listPtr)); @@ -996,12 +1017,16 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) case BINARY_SCAN: { int i; Tcl_Obj *valuePtr, *elementPtr; + Tcl_HashTable numberCacheHash; + Tcl_HashTable *numberCachePtr; if (objc < 4) { Tcl_WrongNumArgs(interp, 2, objv, "value formatString ?varName varName ...?"); return TCL_ERROR; } + numberCachePtr = &numberCacheHash; + Tcl_InitHashTable(numberCachePtr, TCL_ONE_WORD_KEYS); buffer = Tcl_GetByteArrayFromObj(objv[2], &length); format = Tcl_GetString(objv[3]); cursor = buffer; @@ -1018,6 +1043,9 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) unsigned char *src; if (arg >= objc) { + if (numberCachePtr != NULL) { + Tcl_DeleteHashTable(numberCachePtr); + } goto badIndex; } if (count == BINARY_ALL) { @@ -1051,6 +1079,9 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) NULL, valuePtr, TCL_LEAVE_ERR_MSG); arg++; if (resultPtr == NULL) { + if (numberCachePtr != NULL) { + Tcl_DeleteHashTable(numberCachePtr); + } Tcl_DecrRefCount(valuePtr); /* unneeded */ return TCL_ERROR; } @@ -1063,6 +1094,9 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) char *dest; if (arg >= objc) { + if (numberCachePtr != NULL) { + Tcl_DeleteHashTable(numberCachePtr); + } goto badIndex; } if (count == BINARY_ALL) { @@ -1104,6 +1138,9 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) NULL, valuePtr, TCL_LEAVE_ERR_MSG); arg++; if (resultPtr == NULL) { + if (numberCachePtr != NULL) { + Tcl_DeleteHashTable(numberCachePtr); + } Tcl_DecrRefCount(valuePtr); /* unneeded */ return TCL_ERROR; } @@ -1118,6 +1155,9 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) static char hexdigit[] = "0123456789abcdef"; if (arg >= objc) { + if (numberCachePtr != NULL) { + Tcl_DeleteHashTable(numberCachePtr); + } goto badIndex; } if (count == BINARY_ALL) { @@ -1159,6 +1199,9 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) NULL, valuePtr, TCL_LEAVE_ERR_MSG); arg++; if (resultPtr == NULL) { + if (numberCachePtr != NULL) { + Tcl_DeleteHashTable(numberCachePtr); + } Tcl_DecrRefCount(valuePtr); /* unneeded */ return TCL_ERROR; } @@ -1191,13 +1234,17 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) scanNumber: if (arg >= objc) { + if (numberCachePtr != NULL) { + Tcl_DeleteHashTable(numberCachePtr); + } goto badIndex; } if (count == BINARY_NOCOUNT) { if ((length - offset) < size) { goto done; } - valuePtr = ScanNumber(buffer+offset, cmd); + valuePtr = ScanNumber(buffer+offset, cmd, + &numberCachePtr); offset += size; } else { if (count == BINARY_ALL) { @@ -1209,7 +1256,8 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) valuePtr = Tcl_NewObj(); src = buffer+offset; for (i = 0; i < count; i++) { - elementPtr = ScanNumber(src, cmd); + elementPtr = ScanNumber(src, cmd, + &numberCachePtr); src += size; Tcl_ListObjAppendElement(NULL, valuePtr, elementPtr); @@ -1221,6 +1269,9 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) NULL, valuePtr, TCL_LEAVE_ERR_MSG); arg++; if (resultPtr == NULL) { + if (numberCachePtr != NULL) { + Tcl_DeleteHashTable(numberCachePtr); + } Tcl_DecrRefCount(valuePtr); /* unneeded */ return TCL_ERROR; } @@ -1251,6 +1302,9 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) } case '@': { if (count == BINARY_NOCOUNT) { + if (numberCachePtr != NULL) { + Tcl_DeleteHashTable(numberCachePtr); + } goto badCount; } if ((count == BINARY_ALL) || (count > length)) { @@ -1261,6 +1315,9 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) break; } default: { + if (numberCachePtr != NULL) { + Tcl_DeleteHashTable(numberCachePtr); + } errorString = str; goto badfield; } @@ -1274,6 +1331,9 @@ Tcl_BinaryObjCmd(dummy, interp, objc, objv) done: Tcl_ResetResult(interp); Tcl_SetLongObj(Tcl_GetObjResult(interp), arg - 4); + if (numberCachePtr != NULL) { + Tcl_DeleteHashTable(numberCachePtr); + } break; } } @@ -1465,15 +1525,21 @@ FormatNumber(interp, type, src, cursorPtr) * This object has a ref count of zero. * * Side effects: - * None. + * Might reuse an object in the number cache, place a new object + * in the cache, or delete the cache and set the reference to + * it (itself passed in by reference) to NULL. * *---------------------------------------------------------------------- */ static Tcl_Obj * -ScanNumber(buffer, type) +ScanNumber(buffer, type, numberCachePtrPtr) unsigned char *buffer; /* Buffer to scan number from. */ int type; /* Format character from "binary scan" */ + Tcl_HashTable **numberCachePtrPtr; + /* Place to look for cache of scanned + * value objects, or NULL if too many + * different numbers have been scanned. */ { long value; @@ -1486,7 +1552,7 @@ ScanNumber(buffer, type) */ switch (type) { - case 'c': { + case 'c': /* * Characters need special handling. We want to produce a * signed result, but on some platforms (such as AIX) chars @@ -1498,28 +1564,26 @@ ScanNumber(buffer, type) if (value & 0x80) { value |= -0x100; } - return Tcl_NewLongObj((long)value); - } - case 's': { + goto returnNumericObject; + + case 's': value = (long) (buffer[0] + (buffer[1] << 8)); goto shortValue; - } - case 'S': { + case 'S': value = (long) (buffer[1] + (buffer[0] << 8)); shortValue: if (value & 0x8000) { value |= -0x10000; } - return Tcl_NewLongObj(value); - } - case 'i': { + goto returnNumericObject; + + case 'i': value = (long) (buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24)); goto intValue; - } - case 'I': { + case 'I': value = (long) (buffer[3] + (buffer[2] << 8) + (buffer[1] << 16) @@ -1534,8 +1598,39 @@ ScanNumber(buffer, type) value -= (((unsigned int)1)<<31); value -= (((unsigned int)1)<<31); } - return Tcl_NewLongObj(value); - } + returnNumericObject: + if (*numberCachePtrPtr == NULL) { + return Tcl_NewLongObj(value); + } else { + register Tcl_HashTable *tablePtr = *numberCachePtrPtr; + register Tcl_HashEntry *hPtr; + int isNew; + + hPtr = Tcl_CreateHashEntry(tablePtr, (char *)value, &isNew); + if (!isNew) { + return (Tcl_Obj *) Tcl_GetHashValue(hPtr); + } + if (tablePtr->numEntries > BINARY_SCAN_MAX_CACHE) { + /* + * We've overflowed the cache! Someone's parsing + * a LOT of varied binary data in a single call! + * Bail out by switching back to the old behaviour + * for the rest of the scan. + * + * Note that anyone just using the 'c' conversion + * (for bytes) cannot trigger this. + */ + Tcl_DeleteHashTable(tablePtr); + *numberCachePtrPtr = NULL; + return Tcl_NewLongObj(value); + } else { + register Tcl_Obj *objPtr = Tcl_NewLongObj(value); + /* Don't need to fiddle with refcount... */ + Tcl_SetHashValue(hPtr, (ClientData) objPtr); + return objPtr; + } + } + case 'f': { float fvalue; memcpy((VOID *) &fvalue, (VOID *) buffer, sizeof(float)); -- cgit v0.12