From 1c02141e53c67e31c95fdf0cacde16366245125d Mon Sep 17 00:00:00 2001 From: dkf Date: Fri, 16 Feb 2001 09:26:30 +0000 Subject: [split $string ""] now shares character strings in the resulting list, giving better performance for smallish strings, and *much* better performance for large (especially multi-megabyte) ones. --- ChangeLog | 9 +++++++++ generic/tclCmdMZ.c | 23 +++++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 338882a..2e59c495 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2001-02-15 Donal K. Fellows + + * generic/tclCmdMZ.c (Tcl_SplitObjCmd): Improved efficiency of + splitting strings into individual characters by adding hash so + that only one Tcl_Obj per character is created. Improves + performance of splitting of short strings and makes a huge + difference to splitting of long strings, such as is done in the + mime package in tcllib. [Bug #131523] + 2001-01-31 Don Porter * win/makefile.vc (install-libraries): Corrected misdirected install directory for the msgcat 1.2 package. diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 1e99419..b328793 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.30 2000/09/20 01:50:38 ericm Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.31 2001/02/16 09:26:30 dkf Exp $ */ #include "tclInt.h" @@ -939,15 +939,34 @@ Tcl_SplitObjCmd(dummy, interp, objc, objv) * Do nothing. */ } else if (splitCharLen == 0) { + Tcl_HashTable charReuseTable; + Tcl_HashEntry *hPtr; + int isNew; + /* * Handle the special case of splitting on every character. + * + * Uses a hash table to ensure that each kind of character has + * only one Tcl_Obj instance (multiply-referenced) in the + * final list. This is a *major* win when splitting on a long + * string (especially in the megabyte range!) - DKF */ + Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS); for ( ; string < end; string += len) { len = Tcl_UtfToUniChar(string, &ch); - objPtr = Tcl_NewStringObj(string, len); + /* Assume Tcl_UniChar is an integral type... */ + hPtr = Tcl_CreateHashEntry(&charReuseTable, (char*)0 + ch, &isNew); + if (isNew) { + objPtr = Tcl_NewStringObj(string, len); + /* Don't need to fiddle with refcount... */ + Tcl_SetHashValue(hPtr, (ClientData) objPtr); + } else { + objPtr = (Tcl_Obj*) Tcl_GetHashValue(hPtr); + } Tcl_ListObjAppendElement(NULL, listPtr, objPtr); } + Tcl_DeleteHashTable(&charReuseTable); } else { char *element, *p, *splitEnd; int splitLen; -- cgit v0.12