diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2001-02-16 09:26:30 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2001-02-16 09:26:30 (GMT) |
commit | 1c02141e53c67e31c95fdf0cacde16366245125d (patch) | |
tree | 00ec85b9a55d0128b258753eb3183e5838f3b2e3 | |
parent | 7f746b8bfbb8b4b90be140239f8155d6366ebf4b (diff) | |
download | tcl-1c02141e53c67e31c95fdf0cacde16366245125d.zip tcl-1c02141e53c67e31c95fdf0cacde16366245125d.tar.gz tcl-1c02141e53c67e31c95fdf0cacde16366245125d.tar.bz2 |
[split $string ""] now shares character strings in the resulting list,
giving better performance for smallish strings, and *much* better
performance for large (especially multi-megabyte) ones.
-rw-r--r-- | ChangeLog | 9 |
-rw-r--r-- | generic/tclCmdMZ.c | 23 |
2 files changed, 30 insertions, 2 deletions
@@ -1,3 +1,12 @@
+2001-02-15  Donal K. Fellows  <fellowsd@cs.man.ac.uk>
+
+        * generic/tclCmdMZ.c (Tcl_SplitObjCmd): Improved efficiency of
+        splitting strings into individual characters by adding hash so
+        that only one Tcl_Obj per character is created.  Improves
+        performance of splitting of short strings and makes a huge
+        difference to splitting of long strings, such as is done in the
+        mime package in tcllib.  [Bug #131523]
+
 2001-01-31  Don Porter  <dgp@users.sourceforge.net>
         * win/makefile.vc (install-libraries):  Corrected misdirected install
         directory for the msgcat 1.2 package.
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 1e99419..b328793 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.30 2000/09/20 01:50:38 ericm Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.31 2001/02/16 09:26:30 dkf Exp $
  */
 
 #include "tclInt.h"
@@ -939,15 +939,34 @@ Tcl_SplitObjCmd(dummy, interp, objc, objv)
          * Do nothing.
          */
     } else if (splitCharLen == 0) {
+        Tcl_HashTable charReuseTable;
+        Tcl_HashEntry *hPtr;
+        int isNew;
+
         /*
          * Handle the special case of splitting on every character.
+         *
+         * Uses a hash table to ensure that each kind of character has
+         * only one Tcl_Obj instance (multiply-referenced) in the
+         * final list.  This is a *major* win when splitting on a long
+         * string (especially in the megabyte range!) - DKF
          */
 
+        Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);
         for ( ; string < end; string += len) {
             len = Tcl_UtfToUniChar(string, &ch);
-            objPtr = Tcl_NewStringObj(string, len);
+            /* Assume Tcl_UniChar is an integral type... */
+            hPtr = Tcl_CreateHashEntry(&charReuseTable, (char*)0 + ch, &isNew);
+            if (isNew) {
+                objPtr = Tcl_NewStringObj(string, len);
+                /* Don't need to fiddle with refcount... */
+                Tcl_SetHashValue(hPtr, (ClientData) objPtr);
+            } else {
+                objPtr = (Tcl_Obj*) Tcl_GetHashValue(hPtr);
+            }
             Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
         }
+        Tcl_DeleteHashTable(&charReuseTable);
     } else {
         char *element, *p, *splitEnd;
         int splitLen;