summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2001-02-16 09:26:30 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2001-02-16 09:26:30 (GMT)
commit1c02141e53c67e31c95fdf0cacde16366245125d (patch)
tree00ec85b9a55d0128b258753eb3183e5838f3b2e3
parent7f746b8bfbb8b4b90be140239f8155d6366ebf4b (diff)
downloadtcl-1c02141e53c67e31c95fdf0cacde16366245125d.zip
tcl-1c02141e53c67e31c95fdf0cacde16366245125d.tar.gz
tcl-1c02141e53c67e31c95fdf0cacde16366245125d.tar.bz2
[split $string ""] now shares character strings in the resulting list,
giving better performance for smallish strings, and *much* better performance for large (especially multi-megabyte) ones.
-rw-r--r--ChangeLog9
-rw-r--r--generic/tclCmdMZ.c23
2 files changed, 30 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 338882a..2e59c495 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2001-02-15 Donal K. Fellows <fellowsd@cs.man.ac.uk>
+
+ * generic/tclCmdMZ.c (Tcl_SplitObjCmd): Improved efficiency of
+ splitting strings into individual characters by adding a hash
+ table so that only one Tcl_Obj per distinct character is
+ created. Improves performance of splitting of short strings and
+ makes a huge difference to splitting of long strings, such as is
+ done in the mime package in tcllib. [Bug #131523]
+
2001-01-31 Don Porter <dgp@users.sourceforge.net>
* win/makefile.vc (install-libraries): Corrected misdirected
install directory for the msgcat 1.2 package.
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 1e99419..b328793 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.30 2000/09/20 01:50:38 ericm Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.31 2001/02/16 09:26:30 dkf Exp $
*/
#include "tclInt.h"
@@ -939,15 +939,34 @@ Tcl_SplitObjCmd(dummy, interp, objc, objv)
* Do nothing.
*/
} else if (splitCharLen == 0) {
+ Tcl_HashTable charReuseTable;
+ Tcl_HashEntry *hPtr;
+ int isNew;
+
/*
* Handle the special case of splitting on every character.
+ *
+ * Uses a hash table to ensure that each kind of character has
+ * only one Tcl_Obj instance (multiply-referenced) in the
+ * final list. This is a *major* win when splitting a long
+ * string (especially in the megabyte range!) - DKF
*/
+ Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);
for ( ; string < end; string += len) {
len = Tcl_UtfToUniChar(string, &ch);
- objPtr = Tcl_NewStringObj(string, len);
+ /* Assume Tcl_UniChar is an integral type... */
+ hPtr = Tcl_CreateHashEntry(&charReuseTable, (char*)0 + ch, &isNew);
+ if (isNew) {
+ objPtr = Tcl_NewStringObj(string, len);
+ /* Don't need to fiddle with refcount... */
+ Tcl_SetHashValue(hPtr, (ClientData) objPtr);
+ } else {
+ objPtr = (Tcl_Obj*) Tcl_GetHashValue(hPtr);
+ }
Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
}
+ Tcl_DeleteHashTable(&charReuseTable);
} else {
char *element, *p, *splitEnd;
int splitLen;