* generic/tclCmdMZ.c (Tcl_RegsubObjCmd): added special case to

search for simple 'string map' style regsub calls. Delayed creation of resultPtr object until an initial match is made, as the input string object can then be reused for no matches. (Tcl_StringObjCmd): optimization improvements to the STR_MAP algorithm for zero-length and nocase cases.
author: hobbs <hobbs> 2002-02-07 00:56:02 (GMT)
committer: hobbs <hobbs> 2002-02-07 00:56:02 (GMT)
commit: d2bfb1a40f6a591d962f857fac50dbaaf3e93ab3 (patch)
tree: 0e27df68e1f6cd208e3708cd6f4904f84fa85b3e /generic
parent: ab7b0d2ecb6a3cdf49109bf456aa0df5fb6369cb (diff)
download: tcl-d2bfb1a40f6a591d962f857fac50dbaaf3e93ab3.zip
tcl-d2bfb1a40f6a591d962f857fac50dbaaf3e93ab3.tar.gz
tcl-d2bfb1a40f6a591d962f857fac50dbaaf3e93ab3.tar.bz2
1 files changed, 137 insertions, 42 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 425ef3a..34f7fec 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -9,11 +9,12 @@
  * Copyright (c) 1987-1993 The Regents of the University of California.
  * Copyright (c) 1994-1997 Sun Microsystems, Inc.
  * Copyright (c) 1998-2000 Scriptics Corporation.
+ * Copyright (c) 2002 ActiveState Corporation.
  *
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.57 2002/02/02 00:20:54 hobbs Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.58 2002/02/07 00:56:02 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -489,6 +490,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
     cflags = TCL_REG_ADVANCED;
     all = 0;
     offset = 0;
+    resultPtr = NULL;
 
     for (idx = 1; idx < objc; idx++) {
 	char *name;
@@ -554,6 +556,75 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
 
     objv += idx;
 
+    if (all && (offset == 0)
+	    && (strpbrk(Tcl_GetString(objv[2]), "&\\") == NULL)
+	    && (strpbrk(Tcl_GetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) {
+	/*
+	 * This is a simple one pair string map situation.  We make use of
+	 * a slightly modified version of the one pair STR_MAP code.
+	 */
+	int slen, nocase;
+	int (*strCmpFn)_ANSI_ARGS_((CONST Tcl_UniChar *, CONST Tcl_UniChar *,
+		unsigned long));
+	Tcl_UniChar *p, wsrclc;
+
+	numMatches = 0;
+	nocase     = (cflags & TCL_REG_NOCASE);
+	strCmpFn   = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp;
+
+	wsrc     = Tcl_GetUnicodeFromObj(objv[0], &slen);
+	wstring  = Tcl_GetUnicodeFromObj(objv[1], &wlen);
+	wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen);
+	wend     = wstring + wlen - (slen ? slen - 1 : 0);
+	result   = TCL_OK;
+
+	if (slen == 0) {
+	    /*
+	     * regsub behavior for "" matches between each character.
+	     * 'string map' skips the "" case.
+	     */
+	    resultPtr = Tcl_NewUnicodeObj(wstring, 0);
+	    Tcl_IncrRefCount(resultPtr);
+	    for (; wstring < wend; wstring++) {
+		Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen);
+		Tcl_AppendUnicodeToObj(resultPtr, wstring, 1);
+		numMatches++;
+	    }
+	    wlen = 0;
+	} else {
+	    wsrclc = Tcl_UniCharToLower(*wsrc);
+	    for (p = wfirstChar = wstring; wstring < wend; wstring++) {
+		if (((*wstring == *wsrc) ||
+			(nocase && (Tcl_UniCharToLower(*wstring) ==
+				wsrclc))) &&
+			((slen == 1) || (strCmpFn(wstring, wsrc,
+				(unsigned long) slen) == 0))) {
+		    if (numMatches == 0) {
+			resultPtr = Tcl_NewUnicodeObj(wstring, 0);
+			Tcl_IncrRefCount(resultPtr);
+		    }
+		    if (p != wstring) {
+			Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p);
+			p = wstring + slen;
+		    } else {
+			p += slen;
+		    }
+		    wstring = p - 1;
+
+		    Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen);
+		    numMatches++;
+		}
+	    }
+	    if (numMatches) {
+		wlen    = wfirstChar + wlen - p;
+		wstring = p;
+	    }
+	}
+	objPtr = NULL;
+	subPtr = NULL;
+	goto regsubDone;
+    }
+
     regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags);
     if (regExpr == NULL) {
 	return TCL_ERROR;
@@ -579,8 +650,6 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
     wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen);
 
     result = TCL_OK;
-    resultPtr = Tcl_NewUnicodeObj(wstring, 0);
-    Tcl_IncrRefCount(resultPtr);
 
     /*
      * The following loop is to handle multiple matches within the
@@ -607,12 +676,16 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
 	if (match == 0) {
 	    break;
 	}
-	if ((numMatches == 0) && (offset > 0)) {
-	    /*
-	     * Copy the initial portion of the string in if an offset
-	     * was specified.
-	     */
-	    Tcl_AppendUnicodeToObj(resultPtr, wstring, offset);
+	if (numMatches == 0) {
+	    resultPtr = Tcl_NewUnicodeObj(wstring, 0);
+	    Tcl_IncrRefCount(resultPtr);
+	    if (offset > 0) {
+		/*
+		 * Copy the initial portion of the string in if an offset
+		 * was specified.
+		 */
+		Tcl_AppendUnicodeToObj(resultPtr, wstring, offset);
+	    }
 	}
 	numMatches++;
 
@@ -696,13 +769,15 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
      * Copy the portion of the source string after the last match to the
      * result variable.
      */
-
+    regsubDone:
     if (numMatches == 0) {
 	/*
 	 * On zero matches, just ignore the offset, since it shouldn't
 	 * matter to us in this case, and the user may have skewed it.
 	 */
-	Tcl_AppendUnicodeToObj(resultPtr, wstring, wlen);
+	//Tcl_AppendUnicodeToObj(resultPtr, wstring, wlen);
+	resultPtr = objv[1];
+	Tcl_IncrRefCount(resultPtr);
     } else if (offset < wlen) {
 	Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset);
     }
@@ -715,14 +790,14 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
 	 * Set the interpreter's object result to an integer object
 	 * holding the number of matches. 
 	 */
-	
+
 	Tcl_SetIntObj(Tcl_GetObjResult(interp), numMatches);
     }
 
     done:
-    if (objv[1] == objv[0]) { Tcl_DecrRefCount(objPtr); }
-    if (objv[2] == objv[0]) { Tcl_DecrRefCount(subPtr); }
-    Tcl_DecrRefCount(resultPtr);
+    if (objPtr && (objv[1] == objv[0])) { Tcl_DecrRefCount(objPtr); }
+    if (subPtr && (objv[2] == objv[0])) { Tcl_DecrRefCount(subPtr); }
+    if (resultPtr) { Tcl_DecrRefCount(resultPtr); }
     return result;
 }
 
@@ -1767,7 +1842,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	    }
 	    end = ustring1 + length1;
 
-	    strCmpFn = (nocase) ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp;
+	    strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp;
 
 	    /*
 	     * Force result to be Unicode
@@ -1782,52 +1857,69 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		 * This will be >30% faster on larger strings.
 		 */
 		int mapLen;
-		Tcl_UniChar *mapString;
+		Tcl_UniChar *mapString, u2lc;
 
 		ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2);
-		mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);
-		for (p = ustring1; ustring1 < end; ustring1++) {
-		    if ((length2 > 0) &&
-			    (nocase || (*ustring1 == *ustring2)) &&
-			    (strCmpFn(ustring1, ustring2,
-				    (unsigned long) length2) == 0)) {
-			if (p != ustring1) {
-			    Tcl_AppendUnicodeToObj(resultPtr, p,
-				    ustring1 - p);
-			    p = ustring1 + length2;
-			} else {
-			    p += length2;
-			}
-			ustring1 = p - 1;
+		p = ustring1;
+		if (length2 == 0) {
+		    ustring1 = end;
+		} else {
+		    mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);
+		    u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0);
+		    for (; ustring1 < end; ustring1++) {
+			if (((*ustring1 == *ustring2) ||
+				(nocase && (Tcl_UniCharToLower(*ustring1) ==
+					u2lc))) &&
+				((length2 == 1) || strCmpFn(ustring1, ustring2,
+					(unsigned long) length2) == 0)) {
+			    if (p != ustring1) {
+				Tcl_AppendUnicodeToObj(resultPtr, p,
+					ustring1 - p);
+				p = ustring1 + length2;
+			    } else {
+				p += length2;
+			    }
+			    ustring1 = p - 1;
 
-			Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen);
+			    Tcl_AppendUnicodeToObj(resultPtr, mapString,
+				    mapLen);
+			}
 		    }
 		}
 	    } else {
-		Tcl_UniChar **mapStrings =
-		    (Tcl_UniChar **) ckalloc((mapElemc * 2)
-			    * sizeof(Tcl_UniChar *));
-		int *mapLens =
-		    (int *) ckalloc((mapElemc * 2) * sizeof(int));
+		Tcl_UniChar **mapStrings, *u2lc = NULL;
+		int *mapLens;
 		/*
 		 * Precompute pointers to the unicode string and length.
 		 * This saves us repeated function calls later,
-		 * significantly speeding up the algorithm.
+		 * significantly speeding up the algorithm.  We only need
+		 * the lowercase first char in the nocase case.
 		 */
+		mapStrings = (Tcl_UniChar **) ckalloc((mapElemc * 2)
+			* sizeof(Tcl_UniChar *));
+		mapLens = (int *) ckalloc((mapElemc * 2) * sizeof(int));
+		if (nocase) {
+		    u2lc = (Tcl_UniChar *)
+			ckalloc((mapElemc) * sizeof(Tcl_UniChar));
+		}
 		for (index = 0; index < mapElemc; index++) {
 		    mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index],
 			    &(mapLens[index]));
+		    if (nocase && ((index % 2) == 0)) {
+			u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]);
+		    }
 		}
 		for (p = ustring1; ustring1 < end; ustring1++) {
 		    for (index = 0; index < mapElemc; index += 2) {
 			/*
-			 * Get the key string to match on
+			 * Get the key string to match on.
 			 */
 			ustring2 = mapStrings[index];
 			length2  = mapLens[index];
-			if ((length2 > 0) &&
-				(nocase || (*ustring1 == *ustring2)) &&
-				(strCmpFn(ustring2, ustring1,
+			if ((length2 > 0) && ((*ustring1 == *ustring2) ||
+				(nocase && (Tcl_UniCharToLower(*ustring1) ==
+					u2lc[index/2]))) &&
+				((length2 == 1) || strCmpFn(ustring2, ustring1,
 					(unsigned long) length2) == 0)) {
 			    if (p != ustring1) {
 				/*
@@ -1855,6 +1947,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		}
 		ckfree((char *) mapStrings);
 		ckfree((char *) mapLens);
+		if (nocase) {
+		    ckfree((char *) u2lc);
+		}
 	    }
 	    if (p != ustring1) {
 		/*
author	hobbs <hobbs>	2002-02-07 00:56:02 (GMT)
committer	hobbs <hobbs>	2002-02-07 00:56:02 (GMT)
commit	d2bfb1a40f6a591d962f857fac50dbaaf3e93ab3 (patch)
tree	0e27df68e1f6cd208e3708cd6f4904f84fa85b3e /generic
parent	ab7b0d2ecb6a3cdf49109bf456aa0df5fb6369cb (diff)
download	tcl-d2bfb1a40f6a591d962f857fac50dbaaf3e93ab3.zip tcl-d2bfb1a40f6a591d962f857fac50dbaaf3e93ab3.tar.gz tcl-d2bfb1a40f6a591d962f857fac50dbaaf3e93ab3.tar.bz2