diff options
author | hobbs <hobbs> | 2002-02-07 00:56:02 (GMT) |
---|---|---|
committer | hobbs <hobbs> | 2002-02-07 00:56:02 (GMT) |
commit | d2bfb1a40f6a591d962f857fac50dbaaf3e93ab3 (patch) | |
tree | 0e27df68e1f6cd208e3708cd6f4904f84fa85b3e /generic | |
parent | ab7b0d2ecb6a3cdf49109bf456aa0df5fb6369cb (diff) | |
download | tcl-d2bfb1a40f6a591d962f857fac50dbaaf3e93ab3.zip tcl-d2bfb1a40f6a591d962f857fac50dbaaf3e93ab3.tar.gz tcl-d2bfb1a40f6a591d962f857fac50dbaaf3e93ab3.tar.bz2 |
* generic/tclCmdMZ.c (Tcl_RegsubObjCmd): added special case to
search for simple 'string map' style regsub calls.
Delayed creation of resultPtr object until an initial match is
made, as the input string object can then be reused for no matches.
(Tcl_StringObjCmd): optimization improvements to the STR_MAP
algorithm for zero-length and nocase cases.
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclCmdMZ.c | 179 |
1 files changed, 137 insertions, 42 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 425ef3a..34f7fec 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -9,11 +9,12 @@ * Copyright (c) 1987-1993 The Regents of the University of California. * Copyright (c) 1994-1997 Sun Microsystems, Inc. * Copyright (c) 1998-2000 Scriptics Corporation. + * Copyright (c) 2002 ActiveState Corporation. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.57 2002/02/02 00:20:54 hobbs Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.58 2002/02/07 00:56:02 hobbs Exp $ */ #include "tclInt.h" @@ -489,6 +490,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) cflags = TCL_REG_ADVANCED; all = 0; offset = 0; + resultPtr = NULL; for (idx = 1; idx < objc; idx++) { char *name; @@ -554,6 +556,75 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) objv += idx; + if (all && (offset == 0) + && (strpbrk(Tcl_GetString(objv[2]), "&\\") == NULL) + && (strpbrk(Tcl_GetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) { + /* + * This is a simple one pair string map situation. We make use of + * a slightly modified version of the one pair STR_MAP code. + */ + int slen, nocase; + int (*strCmpFn)_ANSI_ARGS_((CONST Tcl_UniChar *, CONST Tcl_UniChar *, + unsigned long)); + Tcl_UniChar *p, wsrclc; + + numMatches = 0; + nocase = (cflags & TCL_REG_NOCASE); + strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp; + + wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen); + wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen); + wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen); + wend = wstring + wlen - (slen ? slen - 1 : 0); + result = TCL_OK; + + if (slen == 0) { + /* + * regsub behavior for "" matches between each character. + * 'string map' skips the "" case. + */ + resultPtr = Tcl_NewUnicodeObj(wstring, 0); + Tcl_IncrRefCount(resultPtr); + for (; wstring < wend; wstring++) { + Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); + Tcl_AppendUnicodeToObj(resultPtr, wstring, 1); + numMatches++; + } + wlen = 0; + } else { + wsrclc = Tcl_UniCharToLower(*wsrc); + for (p = wfirstChar = wstring; wstring < wend; wstring++) { + if (((*wstring == *wsrc) || + (nocase && (Tcl_UniCharToLower(*wstring) == + wsrclc))) && + ((slen == 1) || (strCmpFn(wstring, wsrc, + (unsigned long) slen) == 0))) { + if (numMatches == 0) { + resultPtr = Tcl_NewUnicodeObj(wstring, 0); + Tcl_IncrRefCount(resultPtr); + } + if (p != wstring) { + Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p); + p = wstring + slen; + } else { + p += slen; + } + wstring = p - 1; + + Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); + numMatches++; + } + } + if (numMatches) { + wlen = wfirstChar + wlen - p; + wstring = p; + } + } + objPtr = NULL; + subPtr = NULL; + goto regsubDone; + } + regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags); if (regExpr == NULL) { return TCL_ERROR; @@ -579,8 +650,6 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen); result = TCL_OK; - resultPtr = Tcl_NewUnicodeObj(wstring, 0); - Tcl_IncrRefCount(resultPtr); /* * The following loop is to handle multiple matches within the @@ -607,12 +676,16 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) if (match == 0) { break; } - if ((numMatches == 0) && (offset > 0)) { - /* - * Copy the initial portion of the string in if an offset - * was specified. - */ - Tcl_AppendUnicodeToObj(resultPtr, wstring, offset); + if (numMatches == 0) { + resultPtr = Tcl_NewUnicodeObj(wstring, 0); + Tcl_IncrRefCount(resultPtr); + if (offset > 0) { + /* + * Copy the initial portion of the string in if an offset + * was specified. + */ + Tcl_AppendUnicodeToObj(resultPtr, wstring, offset); + } } numMatches++; @@ -696,13 +769,15 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) * Copy the portion of the source string after the last match to the * result variable. */ - + regsubDone: if (numMatches == 0) { /* * On zero matches, just ignore the offset, since it shouldn't * matter to us in this case, and the user may have skewed it. */ - Tcl_AppendUnicodeToObj(resultPtr, wstring, wlen); + //Tcl_AppendUnicodeToObj(resultPtr, wstring, wlen); + resultPtr = objv[1]; + Tcl_IncrRefCount(resultPtr); } else if (offset < wlen) { Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); } @@ -715,14 +790,14 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) * Set the interpreter's object result to an integer object * holding the number of matches. */ - + Tcl_SetIntObj(Tcl_GetObjResult(interp), numMatches); } done: - if (objv[1] == objv[0]) { Tcl_DecrRefCount(objPtr); } - if (objv[2] == objv[0]) { Tcl_DecrRefCount(subPtr); } - Tcl_DecrRefCount(resultPtr); + if (objPtr && (objv[1] == objv[0])) { Tcl_DecrRefCount(objPtr); } + if (subPtr && (objv[2] == objv[0])) { Tcl_DecrRefCount(subPtr); } + if (resultPtr) { Tcl_DecrRefCount(resultPtr); } return result; } @@ -1767,7 +1842,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } end = ustring1 + length1; - strCmpFn = (nocase) ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp; + strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp; /* * Force result to be Unicode @@ -1782,52 +1857,69 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) * This will be >30% faster on larger strings. */ int mapLen; - Tcl_UniChar *mapString; + Tcl_UniChar *mapString, u2lc; ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); - mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); - for (p = ustring1; ustring1 < end; ustring1++) { - if ((length2 > 0) && - (nocase || (*ustring1 == *ustring2)) && - (strCmpFn(ustring1, ustring2, - (unsigned long) length2) == 0)) { - if (p != ustring1) { - Tcl_AppendUnicodeToObj(resultPtr, p, - ustring1 - p); - p = ustring1 + length2; - } else { - p += length2; - } - ustring1 = p - 1; + p = ustring1; + if (length2 == 0) { + ustring1 = end; + } else { + mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); + u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0); + for (; ustring1 < end; ustring1++) { + if (((*ustring1 == *ustring2) || + (nocase && (Tcl_UniCharToLower(*ustring1) == + u2lc))) && + ((length2 == 1) || strCmpFn(ustring1, ustring2, + (unsigned long) length2) == 0)) { + if (p != ustring1) { + Tcl_AppendUnicodeToObj(resultPtr, p, + ustring1 - p); + p = ustring1 + length2; + } else { + p += length2; + } + ustring1 = p - 1; - Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen); + Tcl_AppendUnicodeToObj(resultPtr, mapString, + mapLen); + } } } } else { - Tcl_UniChar **mapStrings = - (Tcl_UniChar **) ckalloc((mapElemc * 2) - * sizeof(Tcl_UniChar *)); - int *mapLens = - (int *) ckalloc((mapElemc * 2) * sizeof(int)); + Tcl_UniChar **mapStrings, *u2lc = NULL; + int *mapLens; /* * Precompute pointers to the unicode string and length. * This saves us repeated function calls later, - * significantly speeding up the algorithm. + * significantly speeding up the algorithm. We only need + * the lowercase first char in the nocase case. */ + mapStrings = (Tcl_UniChar **) ckalloc((mapElemc * 2) + * sizeof(Tcl_UniChar *)); + mapLens = (int *) ckalloc((mapElemc * 2) * sizeof(int)); + if (nocase) { + u2lc = (Tcl_UniChar *) + ckalloc((mapElemc) * sizeof(Tcl_UniChar)); + } for (index = 0; index < mapElemc; index++) { mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], &(mapLens[index])); + if (nocase && ((index % 2) == 0)) { + u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]); + } } for (p = ustring1; ustring1 < end; ustring1++) { for (index = 0; index < mapElemc; index += 2) { /* - * Get the key string to match on + * Get the key string to match on. */ ustring2 = mapStrings[index]; length2 = mapLens[index]; - if ((length2 > 0) && - (nocase || (*ustring1 == *ustring2)) && - (strCmpFn(ustring2, ustring1, + if ((length2 > 0) && ((*ustring1 == *ustring2) || + (nocase && (Tcl_UniCharToLower(*ustring1) == + u2lc[index/2]))) && + ((length2 == 1) || strCmpFn(ustring2, ustring1, (unsigned long) length2) == 0)) { if (p != ustring1) { /* @@ -1855,6 +1947,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } ckfree((char *) mapStrings); ckfree((char *) mapLens); + if (nocase) { + ckfree((char *) u2lc); + } } if (p != ustring1) { /* |