diff options
-rw-r--r-- | generic/tclCmdMZ.c | 179 |
1 files changed, 137 insertions, 42 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 425ef3a..34f7fec 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -9,11 +9,12 @@ * Copyright (c) 1987-1993 The Regents of the University of California. * Copyright (c) 1994-1997 Sun Microsystems, Inc. * Copyright (c) 1998-2000 Scriptics Corporation. + * Copyright (c) 2002 ActiveState Corporation. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.57 2002/02/02 00:20:54 hobbs Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.58 2002/02/07 00:56:02 hobbs Exp $ */ #include "tclInt.h" @@ -489,6 +490,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) cflags = TCL_REG_ADVANCED; all = 0; offset = 0; + resultPtr = NULL; for (idx = 1; idx < objc; idx++) { char *name; @@ -554,6 +556,75 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) objv += idx; + if (all && (offset == 0) + && (strpbrk(Tcl_GetString(objv[2]), "&\\") == NULL) + && (strpbrk(Tcl_GetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) { + /* + * This is a simple one pair string map situation. We make use of + * a slightly modified version of the one pair STR_MAP code. + */ + int slen, nocase; + int (*strCmpFn)_ANSI_ARGS_((CONST Tcl_UniChar *, CONST Tcl_UniChar *, + unsigned long)); + Tcl_UniChar *p, wsrclc; + + numMatches = 0; + nocase = (cflags & TCL_REG_NOCASE); + strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp; + + wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen); + wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen); + wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen); + wend = wstring + wlen - (slen ? slen - 1 : 0); + result = TCL_OK; + + if (slen == 0) { + /* + * regsub behavior for "" matches between each character. + * 'string map' skips the "" case. + */ + resultPtr = Tcl_NewUnicodeObj(wstring, 0); + Tcl_IncrRefCount(resultPtr); + for (; wstring < wend; wstring++) { + Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); + Tcl_AppendUnicodeToObj(resultPtr, wstring, 1); + numMatches++; + } + wlen = 0; + } else { + wsrclc = Tcl_UniCharToLower(*wsrc); + for (p = wfirstChar = wstring; wstring < wend; wstring++) { + if (((*wstring == *wsrc) || + (nocase && (Tcl_UniCharToLower(*wstring) == + wsrclc))) && + ((slen == 1) || (strCmpFn(wstring, wsrc, + (unsigned long) slen) == 0))) { + if (numMatches == 0) { + resultPtr = Tcl_NewUnicodeObj(wstring, 0); + Tcl_IncrRefCount(resultPtr); + } + if (p != wstring) { + Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p); + p = wstring + slen; + } else { + p += slen; + } + wstring = p - 1; + + Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); + numMatches++; + } + } + if (numMatches) { + wlen = wfirstChar + wlen - p; + wstring = p; + } + } + objPtr = NULL; + subPtr = NULL; + goto regsubDone; + } + regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags); if (regExpr == NULL) { return TCL_ERROR; @@ -579,8 +650,6 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen); result = TCL_OK; - resultPtr = Tcl_NewUnicodeObj(wstring, 0); - Tcl_IncrRefCount(resultPtr); /* * The following loop is to handle multiple matches within the @@ -607,12 +676,16 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) if (match == 0) { break; } - if ((numMatches == 0) && (offset > 0)) { - /* - * Copy the initial portion of the string in if an offset - * was specified. - */ - Tcl_AppendUnicodeToObj(resultPtr, wstring, offset); + if (numMatches == 0) { + resultPtr = Tcl_NewUnicodeObj(wstring, 0); + Tcl_IncrRefCount(resultPtr); + if (offset > 0) { + /* + * Copy the initial portion of the string in if an offset + * was specified. + */ + Tcl_AppendUnicodeToObj(resultPtr, wstring, offset); + } } numMatches++; @@ -696,13 +769,15 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) * Copy the portion of the source string after the last match to the * result variable. */ - + regsubDone: if (numMatches == 0) { /* * On zero matches, just ignore the offset, since it shouldn't * matter to us in this case, and the user may have skewed it. */ - Tcl_AppendUnicodeToObj(resultPtr, wstring, wlen); + //Tcl_AppendUnicodeToObj(resultPtr, wstring, wlen); + resultPtr = objv[1]; + Tcl_IncrRefCount(resultPtr); } else if (offset < wlen) { Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); } @@ -715,14 +790,14 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) * Set the interpreter's object result to an integer object * holding the number of matches. */ - + Tcl_SetIntObj(Tcl_GetObjResult(interp), numMatches); } done: - if (objv[1] == objv[0]) { Tcl_DecrRefCount(objPtr); } - if (objv[2] == objv[0]) { Tcl_DecrRefCount(subPtr); } - Tcl_DecrRefCount(resultPtr); + if (objPtr && (objv[1] == objv[0])) { Tcl_DecrRefCount(objPtr); } + if (subPtr && (objv[2] == objv[0])) { Tcl_DecrRefCount(subPtr); } + if (resultPtr) { Tcl_DecrRefCount(resultPtr); } return result; } @@ -1767,7 +1842,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } end = ustring1 + length1; - strCmpFn = (nocase) ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp; + strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp; /* * Force result to be Unicode @@ -1782,52 +1857,69 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) * This will be >30% faster on larger strings. */ int mapLen; - Tcl_UniChar *mapString; + Tcl_UniChar *mapString, u2lc; ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); - mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); - for (p = ustring1; ustring1 < end; ustring1++) { - if ((length2 > 0) && - (nocase || (*ustring1 == *ustring2)) && - (strCmpFn(ustring1, ustring2, - (unsigned long) length2) == 0)) { - if (p != ustring1) { - Tcl_AppendUnicodeToObj(resultPtr, p, - ustring1 - p); - p = ustring1 + length2; - } else { - p += length2; - } - ustring1 = p - 1; + p = ustring1; + if (length2 == 0) { + ustring1 = end; + } else { + mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); + u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0); + for (; ustring1 < end; ustring1++) { + if (((*ustring1 == *ustring2) || + (nocase && (Tcl_UniCharToLower(*ustring1) == + u2lc))) && + ((length2 == 1) || strCmpFn(ustring1, ustring2, + (unsigned long) length2) == 0)) { + if (p != ustring1) { + Tcl_AppendUnicodeToObj(resultPtr, p, + ustring1 - p); + p = ustring1 + length2; + } else { + p += length2; + } + ustring1 = p - 1; - Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen); + Tcl_AppendUnicodeToObj(resultPtr, mapString, + mapLen); + } } } } else { - Tcl_UniChar **mapStrings = - (Tcl_UniChar **) ckalloc((mapElemc * 2) - * sizeof(Tcl_UniChar *)); - int *mapLens = - (int *) ckalloc((mapElemc * 2) * sizeof(int)); + Tcl_UniChar **mapStrings, *u2lc = NULL; + int *mapLens; /* * Precompute pointers to the unicode string and length. * This saves us repeated function calls later, - * significantly speeding up the algorithm. + * significantly speeding up the algorithm. We only need + * the lowercase first char in the nocase case. */ + mapStrings = (Tcl_UniChar **) ckalloc((mapElemc * 2) + * sizeof(Tcl_UniChar *)); + mapLens = (int *) ckalloc((mapElemc * 2) * sizeof(int)); + if (nocase) { + u2lc = (Tcl_UniChar *) + ckalloc((mapElemc) * sizeof(Tcl_UniChar)); + } for (index = 0; index < mapElemc; index++) { mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], &(mapLens[index])); + if (nocase && ((index % 2) == 0)) { + u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]); + } } for (p = ustring1; ustring1 < end; ustring1++) { for (index = 0; index < mapElemc; index += 2) { /* - * Get the key string to match on + * Get the key string to match on. */ ustring2 = mapStrings[index]; length2 = mapLens[index]; - if ((length2 > 0) && - (nocase || (*ustring1 == *ustring2)) && - (strCmpFn(ustring2, ustring1, + if ((length2 > 0) && ((*ustring1 == *ustring2) || + (nocase && (Tcl_UniCharToLower(*ustring1) == + u2lc[index/2]))) && + ((length2 == 1) || strCmpFn(ustring2, ustring1, (unsigned long) length2) == 0)) { if (p != ustring1) { /* @@ -1855,6 +1947,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } ckfree((char *) mapStrings); ckfree((char *) mapLens); + if (nocase) { + ckfree((char *) u2lc); + } } if (p != ustring1) { /* |