summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/tclCmdMZ.c179
1 files changed, 137 insertions, 42 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 425ef3a..34f7fec 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -9,11 +9,12 @@
* Copyright (c) 1987-1993 The Regents of the University of California.
* Copyright (c) 1994-1997 Sun Microsystems, Inc.
* Copyright (c) 1998-2000 Scriptics Corporation.
+ * Copyright (c) 2002 ActiveState Corporation.
*
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.57 2002/02/02 00:20:54 hobbs Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.58 2002/02/07 00:56:02 hobbs Exp $
*/
#include "tclInt.h"
@@ -489,6 +490,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
cflags = TCL_REG_ADVANCED;
all = 0;
offset = 0;
+ resultPtr = NULL;
for (idx = 1; idx < objc; idx++) {
char *name;
@@ -554,6 +556,75 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
objv += idx;
+ if (all && (offset == 0)
+ && (strpbrk(Tcl_GetString(objv[2]), "&\\") == NULL)
+ && (strpbrk(Tcl_GetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) {
+ /*
+ * This is a simple one pair string map situation. We make use of
+ * a slightly modified version of the one pair STR_MAP code.
+ */
+ int slen, nocase;
+ int (*strCmpFn)_ANSI_ARGS_((CONST Tcl_UniChar *, CONST Tcl_UniChar *,
+ unsigned long));
+ Tcl_UniChar *p, wsrclc;
+
+ numMatches = 0;
+ nocase = (cflags & TCL_REG_NOCASE);
+ strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp;
+
+ wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen);
+ wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen);
+ wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen);
+ wend = wstring + wlen - (slen ? slen - 1 : 0);
+ result = TCL_OK;
+
+ if (slen == 0) {
+ /*
+ * regsub behavior for "" matches between each character.
+ * 'string map' skips the "" case.
+ */
+ resultPtr = Tcl_NewUnicodeObj(wstring, 0);
+ Tcl_IncrRefCount(resultPtr);
+ for (; wstring < wend; wstring++) {
+ Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen);
+ Tcl_AppendUnicodeToObj(resultPtr, wstring, 1);
+ numMatches++;
+ }
+ wlen = 0;
+ } else {
+ wsrclc = Tcl_UniCharToLower(*wsrc);
+ for (p = wfirstChar = wstring; wstring < wend; wstring++) {
+ if (((*wstring == *wsrc) ||
+ (nocase && (Tcl_UniCharToLower(*wstring) ==
+ wsrclc))) &&
+ ((slen == 1) || (strCmpFn(wstring, wsrc,
+ (unsigned long) slen) == 0))) {
+ if (numMatches == 0) {
+ resultPtr = Tcl_NewUnicodeObj(wstring, 0);
+ Tcl_IncrRefCount(resultPtr);
+ }
+ if (p != wstring) {
+ Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p);
+ p = wstring + slen;
+ } else {
+ p += slen;
+ }
+ wstring = p - 1;
+
+ Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen);
+ numMatches++;
+ }
+ }
+ if (numMatches) {
+ wlen = wfirstChar + wlen - p;
+ wstring = p;
+ }
+ }
+ objPtr = NULL;
+ subPtr = NULL;
+ goto regsubDone;
+ }
+
regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags);
if (regExpr == NULL) {
return TCL_ERROR;
@@ -579,8 +650,6 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen);
result = TCL_OK;
- resultPtr = Tcl_NewUnicodeObj(wstring, 0);
- Tcl_IncrRefCount(resultPtr);
/*
* The following loop is to handle multiple matches within the
@@ -607,12 +676,16 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
if (match == 0) {
break;
}
- if ((numMatches == 0) && (offset > 0)) {
- /*
- * Copy the initial portion of the string in if an offset
- * was specified.
- */
- Tcl_AppendUnicodeToObj(resultPtr, wstring, offset);
+ if (numMatches == 0) {
+ resultPtr = Tcl_NewUnicodeObj(wstring, 0);
+ Tcl_IncrRefCount(resultPtr);
+ if (offset > 0) {
+ /*
+ * Copy the initial portion of the string in if an offset
+ * was specified.
+ */
+ Tcl_AppendUnicodeToObj(resultPtr, wstring, offset);
+ }
}
numMatches++;
@@ -696,13 +769,15 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
* Copy the portion of the source string after the last match to the
* result variable.
*/
-
+ regsubDone:
if (numMatches == 0) {
/*
* On zero matches, just ignore the offset, since it shouldn't
* matter to us in this case, and the user may have skewed it.
*/
- Tcl_AppendUnicodeToObj(resultPtr, wstring, wlen);
+ //Tcl_AppendUnicodeToObj(resultPtr, wstring, wlen);
+ resultPtr = objv[1];
+ Tcl_IncrRefCount(resultPtr);
} else if (offset < wlen) {
Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset);
}
@@ -715,14 +790,14 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
* Set the interpreter's object result to an integer object
* holding the number of matches.
*/
-
+
Tcl_SetIntObj(Tcl_GetObjResult(interp), numMatches);
}
done:
- if (objv[1] == objv[0]) { Tcl_DecrRefCount(objPtr); }
- if (objv[2] == objv[0]) { Tcl_DecrRefCount(subPtr); }
- Tcl_DecrRefCount(resultPtr);
+ if (objPtr && (objv[1] == objv[0])) { Tcl_DecrRefCount(objPtr); }
+ if (subPtr && (objv[2] == objv[0])) { Tcl_DecrRefCount(subPtr); }
+ if (resultPtr) { Tcl_DecrRefCount(resultPtr); }
return result;
}
@@ -1767,7 +1842,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
end = ustring1 + length1;
- strCmpFn = (nocase) ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp;
+ strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp;
/*
* Force result to be Unicode
@@ -1782,52 +1857,69 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
* This will be >30% faster on larger strings.
*/
int mapLen;
- Tcl_UniChar *mapString;
+ Tcl_UniChar *mapString, u2lc;
ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2);
- mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);
- for (p = ustring1; ustring1 < end; ustring1++) {
- if ((length2 > 0) &&
- (nocase || (*ustring1 == *ustring2)) &&
- (strCmpFn(ustring1, ustring2,
- (unsigned long) length2) == 0)) {
- if (p != ustring1) {
- Tcl_AppendUnicodeToObj(resultPtr, p,
- ustring1 - p);
- p = ustring1 + length2;
- } else {
- p += length2;
- }
- ustring1 = p - 1;
+ p = ustring1;
+ if (length2 == 0) {
+ ustring1 = end;
+ } else {
+ mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);
+ u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0);
+ for (; ustring1 < end; ustring1++) {
+ if (((*ustring1 == *ustring2) ||
+ (nocase && (Tcl_UniCharToLower(*ustring1) ==
+ u2lc))) &&
+ ((length2 == 1) || strCmpFn(ustring1, ustring2,
+ (unsigned long) length2) == 0)) {
+ if (p != ustring1) {
+ Tcl_AppendUnicodeToObj(resultPtr, p,
+ ustring1 - p);
+ p = ustring1 + length2;
+ } else {
+ p += length2;
+ }
+ ustring1 = p - 1;
- Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen);
+ Tcl_AppendUnicodeToObj(resultPtr, mapString,
+ mapLen);
+ }
}
}
} else {
- Tcl_UniChar **mapStrings =
- (Tcl_UniChar **) ckalloc((mapElemc * 2)
- * sizeof(Tcl_UniChar *));
- int *mapLens =
- (int *) ckalloc((mapElemc * 2) * sizeof(int));
+ Tcl_UniChar **mapStrings, *u2lc = NULL;
+ int *mapLens;
/*
* Precompute pointers to the unicode string and length.
* This saves us repeated function calls later,
- * significantly speeding up the algorithm.
+ * significantly speeding up the algorithm. We only need
+ * the lowercase first char in the nocase case.
*/
+ mapStrings = (Tcl_UniChar **) ckalloc((mapElemc * 2)
+ * sizeof(Tcl_UniChar *));
+ mapLens = (int *) ckalloc((mapElemc * 2) * sizeof(int));
+ if (nocase) {
+ u2lc = (Tcl_UniChar *)
+ ckalloc((mapElemc) * sizeof(Tcl_UniChar));
+ }
for (index = 0; index < mapElemc; index++) {
mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index],
&(mapLens[index]));
+ if (nocase && ((index % 2) == 0)) {
+ u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]);
+ }
}
for (p = ustring1; ustring1 < end; ustring1++) {
for (index = 0; index < mapElemc; index += 2) {
/*
- * Get the key string to match on
+ * Get the key string to match on.
*/
ustring2 = mapStrings[index];
length2 = mapLens[index];
- if ((length2 > 0) &&
- (nocase || (*ustring1 == *ustring2)) &&
- (strCmpFn(ustring2, ustring1,
+ if ((length2 > 0) && ((*ustring1 == *ustring2) ||
+ (nocase && (Tcl_UniCharToLower(*ustring1) ==
+ u2lc[index/2]))) &&
+ ((length2 == 1) || strCmpFn(ustring2, ustring1,
(unsigned long) length2) == 0)) {
if (p != ustring1) {
/*
@@ -1855,6 +1947,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
ckfree((char *) mapStrings);
ckfree((char *) mapLens);
+ if (nocase) {
+ ckfree((char *) u2lc);
+ }
}
if (p != ustring1) {
/*