From 11acaadecced9136a82d1e9711ffc7a4d9b7090a Mon Sep 17 00:00:00 2001 From: hobbs Date: Tue, 12 Nov 2002 02:25:24 +0000 Subject: * tests/split.test: added 1-char string split tests * generic/tclCmdMZ.c (Tcl_SplitObjCmd): Use TclUtfToUniChar. Also added a special case for single-ascii-char splits. (Tcl_StringObjCmd): Use TclUtfToUniChar. For STR_RANGE, support getting ranges of ByteArrays (reverts change from 2000-05-26). (TraceExecutionProc) add proper static declaration. --- generic/tclCmdMZ.c | 67 ++++++++++++++++++++++++++++++++++++++++++------------ tests/split.test | 14 +++++++++++- 2 files changed, 65 insertions(+), 16 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index fc6ad98..8243790 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -14,7 +14,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.77 2002/10/15 16:13:46 vincentdarley Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.78 2002/11/12 02:25:24 hobbs Exp $ */ #include "tclInt.h" @@ -1097,7 +1097,7 @@ Tcl_SplitObjCmd(dummy, interp, objc, objv) Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS); for ( ; string < end; string += len) { - len = Tcl_UtfToUniChar(string, &ch); + len = TclUtfToUniChar(string, &ch); /* Assume Tcl_UniChar is an integral type... */ hPtr = Tcl_CreateHashEntry(&charReuseTable, (char*)0 + ch, &isNew); if (isNew) { @@ -1110,6 +1110,22 @@ Tcl_SplitObjCmd(dummy, interp, objc, objv) Tcl_ListObjAppendElement(NULL, listPtr, objPtr); } Tcl_DeleteHashTable(&charReuseTable); + } else if (splitCharLen == 1) { + char *p; + + /* + * Handle the special case of splitting on a single character. + * This is only true for the one-char ASCII case, as one unicode + * char is > 1 byte in length. + */ + + while (*string && (p = strchr(string, (int) *splitChars)) != NULL) { + objPtr = Tcl_NewStringObj(string, p - string); + Tcl_ListObjAppendElement(NULL, listPtr, objPtr); + string = p + 1; + } + objPtr = Tcl_NewStringObj(string, end - string); + Tcl_ListObjAppendElement(NULL, listPtr, objPtr); } else { char *element, *p, *splitEnd; int splitLen; @@ -1123,9 +1139,9 @@ Tcl_SplitObjCmd(dummy, interp, objc, objv) splitEnd = splitChars + splitCharLen; for (element = string; string < end; string += len) { - len = Tcl_UtfToUniChar(string, &ch); + len = TclUtfToUniChar(string, &ch); for (p = splitChars; p < splitEnd; p += splitLen) { - splitLen = Tcl_UtfToUniChar(p, &splitChar); + splitLen = TclUtfToUniChar(p, &splitChar); if (ch == splitChar) { objPtr = Tcl_NewStringObj(element, string - element); Tcl_ListObjAppendElement(NULL, listPtr, objPtr); @@ -1711,7 +1727,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } if (chcomp != NULL) { for (; string1 < end; string1 += length2, failat++) { - length2 = Tcl_UtfToUniChar(string1, &ch); + length2 = TclUtfToUniChar(string1, &ch); if (!chcomp(ch)) { result = 0; break; @@ -2021,9 +2037,22 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } /* - * Get the length in actual characters. + * If we have a ByteArray object, avoid indexing in the + * Utf string since the byte array contains one byte per + * character. Otherwise, use the Unicode string rep to + * get the range. */ - length1 = Tcl_GetCharLength(objv[2]) - 1; + + if (objv[2]->typePtr == &tclByteArrayType) { + string1 = (char *)Tcl_GetByteArrayFromObj(objv[2], &length1); + length1--; + } else { + /* + * Get the length in actual characters. + */ + string1 = NULL; + length1 = Tcl_GetCharLength(objv[2]) - 1; + } if ((TclGetIntForIndex(interp, objv[3], length1, &first) != TCL_OK) || (TclGetIntForIndex(interp, objv[4], length1, @@ -2038,7 +2067,15 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) last = length1; } if (last >= first) { - Tcl_SetObjResult(interp, Tcl_GetRange(objv[2], first, last)); + if (string1 != NULL) { + int numBytes = last - first + 1; + resultPtr = Tcl_NewByteArrayObj( + (unsigned char *) &string1[first], numBytes); + Tcl_SetObjResult(interp, resultPtr); + } else { + Tcl_SetObjResult(interp, + Tcl_GetRange(objv[2], first, last)); + } } break; } @@ -2228,14 +2265,14 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) */ for (p = string1; p < end; p += offset) { - offset = Tcl_UtfToUniChar(p, &ch); + offset = TclUtfToUniChar(p, &ch); for (check = string2; ; ) { if (check >= checkEnd) { p = end; break; } - check += Tcl_UtfToUniChar(check, &trim); + check += TclUtfToUniChar(check, &trim); if (ch == trim) { length1 -= offset; string1 += offset; @@ -2256,13 +2293,13 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) for (p = string1 + length1; p > end; ) { p = Tcl_UtfPrev(p, string1); - offset = Tcl_UtfToUniChar(p, &ch); + offset = TclUtfToUniChar(p, &ch); for (check = string2; ; ) { if (check >= checkEnd) { p = end; break; } - check += Tcl_UtfToUniChar(check, &trim); + check += TclUtfToUniChar(check, &trim); if (ch == trim) { length1 -= offset; break; @@ -2307,7 +2344,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) p = Tcl_UtfAtIndex(string1, index); end = string1+length1; for (cur = index; p < end; cur++) { - p += Tcl_UtfToUniChar(p, &ch); + p += TclUtfToUniChar(p, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } @@ -2345,7 +2382,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) if (index > 0) { p = Tcl_UtfAtIndex(string1, index); for (cur = index; cur >= 0; cur--) { - Tcl_UtfToUniChar(p, &ch); + TclUtfToUniChar(p, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } @@ -4266,7 +4303,7 @@ CallTraceProcedure(interp, tracePtr, cmdPtr, command, numChars, objc, objv) * *---------------------------------------------------------------------- */ -int +static int TraceExecutionProc(ClientData clientData, Tcl_Interp *interp, int level, CONST char* command, Tcl_Command cmdInfo, int objc, struct Tcl_Obj *CONST objv[]) { diff --git a/tests/split.test b/tests/split.test index 4dcbb00..0ad879b 100644 --- a/tests/split.test +++ b/tests/split.test @@ -11,7 +11,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: split.test,v 1.7 2001/09/12 20:28:50 dgp Exp $ +# RCS: @(#) $Id: split.test,v 1.8 2002/11/12 02:25:24 hobbs Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -60,6 +60,18 @@ test split-1.9 {basic split commands} { test split-1.10 {basic split commands} { split "a0ab1b2bbb3\000c4" ab\000c } {{} 0 {} 1 2 {} {} 3 {} 4} +test split-1.11 {basic split commands} { + split "12,3,45" {,} +} {12 3 45} +test split-1.12 {basic split commands} { + split "\u0001ab\u0001cd\u0001\u0001ef\u0001" \1 +} {{} ab cd {} ef {}} +test split-1.13 {basic split commands} { + split "12,34,56," {,} +} {12 34 56 {}} +test split-1.14 {basic split commands} { + split ",12,,,34,56," {,} +} {{} 12 {} {} 34 56 {}} test split-2.1 {split errors} { list [catch split msg] $msg $errorCode -- cgit v0.12