From 11acaadecced9136a82d1e9711ffc7a4d9b7090a Mon Sep 17 00:00:00 2001
From: hobbs <hobbs>
Date: Tue, 12 Nov 2002 02:25:24 +0000
Subject: 	* tests/split.test: added 1-char string split tests 	*
 generic/tclCmdMZ.c (Tcl_SplitObjCmd): Use TclUtfToUniChar. 	Also added a
 special case for single-ascii-char splits. 	(Tcl_StringObjCmd): Use
 TclUtfToUniChar. 	For STR_RANGE, support getting ranges of ByteArrays
 (reverts 	change from 2000-05-26). 	(TraceExecutionProc) add
 proper static declaration.

---
 generic/tclCmdMZ.c | 67 ++++++++++++++++++++++++++++++++++++++++++------------
 tests/split.test   | 14 +++++++++++-
 2 files changed, 65 insertions(+), 16 deletions(-)
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index fc6ad98..8243790 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -14,7 +14,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.77 2002/10/15 16:13:46 vincentdarley Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.78 2002/11/12 02:25:24 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -1097,7 +1097,7 @@ Tcl_SplitObjCmd(dummy, interp, objc, objv)
 
 	Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);
 	for ( ; string < end; string += len) {
-	    len = Tcl_UtfToUniChar(string, &ch);
+	    len = TclUtfToUniChar(string, &ch);
 	    /* Assume Tcl_UniChar is an integral type... */
 	    hPtr = Tcl_CreateHashEntry(&charReuseTable, (char*)0 + ch, &isNew);
 	    if (isNew) {
@@ -1110,6 +1110,22 @@ Tcl_SplitObjCmd(dummy, interp, objc, objv)
 	    Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
 	}
 	Tcl_DeleteHashTable(&charReuseTable);
+    } else if (splitCharLen == 1) {
+	char *p;
+
+	/*
+	 * Handle the special case of splitting on a single character.
+	 * This is only true for the one-char ASCII case, as one unicode
+	 * char is > 1 byte in length.
+	 */
+
+	while (*string && (p = strchr(string, (int) *splitChars)) != NULL) {
+	    objPtr = Tcl_NewStringObj(string, p - string);
+	    Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
+	    string = p + 1;
+	}
+	objPtr = Tcl_NewStringObj(string, end - string);
+	Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
     } else {
 	char *element, *p, *splitEnd;
 	int splitLen;
@@ -1123,9 +1139,9 @@ Tcl_SplitObjCmd(dummy, interp, objc, objv)
 	splitEnd = splitChars + splitCharLen;
 
 	for (element = string; string < end; string += len) {
-	    len = Tcl_UtfToUniChar(string, &ch);
+	    len = TclUtfToUniChar(string, &ch);
 	    for (p = splitChars; p < splitEnd; p += splitLen) {
-		splitLen = Tcl_UtfToUniChar(p, &splitChar);
+		splitLen = TclUtfToUniChar(p, &splitChar);
 		if (ch == splitChar) {
 		    objPtr = Tcl_NewStringObj(element, string - element);
 		    Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
@@ -1711,7 +1727,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	    }
 	    if (chcomp != NULL) {
 		for (; string1 < end; string1 += length2, failat++) {
-		    length2 = Tcl_UtfToUniChar(string1, &ch);
+		    length2 = TclUtfToUniChar(string1, &ch);
 		    if (!chcomp(ch)) {
 			result = 0;
 			break;
@@ -2021,9 +2037,22 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	    }
 
 	    /*
-	     * Get the length in actual characters.
+	     * If we have a ByteArray object, avoid indexing in the
+	     * Utf string since the byte array contains one byte per
+	     * character.  Otherwise, use the Unicode string rep to
+	     * get the range.
 	     */
-	    length1 = Tcl_GetCharLength(objv[2]) - 1;
+
+	    if (objv[2]->typePtr == &tclByteArrayType) {
+		string1 = (char *)Tcl_GetByteArrayFromObj(objv[2], &length1);
+		length1--;
+	    } else {
+		/*
+		 * Get the length in actual characters.
+		 */
+		string1 = NULL;
+		length1 = Tcl_GetCharLength(objv[2]) - 1;
+	    }
 
 	    if ((TclGetIntForIndex(interp, objv[3], length1, &first) != TCL_OK)
 		    || (TclGetIntForIndex(interp, objv[4], length1,
@@ -2038,7 +2067,15 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		last = length1;
 	    }
 	    if (last >= first) {
-		Tcl_SetObjResult(interp, Tcl_GetRange(objv[2], first, last));
+		if (string1 != NULL) {
+		    int numBytes = last - first + 1;
+		    resultPtr = Tcl_NewByteArrayObj(
+			(unsigned char *) &string1[first], numBytes);
+		    Tcl_SetObjResult(interp, resultPtr);
+		} else {
+		    Tcl_SetObjResult(interp,
+			    Tcl_GetRange(objv[2], first, last));
+		}
 	    }
 	    break;
 	}
@@ -2228,14 +2265,14 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		 */
 
 		for (p = string1; p < end; p += offset) {
-		    offset = Tcl_UtfToUniChar(p, &ch);
+		    offset = TclUtfToUniChar(p, &ch);
 		    
 		    for (check = string2; ; ) {
 			if (check >= checkEnd) {
 			    p = end;
 			    break;
 			}
-			check += Tcl_UtfToUniChar(check, &trim);
+			check += TclUtfToUniChar(check, &trim);
 			if (ch == trim) {
 			    length1 -= offset;
 			    string1 += offset;
@@ -2256,13 +2293,13 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 
 		for (p = string1 + length1; p > end; ) {
 		    p = Tcl_UtfPrev(p, string1);
-		    offset = Tcl_UtfToUniChar(p, &ch);
+		    offset = TclUtfToUniChar(p, &ch);
 		    for (check = string2; ; ) {
 		        if (check >= checkEnd) {
 			    p = end;
 			    break;
 			}
-			check += Tcl_UtfToUniChar(check, &trim);
+			check += TclUtfToUniChar(check, &trim);
 			if (ch == trim) {
 			    length1 -= offset;
 			    break;
@@ -2307,7 +2344,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		p = Tcl_UtfAtIndex(string1, index);
 		end = string1+length1;
 		for (cur = index; p < end; cur++) {
-		    p += Tcl_UtfToUniChar(p, &ch);
+		    p += TclUtfToUniChar(p, &ch);
 		    if (!Tcl_UniCharIsWordChar(ch)) {
 			break;
 		    }
@@ -2345,7 +2382,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	    if (index > 0) {
 		p = Tcl_UtfAtIndex(string1, index);
 	        for (cur = index; cur >= 0; cur--) {
-		    Tcl_UtfToUniChar(p, &ch);
+		    TclUtfToUniChar(p, &ch);
 		    if (!Tcl_UniCharIsWordChar(ch)) {
 			break;
 		    }
@@ -4266,7 +4303,7 @@ CallTraceProcedure(interp, tracePtr, cmdPtr, command, numChars, objc, objv)
  *
  *----------------------------------------------------------------------
  */
-int 
+static int
 TraceExecutionProc(ClientData clientData, Tcl_Interp *interp, 
 	      int level, CONST char* command, Tcl_Command cmdInfo,
 	      int objc, struct Tcl_Obj *CONST objv[]) {
diff --git a/tests/split.test b/tests/split.test
index 4dcbb00..0ad879b 100644
--- a/tests/split.test
+++ b/tests/split.test
@@ -11,7 +11,7 @@
 # See the file "license.terms" for information on usage and redistribution
 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 #
-# RCS: @(#) $Id: split.test,v 1.7 2001/09/12 20:28:50 dgp Exp $
+# RCS: @(#) $Id: split.test,v 1.8 2002/11/12 02:25:24 hobbs Exp $
 
 if {[lsearch [namespace children] ::tcltest] == -1} {
     package require tcltest
@@ -60,6 +60,18 @@ test split-1.9 {basic split commands} {
 test split-1.10 {basic split commands} {
     split "a0ab1b2bbb3\000c4" ab\000c
 } {{} 0 {} 1 2 {} {} 3 {} 4}
+test split-1.11 {basic split commands} {
+    split "12,3,45" {,}
+} {12 3 45}
+test split-1.12 {basic split commands} {
+    split "\u0001ab\u0001cd\u0001\u0001ef\u0001" \1
+} {{} ab cd {} ef {}}
+test split-1.13 {basic split commands} {
+    split "12,34,56," {,}
+} {12 34 56 {}}
+test split-1.14 {basic split commands} {
+    split ",12,,,34,56," {,}
+} {{} 12 {} {} 34 56 {}}
 
 test split-2.1 {split errors} {
     list [catch split msg] $msg $errorCode
-- 
cgit v0.12