summaryrefslogtreecommitdiffstats
path: root/generic/tclCmdMZ.c
diff options
context:
space:
mode:
authorhershey <hershey>1999-06-08 02:59:23 (GMT)
committerhershey <hershey>1999-06-08 02:59:23 (GMT)
commit0e53e351cd3c0bdf51b84e459262c47f913c9a97 (patch)
tree87cee8e23f1c9f621f583c5d97e3e62979935fa9 /generic/tclCmdMZ.c
parentb2759d9c544b22071eca46475d110812304e8faa (diff)
downloadtcl-0e53e351cd3c0bdf51b84e459262c47f913c9a97.zip
tcl-0e53e351cd3c0bdf51b84e459262c47f913c9a97.tar.gz
tcl-0e53e351cd3c0bdf51b84e459262c47f913c9a97.tar.bz2
* tests/string.test:
* generic/tclVar.c (Tcl_SetVar2Ex): * generic/tclStringObj.c (Tcl_AppendObjToObj): * generic/tclCmdMZ.c (Tcl_StringObjCmd): optimized the string index, string length, string range, and append command in cases where the object's internal rep is a bytearray. Objects with other internal reps are converted to have the new unicode internal rep. * unix/Makefile.in: * win/Makefile.in: * win/Makefile.vc: * tests/unicode.test: * generic/tclInt.h: * generic/tclObj.c: * generic/tclUnicodeObj.c: added a new object type to store the unicode representation of a string. * generic/tclTestObj.c: added the objtype option to the testobj command. This option returns the name of the type of internal rep an object has.
Diffstat (limited to 'generic/tclCmdMZ.c')
-rw-r--r--generic/tclCmdMZ.c139
1 files changed, 95 insertions, 44 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 19b9ece..ebea22b 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.12 1999/06/03 18:43:30 stanton Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.13 1999/06/08 02:59:23 hershey Exp $
*/
#include "tclInt.h"
@@ -1009,32 +1009,47 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
case STR_INDEX: {
int index;
+ char buf[TCL_UTF_MAX];
+ Tcl_UniChar unichar;
if (objc != 4) {
Tcl_WrongNumArgs(interp, 2, objv, "string charIndex");
return TCL_ERROR;
}
- string1 = Tcl_GetStringFromObj(objv[2], &length1);
- /*
- * establish what 'end' really means
- */
- length2 = Tcl_NumUtfChars(string1, length1);
- if (TclGetIntForIndex(interp, objv[3], length2 - 1,
- &index) != TCL_OK) {
- return TCL_ERROR;
- }
+
/*
- * index must be between 0 and the UTF length to be valid
+ * If we have a ByteArray object, avoid indexing in the
+ * Utf string since the byte array contains one byte per
+ * character. Otherwise, use the Unicode string rep to
+ * get the index'th char.
*/
- if ((index >= 0) && (index < length2)) {
- if (length1 == length2) {
- /* no unicode chars */
- Tcl_SetStringObj(resultPtr, string1+index, 1);
- } else {
- char buf[TCL_UTF_MAX];
- length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1,
- index), buf);
+ if (objv[2]->typePtr == &tclByteArrayType) {
+
+ string1 = Tcl_GetByteArrayFromObj(objv[2], &length1);
+
+ if (TclGetIntForIndex(interp, objv[3], length1 - 1,
+ &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ Tcl_SetStringObj(resultPtr, &string1[index], 1);
+ } else {
+ string1 = Tcl_GetStringFromObj(objv[2], &length1);
+
+ /*
+ * convert to Unicode internal rep to calulate what
+ * 'end' really means.
+ */
+
+ length2 = TclGetUnicodeLengthFromObj(objv[2]);
+
+ if (TclGetIntForIndex(interp, objv[3], length2 - 1,
+ &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if ((index >= 0) && (index < length2)) {
+ unichar = TclGetUniCharFromObj(objv[2], index);
+ length2 = Tcl_UniCharToUtf((int)unichar, buf);
Tcl_SetStringObj(resultPtr, buf, length2);
}
}
@@ -1400,16 +1415,16 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
/*
* If we have a ByteArray object, avoid recomputing the
* string since the byte array contains one byte per
- * character.
+ * character. Otherwise, use the Unicode string rep to
+ * calculate the length.
*/
if (objv[2]->typePtr == &tclByteArrayType) {
(void) Tcl_GetByteArrayFromObj(objv[2], &length1);
Tcl_SetIntObj(resultPtr, length1);
} else {
- string1 = Tcl_GetStringFromObj(objv[2], &length1);
- Tcl_SetIntObj(resultPtr, Tcl_NumUtfChars(string1,
- length1));
+ Tcl_SetIntObj(resultPtr,
+ TclGetUnicodeLengthFromObj(objv[2]));
}
}
break;
@@ -1550,28 +1565,64 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
return TCL_ERROR;
}
- string1 = Tcl_GetStringFromObj(objv[2], &length1);
- length1 = Tcl_NumUtfChars(string1, length1) - 1;
- if (TclGetIntForIndex(interp, objv[3], length1,
- &first) != TCL_OK) {
- return TCL_ERROR;
- }
- if (TclGetIntForIndex(interp, objv[4], length1,
- &last) != TCL_OK) {
- return TCL_ERROR;
- }
- if (first < 0) {
- first = 0;
- }
- if (last >= length1) {
- last = length1;
- }
- if (last >= first) {
- char *start, *end;
+ /*
+ * If we have a ByteArray object, avoid indexing in the
+ * Utf string since the byte array contains one byte per
+ * character. Otherwise, use the Unicode string rep to
+ * get the range.
+ */
- start = Tcl_UtfAtIndex(string1, first);
- end = Tcl_UtfAtIndex(start, last - first + 1);
- Tcl_SetStringObj(resultPtr, start, end - start);
+ if (objv[2]->typePtr == &tclByteArrayType) {
+
+ string1 = Tcl_GetByteArrayFromObj(objv[2], &length1);
+
+ if (TclGetIntForIndex(interp, objv[3], length1 - 1,
+ &first) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (TclGetIntForIndex(interp, objv[4], length1 - 1,
+ &last) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (first < 0) {
+ first = 0;
+ }
+ if (last >= length1 - 1) {
+ last = length1 - 1;
+ }
+ if (last >= first) {
+ int numBytes = last - first + 1;
+ resultPtr = Tcl_NewByteArrayObj(&string1[first], numBytes);
+ Tcl_SetObjResult(interp, resultPtr);
+ }
+ } else {
+ string1 = Tcl_GetStringFromObj(objv[2], &length1);
+
+ /*
+ * Convert to Unicode internal rep to calulate length and
+ * create a result object.
+ */
+
+ length2 = TclGetUnicodeLengthFromObj(objv[2]) - 1;
+
+ if (TclGetIntForIndex(interp, objv[3], length2,
+ &first) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (TclGetIntForIndex(interp, objv[4], length2,
+ &last) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (first < 0) {
+ first = 0;
+ }
+ if (last >= length1 - 1) {
+ last = length1 - 1;
+ }
+ if (last >= first) {
+ resultPtr = TclGetRangeFromObj(objv[2], first, last);
+ Tcl_SetObjResult(interp, resultPtr);
+ }
}
break;
}