diff options
author | hershey <hershey> | 1999-06-08 02:59:23 (GMT) |
---|---|---|
committer | hershey <hershey> | 1999-06-08 02:59:23 (GMT) |
commit | 0e53e351cd3c0bdf51b84e459262c47f913c9a97 (patch) | |
tree | 87cee8e23f1c9f621f583c5d97e3e62979935fa9 /generic/tclCmdMZ.c | |
parent | b2759d9c544b22071eca46475d110812304e8faa (diff) | |
download | tcl-0e53e351cd3c0bdf51b84e459262c47f913c9a97.zip tcl-0e53e351cd3c0bdf51b84e459262c47f913c9a97.tar.gz tcl-0e53e351cd3c0bdf51b84e459262c47f913c9a97.tar.bz2 |
* tests/string.test:
* generic/tclVar.c (Tcl_SetVar2Ex):
* generic/tclStringObj.c (Tcl_AppendObjToObj):
* generic/tclCmdMZ.c (Tcl_StringObjCmd): optimized the string
index, string length, string range, and append command in cases
where the object's internal rep is a bytearray. Objects with
other internal reps are converted to have the new unicode internal
rep.
* unix/Makefile.in:
* win/Makefile.in:
* win/Makefile.vc:
* tests/unicode.test:
* generic/tclInt.h:
* generic/tclObj.c:
* generic/tclUnicodeObj.c: added a new object type to store the
unicode representation of a string.
* generic/tclTestObj.c: added the objtype option to the testobj
command. This option returns the name of the type of internal rep
an object has.
Diffstat (limited to 'generic/tclCmdMZ.c')
-rw-r--r-- | generic/tclCmdMZ.c | 139 |
1 files changed, 95 insertions, 44 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 19b9ece..ebea22b 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.12 1999/06/03 18:43:30 stanton Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.13 1999/06/08 02:59:23 hershey Exp $ */ #include "tclInt.h" @@ -1009,32 +1009,47 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } case STR_INDEX: { int index; + char buf[TCL_UTF_MAX]; + Tcl_UniChar unichar; if (objc != 4) { Tcl_WrongNumArgs(interp, 2, objv, "string charIndex"); return TCL_ERROR; } - string1 = Tcl_GetStringFromObj(objv[2], &length1); - /* - * establish what 'end' really means - */ - length2 = Tcl_NumUtfChars(string1, length1); - if (TclGetIntForIndex(interp, objv[3], length2 - 1, - &index) != TCL_OK) { - return TCL_ERROR; - } + /* - * index must be between 0 and the UTF length to be valid + * If we have a ByteArray object, avoid indexing in the + * Utf string since the byte array contains one byte per + * character. Otherwise, use the Unicode string rep to + * get the index'th char. */ - if ((index >= 0) && (index < length2)) { - if (length1 == length2) { - /* no unicode chars */ - Tcl_SetStringObj(resultPtr, string1+index, 1); - } else { - char buf[TCL_UTF_MAX]; - length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1, - index), buf); + if (objv[2]->typePtr == &tclByteArrayType) { + + string1 = Tcl_GetByteArrayFromObj(objv[2], &length1); + + if (TclGetIntForIndex(interp, objv[3], length1 - 1, + &index) != TCL_OK) { + return TCL_ERROR; + } + Tcl_SetStringObj(resultPtr, &string1[index], 1); + } else { + string1 = Tcl_GetStringFromObj(objv[2], &length1); + + /* + * convert to Unicode internal rep to calulate what + * 'end' really means. + */ + + length2 = TclGetUnicodeLengthFromObj(objv[2]); + + if (TclGetIntForIndex(interp, objv[3], length2 - 1, + &index) != TCL_OK) { + return TCL_ERROR; + } + if ((index >= 0) && (index < length2)) { + unichar = TclGetUniCharFromObj(objv[2], index); + length2 = Tcl_UniCharToUtf((int)unichar, buf); Tcl_SetStringObj(resultPtr, buf, length2); } } @@ -1400,16 +1415,16 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) /* * If we have a ByteArray object, avoid recomputing the * string since the byte array contains one byte per - * character. + * character. Otherwise, use the Unicode string rep to + * calculate the length. */ if (objv[2]->typePtr == &tclByteArrayType) { (void) Tcl_GetByteArrayFromObj(objv[2], &length1); Tcl_SetIntObj(resultPtr, length1); } else { - string1 = Tcl_GetStringFromObj(objv[2], &length1); - Tcl_SetIntObj(resultPtr, Tcl_NumUtfChars(string1, - length1)); + Tcl_SetIntObj(resultPtr, + TclGetUnicodeLengthFromObj(objv[2])); } } break; @@ -1550,28 +1565,64 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) return TCL_ERROR; } - string1 = Tcl_GetStringFromObj(objv[2], &length1); - length1 = Tcl_NumUtfChars(string1, length1) - 1; - if (TclGetIntForIndex(interp, objv[3], length1, - &first) != TCL_OK) { - return TCL_ERROR; - } - if (TclGetIntForIndex(interp, objv[4], length1, - &last) != TCL_OK) { - return TCL_ERROR; - } - if (first < 0) { - first = 0; - } - if (last >= length1) { - last = length1; - } - if (last >= first) { - char *start, *end; + /* + * If we have a ByteArray object, avoid indexing in the + * Utf string since the byte array contains one byte per + * character. Otherwise, use the Unicode string rep to + * get the range. + */ - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, last - first + 1); - Tcl_SetStringObj(resultPtr, start, end - start); + if (objv[2]->typePtr == &tclByteArrayType) { + + string1 = Tcl_GetByteArrayFromObj(objv[2], &length1); + + if (TclGetIntForIndex(interp, objv[3], length1 - 1, + &first) != TCL_OK) { + return TCL_ERROR; + } + if (TclGetIntForIndex(interp, objv[4], length1 - 1, + &last) != TCL_OK) { + return TCL_ERROR; + } + if (first < 0) { + first = 0; + } + if (last >= length1 - 1) { + last = length1 - 1; + } + if (last >= first) { + int numBytes = last - first + 1; + resultPtr = Tcl_NewByteArrayObj(&string1[first], numBytes); + Tcl_SetObjResult(interp, resultPtr); + } + } else { + string1 = Tcl_GetStringFromObj(objv[2], &length1); + + /* + * Convert to Unicode internal rep to calulate length and + * create a result object. + */ + + length2 = TclGetUnicodeLengthFromObj(objv[2]) - 1; + + if (TclGetIntForIndex(interp, objv[3], length2, + &first) != TCL_OK) { + return TCL_ERROR; + } + if (TclGetIntForIndex(interp, objv[4], length2, + &last) != TCL_OK) { + return TCL_ERROR; + } + if (first < 0) { + first = 0; + } + if (last >= length1 - 1) { + last = length1 - 1; + } + if (last >= first) { + resultPtr = TclGetRangeFromObj(objv[2], first, last); + Tcl_SetObjResult(interp, resultPtr); + } } break; } |