From b29f3df591b0bc0c7cdeba75727b7005e7cd6235 Mon Sep 17 00:00:00 2001 From: dkf Date: Thu, 5 Feb 2009 22:12:44 +0000 Subject: Simplify the implementation of some commands now that the underlying string API knows more about bytearrays. --- ChangeLog | 8 ++- generic/tclCmdMZ.c | 158 ++++++++++++++++++++--------------------------------- 2 files changed, 65 insertions(+), 101 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3d3710b..4c5e47b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,11 @@ 2009-02-05 Donal K. Fellows - * generic/tclExecute.c (TclExecuteByteCode): Make sure that - INST_CONCAT1 will not lose string reps wrongly. [Bug 2568434] + * generic/tclCmdMZ.c (StringIndexCmd, StringRangeCmd, StringLenCmd): + Simplify the implementation of some commands now that the underlying + string API knows more about bytearrays. + + * generic/tclExecute.c (TclExecuteByteCode): [Bug 2568434]: Make sure + that INST_CONCAT1 will not lose string reps wrongly. * generic/tclStringObj.c (Tcl_AppendObjToObj): Special-case the appending of one bytearray to another, which can be extremely rapid. diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index b0c90a4..15895c1 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -15,7 +15,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.178 2009/02/04 18:15:48 dgp Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.179 2009/02/05 22:12:44 dkf Exp $ */ #include "tclInt.h" @@ -1136,8 +1136,8 @@ StringFirstCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar *ustring1, *ustring2; - int match, start, length1, length2; + Tcl_UniChar *needleStr, *haystackStr; + int match, start, needleLen, haystackLen; if (objc < 3 || objc > 4) { Tcl_WrongNumArgs(interp, 1, objv, @@ -1146,15 +1146,15 @@ StringFirstCmd( } /* - * We are searching string2 for the sequence string1. 
+ * We are searching haystackStr for the sequence needleStr. */ match = -1; start = 0; - length2 = -1; + haystackLen = -1; - ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1); - ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2); + needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); + haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); if (objc == 4) { /* @@ -1162,7 +1162,8 @@ StringFirstCmd( * point in the string before we think about a match. */ - if (TclGetIntForIndexM(interp, objv[3], length2-1, &start) != TCL_OK){ + if (TclGetIntForIndexM(interp, objv[3], haystackLen-1, + &start) != TCL_OK){ return TCL_ERROR; } @@ -1170,14 +1171,14 @@ StringFirstCmd( * Reread to prevent shimmering problems. */ - ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1); - ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2); + needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); + haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); - if (start >= length2) { + if (start >= haystackLen) { goto str_first_done; } else if (start > 0) { - ustring2 += start; - length2 -= start; + haystackStr += start; + haystackLen -= start; } else if (start < 0) { /* * Invalid start index mapped to string start; Bug #423581 @@ -1187,18 +1188,18 @@ StringFirstCmd( } } - if (length1 > 0) { + if (needleLen > 0) { register Tcl_UniChar *p, *end; - end = ustring2 + length2 - length1 + 1; - for (p = ustring2; p < end; p++) { + end = haystackStr + haystackLen - needleLen + 1; + for (p = haystackStr; p < end; p++) { /* * Scan forward to find the first character. */ - if ((*p == *ustring1) && (TclUniCharNcmp(ustring1, p, - (unsigned long) length1) == 0)) { - match = p - ustring2; + if ((*p == *needleStr) && (TclUniCharNcmp(needleStr, p, + (unsigned long) needleLen) == 0)) { + match = p - haystackStr; break; } } @@ -1243,8 +1244,8 @@ StringLastCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. 
*/ { - Tcl_UniChar *ustring1, *ustring2, *p; - int match, start, length1, length2; + Tcl_UniChar *needleStr, *haystackStr, *p; + int match, start, needleLen, haystackLen; if (objc < 3 || objc > 4) { Tcl_WrongNumArgs(interp, 1, objv, @@ -1253,15 +1254,15 @@ StringLastCmd( } /* - * We are searching string2 for the sequence string1. + * We are searching haystackStr for the sequence needleStr. */ match = -1; start = 0; - length2 = -1; + haystackLen = -1; - ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1); - ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2); + needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); + haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); if (objc == 4) { /* @@ -1269,7 +1270,8 @@ StringLastCmd( * range to that char index in the string */ - if (TclGetIntForIndexM(interp, objv[3], length2-1, &start) != TCL_OK){ + if (TclGetIntForIndexM(interp, objv[3], haystackLen-1, + &start) != TCL_OK){ return TCL_ERROR; } @@ -1277,29 +1279,29 @@ StringLastCmd( * Reread to prevent shimmering problems. */ - ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1); - ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2); + needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); + haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); if (start < 0) { goto str_last_done; - } else if (start < length2) { - p = ustring2 + start + 1 - length1; + } else if (start < haystackLen) { + p = haystackStr + start + 1 - needleLen; } else { - p = ustring2 + length2 - length1; + p = haystackStr + haystackLen - needleLen; } } else { - p = ustring2 + length2 - length1; + p = haystackStr + haystackLen - needleLen; } - if (length1 > 0) { - for (; p >= ustring2; p--) { + if (needleLen > 0) { + for (; p >= haystackStr; p--) { /* * Scan backwards to find the first character. 
*/ - if ((*p == *ustring1) && !memcmp(ustring1, p, - sizeof(Tcl_UniChar) * (size_t)length1)) { - match = p - ustring2; + if ((*p == *needleStr) && !memcmp(needleStr, p, + sizeof(Tcl_UniChar) * (size_t)needleLen)) { + match = p - haystackStr; break; } } @@ -1343,37 +1345,29 @@ StringIndexCmd( } /* - * If we have a ByteArray object, avoid indexing in the Utf string since - * the byte array contains one byte per character. Otherwise, use the - * Unicode string rep to get the index'th char. + * Get Unicode or byte-array char length to calculate what 'end' means. */ - if (objv[1]->typePtr == &tclByteArrayType) { - const unsigned char *string = - Tcl_GetByteArrayFromObj(objv[1], &length); + length = Tcl_GetCharLength(objv[1]); + if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { + return TCL_ERROR; + } + + if ((index >= 0) && (index < length)) { + Tcl_UniChar ch = Tcl_GetUniChar(objv[1], index); - if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK){ - return TCL_ERROR; - } - string = Tcl_GetByteArrayFromObj(objv[1], &length); - if ((index >= 0) && (index < length)) { - Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(string + index, 1)); - } - } else { /* - * Get Unicode char length to calulate what 'end' means. + * If we have a ByteArray object, we're careful to generate a new + * bytearray for a result. */ - length = Tcl_GetCharLength(objv[1]); + if (objv[1]->typePtr == &tclByteArrayType) { + unsigned char uch = (unsigned char) ch; - if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK){ - return TCL_ERROR; - } - if ((index >= 0) && (index < length)) { + Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1)); + } else { char buf[TCL_UTF_MAX]; - Tcl_UniChar ch; - ch = Tcl_GetUniChar(objv[1], index); length = Tcl_UniCharToUtf(ch, buf); Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length)); } @@ -2045,7 +2039,6 @@ StringRangeCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. 
*/ { - const unsigned char *string; int length, first, last; if (objc != 4) { @@ -2054,22 +2047,12 @@ StringRangeCmd( } /* - * If we have a ByteArray object, avoid indexing in the Utf string since - * the byte array contains one byte per character. Otherwise, use the - * Unicode string rep to get the range. + * Get the length in actual characters; this uses the unicode string rep + * or the byte-array rep. We then reduce it by one because 'end' refers to + * the last character, not one past it. */ - if (objv[1]->typePtr == &tclByteArrayType) { - string = Tcl_GetByteArrayFromObj(objv[1], &length); - length--; - } else { - /* - * Get the length in actual characters. - */ - - string = NULL; - length = Tcl_GetCharLength(objv[1]) - 1; - } + length = Tcl_GetCharLength(objv[1]) - 1; if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK || TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK) { @@ -2083,17 +2066,7 @@ StringRangeCmd( last = length; } if (last >= first) { - if (string != NULL) { - /* - * Reread the string to prevent shimmering nasties. - */ - - string = Tcl_GetByteArrayFromObj(objv[1], &length); - Tcl_SetObjResult(interp, - Tcl_NewByteArrayObj(string+first, last - first + 1)); - } else { - Tcl_SetObjResult(interp, Tcl_GetRange(objv[1], first, last)); - } + Tcl_SetObjResult(interp, Tcl_GetRange(objv[1], first, last)); } return TCL_OK; } @@ -2785,25 +2758,12 @@ StringLenCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - int length; - if (objc != 2) { Tcl_WrongNumArgs(interp, 1, objv, "string"); return TCL_ERROR; } - /* - * If we have a ByteArray object, avoid recomputing the string since the - * byte array contains one byte per character. Otherwise, use the Unicode - * string rep to calculate the length. 
- */ - - if (objv[1]->typePtr == &tclByteArrayType) { - (void) Tcl_GetByteArrayFromObj(objv[1], &length); - } else { - length = Tcl_GetCharLength(objv[1]); - } - Tcl_SetObjResult(interp, Tcl_NewIntObj(length)); + Tcl_SetObjResult(interp, Tcl_NewIntObj(Tcl_GetCharLength(objv[1]))); return TCL_OK; } -- cgit v0.12