diff options
-rw-r--r-- | generic/tclCmdMZ.c | 10 | ||||
-rw-r--r-- | generic/tclExecute.c | 2 | ||||
-rw-r--r-- | generic/tclInt.h | 4 | ||||
-rw-r--r-- | generic/tclStringObj.c | 63 |
4 files changed, 33 insertions, 46 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index f9e404b..5d49d2a 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1335,16 +1335,8 @@ StringFirstCmd( if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &start)) { return TCL_ERROR; } - - if (start < 0) { - start = 0; - } - if (start >= size) { - Tcl_SetObjResult(interp, Tcl_NewIntObj(-1)); - return TCL_OK; - } } - Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringFind(objv[1], + Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringFirst(objv[1], objv[2], start))); return TCL_OK; } diff --git a/generic/tclExecute.c b/generic/tclExecute.c index a30ec89..679b57a 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5715,7 +5715,7 @@ TEBCresume( NEXT_INST_V(1, 3, 1); case INST_STR_FIND: - match = TclStringFind(OBJ_UNDER_TOS, OBJ_AT_TOS, 0); + match = TclStringFirst(OBJ_UNDER_TOS, OBJ_AT_TOS, 0); TRACE(("%.20s %.20s => %d\n", O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), match)); diff --git a/generic/tclInt.h b/generic/tclInt.h index 6cb9955..f288a3a 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3189,8 +3189,6 @@ MODULE_SCOPE void TclSpellFix(Tcl_Interp *interp, Tcl_Obj *bad, Tcl_Obj *fix); MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr, int numBytes); -MODULE_SCOPE int TclStringFind(Tcl_Obj *needle, Tcl_Obj *haystack, - int start); MODULE_SCOPE int TclStringLast(Tcl_Obj *needle, Tcl_Obj *haystack, int last); MODULE_SCOPE int TclStringMatch(const char *str, int strLen, @@ -4009,6 +4007,8 @@ MODULE_SCOPE int TclCompileAssembleCmd(Tcl_Interp *interp, MODULE_SCOPE Tcl_Obj * TclStringCat(Tcl_Interp *interp, int objc, Tcl_Obj *const objv[], int flags); +MODULE_SCOPE int TclStringFirst(Tcl_Obj *needle, Tcl_Obj *haystack, + int start); MODULE_SCOPE Tcl_Obj * TclStringRepeat(Tcl_Interp *interp, Tcl_Obj *objPtr, int count, int flags); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 46162ff..bb72acd 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -3199,7 +3199,7 @@ TclStringCat( /* *--------------------------------------------------------------------------- * - * TclStringFind -- + * TclStringFirst -- * * Implements the [string first] operation. * @@ -3215,20 +3215,20 @@ TclStringCat( */ int -TclStringFind( +TclStringFirst( Tcl_Obj *needle, Tcl_Obj *haystack, int start) { int lh, ln = Tcl_GetCharLength(needle); + if (start < 0) { + start = 0; + } if (ln == 0) { - /* - * We don't find empty substrings. Bizarre! - * - * TODO: When we one day make this a true substring - * finder, change this to "return 0" - */ + /* We don't find empty substrings. Bizarre! + * Whenever this routine is turned into a proper substring + * finder, change to `return start` after limits imposed. */ return -1; } @@ -3236,51 +3236,46 @@ TclStringFind( unsigned char *end, *try, *bh; unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); + /* Find bytes in bytes */ bh = Tcl_GetByteArrayFromObj(haystack, &lh); end = bh + lh; try = bh + start; while (try + ln <= end) { - try = memchr(try, bn[0], end - try); - + /* + * Look for the leading byte of the needle in the haystack + * starting at try and stopping when there's not enough room + * for the needle left. + */ + try = memchr(try, bn[0], (end + 1 - ln) - try); if (try == NULL) { + /* Leading byte not found -> needle cannot be found. */ return -1; } + /* Leading byte found, check rest of needle. */ if (0 == memcmp(try+1, bn+1, ln-1)) { + /* Checks! Return the successful index. */ return (try - bh); } + /* Rest of needle match failed; Iterate to continue search. */ try++; } return -1; } /* - * Check if we have two strings of single-byte characters. If we have, we - * can use strstr() to do the search. Note that we can sometimes have - * multibyte characters when the string could be minimally represented - * using single byte characters; we can't assume that a mismatch here - * means no match. + * TODO: It might be nice to support some cases where it is not + * necessary to shimmer to &tclStringType to compute the result, + * and instead operate just on the objPtr->bytes values directly. + * However, we also do not want the answer to change based on the + * code pathway, or if it does we want that to be for some values + * we explicitly decline to support. Getting there will involve + * locking down in practice more firmly just what encodings produce + * what supported results for the objPtr->bytes values. For now, + * do only the well-defined Tcl_UniChar array search. */ - lh = Tcl_GetCharLength(haystack); - if (haystack->bytes && (lh == haystack->length) && needle->bytes - && (ln == needle->length)) { - /* - * Both haystack and needle are all single-byte chars. - */ - - char *found = strstr(haystack->bytes + start, needle->bytes); - - if (found) { - return (found - haystack->bytes); - } else { - return -1; - } - } else { - /* - * Do the search on the unicode representation for simplicity. - */ - + { Tcl_UniChar *try, *end, *uh; Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); |