summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/tclCmdMZ.c10
-rw-r--r--generic/tclExecute.c2
-rw-r--r--generic/tclInt.h4
-rw-r--r--generic/tclStringObj.c63
4 files changed, 33 insertions, 46 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index f9e404b..5d49d2a 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1335,16 +1335,8 @@ StringFirstCmd(
if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &start)) {
return TCL_ERROR;
}
-
- if (start < 0) {
- start = 0;
- }
- if (start >= size) {
- Tcl_SetObjResult(interp, Tcl_NewIntObj(-1));
- return TCL_OK;
- }
}
- Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringFind(objv[1],
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringFirst(objv[1],
objv[2], start)));
return TCL_OK;
}
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index a30ec89..679b57a 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5715,7 +5715,7 @@ TEBCresume(
NEXT_INST_V(1, 3, 1);
case INST_STR_FIND:
- match = TclStringFind(OBJ_UNDER_TOS, OBJ_AT_TOS, 0);
+ match = TclStringFirst(OBJ_UNDER_TOS, OBJ_AT_TOS, 0);
TRACE(("%.20s %.20s => %d\n",
O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), match));
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 6cb9955..f288a3a 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3189,8 +3189,6 @@ MODULE_SCOPE void TclSpellFix(Tcl_Interp *interp,
Tcl_Obj *bad, Tcl_Obj *fix);
MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr,
int numBytes);
-MODULE_SCOPE int TclStringFind(Tcl_Obj *needle, Tcl_Obj *haystack,
- int start);
MODULE_SCOPE int TclStringLast(Tcl_Obj *needle, Tcl_Obj *haystack,
int last);
MODULE_SCOPE int TclStringMatch(const char *str, int strLen,
@@ -4009,6 +4007,8 @@ MODULE_SCOPE int TclCompileAssembleCmd(Tcl_Interp *interp,
MODULE_SCOPE Tcl_Obj * TclStringCat(Tcl_Interp *interp, int objc,
Tcl_Obj *const objv[], int flags);
+MODULE_SCOPE int TclStringFirst(Tcl_Obj *needle, Tcl_Obj *haystack,
+ int start);
MODULE_SCOPE Tcl_Obj * TclStringRepeat(Tcl_Interp *interp, Tcl_Obj *objPtr,
int count, int flags);
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 46162ff..bb72acd 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -3199,7 +3199,7 @@ TclStringCat(
/*
*---------------------------------------------------------------------------
*
- * TclStringFind --
+ * TclStringFirst --
*
* Implements the [string first] operation.
*
@@ -3215,20 +3215,20 @@ TclStringCat(
*/
int
-TclStringFind(
+TclStringFirst(
Tcl_Obj *needle,
Tcl_Obj *haystack,
int start)
{
int lh, ln = Tcl_GetCharLength(needle);
+ if (start < 0) {
+ start = 0;
+ }
if (ln == 0) {
- /*
- * We don't find empty substrings. Bizarre!
- *
- * TODO: When we one day make this a true substring
- * finder, change this to "return 0"
- */
+ /* We don't find empty substrings. Bizarre!
+ * Whenever this routine is turned into a proper substring
+ * finder, change to `return start` after limits imposed. */
return -1;
}
@@ -3236,51 +3236,46 @@ TclStringFind(
unsigned char *end, *try, *bh;
unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);
+ /* Find bytes in bytes */
bh = Tcl_GetByteArrayFromObj(haystack, &lh);
end = bh + lh;
try = bh + start;
while (try + ln <= end) {
- try = memchr(try, bn[0], end - try);
-
+ /*
+ * Look for the leading byte of the needle in the haystack
+ * starting at try and stopping when there's not enough room
+ * for the needle left.
+ */
+ try = memchr(try, bn[0], (end + 1 - ln) - try);
if (try == NULL) {
+ /* Leading byte not found -> needle cannot be found. */
return -1;
}
+ /* Leading byte found, check rest of needle. */
if (0 == memcmp(try+1, bn+1, ln-1)) {
+ /* Checks! Return the successful index. */
return (try - bh);
}
+ /* Rest of needle match failed; Iterate to continue search. */
try++;
}
return -1;
}
/*
- * Check if we have two strings of single-byte characters. If we have, we
- * can use strstr() to do the search. Note that we can sometimes have
- * multibyte characters when the string could be minimally represented
- * using single byte characters; we can't assume that a mismatch here
- * means no match.
+ * TODO: It might be nice to support some cases where it is not
+ * necessary to shimmer to &tclStringType to compute the result,
+ * and instead operate just on the objPtr->bytes values directly.
+ * However, we also do not want the answer to change based on the
+ * code pathway, or if it does we want that to be for some values
+ * we explicitly decline to support. Getting there will involve
+ * locking down in practice more firmly just what encodings produce
+ * what supported results for the objPtr->bytes values. For now,
+ * do only the well-defined Tcl_UniChar array search.
*/
- lh = Tcl_GetCharLength(haystack);
- if (haystack->bytes && (lh == haystack->length) && needle->bytes
- && (ln == needle->length)) {
- /*
- * Both haystack and needle are all single-byte chars.
- */
-
- char *found = strstr(haystack->bytes + start, needle->bytes);
-
- if (found) {
- return (found - haystack->bytes);
- } else {
- return -1;
- }
- } else {
- /*
- * Do the search on the unicode representation for simplicity.
- */
-
+ {
Tcl_UniChar *try, *end, *uh;
Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);