summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2016-11-07 20:11:11 (GMT)
committerdgp <dgp@users.sourceforge.net>2016-11-07 20:11:11 (GMT)
commit547c580e1abe051ba9f909c8f6d20a43595bb667 (patch)
tree987324200646eaa485ded0e53970555ff3033b7c
parent9bbdc1599de82b388e548191a669524d9b577653 (diff)
parent96b75029d111bab1f01acea88795c774b96b4f6e (diff)
downloadtcl-547c580e1abe051ba9f909c8f6d20a43595bb667.zip
tcl-547c580e1abe051ba9f909c8f6d20a43595bb667.tar.gz
tcl-547c580e1abe051ba9f909c8f6d20a43595bb667.tar.bz2
Refactor to channel all [string first] functions through a single implementation.
-rw-r--r--generic/tclCmdMZ.c78
-rw-r--r--generic/tclExecute.c4
-rw-r--r--generic/tclInt.h2
-rw-r--r--generic/tclStringObj.c97
4 files changed, 112 insertions, 69 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 10c2ef3..be77b1f 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1176,8 +1176,7 @@ StringFirstCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar *needleStr, *haystackStr;
- int match, start, needleLen, haystackLen;
+ int start = 0;
if (objc < 3 || objc > 4) {
Tcl_WrongNumArgs(interp, 1, objv,
@@ -1185,82 +1184,23 @@ StringFirstCmd(
return TCL_ERROR;
}
- /*
- * We are searching haystackStr for the sequence needleStr.
- */
-
- match = -1;
- start = 0;
- haystackLen = -1;
-
- needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen);
- haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen);
-
if (objc == 4) {
- /*
- * If a startIndex is specified, we will need to fast forward to that
- * point in the string before we think about a match.
- */
+ int size = Tcl_GetCharLength(objv[2]);
- if (TclGetIntForIndexM(interp, objv[3], haystackLen-1,
- &start) != TCL_OK){
+ if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &start)) {
return TCL_ERROR;
}
- /*
- * Reread to prevent shimmering problems.
- */
-
- needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen);
- haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen);
-
- if (start >= haystackLen) {
- goto str_first_done;
- } else if (start > 0) {
- haystackStr += start;
- haystackLen -= start;
- } else if (start < 0) {
- /*
- * Invalid start index mapped to string start; Bug #423581
- */
-
+ if (start < 0) {
start = 0;
}
- }
-
- /*
- * If the length of the needle is more than the length of the haystack, it
- * cannot be contained in there so we can avoid searching. [Bug 2960021]
- */
-
- if (needleLen > 0 && needleLen <= haystackLen) {
- register Tcl_UniChar *p, *end;
-
- end = haystackStr + haystackLen - needleLen + 1;
- for (p = haystackStr; p < end; p++) {
- /*
- * Scan forward to find the first character.
- */
-
- if ((*p == *needleStr) && (memcmp(needleStr, p,
- sizeof(Tcl_UniChar) * (size_t)needleLen) == 0)) {
- match = p - haystackStr;
- break;
- }
+ if (start >= size) {
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(-1));
+ return TCL_OK;
}
}
-
- /*
- * Compute the character index of the matching string by counting the
- * number of characters before the match.
- */
-
- if ((match != -1) && (objc == 4)) {
- match += start;
- }
-
- str_first_done:
- Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringFind(objv[1],
+ objv[2], start)));
return TCL_OK;
}
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 83b83f1..5ea199d 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5720,6 +5720,9 @@ TEBCresume(
NEXT_INST_V(1, 3, 1);
case INST_STR_FIND:
+#if 1
+ match = TclStringFind(OBJ_UNDER_TOS, OBJ_AT_TOS, 0);
+#else
ustring1 = Tcl_GetUnicodeFromObj(OBJ_AT_TOS, &length); /* Haystack */
ustring2 = Tcl_GetUnicodeFromObj(OBJ_UNDER_TOS, &length2);/* Needle */
@@ -5734,6 +5737,7 @@ TEBCresume(
}
}
}
+#endif
TRACE(("%.20s %.20s => %d\n",
O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), match));
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 8a647f0..26592f9 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3138,6 +3138,8 @@ MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr,
MODULE_SCOPE int TclStringCatObjv(Tcl_Interp *interp, int inPlace,
int objc, Tcl_Obj *const objv[],
Tcl_Obj **objPtrPtr);
+MODULE_SCOPE int TclStringFind(Tcl_Obj *needle, Tcl_Obj *haystack,
+ unsigned int start);
MODULE_SCOPE int TclStringMatch(const char *str, int strLen,
const char *pattern, int ptnLen, int flags);
MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj,
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index b486106..edba881 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -2841,6 +2841,103 @@ TclStringCatObjv(
/*
*---------------------------------------------------------------------------
*
+ * TclStringFind --
+ *
+ * Implements the [string first] operation.
+ *
+ * Results:
+ * If needle is found as a substring of haystack, the index of the
+ * first instance of such a find is returned. If needle is not present
+ * as a substring of haystack, -1 is returned.
+ *
+ * Side effects:
+ * needle and haystack may have their Tcl_ObjType changed.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TclStringFind(
+ Tcl_Obj *needle,
+ Tcl_Obj *haystack,
+ unsigned int start)
+{
+ int lh, ln = Tcl_GetCharLength(needle);
+
+ if (ln == 0) {
+ /*
+ * We don't find empty substrings. Bizarre!
+ *
+ * TODO: When we one day make this a true substring
+ * finder, change this to "return 0"
+ */
+ return -1;
+ }
+
+ if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) {
+ unsigned char *end, *try, *bh;
+ unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);
+
+ bh = Tcl_GetByteArrayFromObj(haystack, &lh);
+ end = bh + lh;
+
+ try = bh + start;
+ while (try + ln <= end) {
+ try = memchr(try, bn[0], end - try);
+
+ if (try == NULL) {
+ return -1;
+ }
+ if (0 == memcmp(try+1, bn+1, ln-1)) {
+ return (try - bh);
+ }
+ try++;
+ }
+ return -1;
+ }
+
+ lh = Tcl_GetCharLength(haystack);
+ if (haystack->bytes && (lh == haystack->length)) {
+ /* haystack is all single-byte chars */
+
+ if (needle->bytes && (ln == needle->length)) {
+ /* needle is also all single-byte chars */
+ char *found = strstr(haystack->bytes + start, needle->bytes);
+
+ if (found) {
+ return (found - haystack->bytes);
+ } else {
+ return -1;
+ }
+ } else {
+ /*
+ * Cannot find substring with a multi-byte char inside
+ * a string with no multi-byte chars.
+ */
+ return -1;
+ }
+ } else {
+ Tcl_UniChar *try, *end, *uh;
+ Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
+
+ uh = Tcl_GetUnicodeFromObj(haystack, &lh);
+ end = uh + lh;
+
+ try = uh + start;
+ while (try + ln <= end) {
+ if ((*try == *un)
+ && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
+ return (try - uh);
+ }
+ try++;
+ }
+ return -1;
+ }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
* TclStringObjReverse --
*
* Implements the [string reverse] operation.