summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2016-11-04 21:29:30 (GMT)
committerdgp <dgp@users.sourceforge.net>2016-11-04 21:29:30 (GMT)
commit5f1098561b5067a8f429be371166ed3b1e6189e4 (patch)
tree67179cf720569f5de357549751755dd9d0332b0e
parent9bbdc1599de82b388e548191a669524d9b577653 (diff)
downloadtcl-5f1098561b5067a8f429be371166ed3b1e6189e4.zip
tcl-5f1098561b5067a8f429be371166ed3b1e6189e4.tar.gz
tcl-5f1098561b5067a8f429be371166ed3b1e6189e4.tar.bz2
First draft refactoring the [string first] functionality.
-rw-r--r--generic/tclExecute.c4
-rw-r--r--generic/tclInt.h2
-rw-r--r--generic/tclStringObj.c84
3 files changed, 90 insertions, 0 deletions
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 83b83f1..5ea199d 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5720,6 +5720,9 @@ TEBCresume(
NEXT_INST_V(1, 3, 1);
case INST_STR_FIND:
+#if 1
+ match = TclStringFind(OBJ_UNDER_TOS, OBJ_AT_TOS, 0);
+#else
ustring1 = Tcl_GetUnicodeFromObj(OBJ_AT_TOS, &length); /* Haystack */
ustring2 = Tcl_GetUnicodeFromObj(OBJ_UNDER_TOS, &length2);/* Needle */
@@ -5734,6 +5737,7 @@ TEBCresume(
}
}
}
+#endif
TRACE(("%.20s %.20s => %d\n",
O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), match));
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 8a647f0..26592f9 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3138,6 +3138,8 @@ MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr,
MODULE_SCOPE int TclStringCatObjv(Tcl_Interp *interp, int inPlace,
int objc, Tcl_Obj *const objv[],
Tcl_Obj **objPtrPtr);
+MODULE_SCOPE int TclStringFind(Tcl_Obj *needle, Tcl_Obj *haystack,
+ unsigned int start);
MODULE_SCOPE int TclStringMatch(const char *str, int strLen,
const char *pattern, int ptnLen, int flags);
MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj,
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index b486106..6e1529c 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -2841,6 +2841,90 @@ TclStringCatObjv(
/*
*---------------------------------------------------------------------------
*
+ * TclStringFind --
+ *
+ * Implements the [string first] operation.
+ *
+ * Results:
+ * If needle is found as a substring of haystack, the index of the
+ * first instance of such a find is returned. If needle is not present
+ * as a substring of haystack, -1 is returned.
+ *
+ * Side effects:
+ * needle and haystack may have their Tcl_ObjType changed.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TclStringFind(
+ Tcl_Obj *needle,
+ Tcl_Obj *haystack,
+ unsigned int start)
+{
+ int ln, lh;
+
+ if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) {
+ unsigned char *end, *try, *bh;
+ unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);
+
+ if (ln == 0) {
+ /*
+ * We don't find empty substrings. Bizarre!
+ *
+ * TODO: When we one day make this a true substring
+ * finder, change this to "return 0"
+ */
+ return -1;
+ }
+
+ bh = Tcl_GetByteArrayFromObj(haystack, &lh);
+ end = bh + lh;
+
+ try = bh + start;
+ while (try + ln <= end) {
+ try = memchr(try, bn[0], end - try);
+
+ if (try == NULL) {
+ return -1;
+ }
+ if (0 == memcmp(try+1, bn+1, ln-1)) {
+ return (try - bh);
+ }
+ try++;
+ }
+ return -1;
+ }
+
+ /* TODO: Detect and optimize case with single byte chars only */
+
+ {
+ Tcl_UniChar *try, *end, *uh;
+ Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
+
+ if (ln == 0) {
+ /* See above */
+ return -1;
+ }
+
+ uh = Tcl_GetUnicodeFromObj(haystack, &lh);
+ end = uh + lh;
+
+ try = uh + start;
+ while (try + ln <= end) {
+ if ((*try == *un)
+ && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
+ return (try - uh);
+ }
+ try++;
+ }
+ return -1;
+ }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
* TclStringObjReverse --
*
* Implements the [string reverse] operation.