summaryrefslogtreecommitdiffstats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r--generic/tclStringObj.c201
1 files changed, 201 insertions, 0 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 85b15fd..1417ea3 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -2841,6 +2841,207 @@ TclStringCatObjv(
/*
*---------------------------------------------------------------------------
*
+ * TclStringFind --
+ *
+ * Implements the [string first] operation.
+ *
+ * Results:
+ * If needle is found as a substring of haystack, the index of the
+ * first instance of such a find is returned. If needle is not present
+ * as a substring of haystack, -1 is returned.
+ *
+ * Side effects:
+ * needle and haystack may have their Tcl_ObjType changed.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TclStringFind(
+ Tcl_Obj *needle,
+ Tcl_Obj *haystack,
+ int start)
+{
+ int lh, ln = Tcl_GetCharLength(needle);
+
+ if (ln == 0) {
+ /*
+ * We don't find empty substrings. Bizarre!
+ *
+ * TODO: When we one day make this a true substring
+ * finder, change this to "return 0"
+ */
+ return -1;
+ }
+
+ if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) {
+ unsigned char *end, *try, *bh;
+ unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);
+
+ bh = Tcl_GetByteArrayFromObj(haystack, &lh);
+ end = bh + lh;
+
+ try = bh + start;
+ while (try + ln <= end) {
+ try = memchr(try, bn[0], end - try);
+
+ if (try == NULL) {
+ return -1;
+ }
+ if (0 == memcmp(try+1, bn+1, ln-1)) {
+ return (try - bh);
+ }
+ try++;
+ }
+ return -1;
+ }
+
+ lh = Tcl_GetCharLength(haystack);
+ if (haystack->bytes && (lh == haystack->length)) {
+ /* haystack is all single-byte chars */
+
+ if (needle->bytes && (ln == needle->length)) {
+ /* needle is also all single-byte chars */
+ char *found = strstr(haystack->bytes + start, needle->bytes);
+
+ if (found) {
+ return (found - haystack->bytes);
+ } else {
+ return -1;
+ }
+ } else {
+ /*
+ * Cannot find substring with a multi-byte char inside
+ * a string with no multi-byte chars.
+ */
+ return -1;
+ }
+ } else {
+ Tcl_UniChar *try, *end, *uh;
+ Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
+
+ uh = Tcl_GetUnicodeFromObj(haystack, &lh);
+ end = uh + lh;
+
+ try = uh + start;
+ while (try + ln <= end) {
+ if ((*try == *un)
+ && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
+ return (try - uh);
+ }
+ try++;
+ }
+ return -1;
+ }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclStringLast --
+ *
+ * Implements the [string last] operation.
+ *
+ * Results:
+ * If needle is found as a substring of haystack, the index of the
+ * last instance of such a find is returned. If needle is not present
+ * as a substring of haystack, -1 is returned.
+ *
+ * Side effects:
+ * needle and haystack may have their Tcl_ObjType changed.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TclStringLast(
+ Tcl_Obj *needle,
+ Tcl_Obj *haystack,
+ int last)
+{
+ int lh, ln = Tcl_GetCharLength(needle);
+
+ if (ln == 0) {
+ /*
+ * We don't find empty substrings. Bizarre!
+ *
+ * TODO: When we one day make this a true substring
+ * finder, change this to "return 0"
+ */
+ return -1;
+ }
+
+ if (ln > last + 1) {
+ return -1;
+ }
+
+ if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) {
+ unsigned char *try, *bh;
+ unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);
+
+ bh = Tcl_GetByteArrayFromObj(haystack, &lh);
+
+ if (last + 1 > lh) {
+ last = lh - 1;
+ }
+ try = bh + last + 1 - ln;
+ while (try >= bh) {
+ if ((*try == bn[0])
+ && (0 == memcmp(try+1, bn+1, ln-1))) {
+ return (try - bh);
+ }
+ try--;
+ }
+ return -1;
+ }
+
+ lh = Tcl_GetCharLength(haystack);
+ if (last + 1 > lh) {
+ last = lh - 1;
+ }
+ if (haystack->bytes && (lh == haystack->length)) {
+ /* haystack is all single-byte chars */
+
+ if (needle->bytes && (ln == needle->length)) {
+ /* needle is also all single-byte chars */
+
+ char *try = haystack->bytes + last + 1 - ln;
+ while (try >= haystack->bytes) {
+ if ((*try == needle->bytes[0])
+ && (0 == memcmp(try+1, needle->bytes + 1, ln - 1))) {
+ return (try - haystack->bytes);
+ }
+ try--;
+ }
+ return -1;
+ } else {
+ /*
+ * Cannot find substring with a multi-byte char inside
+ * a string with no multi-byte chars.
+ */
+ return -1;
+ }
+ } else {
+ Tcl_UniChar *try, *uh;
+ Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
+
+ uh = Tcl_GetUnicodeFromObj(haystack, &lh);
+
+ try = uh + last + 1 - ln;
+ while (try >= uh) {
+ if ((*try == un[0])
+ && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
+ return (try - uh);
+ }
+ try--;
+ }
+ return -1;
+ }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
* TclStringObjReverse --
*
* Implements the [string reverse] operation.