From 5f1098561b5067a8f429be371166ed3b1e6189e4 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 4 Nov 2016 21:29:30 +0000 Subject: First draft refactoring the [string first] functionality. --- generic/tclExecute.c | 4 +++ generic/tclInt.h | 2 ++ generic/tclStringObj.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 83b83f1..5ea199d 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5720,6 +5720,9 @@ TEBCresume( NEXT_INST_V(1, 3, 1); case INST_STR_FIND: +#if 1 + match = TclStringFind(OBJ_UNDER_TOS, OBJ_AT_TOS, 0); +#else ustring1 = Tcl_GetUnicodeFromObj(OBJ_AT_TOS, &length); /* Haystack */ ustring2 = Tcl_GetUnicodeFromObj(OBJ_UNDER_TOS, &length2);/* Needle */ @@ -5734,6 +5737,7 @@ TEBCresume( } } } +#endif TRACE(("%.20s %.20s => %d\n", O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), match)); diff --git a/generic/tclInt.h b/generic/tclInt.h index 8a647f0..26592f9 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3138,6 +3138,8 @@ MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr, MODULE_SCOPE int TclStringCatObjv(Tcl_Interp *interp, int inPlace, int objc, Tcl_Obj *const objv[], Tcl_Obj **objPtrPtr); +MODULE_SCOPE int TclStringFind(Tcl_Obj *needle, Tcl_Obj *haystack, + unsigned int start); MODULE_SCOPE int TclStringMatch(const char *str, int strLen, const char *pattern, int ptnLen, int flags); MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj, diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index b486106..6e1529c 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2841,6 +2841,90 @@ TclStringCatObjv( /* *--------------------------------------------------------------------------- * + * TclStringFind -- + * + * Implements the [string first] operation. + * + * Results: + * If needle is found as a substring of haystack, the index of the + * first instance of such a find is returned. If needle is not present + * as a substring of haystack, -1 is returned. + * + * Side effects: + * needle and haystack may have their Tcl_ObjType changed. + * + *--------------------------------------------------------------------------- + */ + +int +TclStringFind( + Tcl_Obj *needle, + Tcl_Obj *haystack, + unsigned int start) +{ + int ln, lh; + + if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { + unsigned char *end, *try, *bh; + unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); + + if (ln == 0) { + /* + * We don't find empty substrings. Bizarre! + * + * TODO: When we one day make this a true substring + * finder, change this to "return 0" + */ + return -1; + } + + bh = Tcl_GetByteArrayFromObj(haystack, &lh); + end = bh + lh; + + try = bh + start; + while (try + ln <= end) { + try = memchr(try, bn[0], end - try); + + if (try == NULL) { + return -1; + } + if (0 == memcmp(try+1, bn+1, ln-1)) { + return (try - bh); + } + try++; + } + return -1; + } + + /* TODO: Detect and optimize case with single byte chars only */ + + { + Tcl_UniChar *try, *end, *uh; + Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); + + if (ln == 0) { + /* See above */ + return -1; + } + + uh = Tcl_GetUnicodeFromObj(haystack, &lh); + end = uh + lh; + + try = uh + start; + while (try + ln <= end) { + if ((*try == *un) + && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) { + return (try - uh); + } + try++; + } + return -1; + } +} + +/* + *--------------------------------------------------------------------------- + * * TclStringObjReverse -- * * Implements the [string reverse] operation. -- cgit v0.12