diff options
author | dgp <dgp@users.sourceforge.net> | 2016-11-07 20:11:11 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2016-11-07 20:11:11 (GMT) |
commit | 547c580e1abe051ba9f909c8f6d20a43595bb667 (patch) | |
tree | 987324200646eaa485ded0e53970555ff3033b7c | |
parent | 9bbdc1599de82b388e548191a669524d9b577653 (diff) | |
parent | 96b75029d111bab1f01acea88795c774b96b4f6e (diff) | |
download | tcl-547c580e1abe051ba9f909c8f6d20a43595bb667.zip tcl-547c580e1abe051ba9f909c8f6d20a43595bb667.tar.gz tcl-547c580e1abe051ba9f909c8f6d20a43595bb667.tar.bz2 |
Refactor to channel all [string first] functions through a single implementation.
-rw-r--r-- | generic/tclCmdMZ.c | 78 | ||||
-rw-r--r-- | generic/tclExecute.c | 4 | ||||
-rw-r--r-- | generic/tclInt.h | 2 | ||||
-rw-r--r-- | generic/tclStringObj.c | 97 |
4 files changed, 112 insertions, 69 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 10c2ef3..be77b1f 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1176,8 +1176,7 @@ StringFirstCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar *needleStr, *haystackStr; - int match, start, needleLen, haystackLen; + int start = 0; if (objc < 3 || objc > 4) { Tcl_WrongNumArgs(interp, 1, objv, @@ -1185,82 +1184,23 @@ StringFirstCmd( return TCL_ERROR; } - /* - * We are searching haystackStr for the sequence needleStr. - */ - - match = -1; - start = 0; - haystackLen = -1; - - needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); - haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); - if (objc == 4) { - /* - * If a startIndex is specified, we will need to fast forward to that - * point in the string before we think about a match. - */ + int size = Tcl_GetCharLength(objv[2]); - if (TclGetIntForIndexM(interp, objv[3], haystackLen-1, - &start) != TCL_OK){ + if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &start)) { return TCL_ERROR; } - /* - * Reread to prevent shimmering problems. - */ - - needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); - haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); - - if (start >= haystackLen) { - goto str_first_done; - } else if (start > 0) { - haystackStr += start; - haystackLen -= start; - } else if (start < 0) { - /* - * Invalid start index mapped to string start; Bug #423581 - */ - + if (start < 0) { start = 0; } - } - - /* - * If the length of the needle is more than the length of the haystack, it - * cannot be contained in there so we can avoid searching. [Bug 2960021] - */ - - if (needleLen > 0 && needleLen <= haystackLen) { - register Tcl_UniChar *p, *end; - - end = haystackStr + haystackLen - needleLen + 1; - for (p = haystackStr; p < end; p++) { - /* - * Scan forward to find the first character. - */ - - if ((*p == *needleStr) && (memcmp(needleStr, p, - sizeof(Tcl_UniChar) * (size_t)needleLen) == 0)) { - match = p - haystackStr; - break; - } + if (start >= size) { + Tcl_SetObjResult(interp, Tcl_NewIntObj(-1)); + return TCL_OK; } } - - /* - * Compute the character index of the matching string by counting the - * number of characters before the match. - */ - - if ((match != -1) && (objc == 4)) { - match += start; - } - - str_first_done: - Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); + Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringFind(objv[1], + objv[2], start))); return TCL_OK; } diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 83b83f1..5ea199d 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5720,6 +5720,9 @@ TEBCresume( NEXT_INST_V(1, 3, 1); case INST_STR_FIND: +#if 1 + match = TclStringFind(OBJ_UNDER_TOS, OBJ_AT_TOS, 0); +#else ustring1 = Tcl_GetUnicodeFromObj(OBJ_AT_TOS, &length); /* Haystack */ ustring2 = Tcl_GetUnicodeFromObj(OBJ_UNDER_TOS, &length2);/* Needle */ @@ -5734,6 +5737,7 @@ TEBCresume( } } } +#endif TRACE(("%.20s %.20s => %d\n", O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), match)); diff --git a/generic/tclInt.h b/generic/tclInt.h index 8a647f0..26592f9 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3138,6 +3138,8 @@ MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr, MODULE_SCOPE int TclStringCatObjv(Tcl_Interp *interp, int inPlace, int objc, Tcl_Obj *const objv[], Tcl_Obj **objPtrPtr); +MODULE_SCOPE int TclStringFind(Tcl_Obj *needle, Tcl_Obj *haystack, + unsigned int start); MODULE_SCOPE int TclStringMatch(const char *str, int strLen, const char *pattern, int ptnLen, int flags); MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj, diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index b486106..edba881 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2841,6 +2841,103 @@ TclStringCatObjv( /* *--------------------------------------------------------------------------- * + * TclStringFind -- + * + * Implements the [string first] operation. + * + * Results: + * If needle is found as a substring of haystack, the index of the + * first instance of such a find is returned. If needle is not present + * as a substring of haystack, -1 is returned. + * + * Side effects: + * needle and haystack may have their Tcl_ObjType changed. + * + *--------------------------------------------------------------------------- + */ + +int +TclStringFind( + Tcl_Obj *needle, + Tcl_Obj *haystack, + unsigned int start) +{ + int lh, ln = Tcl_GetCharLength(needle); + + if (ln == 0) { + /* + * We don't find empty substrings. Bizarre! + * + * TODO: When we one day make this a true substring + * finder, change this to "return 0" + */ + return -1; + } + + if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { + unsigned char *end, *try, *bh; + unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); + + bh = Tcl_GetByteArrayFromObj(haystack, &lh); + end = bh + lh; + + try = bh + start; + while (try + ln <= end) { + try = memchr(try, bn[0], end - try); + + if (try == NULL) { + return -1; + } + if (0 == memcmp(try+1, bn+1, ln-1)) { + return (try - bh); + } + try++; + } + return -1; + } + + lh = Tcl_GetCharLength(haystack); + if (haystack->bytes && (lh == haystack->length)) { + /* haystack is all single-byte chars */ + + if (needle->bytes && (ln == needle->length)) { + /* needle is also all single-byte chars */ + char *found = strstr(haystack->bytes + start, needle->bytes); + + if (found) { + return (found - haystack->bytes); + } else { + return -1; + } + } else { + /* + * Cannot find substring with a multi-byte char inside + * a string with no multi-byte chars. + */ + return -1; + } + } else { + Tcl_UniChar *try, *end, *uh; + Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); + + uh = Tcl_GetUnicodeFromObj(haystack, &lh); + end = uh + lh; + + try = uh + start; + while (try + ln <= end) { + if ((*try == *un) + && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) { + return (try - uh); + } + try++; + } + return -1; + } +} + +/* + *--------------------------------------------------------------------------- + * * TclStringObjReverse -- * * Implements the [string reverse] operation. |