From cad330bea5eb1c5fa6958b11506b10529846eb00 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 8 Nov 2016 02:56:11 +0000 Subject: Route all [string last] operations through a common implementation. --- generic/tclCmdMZ.c | 67 +++++-------------------------- generic/tclExecute.c | 15 +------ generic/tclInt.h | 4 +- generic/tclStringObj.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++- tests/string.test | 4 +- 5 files changed, 122 insertions(+), 74 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index be77b1f..e9a6933 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1229,76 +1229,31 @@ StringLastCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar *needleStr, *haystackStr, *p; - int match, start, needleLen, haystackLen; + int last = INT_MAX - 1; if (objc < 3 || objc > 4) { Tcl_WrongNumArgs(interp, 1, objv, - "needleString haystackString ?startIndex?"); + "needleString haystackString ?lastIndex?"); return TCL_ERROR; } - /* - * We are searching haystackString for the sequence needleString. - */ - - match = -1; - start = 0; - haystackLen = -1; - - needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); - haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); - if (objc == 4) { - /* - * If a startIndex is specified, we will need to restrict the string - * range to that char index in the string - */ + int size = Tcl_GetCharLength(objv[2]); - if (TclGetIntForIndexM(interp, objv[3], haystackLen-1, - &start) != TCL_OK){ + if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &last)) { return TCL_ERROR; } - /* - * Reread to prevent shimmering problems. - */ - - needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); - haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); - - if (start < 0) { - goto str_last_done; - } else if (start < haystackLen) { - p = haystackStr + start + 1 - needleLen; - } else { - p = haystackStr + haystackLen - needleLen; + if (last < 0) { + Tcl_SetObjResult(interp, Tcl_NewIntObj(-1)); + return TCL_OK; } - } else { - p = haystackStr + haystackLen - needleLen; - } - - /* - * If the length of the needle is more than the length of the haystack, it - * cannot be contained in there so we can avoid searching. [Bug 2960021] - */ - - if (needleLen > 0 && needleLen <= haystackLen) { - for (; p >= haystackStr; p--) { - /* - * Scan backwards to find the first character. - */ - - if ((*p == *needleStr) && !memcmp(needleStr, p, - sizeof(Tcl_UniChar) * (size_t)needleLen)) { - match = p - haystackStr; - break; - } + if (last >= size) { + last = size - 1; } } - - str_last_done: - Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); + Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringLast(objv[1], + objv[2], last))); return TCL_OK; } diff --git a/generic/tclExecute.c b/generic/tclExecute.c index a44451f..0d48071 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5728,23 +5728,10 @@ TEBCresume( NEXT_INST_F(1, 2, 1); case INST_STR_FIND_LAST: - ustring1 = Tcl_GetUnicodeFromObj(OBJ_AT_TOS, &length); /* Haystack */ - ustring2 = Tcl_GetUnicodeFromObj(OBJ_UNDER_TOS, &length2);/* Needle */ - - match = -1; - if (length2 > 0 && length2 <= length) { - for (p=ustring1+length-length2 ; p>=ustring1 ; p--) { - if ((*p == *ustring2) && - memcmp(ustring2,p,sizeof(Tcl_UniChar)*length2) == 0) { - match = p - ustring1; - break; - } - } - } + match = TclStringLast(OBJ_UNDER_TOS, OBJ_AT_TOS, INT_MAX - 1); TRACE(("%.20s %.20s => %d\n", O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), match)); - TclNewIntObj(objResultPtr, match); NEXT_INST_F(1, 2, 1); diff --git a/generic/tclInt.h b/generic/tclInt.h index 26592f9..e468f3d 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3139,7 +3139,9 @@ MODULE_SCOPE int TclStringCatObjv(Tcl_Interp *interp, int inPlace, int objc, Tcl_Obj *const objv[], Tcl_Obj **objPtrPtr); MODULE_SCOPE int TclStringFind(Tcl_Obj *needle, Tcl_Obj *haystack, - unsigned int start); + int start); +MODULE_SCOPE int TclStringLast(Tcl_Obj *needle, Tcl_Obj *haystack, + int last); MODULE_SCOPE int TclStringMatch(const char *str, int strLen, const char *pattern, int ptnLen, int flags); MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj, diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index edba881..f7791fe 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2860,7 +2860,7 @@ int TclStringFind( Tcl_Obj *needle, Tcl_Obj *haystack, - unsigned int start) + int start) { int lh, ln = Tcl_GetCharLength(needle); @@ -2938,6 +2938,110 @@ TclStringFind( /* *--------------------------------------------------------------------------- * + * TclStringLast -- + * + * Implements the [string last] operation. + * + * Results: + * If needle is found as a substring of haystack, the index of the + * last instance of such a find is returned. If needle is not present + * as a substring of haystack, -1 is returned. + * + * Side effects: + * needle and haystack may have their Tcl_ObjType changed. + * + *--------------------------------------------------------------------------- + */ + +int +TclStringLast( + Tcl_Obj *needle, + Tcl_Obj *haystack, + int last) +{ + int lh, ln = Tcl_GetCharLength(needle); + + if (ln == 0) { + /* + * We don't find empty substrings. Bizarre! + * + * TODO: When we one day make this a true substring + * finder, change this to "return 0" + */ + return -1; + } + + if (ln > last + 1) { + return -1; + } + + if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { + unsigned char *try, *bh; + unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); + + bh = Tcl_GetByteArrayFromObj(haystack, &lh); + + if (last + 1 > lh) { + last = lh - 1; + } + try = bh + last + 1 - ln; + while (try >= bh) { + if ((*try == bn[0]) + && (0 == memcmp(try+1, bn+1, ln-1))) { + return (try - bh); + } + try--; + } + return -1; + } + + lh = Tcl_GetCharLength(haystack); + if (last + 1 > lh) { + last = lh - 1; + } + if (haystack->bytes && (lh == haystack->length)) { + /* haystack is all single-byte chars */ + + if (needle->bytes && (ln == needle->length)) { + /* needle is also all single-byte chars */ + + char *try = haystack->bytes + last + 1 - ln; + while (try >= haystack->bytes) { + if ((*try == needle->bytes[0]) + && (0 == memcmp(try+1, needle->bytes + 1, ln - 1))) { + return (try - haystack->bytes); + } + try--; + } + return -1; + } else { + /* + * Cannot find substring with a multi-byte char inside + * a string with no multi-byte chars. + */ + return -1; + } + } else { + Tcl_UniChar *try, *uh; + Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); + + uh = Tcl_GetUnicodeFromObj(haystack, &lh); + + try = uh + last + 1 - ln; + while (try >= uh) { + if ((*try == un[0]) + && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) { + return (try - uh); + } + try--; + } + return -1; + } +} + +/* + *--------------------------------------------------------------------------- + * * TclStringObjReverse -- * * Implements the [string reverse] operation. diff --git a/tests/string.test b/tests/string.test index 5fdb510..11cbcff 100644 --- a/tests/string.test +++ b/tests/string.test @@ -756,13 +756,13 @@ catch {rename largest_int {}} test string-7.1 {string last, too few args} { list [catch {string last a} msg] $msg -} {1 {wrong # args: should be "string last needleString haystackString ?startIndex?"}} +} {1 {wrong # args: should be "string last needleString haystackString ?lastIndex?"}} test string-7.2 {string last, bad args} { list [catch {string last a b c} msg] $msg } {1 {bad index "c": must be integer?[+-]integer? or end?[+-]integer?}} test string-7.3 {string last, too many args} { list [catch {string last a b c d} msg] $msg -} {1 {wrong # args: should be "string last needleString haystackString ?startIndex?"}} +} {1 {wrong # args: should be "string last needleString haystackString ?lastIndex?"}} test string-7.4 {string last} { string la xxx xxxx123xx345x678 } 1 -- cgit v0.12