summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2016-11-08 02:56:11 (GMT)
committerdgp <dgp@users.sourceforge.net>2016-11-08 02:56:11 (GMT)
commitcad330bea5eb1c5fa6958b11506b10529846eb00 (patch)
treee1ef2f40fc3b0408147198398113220f4f9d7eb4
parentb1c86e9055e5384cf165f8e509583067d4b32796 (diff)
downloadtcl-cad330bea5eb1c5fa6958b11506b10529846eb00.zip
tcl-cad330bea5eb1c5fa6958b11506b10529846eb00.tar.gz
tcl-cad330bea5eb1c5fa6958b11506b10529846eb00.tar.bz2
Route all [string last] operations through a common implementation.
-rw-r--r--generic/tclCmdMZ.c67
-rw-r--r--generic/tclExecute.c15
-rw-r--r--generic/tclInt.h4
-rw-r--r--generic/tclStringObj.c106
-rw-r--r--tests/string.test4
5 files changed, 122 insertions, 74 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index be77b1f..e9a6933 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1229,76 +1229,31 @@ StringLastCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar *needleStr, *haystackStr, *p;
- int match, start, needleLen, haystackLen;
+ int last = INT_MAX - 1;
if (objc < 3 || objc > 4) {
Tcl_WrongNumArgs(interp, 1, objv,
- "needleString haystackString ?startIndex?");
+ "needleString haystackString ?lastIndex?");
return TCL_ERROR;
}
- /*
- * We are searching haystackString for the sequence needleString.
- */
-
- match = -1;
- start = 0;
- haystackLen = -1;
-
- needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen);
- haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen);
-
if (objc == 4) {
- /*
- * If a startIndex is specified, we will need to restrict the string
- * range to that char index in the string
- */
+ int size = Tcl_GetCharLength(objv[2]);
- if (TclGetIntForIndexM(interp, objv[3], haystackLen-1,
- &start) != TCL_OK){
+ if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &last)) {
return TCL_ERROR;
}
- /*
- * Reread to prevent shimmering problems.
- */
-
- needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen);
- haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen);
-
- if (start < 0) {
- goto str_last_done;
- } else if (start < haystackLen) {
- p = haystackStr + start + 1 - needleLen;
- } else {
- p = haystackStr + haystackLen - needleLen;
+ if (last < 0) {
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(-1));
+ return TCL_OK;
}
- } else {
- p = haystackStr + haystackLen - needleLen;
- }
-
- /*
- * If the length of the needle is more than the length of the haystack, it
- * cannot be contained in there so we can avoid searching. [Bug 2960021]
- */
-
- if (needleLen > 0 && needleLen <= haystackLen) {
- for (; p >= haystackStr; p--) {
- /*
- * Scan backwards to find the first character.
- */
-
- if ((*p == *needleStr) && !memcmp(needleStr, p,
- sizeof(Tcl_UniChar) * (size_t)needleLen)) {
- match = p - haystackStr;
- break;
- }
+ if (last >= size) {
+ last = size - 1;
}
}
-
- str_last_done:
- Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringLast(objv[1],
+ objv[2], last)));
return TCL_OK;
}
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index a44451f..0d48071 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5728,23 +5728,10 @@ TEBCresume(
NEXT_INST_F(1, 2, 1);
case INST_STR_FIND_LAST:
- ustring1 = Tcl_GetUnicodeFromObj(OBJ_AT_TOS, &length); /* Haystack */
- ustring2 = Tcl_GetUnicodeFromObj(OBJ_UNDER_TOS, &length2);/* Needle */
-
- match = -1;
- if (length2 > 0 && length2 <= length) {
- for (p=ustring1+length-length2 ; p>=ustring1 ; p--) {
- if ((*p == *ustring2) &&
- memcmp(ustring2,p,sizeof(Tcl_UniChar)*length2) == 0) {
- match = p - ustring1;
- break;
- }
- }
- }
+ match = TclStringLast(OBJ_UNDER_TOS, OBJ_AT_TOS, INT_MAX - 1);
TRACE(("%.20s %.20s => %d\n",
O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), match));
-
TclNewIntObj(objResultPtr, match);
NEXT_INST_F(1, 2, 1);
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 26592f9..e468f3d 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3139,7 +3139,9 @@ MODULE_SCOPE int TclStringCatObjv(Tcl_Interp *interp, int inPlace,
int objc, Tcl_Obj *const objv[],
Tcl_Obj **objPtrPtr);
MODULE_SCOPE int TclStringFind(Tcl_Obj *needle, Tcl_Obj *haystack,
- unsigned int start);
+ int start);
+MODULE_SCOPE int TclStringLast(Tcl_Obj *needle, Tcl_Obj *haystack,
+ int last);
MODULE_SCOPE int TclStringMatch(const char *str, int strLen,
const char *pattern, int ptnLen, int flags);
MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj,
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index edba881..f7791fe 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -2860,7 +2860,7 @@ int
TclStringFind(
Tcl_Obj *needle,
Tcl_Obj *haystack,
- unsigned int start)
+ int start)
{
int lh, ln = Tcl_GetCharLength(needle);
@@ -2938,6 +2938,110 @@ TclStringFind(
/*
*---------------------------------------------------------------------------
*
+ * TclStringLast --
+ *
+ * Implements the [string last] operation.
+ *
+ * Results:
+ * If needle is found as a substring of haystack, the index of the
+ * last instance of such a find is returned. If needle is not present
+ * as a substring of haystack, -1 is returned.
+ *
+ * Side effects:
+ * needle and haystack may have their Tcl_ObjType changed.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TclStringLast(
+ Tcl_Obj *needle,
+ Tcl_Obj *haystack,
+ int last)
+{
+ int lh, ln = Tcl_GetCharLength(needle);
+
+ if (ln == 0) {
+ /*
+ * We don't find empty substrings. Bizarre!
+ *
+ * TODO: When we one day make this a true substring
+ * finder, change this to "return 0"
+ */
+ return -1;
+ }
+
+ if (ln > last + 1) {
+ return -1;
+ }
+
+ if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) {
+ unsigned char *try, *bh;
+ unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);
+
+ bh = Tcl_GetByteArrayFromObj(haystack, &lh);
+
+ if (last + 1 > lh) {
+ last = lh - 1;
+ }
+ try = bh + last + 1 - ln;
+ while (try >= bh) {
+ if ((*try == bn[0])
+ && (0 == memcmp(try+1, bn+1, ln-1))) {
+ return (try - bh);
+ }
+ try--;
+ }
+ return -1;
+ }
+
+ lh = Tcl_GetCharLength(haystack);
+ if (last + 1 > lh) {
+ last = lh - 1;
+ }
+ if (haystack->bytes && (lh == haystack->length)) {
+ /* haystack is all single-byte chars */
+
+ if (needle->bytes && (ln == needle->length)) {
+ /* needle is also all single-byte chars */
+
+ char *try = haystack->bytes + last + 1 - ln;
+ while (try >= haystack->bytes) {
+ if ((*try == needle->bytes[0])
+ && (0 == memcmp(try+1, needle->bytes + 1, ln - 1))) {
+ return (try - haystack->bytes);
+ }
+ try--;
+ }
+ return -1;
+ } else {
+ /*
+ * Cannot find substring with a multi-byte char inside
+ * a string with no multi-byte chars.
+ */
+ return -1;
+ }
+ } else {
+ Tcl_UniChar *try, *uh;
+ Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
+
+ uh = Tcl_GetUnicodeFromObj(haystack, &lh);
+
+ try = uh + last + 1 - ln;
+ while (try >= uh) {
+ if ((*try == un[0])
+ && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
+ return (try - uh);
+ }
+ try--;
+ }
+ return -1;
+ }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
* TclStringObjReverse --
*
* Implements the [string reverse] operation.
diff --git a/tests/string.test b/tests/string.test
index 5fdb510..11cbcff 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -756,13 +756,13 @@ catch {rename largest_int {}}
test string-7.1 {string last, too few args} {
list [catch {string last a} msg] $msg
-} {1 {wrong # args: should be "string last needleString haystackString ?startIndex?"}}
+} {1 {wrong # args: should be "string last needleString haystackString ?lastIndex?"}}
test string-7.2 {string last, bad args} {
list [catch {string last a b c} msg] $msg
} {1 {bad index "c": must be integer?[+-]integer? or end?[+-]integer?}}
test string-7.3 {string last, too many args} {
list [catch {string last a b c d} msg] $msg
-} {1 {wrong # args: should be "string last needleString haystackString ?startIndex?"}}
+} {1 {wrong # args: should be "string last needleString haystackString ?lastIndex?"}}
test string-7.4 {string last} {
string la xxx xxxx123xx345x678
} 1