summary refs log tree commit diff stats
diff options
context:
space:
mode:
author dkf <donal.k.fellows@manchester.ac.uk> 2017-11-04 00:34:48 (GMT)
committer dkf <donal.k.fellows@manchester.ac.uk> 2017-11-04 00:34:48 (GMT)
commit b55d5cc57eba5295979ff84c7256c577f2a24cc8 (patch)
tree f21d29f994d4f3e552bb2397eb74c7d6390f5cfd
parent e4356f7d64ab9d2c871b3015e10ecf3955d7f539 (diff)
download tcl-b55d5cc57eba5295979ff84c7256c577f2a24cc8.zip
tcl-b55d5cc57eba5295979ff84c7256c577f2a24cc8.tar.gz
tcl-b55d5cc57eba5295979ff84c7256c577f2a24cc8.tar.bz2
Fix for the weird [string first] behaviour.
-rw-r--r-- generic/tclStringObj.c 42
1 files changed, 23 insertions, 19 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 7c1d42b..3a35bcf 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -3242,40 +3242,44 @@ TclStringFind(
return -1;
}
+ /*
+ * Check if we have two strings of single-byte characters. If we have, we
+ * can use strstr() to do the search. Note that we can sometimes have
+ * multibyte characters when the string could be minimally represented
+ * using single byte characters; we can't assume that a mismatch here
+ * means no match.
+ */
+
lh = Tcl_GetCharLength(haystack);
- if (haystack->bytes && (lh == haystack->length)) {
- /* haystack is all single-byte chars */
+ if (haystack->bytes && (lh == haystack->length) && needle->bytes
+ && (ln == needle->length)) {
+ /*
+ * Both haystack and needle are all single-byte chars.
+ */
- if (needle->bytes && (ln == needle->length)) {
- /* needle is also all single-byte chars */
- char *found = strstr(haystack->bytes + start, needle->bytes);
+ char *found = strstr(haystack->bytes + start, needle->bytes);
- if (found) {
- return (found - haystack->bytes);
- } else {
- return -1;
- }
+ if (found) {
+ return (found - haystack->bytes);
} else {
- /*
- * Cannot find substring with a multi-byte char inside
- * a string with no multi-byte chars.
- */
return -1;
}
} else {
+ /*
+ * Do the search on the unicode representation for simplicity.
+ */
+
Tcl_UniChar *try, *end, *uh;
Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
uh = Tcl_GetUnicodeFromObj(haystack, &lh);
end = uh + lh;
- try = uh + start;
- while (try + ln <= end) {
- if ((*try == *un)
- && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
+ for (try = uh + start; try + ln <= end; try++) {
+ if ((*try == *un) && (0 ==
+ memcmp(try + 1, un + 1, (ln-1) * sizeof(Tcl_UniChar)))) {
return (try - uh);
}
- try++;
}
return -1;
}