diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2017-11-04 00:34:48 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2017-11-04 00:34:48 (GMT) |
commit | b55d5cc57eba5295979ff84c7256c577f2a24cc8 (patch) | |
tree | f21d29f994d4f3e552bb2397eb74c7d6390f5cfd | |
parent | e4356f7d64ab9d2c871b3015e10ecf3955d7f539 (diff) | |
download | tcl-b55d5cc57eba5295979ff84c7256c577f2a24cc8.zip tcl-b55d5cc57eba5295979ff84c7256c577f2a24cc8.tar.gz tcl-b55d5cc57eba5295979ff84c7256c577f2a24cc8.tar.bz2 |
Fix for the weird [string first] behaviour.
-rw-r--r-- | generic/tclStringObj.c | 42 |
1 file changed, 23 insertions, 19 deletions
```diff
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 7c1d42b..3a35bcf 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -3242,40 +3242,44 @@ TclStringFind(
         return -1;
     }
 
+    /*
+     * Check if we have two strings of single-byte characters. If we have, we
+     * can use strstr() to do the search. Note that we can sometimes have
+     * multibyte characters when the string could be minimally represented
+     * using single byte characters; we can't assume that a mismatch here
+     * means no match.
+     */
+
     lh = Tcl_GetCharLength(haystack);
-    if (haystack->bytes && (lh == haystack->length)) {
-        /* haystack is all single-byte chars */
+    if (haystack->bytes && (lh == haystack->length) && needle->bytes
+            && (ln == needle->length)) {
+        /*
+         * Both haystack and needle are all single-byte chars.
+         */
 
-        if (needle->bytes && (ln == needle->length)) {
-            /* needle is also all single-byte chars */
-            char *found = strstr(haystack->bytes + start, needle->bytes);
+        char *found = strstr(haystack->bytes + start, needle->bytes);
 
-            if (found) {
-                return (found - haystack->bytes);
-            } else {
-                return -1;
-            }
+        if (found) {
+            return (found - haystack->bytes);
         } else {
-            /*
-             * Cannot find substring with a multi-byte char inside
-             * a string with no multi-byte chars.
-             */
             return -1;
         }
     } else {
+        /*
+         * Do the search on the unicode representation for simplicity.
+         */
+
         Tcl_UniChar *try, *end, *uh;
         Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
 
         uh = Tcl_GetUnicodeFromObj(haystack, &lh);
         end = uh + lh;
 
-        try = uh + start;
-        while (try + ln <= end) {
-            if ((*try == *un)
-                    && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
+        for (try = uh + start; try + ln <= end; try++) {
+            if ((*try == *un) && (0 ==
+                    memcmp(try + 1, un + 1, (ln-1) * sizeof(Tcl_UniChar)))) {
                 return (try - uh);
             }
-            try++;
         }
         return -1;
     }
```