summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  generic/tclStringObj.c  42
-rw-r--r--  tests/string.test       16
-rw-r--r--  tests/stringObj.test     5
3 files changed, 40 insertions, 23 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 7c1d42b..3a35bcf 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -3242,40 +3242,44 @@ TclStringFind(
return -1;
}
+ /*
+ * Check if we have two strings of single-byte characters. If we have, we
+ * can use strstr() to do the search. Note that we can sometimes have
+ * multibyte characters when the string could be minimally represented
+ * using single byte characters; we can't assume that a mismatch here
+ * means no match.
+ */
+
lh = Tcl_GetCharLength(haystack);
- if (haystack->bytes && (lh == haystack->length)) {
- /* haystack is all single-byte chars */
+ if (haystack->bytes && (lh == haystack->length) && needle->bytes
+ && (ln == needle->length)) {
+ /*
+ * Both haystack and needle are all single-byte chars.
+ */
- if (needle->bytes && (ln == needle->length)) {
- /* needle is also all single-byte chars */
- char *found = strstr(haystack->bytes + start, needle->bytes);
+ char *found = strstr(haystack->bytes + start, needle->bytes);
- if (found) {
- return (found - haystack->bytes);
- } else {
- return -1;
- }
+ if (found) {
+ return (found - haystack->bytes);
} else {
- /*
- * Cannot find substring with a multi-byte char inside
- * a string with no multi-byte chars.
- */
return -1;
}
} else {
+ /*
+ * Do the search on the unicode representation for simplicity.
+ */
+
Tcl_UniChar *try, *end, *uh;
Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
uh = Tcl_GetUnicodeFromObj(haystack, &lh);
end = uh + lh;
- try = uh + start;
- while (try + ln <= end) {
- if ((*try == *un)
- && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
+ for (try = uh + start; try + ln <= end; try++) {
+ if ((*try == *un) && (0 ==
+ memcmp(try + 1, un + 1, (ln-1) * sizeof(Tcl_UniChar)))) {
return (try - uh);
}
- try++;
}
return -1;
}
diff --git a/tests/string.test b/tests/string.test
index 549944d..cb901b9 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -28,6 +28,11 @@ testConstraint testindexobj [expr {[info commands testindexobj] != {}}]
# Used for constraining memory leak tests
testConstraint memory [llength [info commands memory]]
+proc representationpoke s {
+ set r [::tcl::unsupported::representation $s]
+ list [lindex $r 3] [string match {*, string representation "*"} $r]
+}
+
test string-1.1 {error conditions} {
list [catch {string gorp a b} msg] $msg
} {1 {unknown or ambiguous subcommand "gorp": must be bytelength, cat, compare, equal, first, index, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
@@ -224,6 +229,13 @@ test string-4.15 {string first, ability to two-byte encoded utf-8 chars} {
set uchar \u057e ;# character with two-byte encoding in utf-8
string first % %#$uchar$uchar#$uchar$uchar#% 3
} 8
+test string-4.16 {string first, normal string vs pure unicode string} {
+ set s hello
+ regexp ll $s m
+ # Representation checks are canaries
+ list [representationpoke $s] [representationpoke $m] \
+ [string first $m $s]
+} {{string 1} {string 0} 2}
test string-5.1 {string index} {
list [catch {string index} msg] $msg
@@ -2042,9 +2054,7 @@ test string-29.15 {string cat, efficiency} -setup {
} -body {
tcl::unsupported::representation [string cat $e $f $e $f [list x]]
} -match glob -result {*no string representation}
-
-
-
+
# cleanup
rename MemStress {}
catch {rename foo {}}
diff --git a/tests/stringObj.test b/tests/stringObj.test
index 49f268e..a78b5f8 100644
--- a/tests/stringObj.test
+++ b/tests/stringObj.test
@@ -480,7 +480,6 @@ test stringObj-15.8 {Tcl_Append*ToObj: self appends} testobj {
teststringobj set 1 foo
teststringobj appendself2 1 3
} foo
-
if {[testConstraint testobj]} {
testobj freeallvars
@@ -489,3 +488,7 @@ if {[testConstraint testobj]} {
# cleanup
::tcltest::cleanupTests
return
+
+# Local Variables:
+# mode: tcl
+# End: