diff options
| -rw-r--r-- | generic/tclStringObj.c | 42 | ||||
| -rw-r--r-- | tests/string.test | 16 | ||||
| -rw-r--r-- | tests/stringObj.test | 5 |
3 files changed, 40 insertions, 23 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 7c1d42b..3a35bcf 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -3242,40 +3242,44 @@ TclStringFind( return -1; } + /* + * Check if we have two strings of single-byte characters. If we have, we + * can use strstr() to do the search. Note that we can sometimes have + * multibyte characters when the string could be minimally represented + * using single byte characters; we can't assume that a mismatch here + * means no match. + */ + lh = Tcl_GetCharLength(haystack); - if (haystack->bytes && (lh == haystack->length)) { - /* haystack is all single-byte chars */ + if (haystack->bytes && (lh == haystack->length) && needle->bytes + && (ln == needle->length)) { + /* + * Both haystack and needle are all single-byte chars. + */ - if (needle->bytes && (ln == needle->length)) { - /* needle is also all single-byte chars */ - char *found = strstr(haystack->bytes + start, needle->bytes); + char *found = strstr(haystack->bytes + start, needle->bytes); - if (found) { - return (found - haystack->bytes); - } else { - return -1; - } + if (found) { + return (found - haystack->bytes); } else { - /* - * Cannot find substring with a multi-byte char inside - * a string with no multi-byte chars. - */ return -1; } } else { + /* + * Do the search on the unicode representation for simplicity. + */ + Tcl_UniChar *try, *end, *uh; Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); uh = Tcl_GetUnicodeFromObj(haystack, &lh); end = uh + lh; - try = uh + start; - while (try + ln <= end) { - if ((*try == *un) - && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) { + for (try = uh + start; try + ln <= end; try++) { + if ((*try == *un) && (0 == + memcmp(try + 1, un + 1, (ln-1) * sizeof(Tcl_UniChar)))) { return (try - uh); } - try++; } return -1; } diff --git a/tests/string.test b/tests/string.test index 549944d..cb901b9 100644 --- a/tests/string.test +++ b/tests/string.test @@ -28,6 +28,11 @@ testConstraint testindexobj [expr {[info commands testindexobj] != {}}] # Used for constraining memory leak tests testConstraint memory [llength [info commands memory]] +proc representationpoke s { + set r [::tcl::unsupported::representation $s] + list [lindex $r 3] [string match {*, string representation "*"} $r] +} + test string-1.1 {error conditions} { list [catch {string gorp a b} msg] $msg } {1 {unknown or ambiguous subcommand "gorp": must be bytelength, cat, compare, equal, first, index, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}} @@ -224,6 +229,13 @@ test string-4.15 {string first, ability to two-byte encoded utf-8 chars} { set uchar \u057e ;# character with two-byte encoding in utf-8 string first % %#$uchar$uchar#$uchar$uchar#% 3 } 8 +test string-4.16 {string first, normal string vs pure unicode string} { + set s hello + regexp ll $s m + # Representation checks are canaries + list [representationpoke $s] [representationpoke $m] \ + [string first $m $s] +} {{string 1} {string 0} 2} test string-5.1 {string index} { list [catch {string index} msg] $msg @@ -2042,9 +2054,7 @@ test string-29.15 {string cat, efficiency} -setup { } -body { tcl::unsupported::representation [string cat $e $f $e $f [list x]] } -match glob -result {*no string representation} - - - + # cleanup rename MemStress {} catch {rename foo {}} diff --git a/tests/stringObj.test b/tests/stringObj.test index 49f268e..a78b5f8 100644 --- a/tests/stringObj.test +++ b/tests/stringObj.test @@ -480,7 +480,6 @@ test stringObj-15.8 {Tcl_Append*ToObj: self appends} testobj { teststringobj set 1 foo teststringobj appendself2 1 3 } foo - if {[testConstraint testobj]} { testobj freeallvars @@ -489,3 +488,7 @@ if {[testConstraint testobj]} { # cleanup ::tcltest::cleanupTests return + +# Local Variables: +# mode: tcl +# End: |
