summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorapnadkarni <apnmbx-wits@yahoo.com>2024-08-17 12:52:48 (GMT)
committerapnadkarni <apnmbx-wits@yahoo.com>2024-08-17 12:52:48 (GMT)
commite454e0d3e8a6deffd30491f078cec81a314a34c7 (patch)
tree203beceba932b48aeb45da36683fca1fb4b210a1
parent9a68f70aa77883ec9dcfc435d979c7cca4df644f (diff)
downloadtcl-e454e0d3e8a6deffd30491f078cec81a314a34c7.zip
tcl-e454e0d3e8a6deffd30491f078cec81a314a34c7.tar.gz
tcl-e454e0d3e8a6deffd30491f078cec81a314a34c7.tar.bz2
Start on fragmentation tests
-rw-r--r--tests/utfext.test131
1 files changed, 95 insertions, 36 deletions
diff --git a/tests/utfext.test b/tests/utfext.test
index feb215d..a51b7ec 100644
--- a/tests/utfext.test
+++ b/tests/utfext.test
@@ -21,33 +21,50 @@ testConstraint testbytestring [llength [info commands testbytestring]]
testConstraint testencoding [llength [info commands testencoding]]
namespace eval utftest {
- # Format of table
+ # Format of table, indexed by encoding.
+ # Each element is list of lists. Nested lists have following fields
# 0 comment (no spaces, might be used to generate id's as well)
- # 1 encoding
- # 2 hex representation of internal *modified* utf-8 encoding. This is the
+ # The combination of comment and internal hex (2) should be unique.
+ # 1 hex representation of internal *modified* utf-8 encoding. This is the
# source string for Tcl_UtfToExternal and expected result for
# Tcl_ExternalToUtf.
- # 3 hex representation in specified encoding. This is the source string for
+ # 2 hex representation in specified encoding. This is the source string for
# Tcl_ExternalToUtf and expected result for Tcl_UtfToExternal.
- # 4 internal fragmentation index - where to split field 2 for fragmentation
+ # 3 internal fragmentation index - where to split field 1 for fragmentation
# tests. -1 to skip
- # 5 external fragmentation index - where to split field 3 for fragmentation
+ # 4 external fragmentation index - where to split field 2 for fragmentation
# tests. -1 to skip
lappend utfExtMap {*}{
- basic ascii 414243 414243 -1 -1
-
- bmp utf-8 c3a9 c3a9 1 1
- nonbmp utf-8 f09f9880 f09f9880 2 3
- null utf-8 41c08042 410042 2 -1
-
- basic iso8859-1 41c3a942 41e942 2 -1
- null iso8859-1 41c08042 410042 2 -1
-
- basic shiftjis 41e4b98e42 418cc142 3 2
-
- basic jis0208 e4b98ee590be 38433863 -1 -1
-
-
+ ascii {
+ {basic 414243 414243 -1 -1}
+ }
+ utf-8 {
+ {bmp {41 c3a9 42} {41 c3a9 42} 2 2}
+ {nonbmp {41 f09f9880 42} {41 f09f9880 42} 2 3}
+ {null {41 c080 42} {41 00 42} 2 -1}
+ }
+ utf-16le {
+ {bmp {41 c3a9 42} {4100 e900 4200} 2 3}
+ {nonbmp {41 f09f9880 42} {4100 3dd8 00de 4200} 4 3}
+ {split-surrogate {41 f09f9080 42} {4100 3dd8 00dc 4200} 3 4}
+ {null {41 c080 42} {4100 0000 4200} 2 3}
+ }
+ utf-16be {
+ {bmp {41 c3a9 42} {0041 00e9 0042} 2 3}
+ {nonbmp {41 f09f9880 42} {0041 d83d de00 0042} 4 3}
+ {split-surrogate {41 f09f9080 42} {0041 d83d dc00 0042} 3 4}
+ {null {41 c080 42} {0041 0000 0042} 2 3}
+ }
+ iso8859-1 {
+ {basic {41 c3a9 42} 41e942 2 -1}
+ {null {41 c080 42} 410042 2 -1}
+ }
+ shiftjis {
+ {basic {41 e4b98e 42} {41 8cc1 42} 3 2}
+ }
+ jis0208 {
+ {basic {e4b98e e590be} {3843 3863} 1 1}
+ }
}
# Return a binary string containing nul terminator for encoding
@@ -61,7 +78,8 @@ namespace eval utftest {
return [string range "$bin[string repeat \xFF $buflen]" 0 $buflen-1]
}
- proc testutf {direction enc hexin hexout args} {
+ proc testutf {direction enc comment hexin hexout args} {
+ set id $comment-[join $hexin ""]
if {$direction eq "toutf"} {
set cmd Tcl_ExternalToUtf
} else {
@@ -84,7 +102,7 @@ namespace eval utftest {
-status { set status [lpop args 0]}
default {
error "Unknown option \"$opt\""
- A }
+ }
}
}
if {[llength $args]} {
@@ -93,31 +111,72 @@ namespace eval utftest {
set result [list $status {} [fill $out $dstlen]]
- test $cmd-$enc-$hexin-[join $flags -] "$cmd - $enc - $hexin - $flags" -body \
+ test $cmd-$enc-$id-[join $flags -] "$cmd - $enc - $hexin - $flags" -body \
[list testencoding $cmd $enc $in $flags {} $dstlen] \
-result $result -constraints $constraints
foreach profile $profiles {
set flags2 [linsert $flags end $profile]
- test $cmd-$enc-$hexin-[join $flags2 -] "$cmd - $enc - $hexin - $flags" -body \
+ test $cmd-$enc-$id-[join $flags2 -] "$cmd - $enc - $hexin - $flags2" -body \
[list testencoding $cmd $enc $in $flags2 {} $dstlen] \
-result $result -constraints $constraints
}
}
+ proc testfragment {direction enc comment hexin hexout fragindex} {
+
+ if {$fragindex < 0} {
+ # Single byte encodings so no question of fragmentation
+ return
+ }
+ set id $comment-[join $hexin ""]-fragment
+
+ if {$direction eq "toutf"} {
+ set cmd Tcl_ExternalToUtf
+ } else {
+ set cmd Tcl_UtfToExternal
+ }
+
+ set in [binary decode hex $hexin]
+ set infrag [string range $in 0 $fragindex-1]
+ set out [binary decode hex $hexout]
+ set dstlen 40 ;# Should be enough for all encoding tests
+
+ set expected_result {}
+ append expected_result multibyte $fragindex
+
+ test $cmd-$enc-$id "$cmd - $enc - $hexin - frag" -constraints testencoding -body {
+ set frag1Result [testencoding $cmd $enc [string range $in 0 $fragindex-1] {start} 0 $dstlen frag1Read frag1Written]
+ lassign $frag1Result frag1Status frag1State frag1Decoded
+ set frag2Result [testencoding $cmd $enc [string range $in $frag1Read end] {end} $frag1State $dstlen frag2Read frag2Written]
+ lassign $frag2Result frag2Status frag2State frag2Decoded
+ set decoded [string cat [string range $frag1Decoded 0 $frag1Written-1] [string range $frag2Decoded 0 $frag2Written-1]]
+ list $frag1Status [expr {$frag1Read < $fragindex}] \
+ $frag2Status [expr {$frag1Read+$frag2Read}] \
+ [expr {$frag1Written+$frag2Written}] $decoded
+ } -result [list multibyte 1 ok [string length $in] [string length $out] $out]
+ }
+
#
# Basic tests
- foreach {comment enc utfhex hex internalfragindex externalfragindex} $utfExtMap {
- # Basic test - TCL_ENCODING_START|TCL_ENCODING_END
- # Note by default output should be terminated with \0
- set encnuls [hexnuls $enc]
- testutf toutf $enc $hex ${utfhex}00
- testutf fromutf $enc $utfhex $hex$encnuls
-
- # Test TCL_ENCODING_NO_TERMINATE
- testutf toutf $enc $hex $utfhex -flags {start end noterminate}
- # noterminate is specific to ExternalToUtf,
- # should have no effect in other direction
- testutf fromutf $enc $utfhex $hex$encnuls -flags {start end noterminate}
+ foreach {enc testcases} $utfExtMap {
+ foreach testcase $testcases {
+ lassign $testcase {*}{comment utfhex hex internalfragindex externalfragindex}
+
+ # Basic test - TCL_ENCODING_START|TCL_ENCODING_END
+ # Note by default output should be terminated with \0
+ set encnuls [hexnuls $enc]
+ testutf toutf $enc $comment $hex ${utfhex}00
+ testutf fromutf $enc $comment $utfhex $hex$encnuls
+
+ # Test TCL_ENCODING_NO_TERMINATE
+ testutf toutf $enc $comment $hex $utfhex -flags {start end noterminate}
+ # noterminate is specific to ExternalToUtf,
+ # should have no effect in other direction
+ testutf fromutf $enc $comment $utfhex $hex$encnuls -flags {start end noterminate}
+
+ testfragment toutf $enc $comment $hex $utfhex $externalfragindex
+ testfragment fromutf $enc $comment $utfhex $hex $internalfragindex
+ }
}
# Bug regression tests