summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorapnadkarni <apnmbx-wits@yahoo.com>2024-08-16 17:02:48 (GMT)
committerapnadkarni <apnmbx-wits@yahoo.com>2024-08-16 17:02:48 (GMT)
commit9a68f70aa77883ec9dcfc435d979c7cca4df644f (patch)
tree2615460210e010f8aac9ae9bc69d05dea32ec841
parent095db6a9ca5e7e5d1286fe4d25a4f93bea80bcd3 (diff)
downloadtcl-9a68f70aa77883ec9dcfc435d979c7cca4df644f.zip
tcl-9a68f70aa77883ec9dcfc435d979c7cca4df644f.tar.gz
tcl-9a68f70aa77883ec9dcfc435d979c7cca4df644f.tar.bz2
More tests for Tcl_UtfToExternal and Tcl_ExternalToUtf C API, in progress
-rw-r--r--generic/tclTest.c22
-rw-r--r--tests/icuUcmTests.tcl4
-rw-r--r--tests/utfext.test162
3 files changed, 121 insertions, 67 deletions
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 9a7fa39..6c25770 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -2073,7 +2073,7 @@ static void SpecialFree(
* encodingname srcbytes flags state dstlen ?srcreadvar? ?dstwrotevar? ?dstcharsvar?
*
* Results:
- * TCL_OK or TCL_ERROR. This any errors running the test, NOT the
+ * TCL_OK or TCL_ERROR. This indicates any errors running the test, NOT the
* result of Tcl_UtfToExternal or Tcl_ExternalToUtf.
*
* Side effects:
@@ -2084,10 +2084,9 @@ static void SpecialFree(
* entire output buffer, not just the part containing the decoded
* portion. This allows for additional checks at test script level.
*
- * If any of the srcreadvar, dstwrotevar and
- * dstcharsvar are specified and not empty, they are treated as names
- * of variables where the *srcRead, *dstWrote and *dstChars output
- * from the functions are stored.
+ * If any of the srcreadvar, dstwrotevar and dstcharsvar are specified and
+ * not empty, they are treated as names of variables where the *srcRead,
+ * *dstWrote and *dstChars output from the functions are stored.
*
* The function also checks internally whether nuls are correctly
* appended as requested but the TCL_ENCODING_NO_TERMINATE flag
@@ -2121,9 +2120,9 @@ static int UtfExtWrapper(
{"end", TCL_ENCODING_END},
{"noterminate", TCL_ENCODING_NO_TERMINATE},
{"charlimit", TCL_ENCODING_CHAR_LIMIT},
- {"profiletcl8", TCL_ENCODING_PROFILE_TCL8},
- {"profilestrict", TCL_ENCODING_PROFILE_STRICT},
- {"profilereplace", TCL_ENCODING_PROFILE_REPLACE},
+ {"tcl8", TCL_ENCODING_PROFILE_TCL8},
+ {"strict", TCL_ENCODING_PROFILE_STRICT},
+ {"replace", TCL_ENCODING_PROFILE_REPLACE},
{NULL, 0}
};
Tcl_Size i;
@@ -2220,9 +2219,10 @@ static int UtfExtWrapper(
&dstWrote,
dstCharsVar ? &dstChars : NULL);
if (memcmp(bufPtr + bufLen - 4, "\xAB\xCD\xEF\xAB", 4)) {
- Tcl_SetResult(interp,
- "Tcl_ExternalToUtf wrote past output buffer",
- TCL_STATIC);
+ Tcl_SetObjResult(interp,
+ Tcl_ObjPrintf("%s wrote past output buffer",
+ transformer == Tcl_ExternalToUtf ?
+ "Tcl_ExternalToUtf" : "Tcl_UtfToExternal"));
result = TCL_ERROR;
} else if (result != TCL_ERROR) {
Tcl_Obj *resultObjs[3];
diff --git a/tests/icuUcmTests.tcl b/tests/icuUcmTests.tcl
index 3b70748..65082e5 100644
--- a/tests/icuUcmTests.tcl
+++ b/tests/icuUcmTests.tcl
@@ -2,8 +2,8 @@
# This file is automatically generated by ucm2tests.tcl.
# Edits will be overwritten on next generation.
#
-# Generates tests comparing Tcl encodings to ICU.
-# The generated file is NOT standalone. It should be sourced into a test script.
+# Tests comparing Tcl encodings to ICU.
+# This file is NOT standalone. It should be sourced into a test script.
proc ucmConvertfromMismatches {enc map} {
set mismatches {}
diff --git a/tests/utfext.test b/tests/utfext.test
index fd82b16..feb215d 100644
--- a/tests/utfext.test
+++ b/tests/utfext.test
@@ -1,6 +1,8 @@
# This file contains a collection of tests for Tcl_UtfToExternal and
-# Tcl_UtfToExternal. Sourcing this file into Tcl runs the tests and generates
-# errors. No output means no errors found.
+# Tcl_UtfToExternal that exercise various combinations of flags,
+# buffer lengths and fragmentation that cannot be tested by
+# normal script level commands. There tests are NOT intended to check
+# correct encodings; those are elsewhere.
#
# Copyright (c) 2023 Ashok P. Nadkarni
#
@@ -18,69 +20,121 @@ catch [list package require -exact tcl::test [info patchlevel]]
testConstraint testbytestring [llength [info commands testbytestring]]
testConstraint testencoding [llength [info commands testencoding]]
-# Maps encoded bytes string to utf-8 equivalents, both in hex
-# encoding utf-8 encdata
-lappend utfExtMap {*}{
- ascii 414243 414243
-}
+namespace eval utftest {
+ # Format of table
+ # 0 comment (no spaces, might be used to generate id's as well)
+ # 1 encoding
+ # 2 hex representation of internal *modified* utf-8 encoding. This is the
+ # source string for Tcl_UtfToExternal and expected result for
+ # Tcl_ExternalToUtf.
+ # 3 hex representation in specified encoding. This is the source string for
+ # Tcl_ExternalToUtf and expected result for Tcl_UtfToExternal.
+ # 4 internal fragmentation index - where to split field 2 for fragmentation
+ # tests. -1 to skip
+ # 5 external fragmentation index - where to split field 3 for fragmentation
+ # tests. -1 to skip
+ lappend utfExtMap {*}{
+ basic ascii 414243 414243 -1 -1
+
+ bmp utf-8 c3a9 c3a9 1 1
+ nonbmp utf-8 f09f9880 f09f9880 2 3
+ null utf-8 41c08042 410042 2 -1
+
+ basic iso8859-1 41c3a942 41e942 2 -1
+ null iso8859-1 41c08042 410042 2 -1
+
+ basic shiftjis 41e4b98e42 418cc142 3 2
+
+ basic jis0208 e4b98ee590be 38433863 -1 -1
-# Simple test with basic flags
-proc testbasic {direction enc hexin hexout {flags {start end}}} {
- if {$direction eq "toutf"} {
- set cmd Tcl_ExternalToUtf
- } else {
- set cmd Tcl_UtfToExternal
+
+ }
+
+ # Return a binary string containing nul terminator for encoding
+ proc hexnuls {enc} {
+ return [binary encode hex [encoding convertto $enc \x00]]
}
- set in [binary decode hex $hexin]
- set out [binary decode hex $hexout]
- set dstlen 40 ;# Should be enough for all encoding tests
# The C wrapper fills entire destination buffer with FF.
# Anything beyond expected output should have FF's
- set filler [string repeat \xFF $dstlen]
- set result [string range "$out$filler" 0 $dstlen-1]
- test $cmd-$enc-$hexin-[join $flags -] "$cmd - $enc - $hexin - $flags" -body \
- [list testencoding $cmd $enc $in $flags {} $dstlen] \
- -result [list ok {} $result] -constraints testencoding
- foreach profile [encoding profiles] {
- set flags2 [linsert $flags end profile$profile]
- test $cmd-$enc-$hexin-[join $flags2 -] "$cmd - $enc - $hexin - $flags" -body \
- [list testencoding $cmd $enc $in $flags2 {} $dstlen] \
- -result [list ok {} $result] -constraints testencoding
+ proc fill {bin buflen} {
+ return [string range "$bin[string repeat \xFF $buflen]" 0 $buflen-1]
}
-}
-#
-# Basic tests
-foreach {enc utfhex hex} $utfExtMap {
- # Basic test - TCL_ENCODING_START|TCL_ENCODING_END
- # Note by default output should be terminated with \0
- testbasic toutf $enc $hex ${utfhex}00 {start end}
- testbasic fromutf $enc $utfhex ${hex}00 {start end}
-
- # Test TCL_ENCODING_NO_TERMINATE
- testbasic toutf $enc $hex $utfhex {start end noterminate}
- # knownBug - noterminate not obeyed by fromutf
- # testbasic fromutf $enc $utfhex $hex {start end noterminate}
-}
+ proc testutf {direction enc hexin hexout args} {
+ if {$direction eq "toutf"} {
+ set cmd Tcl_ExternalToUtf
+ } else {
+ set cmd Tcl_UtfToExternal
+ }
+ set in [binary decode hex $hexin]
+ set out [binary decode hex $hexout]
+ set dstlen 40 ;# Should be enough for all encoding tests
+
+ set status ok
+ set flags [list start end]
+ set constraints [list testencoding]
+ set profiles [encoding profiles]
+ while {[llength $args] > 1} {
+ set opt [lpop args 0]
+ switch $opt {
+ -flags { set flags [lpop args 0] }
+ -constraints { lappend constraints {*}[lpop args 0] }
+ -profiles { set profiles [lpop args 0] }
+ -status { set status [lpop args 0]}
+ default {
+ error "Unknown option \"$opt\""
+ A }
+ }
+ }
+ if {[llength $args]} {
+ error "No value supplied for option [lindex $args 0]."
+ }
-# Test for insufficient space
-test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -body {
- testencoding Tcl_UtfToExternal utf-16 A {start end} {} 1
-} -result [list nospace {} \xFF] -constraints testencoding
+ set result [list $status {} [fill $out $dstlen]]
+
+ test $cmd-$enc-$hexin-[join $flags -] "$cmd - $enc - $hexin - $flags" -body \
+ [list testencoding $cmd $enc $in $flags {} $dstlen] \
+ -result $result -constraints $constraints
+ foreach profile $profiles {
+ set flags2 [linsert $flags end $profile]
+ test $cmd-$enc-$hexin-[join $flags2 -] "$cmd - $enc - $hexin - $flags" -body \
+ [list testencoding $cmd $enc $in $flags2 {} $dstlen] \
+ -result $result -constraints $constraints
+ }
+ }
-# Another bug - char limit not obeyed
-# % set cv 2
-# % testencoding Tcl_ExternalToUtf utf-8 abcdefgh {start end noterminate charlimit} {} 20 rv wv cv
-# nospace {} abcÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ
+ #
+ # Basic tests
+ foreach {comment enc utfhex hex internalfragindex externalfragindex} $utfExtMap {
+ # Basic test - TCL_ENCODING_START|TCL_ENCODING_END
+ # Note by default output should be terminated with \0
+ set encnuls [hexnuls $enc]
+ testutf toutf $enc $hex ${utfhex}00
+ testutf fromutf $enc $utfhex $hex$encnuls
-test TableToUtf-bug-5be203d6ca {Bug 5be203d6ca - truncated prefix in table encoding} -body {
- set src \x82\x4F\x82\x50\x82
- lassign [testencoding Tcl_ExternalToUtf shiftjis $src {start profiletcl8} 0 16 srcRead dstWritten charsWritten] buf
- set result [list [testencoding Tcl_ExternalToUtf shiftjis $src {start profiletcl8} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten]
- lappend result {*}[list [testencoding Tcl_ExternalToUtf shiftjis [string range $src $srcRead end] {end profiletcl8} 0 10 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten]
-} -result [list [list multibyte 0 \xEF\xBC\x90\xEF\xBC\x91\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 4 6 2 [list ok 0 \xC2\x82\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 1 2 1] -constraints testencoding
+ # Test TCL_ENCODING_NO_TERMINATE
+ testutf toutf $enc $hex $utfhex -flags {start end noterminate}
+ # noterminate is specific to ExternalToUtf,
+ # should have no effect in other direction
+ testutf fromutf $enc $utfhex $hex$encnuls -flags {start end noterminate}
+ }
+
+ # Bug regression tests
+ test Tcl_UtfToExternal-bug-183a1adcc0 {buffer overflow} -body {
+ testencoding Tcl_UtfToExternal utf-16 A {start end} {} 1
+ } -result [list nospace {} \xFF] -constraints testencoding
+
+ test Tcl_ExternalToUtf-bug-5be203d6ca {
+ truncated prefix in table encoding
+ } -body {
+ set src \x82\x4F\x82\x50\x82
+ set result [list [testencoding Tcl_ExternalToUtf shiftjis $src {start tcl8} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten]
+ lappend result {*}[list [testencoding Tcl_ExternalToUtf shiftjis [string range $src $srcRead end] {end tcl8} 0 10 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten]
+ } -result [list [list multibyte 0 \xEF\xBC\x90\xEF\xBC\x91\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 4 6 2 [list ok 0 \xC2\x82\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 1 2 1] -constraints testencoding
+}
+namespace delete utftest
::tcltest::cleanupTests
return