summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorapnadkarni <apnmbx-wits@yahoo.com>2023-02-16 17:15:35 (GMT)
committerapnadkarni <apnmbx-wits@yahoo.com>2023-02-16 17:15:35 (GMT)
commitbd084c2fc97ffe2e19f0f44e23f441b89c139e9b (patch)
tree8f83258bdfebc1fc14c28f4468a5c2f50ee9144a
parentd9046229bc814b561eb59c03e0aa3627264c07ea (diff)
downloadtcl-bd084c2fc97ffe2e19f0f44e23f441b89c139e9b.zip
tcl-bd084c2fc97ffe2e19f0f44e23f441b89c139e9b.tar.gz
tcl-bd084c2fc97ffe2e19f0f44e23f441b89c139e9b.tar.bz2
Bit more work on encoding test framework. Long way to go.
-rw-r--r--generic/tclEncoding.c65
-rw-r--r--tests/cmdAH.test427
2 files changed, 324 insertions, 168 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 8cd970f..470f8f3 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2368,6 +2368,7 @@ UtfToUtfProc(
const char *dstStart, *dstEnd;
int result, numChars, charLimit = INT_MAX;
int ch;
+ int profile;
result = TCL_OK;
@@ -2385,8 +2386,8 @@ UtfToUtfProc(
flags |= PTR2INT(clientData);
dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6);
+ profile = TCL_ENCODING_PROFILE_GET(flags);
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
- int profile = TCL_ENCODING_PROFILE_GET(flags);
if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
/*
@@ -2415,15 +2416,15 @@ UtfToUtfProc(
(!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) ||
PROFILE_REPLACE(profile))) {
/* Special sequence \xC0\x80 */
- if (PROFILE_STRICT(profile)) {
- result = TCL_CONVERT_SYNTAX;
- break;
- }
-
- if (PROFILE_REPLACE(profile)) {
- dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
- src += 1; /* C0, 80 handled in next loop iteration
- since dst limit has to be checked */
+ if (flags & ENCODING_INPUT) {
+ if (PROFILE_REPLACE(profile)) {
+ dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+ src += 2;
+ } else {
+ /* PROFILE_STRICT */
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ }
} else {
/*
* Convert 0xC080 to real nulls when we are in output mode,
@@ -2432,6 +2433,7 @@ UtfToUtfProc(
*dst++ = 0;
src += 2;
}
+
}
else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
@@ -2516,32 +2518,37 @@ UtfToUtfProc(
/*
* A surrogate character is detected, handle especially.
*/
- /* TODO - what about REPLACE profile? */
if (PROFILE_STRICT(profile) && (flags & ENCODING_UTF)) {
result = TCL_CONVERT_UNKNOWN;
src = saveSrc;
break;
}
-
- low = ch;
- len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
-
- if ((!LOW_SURROGATE(low)) || (ch & 0x400)) {
-
- if (PROFILE_STRICT(profile)) {
- result = TCL_CONVERT_UNKNOWN;
- src = saveSrc;
- break;
+ if (0 && PROFILE_REPLACE(profile)) {
+ ch = UNICODE_REPLACE_CHAR;
+ src += len;
+ // dst += Tcl_UniCharToUtf(ch, dst);
+ }
+ else {
+ low = ch;
+ len = (src <= srcEnd - 3) ? TclUtfToUCS4(src, &low) : 0;
+
+ if ((!LOW_SURROGATE(low)) || (ch & 0x400)) {
+
+ if (PROFILE_STRICT(profile)) {
+ result = TCL_CONVERT_UNKNOWN;
+ src = saveSrc;
+ break;
+ }
+cesu8:
+ *dst++ = (char)(((ch >> 12) | 0xE0) & 0xEF);
+ *dst++ = (char)(((ch >> 6) | 0x80) & 0xBF);
+ *dst++ = (char)((ch | 0x80) & 0xBF);
+ continue;
}
- cesu8:
- *dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF);
- *dst++ = (char) (((ch >> 6) | 0x80) & 0xBF);
- *dst++ = (char) ((ch | 0x80) & 0xBF);
- continue;
+ src += len;
+ dst += Tcl_UniCharToUtf(ch, dst);
+ ch = low;
}
- src += len;
- dst += Tcl_UniCharToUtf(ch, dst);
- ch = low;
} else if (PROFILE_STRICT(profile) &&
(!(flags & ENCODING_INPUT)) &&
SURROGATE(ch)) {
diff --git a/tests/cmdAH.test b/tests/cmdAH.test
index 6aa3c2e..6386658 100644
--- a/tests/cmdAH.test
+++ b/tests/cmdAH.test
@@ -187,19 +187,18 @@ set encProfiles {tcl8 strict replace}
# generated based on le/be versions. Also add all ranges from Unicode standard
# Table 3.7
set encValidStrings {
- ascii ABC \x41\x42\x43
- utf-8 A\u0000\u03A9\u8A9E\U00010384 \x41\x00\xCE\xA9\xE8\xAA\x9E\xF0\x90\x8E\x84
- utf-16le A\u0000\u03A9\u8A9E\U00010384 \x41\x00\x00\x00\xA9\x03\x9E\x8A\x00\xD8\x84\xDF
- utf-16be A\u0000\u03A9\u8A9E\U00010384 \x00\x41\x00\x00\x03\xA9\x8A\x9E\xD8\x00\xDF\x84
- utf-32le A\u0000\u03A9\u8A9E\U00010384 \x41\x00\x00\x00\x00\x00\x00\x00\xA9\x03\x00\x00\x9E\x8A\x00\x00\x84\x03\x01\x00
- utf-32be A\u0000\u03A9\u8A9E\U00010384 \x00\x00\x00\x41\x00\x00\x00\x00\x00\x00\x03\xA9\x00\x00\x8A\x9E\x00\x01\x03\x84
+ ascii ABC 414243
+ utf-8 A\u0000\u03A9\u8A9E\U00010384 4100CEA9E8AA9EF0908E84
+ utf-16le A\u0000\u03A9\u8A9E\U00010384 41000000A9039E8A00D884DF
+ utf-16be A\u0000\u03A9\u8A9E\U00010384 0041000003A98A9ED800DF84
+ utf-32le A\u0000\u03A9\u8A9E\U00010384 4100000000000000A90300009E8A000084030100
+ utf-32be A\u0000\u03A9\u8A9E\U00010384 0000004100000000000003A900008A9E00010384
}
# Invalid byte sequences. These are driven from a table with format
# {encoding bytes profile expectedresult expectedfailindex ctrl comment}
#
-# Note tag is used in test id generation as well. The combination
-# <enc,profile,tag> should be unique for test ids to be unique. Note utf-16,
+# <enc,bytes,profile> should be unique for test ids to be unique. Note utf-16,
# utf-32 missing because they are automatically generated based on le/be
# versions. Each entry potentially results in generation of multiple tests.
# This is controlled by the ctrl field. This should be a list of
@@ -214,13 +213,15 @@ set encValidStrings {
# TODO - other encodings and test cases
-# ascii - Any byte above 127 is invalid
-set encInvalidBytes {
- ascii 80 default \u20AC -1 {} {map to cp1252}
- ascii 80 tcl8 \u20AC -1 {} {map to cp1252}
+# ascii - Any byte above 127 is invalid and is mapped
+# to the same numeric code point except for the range
+# 80-9F which is treated as cp1252.
+# This tests the TableToUtfProc code path.
+lappend encInvalidBytes {*}{
+ ascii 80 default \u20AC -1 {knownBug} {map to cp1252}
+ ascii 80 tcl8 \u20AC -1 {knownBug} {map to cp1252}
ascii 80 replace \uFFFD -1 {} {Smallest invalid byte}
ascii 80 strict {} 0 {} {Smallest invalid byte}
-
ascii 81 default \u0081 -1 {knownBug} {map to cp1252}
ascii 82 default \u201A -1 {knownBug} {map to cp1252}
ascii 83 default \u0192 -1 {knownBug} {map to cp1252}
@@ -259,25 +260,80 @@ set encInvalidBytes {
ascii FF strict {} 0 {} {Largest invalid byte}
}
-# Following invalid sequences based on Table 3.7 in the Unicode standard.
-# utf-8 C0, C1, F5:FF are invalid bytes ANYWHERE.
-# Exception is C080 in non-strict mode.
-#
+# utf-8 - valid sequences based on Table 3.7 in the Unicode
+# standard.
+#
+# Code Points First Second Third Fourth Byte
+# U+0000..U+007F 00..7F
+# U+0080..U+07FF C2..DF 80..BF
+# U+0800..U+0FFF E0 A0..BF 80..BF
+# U+1000..U+CFFF E1..EC 80..BF 80..BF
+# U+D000..U+D7FF ED 80..9F 80..BF
+# U+E000..U+FFFF EE..EF 80..BF 80..BF
+# U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
+# U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
+# U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
+#
+# Tests below are based on the "gaps" in the above table. Note ascii test
+# values are repeated because internally a different code path is used
+# (UtfToUtfProc).
+# Note C0, C1, F5:FF are invalid bytes ANYWHERE. Exception is C080
lappend encInvalidBytes {*}{
+ utf-8 80 default \u20AC -1 {knownBug} {map to cp1252}
+ utf-8 80 tcl8 \u20AC -1 {knownBug} {map to cp1252}
+ utf-8 80 replace \uFFFD -1 {} {Smallest invalid byte}
+ utf-8 80 strict {} 0 {} {Smallest invalid byte}
+ utf-8 81 default \u0081 -1 {knownBug} {map to cp1252}
+ utf-8 82 default \u201A -1 {knownBug} {map to cp1252}
+ utf-8 83 default \u0192 -1 {knownBug} {map to cp1252}
+ utf-8 84 default \u201E -1 {knownBug} {map to cp1252}
+ utf-8 85 default \u2026 -1 {knownBug} {map to cp1252}
+ utf-8 86 default \u2020 -1 {knownBug} {map to cp1252}
+ utf-8 87 default \u2021 -1 {knownBug} {map to cp1252}
+ utf-8 88 default \u0276 -1 {knownBug} {map to cp1252}
+ utf-8 89 default \u2030 -1 {knownBug} {map to cp1252}
+ utf-8 8A default \u0160 -1 {knownBug} {map to cp1252}
+ utf-8 8B default \u2039 -1 {knownBug} {map to cp1252}
+ utf-8 8C default \u0152 -1 {knownBug} {map to cp1252}
+ utf-8 8D default \u008D -1 {knownBug} {map to cp1252}
+ utf-8 8E default \u017D -1 {knownBug} {map to cp1252}
+ utf-8 8F default \u008F -1 {knownBug} {map to cp1252}
+ utf-8 90 default \u0090 -1 {knownBug} {map to cp1252}
+ utf-8 91 default \u2018 -1 {knownBug} {map to cp1252}
+ utf-8 92 default \u2019 -1 {knownBug} {map to cp1252}
+ utf-8 93 default \u201C -1 {knownBug} {map to cp1252}
+ utf-8 94 default \u201D -1 {knownBug} {map to cp1252}
+ utf-8 95 default \u2022 -1 {knownBug} {map to cp1252}
+ utf-8 96 default \u2013 -1 {knownBug} {map to cp1252}
+ utf-8 97 default \u2014 -1 {knownBug} {map to cp1252}
+ utf-8 98 default \u02DC -1 {knownBug} {map to cp1252}
+ utf-8 99 default \u2122 -1 {knownBug} {map to cp1252}
+ utf-8 9A default \u0161 -1 {knownBug} {map to cp1252}
+ utf-8 9B default \u203A -1 {knownBug} {map to cp1252}
+ utf-8 9C default \u0153 -1 {knownBug} {map to cp1252}
+ utf-8 9D default \u009D -1 {knownBug} {map to cp1252}
+ utf-8 9E default \u017E -1 {knownBug} {map to cp1252}
+ utf-8 9F default \u0178 -1 {knownBug} {map to cp1252}
+
utf-8 C0 default \u00C0 -1 {} {C0 is invalid anywhere}
utf-8 C0 tcl8 \u00C0 -1 {} {C0 is invalid anywhere}
- utf-8 C0 replace \uFFFD -1 {} {C0 is invalid anywhere}
utf-8 C0 strict {} 0 {} {C0 is invalid anywhere}
-
+ utf-8 C0 replace \uFFFD -1 {} {C0 is invalid anywhere}
utf-8 C080 default \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8}
utf-8 C080 tcl8 \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8}
- utf-8 C080 replace \uFFFD\uFFFD -1 C080 {} {C080 -> U+0 in Tcl's internal modified UTF8}
- utf-8 C080 strict {} 0 {} {C080 -> U+0 in Tcl's internal modified UTF8}
-
+ utf-8 C080 strict {} 0 {} {C080 -> invalid}
+ utf-8 C080 replace \uFFFD -1 {} {C080 -> single replacement char}
utf-8 C1 default \u00C1 -1 {} {C1 is invalid everywhere}
utf-8 C1 tcl8 \u00C1 -1 {} {C1 is invalid everywhere}
utf-8 C1 replace \uFFFD -1 {} {C1 is invalid everywhere}
utf-8 C1 strict {} 0 {} {C1 is invalid everywhere}
+
+ utf-8 C1 default \u00C1 -1 {} {Require valid trail byte}
+ utf-8 C1 tcl8 \u00C1 -1 {} {Require valid trail byte}
+ utf-8 C1 replace \uFFFD -1 {} {Require valid trail byte}
+ utf-8 C1 strict {} 0 {} {Require valid trail byte}
+
+
utf-8 F5 default \u00F5 -1 {} {F5:FF are invalid everywhere}
utf-8 F5 tcl8 \u00F5 -1 {} {F5:FF are invalid everywhere}
utf-8 F5 replace \uFFFD -1 {} {F5:FF are invalid everywhere}
@@ -286,14 +342,14 @@ lappend encInvalidBytes {*}{
utf-8 FF tcl8 \u00FF -1 {} {F5:FF are invalid everywhere}
utf-8 FF replace \uFFFD -1 {} {F5:FF are invalid everywhere}
utf-8 FF strict {} 0 {} {F5:FF are invalid everywhere}
- utf-8 F5908080 default \u00F5 -1 {knownBug} {F5:FF with trailing bytes}
+
+ utf-8 C0AFE080BFF0818130 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-8}
+ utf-8 EDA080EDBFBFEDAF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownBug} {Unicode Table 3-9}
+ utf-8 F4919293FF4180BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0041\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-10}
+ utf-8 E180E2F09192F1BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownBug} {Unicode Table 3.11}
}
set xxencInvalidBytes {
- ascii \x41\xe9\x42 default A\u00E9B -1 {non-ASCII}
- ascii \x41\xe9\x42 tcl8 A\u00E9B -1 {non-ASCII}
- ascii \x41\xe9\x42 replace A\uFFFDB -1 {non-ASCII}
- ascii \x41\xe9\x42 strict A 1 {non-ASCII}
utf-8 \x41\x80\x42 default A\u0080B -1 80
utf-8 \x41\x80\x42 tcl8 A\u0080B -1 80
@@ -343,31 +399,39 @@ set utf32-le-TODO {
}
# Strings that cannot be encoded for specific encoding / profiles
-# {encoding string profile bytes failindex tag}
-# Note tag is used in test id generation as well. The combination
-# <enc,profile,tag> should be unique for test ids to be unique.
+# {encoding string profile exptedresult expectedfailindex ctrl comment}
+# <enc,string,profile> should be unique for test ids to be unique.
# Note utf-16, utf-32 missing because they are automatically
# generated based on le/be versions.
+# Each entry potentially results in generation of multiple tests.
+# This is controlled by the ctrl field. This should be a list of
+# zero or more of the following:
+# solo - the test data is the string itself
+# lead - the test data is the string followed by a valid suffix
+# tail - the test data is the string preceded by a prefix
+# middle - the test data is the string wrapped by a prefix and suffix
+# If the ctrl field is empty it is treated as all of the above
+# Note if there is any other value by itself, it will cause the test to
+# be skipped. This is intentional to skip known bugs.
# TODO - other encodings and test cases
# TODO - out of range code point (note cannot be generated by \U notation)
set encUnencodableStrings {
- ascii A\u00e0B default \x41\x3f\x42 -1 non-ASCII
- ascii A\u00e0B tcl8 \x41\x3f\x42 -1 non-ASCII
- ascii A\u00e0B strict \x41 1 non-ASCII
-
- iso8859-1 A\u0141B default \x41\x3f\x42 -1 unencodable
- iso8859-1 A\u0141B tcl8 \x41\x3f\x42 -1 unencodable
- iso8859-1 A\u0141B strict \x41 0 unencodable
-
- utf-8 A\uD800B default \x41\xed\xa0\x80\x42 -1 High-surrogate
- utf-8 A\uD800B tcl8 \x41\xed\xa0\x80\x42 -1 High-surrogate
- utf-8 A\uD800B strict \x41 1 High-surrogate
- utf-8 A\uDC00B default \x41\xed\xb0\x80\x42 -1 High-surrogate
- utf-8 A\uDC00B tcl8 \x41\xed\xb0\x80\x42 -1 High-surrogate
- utf-8 A\uDC00B strict \x41 1 High-surrogate
+ ascii \u00e0 default 3f -1 {} {unencodable}
+ ascii \u00e0 tcl8 3f -1 {} {unencodable}
+ ascii \u00e0 strict {} 0 {} {unencodable}
+
+ iso8859-1 \u0141 default 3f -1 {} unencodable
+ iso8859-1 \u0141 tcl8 3f -1 {} unencodable
+ iso8859-1 \u0141 strict {} 0 {} unencodable
+
+ utf-8 \uD800 default eda080 -1 {} High-surrogate
+ utf-8 \uD800 tcl8 eda080 -1 {} High-surrogate
+ utf-8 \uD800 strict {} 0 {} High-surrogate
+ utf-8 \uDC00 default edb080 -1 {} High-surrogate
+ utf-8 \uDC00 tcl8 edb080 -1 {} High-surrogate
+ utf-8 \uDC00 strict {} 0 {} High-surrogate
}
-
if {$::tcl_platform(byteOrder) eq "littleEndian"} {
set endian le
} else {
@@ -437,6 +501,40 @@ proc testconvert {id body result args} {
{*}$args
}
+proc testprofile {id converter enc profile data result args} {
+ if {$profile eq "default"} {
+ testconvert $id.$enc.$profile [list encoding $converter $enc $data] $result {*}$args
+ if {[set enc [endianUtf $enc]] ne ""} {
+ # If utf{16,32}-{le,be}, also do utf{16,32}
+ testconvert $id.$enc.$profile [list encoding $converter $enc $data] $result {*}$args
+ }
+ } else {
+ testconvert $id.$enc.$profile [list encoding $converter -profile $profile $enc $data] $result {*}$args
+ if {[set enc [endianUtf $enc]] ne ""} {
+ # If utf{16,32}-{le,be}, also do utf{16,32}
+ testconvert $id.$enc.$profile [list encoding $converter -profile $profile $enc $data] $result {*}$args
+ }
+ }
+}
+
+
+# Wrapper for verifying -failindex
+proc testfailindex {id converter enc data result {profile default}} {
+ if {$profile eq "default"} {
+ testconvert $id.$enc.$profile "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result
+ if {[set enc [endianUtf $enc]] ne ""} {
+ # If utf{16,32}-{le,be}, also do utf{16,32}
+ testconvert $id.$enc.$profile "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result
+ }
+ } else {
+ testconvert $id.$enc.$profile "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result
+ if {[set enc [endianUtf $enc]] ne ""} {
+ # If utf{16,32}-{le,be}, also do utf{16,32}
+ testconvert $id.$enc.$profile "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result
+ }
+ }
+}
+
test cmdAH-4.1.1 {encoding} -returnCodes error -body {
encoding
} -result {wrong # args: should be "encoding subcommand ?arg ...?"}
@@ -492,42 +590,110 @@ testconvert cmdAH-4.3.12 {
encoding system $system
}
-# Wrapper for verifying -failindex
-proc testfailindex {id converter enc data result {profile default}} {
- if {$profile eq "default"} {
- testconvert $id.$enc "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result
- if {[set enc [endianUtf $enc]] ne ""} {
- # If utf{16,32}-{le,be}, also do utf{16,32}
- testconvert $id.$enc "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result
+# convertfrom, convertfrom -profile
+
+# convertfrom ?-profile? : All valid byte sequences should be accepted by all profiles
+foreach {enc str hex} $encValidStrings {
+ set bytes [binary decode hex $hex]
+ set prefix A
+ set suffix B
+ set prefix_bytes [encoding convertto $enc A]
+ set suffix_bytes [encoding convertto $enc B]
+ foreach profile $encProfiles {
+ testfailindex cmdAH-4.3.13.$hex.solo convertfrom $enc $bytes [list $str -1] $profile
+ testfailindex cmdAH-4.3.13.$hex.lead convertfrom $enc $bytes$suffix_bytes [list $str$suffix -1] $profile
+ testfailindex cmdAH-4.3.13.$hex.tail convertfrom $enc $prefix_bytes$bytes [list $prefix$str -1] $profile
+ testfailindex cmdAH-4.3.13.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes [list $prefix$str$suffix -1] $profile
+ }
+}
+
+# convertfrom ?-profile? : invalid byte sequences
+foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes {
+ set bytes [binary format H* $hex]
+ set prefix A
+ set suffix B
+ set prefixLen [string length [encoding convertto $enc $prefix]]
+ set result [list $str]
+ # TODO - if the bad byte is unprintable, tcltest errors out when printing a mismatch
+ # so glob it out in error message pattern for now.
+ set errorWithoutPrefix [list "unexpected byte sequence starting at index $failidx: *" -returnCodes error -match glob]
+ set errorWithPrefix [list "unexpected byte sequence starting at index [expr {$failidx+$prefixLen}]: *" -returnCodes error -match glob]
+ if {$ctrl eq {} || "solo" in $ctrl} {
+ if {$failidx == -1} {
+ set result [list $str]
+ } else {
+ set result $errorWithoutPrefix
}
- } else {
- testconvert $id.$enc "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result
- if {[set enc [endianUtf $enc]] ne ""} {
- # If utf{16,32}-{le,be}, also do utf{16,32}
- testconvert $id.$enc "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result
+ testprofile cmdAH-4.3.15.$hex.solo convertfrom $enc $profile $bytes {*}$result
+ }
+ if {$ctrl eq {} || "lead" in $ctrl} {
+ if {$failidx == -1} {
+ set result [list $str$suffix]
+ } else {
+ set result $errorWithoutPrefix
+ }
+ testprofile cmdAH-4.3.15.$hex.lead convertfrom $enc $profile $bytes$suffix {*}$result
+ }
+ if {$ctrl eq {} || "tail" in $ctrl} {
+ if {$failidx == -1} {
+ set result [list $prefix$str]
+ } else {
+ set result $errorWithPrefix
+ }
+ testprofile cmdAH-4.3.15.$hex.tail convertfrom $enc $profile $prefix$bytes {*}$result
+ }
+ if {$ctrl eq {} || "middle" in $ctrl} {
+ if {$failidx == -1} {
+ set result [list $prefix$str$suffix]
+ } else {
+ set result $errorWithPrefix
}
+ testprofile cmdAH-4.3.15.$hex.middle convertfrom $enc $profile $prefix$bytes$suffix {*}$result
}
}
-# -failindex - valid data
-foreach {enc string bytes} $encValidStrings {
- testfailindex cmdAH-4.3.13.$enc convertfrom $enc $bytes [list $string -1]
- if {"utf-16$endian" eq $enc} {
- # utf-16le ->utf-16, utf-32be -> utf32 etc.
- set enc [string range $enc 0 5]
- testfailindex cmdAH-4.3.13.$enc convertfrom $enc $bytes [list $string -1]
+proc printable {s} {
+ set print ""
+ foreach c [split $s ""] {
+ set i [scan $c %c]
+ if {[string is print $c] && ($i <= 127)} {
+ append print $c
+ } elseif {$i <= 0xff} {
+ append print \\x[format %02X $i]
+ } elseif {$i <= 0xffff} {
+ append print \\u[format %04X $i]
+ } else {
+ append print \\U[format %08X $i]
+ }
}
+ return $print
}
-# -failindex - invalid data for each profile
+# convertfrom -failindex - valid data
+foreach {enc str hex} $encValidStrings {
+ set bytes [binary decode hex $hex]
+ set prefix A
+ set suffix B
+ set prefix_bytes [encoding convertto $enc A]
+ set suffix_bytes [encoding convertto $enc B]
+ foreach profile $encProfiles {
+ testfailindex cmdAH-4.3.13.$hex.solo convertfrom $enc $bytes [list $str -1] $profile
+ testfailindex cmdAH-4.3.13.$hex.lead convertfrom $enc $bytes$suffix_bytes [list $str$suffix -1] $profile
+ testfailindex cmdAH-4.3.13.$hex.tail convertfrom $enc $prefix_bytes$bytes [list $prefix$str -1] $profile
+ testfailindex cmdAH-4.3.13.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes [list $prefix$str$suffix -1] $profile
+ }
+}
+
+
+# convertfrom -failindex, convertfrom -failindex -profile, invalid data
foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes {
# There are multiple test cases based on location of invalid bytes
- set bytes [binary format H* $hex]
+ set bytes [binary decode hex $hex]
set prefix A
set suffix B
set prefixLen [string length [encoding convertto $enc $prefix]]
if {$ctrl eq {} || "solo" in $ctrl} {
- testfailindex xxcmdAH-4.3.14.$profile.$hex.solo convertfrom $enc $bytes [list $str $failidx] $profile
+ testfailindex cmdAH-4.3.14.$hex.solo convertfrom $enc $bytes [list $str $failidx] $profile
}
if {$ctrl eq {} || "lead" in $ctrl} {
if {$failidx == -1} {
@@ -537,7 +703,7 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes {
# Failure expected
set result ""
}
- testfailindex xxcmdAH-4.3.14.$profile.$hex.lead convertfrom $enc $bytes$suffix [list $result $failidx] $profile
+ testfailindex cmdAH-4.3.14.$hex.lead convertfrom $enc $bytes$suffix [list $result $failidx] $profile
}
if {$ctrl eq {} || "tail" in $ctrl} {
set expected_failidx $failidx
@@ -547,9 +713,9 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes {
} else {
# Failure expected
set result $prefix
- incr expected_failidx [string length [encoding convertto $enc $prefix]]
+ incr expected_failidx $prefixLen
}
- testfailindex xxcmdAH-4.3.14.$profile.$hex.tail convertfrom $enc $prefix$bytes [list $result $expected_failidx] $profile
+ testfailindex cmdAH-4.3.14.$hex.tail convertfrom $enc $prefix$bytes [list $result $expected_failidx] $profile
}
if {$ctrl eq {} || "middle" in $ctrl} {
set expected_failidx $failidx
@@ -559,53 +725,9 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes {
} else {
# Failure expected
set result $prefix
- incr expected_failidx [string length [encoding convertto $enc $prefix]]
- }
- testfailindex xxcmdAH-4.3.14.$profile.$hex.middle convertfrom $enc $prefix$bytes$suffix [list $result $expected_failidx] $profile
- }
-}
-
-# -profile
-
-# All valid byte sequences should be accepted by all profiles
-foreach profile $encProfiles {
- set i 0
- foreach {enc string bytes} $encValidStrings {
- testconvert cmdAH-4.3.15.$enc.$profile.[incr i] [list encoding convertfrom $enc $bytes] $string
- if {"utf-16$endian" eq $enc} {
- # utf-16le ->utf-16, utf-32be -> utf32 etc.
- set enc [string range $enc 0 5]
- testconvert cmdAH-4.3.15.$enc.$profile.[incr i] [list encoding convertfrom $enc $bytes] $string
- }
- }
-}
-
-# Cycle through the various combinations of encodings and profiles
-# for invalid byte sequences
-foreach {enc hex profile prefix failidx ctrl comment} $encInvalidBytes {
- set bytes [binary format H* $hex]
- if {$failidx eq -1} {
- set result [list $prefix]
- } else {
- set badbyte "'\\x[string toupper [binary encode hex [string index $bytes $failidx]]]'"
- # TODO - if the bad byte is unprintable, tcltest errors out when printing a mismatch
- # so glob it out for now.
- set result [list "unexpected byte sequence starting at index $failidx: *" -returnCodes error -match glob]
- }
- if {$profile eq "default"} {
- testconvert cmdAH-4.3.15.$enc.$profile.$hex [list encoding convertfrom $enc $bytes] {*}$result
- if {"utf-16$endian" eq $enc} {
- # utf-16le ->utf-16, utf-32be -> utf32 etc.
- set enc [string range $enc 0 5]
- testconvert cmdAH-4.3.15.$enc.$profile.$hex [list encoding convertfrom $enc $bytes] {*}$result
- }
- } else {
- testconvert cmdAH-4.3.15.$enc.$profile.$hex [list encoding convertfrom -profile $profile $enc $bytes] {*}$result
- if {"utf-16$endian" eq $enc} {
- # utf-16le ->utf-16, utf-32be -> utf32 etc.
- set enc [string range $enc 0 5]
- testconvert cmdAH-4.3.15.$enc.$profile.$hex [list encoding convertfrom -profile $profile $enc $bytes] {*}$result
+ incr expected_failidx $prefixLen
}
+ testfailindex cmdAH-4.3.14.$hex.middle convertfrom $enc $prefix$bytes$suffix [list $result $expected_failidx] $profile
}
}
@@ -646,41 +768,67 @@ testconvert cmdAH-4.4.12 {
# -failindex - valid data
foreach {enc string bytes} $encValidStrings {
testfailindex cmdAH-4.4.13.$enc convertto $enc $string [list $bytes -1]
- if {"utf-16$endian" eq $enc} {
- # utf-16le ->utf-16, utf-32be -> utf32 etc.
- set enc [string range $enc 0 5]
- testfailindex cmdAH-4.4.13.$enc convertto $enc $string [list $bytes -1]
- }
}
# -failindex - invalid data
-foreach {enc string profile bytes failidx tag} $encUnencodableStrings {
- testfailindex cmdAH-4.4.14.$enc.$profile.$tag convertto $enc $string [list $bytes $failidx] $profile
- if {"utf-16$endian" eq $enc} {
- # utf-16le ->utf-16, utf-32be -> utf32 etc.
- set enc [string range $enc 0 5]
- testfailindex cmdAH-4.4.14.$enc.$profile.$tag convertto $enc $string [list $bytes $failidx] $profile
+foreach {enc string profile hex failidx ctrl comment} $encUnencodableStrings {
+ set bytes [binary decode hex $hex]
+ set prefix A
+ set suffix B
+ set prefixLen [string length [encoding convertto $enc $prefix]]
+ if {$ctrl eq {} || "solo" in $ctrl} {
+ testfailindex cmdAH-4.4.14.$string.solo convertto $enc $string [list $bytes $failidx] $profile
+ }
+ if {$ctrl eq {} || "lead" in $ctrl} {
+ if {$failidx == -1} {
+ # If success expected
+ set result $bytes$suffix
+ } else {
+ # Failure expected
+ set result ""
+ }
+ testfailindex cmdAH-4.4.14.$string.lead convertto $enc $string$suffix [list $result $failidx] $profile
+ }
+ if {$ctrl eq {} || "tail" in $ctrl} {
+ set expected_failidx $failidx
+ if {$failidx == -1} {
+ # If success expected
+ set result $prefix$bytes
+ } else {
+ # Failure expected
+ set result $prefix
+ incr expected_failidx $prefixLen
+ }
+ testfailindex cmdAH-4.4.14.$string.tail convertto $enc $prefix$string [list $result $expected_failidx] $profile
+ }
+ if {$ctrl eq {} || "middle" in $ctrl} {
+ set expected_failidx $failidx
+ if {$failidx == -1} {
+ # If success expected
+ set result $prefix$bytes$suffix
+ } else {
+ # Failure expected
+ set result $prefix
+ incr expected_failidx $prefixLen
+ }
+ testfailindex cmdAH-4.4.14.$string.middle convertto $enc $prefix$string$suffix [list $result $expected_failidx] $profile
}
}
-# -profile
+# convertto -profile
# All valid byte sequences should be accepted by all profiles
foreach profile $encProfiles {
set i 0
foreach {enc string bytes} $encValidStrings {
- testconvert cmdAH-4.4.15.$enc.$profile.[incr i] [list encoding convertto $enc $string] $bytes
- if {"utf-16$endian" eq $enc} {
- # utf-16le ->utf-16, utf-32be -> utf32 etc.
- set enc [string range $enc 0 5]
- testconvert cmdAH-4.4.15.$enc.$profile.[incr i] [list encoding convertto $enc $string] $bytes
- }
+ testprofile cmdAH-4.4.15 convertto $enc $profile $string $bytes
}
}
# Cycle through the various combinations of encodings and profiles
# for invalid byte sequences
-foreach {enc string profile bytes failidx tag} $encUnencodableStrings {
+foreach {enc string profile hex failidx ctrl comment} $encUnencodableStrings {
+ set bytes [binary decode hex $hex]
if {$failidx eq -1} {
set result [list $bytes]
} else {
@@ -688,19 +836,20 @@ foreach {enc string profile bytes failidx tag} $encUnencodableStrings {
# so glob it out for now.
set result [list "unexpected character at index $failidx: *" -returnCodes error -match glob]
}
+ #testprofile xx convertto $enc $profile $string {*}$result
if {$profile eq "default"} {
- testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto $enc $string] {*}$result
+ # testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto $enc $string] {*}$result
if {"utf-16$endian" eq $enc} {
# utf-16le ->utf-16, utf-32be -> utf32 etc.
set enc [string range $enc 0 5]
- testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto $enc $string] {*}$result
+ # xxtestconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto $enc $string] {*}$result
}
} else {
- testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto -profile $profile $enc $string] {*}$result
+ # testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto -profile $profile $enc $string] {*}$result
if {"utf-16$endian" eq $enc} {
# utf-16le ->utf-16, utf-32be -> utf32 etc.
set enc [string range $enc 0 5]
- testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto -profile $profile $enc $string] {*}$result
+ # testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto -profile $profile $enc $string] {*}$result
}
}
}