summaryrefslogtreecommitdiffstats
path: root/tests/cmdAH.test
diff options
context:
space:
mode:
authorapnadkarni <apnmbx-wits@yahoo.com>2023-02-24 09:35:09 (GMT)
committerapnadkarni <apnmbx-wits@yahoo.com>2023-02-24 09:35:09 (GMT)
commit854369a67c1719356d036c3fe11e052a7fe62e80 (patch)
tree2c2c51b218930fb40ec957e8f513e13315ceda6e /tests/cmdAH.test
parent485bc2fd887abb2501321c670e66c849da1b026c (diff)
downloadtcl-854369a67c1719356d036c3fe11e052a7fe62e80.zip
tcl-854369a67c1719356d036c3fe11e052a7fe62e80.tar.gz
tcl-854369a67c1719356d036c3fe11e052a7fe62e80.tar.bz2
Factor out encoding test vectors into separate file so they can be used for file IO tests
Diffstat (limited to 'tests/cmdAH.test')
-rw-r--r--tests/cmdAH.test634
1 files changed, 1 insertions, 633 deletions
diff --git a/tests/cmdAH.test b/tests/cmdAH.test
index ba78c23..cec93d2 100644
--- a/tests/cmdAH.test
+++ b/tests/cmdAH.test
@@ -180,640 +180,8 @@ set "numargErrors(encoding convertto)" {wrong # args: should be "(encoding |::tc
set "numargErrors(encoding names)" {wrong # args: should be "encoding names"}
set "numargErrors(encoding profiles)" {wrong # args: should be "encoding profiles"}
-set encProfiles {tcl8 strict replace}
-set encDefaultProfile tcl8; # Should reflect the default from implementation
-
-# TODO - add valid sequences for other encodings (shiftjis etc.).
-# Note: utf-16 and utf-32 are omitted here because their entries are
-# generated automatically from the le/be versions.
-lappend encValidStrings {*}{
- ascii \u0000 00 {} {Lowest ASCII}
- ascii \u007F 7F knownBug {Highest ASCII}
- ascii \u007D 7D {} {Brace - just to verify test scripts are escaped correctly}
- ascii \u007B 7B {} {Terminating brace - just to verify test scripts are escaped correctly}
-
- utf-8 \u0000 00 {} {Unicode Table 3.7 Row 1}
- utf-8 \u007F 7F {} {Unicode Table 3.7 Row 1}
- utf-8 \u0080 C280 {} {Unicode Table 3.7 Row 2}
- utf-8 \u07FF DFBF {} {Unicode Table 3.7 Row 2}
- utf-8 \u0800 E0A080 {} {Unicode Table 3.7 Row 3}
- utf-8 \u0FFF E0BFBF {} {Unicode Table 3.7 Row 3}
- utf-8 \u1000 E18080 {} {Unicode Table 3.7 Row 4}
- utf-8 \uCFFF ECBFBF {} {Unicode Table 3.7 Row 4}
- utf-8 \uD000 ED8080 {} {Unicode Table 3.7 Row 5}
- utf-8 \uD7FF ED9FBF {} {Unicode Table 3.7 Row 5}
- utf-8 \uE000 EE8080 {} {Unicode Table 3.7 Row 6}
- utf-8 \uFFFF EFBFBF {} {Unicode Table 3.7 Row 6}
- utf-8 \U10000 F0908080 {} {Unicode Table 3.7 Row 7}
- utf-8 \U3FFFF F0BFBFBF {} {Unicode Table 3.7 Row 7}
- utf-8 \U40000 F1808080 {} {Unicode Table 3.7 Row 8}
- utf-8 \UFFFFF F3BFBFBF {} {Unicode Table 3.7 Row 8}
- utf-8 \U100000 F4808080 {} {Unicode Table 3.7 Row 9}
- utf-8 \U10FFFF F48FBFBF {} {Unicode Table 3.7 Row 9}
- utf-8 A\u03A9\u8A9E\U00010384 41CEA9E8AA9EF0908E84 {} {Unicode 2.5}
-
- utf-16le \u0000 0000 {} {Lowest code unit}
- utf-16le \uD7FF FFD7 {} {Below high surrogate range}
- utf-16le \uE000 00E0 {} {Above low surrogate range}
- utf-16le \uFFFF FFFF {} {Highest code unit}
- utf-16le \U010000 00D800DC {} {First surrogate pair}
- utf-16le \U10FFFF FFDBFFDF {} {Last surrogate pair}
- utf-16le A\u03A9\u8A9E\U00010384 4100A9039E8A00D884DF {} {Unicode 2.5}
-
- utf-16be \u0000 0000 {} {Lowest code unit}
- utf-16be \uD7FF D7FF {} {Below high surrogate range}
- utf-16be \uE000 E000 {} {Above low surrogate range}
- utf-16be \uFFFF FFFF {} {Highest code unit}
- utf-16be \U010000 D800DC00 {} {First surrogate pair}
- utf-16be \U10FFFF DBFFDFFF {} {Last surrogate pair}
- utf-16be A\u03A9\u8A9E\U00010384 004103A98A9ED800DF84 {} {Unicode 2.5}
-
- utf-32le \u0000 00000000 {} {Lowest code unit}
- utf-32le \uFFFF FFFF0000 {} {Highest BMP}
- utf-32le \U010000 00000100 {} {First supplementary}
- utf-32le \U10FFFF ffff1000 {} {Last supplementary}
- utf-32le A\u03A9\u8A9E\U00010384 41000000A90300009E8A000084030100 {} {Unicode 2.5}
-
- utf-32be \u0000 00000000 {} {Lowest code unit}
- utf-32be \uFFFF 0000FFFF {} {Highest BMP}
- utf-32be \U010000 00010000 {} {First supplementary}
- utf-32be \U10FFFF 0010FFFF {} {Last supplementary}
- utf-32be A\u03A9\u8A9E\U00010384 00000041000003A900008A9E00010384 {} {Unicode 2.5}
-}
-
-# Invalid byte sequences. These are driven from a table with format
-# {encoding bytes profile expectedresult expectedfailindex ctrl comment}
-#
-# <enc,bytes,profile> should be unique for test ids to be unique. Note utf-16,
-# utf-32 missing because they are automatically generated based on le/be
-# versions. Each entry potentially results in generation of multiple tests.
-# This is controlled by the ctrl field. This should be a list of
-# zero or more of the following:
-# solo - the test data is the string itself
-# lead - the test data is the string followed by a valid suffix
-# tail - the test data is the string preceded by a prefix
-# middle - the test data is the string wrapped by a prefix and suffix
-# If the ctrl field is empty, it is treated as all of the above.
-# Note that any other value by itself (e.g. knownBug) causes the test to
-# be skipped. This is intentional, so that known bugs can be skipped.
-# TODO - non-UTF encodings
-
-# ascii - Any byte above 127 is invalid and is mapped
-# to the same numeric code point except for the range
-# 80-9F which is treated as cp1252.
-# This tests the TableToUtfProc code path.
-lappend encInvalidBytes {*}{
- ascii 80 tcl8 \u20AC -1 {knownBug} {map to cp1252}
- ascii 80 replace \uFFFD -1 {} {Smallest invalid byte}
- ascii 80 strict {} 0 {} {Smallest invalid byte}
- ascii 81 tcl8 \u0081 -1 {knownBug} {map to cp1252}
- ascii 82 tcl8 \u201A -1 {knownBug} {map to cp1252}
- ascii 83 tcl8 \u0192 -1 {knownBug} {map to cp1252}
- ascii 84 tcl8 \u201E -1 {knownBug} {map to cp1252}
- ascii 85 tcl8 \u2026 -1 {knownBug} {map to cp1252}
- ascii 86 tcl8 \u2020 -1 {knownBug} {map to cp1252}
- ascii 87 tcl8 \u2021 -1 {knownBug} {map to cp1252}
- ascii 88 tcl8 \u02C6 -1 {knownBug} {map to cp1252}
- ascii 89 tcl8 \u2030 -1 {knownBug} {map to cp1252}
- ascii 8A tcl8 \u0160 -1 {knownBug} {map to cp1252}
- ascii 8B tcl8 \u2039 -1 {knownBug} {map to cp1252}
- ascii 8C tcl8 \u0152 -1 {knownBug} {map to cp1252}
- ascii 8D tcl8 \u008D -1 {knownBug} {map to cp1252}
- ascii 8E tcl8 \u017D -1 {knownBug} {map to cp1252}
- ascii 8F tcl8 \u008F -1 {knownBug} {map to cp1252}
- ascii 90 tcl8 \u0090 -1 {knownBug} {map to cp1252}
- ascii 91 tcl8 \u2018 -1 {knownBug} {map to cp1252}
- ascii 92 tcl8 \u2019 -1 {knownBug} {map to cp1252}
- ascii 93 tcl8 \u201C -1 {knownBug} {map to cp1252}
- ascii 94 tcl8 \u201D -1 {knownBug} {map to cp1252}
- ascii 95 tcl8 \u2022 -1 {knownBug} {map to cp1252}
- ascii 96 tcl8 \u2013 -1 {knownBug} {map to cp1252}
- ascii 97 tcl8 \u2014 -1 {knownBug} {map to cp1252}
- ascii 98 tcl8 \u02DC -1 {knownBug} {map to cp1252}
- ascii 99 tcl8 \u2122 -1 {knownBug} {map to cp1252}
- ascii 9A tcl8 \u0161 -1 {knownBug} {map to cp1252}
- ascii 9B tcl8 \u203A -1 {knownBug} {map to cp1252}
- ascii 9C tcl8 \u0153 -1 {knownBug} {map to cp1252}
- ascii 9D tcl8 \u009D -1 {knownBug} {map to cp1252}
- ascii 9E tcl8 \u017E -1 {knownBug} {map to cp1252}
- ascii 9F tcl8 \u0178 -1 {knownBug} {map to cp1252}
-
- ascii FF tcl8 \u00FF -1 {} {Largest invalid byte}
- ascii FF replace \uFFFD -1 {} {Largest invalid byte}
- ascii FF strict {} 0 {} {Largest invalid byte}
-}
-
-# utf-8 - valid sequences based on Table 3.7 in the Unicode
-# standard.
-#
-# Code Points First Second Third Fourth Byte
-# U+0000..U+007F 00..7F
-# U+0080..U+07FF C2..DF 80..BF
-# U+0800..U+0FFF E0 A0..BF 80..BF
-# U+1000..U+CFFF E1..EC 80..BF 80..BF
-# U+D000..U+D7FF ED 80..9F 80..BF
-# U+E000..U+FFFF EE..EF 80..BF 80..BF
-# U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
-# U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
-# U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
-#
-# Tests below are based on the "gaps" in the above table. Note ascii test
-# values are repeated because internally a different code path is used
-# (UtfToUtfProc).
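-# Note C0, C1, F5:FF are invalid bytes ANYWHERE. The one exception is the
-# sequence C080, which the tcl8 profile decodes as U+0000 (Tcl's internal
-# modified UTF-8); strict and replace still treat it as invalid.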
-lappend encInvalidBytes {*}{
- utf-8 80 tcl8 \u20AC -1 {} {map to cp1252}
- utf-8 80 replace \uFFFD -1 {} {Smallest invalid byte}
- utf-8 80 strict {} 0 {} {Smallest invalid byte}
- utf-8 81 tcl8 \u0081 -1 {} {map to cp1252}
- utf-8 82 tcl8 \u201A -1 {} {map to cp1252}
- utf-8 83 tcl8 \u0192 -1 {} {map to cp1252}
- utf-8 84 tcl8 \u201E -1 {} {map to cp1252}
- utf-8 85 tcl8 \u2026 -1 {} {map to cp1252}
- utf-8 86 tcl8 \u2020 -1 {} {map to cp1252}
- utf-8 87 tcl8 \u2021 -1 {} {map to cp1252}
- utf-8 88 tcl8 \u02C6 -1 {} {map to cp1252}
- utf-8 89 tcl8 \u2030 -1 {} {map to cp1252}
- utf-8 8A tcl8 \u0160 -1 {} {map to cp1252}
- utf-8 8B tcl8 \u2039 -1 {} {map to cp1252}
- utf-8 8C tcl8 \u0152 -1 {} {map to cp1252}
- utf-8 8D tcl8 \u008D -1 {} {map to cp1252}
- utf-8 8E tcl8 \u017D -1 {} {map to cp1252}
- utf-8 8F tcl8 \u008F -1 {} {map to cp1252}
- utf-8 90 tcl8 \u0090 -1 {} {map to cp1252}
- utf-8 91 tcl8 \u2018 -1 {} {map to cp1252}
- utf-8 92 tcl8 \u2019 -1 {} {map to cp1252}
- utf-8 93 tcl8 \u201C -1 {} {map to cp1252}
- utf-8 94 tcl8 \u201D -1 {} {map to cp1252}
- utf-8 95 tcl8 \u2022 -1 {} {map to cp1252}
- utf-8 96 tcl8 \u2013 -1 {} {map to cp1252}
- utf-8 97 tcl8 \u2014 -1 {} {map to cp1252}
- utf-8 98 tcl8 \u02DC -1 {} {map to cp1252}
- utf-8 99 tcl8 \u2122 -1 {} {map to cp1252}
- utf-8 9A tcl8 \u0161 -1 {} {map to cp1252}
- utf-8 9B tcl8 \u203A -1 {} {map to cp1252}
- utf-8 9C tcl8 \u0153 -1 {} {map to cp1252}
- utf-8 9D tcl8 \u009D -1 {} {map to cp1252}
- utf-8 9E tcl8 \u017E -1 {} {map to cp1252}
- utf-8 9F tcl8 \u0178 -1 {} {map to cp1252}
-
- utf-8 C0 tcl8 \u00C0 -1 {} {C0 is invalid anywhere}
- utf-8 C0 strict {} 0 {} {C0 is invalid anywhere}
- utf-8 C0 replace \uFFFD -1 {} {C0 is invalid anywhere}
- utf-8 C080 tcl8 \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8}
- utf-8 C080 strict {} 0 {} {C080 -> invalid}
- utf-8 C080 replace \uFFFD -1 {} {C080 -> single replacement char}
- utf-8 C0A2 tcl8 \u00C0\u00A2 -1 {} {websec.github.io - A}
- utf-8 C0A2 replace \uFFFD\uFFFD -1 {} {websec.github.io - A}
- utf-8 C0A2 strict {} 0 {} {websec.github.io - A}
- utf-8 C0A7 tcl8 \u00C0\u00A7 -1 {} {websec.github.io - double quote}
- utf-8 C0A7 replace \uFFFD\uFFFD -1 {} {websec.github.io - double quote}
- utf-8 C0A7 strict {} 0 {} {websec.github.io - double quote}
- utf-8 C0AE tcl8 \u00C0\u00AE -1 {} {websec.github.io - full stop}
- utf-8 C0AE replace \uFFFD\uFFFD -1 {} {websec.github.io - full stop}
- utf-8 C0AE strict {} 0 {} {websec.github.io - full stop}
- utf-8 C0AF tcl8 \u00C0\u00AF -1 {} {websec.github.io - solidus}
- utf-8 C0AF replace \uFFFD\uFFFD -1 {} {websec.github.io - solidus}
- utf-8 C0AF strict {} 0 {} {websec.github.io - solidus}
-
- utf-8 C1 tcl8 \u00C1 -1 {} {C1 is invalid everywhere}
- utf-8 C1 replace \uFFFD -1 {} {C1 is invalid everywhere}
- utf-8 C1 strict {} 0 {} {C1 is invalid everywhere}
- utf-8 C181 tcl8 \u00C1\u0081 -1 {} {websec.github.io - base test (A)}
- utf-8 C181 replace \uFFFD\uFFFD -1 {} {websec.github.io - base test (A)}
- utf-8 C181 strict {} 0 {} {websec.github.io - base test (A)}
- utf-8 C19C tcl8 \u00C1\u0153 -1 {} {websec.github.io - reverse solidus}
- utf-8 C19C replace \uFFFD\uFFFD -1 {} {websec.github.io - reverse solidus}
- utf-8 C19C strict {} 0 {} {websec.github.io - reverse solidus}
-
- utf-8 C2 tcl8 \u00C2 -1 {} {Missing trail byte}
- utf-8 C2 replace \uFFFD -1 {} {Missing trail byte}
- utf-8 C2 strict {} 0 {} {Missing trail byte}
- utf-8 C27F tcl8 \u00C2\x7F -1 {} {Trail byte must be 80:BF}
- utf-8 C27F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF}
- utf-8 C27F strict {} 0 {} {Trail byte must be 80:BF}
- utf-8 DF tcl8 \u00DF -1 {} {Missing trail byte}
- utf-8 DF replace \uFFFD -1 {} {Missing trail byte}
- utf-8 DF strict {} 0 {} {Missing trail byte}
- utf-8 DF7F tcl8 \u00DF\x7F -1 {} {Trail byte must be 80:BF}
- utf-8 DF7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF}
- utf-8 DF7F strict {} 0 {} {Trail byte must be 80:BF}
- utf-8 DFE0A080 tcl8 \u00DF\u0800 -1 {} {Invalid trail byte is start of valid sequence}
- utf-8 DFE0A080 replace \uFFFD\u0800 -1 {} {Invalid trail byte is start of valid sequence}
- utf-8 DFE0A080 strict {} 0 {} {Invalid trail byte is start of valid sequence}
-
- utf-8 E0 tcl8 \u00E0 -1 {} {Missing trail byte}
- utf-8 E0 replace \uFFFD -1 {} {Missing trail byte}
- utf-8 E0 strict {} 0 {} {Missing trail byte}
- utf-8 E080 tcl8 \u00E0\u20AC -1 {} {First trail byte must be A0:BF}
- utf-8 E080 replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF}
- utf-8 E080 strict {} 0 {} {First trail byte must be A0:BF}
- utf-8 E0819C tcl8 \u00E0\u0081\u0153 -1 {} {websec.github.io - reverse solidus}
- utf-8 E0819C replace \uFFFD\uFFFD\uFFFD -1 {} {websec.github.io - reverse solidus}
- utf-8 E0819C strict {} 0 {} {websec.github.io - reverse solidus}
- utf-8 E09F tcl8 \u00E0\u0178 -1 {} {First trail byte must be A0:BF}
- utf-8 E09F replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF}
- utf-8 E09F strict {} 0 {} {First trail byte must be A0:BF}
- utf-8 E0A0 tcl8 \u00E0\u00A0 -1 {} {Missing second trail byte}
- utf-8 E0A0 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 E0A0 strict {} 0 {} {Missing second trail byte}
- utf-8 E0BF tcl8 \u00E0\u00BF -1 {} {Missing second trail byte}
- utf-8 E0BF replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 E0BF strict {} 0 {} {Missing second trail byte}
- utf-8 E0A07F tcl8 \u00E0\u00A0\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 E0A07F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 E0A07F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 E0BF7F tcl8 \u00E0\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 E0BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 E0BF7F strict {} 0 {} {Second trail byte must be 80:BF}
-
- utf-8 E1 tcl8 \u00E1 -1 {} {Missing trail byte}
- utf-8 E1 replace \uFFFD -1 {} {Missing trail byte}
- utf-8 E1 strict {} 0 {} {Missing trail byte}
- utf-8 E17F tcl8 \u00E1\x7F -1 {} {Trail byte must be 80:BF}
- utf-8 E17F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF}
- utf-8 E17F strict {} 0 {} {Trail byte must be 80:BF}
- utf-8 E181 tcl8 \u00E1\u0081 -1 {} {Missing second trail byte}
- utf-8 E181 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 E181 strict {} 0 {} {Missing second trail byte}
- utf-8 E1BF tcl8 \u00E1\u00BF -1 {} {Missing second trail byte}
- utf-8 E1BF replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 E1BF strict {} 0 {} {Missing second trail byte}
- utf-8 E1807F tcl8 \u00E1\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 E1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 E1807F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 E1BF7F tcl8 \u00E1\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 E1BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 E1BF7F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 EC tcl8 \u00EC -1 {} {Missing trail byte}
- utf-8 EC replace \uFFFD -1 {} {Missing trail byte}
- utf-8 EC strict {} 0 {} {Missing trail byte}
- utf-8 EC7F tcl8 \u00EC\x7F -1 {} {Trail byte must be 80:BF}
- utf-8 EC7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF}
- utf-8 EC7F strict {} 0 {} {Trail byte must be 80:BF}
- utf-8 EC81 tcl8 \u00EC\u0081 -1 {} {Missing second trail byte}
- utf-8 EC81 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 EC81 strict {} 0 {} {Missing second trail byte}
- utf-8 ECBF tcl8 \u00EC\u00BF -1 {} {Missing second trail byte}
- utf-8 ECBF replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 ECBF strict {} 0 {} {Missing second trail byte}
- utf-8 EC807F tcl8 \u00EC\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 EC807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 EC807F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 ECBF7F tcl8 \u00EC\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 ECBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 ECBF7F strict {} 0 {} {Second trail byte must be 80:BF}
-
- utf-8 ED tcl8 \u00ED -1 {} {Missing trail byte}
- utf-8 ED replace \uFFFD -1 {} {Missing trail byte}
- utf-8 ED strict {} 0 {} {Missing trail byte}
- utf-8 ED7F tcl8 \u00ED\u7F -1 {} {First trail byte must be 80:9F}
- utf-8 ED7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:9F}
- utf-8 ED7F strict {} 0 {} {First trail byte must be 80:9F}
- utf-8 EDA0 tcl8 \u00ED\u00A0 -1 {} {First trail byte must be 80:9F}
- utf-8 EDA0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:9F}
- utf-8 EDA0 strict {} 0 {} {First trail byte must be 80:9F}
- utf-8 ED81 tcl8 \u00ED\u0081 -1 {} {Missing second trail byte}
- utf-8 ED81 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 ED81 strict {} 0 {} {Missing second trail byte}
- utf-8 EDBF tcl8 \u00ED\u00BF -1 {} {Missing second trail byte}
- utf-8 EDBF replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 EDBF strict {} 0 {} {Missing second trail byte}
- utf-8 ED807F tcl8 \u00ED\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 ED807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 ED807F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 ED9F7F tcl8 \u00ED\u0178\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 ED9F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 ED9F7F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 EDA080 tcl8 \uD800 -1 {} {High surrogate}
- utf-8 EDA080 replace \uFFFD -1 {} {High surrogate}
- utf-8 EDA080 strict {} 0 {} {High surrogate}
- utf-8 EDAFBF tcl8 \uDBFF -1 {} {High surrogate}
- utf-8 EDAFBF replace \uFFFD -1 {} {High surrogate}
- utf-8 EDAFBF strict {} 0 {} {High surrogate}
- utf-8 EDB080 tcl8 \uDC00 -1 {} {Low surrogate}
- utf-8 EDB080 replace \uFFFD -1 {} {Low surrogate}
- utf-8 EDB080 strict {} 0 {} {Low surrogate}
- utf-8 EDBFBF tcl8 \uDFFF -1 {} {Low surrogate}
- utf-8 EDBFBF replace \uFFFD -1 {} {Low surrogate}
- utf-8 EDBFBF strict {} 0 {} {Low surrogate}
- utf-8 EDA080EDB080 tcl8 \U00010000 -1 {} {High low surrogate pair}
- utf-8 EDA080EDB080 replace \uFFFD\uFFFD -1 {} {High low surrogate pair}
- utf-8 EDA080EDB080 strict {} 0 {} {High low surrogate pair}
- utf-8 EDAFBFEDBFBF tcl8 \U0010FFFF -1 {} {High low surrogate pair}
- utf-8 EDAFBFEDBFBF replace \uFFFD\uFFFD -1 {} {High low surrogate pair}
- utf-8 EDAFBFEDBFBF strict {} 0 {} {High low surrogate pair}
-
- utf-8 EE tcl8 \u00EE -1 {} {Missing trail byte}
- utf-8 EE replace \uFFFD -1 {} {Missing trail byte}
- utf-8 EE strict {} 0 {} {Missing trail byte}
- utf-8 EE7F tcl8 \u00EE\u7F -1 {} {First trail byte must be 80:BF}
- utf-8 EE7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:BF}
- utf-8 EE7F strict {} 0 {} {First trail byte must be 80:BF}
- utf-8 EED0 tcl8 \u00EE\u00D0 -1 {} {First trail byte must be 80:BF}
- utf-8 EED0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF}
- utf-8 EED0 strict {} 0 {} {First trail byte must be 80:BF}
- utf-8 EE81 tcl8 \u00EE\u0081 -1 {} {Missing second trail byte}
- utf-8 EE81 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 EE81 strict {} 0 {} {Missing second trail byte}
- utf-8 EEBF tcl8 \u00EE\u00BF -1 {} {Missing second trail byte}
- utf-8 EEBF replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 EEBF strict {} 0 {} {Missing second trail byte}
- utf-8 EE807F tcl8 \u00EE\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 EE807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 EE807F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 EEBF7F tcl8 \u00EE\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 EEBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 EEBF7F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 EF tcl8 \u00EF -1 {} {Missing trail byte}
- utf-8 EF replace \uFFFD -1 {} {Missing trail byte}
- utf-8 EF strict {} 0 {} {Missing trail byte}
- utf-8 EF7F tcl8 \u00EF\u7F -1 {} {First trail byte must be 80:BF}
- utf-8 EF7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:BF}
- utf-8 EF7F strict {} 0 {} {First trail byte must be 80:BF}
- utf-8 EFD0 tcl8 \u00EF\u00D0 -1 {} {First trail byte must be 80:BF}
- utf-8 EFD0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF}
- utf-8 EFD0 strict {} 0 {} {First trail byte must be 80:BF}
- utf-8 EF81 tcl8 \u00EF\u0081 -1 {} {Missing second trail byte}
- utf-8 EF81 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 EF81 strict {} 0 {} {Missing second trail byte}
- utf-8 EFBF tcl8 \u00EF\u00BF -1 {} {Missing second trail byte}
- utf-8 EFBF replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 EFBF strict {} 0 {} {Missing second trail byte}
- utf-8 EF807F tcl8 \u00EF\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 EF807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 EF807F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 EFBF7F tcl8 \u00EF\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 EFBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 EFBF7F strict {} 0 {} {Second trail byte must be 80:BF}
-
- utf-8 F0 tcl8 \u00F0 -1 {} {Missing trail byte}
- utf-8 F0 replace \uFFFD -1 {} {Missing trail byte}
- utf-8 F0 strict {} 0 {} {Missing trail byte}
- utf-8 F080 tcl8 \u00F0\u20AC -1 {} {First trail byte must be 90:BF}
- utf-8 F080 replace \uFFFD -1 {knownW3C} {First trail byte must be 90:BF}
- utf-8 F080 strict {} 0 {} {First trail byte must be 90:BF}
- utf-8 F08F tcl8 \u00F0\u8F -1 {} {First trail byte must be 90:BF}
- utf-8 F08F replace \uFFFD -1 {knownW3C} {First trail byte must be 90:BF}
- utf-8 F08F strict {} 0 {} {First trail byte must be 90:BF}
- utf-8 F0D0 tcl8 \u00F0\u00D0 -1 {} {First trail byte must be 90:BF}
- utf-8 F0D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 90:BF}
- utf-8 F0D0 strict {} 0 {} {First trail byte must be 90:BF}
- utf-8 F090 tcl8 \u00F0\u0090 -1 {} {Missing second trail byte}
- utf-8 F090 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 F090 strict {} 0 {} {Missing second trail byte}
- utf-8 F0BF tcl8 \u00F0\u00BF -1 {} {Missing second trail byte}
- utf-8 F0BF replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 F0BF strict {} 0 {} {Missing second trail byte}
- utf-8 F0907F tcl8 \u00F0\u0090\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 F0907F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 F0907F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 F0BF7F tcl8 \u00F0\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 F0BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 F0BF7F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 F090BF tcl8 \u00F0\u0090\u00BF -1 {} {Missing third trail byte}
- utf-8 F090BF replace \uFFFD -1 {knownW3C} {Missing third trail byte}
- utf-8 F090BF strict {} 0 {} {Missing third trail byte}
- utf-8 F0BF81 tcl8 \u00F0\u00BF\u0081 -1 {} {Missing third trail byte}
- utf-8 F0BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte}
- utf-8 F0BF81 strict {} 0 {} {Missing third trail byte}
- utf-8 F0BF807F tcl8 \u00F0\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF}
- utf-8 F0BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF}
- utf-8 F0BF817F strict {} 0 {} {Third trail byte must be 80:BF}
- utf-8 F090BFD0 tcl8 \u00F0\u0090\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF}
- utf-8 F090BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF}
- utf-8 F090BFD0 strict {} 0 {} {Third trail byte must be 80:BF}
-
- utf-8 F1 tcl8 \u00F1 -1 {} {Missing trail byte}
- utf-8 F1 replace \uFFFD -1 {} {Missing trail byte}
- utf-8 F1 strict {} 0 {} {Missing trail byte}
- utf-8 F17F tcl8 \u00F1\u7F -1 {} {First trail byte must be 80:BF}
- utf-8 F17F replace \uFFFD -1 {knownW3C} {First trail byte must be 80:BF}
- utf-8 F17F strict {} 0 {} {First trail byte must be 80:BF}
- utf-8 F1D0 tcl8 \u00F1\u00D0 -1 {} {First trail byte must be 80:BF}
- utf-8 F1D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF}
- utf-8 F1D0 strict {} 0 {} {First trail byte must be 80:BF}
- utf-8 F180 tcl8 \u00F1\u20AC -1 {} {Missing second trail byte}
- utf-8 F180 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 F180 strict {} 0 {} {Missing second trail byte}
- utf-8 F1BF tcl8 \u00F1\u00BF -1 {} {Missing second trail byte}
- utf-8 F1BF replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 F1BF strict {} 0 {} {Missing second trail byte}
- utf-8 F1807F tcl8 \u00F1\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 F1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 F1807F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 F1BF7F tcl8 \u00F1\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 F1BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 F1BF7F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 F180BF tcl8 \u00F1\u20AC\u00BF -1 {} {Missing third trail byte}
- utf-8 F180BF replace \uFFFD -1 {knownW3C} {Missing third trail byte}
- utf-8 F180BF strict {} 0 {} {Missing third trail byte}
- utf-8 F1BF81 tcl8 \u00F1\u00BF\u0081 -1 {} {Missing third trail byte}
- utf-8 F1BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte}
- utf-8 F1BF81 strict {} 0 {} {Missing third trail byte}
- utf-8 F1BF807F tcl8 \u00F1\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF}
- utf-8 F1BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF}
- utf-8 F1BF817F strict {} 0 {} {Third trail byte must be 80:BF}
- utf-8 F180BFD0 tcl8 \u00F1\u20AC\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF}
- utf-8 F180BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF}
- utf-8 F180BFD0 strict {} 0 {} {Third trail byte must be 80:BF}
- utf-8 F3 tcl8 \u00F3 -1 {} {Missing trail byte}
- utf-8 F3 replace \uFFFD -1 {} {Missing trail byte}
- utf-8 F3 strict {} 0 {} {Missing trail byte}
- utf-8 F37F tcl8 \u00F3\x7F -1 {} {First trail byte must be 80:BF}
- utf-8 F37F replace \uFFFD -1 {knownW3C} {First trail byte must be 80:BF}
- utf-8 F37F strict {} 0 {} {First trail byte must be 80:BF}
- utf-8 F3D0 tcl8 \u00F3\u00D0 -1 {} {First trail byte must be 80:BF}
- utf-8 F3D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF}
- utf-8 F3D0 strict {} 0 {} {First trail byte must be 80:BF}
- utf-8 F380 tcl8 \u00F3\u20AC -1 {} {Missing second trail byte}
- utf-8 F380 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 F380 strict {} 0 {} {Missing second trail byte}
- utf-8 F3BF tcl8 \u00F3\u00BF -1 {} {Missing second trail byte}
- utf-8 F3BF replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 F3BF strict {} 0 {} {Missing second trail byte}
- utf-8 F3807F tcl8 \u00F3\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 F3807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 F3807F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 F3BF7F tcl8 \u00F3\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 F3BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 F3BF7F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 F380BF tcl8 \u00F3\u20AC\u00BF -1 {} {Missing third trail byte}
- utf-8 F380BF replace \uFFFD -1 {knownW3C} {Missing third trail byte}
- utf-8 F380BF strict {} 0 {} {Missing third trail byte}
- utf-8 F3BF81 tcl8 \u00F3\u00BF\u0081 -1 {} {Missing third trail byte}
- utf-8 F3BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte}
- utf-8 F3BF81 strict {} 0 {} {Missing third trail byte}
- utf-8 F3BF807F tcl8 \u00F3\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF}
- utf-8 F3BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF}
- utf-8 F3BF817F strict {} 0 {} {Third trail byte must be 80:BF}
- utf-8 F380BFD0 tcl8 \u00F3\u20AC\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF}
- utf-8 F380BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF}
- utf-8 F380BFD0 strict {} 0 {} {Third trail byte must be 80:BF}
-
- utf-8 F4 tcl8 \u00F4 -1 {} {Missing trail byte}
- utf-8 F4 replace \uFFFD -1 {} {Missing trail byte}
- utf-8 F4 strict {} 0 {} {Missing trail byte}
- utf-8 F47F tcl8 \u00F4\u7F -1 {} {First trail byte must be 80:8F}
- utf-8 F47F replace \uFFFD\u7F -1 {knownW3C} {First trail byte must be 80:8F}
- utf-8 F47F strict {} 0 {} {First trail byte must be 80:8F}
- utf-8 F490 tcl8 \u00F4\u0090 -1 {} {First trail byte must be 80:8F}
- utf-8 F490 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:8F}
- utf-8 F490 strict {} 0 {} {First trail byte must be 80:8F}
- utf-8 F480 tcl8 \u00F4\u20AC -1 {} {Missing second trail byte}
- utf-8 F480 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 F480 strict {} 0 {} {Missing second trail byte}
- utf-8 F48F tcl8 \u00F4\u008F -1 {} {Missing second trail byte}
- utf-8 F48F replace \uFFFD -1 {knownW3C} {Missing second trail byte}
- utf-8 F48F strict {} 0 {} {Missing second trail byte}
- utf-8 F4807F tcl8 \u00F4\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 F4807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 F4807F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 F48F7F tcl8 \u00F4\u008F\x7F -1 {} {Second trail byte must be 80:BF}
- utf-8 F48F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF}
- utf-8 F48F7F strict {} 0 {} {Second trail byte must be 80:BF}
- utf-8 F48081 tcl8 \u00F4\u20AC\u0081 -1 {} {Missing third trail byte}
- utf-8 F48081 replace \uFFFD -1 {knownW3C} {Missing third trail byte}
- utf-8 F48081 strict {} 0 {} {Missing third trail byte}
- utf-8 F48F81 tcl8 \u00F4\u008F\u0081 -1 {} {Missing third trail byte}
- utf-8 F48F81 replace \uFFFD -1 {knownW3C} {Missing third trail byte}
- utf-8 F48F81 strict {} 0 {} {Missing third trail byte}
- utf-8 F481817F tcl8 \u00F4\u0081\u0081\x7F -1 {} {Third trail byte must be 80:BF}
- utf-8 F480817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF}
- utf-8 F480817F strict {} 0 {} {Third trail byte must be 80:BF}
- utf-8 F48FBFD0 tcl8 \u00F4\u008F\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF}
- utf-8 F48FBFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF}
- utf-8 F48FBFD0 strict {} 0 {} {Third trail byte must be 80:BF}
-
- utf-8 F5 tcl8 \u00F5 -1 {} {F5:FF are invalid everywhere}
- utf-8 F5 replace \uFFFD -1 {} {F5:FF are invalid everywhere}
- utf-8 F5 strict {} 0 {} {F5:FF are invalid everywhere}
- utf-8 FF tcl8 \u00FF -1 {} {F5:FF are invalid everywhere}
- utf-8 FF replace \uFFFD -1 {} {F5:FF are invalid everywhere}
- utf-8 FF strict {} 0 {} {F5:FF are invalid everywhere}
-
- utf-8 C0AFE080BFF0818130 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-8}
- utf-8 EDA080EDBFBFEDAF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownW3C} {Unicode Table 3-9}
- utf-8 F4919293FF4180BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0041\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-10}
- utf-8 E180E2F09192F1BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownW3C} {Unicode Table 3.11}
-}
-
-# utf-16le and utf-16be test cases. Note utf-16 cases are automatically
-# generated from these depending on platform endianness. Truncated
-# sequences can only occur at the end of the data (including when the
-# sequence stands alone), hence {solo tail} in some cases.
-lappend encInvalidBytes {*}{
- utf-16le 41 tcl8 \uFFFD -1 {solo tail} {Truncated}
- utf-16le 41 replace \uFFFD -1 {solo tail} {Truncated}
- utf-16le 41 strict {} 0 {solo tail} {Truncated}
- utf-16le 00D8 tcl8 \uD800 -1 {} {Missing low surrogate}
- utf-16le 00D8 replace \uFFFD -1 {knownBug} {Missing low surrogate}
- utf-16le 00D8 strict {} 0 {knownBug} {Missing low surrogate}
- utf-16le 00DC tcl8 \uDC00 -1 {} {Missing high surrogate}
- utf-16le 00DC replace \uFFFD -1 {knownBug} {Missing high surrogate}
- utf-16le 00DC strict {} 0 {knownBug} {Missing high surrogate}
-
- utf-16be 41 tcl8 \uFFFD -1 {solo tail} {Truncated}
- utf-16be 41 replace \uFFFD -1 {solo tail} {Truncated}
- utf-16be 41 strict {} 0 {solo tail} {Truncated}
- utf-16be D800 tcl8 \uD800 -1 {} {Missing low surrogate}
- utf-16be D800 replace \uFFFD -1 {knownBug} {Missing low surrogate}
- utf-16be D800 strict {} 0 {knownBug} {Missing low surrogate}
- utf-16be DC00 tcl8 \uDC00 -1 {} {Missing high surrogate}
- utf-16be DC00 replace \uFFFD -1 {knownBug} {Missing high surrogate}
- utf-16be DC00 strict {} 0 {knownBug} {Missing high surrogate}
-}
-
-# utf-32le and utf-32be test cases. Note utf-32 cases are automatically generated
-# based on these depending on platform endianness. Note truncated tests can only
-# happen when the sequence is at the end (including by itself); hence the
-# {solo tail} ctrl value in some cases.
-lappend encInvalidBytes {*}{
- utf-32le 41 tcl8 \uFFFD -1 {solo tail} {Truncated}
- utf-32le 41 replace \uFFFD -1 {solo} {Truncated}
- utf-32le 41 strict {} 0 {solo tail} {Truncated}
- utf-32le 4100 tcl8 \uFFFD -1 {solo tail} {Truncated}
- utf-32le 4100 replace \uFFFD -1 {solo} {Truncated}
- utf-32le 4100 strict {} 0 {solo tail} {Truncated}
- utf-32le 410000 tcl8 \uFFFD -1 {solo tail} {Truncated}
- utf-32le 410000 replace \uFFFD -1 {solo} {Truncated}
- utf-32le 410000 strict {} 0 {solo tail} {Truncated}
- utf-32le 00D80000 tcl8 \uD800 -1 {} {High-surrogate}
- utf-32le 00D80000 replace \uFFFD -1 {} {High-surrogate}
- utf-32le 00D80000 strict {} 0 {} {High-surrogate}
- utf-32le 00DC0000 tcl8 \uDC00 -1 {} {Low-surrogate}
- utf-32le 00DC0000 replace \uFFFD -1 {} {Low-surrogate}
- utf-32le 00DC0000 strict {} 0 {} {Low-surrogate}
- utf-32le 00D8000000DC0000 tcl8 \uD800\uDC00 -1 {} {High-low-surrogate-pair}
- utf-32le 00D8000000DC0000 replace \uFFFD\uFFFD -1 {} {High-low-surrogate-pair}
- utf-32le 00D8000000DC0000 strict {} 0 {} {High-low-surrogate-pair}
- utf-32le 00001100 tcl8 \UFFFD -1 {} {Out of range}
- utf-32le 00001100 replace \UFFFD -1 {} {Out of range}
- utf-32le 00001100 strict {} 0 {} {Out of range}
- utf-32le FFFFFFFF tcl8 \UFFFD -1 {} {Out of range}
- utf-32le FFFFFFFF replace \UFFFD -1 {} {Out of range}
- utf-32le FFFFFFFF strict {} 0 {} {Out of range}
-
- utf-32be 41 tcl8 \uFFFD -1 {solo tail} {Truncated}
- utf-32be 41 replace \uFFFD -1 {solo tail} {Truncated}
- utf-32be 41 strict {} 0 {solo tail} {Truncated}
- utf-32be 0041 tcl8 \uFFFD -1 {solo tail} {Truncated}
- utf-32be 0041 replace \uFFFD -1 {solo} {Truncated}
- utf-32be 0041 strict {} 0 {solo tail} {Truncated}
- utf-32be 000041 tcl8 \uFFFD -1 {solo tail} {Truncated}
- utf-32be 000041 replace \uFFFD -1 {solo} {Truncated}
- utf-32be 000041 strict {} 0 {solo tail} {Truncated}
- utf-32be 0000D800 tcl8 \uD800 -1 {} {High-surrogate}
- utf-32be 0000D800 replace \uFFFD -1 {} {High-surrogate}
- utf-32be 0000D800 strict {} 0 {} {High-surrogate}
- utf-32be 0000DC00 tcl8 \uDC00 -1 {} {Low-surrogate}
- utf-32be 0000DC00 replace \uFFFD -1 {} {Low-surrogate}
- utf-32be 0000DC00 strict {} 0 {} {Low-surrogate}
- utf-32be 0000D8000000DC00 tcl8 \uD800\uDC00 -1 {} {High-low-surrogate-pair}
- utf-32be 0000D8000000DC00 replace \uFFFD\uFFFD -1 {} {High-low-surrogate-pair}
- utf-32be 0000D8000000DC00 strict {} 0 {} {High-low-surrogate-pair}
- utf-32be 00110000 tcl8 \UFFFD -1 {} {Out of range}
- utf-32be 00110000 replace \UFFFD -1 {} {Out of range}
- utf-32be 00110000 strict {} 0 {} {Out of range}
- utf-32be FFFFFFFF tcl8 \UFFFD -1 {} {Out of range}
- utf-32be FFFFFFFF replace \UFFFD -1 {} {Out of range}
- utf-32be FFFFFFFF strict {} 0 {} {Out of range}
-}
-
-
-# Strings that cannot be encoded for specific encoding / profiles
-# {encoding string profile expectedresult expectedfailindex ctrl comment}
-# <enc,string,profile> should be unique for test ids to be unique.
-# Note utf-16, utf-32 missing because they are automatically
-# generated based on le/be versions.
-# Each entry potentially results in generation of multiple tests.
-# This is controlled by the ctrl field. This should be a list of
-# zero or more of the following:
-# solo - the test data is the string itself
-# lead - the test data is the string followed by a valid suffix
-# tail - the test data is the string preceded by a prefix
-# middle - the test data is the string wrapped by a prefix and suffix
-# If the ctrl field is empty, it is treated as all of the above.
-# Note that any other value by itself (e.g. knownBug) causes the test to
-# be skipped. This is intentional, to skip known bugs.
-# TODO - other encodings
-# TODO - out of range code point (note cannot be generated by \U notation)
-lappend encUnencodableStrings {*}{
- ascii \u00e0 tcl8 3f -1 {} {unencodable}
- ascii \u00e0 strict {} 0 {} {unencodable}
-
- iso8859-1 \u0141 tcl8 3f -1 {} unencodable
- iso8859-1 \u0141 strict {} 0 {} unencodable
-
- utf-8 \uD800 tcl8 eda080 -1 {} High-surrogate
- utf-8 \uD800 strict {} 0 {} High-surrogate
- utf-8 \uDC00 tcl8 edb080 -1 {} Low-surrogate
- utf-8 \uDC00 strict {} 0 {} Low-surrogate
-}
+source [file join [file dirname [info script]] encodingVectors.tcl]
-# Generated tests comparing against ICU
-# TODO - commented out for now as generating a lot of mismatches.
-# source [file join [file dirname [info script]] icuUcmTests.tcl]
# Maps utf-{16,32}{le,be} to utf-16, utf-32 and
# others to "". Used to test utf-16, utf-32 based