From 8d5e59d44d993f6a3351a5e1547eedcc49f0a11d Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 18 Aug 2024 10:18:03 +0000 Subject: Add frag test for utf32 and escaped encodings --- tests/utfext.test | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/tests/utfext.test b/tests/utfext.test index a51b7ec..ad50f1a 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -21,7 +21,9 @@ testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testencoding [llength [info commands testencoding]] namespace eval utftest { - # Format of table, indexed by encoding. + # Format of table, indexed by encoding. The encodings are not exhaustive, + # but cover one of each kind of encoding transform (algorithmic, + # table-driven, stateful, DBCS, MBCS). # Each element is list of lists. Nested lists have following fields # 0 comment (no spaces, might be used to generate id's as well) # The combination of comment and internal hex (2) should be unique. 
@@ -40,7 +42,14 @@ namespace eval utftest { } utf-8 { {bmp {41 c3a9 42} {41 c3a9 42} 2 2} - {nonbmp {41 f09f9880 42} {41 f09f9880 42} 2 3} + {nonbmp-frag-1 {41 f09f9880 42} {41 f09f9880 42} 2 2} + {nonbmp-frag-2 {41 f09f9880 42} {41 f09f9880 42} 3 3} + {nonbmp-frag-3 {41 f09f9880 42} {41 f09f9880 42} 4 4} + {null {41 c080 42} {41 00 42} 2 -1} + } + cesu-8 { + {bmp {41 c3a9 42} {41 c3a9 42} 2 2} + {nonbmp {41 f09f9880 42} {41 eda0bd edb880 42} 3 3} {null {41 c080 42} {41 00 42} 2 -1} } utf-16le { @@ -55,16 +64,35 @@ namespace eval utftest { {split-surrogate {41 f09f9080 42} {0041 d83d dc00 0042} 3 4} {null {41 c080 42} {0041 0000 0042} 2 3} } + utf-32le { + {bmp {41 c3a9 42} {41000000 e9000000 42000000} 2 3} + {nonbmp {41 f09f9880 42} {41000000 00f60100 42000000} 4 6} + {null {41 c080 42} {41000000 00000000 42000000} 2 3} + } + utf-32be { + {bmp {41 c3a9 42} {00000041 000000e9 00000042} 2 3} + {nonbmp {41 f09f9880 42} {00000041 0001f600 00000042} 4 3} + {null {41 c080 42} {00000041 00000000 00000042} 2 3} + } iso8859-1 { {basic {41 c3a9 42} 41e942 2 -1} {null {41 c080 42} 410042 2 -1} } + iso8859-3 { + {basic {41 c4a0 42} 41d542 2 -1} + {null {41 c080 42} 410042 2 -1} + } shiftjis { {basic {41 e4b98e 42} {41 8cc1 42} 3 2} } jis0208 { {basic {e4b98e e590be} {3843 3863} 1 1} } + iso2022-jp { + {frag-in-leadescape {58 e4b98e 5a} {58 1b2442 3843 1b2842 5a} 2 2} + {frag-in-char {58 e4b98e 5a} {58 1b2442 3843 1b2842 5a} 2 5} + {frag-in-trailescape {58 e4b98e 5a} {58 1b2442 3843 1b2842 5a} 2 8} + } } # Return a binary string containing nul terminator for encoding -- cgit v0.12