summaryrefslogtreecommitdiffstats
path: root/tests/encodingVectors.tcl
blob: 38b3da5cb216bbe6b35eab6063d2a4666cb99fca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
# This file contains test vectors for verifying various encodings. They are
# stored in a common file so that they can be sourced into the various test
# modules that are dependent on encodings. This file contains statically defined
# test vectors. In addition, it sources the ICU-generated test vectors from
# icuUcmTests.tcl.
#
# Note that sourcing the file will reinitialize any existing encoding test
# vectors.
#

# List of defined encoding profiles
set encProfiles {tcl8 strict replace}
set encDefaultProfile tcl8; # Should reflect the default from implementation

# encValidStrings - Table of valid strings.
#
# Each row is <ENCODING STR BYTES CTRL COMMENT>
# The pair <ENCODING,STR> should be unique for generated test ids to be unique.
# STR is a string that can be encoded in the encoding ENCODING resulting
# in the byte sequence BYTES. The CTRL field is a list that controls test
# generation. It may contain zero or more of `solo`, `lead`, `tail` and
# `middle` indicating that the generated tests should include the string
# by itself, as the lead of a longer string, as the tail of a longer string
# and in the middle of a longer string. If CTRL is empty, it is treated as
# containing all four of the above. The CTRL field may also contain the
# words knownBug or knownW3C which will cause the test generation for that
# vector to be skipped.
#
# utf-16, utf-32 missing because they are automatically
# generated based on le/be versions.
set encValidStrings {}; # Reset the table

lappend encValidStrings {*}{
    ascii    \u0000 00 {} {Lowest ASCII}
    ascii    \u007F 7F {} {Highest ASCII}
    ascii    \u007D 7D {} {Brace - just to verify test scripts are escaped correctly}
    ascii    \u007B 7B {} {Terminating brace - just to verify test scripts are escaped correctly}

    utf-8    \u0000 00 {} {Unicode Table 3.7 Row 1}
    utf-8    \u007F 7F {} {Unicode Table 3.7 Row 1}
    utf-8    \u0080 C280 {} {Unicode Table 3.7 Row 2}
    utf-8    \u07FF DFBF {} {Unicode Table 3.7 Row 2}
    utf-8    \u0800 E0A080 {} {Unicode Table 3.7 Row 3}
    utf-8    \u0FFF E0BFBF {} {Unicode Table 3.7 Row 3}
    utf-8    \u1000 E18080 {} {Unicode Table 3.7 Row 4}
    utf-8    \uCFFF ECBFBF {} {Unicode Table 3.7 Row 4}
    utf-8    \uD000 ED8080 {} {Unicode Table 3.7 Row 5}
    utf-8    \uD7FF ED9FBF {} {Unicode Table 3.7 Row 5}
    utf-8    \uE000 EE8080 {} {Unicode Table 3.7 Row 6}
    utf-8    \uFFFF EFBFBF {} {Unicode Table 3.7 Row 6}
    utf-8    \U10000 F0908080 {} {Unicode Table 3.7 Row 7}
    utf-8    \U3FFFF F0BFBFBF {} {Unicode Table 3.7 Row 7}
    utf-8    \U40000 F1808080 {} {Unicode Table 3.7 Row 8}
    utf-8    \UFFFFF F3BFBFBF {} {Unicode Table 3.7 Row 8}
    utf-8    \U100000 F4808080 {} {Unicode Table 3.7 Row 9}
    utf-8    \U10FFFF F48FBFBF {} {Unicode Table 3.7 Row 9}
    utf-8    A\u03A9\u8A9E\U00010384 41CEA9E8AA9EF0908E84 {} {Unicode 2.5}

    utf-16le \u0000 0000 {} {Lowest code unit}
    utf-16le \uD7FF FFD7 {} {Below high surrogate range}
    utf-16le \uE000 00E0 {} {Above low surrogate range}
    utf-16le \uFFFF FFFF {} {Highest code unit}
    utf-16le \U010000 00D800DC {} {First surrogate pair}
    utf-16le \U10FFFF FFDBFFDF {} {First surrogate pair}
    utf-16le A\u03A9\u8A9E\U00010384 4100A9039E8A00D884DF {} {Unicode 2.5}

    utf-16be \u0000 0000 {} {Lowest code unit}
    utf-16be \uD7FF D7FF {} {Below high surrogate range}
    utf-16be \uE000 E000 {} {Above low surrogate range}
    utf-16be \uFFFF FFFF {} {Highest code unit}
    utf-16be \U010000 D800DC00 {} {First surrogate pair}
    utf-16be \U10FFFF DBFFDFFF {} {First surrogate pair}
    utf-16be A\u03A9\u8A9E\U00010384 004103A98A9ED800DF84 {} {Unicode 2.5}

    utf-32le \u0000 00000000 {} {Lowest code unit}
    utf-32le \uFFFF FFFF0000 {} {Highest BMP}
    utf-32le \U010000 00000100 {} {First supplementary}
    utf-32le \U10FFFF ffff1000 {} {Last supplementary}
    utf-32le A\u03A9\u8A9E\U00010384 41000000A90300009E8A000084030100 {} {Unicode 2.5}

    utf-32be \u0000 00000000 {} {Lowest code unit}
    utf-32be \uFFFF 0000FFFF {} {Highest BMP}
    utf-32be \U010000 00010000 {} {First supplementary}
    utf-32be \U10FFFF 0010FFFF {} {Last supplementary}
    utf-32be A\u03A9\u8A9E\U00010384 00000041000003A900008A9E00010384 {} {Unicode 2.5}
}

# encInvalidBytes - Table of invalid byte sequences
# These are byte sequences that should appear for an encoding. Each row is
# of the form
#    <ENCODING BYTES PROFILE EXPECTEDRESULT EXPECTEDFAILINDEX CTRL COMMENT>
# The triple <ENCODING,BYTES,PROFILE> should be unique for test ids to be
# unique. BYTES is a byte sequence that is invalid. EXPECTEDRESULT is the
# expected string when the bytes are decoded using the PROFILE profile.
# FAILINDEX gives the expected index of the invalid byte under that profile. The
# CTRL field is a list that controls test generation. It may contain zero or
# more of `solo`, `lead`, `tail` and `middle` indicating that the generated the
# tail of a longer and in the middle of a longer string. If empty, it is treated
# as containing all four of the above. The CTRL field may also contain the words
# knownBug or knownW3C which will cause the test generation for that vector to
# be skipped.
#
# utf-32 missing because they are automatically generated based on le/be
# versions.
set encInvalidBytes {}; # Reset the table

# ascii - Any byte above 127 is invalid and is mapped
# to the same numeric code point except for the range
# 80-9F which is treated as cp1252.
# This tests the TableToUtfProc code path.
lappend encInvalidBytes {*}{
    ascii 80 tcl8    \u20AC -1 {knownBug} {map to cp1252}
    ascii 80 replace \uFFFD -1 {} {Smallest invalid byte}
    ascii 80 strict  {}      0 {} {Smallest invalid byte}
    ascii 81 tcl8    \u0081 -1 {knownBug} {map to cp1252}
    ascii 82 tcl8    \u201A -1 {knownBug} {map to cp1252}
    ascii 83 tcl8    \u0192 -1 {knownBug} {map to cp1252}
    ascii 84 tcl8    \u201E -1 {knownBug} {map to cp1252}
    ascii 85 tcl8    \u2026 -1 {knownBug} {map to cp1252}
    ascii 86 tcl8    \u2020 -1 {knownBug} {map to cp1252}
    ascii 87 tcl8    \u2021 -1 {knownBug} {map to cp1252}
    ascii 88 tcl8    \u0276 -1 {knownBug} {map to cp1252}
    ascii 89 tcl8    \u2030 -1 {knownBug} {map to cp1252}
    ascii 8A tcl8    \u0160 -1 {knownBug} {map to cp1252}
    ascii 8B tcl8    \u2039 -1 {knownBug} {map to cp1252}
    ascii 8C tcl8    \u0152 -1 {knownBug} {map to cp1252}
    ascii 8D tcl8    \u008D -1 {knownBug} {map to cp1252}
    ascii 8E tcl8    \u017D -1 {knownBug} {map to cp1252}
    ascii 8F tcl8    \u008F -1 {knownBug} {map to cp1252}
    ascii 90 tcl8    \u0090 -1 {knownBug} {map to cp1252}
    ascii 91 tcl8    \u2018 -1 {knownBug} {map to cp1252}
    ascii 92 tcl8    \u2019 -1 {knownBug} {map to cp1252}
    ascii 93 tcl8    \u201C -1 {knownBug} {map to cp1252}
    ascii 94 tcl8    \u201D -1 {knownBug} {map to cp1252}
    ascii 95 tcl8    \u2022 -1 {knownBug} {map to cp1252}
    ascii 96 tcl8    \u2013 -1 {knownBug} {map to cp1252}
    ascii 97 tcl8    \u2014 -1 {knownBug} {map to cp1252}
    ascii 98 tcl8    \u02DC -1 {knownBug} {map to cp1252}
    ascii 99 tcl8    \u2122 -1 {knownBug} {map to cp1252}
    ascii 9A tcl8    \u0161 -1 {knownBug} {map to cp1252}
    ascii 9B tcl8    \u203A -1 {knownBug} {map to cp1252}
    ascii 9C tcl8    \u0153 -1 {knownBug} {map to cp1252}
    ascii 9D tcl8    \u009D -1 {knownBug} {map to cp1252}
    ascii 9E tcl8    \u017E -1 {knownBug} {map to cp1252}
    ascii 9F tcl8    \u0178 -1 {knownBug} {map to cp1252}

    ascii FF tcl8    \u00FF -1 {} {Largest invalid byte}
    ascii FF replace \uFFFD -1 {} {Largest invalid byte}
    ascii FF strict  {}      0 {} {Largest invalid byte}
}

# utf-8 - valid sequences based on Table 3.7 in the Unicode
# standard.
#
# Code Points        First   Second  Third   Fourth Byte
# U+0000..U+007F     00..7F
# U+0080..U+07FF     C2..DF  80..BF
# U+0800..U+0FFF     E0      A0..BF  80..BF
# U+1000..U+CFFF     E1..EC  80..BF  80..BF
# U+D000..U+D7FF     ED      80..9F  80..BF
# U+E000..U+FFFF     EE..EF  80..BF  80..BF
# U+10000..U+3FFFF   F0      90..BF  80..BF  80..BF
# U+40000..U+FFFFF   F1..F3  80..BF  80..BF  80..BF
# U+100000..U+10FFFF F4      80..8F  80..BF  80..BF
#
# Tests below are based on the "gaps" in the above table. Note ascii test
# values are repeated because internally a different code path is used
# (UtfToUtfProc).
# Note C0, C1, F5:FF are invalid bytes ANYWHERE. Exception is C080
lappend encInvalidBytes {*}{
    utf-8 80 tcl8    \u20AC -1 {} {map to cp1252}
    utf-8 80 replace \uFFFD -1 {} {Smallest invalid byte}
    utf-8 80 strict  {}      0 {} {Smallest invalid byte}
    utf-8 81 tcl8    \u0081 -1 {} {map to cp1252}
    utf-8 82 tcl8    \u201A -1 {} {map to cp1252}
    utf-8 83 tcl8    \u0192 -1 {} {map to cp1252}
    utf-8 84 tcl8    \u201E -1 {} {map to cp1252}
    utf-8 85 tcl8    \u2026 -1 {} {map to cp1252}
    utf-8 86 tcl8    \u2020 -1 {} {map to cp1252}
    utf-8 87 tcl8    \u2021 -1 {} {map to cp1252}
    utf-8 88 tcl8    \u02C6 -1 {} {map to cp1252}
    utf-8 89 tcl8    \u2030 -1 {} {map to cp1252}
    utf-8 8A tcl8    \u0160 -1 {} {map to cp1252}
    utf-8 8B tcl8    \u2039 -1 {} {map to cp1252}
    utf-8 8C tcl8    \u0152 -1 {} {map to cp1252}
    utf-8 8D tcl8    \u008D -1 {} {map to cp1252}
    utf-8 8E tcl8    \u017D -1 {} {map to cp1252}
    utf-8 8F tcl8    \u008F -1 {} {map to cp1252}
    utf-8 90 tcl8    \u0090 -1 {} {map to cp1252}
    utf-8 91 tcl8    \u2018 -1 {} {map to cp1252}
    utf-8 92 tcl8    \u2019 -1 {} {map to cp1252}
    utf-8 93 tcl8    \u201C -1 {} {map to cp1252}
    utf-8 94 tcl8    \u201D -1 {} {map to cp1252}
    utf-8 95 tcl8    \u2022 -1 {} {map to cp1252}
    utf-8 96 tcl8    \u2013 -1 {} {map to cp1252}
    utf-8 97 tcl8    \u2014 -1 {} {map to cp1252}
    utf-8 98 tcl8    \u02DC -1 {} {map to cp1252}
    utf-8 99 tcl8    \u2122 -1 {} {map to cp1252}
    utf-8 9A tcl8    \u0161 -1 {} {map to cp1252}
    utf-8 9B tcl8    \u203A -1 {} {map to cp1252}
    utf-8 9C tcl8    \u0153 -1 {} {map to cp1252}
    utf-8 9D tcl8    \u009D -1 {} {map to cp1252}
    utf-8 9E tcl8    \u017E -1 {} {map to cp1252}
    utf-8 9F tcl8    \u0178 -1 {} {map to cp1252}

    utf-8 C0 tcl8    \u00C0 -1 {} {C0 is invalid anywhere}
    utf-8 C0 strict  {}      0 {} {C0 is invalid anywhere}
    utf-8 C0 replace \uFFFD -1 {} {C0 is invalid anywhere}
    utf-8 C080 tcl8    \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8}
    utf-8 C080 strict  {}      0 {} {C080 -> invalid}
    utf-8 C080 replace \uFFFD -1 {} {C080 -> single replacement char}
    utf-8 C0A2 tcl8    \u00C0\u00A2 -1 {} {websec.github.io - A}
    utf-8 C0A2 replace \uFFFD\uFFFD -1 {} {websec.github.io - A}
    utf-8 C0A2 strict  {}            0 {} {websec.github.io - A}
    utf-8 C0A7 tcl8    \u00C0\u00A7 -1 {} {websec.github.io - double quote}
    utf-8 C0A7 replace \uFFFD\uFFFD -1 {} {websec.github.io - double quote}
    utf-8 C0A7 strict  {}            0 {} {websec.github.io - double quote}
    utf-8 C0AE tcl8    \u00C0\u00AE -1 {} {websec.github.io - full stop}
    utf-8 C0AE replace \uFFFD\uFFFD -1 {} {websec.github.io - full stop}
    utf-8 C0AE strict  {}            0 {} {websec.github.io - full stop}
    utf-8 C0AF tcl8    \u00C0\u00AF -1 {} {websec.github.io - solidus}
    utf-8 C0AF replace \uFFFD\uFFFD -1 {} {websec.github.io - solidus}
    utf-8 C0AF strict  {}            0 {} {websec.github.io - solidus}

    utf-8 C1 tcl8    \u00C1 -1 {} {C1 is invalid everywhere}
    utf-8 C1 replace \uFFFD -1 {} {C1 is invalid everywhere}
    utf-8 C1 strict  {}      0 {} {C1 is invalid everywhere}
    utf-8 C181 tcl8    \u00C1\u0081 -1 {} {websec.github.io - base test (A)}
    utf-8 C181 replace \uFFFD\uFFFD -1 {} {websec.github.io - base test (A)}
    utf-8 C181 strict  {}            0 {} {websec.github.io - base test (A)}
    utf-8 C19C tcl8    \u00C1\u0153 -1 {} {websec.github.io - reverse solidus}
    utf-8 C19C replace \uFFFD\uFFFD -1 {} {websec.github.io - reverse solidus}
    utf-8 C19C strict  {}            0 {} {websec.github.io - reverse solidus}

    utf-8 C2 tcl8      \u00C2     -1 {} {Missing trail byte}
    utf-8 C2 replace   \uFFFD     -1 {} {Missing trail byte}
    utf-8 C2 strict    {}          0 {} {Missing trail byte}
    utf-8 C27F tcl8    \u00C2\x7F -1 {} {Trail byte must be 80:BF}
    utf-8 C27F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF}
    utf-8 C27F strict  {}          0 {} {Trail byte must be 80:BF}
    utf-8 DF tcl8      \u00DF     -1 {} {Missing trail byte}
    utf-8 DF replace   \uFFFD     -1 {} {Missing trail byte}
    utf-8 DF strict    {}          0 {} {Missing trail byte}
    utf-8 DF7F tcl8    \u00DF\x7F -1 {} {Trail byte must be 80:BF}
    utf-8 DF7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF}
    utf-8 DF7F strict  {}          0 {} {Trail byte must be 80:BF}
    utf-8 DFE0A080 tcl8    \u00DF\u0800 -1 {} {Invalid trail byte is start of valid sequence}
    utf-8 DFE0A080 replace \uFFFD\u0800 -1 {} {Invalid trail byte is start of valid sequence}
    utf-8 DFE0A080 strict  {}            0 {} {Invalid trail byte is start of valid sequence}

    utf-8 E0 tcl8      \u00E0     -1 {} {Missing trail byte}
    utf-8 E0 replace   \uFFFD     -1 {} {Missing trail byte}
    utf-8 E0 strict    {}          0 {} {Missing trail byte}
    utf-8 E080 tcl8      \u00E0\u20AC   -1 {} {First trail byte must be A0:BF}
    utf-8 E080 replace   \uFFFD\uFFFD   -1 {} {First trail byte must be A0:BF}
    utf-8 E080 strict    {}              0 {} {First trail byte must be A0:BF}
    utf-8 E0819C tcl8    \u00E0\u0081\u0153 -1 {} {websec.github.io - reverse solidus}
    utf-8 E0819C replace \uFFFD\uFFFD\uFFFD -1 {} {websec.github.io - reverse solidus}
    utf-8 E0819C strict  {}                  0 {} {websec.github.io - reverse solidus}
    utf-8 E09F tcl8      \u00E0\u0178   -1 {} {First trail byte must be A0:BF}
    utf-8 E09F replace   \uFFFD\uFFFD   -1 {} {First trail byte must be A0:BF}
    utf-8 E09F strict    {}              0 {} {First trail byte must be A0:BF}
    utf-8 E0A0 tcl8      \u00E0\u00A0   -1 {} {Missing second trail byte}
    utf-8 E0A0 replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 E0A0 strict    {}              0 {} {Missing second trail byte}
    utf-8 E0BF tcl8      \u00E0\u00BF   -1 {} {Missing second trail byte}
    utf-8 E0BF replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 E0BF strict    {}              0 {} {Missing second trail byte}
    utf-8 E0A07F tcl8    \u00E0\u00A0\x7F   -1 {}     {Second trail byte must be 80:BF}
    utf-8 E0A07F replace \uFFFD\u7F         -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 E0A07F strict  {}                  0 {}         {Second trail byte must be 80:BF}
    utf-8 E0BF7F tcl8    \u00E0\u00BF\x7F   -1 {}         {Second trail byte must be 80:BF}
    utf-8 E0BF7F replace \uFFFD\u7F         -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 E0BF7F strict  {}                  0 {}         {Second trail byte must be 80:BF}

    utf-8 E1 tcl8      \u00E1     -1 {} {Missing trail byte}
    utf-8 E1 replace   \uFFFD     -1 {} {Missing trail byte}
    utf-8 E1 strict    {}          0 {} {Missing trail byte}
    utf-8 E17F tcl8    \u00E1\x7F -1 {} {Trail byte must be 80:BF}
    utf-8 E17F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF}
    utf-8 E17F strict  {}          0 {} {Trail byte must be 80:BF}
    utf-8 E181 tcl8      \u00E1\u0081   -1 {} {Missing second trail byte}
    utf-8 E181 replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 E181 strict    {}              0 {} {Missing second trail byte}
    utf-8 E1BF tcl8      \u00E1\u00BF   -1 {} {Missing second trail byte}
    utf-8 E1BF replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 E1BF strict    {}              0 {} {Missing second trail byte}
    utf-8 E1807F tcl8    \u00E1\u20AC\x7F   -1 {} {Second trail byte must be 80:BF}
    utf-8 E1807F replace \uFFFD\u7F         -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 E1807F strict  {}                  0 {}         {Second trail byte must be 80:BF}
    utf-8 E1BF7F tcl8    \u00E1\u00BF\x7F   -1 {}         {Second trail byte must be 80:BF}
    utf-8 E1BF7F replace \uFFFD\u7F         -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 E1BF7F strict  {}                  0 {}         {Second trail byte must be 80:BF}
    utf-8 EC tcl8      \u00EC     -1 {} {Missing trail byte}
    utf-8 EC replace   \uFFFD     -1 {} {Missing trail byte}
    utf-8 EC strict    {}          0 {} {Missing trail byte}
    utf-8 EC7F tcl8    \u00EC\x7F -1 {} {Trail byte must be 80:BF}
    utf-8 EC7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF}
    utf-8 EC7F strict  {}          0 {} {Trail byte must be 80:BF}
    utf-8 EC81 tcl8      \u00EC\u0081   -1 {} {Missing second trail byte}
    utf-8 EC81 replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 EC81 strict    {}              0 {} {Missing second trail byte}
    utf-8 ECBF tcl8      \u00EC\u00BF   -1 {} {Missing second trail byte}
    utf-8 ECBF replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 ECBF strict    {}              0 {} {Missing second trail byte}
    utf-8 EC807F tcl8    \u00EC\u20AC\x7F   -1 {} {Second trail byte must be 80:BF}
    utf-8 EC807F replace \uFFFD\u7F         -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 EC807F strict  {}                  0 {}         {Second trail byte must be 80:BF}
    utf-8 ECBF7F tcl8    \u00EC\u00BF\x7F   -1 {}         {Second trail byte must be 80:BF}
    utf-8 ECBF7F replace \uFFFD\u7F         -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 ECBF7F strict  {}                  0 {}         {Second trail byte must be 80:BF}

    utf-8 ED tcl8       \u00ED        -1 {} {Missing trail byte}
    utf-8 ED replace    \uFFFD        -1 {} {Missing trail byte}
    utf-8 ED strict     {}             0 {} {Missing trail byte}
    utf-8 ED7F tcl8     \u00ED\u7F    -1 {} {First trail byte must be 80:9F}
    utf-8 ED7F replace  \uFFFD\u7F    -1 {} {First trail byte must be 80:9F}
    utf-8 ED7F strict   {}             0 {} {First trail byte must be 80:9F}
    utf-8 EDA0 tcl8     \u00ED\u00A0  -1 {} {First trail byte must be 80:9F}
    utf-8 EDA0 replace  \uFFFD\uFFFD  -1 {} {First trail byte must be 80:9F}
    utf-8 EDA0 strict   {}             0 {} {First trail byte must be 80:9F}
    utf-8 ED81 tcl8      \u00ED\u0081   -1 {} {Missing second trail byte}
    utf-8 ED81 replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 ED81 strict    {}              0 {} {Missing second trail byte}
    utf-8 EDBF tcl8      \u00ED\u00BF   -1 {} {Missing second trail byte}
    utf-8 EDBF replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 EDBF strict    {}              0 {} {Missing second trail byte}
    utf-8 ED807F tcl8      \u00ED\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 ED807F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 ED807F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 ED9F7F tcl8      \u00ED\u0178\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 ED9F7F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 ED9F7F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 EDA080 tcl8       \uD800          -1 {}  {High surrogate}
    utf-8 EDA080 replace    \uFFFD          -1 {knownBug}  {High surrogate}
    utf-8 EDA080 strict     {}               0 {}  {High surrogate}
    utf-8 EDAFBF tcl8       \uDBFF          -1 {}  {High surrogate}
    utf-8 EDAFBF replace    \uFFFD          -1 {knownBug}  {High surrogate}
    utf-8 EDAFBF strict     {}               0 {}  {High surrogate}
    utf-8 EDB080 tcl8       \uDC00          -1 {}  {Low surrogate}
    utf-8 EDB080 replace    \uFFFD          -1 {knownBug}  {Low surrogate}
    utf-8 EDB080 strict     {}               0 {}  {Low surrogate}
    utf-8 EDBFBF tcl8       \uDFFF          -1 {knownBug}  {Low surrogate}
    utf-8 EDBFBF replace    \uFFFD          -1 {knownBug}  {Low surrogate}
    utf-8 EDBFBF strict     {}               0 {}  {Low surrogate}
    utf-8 EDA080EDB080 tcl8 \U00010000      -1 {knownBug}  {High low surrogate pair}
    utf-8 EDA080EDB080 replace \uFFFD\uFFFD -1 {knownBug}  {High low surrogate pair}
    utf-8 EDA080EDB080 strict {}             0 {}  {High low surrogate pair}
    utf-8 EDAFBFEDBFBF tcl8 \U0010FFFF      -1 {knownBug}  {High low surrogate pair}
    utf-8 EDAFBFEDBFBF replace \uFFFD\uFFFD -1 {knownBug}  {High low surrogate pair}
    utf-8 EDAFBFEDBFBF strict {}             0 {}  {High low surrogate pair}

    utf-8 EE tcl8       \u00EE        -1 {} {Missing trail byte}
    utf-8 EE replace    \uFFFD        -1 {} {Missing trail byte}
    utf-8 EE strict     {}             0 {} {Missing trail byte}
    utf-8 EE7F tcl8     \u00EE\u7F    -1 {} {First trail byte must be 80:BF}
    utf-8 EE7F replace  \uFFFD\u7F    -1 {} {First trail byte must be 80:BF}
    utf-8 EE7F strict   {}             0 {} {First trail byte must be 80:BF}
    utf-8 EED0 tcl8     \u00EE\u00D0  -1 {} {First trail byte must be 80:BF}
    utf-8 EED0 replace  \uFFFD\uFFFD  -1 {} {First trail byte must be 80:BF}
    utf-8 EED0 strict   {}             0 {} {First trail byte must be 80:BF}
    utf-8 EE81 tcl8      \u00EE\u0081   -1 {} {Missing second trail byte}
    utf-8 EE81 replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 EE81 strict    {}              0 {} {Missing second trail byte}
    utf-8 EEBF tcl8      \u00EE\u00BF   -1 {} {Missing second trail byte}
    utf-8 EEBF replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 EEBF strict    {}              0 {} {Missing second trail byte}
    utf-8 EE807F tcl8      \u00EE\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 EE807F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 EE807F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 EEBF7F tcl8      \u00EE\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 EEBF7F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 EEBF7F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 EF tcl8       \u00EF        -1 {} {Missing trail byte}
    utf-8 EF replace    \uFFFD        -1 {} {Missing trail byte}
    utf-8 EF strict     {}             0 {} {Missing trail byte}
    utf-8 EF7F tcl8     \u00EF\u7F    -1 {} {First trail byte must be 80:BF}
    utf-8 EF7F replace  \uFFFD\u7F    -1 {} {First trail byte must be 80:BF}
    utf-8 EF7F strict   {}             0 {} {First trail byte must be 80:BF}
    utf-8 EFD0 tcl8     \u00EF\u00D0  -1 {} {First trail byte must be 80:BF}
    utf-8 EFD0 replace  \uFFFD\uFFFD  -1 {} {First trail byte must be 80:BF}
    utf-8 EFD0 strict   {}             0 {} {First trail byte must be 80:BF}
    utf-8 EF81 tcl8      \u00EF\u0081   -1 {} {Missing second trail byte}
    utf-8 EF81 replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 EF81 strict    {}              0 {} {Missing second trail byte}
    utf-8 EFBF tcl8      \u00EF\u00BF   -1 {} {Missing second trail byte}
    utf-8 EFBF replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 EFBF strict    {}              0 {} {Missing second trail byte}
    utf-8 EF807F tcl8      \u00EF\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 EF807F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 EF807F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 EFBF7F tcl8      \u00EF\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 EFBF7F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 EFBF7F strict    {}                0 {}  {Second trail byte must be 80:BF}

    utf-8 F0 tcl8       \u00F0        -1 {} {Missing trail byte}
    utf-8 F0 replace    \uFFFD        -1 {} {Missing trail byte}
    utf-8 F0 strict     {}             0 {} {Missing trail byte}
    utf-8 F080 tcl8     \u00F0\u20AC  -1 {} {First trail byte must be 90:BF}
    utf-8 F080 replace  \uFFFD        -1 {knownW3C} {First trail byte must be 90:BF}
    utf-8 F080 strict   {}             0 {} {First trail byte must be 90:BF}
    utf-8 F08F tcl8     \u00F0\u8F    -1 {} {First trail byte must be 90:BF}
    utf-8 F08F replace  \uFFFD        -1 {knownW3C} {First trail byte must be 90:BF}
    utf-8 F08F strict   {}             0 {} {First trail byte must be 90:BF}
    utf-8 F0D0 tcl8     \u00F0\u00D0  -1 {} {First trail byte must be 90:BF}
    utf-8 F0D0 replace  \uFFFD\uFFFD  -1 {} {First trail byte must be 90:BF}
    utf-8 F0D0 strict   {}             0 {} {First trail byte must be 90:BF}
    utf-8 F090 tcl8      \u00F0\u0090   -1 {} {Missing second trail byte}
    utf-8 F090 replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 F090 strict    {}              0 {} {Missing second trail byte}
    utf-8 F0BF tcl8      \u00F0\u00BF   -1 {} {Missing second trail byte}
    utf-8 F0BF replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 F0BF strict    {}              0 {} {Missing second trail byte}
    utf-8 F0907F tcl8      \u00F0\u0090\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 F0907F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 F0907F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 F0BF7F tcl8      \u00F0\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 F0BF7F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 F0BF7F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 F090BF tcl8      \u00F0\u0090\u00BF   -1 {} {Missing third trail byte}
    utf-8 F090BF replace   \uFFFD         -1 {knownW3C} {Missing third trail byte}
    utf-8 F090BF strict    {}              0 {} {Missing third trail byte}
    utf-8 F0BF81 tcl8      \u00F0\u00BF\u0081   -1 {} {Missing third trail byte}
    utf-8 F0BF81 replace   \uFFFD         -1 {knownW3C} {Missing third trail byte}
    utf-8 F0BF81 strict    {}              0 {} {Missing third trail byte}
    utf-8 F0BF807F tcl8      \u00F0\u00BF\u20AC\x7F   -1 {} {Third trail byte must be 80:BF}
    utf-8 F0BF817F replace   \uFFFD\x7F           -1 {knownW3C} {Third trail byte must be 80:BF}
    utf-8 F0BF817F strict    {}              0 {} {Third trail byte must be 80:BF}
    utf-8 F090BFD0 tcl8      \u00F0\u0090\u00BF\u00D0   -1 {} {Third trail byte must be 80:BF}
    utf-8 F090BFD0 replace   \uFFFD         -1 {knownW3C} {Third trail byte must be 80:BF}
    utf-8 F090BFD0 strict    {}              0 {} {Third trail byte must be 80:BF}

    utf-8 F1 tcl8       \u00F1        -1 {} {Missing trail byte}
    utf-8 F1 replace    \uFFFD        -1 {} {Missing trail byte}
    utf-8 F1 strict     {}             0 {} {Missing trail byte}
    utf-8 F17F tcl8     \u00F1\u7F    -1 {} {First trail byte must be 80:BF}
    utf-8 F17F replace  \uFFFD        -1 {knownW3C} {First trail byte must be 80:BF}
    utf-8 F17F strict   {}             0 {} {First trail byte must be 80:BF}
    utf-8 F1D0 tcl8     \u00F1\u00D0  -1 {} {First trail byte must be 80:BF}
    utf-8 F1D0 replace  \uFFFD\uFFFD  -1 {} {First trail byte must be 80:BF}
    utf-8 F1D0 strict   {}             0 {} {First trail byte must be 80:BF}
    utf-8 F180 tcl8      \u00F1\u20AC   -1 {} {Missing second trail byte}
    utf-8 F180 replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 F180 strict    {}              0 {} {Missing second trail byte}
    utf-8 F1BF tcl8      \u00F1\u00BF   -1 {} {Missing second trail byte}
    utf-8 F1BF replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 F1BF strict    {}              0 {} {Missing second trail byte}
    utf-8 F1807F tcl8      \u00F1\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 F1807F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 F1807F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 F1BF7F tcl8      \u00F1\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 F1BF7F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 F1BF7F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 F180BF tcl8      \u00F1\u20AC\u00BF   -1 {} {Missing third trail byte}
    utf-8 F180BF replace   \uFFFD         -1 {knownW3C} {Missing third trail byte}
    utf-8 F180BF strict    {}              0 {} {Missing third trail byte}
    utf-8 F1BF81 tcl8      \u00F1\u00BF\u0081   -1 {} {Missing third trail byte}
    utf-8 F1BF81 replace   \uFFFD         -1 {knownW3C} {Missing third trail byte}
    utf-8 F1BF81 strict    {}              0 {} {Missing third trail byte}
    utf-8 F1BF807F tcl8      \u00F1\u00BF\u20AC\x7F   -1 {} {Third trail byte must be 80:BF}
    utf-8 F1BF817F replace   \uFFFD\x7F           -1 {knownW3C} {Third trail byte must be 80:BF}
    utf-8 F1BF817F strict    {}              0 {} {Third trail byte must be 80:BF}
    utf-8 F180BFD0 tcl8      \u00F1\u20AC\u00BF\u00D0   -1 {} {Third trail byte must be 80:BF}
    utf-8 F180BFD0 replace   \uFFFD         -1 {knownW3C} {Third trail byte must be 80:BF}
    utf-8 F180BFD0 strict    {}              0 {} {Third trail byte must be 80:BF}
    utf-8 F3 tcl8       \u00F3        -1 {} {Missing trail byte}
    utf-8 F3 replace    \uFFFD        -1 {} {Missing trail byte}
    utf-8 F3 strict     {}             0 {} {Missing trail byte}
    utf-8 F37F tcl8     \u00F3\x7F    -1 {} {First trail byte must be 80:BF}
    utf-8 F37F replace  \uFFFD        -1 {knownW3C} {First trail byte must be 80:BF}
    utf-8 F37F strict   {}             0 {} {First trail byte must be 80:BF}
    utf-8 F3D0 tcl8     \u00F3\u00D0  -1 {} {First trail byte must be 80:BF}
    utf-8 F3D0 replace  \uFFFD\uFFFD  -1 {} {First trail byte must be 80:BF}
    utf-8 F3D0 strict   {}             0 {} {First trail byte must be 80:BF}
    utf-8 F380 tcl8      \u00F3\u20AC   -1 {} {Missing second trail byte}
    utf-8 F380 replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 F380 strict    {}              0 {} {Missing second trail byte}
    utf-8 F3BF tcl8      \u00F3\u00BF   -1 {} {Missing second trail byte}
    utf-8 F3BF replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 F3BF strict    {}              0 {} {Missing second trail byte}
    utf-8 F3807F tcl8      \u00F3\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 F3807F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 F3807F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 F3BF7F tcl8      \u00F3\u00BF\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 F3BF7F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 F3BF7F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 F380BF tcl8      \u00F3\u20AC\u00BF   -1 {} {Missing third trail byte}
    utf-8 F380BF replace   \uFFFD         -1 {knownW3C} {Missing third trail byte}
    utf-8 F380BF strict    {}              0 {} {Missing third trail byte}
    utf-8 F3BF81 tcl8      \u00F3\u00BF\u0081   -1 {} {Missing third trail byte}
    utf-8 F3BF81 replace   \uFFFD         -1 {knownW3C} {Missing third trail byte}
    utf-8 F3BF81 strict    {}              0 {} {Missing third trail byte}
    utf-8 F3BF807F tcl8      \u00F3\u00BF\u20AC\x7F   -1 {} {Third trail byte must be 80:BF}
    utf-8 F3BF817F replace   \uFFFD\x7F           -1 {knownW3C} {Third trail byte must be 80:BF}
    utf-8 F3BF817F strict    {}              0 {} {Third trail byte must be 80:BF}
    utf-8 F380BFD0 tcl8      \u00F3\u20AC\u00BF\u00D0   -1 {} {Third trail byte must be 80:BF}
    utf-8 F380BFD0 replace   \uFFFD         -1 {knownW3C} {Third trail byte must be 80:BF}
    utf-8 F380BFD0 strict    {}              0 {} {Third trail byte must be 80:BF}

    utf-8 F4 tcl8       \u00F4        -1 {} {Missing trail byte}
    utf-8 F4 replace    \uFFFD        -1 {} {Missing trail byte}
    utf-8 F4 strict     {}             0 {} {Missing trail byte}
    utf-8 F47F tcl8     \u00F4\u7F    -1 {} {First trail byte must be 80:8F}
    utf-8 F47F replace  \uFFFD\u7F    -1 {knownW3C} {First trail byte must be 80:8F}
    utf-8 F47F strict   {}             0 {} {First trail byte must be 80:8F}
    utf-8 F490 tcl8     \u00F4\u0090  -1 {} {First trail byte must be 80:8F}
    utf-8 F490 replace  \uFFFD\uFFFD  -1 {} {First trail byte must be 80:8F}
    utf-8 F490 strict   {}             0 {} {First trail byte must be 80:8F}
    utf-8 F480 tcl8      \u00F4\u20AC   -1 {} {Missing second trail byte}
    utf-8 F480 replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 F480 strict    {}              0 {} {Missing second trail byte}
    utf-8 F48F tcl8      \u00F4\u008F   -1 {} {Missing second trail byte}
    utf-8 F48F replace   \uFFFD         -1 {knownW3C} {Missing second trail byte}
    utf-8 F48F strict    {}              0 {} {Missing second trail byte}
    utf-8 F4807F tcl8      \u00F4\u20AC\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 F4807F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 F4807F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 F48F7F tcl8      \u00F4\u008F\x7F -1 {} {Second trail byte must be 80:BF}
    utf-8 F48F7F replace   \uFFFD\u7F       -1 {knownW3C} {Second trail byte must be 80:BF}
    utf-8 F48F7F strict    {}                0 {}  {Second trail byte must be 80:BF}
    utf-8 F48081 tcl8      \u00F4\u20AC\u0081   -1 {} {Missing third trail byte}
    utf-8 F48081 replace   \uFFFD         -1 {knownW3C} {Missing third trail byte}
    utf-8 F48081 strict    {}              0 {} {Missing third trail byte}
    utf-8 F48F81 tcl8      \u00F4\u008F\u0081   -1 {} {Missing third trail byte}
    utf-8 F48F81 replace   \uFFFD         -1 {knownW3C} {Missing third trail byte}
    utf-8 F48F81 strict    {}              0 {} {Missing third trail byte}
    utf-8 F481817F tcl8      \u00F4\u0081\u0081\x7F   -1 {} {Third trail byte must be 80:BF}
    utf-8 F480817F replace   \uFFFD\x7F           -1 {knownW3C} {Third trail byte must be 80:BF}
    utf-8 F480817F strict    {}              0 {} {Third trail byte must be 80:BF}
    utf-8 F48FBFD0 tcl8      \u00F4\u008F\u00BF\u00D0   -1 {} {Third trail byte must be 80:BF}
    utf-8 F48FBFD0 replace   \uFFFD         -1 {knownW3C} {Third trail byte must be 80:BF}
    utf-8 F48FBFD0 strict    {}              0 {} {Third trail byte must be 80:BF}

    utf-8 F5 tcl8    \u00F5 -1 {} {F5:FF are invalid everywhere}
    utf-8 F5 replace \uFFFD -1 {} {F5:FF are invalid everywhere}
    utf-8 F5 strict  {}      0 {} {F5:FF are invalid everywhere}
    utf-8 FF tcl8    \u00FF -1 {} {F5:FF are invalid everywhere}
    utf-8 FF replace \uFFFD -1 {} {F5:FF are invalid everywhere}
    utf-8 FF strict  {}      0 {} {F5:FF are invalid everywhere}

    utf-8 C0AFE080BFF0818130 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-8}
    utf-8 EDA080EDBFBFEDAF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownW3C} {Unicode Table 3-9}
    utf-8 F4919293FF4180BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0041\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-10}
    utf-8 E180E2F09192F1BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\x30                         -1 {knownW3C} {Unicode Table 3.11}
}

# utf16-le and utf16-be test cases. Note utf16 cases are automatically generated
# based on these depending on platform endianness. Note truncated tests can only
# happen when the sequence is at the end (including by itself) Thus {solo tail}
# in some cases.
lappend encInvalidBytes {*}{
    utf-16le 41      tcl8      \uFFFD -1 {solo tail} {Truncated}
    utf-16le 41      replace   \uFFFD -1 {solo tail} {Truncated}
    utf-16le 41      strict    {}      0 {solo tail} {Truncated}
    utf-16le 00D8    tcl8      \uD800 -1 {} {Missing low surrogate}
    utf-16le 00D8    replace   \uFFFD -1 {} {Missing low surrogate}
    utf-16le 00D8    strict    {}      0 {knownBug} {Missing low surrogate}
    utf-16le 00DC    tcl8      \uDC00 -1 {} {Missing high surrogate}
    utf-16le 00DC    replace   \uFFFD -1 {} {Missing high surrogate}
    utf-16le 00DC    strict    {}      0 {knownBug} {Missing high surrogate}

    utf-16be 41      tcl8      \uFFFD -1 {solo tail} {Truncated}
    utf-16be 41      replace   \uFFFD -1 {solo tail} {Truncated}
    utf-16be 41      strict    {}      0 {solo tail} {Truncated}
    utf-16be D800    tcl8      \uD800 -1 {} {Missing low surrogate}
    utf-16be D800    replace   \uFFFD -1 {knownBug} {Missing low surrogate}
    utf-16be D800    strict    {}      0 {knownBug} {Missing low surrogate}
    utf-16be DC00    tcl8      \uDC00 -1 {} {Missing high surrogate}
    utf-16be DC00    replace   \uFFFD -1 {knownBug} {Missing high surrogate}
    utf-16be DC00    strict    {}      0 {knownBug} {Missing high surrogate}
}

# utf32-le and utf32-be test cases. Note utf32 cases are automatically generated
# based on these depending on platform endianness. Note truncated tests can only
# happen when the sequence is at the end (including by itself) Thus {solo tail}
# in some cases.
lappend encInvalidBytes {*}{
    utf-32le 41      tcl8      \uFFFD  -1 {solo tail} {Truncated}
    utf-32le 41      replace   \uFFFD  -1 {solo} {Truncated}
    utf-32le 41      strict    {}   0 {solo tail} {Truncated}
    utf-32le 4100    tcl8      \uFFFD  -1 {solo tail} {Truncated}
    utf-32le 4100    replace   \uFFFD  -1 {solo} {Truncated}
    utf-32le 4100    strict    {}   0 {solo tail} {Truncated}
    utf-32le 410000  tcl8      \uFFFD  -1 {solo tail} {Truncated}
    utf-32le 410000  replace   \uFFFD  -1 {solo} {Truncated}
    utf-32le 410000  strict    {}       0 {solo tail} {Truncated}
    utf-32le 00D80000 tcl8     \uD800   -1 {} {High-surrogate}
    utf-32le 00D80000 replace  \uFFFD   -1 {} {High-surrogate}
    utf-32le 00D80000 strict   {}        0 {} {High-surrogate}
    utf-32le 00DC0000 tcl8     \uDC00   -1 {} {Low-surrogate}
    utf-32le 00DC0000 replace  \uFFFD   -1 {} {Low-surrogate}
    utf-32le 00DC0000 strict   {}        0 {} {Low-surrogate}
    utf-32le 00D8000000DC0000 tcl8 \uD800\uDC00    -1 {} {High-low-surrogate-pair}
    utf-32le 00D8000000DC0000 replace \uFFFD\uFFFD -1 {} {High-low-surrogate-pair}
    utf-32le 00D8000000DC0000 strict  {}            0 {} {High-low-surrogate-pair}
    utf-32le 00001100 tcl8 \uFFFD    -1 {} {Out of range}
    utf-32le 00001100 replace \uFFFD -1 {} {Out of range}
    utf-32le 00001100 strict {}       0 {} {Out of range}
    utf-32le FFFFFFFF tcl8 \uFFFD    -1 {} {Out of range}
    utf-32le FFFFFFFF replace \uFFFD -1 {} {Out of range}
    utf-32le FFFFFFFF strict {}       0 {} {Out of range}

    utf-32be 41      tcl8      \uFFFD  -1 {solo tail} {Truncated}
    utf-32be 41      replace   \uFFFD  -1 {solo tail} {Truncated}
    utf-32be 41      strict    {}       0 {solo tail} {Truncated}
    utf-32be 0041    tcl8      \uFFFD  -1 {solo tail} {Truncated}
    utf-32be 0041    replace   \uFFFD  -1 {solo} {Truncated}
    utf-32be 0041    strict    {}   0 {solo tail} {Truncated}
    utf-32be 000041  tcl8      \uFFFD  -1 {solo tail} {Truncated}
    utf-32be 000041  replace   \uFFFD  -1 {solo} {Truncated}
    utf-32be 000041  strict    {}       0 {solo tail} {Truncated}
    utf-32be 0000D800 tcl8     \uD800   -1 {} {High-surrogate}
    utf-32be 0000D800 replace  \uFFFD   -1 {} {High-surrogate}
    utf-32be 0000D800 strict   {}        0 {} {High-surrogate}
    utf-32be 0000DC00 tcl8     \uDC00   -1 {} {Low-surrogate}
    utf-32be 0000DC00 replace  \uFFFD   -1 {} {Low-surrogate}
    utf-32be 0000DC00 strict   {}        0 {} {Low-surrogate}
    utf-32be 0000D8000000DC00 tcl8 \uD800\uDC00    -1 {} {High-low-surrogate-pair}
    utf-32be 0000D8000000DC00 replace \uFFFD\uFFFD -1 {} {High-low-surrogate-pair}
    utf-32be 0000D8000000DC00 strict  {}            0 {} {High-low-surrogate-pair}
    utf-32be 00110000 tcl8 \uFFFD    -1 {} {Out of range}
    utf-32be 00110000 replace \uFFFD -1 {} {Out of range}
    utf-32be 00110000 strict {}       0 {} {Out of range}
    utf-32be FFFFFFFF tcl8 \uFFFD    -1 {} {Out of range}
    utf-32be FFFFFFFF replace \uFFFD -1 {} {Out of range}
    utf-32be FFFFFFFF strict {}       0 {} {Out of range}
}

# Strings that cannot be encoded for specific encoding / profiles
# <ENCODING STRING PROFILE EXPECTEDRESULT EXPECTEDFAILINDEX CTRL COMMENT>
# <ENCODING,STRING,PROFILE> should be unique for test ids to be unique.
# See earlier comments about CTRL field.
#
# Note utf-16, utf-32 missing because they are automatically
# generated based on le/be versions.
# TODO - out of range code point (note cannot be generated by \U notation)
lappend encUnencodableStrings {*}{
    ascii \u00e0 tcl8    3f -1 {} {unencodable}
    ascii \u00e0 strict  {}  0 {} {unencodable}

    iso8859-1 \u0141 tcl8    3f -1 {} unencodable
    iso8859-1 \u0141 strict  {}  0 {} unencodable

    utf-8 \uD800 tcl8    eda080 -1 {} High-surrogate
    utf-8 \uD800 strict  {}      0 {} High-surrogate
    utf-8 \uDC00 tcl8    edb080 -1 {} High-surrogate
    utf-8 \uDC00 strict  {}      0 {} High-surrogate
}


# The icuUcmTests.tcl is generated by the tools/ucm2tests.tcl script
# and generates test vectors for the above tables for various encodings
# based on ICU UCM files.
# TODO - commented out for now as generating a lot of mismatches.
# source [file join [file dirname [info script]] icuUcmTests.tcl]