summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: pooryorick <com.digitalsmarties@pooryorick.com> 2023-01-31 22:15:02 (GMT)
committer: pooryorick <com.digitalsmarties@pooryorick.com> 2023-01-31 22:15:02 (GMT)
commit: 078a694834be8669ba6f79def0adbb61afacc0e2 (patch)
tree: 3d164e576172d4bc10be837a3d5225a3782bf9fa
parent: 8c0f76a7de2b22a611f26c3a08a434b5b85ce261 (diff)
download: tcl-078a694834be8669ba6f79def0adbb61afacc0e2.zip
tcl-078a694834be8669ba6f79def0adbb61afacc0e2.tar.gz
tcl-078a694834be8669ba6f79def0adbb61afacc0e2.tar.bz2
Fix error introduced in [3e5e37f83b058f3d] for Tcl_UniCharToUtf, and add test.
-rw-r--r-- generic/tclUtf.c | 2
-rw-r--r-- tests/encoding.test | 24
2 files changed, 25 insertions, 1 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index ab27f1b..bef32f0 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -256,7 +256,7 @@ Tcl_UniCharToUtf(
ch += 0x40;
/* Fill buffer with specific 3-byte (invalid) byte combination,
so following low surrogate can recognize it and combine */
- buf[2] = (char) ( 0x03 & ch);
+ buf[2] = (char) ((ch << 4) & 0x30);
buf[1] = (char) (0x80 | (0x3F & (ch >> 2)));
buf[0] = (char) (0xF0 | (0x07 & (ch >> 8)));
return 1;
diff --git a/tests/encoding.test b/tests/encoding.test
index 1971360..8351c91 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -487,6 +487,30 @@ test encoding-16.8 {Utf32ToUtfProc} -body {
list $val [format %x [scan $val %c]]
} -result "\uFFFD fffd"
+# Round-trip every UTF-16 surrogate pair through utf-16be decode and encode.
+# Both loop bounds are inclusive so that 0xDBFF and 0xDFFF are exercised too.
+# On the first failing pair the body returns its two code units in hex, which
+# is then reported as the mismatch against the expected result "done".
+test encoding-16.9 {
+    Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16
+} -body {
+    apply [list {} {
+        for {set i 0xD800} {$i <= 0xDBFF} {incr i} {
+            for {set j 0xDC00} {$j <= 0xDFFF} {incr j} {
+                set string [binary format S2 [list $i $j]]
+                set status [catch {
+                    set decoded [encoding convertfrom utf-16be $string]
+                    set encoded [encoding convertto utf-16be $decoded]
+                }]
+                if {$status || ( $encoded ne $string )} {
+                    return [list [format %x $i] [format %x $j]]
+                }
+            }
+        }
+        return done
+    } [namespace current]]
+} -result done
+
# U+460DC lies outside the BMP, so UTF-16 encodes it as the surrogate pair
# 0xD8D8 0xDCDC. Each code unit has identical high and low bytes, so the four
# expected bytes are the same for big-endian and little-endian output.
test encoding-17.1 {UtfToUtf16Proc} -body {
encoding convertto utf-16 "\U460DC"
} -result "\xD8\xD8\xDC\xDC"