summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/tclEncoding.c2
-rw-r--r--tests/encoding.test42
2 files changed, 40 insertions, 4 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index a326856..0d03e2a 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2413,7 +2413,7 @@ UtfToUtfProc(
/* A surrogate character is detected, handle especially */
Tcl_UniChar low = *chPtr;
size_t len = (src <= srcEnd-3) ? Tcl_UtfToUniChar(src, &low) : 0;
- if (((low | 0x3FF) != 0xDFFF) || !(*chPtr & 0x800)) {
+ if (((low | 0x3FF) != 0xDFFF) || (*chPtr & 0x400)) {
*dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF);
*dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF);
*dst++ = (char) ((*chPtr | 0x80) & 0xBF);
diff --git a/tests/encoding.test b/tests/encoding.test
index 9c77f3e..1eb6ec5 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -321,11 +321,11 @@ test encoding-15.3 {UtfToUtfProc null character input} teststringbytes {
binary scan [teststringbytes $y] H* z
set z
} c080
-test encoding-15.4 {UtfToUtfProc emoji character input} {
+test encoding-15.4 {UtfToUtfProc emoji character input} -constraints knownBug -body {
set x \xED\xA0\xBD\xED\xB8\x82
set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82]
list [string length $x] $y
-} "6 \U1F602"
+} -result "6 \U1F602"
test encoding-15.5 {UtfToUtfProc emoji character input} {
set x \xF0\x9F\x98\x82
set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82]
@@ -355,7 +355,43 @@ test encoding-15.9 {UtfToUtfProc emoji character output} {
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 7 edb882eda0bd58}
-test encoding-15.10 {UtfToUtfProc emoji character output} {
+test encoding-15.10 {UtfToUtfProc high surrogate character output} {
+ set x \uDE02\xE9
+ set y [encoding convertto utf-8 \uDE02\xE9]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {2 5 edb882c3a9}
+test encoding-15.11 {UtfToUtfProc low surrogate character output} {
+ set x \uDA02\xE9
+ set y [encoding convertto utf-8 \uDA02\xE9]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {2 5 eda882c3a9}
+test encoding-15.12 {UtfToUtfProc high surrogate character output} {
+ set x \uDE02Y
+ set y [encoding convertto utf-8 \uDE02Y]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {2 4 edb88259}
+test encoding-15.13 {UtfToUtfProc low surrogate character output} {
+ set x \uDA02Y
+ set y [encoding convertto utf-8 \uDA02Y]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {2 4 eda88259}
+test encoding-15.14 {UtfToUtfProc high surrogate character output} {
+ set x \uDE02
+ set y [encoding convertto utf-8 \uDE02]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {1 3 edb882}
+test encoding-15.15 {UtfToUtfProc low surrogate character output} {
+ set x \uDA02
+ set y [encoding convertto utf-8 \uDA02]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {1 3 eda882}
+test encoding-15.16 {UtfToUtfProc emoji character output} {
set x \U1F602
set y [encoding convertto utf-8 \U1F602]
binary scan $y H* z