diff options
-rw-r--r-- | generic/tclEncoding.c | 2 | ||||
-rw-r--r-- | tests/encoding.test | 42 |
2 files changed, 40 insertions, 4 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index a326856..0d03e2a 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2413,7 +2413,7 @@ UtfToUtfProc( /* A surrogate character is detected, handle especially */ Tcl_UniChar low = *chPtr; size_t len = (src <= srcEnd-3) ? Tcl_UtfToUniChar(src, &low) : 0; - if (((low | 0x3FF) != 0xDFFF) || !(*chPtr & 0x800)) { + if (((low | 0x3FF) != 0xDFFF) || (*chPtr & 0x400)) { *dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF); *dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF); *dst++ = (char) ((*chPtr | 0x80) & 0xBF); diff --git a/tests/encoding.test b/tests/encoding.test index 9c77f3e..1eb6ec5 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -321,11 +321,11 @@ test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { binary scan [teststringbytes $y] H* z set z } c080 -test encoding-15.4 {UtfToUtfProc emoji character input} { +test encoding-15.4 {UtfToUtfProc emoji character input} -constraints knownBug -body { set x \xED\xA0\xBD\xED\xB8\x82 set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82] list [string length $x] $y -} "6 \U1F602" +} -result "6 \U1F602" test encoding-15.5 {UtfToUtfProc emoji character input} { set x \xF0\x9F\x98\x82 set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82] @@ -355,7 +355,43 @@ test encoding-15.9 {UtfToUtfProc emoji character output} { binary scan $y H* z list [string length $x] [string length $y] $z } {3 7 edb882eda0bd58} -test encoding-15.10 {UtfToUtfProc emoji character output} { +test encoding-15.10 {UtfToUtfProc high surrogate character output} { + set x \uDE02\xE9 + set y [encoding convertto utf-8 \uDE02\xE9] + binary scan $y H* z + list [string length $x] [string length $y] $z +} {2 5 edb882c3a9} +test encoding-15.11 {UtfToUtfProc low surrogate character output} { + set x \uDA02\xE9 + set y [encoding convertto utf-8 \uDA02\xE9] + binary scan $y H* z + list [string length $x] [string length $y] $z +} {2 5 eda882c3a9} +test encoding-15.12 {UtfToUtfProc high surrogate character output} { + set x \uDE02Y + set y [encoding convertto utf-8 \uDE02Y] + binary scan $y H* z + list [string length $x] [string length $y] $z +} {2 4 edb88259} +test encoding-15.13 {UtfToUtfProc low surrogate character output} { + set x \uDA02Y + set y [encoding convertto utf-8 \uDA02Y] + binary scan $y H* z + list [string length $x] [string length $y] $z +} {2 4 eda88259} +test encoding-15.14 {UtfToUtfProc high surrogate character output} { + set x \uDE02 + set y [encoding convertto utf-8 \uDE02] + binary scan $y H* z + list [string length $x] [string length $y] $z +} {1 3 edb882} +test encoding-15.15 {UtfToUtfProc low surrogate character output} { + set x \uDA02 + set y [encoding convertto utf-8 \uDA02] + binary scan $y H* z + list [string length $x] [string length $y] $z +} {1 3 eda882} +test encoding-15.16 {UtfToUtfProc emoji character output} { set x \U1F602 set y [encoding convertto utf-8 \U1F602] binary scan $y H* z |