summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2022-01-14 22:34:06 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2022-01-14 22:34:06 (GMT)
commit: 9bcfb6703d6eaa02bfcaad401aa83ce48309d4a1 (patch)
tree: e26d0592d28d86592b9b44ebc84073821f2b6099
parent: c513699e2c1d661da77c76813d7bdac494bfae91 (diff)
download: tcl-9bcfb6703d6eaa02bfcaad401aa83ce48309d4a1.zip
tcl-9bcfb6703d6eaa02bfcaad401aa83ce48309d4a1.tar.gz
tcl-9bcfb6703d6eaa02bfcaad401aa83ce48309d4a1.tar.bz2
Proposed fix for [6474fbd934]: Tcl 8.7a5: why utf-8 is different?
-rw-r--r-- generic/tclEncoding.c 3
-rw-r--r-- tests/encoding.test 20
2 files changed, 10 insertions, 13 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 57c6148..037beed 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2323,9 +2323,6 @@ UtfToUtfProc(
src = saveSrc;
break;
}
- if (!(flags & TCL_ENCODING_MODIFIED)) {
- ch = 0xFFFD;
- }
cesu8:
*dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF);
*dst++ = (char) (((ch >> 6) | 0x80) & 0xBF);
diff --git a/tests/encoding.test b/tests/encoding.test
index c6f4e02..75e0dcc 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -349,61 +349,61 @@ test encoding-15.6 {UtfToUtfProc emoji character output} {
set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
binary scan $y H* z
list [string length $y] $z
-} {10 efbfbdf09f9882efbfbd}
+} {10 edb882f09f9882eda0bd}
test encoding-15.7 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\uD83D
set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {3 9 efbfbdefbfbdefbfbd}
+} {3 9 edb882eda0bdeda0bd}
test encoding-15.8 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83Dé
set y [encoding convertto utf-8 \uDE02\uD83Dé]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {3 8 efbfbdefbfbdc3a9}
+} {3 8 edb882eda0bdc3a9}
test encoding-15.9 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83DX
set y [encoding convertto utf-8 \uDE02\uD83DX]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {3 7 efbfbdefbfbd58}
+} {3 7 edb882eda0bd58}
test encoding-15.10 {UtfToUtfProc high surrogate character output} {
set x \uDE02é
set y [encoding convertto utf-8 \uDE02é]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {2 5 efbfbdc3a9}
+} {2 5 edb882c3a9}
test encoding-15.11 {UtfToUtfProc low surrogate character output} {
set x \uDA02é
set y [encoding convertto utf-8 \uDA02é]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {2 5 efbfbdc3a9}
+} {2 5 eda882c3a9}
test encoding-15.12 {UtfToUtfProc high surrogate character output} {
set x \uDE02Y
set y [encoding convertto utf-8 \uDE02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {2 4 efbfbd59}
+} {2 4 edb88259}
test encoding-15.13 {UtfToUtfProc low surrogate character output} {
set x \uDA02Y
set y [encoding convertto utf-8 \uDA02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {2 4 efbfbd59}
+} {2 4 eda88259}
test encoding-15.14 {UtfToUtfProc high surrogate character output} {
set x \uDE02
set y [encoding convertto utf-8 \uDE02]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {1 3 efbfbd}
+} {1 3 edb882}
test encoding-15.15 {UtfToUtfProc low surrogate character output} {
set x \uDA02
set y [encoding convertto utf-8 \uDA02]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {1 3 efbfbd}
+} {1 3 eda882}
test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
set x \xF0\xA0\xA1\xC2
set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2]