summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- doc/string.n 4
-rw-r--r-- generic/tclEncoding.c 40
-rw-r--r-- library/init.tcl 4
-rw-r--r-- tests/encoding.test 100
4 files changed, 32 insertions, 116 deletions
diff --git a/doc/string.n b/doc/string.n
index f1a0592..f3d7616 100644
--- a/doc/string.n
+++ b/doc/string.n
@@ -415,11 +415,11 @@ etc.)
.PP
\fICompatibility note:\fR This subcommand is deprecated and will
be removed in Tcl 9.0. It is better to use the \fBencoding convertto\fR
-command to convert a string to a known encoding (e.g. "wtf-8" or "tcl-8")
+command to convert a string to a known encoding (e.g. "utf-8" or "cesu-8")
and then apply \fBstring length\fR to that.
.PP
.CS
-\fBstring length\fR [encoding convertto wtf-8 $theString]
+\fBstring length\fR [encoding convertto utf-8 $theString]
.CE
.RE
.TP
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 29aeefd..21c254e 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -511,11 +511,10 @@ FillEncodingFileMap(void)
*/
/* Those flags must not conflict with other TCL_ENCODING_* flags in tcl.h */
-/* Since TCL_ENCODING_MODIFIED is only used for utf-8/wtf-8/cesu-8 and
- * TCL_ENCODING_LE is only used for utf-16/wtf-16/ucs-2. re-use the same value */
+/* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and
+ * TCL_ENCODING_LE is only used for utf-16/ucs-2. re-use the same value */
#define TCL_ENCODING_MODIFIED 0x20 /* Converting NULL bytes to 0xC0 0x80 */
#define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */
-#define TCL_ENCODING_WTF 0x100 /* For WTF-8 encoding, don't check for surrogates/noncharacters */
#define TCL_ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */
void
@@ -560,15 +559,9 @@ TclInitEncodingSubsystem(void)
type.nullSize = 1;
type.clientData = INT2PTR(TCL_ENCODING_UTF);
Tcl_CreateEncoding(&type);
- type.clientData = INT2PTR(TCL_ENCODING_UTF|TCL_ENCODING_WTF);
- type.encodingName = "wtf-8";
- Tcl_CreateEncoding(&type);
type.clientData = INT2PTR(0);
type.encodingName = "cesu-8";
Tcl_CreateEncoding(&type);
- type.clientData = INT2PTR(TCL_ENCODING_UTF|TCL_ENCODING_WTF|TCL_ENCODING_MODIFIED);
- type.encodingName = "tcl-8";
- Tcl_CreateEncoding(&type);
type.toUtfProc = Utf16ToUtfProc;
type.fromUtfProc = UtfToUcs2Proc;
@@ -591,21 +584,12 @@ TclInitEncodingSubsystem(void)
type.encodingName = "utf-16le";
type.clientData = INT2PTR(TCL_ENCODING_LE);
Tcl_CreateEncoding(&type);
- type.encodingName = "wtf-16le";
- type.clientData = INT2PTR(TCL_ENCODING_LE + TCL_ENCODING_WTF);
- Tcl_CreateEncoding(&type);
type.encodingName = "utf-16be";
type.clientData = INT2PTR(0);
Tcl_CreateEncoding(&type);
- type.encodingName = "wtf-16be";
- type.clientData = INT2PTR(TCL_ENCODING_WTF);
- Tcl_CreateEncoding(&type);
type.encodingName = "utf-16";
type.clientData = INT2PTR(isLe.c);
Tcl_CreateEncoding(&type);
- type.encodingName = "wtf-16";
- type.clientData = INT2PTR(isLe.c + TCL_ENCODING_WTF);
- Tcl_CreateEncoding(&type);
#ifndef TCL_NO_DEPRECATED
type.encodingName = "unicode";
@@ -2315,15 +2299,13 @@ UtfToUtfProc(
len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) {
- if (!(flags & TCL_ENCODING_WTF)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
- result = TCL_CONVERT_UNKNOWN;
- src = saveSrc;
- break;
- }
- if (!(flags & TCL_ENCODING_MODIFIED)) {
- ch = 0xFFFD;
- }
+ if (flags & TCL_ENCODING_STOPONERROR) {
+ result = TCL_CONVERT_UNKNOWN;
+ src = saveSrc;
+ break;
+ }
+ if (!(flags & TCL_ENCODING_MODIFIED)) {
+ ch = 0xFFFD;
}
cesu8:
*dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF);
@@ -2334,7 +2316,7 @@ UtfToUtfProc(
src += len;
dst += Tcl_UniCharToUtf(ch, dst);
ch = low;
- } else if (!(flags & TCL_ENCODING_WTF) && !Tcl_UniCharIsUnicode(ch)) {
+ } else if (!Tcl_UniCharIsUnicode(ch)) {
if (flags & TCL_ENCODING_STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
src = saveSrc;
@@ -2530,7 +2512,7 @@ UtfToUtf16Proc(
break;
}
len = TclUtfToUCS4(src, &ch);
- if (!(flags & TCL_ENCODING_WTF) && !Tcl_UniCharIsUnicode(ch)) {
+ if (!Tcl_UniCharIsUnicode(ch)) {
if (flags & TCL_ENCODING_STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
diff --git a/library/init.tcl b/library/init.tcl
index 749eed9..e30296e 100644
--- a/library/init.tcl
+++ b/library/init.tcl
@@ -214,9 +214,9 @@ proc unknown args {
set errInfo [dict get $opts -errorinfo]
set errCode [dict get $opts -errorcode]
set cinfo $args
- if {[string length [encoding convertto wtf-8 $cinfo]] > 150} {
+ if {[string length [encoding convertto utf-8 $cinfo]] > 150} {
set cinfo [string range $cinfo 0 150]
- while {[string length [encoding convertto wtf-8 $cinfo]] > 150} {
+ while {[string length [encoding convertto utf-8 $cinfo]] > 150} {
set cinfo [string range $cinfo 0 end-1]
}
append cinfo ...
diff --git a/tests/encoding.test b/tests/encoding.test
index 9924886..82a2d6b 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -338,138 +338,78 @@ test encoding-15.5 {UtfToUtfProc emoji character input} {
set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82]
list [string length $x] $y
} "4 😂"
-test encoding-15.6 {UtfToUtfProc emoji character output} {
- set x \uDE02\uD83D\uDE02\uD83D
- set y [encoding convertto wtf-8 \uDE02\uD83D\uDE02\uD83D]
- binary scan $y H* z
- list [string length $y] $z
-} {10 edb882f09f9882eda0bd}
-test encoding-15.7 {UtfToUtfProc emoji character output} {
- set x \uDE02\uD83D\uD83D
- set y [encoding convertto wtf-8 \uDE02\uD83D\uD83D]
- binary scan $y H* z
- list [string length $x] [string length $y] $z
-} {3 9 edb882eda0bdeda0bd}
-test encoding-15.8 {UtfToUtfProc emoji character output} {
- set x \uDE02\uD83Dé
- set y [encoding convertto wtf-8 \uDE02\uD83Dé]
- binary scan $y H* z
- list [string length $x] [string length $y] $z
-} {3 8 edb882eda0bdc3a9}
-test encoding-15.9 {UtfToUtfProc emoji character output} {
- set x \uDE02\uD83DX
- set y [encoding convertto wtf-8 \uDE02\uD83DX]
- binary scan $y H* z
- list [string length $x] [string length $y] $z
-} {3 7 edb882eda0bd58}
-test encoding-15.10 {UtfToUtfProc high surrogate character output} {
- set x \uDE02é
- set y [encoding convertto wtf-8 \uDE02é]
- binary scan $y H* z
- list [string length $x] [string length $y] $z
-} {2 5 edb882c3a9}
-test encoding-15.11 {UtfToUtfProc low surrogate character output} {
- set x \uDA02é
- set y [encoding convertto wtf-8 \uDA02é]
- binary scan $y H* z
- list [string length $x] [string length $y] $z
-} {2 5 eda882c3a9}
-test encoding-15.12 {UtfToUtfProc high surrogate character output} {
- set x \uDE02Y
- set y [encoding convertto wtf-8 \uDE02Y]
- binary scan $y H* z
- list [string length $x] [string length $y] $z
-} {2 4 edb88259}
-test encoding-15.13 {UtfToUtfProc low surrogate character output} {
- set x \uDA02Y
- set y [encoding convertto wtf-8 \uDA02Y]
- binary scan $y H* z
- list [string length $x] [string length $y] $z
-} {2 4 eda88259}
-test encoding-15.14 {UtfToUtfProc high surrogate character output} {
- set x \uDE02
- set y [encoding convertto wtf-8 \uDE02]
- binary scan $y H* z
- list [string length $x] [string length $y] $z
-} {1 3 edb882}
-test encoding-15.15 {UtfToUtfProc low surrogate character output} {
- set x \uDA02
- set y [encoding convertto wtf-8 \uDA02]
- binary scan $y H* z
- list [string length $x] [string length $y] $z
-} {1 3 eda882}
-test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
+test encoding-15.6 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
set x \xF0\xA0\xA1\xC2
set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2]
list [string length $x] $y
} "4 \xF0\xA0\xA1\xC2"
-test encoding-15.17 {UtfToUtfProc emoji character output} {
+test encoding-15.7 {UtfToUtfProc emoji character output} {
set x 😂
set y [encoding convertto utf-8 😂]
binary scan $y H* z
list [string length $y] $z
} {4 f09f9882}
-test encoding-15.18 {UtfToUtfProc emoji character output} {
+test encoding-15.8 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\uDE02\uD83D
set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
binary scan $y H* z
list [string length $y] $z
} {10 efbfbdf09f9882efbfbd}
-test encoding-15.19 {UtfToUtfProc emoji character output} {
+test encoding-15.9 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\uD83D
set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 9 efbfbdefbfbdefbfbd}
-test encoding-15.20 {UtfToUtfProc emoji character output} {
+test encoding-15.10 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\xE9
set y [encoding convertto utf-8 \uDE02\uD83D\xE9]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 8 efbfbdefbfbdc3a9}
-test encoding-15.21 {UtfToUtfProc emoji character output} {
+test encoding-15.11 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83DX
set y [encoding convertto utf-8 \uDE02\uD83DX]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 7 efbfbdefbfbd58}
-test encoding-15.22 {UtfToUtfProc high surrogate character output} {
+test encoding-15.12 {UtfToUtfProc high surrogate character output} {
set x \uDE02\xE9
set y [encoding convertto utf-8 \uDE02\xE9]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 5 efbfbdc3a9}
-test encoding-15.23 {UtfToUtfProc low surrogate character output} {
+test encoding-15.13 {UtfToUtfProc low surrogate character output} {
set x \uDA02\xE9
set y [encoding convertto utf-8 \uDA02\xE9]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 5 efbfbdc3a9}
-test encoding-15.24 {UtfToUtfProc high surrogate character output} {
+test encoding-15.14 {UtfToUtfProc high surrogate character output} {
set x \uDE02Y
set y [encoding convertto utf-8 \uDE02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 4 efbfbd59}
-test encoding-15.25 {UtfToUtfProc low surrogate character output} {
+test encoding-15.15 {UtfToUtfProc low surrogate character output} {
set x \uDA02Y
set y [encoding convertto utf-8 \uDA02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 4 efbfbd59}
-test encoding-15.26 {UtfToUtfProc high surrogate character output} {
+test encoding-15.16 {UtfToUtfProc high surrogate character output} {
set x \uDE02
set y [encoding convertto utf-8 \uDE02]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {1 3 efbfbd}
-test encoding-15.27 {UtfToUtfProc low surrogate character output} {
+test encoding-15.17 {UtfToUtfProc low surrogate character output} {
set x \uDA02
set y [encoding convertto utf-8 \uDA02]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {1 3 efbfbd}
-test encoding-15.28 {UtfToUtfProc CESU-8 6-byte sequence} {
+test encoding-15.18 {UtfToUtfProc CESU-8 6-byte sequence} {
set y [encoding convertto cesu-8 \U10000]
binary scan $y H* z
list [string length $y] $z
@@ -499,19 +439,13 @@ test encoding-16.4 {Ucs2ToUtfProc} -body {
test encoding-17.1 {UtfToUtf16Proc} -body {
encoding convertto utf-16 "\U460DC"
} -result "\xD8\xD8\xDC\xDC"
-test encoding-17.2 {UtfToUtf16Proc} -body {
- encoding convertto wtf-16 "\uDCDC"
-} -result "\xDC\xDC"
-test encoding-17.3 {UtfToUtf16Proc} -body {
- encoding convertto wtf-16 "\uD8D8"
-} -result "\xD8\xD8"
-test encoding-17.4 {UtfToUcs2Proc} -body {
+test encoding-17.2 {UtfToUcs2Proc} -body {
encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"]
} -result "\uFFFD"
-test encoding-17.5 {UtfToUtf16Proc} -body {
+test encoding-17.3 {UtfToUtf16Proc} -body {
encoding convertto utf-16be "\uDCDC"
} -result "\xFF\xFD"
-test encoding-17.6 {UtfToUtf16Proc} -body {
+test encoding-17.4 {UtfToUtf16Proc} -body {
encoding convertto utf-16le "\uD8D8"
} -result "\xFD\xFF"
@@ -813,7 +747,7 @@ test encoding-28.0 {all encodings load} -body {
llength $name
}
return $count
-} -result [expr {[info exists ::tcl_precision] ? 92 : 91}]
+} -result [expr {[info exists ::tcl_precision] ? 87 : 86}]
runtests