summary refs log tree commit diff stats
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2023-02-22 07:28:27 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2023-02-22 07:28:27 (GMT)
commit f0284298add52f9e6804e1d3e53c4e67fcfbc1c3 (patch)
tree fd61e54f92d80b8da24202bfaa10aea0fd4776b8
parent 03a9d6928d201038d4fa4dbccc9bc724c0dcd713 (diff)
parent 23d9ca0ec4772f703cd24c476d5fa485fd91e828 (diff)
download tcl-f0284298add52f9e6804e1d3e53c4e67fcfbc1c3.zip
tcl-f0284298add52f9e6804e1d3e53c4e67fcfbc1c3.tar.gz
tcl-f0284298add52f9e6804e1d3e53c4e67fcfbc1c3.tar.bz2
Fix [5607d6482c]: strict ucs-2 never implemented (TIP #346/#656)
-rw-r--r-- generic/tclEncoding.c 36
-rw-r--r-- tests/encoding.test 8
2 files changed, 31 insertions, 13 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 1d3a3eb..d2b0efc 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -573,13 +573,13 @@ TclInitEncodingSubsystem(void)
type.freeProc = NULL;
type.nullSize = 2;
type.encodingName = "ucs-2le";
- type.clientData = INT2PTR(TCL_ENCODING_LE|TCL_ENCODING_NOCOMPLAIN);
+ type.clientData = INT2PTR(TCL_ENCODING_LE);
Tcl_CreateEncoding(&type);
type.encodingName = "ucs-2be";
- type.clientData = INT2PTR(TCL_ENCODING_NOCOMPLAIN);
+ type.clientData = INT2PTR(0);
Tcl_CreateEncoding(&type);
type.encodingName = "ucs-2";
- type.clientData = INT2PTR(isLe.c|TCL_ENCODING_NOCOMPLAIN);
+ type.clientData = INT2PTR(isLe.c);
Tcl_CreateEncoding(&type);
type.toUtfProc = Utf32ToUtfProc;
@@ -601,13 +601,13 @@ TclInitEncodingSubsystem(void)
type.freeProc = NULL;
type.nullSize = 2;
type.encodingName = "utf-16le";
- type.clientData = INT2PTR(TCL_ENCODING_LE);
+ type.clientData = INT2PTR(TCL_ENCODING_LE|ENCODING_UTF);
Tcl_CreateEncoding(&type);
type.encodingName = "utf-16be";
- type.clientData = INT2PTR(0);
+ type.clientData = INT2PTR(ENCODING_UTF);
Tcl_CreateEncoding(&type);
type.encodingName = "utf-16";
- type.clientData = INT2PTR(isLe.c);
+ type.clientData = INT2PTR(isLe.c|ENCODING_UTF);
Tcl_CreateEncoding(&type);
#ifndef TCL_NO_DEPRECATED
@@ -2984,10 +2984,7 @@ UtfToUcs2Proc(
* output buffer. */
{
const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
- int result, numChars;
-#if TCL_UTF_MAX < 4
- int len;
-#endif
+ int result, numChars, len;
Tcl_UniChar ch = 0;
flags |= PTR2INT(clientData);
@@ -3017,17 +3014,32 @@ UtfToUcs2Proc(
break;
}
#if TCL_UTF_MAX < 4
- src += (len = TclUtfToUniChar(src, &ch));
+ len = TclUtfToUniChar(src, &ch);
if ((ch >= 0xD800) && (len < 3)) {
+ if (STOPONERROR) {
+ result = TCL_CONVERT_UNKNOWN;
+ break;
+ }
+ src += len;
src += TclUtfToUniChar(src, &ch);
ch = 0xFFFD;
}
#else
- src += TclUtfToUniChar(src, &ch);
+ len = TclUtfToUniChar(src, &ch);
if (ch > 0xFFFF) {
+ if (STOPONERROR) {
+ result = TCL_CONVERT_UNKNOWN;
+ break;
+ }
ch = 0xFFFD;
}
#endif
+ if (STOPONERROR && ((ch & ~0x7FF) == 0xD800)) {
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ }
+
+ src += len;
/*
* Need to handle this in a way that won't cause misalignment by
diff --git a/tests/encoding.test b/tests/encoding.test
index 03f0273..83e75be 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -561,7 +561,7 @@ test encoding-16.9 {
test encoding-17.1 {UtfToUtf16Proc} -body {
encoding convertto utf-16 "\U460DC"
} -result "\xD8\xD8\xDC\xDC"
-test encoding-17.2 {UtfToUcs2Proc} -body {
+test encoding-17.2 {UtfToUcs2Proc, invalid testcase, see [5607d6482c]} -constraints deprecated -body {
encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"]
} -result "\uFFFD"
test encoding-17.3 {UtfToUtf16Proc} -body {
@@ -853,6 +853,12 @@ test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body
test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
encoding convertfrom -nocomplain utf-8 \x80
} -result \u20AC
+test encoding-24.44 {Try to generate invalid ucs-2 with -strict} -body {
+ encoding convertto -strict ucs-2 \uD800
+} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'}
+test encoding-24.45 {Try to generate invalid ucs-2 with -strict} -body {
+ encoding convertto -strict ucs-2 \U10000
+} -returnCodes 1 -result {unexpected character at index 0: 'U+010000'}
file delete [file join [temporaryDirectory] iso2022.txt]