Merge 8.7. This also fixes [133456085a]

author: jan.nijtmans <nijtmans@users.sourceforge.net> 2022-11-30 11:11:28 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2022-11-30 11:11:28 (GMT)
commit: 4d235898611fe66668632c48acbe44e9c86de509 (patch)
tree: 645d763018361e93343272d49054dac1e06c1ad4
parent: 5cb9dc5ddc24459e55fab2188d60c867755e6f40 (diff)
parent: a7d6b92abf2f95c2c8478c3bd79ee9da76c6d717 (diff)
download: tcl-4d235898611fe66668632c48acbe44e9c86de509.zip
tcl-4d235898611fe66668632c48acbe44e9c86de509.tar.gz
tcl-4d235898611fe66668632c48acbe44e9c86de509.tar.bz2
6 files changed, 79 insertions, 42 deletions
diff --git a/generic/tclDictObj.c b/generic/tclDictObj.c
index a7e6bbf..04a909f 100644
--- a/generic/tclDictObj.c
+++ b/generic/tclDictObj.c
@@ -136,6 +136,20 @@ typedef struct Dict {
 				 * dictionaries. */
 } Dict;
 
+/*
+ * The structure below defines the dictionary object type by means of
+ * functions that can be invoked by generic object code.
+ */
+
+const Tcl_ObjType tclDictType = {
+    "dict",
+    FreeDictInternalRep,		/* freeIntRepProc */
+    DupDictInternalRep,			/* dupIntRepProc */
+    UpdateStringOfDict,			/* updateStringProc */
+    SetDictFromAny,			/* setFromAnyProc */
+    TCL_OBJTYPE_V0
+};
+
 #define DictSetInternalRep(objPtr, dictRepPtr)				\
     do {                                                                \
         Tcl_ObjInternalRep ir;                                               \
@@ -152,20 +166,6 @@ typedef struct Dict {
     } while (0)
 
 /*
- * The structure below defines the dictionary object type by means of
- * functions that can be invoked by generic object code.
- */
-
-const Tcl_ObjType tclDictType = {
-    "dict",
-    FreeDictInternalRep,		/* freeIntRepProc */
-    DupDictInternalRep,			/* dupIntRepProc */
-    UpdateStringOfDict,			/* updateStringProc */
-    SetDictFromAny,			/* setFromAnyProc */
-    TCL_OBJTYPE_V0
-};
-
-/*
  * The type of the specially adapted version of the Tcl_Obj*-containing hash
  * table defined in the tclObj.c code. This version differs in that it
  * allocates a bit more space in each hash entry in order to hold the pointers
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 3013916..5a516f3 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -568,7 +568,7 @@ TclInitEncodingSubsystem(void)
     type.nullSize	= 1;
     type.clientData	= INT2PTR(TCL_ENCODING_UTF);
     Tcl_CreateEncoding(&type);
-    type.clientData	= INT2PTR(0);
+    type.clientData	= INT2PTR(TCL_ENCODING_NOCOMPLAIN);
     type.encodingName	= "cesu-8";
     Tcl_CreateEncoding(&type);
 
@@ -577,13 +577,13 @@ TclInitEncodingSubsystem(void)
     type.freeProc	= NULL;
     type.nullSize	= 2;
     type.encodingName   = "ucs-2le";
-    type.clientData	= INT2PTR(TCL_ENCODING_LE);
+    type.clientData	= INT2PTR(TCL_ENCODING_LE|TCL_ENCODING_NOCOMPLAIN);
     Tcl_CreateEncoding(&type);
     type.encodingName   = "ucs-2be";
-    type.clientData	= INT2PTR(0);
+    type.clientData	= INT2PTR(TCL_ENCODING_NOCOMPLAIN);
     Tcl_CreateEncoding(&type);
     type.encodingName   = "ucs-2";
-    type.clientData	= INT2PTR(isLe.c);
+    type.clientData	= INT2PTR(isLe.c|TCL_ENCODING_NOCOMPLAIN);
     Tcl_CreateEncoding(&type);
 
     type.toUtfProc	= Utf32ToUtfProc;
@@ -2408,15 +2408,16 @@ UtfToUtfProc(
 		dst += Tcl_UniCharToUtf(ch, dst);
 		ch = low;
 #endif
-	    } else if (!Tcl_UniCharIsUnicode(ch)) {
-		if (STOPONERROR) {
-		    result = TCL_CONVERT_UNKNOWN;
-		    src = saveSrc;
-		    break;
-		}
-		if (!(flags & TCL_ENCODING_MODIFIED)) {
-		    ch = 0xFFFD;
-		}
+	    } else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && !Tcl_UniCharIsUnicode(ch)
+		    && (((ch  & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
+		result = TCL_CONVERT_UNKNOWN;
+		src = saveSrc;
+		break;
+	    } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
+		    && (flags & TCL_ENCODING_MODIFIED) && !Tcl_UniCharIsUnicode(ch)) {
+		result = TCL_CONVERT_SYNTAX;
+		src = saveSrc;
+		break;
 	    }
 	    dst += Tcl_UniCharToUtf(ch, dst);
 	}
diff --git a/tests/chan.test b/tests/chan.test
index 946e424..cb44f06 100644
--- a/tests/chan.test
+++ b/tests/chan.test
@@ -55,7 +55,7 @@ test chan-4.2 {chan command: [Bug 800753]} -body {
 test chan-4.3 {chan command: [Bug 800753]} -body {
     chan configure stdout -eofchar \x00
 } -returnCodes error -result {bad value for -eofchar: must be non-NUL ASCII character}
-test chan-4.4 {chan command: check valid inValue, no outValue} -constraints deprecated -body {
+test chan-4.4 {chan command: check valid inValue, no outValue} -body {
     chan configure stdout -eofchar [list \x27 {}]
 } -result {}
 test chan-4.5 {chan command: check valid inValue, invalid outValue} -body {
diff --git a/tests/chanio.test b/tests/chanio.test
index 1d0b225..6cd3955 100644
--- a/tests/chanio.test
+++ b/tests/chanio.test
@@ -5285,7 +5285,7 @@ test chan-io-39.21 {Tcl_SetChannelOption, setting read mode independently} \
 test chan-io-39.22 {Tcl_SetChannelOption, invariance} -setup {
     file delete $path(test1)
     set l ""
-} -constraints {unix deprecated} -body {
+} -constraints unix -body {
     set f1 [open $path(test1) w+]
     lappend l [chan configure $f1 -eofchar]
     chan configure $f1 -eofchar {O {}}
@@ -5298,7 +5298,7 @@ test chan-io-39.22 {Tcl_SetChannelOption, invariance} -setup {
 test chan-io-39.22a {Tcl_SetChannelOption, invariance} -setup {
     file delete $path(test1)
     set l [list]
-} -constraints deprecated -body {
+} -body {
     set f1 [open $path(test1) w+]
     chan configure $f1 -eofchar {O {}}
     lappend l [chan configure $f1 -eofchar]
diff --git a/tests/encoding.test b/tests/encoding.test
index 63270fd..a87cd24 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -352,61 +352,61 @@ test encoding-15.6 {UtfToUtfProc emoji character output} utf32 {
     set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D]
     binary scan $y H* z
     list [string length $y] $z
-} {12 efbfbdefbfbdefbfbdefbfbd}
+} {12 edb882eda0bdedb882eda0bd}
 test encoding-15.7 {UtfToUtfProc emoji character output} utf32 {
     set x \uDE02\uD83D\uD83D
     set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {3 9 efbfbdefbfbdefbfbd}
+} {3 9 edb882eda0bdeda0bd}
 test encoding-15.8 {UtfToUtfProc emoji character output} utf32 {
     set x \uDE02\uD83Dé
     set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {3 8 efbfbdefbfbdc3a9}
+} {3 8 edb882eda0bdc3a9}
 test encoding-15.9 {UtfToUtfProc emoji character output} utf32 {
     set x \uDE02\uD83DX
     set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {3 7 efbfbdefbfbd58}
+} {3 7 edb882eda0bd58}
 test encoding-15.10 {UtfToUtfProc high surrogate character output} utf32 {
     set x \uDE02é
     set y [encoding convertto -nocomplain utf-8 \uDE02é]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {2 5 efbfbdc3a9}
+} {2 5 edb882c3a9}
 test encoding-15.11 {UtfToUtfProc low surrogate character output} utf32 {
     set x \uDA02é
     set y [encoding convertto -nocomplain utf-8 \uDA02é]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {2 5 efbfbdc3a9}
+} {2 5 eda882c3a9}
 test encoding-15.12 {UtfToUtfProc high surrogate character output} utf32 {
     set x \uDE02Y
     set y [encoding convertto -nocomplain utf-8 \uDE02Y]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {2 4 efbfbd59}
+} {2 4 edb88259}
 test encoding-15.13 {UtfToUtfProc low surrogate character output} utf32 {
     set x \uDA02Y
     set y [encoding convertto -nocomplain utf-8 \uDA02Y]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {2 4 efbfbd59}
+} {2 4 eda88259}
 test encoding-15.14 {UtfToUtfProc high surrogate character output} utf32 {
     set x \uDE02
     set y [encoding convertto -nocomplain utf-8 \uDE02]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {1 3 efbfbd}
+} {1 3 edb882}
 test encoding-15.15 {UtfToUtfProc low surrogate character output} utf32 {
     set x \uDA02
     set y [encoding convertto -nocomplain utf-8 \uDA02]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
-} {1 3 efbfbd}
+} {1 3 eda882}
 test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
     set x \xF0\xA0\xA1\xC2
     set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2]
@@ -689,6 +689,42 @@ test encoding-24.27 {Parse invalid utf-8 with -strict} -body {
 test encoding-24.28 {Parse invalid utf-8 with -strict} -body {
     encoding convertfrom -strict utf-8 "\xFF\x00\x00"
 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'}
+test encoding-24.29 {Parse invalid utf-8} -body {
+    encoding convertfrom utf-8 \xEF\xBF\xBF
+} -result \uFFFF
+test encoding-24.30 {Parse invalid utf-8 with -strict} -body {
+    encoding convertfrom -strict utf-8 \xEF\xBF\xBF
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xEF'}
+test encoding-24.31 {Parse invalid utf-8 with -nocomplain} -body {
+    encoding convertfrom -nocomplain utf-8 \xEF\xBF\xBF
+} -result \uFFFF
+test encoding-24.32 {Try to generate invalid utf-8} -body {
+    encoding convertto utf-8 \uFFFF
+} -result \xEF\xBF\xBF
+test encoding-24.33 {Try to generate invalid utf-8 with -strict} -body {
+    encoding convertto -strict utf-8 \uFFFF
+} -returnCodes 1 -result {unexpected character at index 0: 'U+00FFFF'}
+test encoding-24.34 {Try to generate invalid utf-8 with -nocomplain} -body {
+    encoding convertto -nocomplain utf-8 \uFFFF
+} -result \xEF\xBF\xBF
+test encoding-24.35 {Parse invalid utf-8} -body {
+    encoding convertfrom utf-8 \xED\xA0\x80
+} -result \uD800
+test encoding-24.36 {Parse invalid utf-8 with -strict} -body {
+    encoding convertfrom -strict utf-8 \xED\xA0\x80
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'}
+test encoding-24.37 {Parse invalid utf-8 with -nocomplain} -body {
+    encoding convertfrom -nocomplain utf-8 \xED\xA0\x80
+} -result \uD800
+test encoding-24.38 {Try to generate invalid utf-8} -body {
+    encoding convertto utf-8 \uD800
+} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'}
+test encoding-24.39 {Try to generate invalid utf-8 with -strict} -body {
+    encoding convertto -strict utf-8 \uD800
+} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'}
+test encoding-24.40 {Try to generate invalid utf-8 with -nocomplain} -body {
+    encoding convertto -nocomplain utf-8 \uD800
+} -result \xED\xA0\x80
 
 file delete [file join [temporaryDirectory] iso2022.txt]
 
diff --git a/tests/io.test b/tests/io.test
index 9dd37f3..52ec200 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -5756,7 +5756,7 @@ test io-39.21 {Tcl_SetChannelOption, setting read mode independently} \
     close $s2
     set modes
 } {auto crlf}
-test io-39.22 {Tcl_SetChannelOption, invariance} {unix deprecated} {
+test io-39.22 {Tcl_SetChannelOption, invariance} unix {
     file delete $path(test1)
     set f1 [open $path(test1) w+]
     set l ""
@@ -5768,7 +5768,7 @@ test io-39.22 {Tcl_SetChannelOption, invariance} {unix deprecated} {
     close $f1
     set l
 } {{} O D}
-test io-39.22a {Tcl_SetChannelOption, invariance} deprecated {
+test io-39.22a {Tcl_SetChannelOption, invariance} {
     file delete $path(test1)
     set f1 [open $path(test1) w+]
     set l [list]
author	jan.nijtmans <nijtmans@users.sourceforge.net>	2022-11-30 11:11:28 (GMT)
committer	jan.nijtmans <nijtmans@users.sourceforge.net>	2022-11-30 11:11:28 (GMT)
commit	4d235898611fe66668632c48acbe44e9c86de509 (patch)
tree	645d763018361e93343272d49054dac1e06c1ad4
parent	5cb9dc5ddc24459e55fab2188d60c867755e6f40 (diff)
parent	a7d6b92abf2f95c2c8478c3bd79ee9da76c6d717 (diff)
download	tcl-4d235898611fe66668632c48acbe44e9c86de509.zip tcl-4d235898611fe66668632c48acbe44e9c86de509.tar.gz tcl-4d235898611fe66668632c48acbe44e9c86de509.tar.bz2