summary refs log tree commit diff stats
diff options
context:
space:
mode:
author apnadkarni <apnmbx-wits@yahoo.com> 2023-02-18 17:34:25 (GMT)
committer apnadkarni <apnmbx-wits@yahoo.com> 2023-02-18 17:34:25 (GMT)
commit cd3942fc69fba10b956f315c7316b07e62aed863 (patch)
tree f23de35c6234cb8358fd58d7c67964572782da98
parent 17937027a396cfdcf95f090f79a89faa47cb03e5 (diff)
parent c731ca1ffdd3e7cc90cf064ac89b2f71551958ce (diff)
download tcl-cd3942fc69fba10b956f315c7316b07e62aed863.zip
tcl-cd3942fc69fba10b956f315c7316b07e62aed863.tar.gz
tcl-cd3942fc69fba10b956f315c7316b07e62aed863.tar.bz2
Merge 8.7 to pick up UTF-8 encoding fix
-rw-r--r-- generic/tclEncoding.c 5
-rw-r--r-- tests/cmdAH.test 6
-rw-r--r-- tests/encoding.test 8
3 files changed, 14 insertions, 5 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 4d5743c..2095b4c 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2454,11 +2454,14 @@ UtfToUtfProc(
}
if (PROFILE_REPLACE(profile)) {
ch = UNICODE_REPLACE_CHAR;
+ ++src;
} else {
/* TCL_ENCODING_PROFILE_TCL8 */
ch = UCHAR(*src);
+ char chbuf[2];
+ chbuf[0] = UCHAR(*src++); chbuf[1] = 0;
+ TclUtfToUCS4(chbuf, &ch);
}
- ++src;
}
else {
/*
diff --git a/tests/cmdAH.test b/tests/cmdAH.test
index ad315d2..11a8188 100644
--- a/tests/cmdAH.test
+++ b/tests/cmdAH.test
@@ -525,7 +525,7 @@ lappend encInvalidBytes {*}{
utf-8 F1D0 tcl8 \u00F1\u00D0 -1 {knownBug} {First trail byte must be 80:BF}
utf-8 F1D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF}
utf-8 F1D0 strict {} 0 {} {First trail byte must be 80:BF}
- utf-8 F180 tcl8 \u00F1\u0080 -1 {} {Missing second trail byte}
+ utf-8 F180 tcl8 \u00F1\u20AC -1 {} {Missing second trail byte}
utf-8 F180 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
utf-8 F180 strict {} 0 {} {Missing second trail byte}
utf-8 F1BF tcl8 \u00F1\u00BF -1 {} {Missing second trail byte}
@@ -558,7 +558,7 @@ lappend encInvalidBytes {*}{
utf-8 F3D0 tcl8 \u00F3\u00D0 -1 {knownBug} {First trail byte must be 80:BF}
utf-8 F3D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF}
utf-8 F3D0 strict {} 0 {} {First trail byte must be 80:BF}
- utf-8 F380 tcl8 \u00F3\u0080 -1 {} {Missing second trail byte}
+ utf-8 F380 tcl8 \u00F3\u20AC -1 {} {Missing second trail byte}
utf-8 F380 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
utf-8 F380 strict {} 0 {} {Missing second trail byte}
utf-8 F3BF tcl8 \u00F3\u00BF -1 {} {Missing second trail byte}
@@ -592,7 +592,7 @@ lappend encInvalidBytes {*}{
utf-8 F490 tcl8 \u00F4\u0090 -1 {knownBug} {First trail byte must be 80:8F}
utf-8 F490 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:8F}
utf-8 F490 strict {} 0 {} {First trail byte must be 80:8F}
- utf-8 F480 tcl8 \u00F4\u0080 -1 {} {Missing second trail byte}
+ utf-8 F480 tcl8 \u00F4\u20AC -1 {} {Missing second trail byte}
utf-8 F480 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
utf-8 F480 strict {} 0 {} {Missing second trail byte}
utf-8 F48F tcl8 \u00F4\u008F -1 {} {Missing second trail byte}
diff --git a/tests/encoding.test b/tests/encoding.test
index de6b87b..36728d1 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -768,7 +768,7 @@ test encoding-24.14 {Parse valid or invalid utf-8} {
} 1
test encoding-24.15 {Parse valid or invalid utf-8} -constraints deprecated -body {
encoding convertfrom utf-8 "Z\xE0\x80"
-} -result Z\xE0\x80
+} -result Z\xE0\u20AC
test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body {
encoding convertto utf-8 [testbytestring "Z\u4343\x80"]
} -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)}
@@ -847,6 +847,12 @@ test encoding-24.40 {Try to generate invalid utf-8 with -profile tcl8} -body {
test encoding-24.41 {Parse invalid utf-8 with -profile strict} -body {
encoding convertfrom -profile strict utf-8 \xED\xA0\x80\xED\xB0\x80
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'}
+test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
+ encoding convertfrom -nocomplain utf-8 \xF0\x80\x80\x80
+} -result \xF0\u20AC\u20AC\u20AC
+test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
+ encoding convertfrom -nocomplain utf-8 \x80
+} -result \u20AC
file delete [file join [temporaryDirectory] iso2022.txt]