diff options
author | apnadkarni <apnmbx-wits@yahoo.com> | 2023-02-18 17:34:25 (GMT) |
---|---|---|
committer | apnadkarni <apnmbx-wits@yahoo.com> | 2023-02-18 17:34:25 (GMT) |
commit | cd3942fc69fba10b956f315c7316b07e62aed863 (patch) | |
tree | f23de35c6234cb8358fd58d7c67964572782da98 | |
parent | 17937027a396cfdcf95f090f79a89faa47cb03e5 (diff) | |
parent | c731ca1ffdd3e7cc90cf064ac89b2f71551958ce (diff) | |
download | tcl-cd3942fc69fba10b956f315c7316b07e62aed863.zip tcl-cd3942fc69fba10b956f315c7316b07e62aed863.tar.gz tcl-cd3942fc69fba10b956f315c7316b07e62aed863.tar.bz2 |
Merge 8.7 to pick up UTF-8 encoding fix
-rw-r--r-- | generic/tclEncoding.c | 5 | ||||
-rw-r--r-- | tests/cmdAH.test | 6 | ||||
-rw-r--r-- | tests/encoding.test | 8 |
3 files changed, 14 insertions, 5 deletions
```diff
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 4d5743c..2095b4c 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2454,11 +2454,14 @@ UtfToUtfProc(
                 }
                 if (PROFILE_REPLACE(profile)) {
                     ch = UNICODE_REPLACE_CHAR;
+                    ++src;
                 } else {
                     /* TCL_ENCODING_PROFILE_TCL8 */
                     ch = UCHAR(*src);
+                    char chbuf[2];
+                    chbuf[0] = UCHAR(*src++); chbuf[1] = 0;
+                    TclUtfToUCS4(chbuf, &ch);
                 }
-                ++src;
             } else {
                 /*
diff --git a/tests/cmdAH.test b/tests/cmdAH.test
index ad315d2..11a8188 100644
--- a/tests/cmdAH.test
+++ b/tests/cmdAH.test
@@ -525,7 +525,7 @@ lappend encInvalidBytes {*}{
     utf-8 F1D0 tcl8 \u00F1\u00D0 -1 {knownBug} {First trail byte must be 80:BF}
     utf-8 F1D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF}
     utf-8 F1D0 strict {} 0 {} {First trail byte must be 80:BF}
-    utf-8 F180 tcl8 \u00F1\u0080 -1 {} {Missing second trail byte}
+    utf-8 F180 tcl8 \u00F1\u20AC -1 {} {Missing second trail byte}
     utf-8 F180 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
     utf-8 F180 strict {} 0 {} {Missing second trail byte}
     utf-8 F1BF tcl8 \u00F1\u00BF -1 {} {Missing second trail byte}
@@ -558,7 +558,7 @@ lappend encInvalidBytes {*}{
     utf-8 F3D0 tcl8 \u00F3\u00D0 -1 {knownBug} {First trail byte must be 80:BF}
     utf-8 F3D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF}
     utf-8 F3D0 strict {} 0 {} {First trail byte must be 80:BF}
-    utf-8 F380 tcl8 \u00F3\u0080 -1 {} {Missing second trail byte}
+    utf-8 F380 tcl8 \u00F3\u20AC -1 {} {Missing second trail byte}
     utf-8 F380 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
     utf-8 F380 strict {} 0 {} {Missing second trail byte}
     utf-8 F3BF tcl8 \u00F3\u00BF -1 {} {Missing second trail byte}
@@ -592,7 +592,7 @@ lappend encInvalidBytes {*}{
     utf-8 F490 tcl8 \u00F4\u0090 -1 {knownBug} {First trail byte must be 80:8F}
     utf-8 F490 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:8F}
     utf-8 F490 strict {} 0 {} {First trail byte must be 80:8F}
-    utf-8 F480 tcl8 \u00F4\u0080 -1 {} {Missing second trail byte}
+    utf-8 F480 tcl8 \u00F4\u20AC -1 {} {Missing second trail byte}
     utf-8 F480 replace \uFFFD -1 {knownW3C} {Missing second trail byte}
     utf-8 F480 strict {} 0 {} {Missing second trail byte}
     utf-8 F48F tcl8 \u00F4\u008F -1 {} {Missing second trail byte}
diff --git a/tests/encoding.test b/tests/encoding.test
index de6b87b..36728d1 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -768,7 +768,7 @@ test encoding-24.14 {Parse valid or invalid utf-8} {
 } 1
 test encoding-24.15 {Parse valid or invalid utf-8} -constraints deprecated -body {
     encoding convertfrom utf-8 "Z\xE0\x80"
-} -result Z\xE0\x80
+} -result Z\xE0\u20AC
 test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body {
     encoding convertto utf-8 [testbytestring "Z\u4343\x80"]
 } -returnCodes 1 -result {expected byte sequence but character 1 was '䍃' (U+004343)}
@@ -847,6 +847,12 @@ test encoding-24.40 {Try to generate invalid utf-8 with -profile tcl8} -body {
 test encoding-24.41 {Parse invalid utf-8 with -profile strict} -body {
     encoding convertfrom -profile strict utf-8 \xED\xA0\x80\xED\xB0\x80
 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'}
+test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
+    encoding convertfrom -nocomplain utf-8 \xF0\x80\x80\x80
+} -result \xF0\u20AC\u20AC\u20AC
+test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
+    encoding convertfrom -nocomplain utf-8 \x80
+} -result \u20AC
 file delete [file join [temporaryDirectory] iso2022.txt]
```