diff options
| -rw-r--r-- | generic/tclEncoding.c | 11 | ||||
| -rw-r--r-- | tests/encoding.test | 8 | ||||
| -rw-r--r-- | tests/io.test | 78 |
3 files changed, 9 insertions, 88 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 60abb51..23d71ff 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2408,13 +2408,12 @@ UtfToUtfProc( dst += Tcl_UniCharToUtf(ch, dst); ch = low; #endif - } else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && !Tcl_UniCharIsUnicode(ch) - && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { + } else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && (((ch & ~0x7FF) == 0xD800))) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - && (flags & TCL_ENCODING_MODIFIED) && !Tcl_UniCharIsUnicode(ch)) { + && (flags & TCL_ENCODING_MODIFIED) && ((ch & ~0x7FF) == 0xD800)) { result = TCL_CONVERT_SYNTAX; src = saveSrc; break; @@ -2506,7 +2505,7 @@ Utf32ToUtfProc( ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF); } if ((unsigned)ch > 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - && !Tcl_UniCharIsUnicode(ch))) { + && ((ch & ~0x7FF) == 0xD800))) { if (STOPONERROR) { result = TCL_CONVERT_SYNTAX; break; @@ -2602,7 +2601,7 @@ UtfToUtf32Proc( break; } len = TclUtfToUCS4(src, &ch); - if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { + if ((ch & ~0x7FF) == 0xD800) { if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; @@ -2804,7 +2803,7 @@ UtfToUtf16Proc( break; } len = TclUtfToUCS4(src, &ch); - if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { + if ((ch & ~0x7FF) == 0xD800) { if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; diff --git a/tests/encoding.test b/tests/encoding.test index a19357e..4dd2e98 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -704,18 +704,18 @@ test encoding-24.28 {Parse invalid utf-8 with -strict} -body { test encoding-24.29 {Parse invalid utf-8} -body { encoding convertfrom utf-8 \xEF\xBF\xBF } -result \uFFFF -test encoding-24.30 {Parse invalid utf-8 with -strict} -body { +test encoding-24.30 {Parse noncharacter with -strict} -body { encoding convertfrom -strict utf-8 \xEF\xBF\xBF -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xEF'} +} -result \uFFFF test encoding-24.31 {Parse invalid utf-8 with -nocomplain} -body { encoding convertfrom -nocomplain utf-8 \xEF\xBF\xBF } -result \uFFFF test encoding-24.32 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uFFFF } -result \xEF\xBF\xBF -test encoding-24.33 {Try to generate invalid utf-8 with -strict} -body { +test encoding-24.33 {Try to generate noncharacter with -strict} -body { encoding convertto -strict utf-8 \uFFFF -} -returnCodes 1 -result {unexpected character at index 0: 'U+00FFFF'} +} -result \xEF\xBF\xBF test encoding-24.34 {Try to generate invalid utf-8 with -nocomplain} -body { encoding convertto -nocomplain utf-8 \uFFFF } -result \xEF\xBF\xBF diff --git a/tests/io.test b/tests/io.test index 19c00e9..f98cdee 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9198,84 +9198,6 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} - removeFile io-75.13 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} -# Testcase for Rolf's use-case (detecting Invalid byte sequence, but allowing noncharacter) -test io-75.14 {How to use -strict, but allow non-characters} -setup { - set fn [makeFile {} io-75.14] - set f [open $fn w+] - fconfigure $f -encoding binary - # Noncharacter followed by a single - puts -nonewline $f pre\xEF\xBF\xBE\x81post - flush $f - seek $f 0 - fconfigure stdout -nocomplainencoding 1 - catch {fconfigure $f -nocomplainencoding 0};# Only needed on Tcl 9 - fconfigure $f -encoding utf-8 -buffering none -translation lf -strictencoding 1 -} -body { - set hd {} - catch { - while {![eof $f]} { - if {[catch { - append hd [read $f] - }]} { - fconfigure $f -nocomplainencoding 1 -strictencoding 0 - set char [read $f 1] - if {[string is unicode $char]} { - error "InvalidByteSequence" - } elseif {$char >= "\uD800" && $char < "\uE000"} { - error "Surrogate" - } else { - append hd $char - } - catch {fconfigure $f -nocomplainencoding 0};# Only needed on Tcl 9 - fconfigure $f -strictencoding 1 -encoding utf-8 - } - } - } msg - close $f - append hd +$msg -} -cleanup { - removeFile io-75.14 -} -result "pre\uFFFE+InvalidByteSequence" - -# Testcase for Rolf's use-case (detecting Surrogate, but allowing noncharacter) -test io-75.15 {How to use -strict, but allow non-characters} -setup { - set fn [makeFile {} io-75.14] - set f [open $fn w+] - fconfigure $f -encoding utf-8 -nocomplainencoding 1 - # Noncharacter followed by a single - puts -nonewline $f pre\uFFFE\uD800post - flush $f - seek $f 0 - fconfigure stdout -nocomplainencoding 1 - catch {fconfigure $f -nocomplainencoding 0};# Only needed on Tcl 9 - fconfigure $f -buffering none -translation lf -strictencoding 1 -} -body { - set hd {} - catch { - while {![eof $f]} { - if {[catch { - append hd [read $f] - }]} { - fconfigure $f -nocomplainencoding 1 -strictencoding 0 - set char [read $f 1] - if {[string is unicode $char]} { - error "Invalid Byte Sequence" - } elseif {$char >= "\uD800" && $char < "\uE000"} { - error "Surrogate" - } else { - append hd $char - } - catch {fconfigure $f -nocomplainencoding 0};# Only needed on Tcl 9 - fconfigure $f -strictencoding 1 - } - } - } msg - close $f - append hd +$msg -} -cleanup { - removeFile io-75.15 -} -result "pre\uFFFE+Surrogate" - # ### ### ### ######### ######### ######### |
