summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- generic/tclEncoding.c 11
-rw-r--r-- tests/encoding.test 8
-rw-r--r-- tests/io.test 78
3 files changed, 9 insertions, 88 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 60abb51..23d71ff 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2408,13 +2408,12 @@ UtfToUtfProc(
dst += Tcl_UniCharToUtf(ch, dst);
ch = low;
#endif
- } else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && !Tcl_UniCharIsUnicode(ch)
- && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
+ } else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && (((ch & ~0x7FF) == 0xD800))) {
result = TCL_CONVERT_UNKNOWN;
src = saveSrc;
break;
} else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
- && (flags & TCL_ENCODING_MODIFIED) && !Tcl_UniCharIsUnicode(ch)) {
+ && (flags & TCL_ENCODING_MODIFIED) && ((ch & ~0x7FF) == 0xD800)) {
result = TCL_CONVERT_SYNTAX;
src = saveSrc;
break;
@@ -2506,7 +2505,7 @@ Utf32ToUtfProc(
ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF);
}
if ((unsigned)ch > 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
- && !Tcl_UniCharIsUnicode(ch))) {
+ && ((ch & ~0x7FF) == 0xD800))) {
if (STOPONERROR) {
result = TCL_CONVERT_SYNTAX;
break;
@@ -2602,7 +2601,7 @@ UtfToUtf32Proc(
break;
}
len = TclUtfToUCS4(src, &ch);
- if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
+ if ((ch & ~0x7FF) == 0xD800) {
if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
@@ -2804,7 +2803,7 @@ UtfToUtf16Proc(
break;
}
len = TclUtfToUCS4(src, &ch);
- if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
+ if ((ch & ~0x7FF) == 0xD800) {
if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
diff --git a/tests/encoding.test b/tests/encoding.test
index a19357e..4dd2e98 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -704,18 +704,18 @@ test encoding-24.28 {Parse invalid utf-8 with -strict} -body {
test encoding-24.29 {Parse invalid utf-8} -body {
encoding convertfrom utf-8 \xEF\xBF\xBF
} -result \uFFFF
-test encoding-24.30 {Parse invalid utf-8 with -strict} -body {
+test encoding-24.30 {Parse noncharacter with -strict} -body {
encoding convertfrom -strict utf-8 \xEF\xBF\xBF
-} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xEF'}
+} -result \uFFFF
test encoding-24.31 {Parse invalid utf-8 with -nocomplain} -body {
encoding convertfrom -nocomplain utf-8 \xEF\xBF\xBF
} -result \uFFFF
test encoding-24.32 {Try to generate invalid utf-8} -body {
encoding convertto utf-8 \uFFFF
} -result \xEF\xBF\xBF
-test encoding-24.33 {Try to generate invalid utf-8 with -strict} -body {
+test encoding-24.33 {Try to generate noncharacter with -strict} -body {
encoding convertto -strict utf-8 \uFFFF
-} -returnCodes 1 -result {unexpected character at index 0: 'U+00FFFF'}
+} -result \xEF\xBF\xBF
test encoding-24.34 {Try to generate invalid utf-8 with -nocomplain} -body {
encoding convertto -nocomplain utf-8 \uFFFF
} -result \xEF\xBF\xBF
diff --git a/tests/io.test b/tests/io.test
index 19c00e9..f98cdee 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -9198,84 +9198,6 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -
removeFile io-75.13
} -match glob -result {41 1 {error reading "*": illegal byte sequence}}
-# Testcase for Rolf's use-case (detecting Invalid byte sequence, but allowing noncharacter)
-test io-75.14 {How to use -strict, but allow non-characters} -setup {
- set fn [makeFile {} io-75.14]
- set f [open $fn w+]
- fconfigure $f -encoding binary
- # Noncharacter followed by a single
- puts -nonewline $f pre\xEF\xBF\xBE\x81post
- flush $f
- seek $f 0
- fconfigure stdout -nocomplainencoding 1
- catch {fconfigure $f -nocomplainencoding 0};# Only needed on Tcl 9
- fconfigure $f -encoding utf-8 -buffering none -translation lf -strictencoding 1
-} -body {
- set hd {}
- catch {
- while {![eof $f]} {
- if {[catch {
- append hd [read $f]
- }]} {
- fconfigure $f -nocomplainencoding 1 -strictencoding 0
- set char [read $f 1]
- if {[string is unicode $char]} {
- error "InvalidByteSequence"
- } elseif {$char >= "\uD800" && $char < "\uE000"} {
- error "Surrogate"
- } else {
- append hd $char
- }
- catch {fconfigure $f -nocomplainencoding 0};# Only needed on Tcl 9
- fconfigure $f -strictencoding 1 -encoding utf-8
- }
- }
- } msg
- close $f
- append hd +$msg
-} -cleanup {
- removeFile io-75.14
-} -result "pre\uFFFE+InvalidByteSequence"
-
-# Testcase for Rolf's use-case (detecting Surrogate, but allowing noncharacter)
-test io-75.15 {How to use -strict, but allow non-characters} -setup {
- set fn [makeFile {} io-75.14]
- set f [open $fn w+]
- fconfigure $f -encoding utf-8 -nocomplainencoding 1
- # Noncharacter followed by a single
- puts -nonewline $f pre\uFFFE\uD800post
- flush $f
- seek $f 0
- fconfigure stdout -nocomplainencoding 1
- catch {fconfigure $f -nocomplainencoding 0};# Only needed on Tcl 9
- fconfigure $f -buffering none -translation lf -strictencoding 1
-} -body {
- set hd {}
- catch {
- while {![eof $f]} {
- if {[catch {
- append hd [read $f]
- }]} {
- fconfigure $f -nocomplainencoding 1 -strictencoding 0
- set char [read $f 1]
- if {[string is unicode $char]} {
- error "Invalid Byte Sequence"
- } elseif {$char >= "\uD800" && $char < "\uE000"} {
- error "Surrogate"
- } else {
- append hd $char
- }
- catch {fconfigure $f -nocomplainencoding 0};# Only needed on Tcl 9
- fconfigure $f -strictencoding 1
- }
- }
- } msg
- close $f
- append hd +$msg
-} -cleanup {
- removeFile io-75.15
-} -result "pre\uFFFE+Surrogate"
-
# ### ### ### ######### ######### #########