summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- generic/tclEncoding.c 10
-rw-r--r-- generic/tclIO.c 12
-rw-r--r-- generic/tclIO.h 2
-rw-r--r-- tests/encoding.test 15
-rw-r--r-- tests/io.test 21
-rw-r--r-- tests/unixInit.test 3
6 files changed, 54 insertions, 9 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index efe4b43..eb217b4 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2391,7 +2391,7 @@ UtfToUtfProc(
* If in input mode, and -strict is specified: This is an error.
*/
if (flags & TCL_ENCODING_MODIFIED) {
- result = TCL_CONVERT_UNKNOWN;
+ result = TCL_CONVERT_SYNTAX;
break;
}
@@ -2413,6 +2413,10 @@ UtfToUtfProc(
result = TCL_CONVERT_MULTIBYTE;
break;
}
+ if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) {
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ }
ch = UCHAR(*src++);
} else {
char chbuf[2];
@@ -2424,8 +2428,8 @@ UtfToUtfProc(
int low;
const char *saveSrc = src;
size_t len = TclUtfToUCS4(src, &ch);
- if ((len < 2) && (ch != 0) && STOPONERROR
- && (flags & TCL_ENCODING_MODIFIED)) {
+ if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED)
+ && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
result = TCL_CONVERT_SYNTAX;
break;
}
diff --git a/generic/tclIO.c b/generic/tclIO.c
index 4002934..d5fbd18 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -4466,7 +4466,7 @@ Write(
* current output encoding and strict encoding is active.
*/
- if (result == TCL_CONVERT_UNKNOWN) {
+ if (result == TCL_CONVERT_UNKNOWN || result == TCL_CONVERT_SYNTAX) {
encodingError = 1;
result = TCL_OK;
}
@@ -5517,6 +5517,11 @@ FilterInputBytes(
&statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead,
&gsPtr->bytesWrote, &gsPtr->charsWrote);
+ if (result == TCL_CONVERT_UNKNOWN || result == TCL_CONVERT_SYNTAX) {
+ SetFlag(statePtr, CHANNEL_ENCODING_ERROR);
+ result = TCL_OK;
+ }
+
/*
* Make sure that if we go through 'gets', that we reset the
* TCL_ENCODING_START flag still. [Bug #523988]
@@ -6345,6 +6350,11 @@ ReadChars(
flags, &statePtr->inputEncodingState,
dst, dstLimit, &srcRead, &dstDecoded, &numChars);
+ if (code == TCL_CONVERT_UNKNOWN || code == TCL_CONVERT_SYNTAX) {
+ SetFlag(statePtr, CHANNEL_ENCODING_ERROR);
+ code = TCL_OK;
+ }
+
/*
* Perform the translation transformation in place. Read no more than
* the dstDecoded bytes the encoding transformation actually produced.
diff --git a/generic/tclIO.h b/generic/tclIO.h
index 1da8478..fbd01ee 100644
--- a/generic/tclIO.h
+++ b/generic/tclIO.h
@@ -271,6 +271,8 @@ typedef struct ChannelState {
* delivered for buffered data until
* the state of the channel
* changes. */
+#define CHANNEL_ENCODING_ERROR (1<<15) /* set if channel
+ * encountered an encoding error */
#define CHANNEL_RAW_MODE (1<<16) /* When set, notes that the Raw API is
* being used. */
#define CHANNEL_ENCODING_NOCOMPLAIN (1<<17) /* set if option
diff --git a/tests/encoding.test b/tests/encoding.test
index d234e0c..9aa123d 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -673,6 +673,21 @@ test encoding-24.22 {Syntax error, two encodings} -body {
test encoding-24.23 {Syntax error, two encodings} -body {
encoding convertto iso8859-1 utf-8 "ZX\uD800"
} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"}
+test encoding-24.24 {Parse invalid utf-8 with -strict} -body {
+ encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
+test encoding-24.25 {Parse invalid utf-8 with -strict} -body {
+ encoding convertfrom -strict utf-8 "\x40\x80\x00\x00"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\x80'}
+test encoding-24.26 {Parse valid utf-8 with -strict} -body {
+ encoding convertfrom -strict utf-8 "\xF1\x80\x80\x80"
+} -result \U40000
+test encoding-24.27 {Parse invalid utf-8 with -strict} -body {
+ encoding convertfrom -strict utf-8 "\xF0\x80\x80\x80"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'}
+test encoding-24.28 {Parse invalid utf-8 with -strict} -body {
+ encoding convertfrom -strict utf-8 "\xFF\x00\x00"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'}
file delete [file join [temporaryDirectory] iso2022.txt]
diff --git a/tests/io.test b/tests/io.test
index 9ae25bb..ef9e14d 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -9029,11 +9029,10 @@ test io-75.4 {shiftjis encoding error read results in raw bytes} -setup {
removeFile io-75.4
} -result "4181ff41"
-test io-75.5 {incomplete shiftjis encoding read is ignored} -setup {
+test io-75.5 {invalid utf-8 encoding read is ignored} -setup {
set fn [makeFile {} io-75.5]
set f [open $fn w+]
fconfigure $f -encoding binary
- # \x81 announces a two byte sequence.
puts -nonewline $f "A\x81"
flush $f
seek $f 0
@@ -9047,6 +9046,24 @@ test io-75.5 {incomplete shiftjis encoding read is ignored} -setup {
removeFile io-75.5
} -result "4181"
+test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup {
+ set fn [makeFile {} io-75.6]
+ set f [open $fn w+]
+ fconfigure $f -encoding binary
+ # \x81 is invalid in utf-8
+ puts -nonewline $f "A\x81"
+ flush $f
+ seek $f 0
+ fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1
+} -body {
+ set d [read $f]
+ binary scan $d H* hd
+ lappend hd [catch {read $f} msg]
+ close $f
+ lappend hd $msg
+} -cleanup {
+ removeFile io-75.6
+} -result "41 0 {}" ; # Here, an exception should be thrown
# ### ### ### ######### ######### #########
diff --git a/tests/unixInit.test b/tests/unixInit.test
index 8e64c7a..16d9e64 100644
--- a/tests/unixInit.test
+++ b/tests/unixInit.test
@@ -346,8 +346,6 @@ test unixInit-3.1 {TclpSetInitialEncodings} -constraints {
} -match regexp -result {^(iso8859-15?|utf-8)$}
test unixInit-3.2 {TclpSetInitialEncodings} -setup {
catch {set oldlc_all $env(LC_ALL)}
- catch {set oldtcl_library $env(TCL_LIBRARY)}
- unset -nocomplain env(TCL_LIBRARY)
} -constraints {unix stdio knownBug} -body {
set env(LANG) japanese
set env(LC_ALL) japanese
@@ -366,7 +364,6 @@ test unixInit-3.2 {TclpSetInitialEncodings} -setup {
} -cleanup {
unset -nocomplain env(LANG) env(LC_ALL)
catch {set env(LC_ALL) $oldlc_all}
- catch {set env(TCL_LIBRARY) $oldtcl_library}
} -result 0
test unixInit-4.1 {TclpSetVariables} {unix} {