summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- generic/tclEncoding.c 45
-rw-r--r-- tests/chanio.test 2
-rw-r--r-- tests/io.test 2
3 files changed, 29 insertions, 20 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index dea112a..ba9f811 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -537,8 +537,8 @@ TclInitEncodingSubsystem(void)
unsigned size;
unsigned short i;
union {
- char c;
- short s;
+ char c;
+ short s;
} isLe;
if (encodingsInitialized) {
@@ -1233,9 +1233,9 @@ Tcl_ExternalToUtf(
}
if (!noTerminate) {
- if (dstLen < 1) {
- return TCL_CONVERT_NOSPACE;
- }
+ if (dstLen < 1) {
+ return TCL_CONVERT_NOSPACE;
+ }
/*
* If there are any null characters in the middle of the buffer,
* they will be converted to the UTF-8 null character (\xC080). To get
@@ -1245,9 +1245,9 @@ Tcl_ExternalToUtf(
dstLen--;
} else {
- if (dstLen < 0) {
- return TCL_CONVERT_NOSPACE;
- }
+ if (dstLen < 0) {
+ return TCL_CONVERT_NOSPACE;
+ }
}
do {
Tcl_EncodingState savedState = *statePtr;
@@ -1423,7 +1423,7 @@ Tcl_UtfToExternal(
}
if (dstLen < encodingPtr->nullSize) {
- return TCL_CONVERT_NOSPACE;
+ return TCL_CONVERT_NOSPACE;
}
dstLen -= encodingPtr->nullSize;
result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen,
@@ -2731,17 +2731,26 @@ TableToUtfProc(
}
byte = *((unsigned char *) src);
if (prefixBytes[byte]) {
- src++;
- if (src >= srcEnd) {
- src--;
- result = TCL_CONVERT_MULTIBYTE;
- break;
+ if (src >= srcEnd-1) {
+ /* Prefix byte but nothing after it */
+ if (!(flags & TCL_ENCODING_END)) {
+ /* More data to come */
+ result = TCL_CONVERT_MULTIBYTE;
+ break;
+ } else if (flags & TCL_ENCODING_STOPONERROR) {
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ } else {
+ ch = (Tcl_UniChar)byte;
+ }
+ } else {
+ ch = toUnicode[byte][*((unsigned char *)++src)];
}
- ch = toUnicode[byte][*((unsigned char *) src)];
} else {
ch = pageZero[byte];
}
if ((ch == 0) && (byte != 0)) {
+ /* Prefix+suffix pair is invalid */
if (flags & TCL_ENCODING_STOPONERROR) {
result = TCL_CONVERT_SYNTAX;
break;
@@ -2749,14 +2758,14 @@ TableToUtfProc(
if (prefixBytes[byte]) {
src--;
}
- ch = (Tcl_UniChar) byte;
+ ch = (Tcl_UniChar)byte;
}
/*
* Special case for 1-byte Utf chars for speed.
*/
- if (ch && ch < 0x80) {
+ if ((unsigned)ch - 1 < 0x7F) {
*dst++ = (char) ch;
} else {
dst += Tcl_UniCharToUtf(ch, dst);
@@ -2963,7 +2972,7 @@ Iso88591ToUtfProc(
* Special case for 1-byte utf chars for speed.
*/
- if (ch && ch < 0x80) {
+ if ((unsigned)ch - 1 < 0x7F) {
*dst++ = (char) ch;
} else {
dst += Tcl_UniCharToUtf(ch, dst);
diff --git a/tests/chanio.test b/tests/chanio.test
index 3452f78..aef6a1b 100644
--- a/tests/chanio.test
+++ b/tests/chanio.test
@@ -1098,7 +1098,7 @@ test chan-io-7.3 {FilterInputBytes: split up character at EOF} -setup {
lappend x [chan gets $f line] $line
} -cleanup {
chan close $f
-} -result [list 15 "1234567890123\uFF10\uFF11" 18 0 1 -1 ""]
+} -result [list 16 "1234567890123\uFF10\uFF11\x82" 18 0 1 -1 ""]
test chan-io-7.4 {FilterInputBytes: recover from split up character} -setup {
variable x ""
} -constraints {stdio fileevent} -body {
diff --git a/tests/io.test b/tests/io.test
index d20bc87..50a6018 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -1136,7 +1136,7 @@ test io-7.3 {FilterInputBytes: split up character at EOF} {testchannel} {
lappend x [gets $f line] $line
close $f
set x
-} [list 15 "1234567890123\uFF10\uFF11" 18 0 1 -1 ""]
+} [list 16 "1234567890123\uFF10\uFF11\x82" 18 0 1 -1 ""]
test io-7.4 {FilterInputBytes: recover from split up character} {stdio fileevent} {
set f [open "|[list [interpreter] $path(cat)]" w+]
fconfigure $f -encoding binary -buffering none