diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-02-14 20:50:26 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-02-14 20:50:26 (GMT) |
commit | 38df35585000fd7245c6604e845663751a7bd524 (patch) | |
tree | ba2df03a1bbdaac0c186059f8712310934067a00 /generic | |
parent | 2974b5727951737a5b67789f4b7712cf72096ed0 (diff) | |
download | tcl-38df35585000fd7245c6604e845663751a7bd524.zip tcl-38df35585000fd7245c6604e845663751a7bd524.tar.gz tcl-38df35585000fd7245c6604e845663751a7bd524.tar.bz2 |
Complete fix for [bd1a60eb9c]. Also fix a bug in the tableencoding. With testcases.
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclEncoding.c | 24 |
1 files changed, 16 insertions, 8 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 01c4eb1..c5ecc46 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2380,7 +2380,7 @@ UtfToUtfProc( result = TCL_CONVERT_NOSPACE; break; } - if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & TCL_ENCODING_MODIFIED))) { + if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & ENCODING_INPUT))) { /* * Copy 7bit characters, but skip null-bytes when we are in input * mode, so that they get converted to 0xC080. @@ -2388,11 +2388,13 @@ UtfToUtfProc( *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) - && (UCHAR(src[1]) == 0x80) && (!(flags & TCL_ENCODING_MODIFIED) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) { + && (UCHAR(src[1]) == 0x80) && (flags & ENCODING_UTF) && (!(flags & ENCODING_INPUT) + || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) + || (flags & ENCODING_FAILINDEX))) { /* * If in input mode, and -strict or -failindex is specified: This is an error. */ - if (flags & TCL_ENCODING_MODIFIED) { + if (flags & ENCODING_INPUT) { result = TCL_CONVERT_SYNTAX; break; } @@ -2410,7 +2412,7 @@ UtfToUtfProc( * unless the user has explicitly asked to be told. */ - if (flags & TCL_ENCODING_MODIFIED) { + if (flags & ENCODING_INPUT) { if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) { result = TCL_CONVERT_MULTIBYTE; break; @@ -2430,7 +2432,7 @@ UtfToUtfProc( int low; const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); - if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED) + if ((len < 2) && (ch != 0) && (flags & ENCODING_INPUT) && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { result = TCL_CONVERT_SYNTAX; break; @@ -2451,6 +2453,11 @@ UtfToUtfProc( * A surrogate character is detected, handle especially. */ + if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) && (flags & ENCODING_UTF)) { + result = TCL_CONVERT_UNKNOWN; + src = saveSrc; + break; + } low = ch; len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; @@ -2470,12 +2477,12 @@ UtfToUtfProc( src += len; dst += Tcl_UniCharToUtf(ch, dst); ch = low; - } else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && (((ch & ~0x7FF) == 0xD800))) { + } else if (STOPONERROR && !(flags & ENCODING_INPUT) && (((ch & ~0x7FF) == 0xD800))) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - && (flags & TCL_ENCODING_MODIFIED) && ((ch & ~0x7FF) == 0xD800)) { + && (flags & ENCODING_INPUT) && ((ch & ~0x7FF) == 0xD800)) { result = TCL_CONVERT_SYNTAX; src = saveSrc; break; @@ -3117,7 +3124,8 @@ TableToUtfProc( ch = pageZero[byte]; } if ((ch == 0) && (byte != 0)) { - if (STOPONERROR) { + if ((flags & ENCODING_FAILINDEX) + || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { result = TCL_CONVERT_SYNTAX; break; } |