diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-03-20 22:45:51 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-03-20 22:45:51 (GMT) |
commit | d8ec6222e8374b744712d5901e829fd92ee43cf0 (patch) | |
tree | a366bd9c116e9aed571cc6b545e74edc5b43777a /generic/tclUtf.c | |
parent | 04ea3b8bff2991e54cc2469b372927735c9d7a83 (diff) | |
download | tcl-d8ec6222e8374b744712d5901e829fd92ee43cf0.zip tcl-d8ec6222e8374b744712d5901e829fd92ee43cf0.tar.gz tcl-d8ec6222e8374b744712d5901e829fd92ee43cf0.tar.bz2 |
Fix the Tcl_UtfToUniCharDString() function's handling of an invalid byte at the end of the string: it was not quite correct for bytes in the range 0x80-0x9F, according to the TIP.
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- | generic/tclUtf.c | 14 |
1 file changed, 7 insertions, 7 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index f3561f9..a330d11 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -453,7 +453,7 @@ Tcl_UtfToUniChar( } /* - * A four-byte-character lead-byte not followed by two trail-bytes + * A four-byte-character lead-byte not followed by three trail-bytes * represents itself. */ } @@ -619,10 +619,10 @@ Tcl_UtfToUniCharDString( } end += 4; while (p < end) { - if (Tcl_UtfCharComplete(p, end-p)) { - p += TclUtfToUniChar(p, &ch); - } else if (((UCHAR(*p)-0x80)) < 0x20) { + if (((unsigned)(UCHAR(*p)-0x80)) < 0x20) { ch = cp1252[UCHAR(*p++)-0x80]; + } else if (Tcl_UtfCharComplete(p, end-p)) { + p += TclUtfToUniChar(p, &ch); } else { ch = UCHAR(*p++); } @@ -673,10 +673,10 @@ TclUtfToWCharDString( } end += 4; while (p < end) { - if (Tcl_UtfCharComplete(p, end-p)) { - p += TclUtfToWChar(p, &ch); - } else if (((UCHAR(*p)-0x80)) < 0x20) { + if (((unsigned)(UCHAR(*p)-0x80)) < 0x20) { ch = cp1252[UCHAR(*p++)-0x80]; + } else if (Tcl_UtfCharComplete(p, end-p)) { + p += TclUtfToWChar(p, &ch); } else { ch = UCHAR(*p++); } |