From 0a42d952ff2cba9243fc2f432420bffb52aa9e70 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 7 Sep 2005 15:31:09 +0000 Subject: * generic/tclUtf.c (Tcl_UniCharToUtf): Corrected handling of negative * tests/utf.test (utf-1.5): Tcl_UniChar input value. Incorrect handling was producing byte sequences outside of Tcl's legal internal encoding. [Bug 1283976]. --- ChangeLog | 7 ++++++ generic/tclUtf.c | 74 +++++++++++++++++++++++++++++--------------------------- tests/utf.test | 5 +++- 3 files changed, 49 insertions(+), 37 deletions(-) diff --git a/ChangeLog b/ChangeLog index 46f5483..ffefd16 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2005-09-07 Don Porter + + * generic/tclUtf.c (Tcl_UniCharToUtf): Corrected handling of negative + * tests/utf.test (utf-1.5): Tcl_UniChar input value. Incorrect + handling was producing byte sequences outside of Tcl's legal internal + encoding. [Bug 1283976]. + 2005-09-06 Donal K. Fellows * generic/tclInt.h (List): Added flag to keep track of whether a list diff --git a/generic/tclUtf.c b/generic/tclUtf.c index fbd37e6..992a55f 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtf.c,v 1.35 2005/07/21 14:38:51 dkf Exp $ + * RCS: @(#) $Id: tclUtf.c,v 1.36 2005/09/07 15:31:10 dgp Exp $ */ #include "tclInt.h" @@ -168,45 +168,47 @@ Tcl_UniCharToUtf(ch, buf) buf[0] = (char) ch; return 1; } - if (ch <= 0x7FF) { - buf[1] = (char) ((ch | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 6) | 0xC0); - return 2; - } - if (ch <= 0xFFFF) { + if (ch >= 0) { + if (ch <= 0x7FF) { + buf[1] = (char) ((ch | 0x80) & 0xBF); + buf[0] = (char) ((ch >> 6) | 0xC0); + return 2; + } + if (ch <= 0xFFFF) { three: - buf[2] = (char) ((ch | 0x80) & 0xBF); - buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 12) | 0xE0); - return 3; - } + buf[2] = (char) ((ch | 0x80) & 0xBF); + buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); + buf[0] = (char) ((ch >> 12) | 0xE0); + return 3; + } #if TCL_UTF_MAX > 3 - if (ch <= 0x1FFFFF) { - buf[3] = (char) ((ch | 0x80) & 0xBF); - buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 18) | 0xF0); - return 4; - } - if (ch <= 0x3FFFFFF) { - buf[4] = (char) ((ch | 0x80) & 0xBF); - buf[3] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[2] = (char) (((ch >> 12) | 0x80) & 0xBF); - buf[1] = (char) (((ch >> 18) | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 24) | 0xF8); - return 5; - } - if (ch <= 0x7FFFFFFF) { - buf[5] = (char) ((ch | 0x80) & 0xBF); - buf[4] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[3] = (char) (((ch >> 12) | 0x80) & 0xBF); - buf[2] = (char) (((ch >> 18) | 0x80) & 0xBF); - buf[1] = (char) (((ch >> 24) | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 30) | 0xFC); - return 6; - } + if (ch <= 0x1FFFFF) { + buf[3] = (char) ((ch | 0x80) & 0xBF); + buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF); + buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF); + buf[0] = (char) ((ch >> 18) | 0xF0); + return 4; + } + if (ch <= 0x3FFFFFF) { + buf[4] = (char) ((ch | 0x80) & 0xBF); + buf[3] = (char) (((ch >> 6) | 0x80) & 0xBF); + buf[2] = (char) (((ch >> 12) | 0x80) & 0xBF); + buf[1] = (char) (((ch >> 18) | 0x80) & 0xBF); + buf[0] = (char) ((ch >> 24) | 0xF8); + return 5; + } + if (ch <= 0x7FFFFFFF) { + buf[5] = (char) ((ch | 0x80) & 0xBF); + buf[4] = (char) (((ch >> 6) | 0x80) & 0xBF); + buf[3] = (char) (((ch >> 12) | 0x80) & 0xBF); + buf[2] = (char) (((ch >> 18) | 0x80) & 0xBF); + buf[1] = (char) (((ch >> 24) | 0x80) & 0xBF); + buf[0] = (char) ((ch >> 30) | 0xFC); + return 6; + } #endif + } ch = 0xFFFD; goto three; diff --git a/tests/utf.test b/tests/utf.test index 56a3acb..d5df773 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -8,7 +8,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: utf.test,v 1.12 2003/10/08 15:24:21 dgp Exp $ +# RCS: @(#) $Id: utf.test,v 1.13 2005/09/07 15:31:10 dgp Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 @@ -29,6 +29,9 @@ test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} { test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} { set x "\u4e4e" } [bytestring "\xe4\xb9\x8e"] +test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} { + string length [format %c -1] +} 1 test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" -- cgit v0.12