From 2d710b7bfb946720a165117b51982657462c87c2 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 7 Sep 2005 14:35:56 +0000 Subject: * generic/tclUtf.c (Tcl_UniCharToUtf): Corrected handling of negative * tests/utf.test (utf-1.5): Tcl_UniChar input value. Incorrect handling was producing byte sequences outside of Tcl's legal internal encoding. [Bug 1283976]. --- ChangeLog | 7 ++++++ generic/tclUtf.c | 74 +++++++++++++++++++++++++++++--------------------------- tests/utf.test | 5 +++- 3 files changed, 49 insertions(+), 37 deletions(-) diff --git a/ChangeLog b/ChangeLog index b2498d0..76230dd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2005-09-07 Don Porter + + * generic/tclUtf.c (Tcl_UniCharToUtf): Corrected handling of negative + * tests/utf.test (utf-1.5): Tcl_UniChar input value. Incorrect + handling was producing byte sequences outside of Tcl's legal internal + encoding. [Bug 1283976]. + 2005-08-29 Kevin Kenny * generic/tclBasic.c (ExprMathFunc): Restored "round away from diff --git a/generic/tclUtf.c b/generic/tclUtf.c index b7a6277..923f49f 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtf.c,v 1.30.2.2 2003/10/08 14:21:20 dkf Exp $ + * RCS: @(#) $Id: tclUtf.c,v 1.30.2.3 2005/09/07 14:35:56 dgp Exp $ */ #include "tclInt.h" @@ -169,45 +169,47 @@ Tcl_UniCharToUtf(ch, str) str[0] = (char) ch; return 1; } - if (ch <= 0x7FF) { - str[1] = (char) ((ch | 0x80) & 0xBF); - str[0] = (char) ((ch >> 6) | 0xC0); - return 2; - } - if (ch <= 0xFFFF) { + if (ch >= 0) { + if (ch <= 0x7FF) { + str[1] = (char) ((ch | 0x80) & 0xBF); + str[0] = (char) ((ch >> 6) | 0xC0); + return 2; + } + if (ch <= 0xFFFF) { three: - str[2] = (char) ((ch | 0x80) & 0xBF); - str[1] = (char) (((ch >> 6) | 0x80) & 0xBF); - str[0] = (char) ((ch >> 12) | 0xE0); - return 3; - } + str[2] = (char) ((ch | 0x80) & 0xBF); + str[1] = (char) (((ch >> 6) | 0x80) & 0xBF); + str[0] = (char) ((ch >> 12) | 0xE0); + return 3; + } #if TCL_UTF_MAX > 3 - if (ch <= 0x1FFFFF) { - str[3] = (char) ((ch | 0x80) & 0xBF); - str[2] = (char) (((ch >> 6) | 0x80) & 0xBF); - str[1] = (char) (((ch >> 12) | 0x80) & 0xBF); - str[0] = (char) ((ch >> 18) | 0xF0); - return 4; - } - if (ch <= 0x3FFFFFF) { - str[4] = (char) ((ch | 0x80) & 0xBF); - str[3] = (char) (((ch >> 6) | 0x80) & 0xBF); - str[2] = (char) (((ch >> 12) | 0x80) & 0xBF); - str[1] = (char) (((ch >> 18) | 0x80) & 0xBF); - str[0] = (char) ((ch >> 24) | 0xF8); - return 5; - } - if (ch <= 0x7FFFFFFF) { - str[5] = (char) ((ch | 0x80) & 0xBF); - str[4] = (char) (((ch >> 6) | 0x80) & 0xBF); - str[3] = (char) (((ch >> 12) | 0x80) & 0xBF); - str[2] = (char) (((ch >> 18) | 0x80) & 0xBF); - str[1] = (char) (((ch >> 24) | 0x80) & 0xBF); - str[0] = (char) ((ch >> 30) | 0xFC); - return 6; - } + if (ch <= 0x1FFFFF) { + str[3] = (char) ((ch | 0x80) & 0xBF); + str[2] = (char) (((ch >> 6) | 0x80) & 0xBF); + str[1] = (char) (((ch >> 12) | 0x80) & 0xBF); + str[0] = (char) ((ch >> 18) | 0xF0); + return 4; + } + if (ch <= 0x3FFFFFF) { + str[4] = (char) ((ch | 0x80) & 0xBF); + str[3] = (char) (((ch >> 6) | 0x80) & 0xBF); + str[2] = (char) (((ch >> 12) | 0x80) & 0xBF); + str[1] = (char) (((ch >> 18) | 0x80) & 0xBF); + str[0] = (char) ((ch >> 24) | 0xF8); + return 5; + } + if (ch <= 0x7FFFFFFF) { + str[5] = (char) ((ch | 0x80) & 0xBF); + str[4] = (char) (((ch >> 6) | 0x80) & 0xBF); + str[3] = (char) (((ch >> 12) | 0x80) & 0xBF); + str[2] = (char) (((ch >> 18) | 0x80) & 0xBF); + str[1] = (char) (((ch >> 24) | 0x80) & 0xBF); + str[0] = (char) ((ch >> 30) | 0xFC); + return 6; + } #endif + } ch = 0xFFFD; goto three; diff --git a/tests/utf.test b/tests/utf.test index 7e4adf0..09fc5b1 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -8,7 +8,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: utf.test,v 1.8.14.4 2003/10/08 15:24:01 dgp Exp $ +# RCS: @(#) $Id: utf.test,v 1.8.14.5 2005/09/07 14:35:56 dgp Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 @@ -29,6 +29,9 @@ test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} { test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} { set x "\u4e4e" } [bytestring "\xe4\xb9\x8e"] +test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} { + string length [format %c -1] +} 1 test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" -- cgit v0.12