summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: dgp <dgp@users.sourceforge.net> 2005-09-07 15:31:09 (GMT)
committer: dgp <dgp@users.sourceforge.net> 2005-09-07 15:31:09 (GMT)
commit: 0a42d952ff2cba9243fc2f432420bffb52aa9e70 (patch)
tree: 481a4f34db391c90f180dc321a489ae8e50bff99
parent: 2a03bdad453a632583f84f71bf5091c682999d90 (diff)
download: tcl-0a42d952ff2cba9243fc2f432420bffb52aa9e70.zip
tcl-0a42d952ff2cba9243fc2f432420bffb52aa9e70.tar.gz
tcl-0a42d952ff2cba9243fc2f432420bffb52aa9e70.tar.bz2
* generic/tclUtf.c (Tcl_UniCharToUtf), tests/utf.test (utf-1.5):
Corrected handling of negative Tcl_UniChar input value. Incorrect handling was producing byte sequences outside of Tcl's legal internal encoding. [Bug 1283976].
-rw-r--r-- ChangeLog 7
-rw-r--r-- generic/tclUtf.c 74
-rw-r--r-- tests/utf.test 5
3 files changed, 49 insertions, 37 deletions
diff --git a/ChangeLog b/ChangeLog
index 46f5483..ffefd16 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2005-09-07 Don Porter <dgp@users.sourceforge.net>
+
+ * generic/tclUtf.c (Tcl_UniCharToUtf): Corrected handling of negative
+ * tests/utf.test (utf-1.5): Tcl_UniChar input value. Incorrect
+ handling was producing byte sequences outside of Tcl's legal internal
+ encoding. [Bug 1283976].
+
2005-09-06 Donal K. Fellows <donal.k.fellows@man.ac.uk>
* generic/tclInt.h (List): Added flag to keep track of whether a list
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index fbd37e6..992a55f 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtf.c,v 1.35 2005/07/21 14:38:51 dkf Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.36 2005/09/07 15:31:10 dgp Exp $
*/
#include "tclInt.h"
@@ -168,45 +168,47 @@ Tcl_UniCharToUtf(ch, buf)
buf[0] = (char) ch;
return 1;
}
- if (ch <= 0x7FF) {
- buf[1] = (char) ((ch | 0x80) & 0xBF);
- buf[0] = (char) ((ch >> 6) | 0xC0);
- return 2;
- }
- if (ch <= 0xFFFF) {
+ if (ch >= 0) {
+ if (ch <= 0x7FF) {
+ buf[1] = (char) ((ch | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 6) | 0xC0);
+ return 2;
+ }
+ if (ch <= 0xFFFF) {
three:
- buf[2] = (char) ((ch | 0x80) & 0xBF);
- buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
- buf[0] = (char) ((ch >> 12) | 0xE0);
- return 3;
- }
+ buf[2] = (char) ((ch | 0x80) & 0xBF);
+ buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 12) | 0xE0);
+ return 3;
+ }
#if TCL_UTF_MAX > 3
- if (ch <= 0x1FFFFF) {
- buf[3] = (char) ((ch | 0x80) & 0xBF);
- buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF);
- buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF);
- buf[0] = (char) ((ch >> 18) | 0xF0);
- return 4;
- }
- if (ch <= 0x3FFFFFF) {
- buf[4] = (char) ((ch | 0x80) & 0xBF);
- buf[3] = (char) (((ch >> 6) | 0x80) & 0xBF);
- buf[2] = (char) (((ch >> 12) | 0x80) & 0xBF);
- buf[1] = (char) (((ch >> 18) | 0x80) & 0xBF);
- buf[0] = (char) ((ch >> 24) | 0xF8);
- return 5;
- }
- if (ch <= 0x7FFFFFFF) {
- buf[5] = (char) ((ch | 0x80) & 0xBF);
- buf[4] = (char) (((ch >> 6) | 0x80) & 0xBF);
- buf[3] = (char) (((ch >> 12) | 0x80) & 0xBF);
- buf[2] = (char) (((ch >> 18) | 0x80) & 0xBF);
- buf[1] = (char) (((ch >> 24) | 0x80) & 0xBF);
- buf[0] = (char) ((ch >> 30) | 0xFC);
- return 6;
- }
+ if (ch <= 0x1FFFFF) {
+ buf[3] = (char) ((ch | 0x80) & 0xBF);
+ buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 18) | 0xF0);
+ return 4;
+ }
+ if (ch <= 0x3FFFFFF) {
+ buf[4] = (char) ((ch | 0x80) & 0xBF);
+ buf[3] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[2] = (char) (((ch >> 12) | 0x80) & 0xBF);
+ buf[1] = (char) (((ch >> 18) | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 24) | 0xF8);
+ return 5;
+ }
+ if (ch <= 0x7FFFFFFF) {
+ buf[5] = (char) ((ch | 0x80) & 0xBF);
+ buf[4] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[3] = (char) (((ch >> 12) | 0x80) & 0xBF);
+ buf[2] = (char) (((ch >> 18) | 0x80) & 0xBF);
+ buf[1] = (char) (((ch >> 24) | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 30) | 0xFC);
+ return 6;
+ }
#endif
+ }
ch = 0xFFFD;
goto three;
diff --git a/tests/utf.test b/tests/utf.test
index 56a3acb..d5df773 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -8,7 +8,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: utf.test,v 1.12 2003/10/08 15:24:21 dgp Exp $
+# RCS: @(#) $Id: utf.test,v 1.13 2005/09/07 15:31:10 dgp Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest 2
@@ -29,6 +29,9 @@ test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
set x "\u4e4e"
} [bytestring "\xe4\xb9\x8e"]
+test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
+ string length [format %c -1]
+} 1
test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
string length "abc"