From 769035280578d490045479f9cbae23e6fef5ab7c Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 27 Aug 2003 17:57:03 +0000 Subject: * tests/util.test: Added new tests for remaining TclNeedSpace() bugs discussed in [Bug 411825]. --- ChangeLog | 5 +++++ generic/tclUtil.c | 56 ++++++++++++++++++++++++++++++++++--------------------- tests/util.test | 37 +++++++++++++++++++++++++++++++++++- 3 files changed, 76 insertions(+), 22 deletions(-) diff --git a/ChangeLog b/ChangeLog index 08843af..7926223 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2003-08-27 Don Porter + + * tests/util.test: Added new tests for remaining TclNeedSpace() + bugs discussed in [Bug 411825]. + 2003-08-26 David Gravereaux * generic/tcl.h: Added some support for the LCC-Win32 compiler. diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 4085e73..fc57ed0 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtil.c,v 1.38 2003/07/16 21:24:12 hobbs Exp $ + * RCS: @(#) $Id: tclUtil.c,v 1.39 2003/08/27 17:57:03 dgp Exp $ */ #include "tclInt.h" @@ -2001,44 +2001,58 @@ TclNeedSpace(start, end) CONST char *end; /* End of string (place where space will * be added, if appropriate). */ { - Tcl_UniChar ch; + /* + * Direct char comparisons to the character literals '{' and '\\' + * below are safe because these literals are characters in the + * ASCII subset, and so single-byte in UTF8. + */ /* * A space is needed unless either * (a) we're at the start of the string, or - * (b) the trailing characters of the string consist of one or more - * open curly braces preceded by a space or extending back to - * the beginning of the string. - * (c) the trailing characters of the string consist of a space - * preceded by a character other than backslash. */ if (end == start) { return 0; } + end = Tcl_UtfPrev(end, start); if (*end != '{') { + Tcl_UniChar ch; Tcl_UtfToUniChar(end, &ch); - /* - * Direct char comparison on next line is safe as it is with - * a character in the ASCII subset, and so single-byte in UTF8. - */ - if (Tcl_UniCharIsSpace(ch) && ((end == start) || (end[-1] != '\\'))) { - return 0; - } - return 1; + + /* + * (b) the trailing characters of the string consist of a + * list-element-separating space ( space, tab, carriage return, + * newline ) preceded by a character other than backslash, or + */ + + /* NOTE: (Bug 411825) The non-breaking space \u00a0 is not + * recognized by Tcl's script parser, nor its list element + * parser as a word separating character (Fits well with + * "non-breaking" doesn't it?), so the non-breaking space + * should not suppress the need of a space. Any other + * whitespace Unicode characters outside the ASCII subset are + * treated likewise. + */ + return (!Tcl_UniCharIsSpace(ch) || (*end >= 0x80) + || ((end > start) && (end[-1] == '\\'))); } + + /* + * (c) the trailing characters of the string consist of one or more + * open curly braces, and the beginning of the sequence of + * open curly braces represents the beginning of a list element + * (as tested by a recursive call to TclNeedSpace). + */ + do { if (end == start) { return 0; } - end = Tcl_UtfPrev(end, start); + end--; } while (*end == '{'); - Tcl_UtfToUniChar(end, &ch); - if (Tcl_UniCharIsSpace(ch)) { - return 0; - } - return 1; + return TclNeedSpace(start,end+1); } /* diff --git a/tests/util.test b/tests/util.test index 22b1dbc..ddba3e4 100644 --- a/tests/util.test +++ b/tests/util.test @@ -7,7 +7,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: util.test,v 1.11 2003/07/24 16:05:24 dgp Exp $ +# RCS: @(#) $Id: util.test,v 1.12 2003/08/27 17:57:03 dgp Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -333,6 +333,12 @@ set tcl_precision 12 # This test always succeeded in the C locale anyway... test util-8.1 {TclNeedSpace - correct UTF8 handling} { + # Bug 411825 + # Note that this test relies on the fact that + # [interp target] calls on Tcl_AppendElement() + # which calls on TclNeedSpace(). If [interp target] + # is ever updated, this test will no longer test + # TclNeedSpace. interp create \u5420 interp create [list \u5420 foo] interp alias {} fooset [list \u5420 foo] set @@ -341,6 +347,35 @@ test util-8.1 {TclNeedSpace - correct UTF8 handling} { set result } "\u5420 foo" +set ::tcltest::testConstraints(testdstring) \ + [expr {[info commands testdstring] != {}}] + +test util-8.2 {TclNeedSpace - correct UTF8 handling} testdstring { + # Bug 411825 + # This tests the same bug as the previous test, but + # should be more future-proof, as the DString + # operations will likely continue to call TclNeedSpace + testdstring free + testdstring append \u5420 -1 + testdstring element foo + llength [testdstring get] +} 2 +test util-8.3 {TclNeedSpace - correct UTF8 handling} testdstring { + # Bug 411825 - new variant reported by Dossy Shiobara + testdstring free + testdstring append \u00A0 -1 + testdstring element foo + llength [testdstring get] +} 2 +test util-8.4 {TclNeedSpace - correct UTF8 handling} testdstring { + # Another bug uncovered while fixing 411825 + testdstring free + testdstring append {\ } -1 + testdstring append \{ -1 + testdstring element foo + llength [testdstring get] +} 2 + # cleanup ::tcltest::cleanupTests return -- cgit v0.12