summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog5
-rw-r--r--generic/tclUtil.c56
-rw-r--r--tests/util.test37
3 files changed, 76 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index 08843af..7926223 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2003-08-27 Don Porter <dgp@users.sourceforge.net>
+
+ * tests/util.test: Added new tests for remaining TclNeedSpace()
+ bugs discussed in [Bug 411825].
+
2003-08-26 David Gravereaux <davygrvy@pobox.com>
* generic/tcl.h: Added some support for the LCC-Win32 compiler.
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 4085e73..fc57ed0 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtil.c,v 1.38 2003/07/16 21:24:12 hobbs Exp $
+ * RCS: @(#) $Id: tclUtil.c,v 1.39 2003/08/27 17:57:03 dgp Exp $
*/
#include "tclInt.h"
@@ -2001,44 +2001,58 @@ TclNeedSpace(start, end)
CONST char *end; /* End of string (place where space will
* be added, if appropriate). */
{
- Tcl_UniChar ch;
+ /*
+ * Direct char comparisons to the character literals '{' and '\\'
+ * below are safe because these literals are characters in the
+ * ASCII subset, and so single-byte in UTF8.
+ */
/*
* A space is needed unless either
* (a) we're at the start of the string, or
- * (b) the trailing characters of the string consist of one or more
- * open curly braces preceded by a space or extending back to
- * the beginning of the string.
- * (c) the trailing characters of the string consist of a space
- * preceded by a character other than backslash.
*/
if (end == start) {
return 0;
}
+
end = Tcl_UtfPrev(end, start);
if (*end != '{') {
+ Tcl_UniChar ch;
Tcl_UtfToUniChar(end, &ch);
- /*
- * Direct char comparison on next line is safe as it is with
- * a character in the ASCII subset, and so single-byte in UTF8.
- */
- if (Tcl_UniCharIsSpace(ch) && ((end == start) || (end[-1] != '\\'))) {
- return 0;
- }
- return 1;
+
+ /*
+ * (b) the trailing characters of the string consist of a
+ * list-element-separating space ( space, tab, carriage return,
+ * newline ) preceded by a character other than backslash, or
+ */
+
+ /* NOTE: (Bug 411825) The non-breaking space \u00a0 is not
+ * recognized by Tcl's script parser, nor its list element
+ * parser as a word separating character (Fits well with
+ * "non-breaking" doesn't it?), so the non-breaking space
+ * should not suppress the need of a space. Any other
+ * whitespace Unicode characters outside the ASCII subset are
+ * treated likewise.
+ */
+ return (!Tcl_UniCharIsSpace(ch) || (*end >= 0x80)
+ || ((end > start) && (end[-1] == '\\')));
}
+
+ /*
+ * (c) the trailing characters of the string consist of one or more
+ * open curly braces, and the beginning of the sequence of
+ * open curly braces represents the beginning of a list element
+ * (as tested by a recursive call to TclNeedSpace).
+ */
+
do {
if (end == start) {
return 0;
}
- end = Tcl_UtfPrev(end, start);
+ end--;
} while (*end == '{');
- Tcl_UtfToUniChar(end, &ch);
- if (Tcl_UniCharIsSpace(ch)) {
- return 0;
- }
- return 1;
+ return TclNeedSpace(start,end+1);
}
/*
diff --git a/tests/util.test b/tests/util.test
index 22b1dbc..ddba3e4 100644
--- a/tests/util.test
+++ b/tests/util.test
@@ -7,7 +7,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: util.test,v 1.11 2003/07/24 16:05:24 dgp Exp $
+# RCS: @(#) $Id: util.test,v 1.12 2003/08/27 17:57:03 dgp Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -333,6 +333,12 @@ set tcl_precision 12
# This test always succeeded in the C locale anyway...
test util-8.1 {TclNeedSpace - correct UTF8 handling} {
+ # Bug 411825
+ # Note that this test relies on the fact that
+ # [interp target] calls on Tcl_AppendElement()
+ # which calls on TclNeedSpace(). If [interp target]
+ # is ever updated, this test will no longer test
+ # TclNeedSpace.
interp create \u5420
interp create [list \u5420 foo]
interp alias {} fooset [list \u5420 foo] set
@@ -341,6 +347,35 @@ test util-8.1 {TclNeedSpace - correct UTF8 handling} {
set result
} "\u5420 foo"
+set ::tcltest::testConstraints(testdstring) \
+ [expr {[info commands testdstring] != {}}]
+
+test util-8.2 {TclNeedSpace - correct UTF8 handling} testdstring {
+ # Bug 411825
+ # This tests the same bug as the previous test, but
+ # should be more future-proof, as the DString
+ # operations will likely continue to call TclNeedSpace
+ testdstring free
+ testdstring append \u5420 -1
+ testdstring element foo
+ llength [testdstring get]
+} 2
+test util-8.3 {TclNeedSpace - correct UTF8 handling} testdstring {
+ # Bug 411825 - new variant reported by Dossy Shiobara
+ testdstring free
+ testdstring append \u00A0 -1
+ testdstring element foo
+ llength [testdstring get]
+} 2
+test util-8.4 {TclNeedSpace - correct UTF8 handling} testdstring {
+ # Another bug uncovered while fixing 411825
+ testdstring free
+ testdstring append {\ } -1
+ testdstring append \{ -1
+ testdstring element foo
+ llength [testdstring get]
+} 2
+
# cleanup
::tcltest::cleanupTests
return