From c8cea779f14930d7be251801f4003224424f110d Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 27 Aug 2003 20:29:36 +0000 Subject: * generic/tclUtil.c: Corrected [Bug 411825] and other bugs in TclNeedSpace() where non-breaking space (\u00A0) and backslash-escaped spaces were handled incorrectly. * tests/util.test: Added new tests util-8.[2-6]. --- ChangeLog | 6 ++++-- generic/tclUtil.c | 61 ++++++++++++++++++++++++++++++++----------------------- tests/util.test | 22 +++++++++++++++++++- 3 files changed, 61 insertions(+), 28 deletions(-) diff --git a/ChangeLog b/ChangeLog index dc84a3b..4af48f3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -28,8 +28,10 @@ 2003-08-27 Don Porter - * tests/util.test: Added new tests for remaining TclNeedSpace() - bugs discussed in [Bug 411825]. + * generic/tclUtil.c: Corrected [Bug 411825] and other bugs in + TclNeedSpace() where non-breaking space (\u00A0) and backslash-escaped + spaces were handled incorrectly. + * tests/util.test: Added new tests util-8.[2-6]. 2003-08-26 David Gravereaux diff --git a/generic/tclUtil.c b/generic/tclUtil.c index fd58235..d939496 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtil.c,v 1.40 2003/08/27 19:55:50 dgp Exp $ + * RCS: @(#) $Id: tclUtil.c,v 1.41 2003/08/27 20:29:36 dgp Exp $ */ #include "tclInt.h" @@ -2001,42 +2001,53 @@ TclNeedSpace(start, end) CONST char *end; /* End of string (place where space will * be added, if appropriate). */ { - Tcl_UniChar ch; - /* * A space is needed unless either * (a) we're at the start of the string, or - * (b) the trailing characters of the string consist of one or more - * open curly braces preceded by a space or extending back to - * the beginning of the string. - * (c) the trailing characters of the string consist of a space - * preceded by a character other than backslash. */ - if (end == start) { return 0; } + + /* + * (b) we're at the start of a nested list-element, quoted with an + * open curly brace; we can be nested arbitrarily deep, so long + * as the first curly brace starts an element, so backtrack over + * open curly braces that are trailing characters of the string; and + */ + end = Tcl_UtfPrev(end, start); - if (*end != '{') { - Tcl_UtfToUniChar(end, &ch); - /* - * Direct char comparison on next line is safe as it is with - * a character in the ASCII subset, and so single-byte in UTF8. - */ - if (Tcl_UniCharIsSpace(ch) && ((end == start) || (end[-1] != '\\'))) { - return 0; - } - return 1; - } - do { + while (*end == '{') { if (end == start) { return 0; } end = Tcl_UtfPrev(end, start); - } while (*end == '{'); - Tcl_UtfToUniChar(end, &ch); - if (Tcl_UniCharIsSpace(ch)) { - return 0; + } + + /* + * (c) the trailing character of the string is already a list-element + * separator (according to TclFindElement); that is, one of these + * characters: + * \u0009 \t TAB + * \u000A \n NEWLINE + * \u000B \v VERTICAL TAB + * \u000C \f FORM FEED + * \u000D \r CARRIAGE RETURN + * \u0020 SPACE + * with the condition that the penultimate character is not a + * backslash. + */ + + switch (*end) { + case ' ': + case '\f': + case '\n': + case '\r': + case '\t': + case '\v': + if ((end == start) || (end[-1] != '\\')) { + return 0; + } } return 1; } diff --git a/tests/util.test b/tests/util.test index ddba3e4..6c4db29 100644 --- a/tests/util.test +++ b/tests/util.test @@ -7,7 +7,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: util.test,v 1.12 2003/08/27 17:57:03 dgp Exp $ +# RCS: @(#) $Id: util.test,v 1.13 2003/08/27 20:29:36 dgp Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -375,6 +375,26 @@ test util-8.4 {TclNeedSpace - correct UTF8 handling} testdstring { testdstring element foo llength [testdstring get] } 2 +test util-8.5 {TclNeedSpace - correct UTF8 handling} testdstring { + # Note that in this test TclNeedSpace actually gets it wrong, + # claiming we need a space when we really do not. Extra space + # between list elements is harmless though, and better to have + # extra space in really weird string reps of lists, than to + # invest the effort required to make TclNeedSpace foolproof. + testdstring free + testdstring append {\\ } -1 + testdstring element foo + list [llength [testdstring get]] [string length [testdstring get]] +} {2 7} +test util-8.6 {TclNeedSpace - correct UTF8 handling} testdstring { + # Another example of TclNeedSpace harmlessly getting it wrong. + testdstring free + testdstring append {\\ } -1 + testdstring append \{ -1 + testdstring element foo + testdstring append \} -1 + list [llength [testdstring get]] [string length [testdstring get]] +} {2 9} # cleanup ::tcltest::cleanupTests -- cgit v0.12