diff options
-rw-r--r-- | generic/tclUtil.c | 44 | ||||
-rw-r--r-- | tests/util.test | 10 |
2 files changed, 38 insertions, 16 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 874e2a5..7ec224e 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -3225,25 +3225,49 @@ TclNeedSpace(
     /*
      * A space is needed unless either:
      * (a) we're at the start of the string, or
-     */
+     *
+     *  (NOTE: This check is now absorbed into the loop below.)
+     *
 
     if (end == start) {
         return 0;
     }
 
+     *
+     */
+
     /*
      * (b) we're at the start of a nested list-element, quoted with an open
      *     curly brace; we can be nested arbitrarily deep, so long as the
      *     first curly brace starts an element, so backtrack over open curly
      *     braces that are trailing characters of the string; and
-     */
+     *
+     *  (NOTE: Every character our parser is looking for is a proper
+     *  single-byte encoding of an ASCII value. It does not accept
+     *  overlong encodings. Given that, there's no benefit using
+     *  Tcl_UtfPrev. If it would find what we seek, so would byte-by-byte
+     *  backward scan. Save routine call overhead and risk of wrong
+     *  results should the behavior of Tcl_UtfPrev change in unexpected ways.
+     *  Reconsider this if we ever start treating non-ASCII Unicode
+     *  characters as meaningful list syntax, expanded Unicode spaces as
+     *  element separators, for example.)
+     *
 
     end = Tcl_UtfPrev(end, start);
     while (*end == '{') {
-	if (end == start) {
-	    return 0;
-	}
-	end = Tcl_UtfPrev(end, start);
+        if (end == start) {
+            return 0;
+        }
+        end = Tcl_UtfPrev(end, start);
+    }
+
+     *
+     */
+
+    while ((--end >= start) && (*end == '{')) {
+    }
+    if (end < start) {
+        return 0;
     }
 
     /*
@@ -3278,8 +3302,12 @@ TclNeedSpace(
     case '\r':
     case '\v':
     case '\f':
-        if ((end == start) || (end[-1] != '\\')) {
-            return 0;
+        {
+            int result = 0;
+            while ((--end >= start) && (*end == '\\')) {
+                result = !result;
+            }
+            return result;
         }
     }
     return 1;
diff --git a/tests/util.test b/tests/util.test
index 294d883..46d9152 100644
--- a/tests/util.test
+++ b/tests/util.test
@@ -503,25 +503,19 @@ test util-8.4 {TclNeedSpace - correct UTF8 handling} testdstring {
     llength [testdstring get]
 } 2
 test util-8.5 {TclNeedSpace - correct UTF8 handling} testdstring {
-    # Note that in this test TclNeedSpace actually gets it wrong,
-    # claiming we need a space when we really do not. Extra space
-    # between list elements is harmless though, and better to have
-    # extra space in really weird string reps of lists, than to
-    # invest the effort required to make TclNeedSpace foolproof.
     testdstring free
     testdstring append {\\ } -1
     testdstring element foo
     list [llength [testdstring get]] [string length [testdstring get]]
-} {2 7}
+} {2 6}
 test util-8.6 {TclNeedSpace - correct UTF8 handling} testdstring {
-    # Another example of TclNeedSpace harmlessly getting it wrong.
     testdstring free
     testdstring append {\\ } -1
     testdstring append \{ -1
     testdstring element foo
     testdstring append \} -1
     list [llength [testdstring get]] [string length [testdstring get]]
-} {2 9}
+} {2 8}
 test util-8.7 {TclNeedSpace - watch out for escaped space} {
     testdstring free
     testdstring append {\ } -1