summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/tclCmdAH.c2
-rw-r--r--generic/tclCmdMZ.c2
-rw-r--r--generic/tclDate.c2
-rw-r--r--generic/tclInt.h11
-rw-r--r--generic/tclParse.c2
-rw-r--r--generic/tclResult.c11
-rw-r--r--generic/tclStrToD.c6
-rw-r--r--generic/tclUtf.c2
-rw-r--r--generic/tclUtil.c196
-rw-r--r--tests/dstring.test42
-rw-r--r--tests/util.test55
-rw-r--r--unix/tclUnixFile.c2
12 files changed, 226 insertions, 107 deletions
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c
index 06743d6..f30396b 100644
--- a/generic/tclCmdAH.c
+++ b/generic/tclCmdAH.c
@@ -145,7 +145,7 @@ Tcl_CaseObjCmd(
pat = TclGetString(caseObjv[i]);
for (p = pat; *p != '\0'; p++) {
- if (TclIsSpaceProc(*p) || (*p == '\\')) {
+ if (TclIsSpaceProcM(*p) || (*p == '\\')) {
break;
}
}
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 255fca1..d4fa4e9 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1648,7 +1648,7 @@ StringIsCmd(
* if it is the first "element" that has the failure.
*/
- while (TclIsSpaceProc(*p)) {
+ while (TclIsSpaceProcM(*p)) {
p++;
}
TclNewStringObj(tmpStr, string1, p-string1);
diff --git a/generic/tclDate.c b/generic/tclDate.c
index 2cf20d6..8d37f3d 100644
--- a/generic/tclDate.c
+++ b/generic/tclDate.c
@@ -2684,7 +2684,7 @@ TclDatelex(
location->first_column = yyInput - info->dateStart;
for ( ; ; ) {
- while (TclIsSpaceProc(*yyInput)) {
+ while (TclIsSpaceProcM(*yyInput)) {
yyInput++;
}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 06cff60..15bc000 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -2608,7 +2608,6 @@ MODULE_SCOPE void TclInitNotifier(void);
MODULE_SCOPE void TclInitObjSubsystem(void);
MODULE_SCOPE void TclInitSubsystems(void);
MODULE_SCOPE int TclInterpReady(Tcl_Interp *interp);
-MODULE_SCOPE int TclIsSpaceProc(char byte);
MODULE_SCOPE int TclIsBareword(char byte);
MODULE_SCOPE int TclJoinThread(Tcl_ThreadId id, int *result);
MODULE_SCOPE void TclLimitRemoveAllHandlers(Tcl_Interp *interp);
@@ -2800,6 +2799,16 @@ MODULE_SCOPE Tcl_Obj * TclDisassembleByteCodeObj(Tcl_Obj *objPtr);
MODULE_SCOPE int TclUtfCasecmp(CONST char *cs, CONST char *ct);
/*
+ * Many parsing tasks need a common definition of whitespace.
+ * Use this routine and macro to achieve that and place
+ * optimization (fragile on changes) in one place.
+ */
+
+MODULE_SCOPE int TclIsSpaceProc(char byte);
+# define TclIsSpaceProcM(byte) \
+ (((byte) > 0x20) ? 0 : TclIsSpaceProc(byte))
+
+/*
*----------------------------------------------------------------
* Command procedures in the generic core:
*----------------------------------------------------------------
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 6f989d9..7bead99 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -1809,7 +1809,7 @@ Tcl_ParseBraces(
openBrace = 0;
break;
case '#' :
- if (openBrace && TclIsSpaceProc(src[-1])) {
+ if (openBrace && TclIsSpaceProcM(src[-1])) {
Tcl_AppendResult(parsePtr->interp,
": possible unbalanced brace in comment", NULL);
goto error;
diff --git a/generic/tclResult.c b/generic/tclResult.c
index 7b58d44..1b9d5c9 100644
--- a/generic/tclResult.c
+++ b/generic/tclResult.c
@@ -704,6 +704,7 @@ Tcl_AppendElement(
char *dst;
int size;
int flags;
+ int quoteHash = 1;
/*
* If the string result is empty, move the object result to the string
@@ -740,9 +741,17 @@ Tcl_AppendElement(
* then this element will not lead a list, and need not have it's
* leading '#' quoted.
*/
-
+ quoteHash = 0;
+ } else {
+ while ((--dst >= iPtr->appendResult) && TclIsSpaceProcM(*dst)) {
+ }
+ quoteHash = !TclNeedSpace(iPtr->appendResult, dst+1);
+ }
+ dst = iPtr->appendResult + iPtr->appendUsed;
+ if (!quoteHash) {
flags |= TCL_DONT_QUOTE_HASH;
}
+
iPtr->appendUsed += Tcl_ConvertElement(element, dst, flags);
}
diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c
index 4359829..3776521 100644
--- a/generic/tclStrToD.c
+++ b/generic/tclStrToD.c
@@ -533,7 +533,7 @@ TclParseNumber(
* I, N, and whitespace.
*/
- if (TclIsSpaceProc(c)) {
+ if (TclIsSpaceProcM(c)) {
if (flags & TCL_PARSE_NO_WHITESPACE) {
goto endgame;
}
@@ -1053,7 +1053,7 @@ TclParseNumber(
}
/* FALLTHROUGH */
case sNANPAREN:
- if (TclIsSpaceProc(c)) {
+ if (TclIsSpaceProcM(c)) {
break;
}
if (numSigDigs < 13) {
@@ -1107,7 +1107,7 @@ TclParseNumber(
* Accept trailing whitespace.
*/
- while (len != 0 && TclIsSpaceProc(*p)) {
+ while (len != 0 && TclIsSpaceProcM(*p)) {
p++;
len--;
}
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b7e8f5e..1a8d515 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -1613,7 +1613,7 @@ Tcl_UniCharIsSpace(
*/
if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) {
- return TclIsSpaceProc((char) ch);
+ return TclIsSpaceProcM((char) ch);
} else if ((Tcl_UniChar) ch == 0x180E || (Tcl_UniChar) ch == 0x202F) {
return 1;
} else {
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index f4879a1..01f1d34 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -396,20 +396,20 @@ TclMaxListLength(
}
/* No list element before leading white space */
- count += 1 - TclIsSpaceProc(*bytes);
+ count += 1 - TclIsSpaceProcM(*bytes);
/* Count white space runs as potential element separators */
while (numBytes) {
if ((numBytes == -1) && (*bytes == '\0')) {
break;
}
- if (TclIsSpaceProc(*bytes)) {
+ if (TclIsSpaceProcM(*bytes)) {
/* Space run started; bump count */
count++;
do {
bytes++;
numBytes -= (numBytes != -1);
- } while (numBytes && TclIsSpaceProc(*bytes));
+ } while (numBytes && TclIsSpaceProcM(*bytes));
if ((numBytes == 0) || ((numBytes == -1) && (*bytes == '\0'))) {
break;
}
@@ -420,7 +420,7 @@ TclMaxListLength(
}
/* No list element following trailing white space */
- count -= TclIsSpaceProc(bytes[-1]);
+ count -= TclIsSpaceProcM(bytes[-1]);
done:
if (endPtr) {
@@ -508,7 +508,7 @@ TclFindElement(
*/
limit = (list + listLength);
- while ((p < limit) && (TclIsSpaceProc(*p))) {
+ while ((p < limit) && (TclIsSpaceProcM(*p))) {
p++;
}
if (p == limit) { /* no element found */
@@ -553,7 +553,7 @@ TclFindElement(
} else if (openBraces == 1) {
size = (p - elemStart);
p++;
- if ((p >= limit) || TclIsSpaceProc(*p)) {
+ if ((p >= limit) || TclIsSpaceProcM(*p)) {
goto done;
}
@@ -563,7 +563,7 @@ TclFindElement(
if (interp != NULL) {
p2 = p;
- while ((p2 < limit) && (!TclIsSpaceProc(*p2))
+ while ((p2 < limit) && (!TclIsSpaceProcM(*p2))
&& (p2 < p+20)) {
p2++;
}
@@ -595,23 +595,6 @@ TclFindElement(
break;
/*
- * Space: ignore if element is in braces or quotes; otherwise
- * terminate element.
- */
-
- case ' ':
- case '\f':
- case '\n':
- case '\r':
- case '\t':
- case '\v':
- if ((openBraces == 0) && !inQuotes) {
- size = (p - elemStart);
- goto done;
- }
- break;
-
- /*
* Double-quote: if element is in quotes then terminate it.
*/
@@ -619,7 +602,7 @@ TclFindElement(
if (inQuotes) {
size = (p - elemStart);
p++;
- if ((p >= limit) || TclIsSpaceProc(*p)) {
+ if ((p >= limit) || TclIsSpaceProcM(*p)) {
goto done;
}
@@ -629,7 +612,7 @@ TclFindElement(
if (interp != NULL) {
p2 = p;
- while ((p2 < limit) && (!TclIsSpaceProc(*p2))
+ while ((p2 < limit) && (!TclIsSpaceProcM(*p2))
&& (p2 < p+20)) {
p2++;
}
@@ -640,6 +623,20 @@ TclFindElement(
return TCL_ERROR;
}
break;
+
+ default:
+ if (TclIsSpaceProcM(*p)) {
+ /*
+ * Space: ignore if element is in braces or quotes;
+ * otherwise terminate element.
+ */
+ if ((openBraces == 0) && !inQuotes) {
+ size = (p - elemStart);
+ goto done;
+ }
+ }
+ break;
+
}
p++;
}
@@ -666,7 +663,7 @@ TclFindElement(
}
done:
- while ((p < limit) && (TclIsSpaceProc(*p))) {
+ while ((p < limit) && (TclIsSpaceProcM(*p))) {
p++;
}
*elementPtr = elemStart;
@@ -1013,12 +1010,6 @@ TclScanElement(
case '[':
case '$':
case ';':
- case ' ':
- case '\f':
- case '\n':
- case '\r':
- case '\t':
- case '\v':
forbidNone = 1;
extra++; /* Escape sequences all one byte longer. */
#if COMPAT
@@ -1056,6 +1047,15 @@ TclScanElement(
}
/* TODO: Panic on improper encoding? */
break;
+ default:
+ if (TclIsSpaceProcM(*p)) {
+ forbidNone = 1;
+ extra++; /* Escape sequences all one byte longer. */
+#if COMPAT
+ preferBrace = 1;
+#endif
+ }
+ break;
}
length -= (length > 0);
p++;
@@ -1809,6 +1809,7 @@ TclTrim(
*/
/* The whitespace characters trimmed during [concat] operations */
+/* TODO: Find a reasonable way to guarantee in sync with TclIsSpaceProc() */
#define CONCAT_WS " \f\v\r\t\n"
#define CONCAT_WS_SIZE (int) (sizeof(CONCAT_WS "") - 1)
@@ -2632,9 +2633,36 @@ Tcl_DStringAppendElement(
{
char *dst = dsPtr->string + dsPtr->length;
int needSpace = TclNeedSpace(dsPtr->string, dst);
- int flags = needSpace ? TCL_DONT_QUOTE_HASH : 0;
- int newSize = dsPtr->length + needSpace
- + TclScanElement(element, -1, &flags);
+ int flags = 0, quoteHash = 1, newSize;
+
+ if (needSpace) {
+ /*
+ * If we need a space to separate the new element from something
+ * already ending the string, we're not appending the first element
+ * of any list, so we need not quote any leading hash character.
+ */
+ quoteHash = 0;
+ } else {
+ /*
+ * We don't need a space, maybe because there's some already there.
+ * Checking whether we might be appending a first element is a bit
+ * more involved.
+ *
+ * Backtrack over all whitespace.
+ */
+ while ((--dst >= dsPtr->string) && TclIsSpaceProcM(*dst)) {
+ }
+
+ /* Call again without whitespace to confound things. */
+ quoteHash = !TclNeedSpace(dsPtr->string, dst+1);
+ }
+ if (!quoteHash) {
+ flags |= TCL_DONT_QUOTE_HASH;
+ }
+ newSize = dsPtr->length + needSpace + TclScanElement(element, -1, &flags);
+ if (!quoteHash) {
+ flags |= TCL_DONT_QUOTE_HASH;
+ }
/*
* Allocate a larger buffer for the string if the current one isn't large
@@ -2667,8 +2695,8 @@ Tcl_DStringAppendElement(
element = dsPtr->string + offset;
}
}
- dst = dsPtr->string + dsPtr->length;
}
+ dst = dsPtr->string + dsPtr->length;
/*
* Convert the new string to a list element and copy it into the buffer at
@@ -2679,15 +2707,8 @@ Tcl_DStringAppendElement(
*dst = ' ';
dst++;
dsPtr->length++;
-
- /*
- * If we need a space to separate this element from preceding stuff,
- * then this element will not lead a list, and need not have it's
- * leading '#' quoted.
- */
-
- flags |= TCL_DONT_QUOTE_HASH;
}
+
dsPtr->length += TclConvertElement(element, -1, dst, flags);
dsPtr->string[dsPtr->length] = '\0';
return dsPtr->string;
@@ -3228,62 +3249,69 @@ TclNeedSpace(
/*
* A space is needed unless either:
* (a) we're at the start of the string, or
- */
+ *
+ * (NOTE: This check is now absorbed into the loop below.)
+ *
if (end == start) {
return 0;
}
+ *
+ */
+
/*
* (b) we're at the start of a nested list-element, quoted with an open
* curly brace; we can be nested arbitrarily deep, so long as the
* first curly brace starts an element, so backtrack over open curly
* braces that are trailing characters of the string; and
- */
+ *
+ * (NOTE: Every character our parser is looking for is a proper
+ * single-byte encoding of an ASCII value. It does not accept
+ * overlong encodings. Given that, there's no benefit using
+ * Tcl_UtfPrev. If it would find what we seek, so would byte-by-byte
+ * backward scan. Save routine call overhead and risk of wrong
+ * results should the behavior of Tcl_UtfPrev change in unexpected ways.
+ * Reconsider this if we ever start treating non-ASCII Unicode
+ * characters as meaningful list syntax, expanded Unicode spaces as
+ * element separators, for example.)
+ *
end = Tcl_UtfPrev(end, start);
while (*end == '{') {
- if (end == start) {
- return 0;
- }
- end = Tcl_UtfPrev(end, start);
+ if (end == start) {
+ return 0;
+ }
+ end = Tcl_UtfPrev(end, start);
+ }
+
+ *
+ */
+
+ while ((--end >= start) && (*end == '{')) {
+ }
+ if (end < start) {
+ return 0;
}
/*
* (c) the trailing character of the string is already a list-element
- * separator (according to TclFindElement); that is, one of these
- * characters:
- * \u0009 \t TAB
- * \u000A \n NEWLINE
- * \u000B \v VERTICAL TAB
- * \u000C \f FORM FEED
- * \u000D \r CARRIAGE RETURN
- * \u0020 SPACE
- * with the condition that the penultimate character is not a
- * backslash.
+ * separator, Use the same testing routine as TclFindElement to
+ * enforce consistency.
*/
- if (*end > 0x20) {
+ if (TclIsSpaceProcM(*end)) {
+ int result = 0;
+
/*
- * Performance tweak. All ASCII spaces are <= 0x20. So get a quick
- * answer for most characters before comparing against all spaces in
- * the switch below.
- *
- * NOTE: Remove this if other Unicode spaces ever get accepted as
- * list-element separators.
+ * Trailing whitespace might be part of a backslash escape
+ * sequence. Handle that possibility.
*/
- return 1;
- }
- switch (*end) {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- case '\v':
- case '\f':
- if ((end == start) || (end[-1] != '\\')) {
- return 0;
+
+ while ((--end >= start) && (*end == '\\')) {
+ result = !result;
}
+ return result;
}
return 1;
}
@@ -3423,7 +3451,7 @@ TclGetIntForIndex(
* Leading whitespace is acceptable in an index.
*/
- while (length && TclIsSpaceProc(*bytes)) {
+ while (length && TclIsSpaceProcM(*bytes)) {
bytes++;
length--;
}
@@ -3436,7 +3464,7 @@ TclGetIntForIndex(
if ((savedOp != '+') && (savedOp != '-')) {
goto parseError;
}
- if (TclIsSpaceProc(opPtr[1])) {
+ if (TclIsSpaceProcM(opPtr[1])) {
goto parseError;
}
*opPtr = '\0';
@@ -3582,7 +3610,7 @@ SetEndOffsetFromAny(
* after "end-" to Tcl_GetInt, then reverse for offset.
*/
- if (TclIsSpaceProc(bytes[4])) {
+ if (TclIsSpaceProcM(bytes[4])) {
return TCL_ERROR;
}
if (Tcl_GetInt(interp, bytes+4, &offset) != TCL_OK) {
@@ -3647,7 +3675,7 @@ TclCheckBadOctal(
* zero. Try to generate a meaningful error message.
*/
- while (TclIsSpaceProc(*p)) {
+ while (TclIsSpaceProcM(*p)) {
p++;
}
if (*p == '+' || *p == '-') {
@@ -3660,7 +3688,7 @@ TclCheckBadOctal(
while (isdigit(UCHAR(*p))) { /* INTL: digit. */
p++;
}
- while (TclIsSpaceProc(*p)) {
+ while (TclIsSpaceProcM(*p)) {
p++;
}
if (*p == '\0') {
diff --git a/tests/dstring.test b/tests/dstring.test
index 95321ec..f2d8656 100644
--- a/tests/dstring.test
+++ b/tests/dstring.test
@@ -130,13 +130,27 @@ test dstring-2.12 {appending list elements} testdstring {
testdstring get
} {x #}
test dstring-2.13 {appending list elements} testdstring {
- # This test shows lack of sophistication in Tcl_DStringAppendElement's
+ # This test checks the sophistication in Tcl_DStringAppendElement's
# decision about whether #-quoting can be disabled.
testdstring free
testdstring append "x " -1
testdstring element #
testdstring get
-} {x {#}}
+} {x #}
+test dstring-2.14 {appending list elements} testdstring {
+ testdstring free
+ testdstring append " " -1
+ testdstring element #
+ testdstring get
+} { {#}}
+test dstring-2.15 {appending list elements} testdstring {
+ # This test checks the sophistication in Tcl_DStringAppendElement's
+ # decision about whether #-quoting can be disabled.
+ testdstring free
+ testdstring append "x " -1
+ testdstring element #
+ testdstring get
+} {x #}
test dstring-3.1 {nested sublists} testdstring {
testdstring free
@@ -227,7 +241,7 @@ test dstring-3.9 {appending list elements} testdstring {
testdstring get
} {x {x #}}
test dstring-3.10 {appending list elements} testdstring {
- # This test shows lack of sophistication in Tcl_DStringAppendElement's
+ # This test checks the sophistication in Tcl_DStringAppendElement's
# decision about whether #-quoting can be disabled.
testdstring free
testdstring append x -1
@@ -236,7 +250,27 @@ test dstring-3.10 {appending list elements} testdstring {
testdstring element #
testdstring end
testdstring get
-} {x {x {#}}}
+} {x {x #}}
+test dstring-3.11 {appending list elements} testdstring {
+ testdstring free
+ testdstring append x -1
+ testdstring start
+ testdstring append " " -1
+ testdstring element #
+ testdstring end
+ testdstring get
+} {x { {#}}}
+test dstring-3.12 {appending list elements} testdstring {
+ # This test checks the sophistication in Tcl_DStringAppendElement's
+ # decision about whether #-quoting can be disabled.
+ testdstring free
+ testdstring append x -1
+ testdstring start
+ testdstring append "x " -1
+ testdstring element #
+ testdstring end
+ testdstring get
+} {x {x #}}
test dstring-4.1 {truncation} testdstring {
testdstring free
diff --git a/tests/util.test b/tests/util.test
index 61a1790..46d9152 100644
--- a/tests/util.test
+++ b/tests/util.test
@@ -503,25 +503,64 @@ test util-8.4 {TclNeedSpace - correct UTF8 handling} testdstring {
llength [testdstring get]
} 2
test util-8.5 {TclNeedSpace - correct UTF8 handling} testdstring {
- # Note that in this test TclNeedSpace actually gets it wrong,
- # claiming we need a space when we really do not. Extra space
- # between list elements is harmless though, and better to have
- # extra space in really weird string reps of lists, than to
- # invest the effort required to make TclNeedSpace foolproof.
testdstring free
testdstring append {\\ } -1
testdstring element foo
list [llength [testdstring get]] [string length [testdstring get]]
-} {2 7}
+} {2 6}
test util-8.6 {TclNeedSpace - correct UTF8 handling} testdstring {
- # Another example of TclNeedSpace harmlessly getting it wrong.
testdstring free
testdstring append {\\ } -1
testdstring append \{ -1
testdstring element foo
testdstring append \} -1
list [llength [testdstring get]] [string length [testdstring get]]
-} {2 9}
+} {2 8}
+test util-8.7 {TclNeedSpace - watch out for escaped space} {
+ testdstring free
+ testdstring append {\ } -1
+ testdstring start
+ testdstring end
+
+ # Should make {\ {}}
+ list [llength [testdstring get]] [string index [testdstring get] 3]
+} {2 \{}
+test util-8.8 {TclNeedSpace - watch out for escaped space} {
+ testdstring free
+ testdstring append {\\ } -1
+ testdstring start
+ testdstring end
+
+ # Should make {\\ {}}
+ list [llength [testdstring get]] [string index [testdstring get] 3]
+} {2 \{}
+test util-8.9 {TclNeedSpace - watch out for escaped space} {
+ testdstring free
+ testdstring append {\\\ } -1
+ testdstring start
+ testdstring end
+
+ # Should make {\\\ {}}
+ list [llength [testdstring get]] [string index [testdstring get] 5]
+} {2 \{}
+test util-8.10 {TclNeedSpace - watch out for escaped space} {
+ testdstring free
+ testdstring append {\\\\\\\ } -1
+ testdstring start
+ testdstring end
+
+ # Should make {\\\\\\\ {}}
+ list [llength [testdstring get]] [string index [testdstring get] 9]
+} {2 \{}
+test util-8.11 {TclNeedSpace - watch out for escaped space} {
+ testdstring free
+ testdstring append {\\\\\\\\ } -1
+ testdstring start
+ testdstring end
+
+ # Should make {\\\\\\\\ {}}
+ list [llength [testdstring get]] [string index [testdstring get] 9]
+} {2 \{}
test util-9.0.0 {TclGetIntForIndex} {
string index abcd 0
diff --git a/unix/tclUnixFile.c b/unix/tclUnixFile.c
index 65e144d..038cbf8 100644
--- a/unix/tclUnixFile.c
+++ b/unix/tclUnixFile.c
@@ -98,7 +98,7 @@ TclpFindExecutable(
*/
while (1) {
- while (TclIsSpaceProc(*p)) {
+ while (TclIsSpaceProcM(*p)) {
p++;
}
name = p;