From 18503cb00a32b1fcb0cb46b7b3649903f38c989a Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 2 May 2011 17:36:27 +0000 Subject: Revised TclFindElement() interface. The final argument had been bracePtr, the address of a boolean var, where the caller can be told whether or not the parsed list element was enclosed in braces. In practice, no callers really care about that. What the callers really want to know is whether the list element value exists as a literal substring of the string being parsed, or whether a call to TclCopyAndCollapse() is needed to produce the list element value. Now the final argument is changed to do what callers actually need. This is a better fit for the calls in tclParse.c, where now a good deal of post-processing checking for "naked backslashes" is no longer necessary. ***POTENTIAL INCOMPATIBILITY*** For any callers calling in via the internal stubs table who really do use the final argument explicitly to check for the enclosing brace scenario. Simply looking for the braces where they must be is the revision available to those callers, and it will backport cleanly. --- ChangeLog | 18 ++++++++++++++++++ generic/tclCmdMZ.c | 4 ++-- generic/tclDictObj.c | 12 +++++++----- generic/tclListObj.c | 8 +++++--- generic/tclParse.c | 34 +++++++++++++--------------------- generic/tclUtil.c | 40 ++++++++++++++++++++++++++++++---------- 6 files changed, 75 insertions(+), 41 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1b5b2ed..e98d492 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,23 @@ 2011-05-02 Don Porter + * generic/tclCmdMZ.c: Revised TclFindElement() interface. The + * generic/tclDictObj.c: final argument had been bracePtr, the address + * generic/tclListObj.c: of a boolean var, where the caller can be told + * generic/tclParse.c: whether or not the parsed list element was + * generic/tclUtil.c: enclosed in braces. In practice, no callers + really care about that. 
What the callers really want to know is + whether the list element value exists as a literal substring of the + string being parsed, or whether a call to TclCopyAndCollapse() is + needed to produce the list element value. Now the final argument + is changed to do what callers actually need. This is a better fit + for the calls in tclParse.c, where now a good deal of post-processing + checking for "naked backslashes" is no longer necessary. + ***POTENTIAL INCOMPATIBILITY*** + For any callers calling in via the internal stubs table who really + do use the final argument explicitly to check for the enclosing brace + scenario. Simply looking for the braces where they must be is the + revision available to those callers, and it will backport cleanly. + * generic/tclInt.h: Replace TclCountSpaceRuns() with * generic/tclListObj.c: TclMaxListLength() which is the function we * generic/tclUtil.c: actually want. diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 5390cf0..60a9414 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1621,7 +1621,7 @@ StringIsCmd( */ const char *elemStart, *nextElem; - int lenRemain, elemSize, hasBrace; + int lenRemain, elemSize; register const char *p; string1 = TclGetStringFromObj(objPtr, &length1); @@ -1630,7 +1630,7 @@ StringIsCmd( for (p=string1, lenRemain=length1; lenRemain > 0; p=nextElem, lenRemain=end-nextElem) { if (TCL_ERROR == TclFindElement(NULL, p, lenRemain, - &elemStart, &nextElem, &elemSize, &hasBrace)) { + &elemStart, &nextElem, &elemSize, NULL)) { Tcl_Obj *tmpStr; /* diff --git a/generic/tclDictObj.c b/generic/tclDictObj.c index 912e7a9..8b44137 100644 --- a/generic/tclDictObj.c +++ b/generic/tclDictObj.c @@ -558,7 +558,7 @@ SetDictFromAny( { char *string, *s; const char *elemStart, *nextElem; - int lenRemain, length, elemSize, hasBrace, result, isNew; + int lenRemain, length, elemSize, result, isNew; char *limit; /* Points just after string's last byte. 
*/ register const char *p; register Tcl_Obj *keyPtr, *valuePtr; @@ -639,8 +639,10 @@ SetDictFromAny( for (p = string, lenRemain = length; lenRemain > 0; p = nextElem, lenRemain = (limit - nextElem)) { + int literal; + result = TclFindElement(interp, p, lenRemain, - &elemStart, &nextElem, &elemSize, &hasBrace); + &elemStart, &nextElem, &elemSize, &literal); if (result != TCL_OK) { goto errorExit; } @@ -654,7 +656,7 @@ SetDictFromAny( */ s = ckalloc((unsigned) elemSize + 1); - if (hasBrace) { + if (literal) { memcpy(s, elemStart, (size_t) elemSize); s[elemSize] = 0; } else { @@ -672,7 +674,7 @@ SetDictFromAny( } result = TclFindElement(interp, p, lenRemain, - &elemStart, &nextElem, &elemSize, &hasBrace); + &elemStart, &nextElem, &elemSize, &literal); if (result != TCL_OK) { TclDecrRefCount(keyPtr); goto errorExit; @@ -687,7 +689,7 @@ SetDictFromAny( */ s = ckalloc((unsigned) elemSize + 1); - if (hasBrace) { + if (literal) { memcpy((void *) s, (void *) elemStart, (size_t) elemSize); s[elemSize] = 0; } else { diff --git a/generic/tclListObj.c b/generic/tclListObj.c index 1fabcab..412a902 100644 --- a/generic/tclListObj.c +++ b/generic/tclListObj.c @@ -1693,7 +1693,7 @@ SetListFromAny( { char *string, *s; const char *elemStart, *nextElem; - int lenRemain, length, estCount, elemSize, hasBrace, i, j, result; + int lenRemain, length, estCount, elemSize, i, j, result; const char *limit; /* Points just after string's last byte. 
*/ register const char *p; register Tcl_Obj **elemPtrs; @@ -1777,8 +1777,10 @@ SetListFromAny( for (p=string, lenRemain=length, i=0; lenRemain > 0; p=nextElem, lenRemain=limit-nextElem, i++) { + int literal; + result = TclFindElement(interp, p, lenRemain, &elemStart, &nextElem, - &elemSize, &hasBrace); + &elemSize, &literal); if (result != TCL_OK) { for (j = 0; j < i; j++) { elemPtr = elemPtrs[j]; @@ -1800,7 +1802,7 @@ SetListFromAny( */ s = ckalloc((unsigned) elemSize + 1); - if (hasBrace) { + if (literal) { memcpy(s, elemStart, (size_t) elemSize); s[elemSize] = 0; } else { diff --git a/generic/tclParse.c b/generic/tclParse.c index 4e1e8b0..c8d2012 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -433,7 +433,7 @@ Tcl_ParseCommand( } if (isLiteral) { - int elemCount = 0, code = TCL_OK, nakedbs = 0; + int elemCount = 0, code = TCL_OK, literal = 1; const char *nextElem, *listEnd, *elemStart; /* @@ -455,33 +455,24 @@ Tcl_ParseCommand( */ while (nextElem < listEnd) { - int size, brace; + int size; code = TclFindElement(NULL, nextElem, listEnd - nextElem, - &elemStart, &nextElem, &size, &brace); - if (code != TCL_OK) { + &elemStart, &nextElem, &size, &literal); + if ((code != TCL_OK) || !literal) { break; } - if (!brace) { - const char *s; - - for(s=elemStart;size>0;s++,size--) { - if ((*s)=='\\') { - nakedbs=1; - break; - } - } - } if (elemStart < listEnd) { elemCount++; } } - if ((code != TCL_OK) || nakedbs) { + if ((code != TCL_OK) || !literal) { /* - * Some list element could not be parsed, or contained - * naked backslashes. This means the literal string was - * not in fact a valid nor canonical list. Defer the + * Some list element could not be parsed, or is not + * present as a literal substring of the script. The + * compiler cannot handle list elements that get generated + * by a call to TclCopyAndCollapse(). 
Defer the * handling of this to compile/eval time, where code is * already in place to report the "attempt to expand a * non-list" error or expand lists that require @@ -525,7 +516,7 @@ Tcl_ParseCommand( nextElem = tokenPtr[1].start; while (nextElem < listEnd) { - int quoted, brace; + int quoted; tokenPtr->type = TCL_TOKEN_SIMPLE_WORD; tokenPtr->numComponents = 1; @@ -535,9 +526,10 @@ Tcl_ParseCommand( tokenPtr->numComponents = 0; TclFindElement(NULL, nextElem, listEnd - nextElem, &(tokenPtr->start), &nextElem, - &(tokenPtr->size), &brace); + &(tokenPtr->size), NULL); - quoted = brace || tokenPtr->start[-1] == '"'; + quoted = tokenPtr->start[-1] == '{' + || tokenPtr->start[-1] == '"'; tokenPtr[-1].start = tokenPtr->start - quoted; tokenPtr[-1].size = tokenPtr->start + tokenPtr->size - tokenPtr[-1].start + quoted; diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 76676a1..fd3d935 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -181,8 +181,13 @@ TclMaxListLength( * after the opening brace and *sizePtr will not include either of the * braces. If there isn't an element in the list, *sizePtr will be zero, * and both *elementPtr and *termPtr will point just after the last - * character in the list. Note: this function does NOT collapse backslash - * sequences. + * character in the list. If literalPtr is non-NULL, *literalPtr is set + * to a boolean value indicating whether the substring returned as + * the values of **elementPtr and *sizePtr is the literal value of + * a list element. If not, a call to TclCopyAndCollapse() is needed + * to produce the actual value of the list element. Note: this function + * does NOT collapse backslash sequences, but uses *literalPtr to tell + * callers when it is required for them to do so. * * Side effects: * None. @@ -206,8 +211,12 @@ TclFindElement( * argument (next arg or end of list). */ int *sizePtr, /* If non-zero, fill in with size of * element. 
*/ - int *bracePtr) /* If non-zero, fill in with non-zero/zero to - * indicate that arg was/wasn't in braces. */ + int *literalPtr) /* If non-zero, fill in with non-zero/zero to + * indicate that the substring of *sizePtr + * bytes starting at **elementPtr is/is not + * the literal list element and therefore + * does not/does require a call to + * TclCopyAndCollapse() by the caller. */ { CONST char *p = list; CONST char *elemStart; /* Points to first byte of first element. */ @@ -216,6 +225,7 @@ TclFindElement( int inQuotes = 0; int size = 0; /* lint. */ int numChars; + int literal = 1; CONST char *p2; /* @@ -241,9 +251,6 @@ TclFindElement( p++; } elemStart = p; - if (bracePtr != 0) { - *bracePtr = openBraces; - } /* * Find element's end (a space, close brace, or the end of the string). @@ -301,6 +308,15 @@ TclFindElement( */ case '\\': + if (openBraces == 0) { + /* + * A backslash sequence not within a brace quoted element + * means the value of the element is different from the + * substring we are parsing. A call to TclCopyAndCollapse() + * is needed to produce the element value. Inform the caller. + */ + literal = 0; + } TclParseBackslash(p, limit - p, &numChars, NULL); p += (numChars - 1); break; @@ -385,6 +401,9 @@ TclFindElement( if (sizePtr != 0) { *sizePtr = size; } + if (literalPtr != 0) { + *literalPtr = literal; + } return TCL_OK; } @@ -477,7 +496,7 @@ Tcl_SplitList( { CONST char **argv, *end, *element; char *p; - int length, size, i, result, elSize, brace; + int length, size, i, result, elSize; /* * Allocate enough space to work in. 
A (CONST char *) for each @@ -496,9 +515,10 @@ Tcl_SplitList( for (i = 0, p = ((char *) argv) + size*sizeof(char *); *list != 0; i++) { CONST char *prevList = list; + int literal; result = TclFindElement(interp, list, length, &element, &list, - &elSize, &brace); + &elSize, &literal); length -= (list - prevList); if (result != TCL_OK) { ckfree((char *) argv); @@ -516,7 +536,7 @@ Tcl_SplitList( return TCL_ERROR; } argv[i] = p; - if (brace) { + if (literal) { memcpy(p, element, (size_t) elSize); p += elSize; *p = 0; -- cgit v0.12 From 9af810992a3d08e5964ca72b8b6221e120fb42d8 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 2 May 2011 20:10:00 +0000 Subject: Tests for expanded literals quoting detection. --- ChangeLog | 2 ++ generic/tclParse.c | 8 +++++--- tests/parse.test | 12 ++++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index e98d492..0207c97 100644 --- a/ChangeLog +++ b/ChangeLog @@ -18,6 +18,8 @@ scenario. Simply looking for the braces where they must be is the revision available to those callers, and it will backport cleanly. + * tests/parse.test: Tests for expanded literals quoting detection. + * generic/tclInt.h: Replace TclCountSpaceRuns() with * generic/tclListObj.c: TclMaxListLength() which is the function we * generic/tclUtil.c: actually want. diff --git a/generic/tclParse.c b/generic/tclParse.c index c8d2012..96c2a10 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -496,6 +496,7 @@ Tcl_ParseCommand( * tokens representing the expanded list. */ + CONST char *listStart; int growthNeeded = wordIndex + 2*elemCount - parsePtr->numTokens; parsePtr->numWords += elemCount - 1; @@ -514,7 +515,7 @@ Tcl_ParseCommand( * word value. 
*/ - nextElem = tokenPtr[1].start; + listStart = nextElem = tokenPtr[1].start; while (nextElem < listEnd) { int quoted; @@ -528,8 +529,9 @@ Tcl_ParseCommand( &(tokenPtr->start), &nextElem, &(tokenPtr->size), NULL); - quoted = tokenPtr->start[-1] == '{' - || tokenPtr->start[-1] == '"'; + quoted = (tokenPtr->start[-1] == '{' + || tokenPtr->start[-1] == '"') + && tokenPtr->start > listStart; tokenPtr[-1].start = tokenPtr->start - quoted; tokenPtr[-1].size = tokenPtr->start + tokenPtr->size - tokenPtr[-1].start + quoted; diff --git a/tests/parse.test b/tests/parse.test index b5211e7..37c44d5 100644 --- a/tests/parse.test +++ b/tests/parse.test @@ -227,6 +227,18 @@ test parse-5.26 {Tcl_ParseCommand: {*} parsing} testparser { test parse-5.27 {Tcl_ParseCommand: {*} parsing} testparser { testparser "{*}\\\n foo bar" 0 } {- \{*\}\\\n\ foo\ bar 3 simple {{*}} 1 text * 0 simple foo 1 text foo 0 simple bar 1 text bar 0 {}} +test parse-5.28 {Tcl_ParseCommand: {*} parsing, expanded literals} testparser { + testparser {{*}{a b}} 0 +} {- {{*}{a b}} 2 simple a 1 text a 0 simple b 1 text b 0 {}} +test parse-5.29 {Tcl_ParseCommand: {*} parsing, expanded literals, naked backslashes} testparser { + testparser {{*}{a \n b}} 0 +} {- {{*}{a \n b}} 1 expand {{*}{a \n b}} 1 text {a \n b} 0 {}} +test parse-5.30 {Tcl_ParseCommand: {*} parsing, expanded literals} testparser { + testparser {{*}"a b"} 0 +} {- {{*}"a b"} 2 simple a 1 text a 0 simple b 1 text b 0 {}} +test parse-5.31 {Tcl_ParseCommand: {*} parsing, expanded literals, naked backslashes} testparser { + testparser {{*}"a \n b"} 0 +} {- {{*}"a \n b"} 1 expand {{*}"a \n b"} 3 text {a } 0 backslash {\n} 0 text { b} 0 {}} test parse-6.1 {ParseTokens procedure, empty word} testparser { testparser {""} 0 -- cgit v0.12