diff options
Diffstat (limited to 'generic/tclParse.c')
| -rw-r--r-- | generic/tclParse.c | 165 | 
1 files changed, 93 insertions, 72 deletions
| diff --git a/generic/tclParse.c b/generic/tclParse.c index 9bfe608..ee0d4c4 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -14,6 +14,8 @@   */  #include "tclInt.h" +#include "tclParse.h" +#include <assert.h>  /*   * The following table provides parsing information about each possible 8-bit @@ -41,18 +43,7 @@   * TYPE_BRACE -		Character is a curly brace (either left or right).   */ -#define TYPE_NORMAL		0 -#define TYPE_SPACE		0x1 -#define TYPE_COMMAND_END	0x2 -#define TYPE_SUBS		0x4 -#define TYPE_QUOTE		0x8 -#define TYPE_CLOSE_PAREN	0x10 -#define TYPE_CLOSE_BRACK	0x20 -#define TYPE_BRACE		0x40 - -#define CHAR_TYPE(c) (charTypeTable+128)[(int)(c)] - -static const char charTypeTable[] = { +const char tclCharTypeTable[] = {      /*       * Negative character values, from -128 to -1:       */ @@ -268,7 +259,8 @@ Tcl_ParseCommand(      if ((start == NULL) && (numBytes != 0)) {  	if (interp != NULL) { -	    Tcl_SetResult(interp, "can't parse a NULL pointer", TCL_STATIC); +	    Tcl_SetObjResult(interp, Tcl_NewStringObj( +		    "can't parse a NULL pointer", -1));  	}  	return TCL_ERROR;      } @@ -433,7 +425,7 @@ Tcl_ParseCommand(  	    }  	    if (isLiteral) { -		int elemCount = 0, code = TCL_OK, nakedbs = 0; +		int elemCount = 0, code = TCL_OK, literal = 1;  		const char *nextElem, *listEnd, *elemStart;  		/* @@ -455,35 +447,26 @@ Tcl_ParseCommand(  		 */  		while (nextElem < listEnd) { -		    int size, brace; +		    int size;  		    code = TclFindElement(NULL, nextElem, listEnd - nextElem, -			    &elemStart, &nextElem, &size, &brace); -		    if (code != TCL_OK) { +			    &elemStart, &nextElem, &size, &literal); +		    if ((code != TCL_OK) || !literal) {  			break;  		    } -		    if (!brace) { -			const char *s; - -			for(s=elemStart;size>0;s++,size--) { -			    if ((*s)=='\\') { -				nakedbs = 1; -				break; -			    } -			} -		    }  		    if (elemStart < listEnd) {  			elemCount++;  		    }  		} -		if ((code != TCL_OK) || nakedbs) { +		if ((code != TCL_OK) || !literal) {  		    /* -		     * Some list element could not be parsed, or contained -		     * naked backslashes. This means the literal string was -		     * not in fact a valid nor canonical list. Defer the -		     * handling of this to compile/eval time, where code is -		     * already in place to report the "attempt to expand a +		     * Some list element could not be parsed, or is not +		     * present as a literal substring of the script.  The +		     * compiler cannot handle list elements that get generated +		     * by a call to TclCopyAndCollapse(). Defer  the +		     * handling of  this to  compile/eval time, where  code is +		     * already  in place to  report the  "attempt to  expand a  		     * non-list" error or expand lists that require  		     * substitution.  		     */ @@ -505,6 +488,7 @@ Tcl_ParseCommand(  		     * tokens representing the expanded list.  		     */ +		    const char *listStart;  		    int growthNeeded = wordIndex + 2*elemCount  			    - parsePtr->numTokens; @@ -524,9 +508,9 @@ Tcl_ParseCommand(  		     * word value.  		     */ -		    nextElem = tokenPtr[1].start; +		    listStart = nextElem = tokenPtr[1].start;  		    while (nextElem < listEnd) { -			int quoted, brace; +			int quoted;  			tokenPtr->type = TCL_TOKEN_SIMPLE_WORD;  			tokenPtr->numComponents = 1; @@ -536,9 +520,11 @@ Tcl_ParseCommand(  			tokenPtr->numComponents = 0;  			TclFindElement(NULL, nextElem, listEnd - nextElem,  				&(tokenPtr->start), &nextElem, -				&(tokenPtr->size), &brace); +				&(tokenPtr->size), NULL); -			quoted = brace || tokenPtr->start[-1] == '"'; +			quoted = (tokenPtr->start[-1] == '{' +				|| tokenPtr->start[-1] == '"') +				&& tokenPtr->start > listStart;  			tokenPtr[-1].start = tokenPtr->start - quoted;  			tokenPtr[-1].size = tokenPtr->start + tokenPtr->size  				- tokenPtr[-1].start + quoted; @@ -584,14 +570,14 @@ Tcl_ParseCommand(  	}  	if (src[-1] == '"') {  	    if (interp != NULL) { -		Tcl_SetResult(interp, "extra characters after close-quote", -			TCL_STATIC); +		Tcl_SetObjResult(interp, Tcl_NewStringObj( +			"extra characters after close-quote", -1));  	    }  	    parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA;  	} else {  	    if (interp != NULL) { -		Tcl_SetResult(interp, "extra characters after close-brace", -			TCL_STATIC); +		Tcl_SetObjResult(interp, Tcl_NewStringObj( +			"extra characters after close-brace", -1));  	    }  	    parsePtr->errorType = TCL_PARSE_BRACE_EXTRA;  	} @@ -611,6 +597,30 @@ Tcl_ParseCommand(  /*   *----------------------------------------------------------------------   * + * TclIsSpaceProc -- + * + *	Report whether byte is in the set of whitespace characters used by + *	Tcl to separate words in scripts or elements in lists. + * + * Results: + *	Returns 1, if byte is in the set, 0 otherwise. + * + * Side effects: + *	None. + * + *---------------------------------------------------------------------- + */ + +int +TclIsSpaceProc( +    char byte) +{ +    return CHAR_TYPE(byte) & (TYPE_SPACE) || byte == '\n'; +} + +/* + *---------------------------------------------------------------------- + *   * ParseWhiteSpace --   *   *	Scans up to numBytes bytes starting at src, consuming white space @@ -726,17 +736,17 @@ int  TclParseHex(      const char *src,		/* First character to parse. */      int numBytes,		/* Max number of byes to scan */ -    Tcl_UniChar *resultPtr)	/* Points to storage provided by caller where -				 * the Tcl_UniChar resulting from the +    int *resultPtr)	/* Points to storage provided by caller where +				 * the character resulting from the  				 * conversion is to be written. */  { -    Tcl_UniChar result = 0; +    int result = 0;      register const char *p = src;      while (numBytes--) {  	unsigned char digit = UCHAR(*p); -	if (!isxdigit(digit)) { +	if (!isxdigit(digit) || (result > 0x10fff)) {  	    break;  	} @@ -790,7 +800,8 @@ TclParseBackslash(  				 * written there. */  {      register const char *p = src+1; -    Tcl_UniChar result; +    Tcl_UniChar unichar; +    int result;      int count;      char buf[TCL_UTF_MAX]; @@ -847,7 +858,7 @@ TclParseBackslash(  	result = 0xb;  	break;      case 'x': -	count += TclParseHex(p+1, numBytes-2, &result); +	count += TclParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result);  	if (count == 2) {  	    /*  	     * No hexadigits -> This is just "x". @@ -870,6 +881,15 @@ TclParseBackslash(  	    result = 'u';  	}  	break; +    case 'U': +	count += TclParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); +	if (count == 2) { +	    /* +	     * No hexadigits -> This is just "U". +	     */ +	    result = 'U'; +	} +	break;      case '\n':  	count--;  	do { @@ -888,17 +908,17 @@ TclParseBackslash(  	 */  	if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) {	/* INTL: digit */ -	    result = UCHAR(*p - '0'); +	    result = *p - '0';  	    p++;  	    if ((numBytes == 2) || !isdigit(UCHAR(*p))	/* INTL: digit */  		    || (UCHAR(*p) >= '8')) {  		break;  	    }  	    count = 3; -	    result = UCHAR((result << 3) + (*p - '0')); +	    result = (result << 3) + (*p - '0');  	    p++;  	    if ((numBytes == 3) || !isdigit(UCHAR(*p))	/* INTL: digit */ -		    || (UCHAR(*p) >= '8')) { +		    || (UCHAR(*p) >= '8') || (result >= 0x20)) {  		break;  	    }  	    count = 4; @@ -914,14 +934,15 @@ TclParseBackslash(  	 */  	if (Tcl_UtfCharComplete(p, numBytes - 1)) { -	    count = Tcl_UtfToUniChar(p, &result) + 1;	/* +1 for '\' */ +	    count = Tcl_UtfToUniChar(p, &unichar) + 1;	/* +1 for '\' */  	} else {  	    char utfBytes[TCL_UTF_MAX];  	    memcpy(utfBytes, p, (size_t) (numBytes - 1));  	    utfBytes[numBytes - 1] = '\0'; -	    count = Tcl_UtfToUniChar(utfBytes, &result) + 1; +	    count = Tcl_UtfToUniChar(utfBytes, &unichar) + 1;  	} +	result = unichar;  	break;      } @@ -929,7 +950,7 @@ TclParseBackslash(      if (readPtr != NULL) {  	*readPtr = count;      } -    return Tcl_UniCharToUtf((int) result, dst); +    return Tcl_UniCharToUtf(result, dst);  }  /* @@ -1156,8 +1177,8 @@ ParseTokens(  		}  		if (numBytes == 0) {  		    if (parsePtr->interp != NULL) { -			Tcl_SetResult(parsePtr->interp, -				"missing close-bracket", TCL_STATIC); +			Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( +				"missing close-bracket", -1));  		    }  		    parsePtr->errorType = TCL_PARSE_MISSING_BRACKET;  		    parsePtr->term = tokenPtr->start; @@ -1392,8 +1413,8 @@ Tcl_ParseVarName(  	}  	if (numBytes == 0) {  	    if (parsePtr->interp != NULL) { -		Tcl_SetResult(parsePtr->interp, -			"missing close-brace for variable name", TCL_STATIC); +		Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( +			"missing close-brace for variable name", -1));  	    }  	    parsePtr->errorType = TCL_PARSE_MISSING_VAR_BRACE;  	    parsePtr->term = tokenPtr->start-1; @@ -1460,8 +1481,8 @@ Tcl_ParseVarName(  	    }  	    if ((parsePtr->term == src+numBytes) || (*parsePtr->term != ')')){  		if (parsePtr->interp != NULL) { -		    Tcl_SetResult(parsePtr->interp, "missing )", -			    TCL_STATIC); +		    Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( +			    "missing )", -1));  		}  		parsePtr->errorType = TCL_PARSE_MISSING_PAREN;  		parsePtr->term = src; @@ -1547,6 +1568,7 @@ Tcl_ParseVar(      code = TclSubstTokens(interp, parsePtr->tokenPtr, parsePtr->numTokens,  	    NULL, 1, NULL, NULL); +    Tcl_FreeParse(parsePtr);      TclStackFree(interp, parsePtr);      if (code != TCL_OK) {  	return NULL; @@ -1557,16 +1579,13 @@ Tcl_ParseVar(       * At this point we should have an object containing the value of a       * variable. Just return the string from that object.       * -     * This should have returned the object for the user to manage, but -     * instead we have some weak reference to the string value in the object, -     * which is why we make sure the object exists after resetting the result. -     * This isn't ideal, but it's the best we can do with the current -     * documented interface. -- hobbs +     * Since TclSubstTokens above returned TCL_OK, we know that objPtr +     * is shared.  It is in both the interp result and the value of the +     * variable.  Returning the string relies on that to be true.       */ -    if (!Tcl_IsShared(objPtr)) { -	Tcl_IncrRefCount(objPtr); -    } +    assert( Tcl_IsShared(objPtr) ); +      Tcl_ResetResult(interp);      return TclGetString(objPtr);  } @@ -1736,7 +1755,8 @@ Tcl_ParseBraces(  	goto error;      } -    Tcl_SetResult(parsePtr->interp, "missing close-brace", TCL_STATIC); +    Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( +	    "missing close-brace", -1));      /*       * Guess if the problem is due to comments by searching the source string @@ -1757,9 +1777,9 @@ Tcl_ParseBraces(  		openBrace = 0;  		break;  	    case '#' : -		if (openBrace && isspace(UCHAR(src[-1]))) { -		    Tcl_AppendResult(parsePtr->interp, -			    ": possible unbalanced brace in comment", NULL); +		if (openBrace && TclIsSpaceProc(src[-1])) { +		    Tcl_AppendToObj(Tcl_GetObjResult(parsePtr->interp), +			    ": possible unbalanced brace in comment", -1);  		    goto error;  		}  		break; @@ -1838,7 +1858,8 @@ Tcl_ParseQuotedString(      }      if (*parsePtr->term != '"') {  	if (parsePtr->interp != NULL) { -	    Tcl_SetResult(parsePtr->interp, "missing \"", TCL_STATIC); +	    Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( +		    "missing \"", -1));  	}  	parsePtr->errorType = TCL_PARSE_MISSING_QUOTE;  	parsePtr->term = start; | 
