diff options
author | dgp <dgp@users.sourceforge.net> | 2020-04-09 17:30:06 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2020-04-09 17:30:06 (GMT) |
commit | e2b3b1edf02e0c1ab06cfe784d6a7bd76e8050d8 (patch) | |
tree | 18f2f6548b58be30c2a8ec13c42375ddb5802e94 | |
parent | 77dbe8dc087788e733edf13dedd31202b18fded4 (diff) | |
download | tcl-e2b3b1edf02e0c1ab06cfe784d6a7bd76e8050d8.zip tcl-e2b3b1edf02e0c1ab06cfe784d6a7bd76e8050d8.tar.gz tcl-e2b3b1edf02e0c1ab06cfe784d6a7bd76e8050d8.tar.bz2 |
Guarantee that TclNeedSpace and TclFindElement share a common definition of
whitespace by having both call the same routine (TclIsSpaceProc). Create a macro
form (TclIsSpaceProcM) to contain performance costs, and adapt callers to use it.
-rw-r--r-- | generic/tclCmdAH.c | 2 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 2 | ||||
-rw-r--r-- | generic/tclDate.c | 2 | ||||
-rw-r--r-- | generic/tclInt.h | 11 | ||||
-rw-r--r-- | generic/tclParse.c | 2 | ||||
-rw-r--r-- | generic/tclStrToD.c | 6 | ||||
-rw-r--r-- | generic/tclUtf.c | 2 | ||||
-rw-r--r-- | generic/tclUtil.c | 120 | ||||
-rw-r--r-- | unix/tclUnixFile.c | 2 |
9 files changed, 69 insertions, 80 deletions
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 06743d6..f30396b 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -145,7 +145,7 @@ Tcl_CaseObjCmd( pat = TclGetString(caseObjv[i]); for (p = pat; *p != '\0'; p++) { - if (TclIsSpaceProc(*p) || (*p == '\\')) { + if (TclIsSpaceProcM(*p) || (*p == '\\')) { break; } } diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 255fca1..d4fa4e9 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1648,7 +1648,7 @@ StringIsCmd( * if it is the first "element" that has the failure. */ - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } TclNewStringObj(tmpStr, string1, p-string1); diff --git a/generic/tclDate.c b/generic/tclDate.c index 2cf20d6..8d37f3d 100644 --- a/generic/tclDate.c +++ b/generic/tclDate.c @@ -2684,7 +2684,7 @@ TclDatelex( location->first_column = yyInput - info->dateStart; for ( ; ; ) { - while (TclIsSpaceProc(*yyInput)) { + while (TclIsSpaceProcM(*yyInput)) { yyInput++; } diff --git a/generic/tclInt.h b/generic/tclInt.h index 06cff60..15bc000 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2608,7 +2608,6 @@ MODULE_SCOPE void TclInitNotifier(void); MODULE_SCOPE void TclInitObjSubsystem(void); MODULE_SCOPE void TclInitSubsystems(void); MODULE_SCOPE int TclInterpReady(Tcl_Interp *interp); -MODULE_SCOPE int TclIsSpaceProc(char byte); MODULE_SCOPE int TclIsBareword(char byte); MODULE_SCOPE int TclJoinThread(Tcl_ThreadId id, int *result); MODULE_SCOPE void TclLimitRemoveAllHandlers(Tcl_Interp *interp); @@ -2800,6 +2799,16 @@ MODULE_SCOPE Tcl_Obj * TclDisassembleByteCodeObj(Tcl_Obj *objPtr); MODULE_SCOPE int TclUtfCasecmp(CONST char *cs, CONST char *ct); /* + * Many parsing tasks need a common definition of whitespace. + * Use this routine and macro to achieve that and place + * optimization (fragile on changes) in one place. + */ + +MODULE_SCOPE int TclIsSpaceProc(char byte); +# define TclIsSpaceProcM(byte) \ + (((byte) > 0x20) ? 
0 : TclIsSpaceProc(byte)) + +/* *---------------------------------------------------------------- * Command procedures in the generic core: *---------------------------------------------------------------- diff --git a/generic/tclParse.c b/generic/tclParse.c index 6f989d9..7bead99 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -1809,7 +1809,7 @@ Tcl_ParseBraces( openBrace = 0; break; case '#' : - if (openBrace && TclIsSpaceProc(src[-1])) { + if (openBrace && TclIsSpaceProcM(src[-1])) { Tcl_AppendResult(parsePtr->interp, ": possible unbalanced brace in comment", NULL); goto error; diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index 4359829..3776521 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -533,7 +533,7 @@ TclParseNumber( * I, N, and whitespace. */ - if (TclIsSpaceProc(c)) { + if (TclIsSpaceProcM(c)) { if (flags & TCL_PARSE_NO_WHITESPACE) { goto endgame; } @@ -1053,7 +1053,7 @@ TclParseNumber( } /* FALLTHROUGH */ case sNANPAREN: - if (TclIsSpaceProc(c)) { + if (TclIsSpaceProcM(c)) { break; } if (numSigDigs < 13) { @@ -1107,7 +1107,7 @@ TclParseNumber( * Accept trailing whitespace. 
*/ - while (len != 0 && TclIsSpaceProc(*p)) { + while (len != 0 && TclIsSpaceProcM(*p)) { p++; len--; } diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 7d3db57..9aaf506 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1610,7 +1610,7 @@ Tcl_UniCharIsSpace( */ if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) { - return TclIsSpaceProc((char) ch); + return TclIsSpaceProcM((char) ch); } else if ((Tcl_UniChar) ch == 0x180E || (Tcl_UniChar) ch == 0x202F) { return 1; } else { diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 7ec224e..0b8ec2d 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -396,20 +396,20 @@ TclMaxListLength( } /* No list element before leading white space */ - count += 1 - TclIsSpaceProc(*bytes); + count += 1 - TclIsSpaceProcM(*bytes); /* Count white space runs as potential element separators */ while (numBytes) { if ((numBytes == -1) && (*bytes == '\0')) { break; } - if (TclIsSpaceProc(*bytes)) { + if (TclIsSpaceProcM(*bytes)) { /* Space run started; bump count */ count++; do { bytes++; numBytes -= (numBytes != -1); - } while (numBytes && TclIsSpaceProc(*bytes)); + } while (numBytes && TclIsSpaceProcM(*bytes)); if ((numBytes == 0) || ((numBytes == -1) && (*bytes == '\0'))) { break; } @@ -420,7 +420,7 @@ TclMaxListLength( } /* No list element following trailing white space */ - count -= TclIsSpaceProc(bytes[-1]); + count -= TclIsSpaceProcM(bytes[-1]); done: if (endPtr) { @@ -508,7 +508,7 @@ TclFindElement( */ limit = (list + listLength); - while ((p < limit) && (TclIsSpaceProc(*p))) { + while ((p < limit) && (TclIsSpaceProcM(*p))) { p++; } if (p == limit) { /* no element found */ @@ -553,7 +553,7 @@ TclFindElement( } else if (openBraces == 1) { size = (p - elemStart); p++; - if ((p >= limit) || TclIsSpaceProc(*p)) { + if ((p >= limit) || TclIsSpaceProcM(*p)) { goto done; } @@ -563,7 +563,7 @@ TclFindElement( if (interp != NULL) { p2 = p; - while ((p2 < limit) && (!TclIsSpaceProc(*p2)) + while ((p2 < limit) && 
(!TclIsSpaceProcM(*p2)) && (p2 < p+20)) { p2++; } @@ -595,23 +595,6 @@ TclFindElement( break; /* - * Space: ignore if element is in braces or quotes; otherwise - * terminate element. - */ - - case ' ': - case '\f': - case '\n': - case '\r': - case '\t': - case '\v': - if ((openBraces == 0) && !inQuotes) { - size = (p - elemStart); - goto done; - } - break; - - /* * Double-quote: if element is in quotes then terminate it. */ @@ -619,7 +602,7 @@ TclFindElement( if (inQuotes) { size = (p - elemStart); p++; - if ((p >= limit) || TclIsSpaceProc(*p)) { + if ((p >= limit) || TclIsSpaceProcM(*p)) { goto done; } @@ -629,7 +612,7 @@ TclFindElement( if (interp != NULL) { p2 = p; - while ((p2 < limit) && (!TclIsSpaceProc(*p2)) + while ((p2 < limit) && (!TclIsSpaceProcM(*p2)) && (p2 < p+20)) { p2++; } @@ -640,6 +623,20 @@ TclFindElement( return TCL_ERROR; } break; + + default: + if (TclIsSpaceProcM(*p)) { + /* + * Space: ignore if element is in braces or quotes; + * otherwise terminate element. + */ + if ((openBraces == 0) && !inQuotes) { + size = (p - elemStart); + goto done; + } + } + break; + } p++; } @@ -666,7 +663,7 @@ TclFindElement( } done: - while ((p < limit) && (TclIsSpaceProc(*p))) { + while ((p < limit) && (TclIsSpaceProcM(*p))) { p++; } *elementPtr = elemStart; @@ -1013,12 +1010,6 @@ TclScanElement( case '[': case '$': case ';': - case ' ': - case '\f': - case '\n': - case '\r': - case '\t': - case '\v': forbidNone = 1; extra++; /* Escape sequences all one byte longer. */ #if COMPAT @@ -1056,6 +1047,15 @@ TclScanElement( } /* TODO: Panic on improper encoding? */ break; + default: + if (TclIsSpaceProcM(*p)) { + forbidNone = 1; + extra++; /* Escape sequences all one byte longer. 
*/ +#if COMPAT + preferBrace = 1; +#endif + } + break; } length -= (length > 0); p++; @@ -1806,6 +1806,7 @@ TclTrim( */ /* The whitespace characters trimmed during [concat] operations */ +/* TODO: Find a reasonable way to guarantee in sync with TclIsSpaceProc() */ #define CONCAT_WS " \f\v\r\t\n" #define CONCAT_WS_SIZE (int) (sizeof(CONCAT_WS "") - 1) @@ -3272,43 +3273,22 @@ TclNeedSpace( /* * (c) the trailing character of the string is already a list-element - * separator (according to TclFindElement); that is, one of these - * characters: - * \u0009 \t TAB - * \u000A \n NEWLINE - * \u000B \v VERTICAL TAB - * \u000C \f FORM FEED - * \u000D \r CARRIAGE RETURN - * \u0020 SPACE - * with the condition that the penultimate character is not a - * backslash. + * separator, Use the same testing routine as TclFindElement to + * enforce consistency. */ - if (*end > 0x20) { + if (TclIsSpaceProcM(*end)) { + int result = 0; + /* - * Performance tweak. All ASCII spaces are <= 0x20. So get a quick - * answer for most characters before comparing against all spaces in - * the switch below. - * - * NOTE: Remove this if other Unicode spaces ever get accepted as - * list-element separators. + * Trailing whitespace might be part of a backslash escape + * sequence. Handle that possibility. */ - return 1; - } - switch (*end) { - case ' ': - case '\t': - case '\n': - case '\r': - case '\v': - case '\f': - { - int result = 0; - while ((--end >= start) && (*end == '\\')) { - result = !result; - } - return result; + + while ((--end >= start) && (*end == '\\')) { + result = !result; } + return result; } return 1; } @@ -3448,7 +3428,7 @@ TclGetIntForIndex( * Leading whitespace is acceptable in an index. 
*/ - while (length && TclIsSpaceProc(*bytes)) { + while (length && TclIsSpaceProcM(*bytes)) { bytes++; length--; } @@ -3461,7 +3441,7 @@ TclGetIntForIndex( if ((savedOp != '+') && (savedOp != '-')) { goto parseError; } - if (TclIsSpaceProc(opPtr[1])) { + if (TclIsSpaceProcM(opPtr[1])) { goto parseError; } *opPtr = '\0'; @@ -3607,7 +3587,7 @@ SetEndOffsetFromAny( * after "end-" to Tcl_GetInt, then reverse for offset. */ - if (TclIsSpaceProc(bytes[4])) { + if (TclIsSpaceProcM(bytes[4])) { return TCL_ERROR; } if (Tcl_GetInt(interp, bytes+4, &offset) != TCL_OK) { @@ -3672,7 +3652,7 @@ TclCheckBadOctal( * zero. Try to generate a meaningful error message. */ - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } if (*p == '+' || *p == '-') { @@ -3685,7 +3665,7 @@ TclCheckBadOctal( while (isdigit(UCHAR(*p))) { /* INTL: digit. */ p++; } - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } if (*p == '\0') { diff --git a/unix/tclUnixFile.c b/unix/tclUnixFile.c index 65e144d..038cbf8 100644 --- a/unix/tclUnixFile.c +++ b/unix/tclUnixFile.c @@ -98,7 +98,7 @@ TclpFindExecutable( */ while (1) { - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } name = p; |