diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-24 22:34:15 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-24 22:34:15 (GMT) |
commit | f41dac748006f0d68b74c28d0fb477f488305fd8 (patch) | |
tree | 8c447ea147c1d9b992839f93faaab4216415f22f /generic/tclUtil.c | |
parent | f3d156bf8089b9a7af65d8f1608243e7c32f1acf (diff) | |
parent | 6de32c896abb44a00ad7368892924e9c9de5db11 (diff) | |
download | tcl-f41dac748006f0d68b74c28d0fb477f488305fd8.zip tcl-f41dac748006f0d68b74c28d0fb477f488305fd8.tar.gz tcl-f41dac748006f0d68b74c28d0fb477f488305fd8.tar.bz2 |
Merge 8.7
Diffstat (limited to 'generic/tclUtil.c')
-rw-r--r-- | generic/tclUtil.c | 437 |
1 files changed, 193 insertions, 244 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c index a13b037..37b3c83 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -408,7 +408,7 @@ TclMaxListLength( * No list element before leading white space. */ - count += 1 - TclIsSpaceProc(*bytes); + count += 1 - TclIsSpaceProcM(*bytes); /* * Count white space runs as potential element separators. @@ -418,7 +418,7 @@ TclMaxListLength( if ((numBytes == -1) && (*bytes == '\0')) { break; } - if (TclIsSpaceProc(*bytes)) { + if (TclIsSpaceProcM(*bytes)) { /* * Space run started; bump count. */ @@ -427,7 +427,7 @@ TclMaxListLength( do { bytes++; numBytes -= (numBytes != -1); - } while (numBytes && TclIsSpaceProc(*bytes)); + } while (numBytes && TclIsSpaceProcM(*bytes)); if ((numBytes == 0) || ((numBytes == -1) && (*bytes == '\0'))) { break; } @@ -444,7 +444,7 @@ TclMaxListLength( * No list element following trailing white space. */ - count -= TclIsSpaceProc(bytes[-1]); + count -= TclIsSpaceProcM(bytes[-1]); done: if (endPtr) { @@ -593,7 +593,7 @@ FindElement( */ limit = (string + stringLength); - while ((p < limit) && (TclIsSpaceProc(*p))) { + while ((p < limit) && (TclIsSpaceProcM(*p))) { p++; } if (p == limit) { /* no element found */ @@ -638,7 +638,7 @@ FindElement( } else if (openBraces == 1) { size = (p - elemStart); p++; - if ((p >= limit) || TclIsSpaceProc(*p)) { + if ((p >= limit) || TclIsSpaceProcM(*p)) { goto done; } @@ -648,7 +648,7 @@ FindElement( if (interp != NULL) { p2 = p; - while ((p2 < limit) && (!TclIsSpaceProc(*p2)) + while ((p2 < limit) && (!TclIsSpaceProcM(*p2)) && (p2 < p+20)) { p2++; } @@ -683,23 +683,6 @@ FindElement( break; /* - * Space: ignore if element is in braces or quotes; otherwise - * terminate element. - */ - - case ' ': - case '\f': - case '\n': - case '\r': - case '\t': - case '\v': - if ((openBraces == 0) && !inQuotes) { - size = (p - elemStart); - goto done; - } - break; - - /* * Double-quote: if element is in quotes then terminate it. */ @@ -707,7 +690,7 @@ FindElement( if (inQuotes) { size = (p - elemStart); p++; - if ((p >= limit) || TclIsSpaceProc(*p)) { + if ((p >= limit) || TclIsSpaceProcM(*p)) { goto done; } @@ -717,7 +700,7 @@ FindElement( if (interp != NULL) { p2 = p; - while ((p2 < limit) && (!TclIsSpaceProc(*p2)) + while ((p2 < limit) && (!TclIsSpaceProcM(*p2)) && (p2 < p+20)) { p2++; } @@ -730,6 +713,20 @@ FindElement( return TCL_ERROR; } break; + + default: + if (TclIsSpaceProcM(*p)) { + /* + * Space: ignore if element is in braces or quotes; + * otherwise terminate element. + */ + if ((openBraces == 0) && !inQuotes) { + size = (p - elemStart); + goto done; + } + } + break; + } p++; } @@ -760,7 +757,7 @@ FindElement( } done: - while ((p < limit) && (TclIsSpaceProc(*p))) { + while ((p < limit) && (TclIsSpaceProcM(*p))) { p++; } *elementPtr = elemStart; @@ -1116,12 +1113,6 @@ TclScanElement( case '[': /* TYPE_SUBS */ case '$': /* TYPE_SUBS */ case ';': /* TYPE_COMMAND_END */ - case ' ': /* TYPE_SPACE */ - case '\f': /* TYPE_SPACE */ - case '\n': /* TYPE_COMMAND_END */ - case '\r': /* TYPE_SPACE */ - case '\t': /* TYPE_SPACE */ - case '\v': /* TYPE_SPACE */ forbidNone = 1; extra++; /* Escape sequences all one byte longer. */ #if COMPAT @@ -1166,6 +1157,15 @@ TclScanElement( } /* TODO: Panic on improper encoding? */ break; + default: + if (TclIsSpaceProcM(*p)) { + forbidNone = 1; + extra++; /* Escape sequences all one byte longer. */ +#if COMPAT + preferBrace = 1; +#endif + } + break; } } length -= (length > 0); @@ -1666,42 +1666,6 @@ Tcl_Backslash( /* *---------------------------------------------------------------------- * - * UtfWellFormedEnd -- - * Checks the end of utf string is malformed, if yes - wraps bytes - * to the given buffer (as well-formed NTS string). The buffer - * argument should be initialized by the caller and ready to use. - * - * Results: - * The bytes with well-formed end of the string. - * - * Side effects: - * Buffer (DString) may be allocated, so must be released. - * - *---------------------------------------------------------------------- - */ - -static inline const char* -UtfWellFormedEnd( - Tcl_DString *buffer, /* Buffer used to hold well-formed string. */ - const char *bytes, /* Pointer to the beginning of the string. */ - int length) /* Length of the string. */ -{ - const char *l = bytes + length; - const char *p = Tcl_UtfPrev(l, bytes); - - if (Tcl_UtfCharComplete(p, l - p)) { - return bytes; - } - /* - * Malformed utf-8 end, be sure we've NTS to safe compare of end-character, - * avoid segfault by access violation out of range. - */ - Tcl_DStringAppend(buffer, bytes, length); - return Tcl_DStringValue(buffer); -} -/* - *---------------------------------------------------------------------- - * * TclTrimRight -- * Takes two counted strings in the Tcl encoding. Conceptually * finds the sub string (offset) to trim from the right side of the @@ -1716,27 +1680,38 @@ UtfWellFormedEnd( *---------------------------------------------------------------------- */ -static inline int -TrimRight( - const char *bytes, /* String to be trimmed... */ - int numBytes, /* ...and its length in bytes */ - const char *trim, /* String of trim characters... */ - int numTrim) /* ...and its length in bytes */ +int +TclTrimRight( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + /* Calls to TclUtfToUniChar() in this routine + * rely on (bytes[numBytes] == '\0'). */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ + /* Calls to TclUtfToUniChar() in this routine + * rely on (trim[numTrim] == '\0'). */ { - const char *p = bytes + numBytes; - int pInc; + const char *pp, *p = bytes + numBytes; int ch1, ch2; + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + /* * Outer loop: iterate over string to be trimmed. */ do { const char *q = trim; - int bytesLeft = numTrim; + int pInc = 0, bytesLeft = numTrim; - p = Tcl_UtfPrev(p, bytes); - pInc = TclUtfToUCS4(p, &ch1); + pp = TclUtfPrev(p, bytes); + do { + pp += pInc; + pInc = TclUtfToUCS4(pp, &ch1); + } while (pp + pInc < p); /* * Inner loop: scan trim string for match to current character. @@ -1758,44 +1733,13 @@ TrimRight( * No match; trim task done; *p is last non-trimmed char. */ - p += pInc; break; } + p = pp; } while (p > bytes); return numBytes - (p - bytes); } - -int -TclTrimRight( - const char *bytes, /* String to be trimmed... */ - int numBytes, /* ...and its length in bytes */ - const char *trim, /* String of trim characters... */ - int numTrim) /* ...and its length in bytes */ -{ - int res; - Tcl_DString bytesBuf, trimBuf; - - /* Empty strings -> nothing to do */ - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - - Tcl_DStringInit(&bytesBuf); - Tcl_DStringInit(&trimBuf); - bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); - trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); - - res = TrimRight(bytes, numBytes, trim, numTrim); - if (res > numBytes) { - res = numBytes; - } - - Tcl_DStringFree(&bytesBuf); - Tcl_DStringFree(&trimBuf); - - return res; -} /* *---------------------------------------------------------------------- @@ -1815,16 +1759,25 @@ TclTrimRight( *---------------------------------------------------------------------- */ -static inline int -TrimLeft( - const char *bytes, /* String to be trimmed... */ - int numBytes, /* ...and its length in bytes */ - const char *trim, /* String of trim characters... */ - int numTrim) /* ...and its length in bytes */ +int +TclTrimLeft( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + /* Calls to TclUtfToUniChar() in this routine + * rely on (bytes[numBytes] == '\0'). */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ + /* Calls to TclUtfToUniChar() in this routine + * rely on (trim[numTrim] == '\0'). */ { const char *p = bytes; int ch1, ch2; + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + /* * Outer loop: iterate over string to be trimmed. */ @@ -1863,37 +1816,6 @@ TrimLeft( return p - bytes; } - -int -TclTrimLeft( - const char *bytes, /* String to be trimmed... */ - int numBytes, /* ...and its length in bytes */ - const char *trim, /* String of trim characters... */ - int numTrim) /* ...and its length in bytes */ -{ - int res; - Tcl_DString bytesBuf, trimBuf; - - /* Empty strings -> nothing to do */ - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - - Tcl_DStringInit(&bytesBuf); - Tcl_DStringInit(&trimBuf); - bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); - trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); - - res = TrimLeft(bytes, numBytes, trim, numTrim); - if (res > numBytes) { - res = numBytes; - } - - Tcl_DStringFree(&bytesBuf); - Tcl_DStringFree(&trimBuf); - - return res; -} /* *---------------------------------------------------------------------- @@ -1915,41 +1837,39 @@ int TclTrim( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ + /* Calls in this routine + * rely on (bytes[numBytes] == '\0'). */ const char *trim, /* String of trim characters... */ int numTrim, /* ...and its length in bytes */ - int *trimRight) /* Offset from the end of the string. */ + /* Calls in this routine + * rely on (trim[numTrim] == '\0'). */ + int *trimRightPtr) /* Offset from the end of the string. */ { - int trimLeft; - Tcl_DString bytesBuf, trimBuf; + int trimLeft = 0, trimRight = 0; - *trimRight = 0; /* Empty strings -> nothing to do */ - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - - Tcl_DStringInit(&bytesBuf); - Tcl_DStringInit(&trimBuf); - bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); - trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); - - trimLeft = TrimLeft(bytes, numBytes, trim, numTrim); - if (trimLeft > numBytes) { - trimLeft = numBytes; - } - numBytes -= trimLeft; - /* have to trim yet (first char was already verified within TrimLeft) */ - if (numBytes > 1) { - bytes += trimLeft; - *trimRight = TrimRight(bytes, numBytes, trim, numTrim); - if (*trimRight > numBytes) { - *trimRight = numBytes; + if ((numBytes > 0) && (numTrim > 0)) { + + /* When bytes is NUL-terminated, returns 0 <= trimLeft <= numBytes */ + trimLeft = TclTrimLeft(bytes, numBytes, trim, numTrim); + numBytes -= trimLeft; + + /* If we did not trim the whole string, it starts with a character + * that we will not trim. Skip over it. */ + if (numBytes > 0) { + int ch; + const char *first = bytes + trimLeft; + bytes += TclUtfToUCS4(first, &ch); + numBytes -= (bytes - first); + + if (numBytes > 0) { + /* When bytes is NUL-terminated, returns + * 0 <= trimRight <= numBytes */ + trimRight = TclTrimRight(bytes, numBytes, trim, numTrim); + } } } - - Tcl_DStringFree(&bytesBuf); - Tcl_DStringFree(&trimBuf); - + *trimRightPtr = trimRight; return trimLeft; } @@ -2242,8 +2162,7 @@ Tcl_StringCaseMatch( int nocase) /* 0 for case sensitive, 1 for insensitive */ { int p, charLen; - const char *pstart = pattern; - Tcl_UniChar ch1 = 0, ch2 = 0; + int ch1 = 0, ch2 = 0; while (1) { p = *pattern; @@ -2284,10 +2203,10 @@ Tcl_StringCaseMatch( */ if (UCHAR(*pattern) < 0x80) { - ch2 = (Tcl_UniChar) + ch2 = (int) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); } else { - Tcl_UtfToUniChar(pattern, &ch2); + TclUtfToUCS4(pattern, &ch2); if (nocase) { ch2 = Tcl_UniCharToLower(ch2); } @@ -2303,7 +2222,7 @@ Tcl_StringCaseMatch( if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while (*str) { - charLen = TclUtfToUniChar(str, &ch1); + charLen = TclUtfToUCS4(str, &ch1); if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) { break; } @@ -2317,7 +2236,7 @@ Tcl_StringCaseMatch( */ while (*str) { - charLen = TclUtfToUniChar(str, &ch1); + charLen = TclUtfToUCS4(str, &ch1); if (ch2 == ch1) { break; } @@ -2331,7 +2250,7 @@ Tcl_StringCaseMatch( if (*str == '\0') { return 0; } - str += TclUtfToUniChar(str, &ch1); + str += TclUtfToUCS4(str, &ch1); } } @@ -2342,7 +2261,7 @@ Tcl_StringCaseMatch( if (p == '?') { pattern++; - str += TclUtfToUniChar(str, &ch1); + str += TclUtfToUCS4(str, &ch1); continue; } @@ -2353,15 +2272,15 @@ Tcl_StringCaseMatch( */ if (p == '[') { - Tcl_UniChar startChar = 0, endChar = 0; + int startChar = 0, endChar = 0; pattern++; if (UCHAR(*str) < 0x80) { - ch1 = (Tcl_UniChar) + ch1 = (int) (nocase ? tolower(UCHAR(*str)) : UCHAR(*str)); str++; } else { - str += Tcl_UtfToUniChar(str, &ch1); + str += TclUtfToUCS4(str, &ch1); if (nocase) { ch1 = Tcl_UniCharToLower(ch1); } @@ -2371,11 +2290,11 @@ Tcl_StringCaseMatch( return 0; } if (UCHAR(*pattern) < 0x80) { - startChar = (Tcl_UniChar) (nocase + startChar = (int) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); pattern++; } else { - pattern += Tcl_UtfToUniChar(pattern, &startChar); + pattern += TclUtfToUCS4(pattern, &startChar); if (nocase) { startChar = Tcl_UniCharToLower(startChar); } @@ -2386,11 +2305,11 @@ Tcl_StringCaseMatch( return 0; } if (UCHAR(*pattern) < 0x80) { - endChar = (Tcl_UniChar) (nocase + endChar = (int) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); pattern++; } else { - pattern += Tcl_UtfToUniChar(pattern, &endChar); + pattern += TclUtfToUCS4(pattern, &endChar); if (nocase) { endChar = Tcl_UniCharToLower(endChar); } @@ -2407,10 +2326,13 @@ Tcl_StringCaseMatch( break; } } + /* If we reach here, we matched. Need to move past closing ] */ while (*pattern != ']') { if (*pattern == '\0') { - pattern = Tcl_UtfPrev(pattern, pstart); - break; + /* We ran out of pattern after matching something in + * (unclosed!) brackets. So long as we ran out of string + * at the same time, we have a match. Otherwise, not. */ + return (*str == '\0'); } pattern++; } @@ -2435,8 +2357,8 @@ Tcl_StringCaseMatch( * each string match. */ - str += TclUtfToUniChar(str, &ch1); - pattern += TclUtfToUniChar(pattern, &ch2); + str += TclUtfToUCS4(str, &ch1); + pattern += TclUtfToUCS4(pattern, &ch2); if (nocase) { if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) { return 0; @@ -2839,9 +2761,37 @@ Tcl_DStringAppendElement( { char *dst = dsPtr->string + dsPtr->length; int needSpace = TclNeedSpace(dsPtr->string, dst); - char flags = needSpace ? TCL_DONT_QUOTE_HASH : 0; - int newSize = dsPtr->length + needSpace - + TclScanElement(element, -1, &flags); + char flags = 0; + int quoteHash = 1, newSize; + + if (needSpace) { + /* + * If we need a space to separate the new element from something + * already ending the string, we're not appending the first element + * of any list, so we need not quote any leading hash character. + */ + quoteHash = 0; + } else { + /* + * We don't need a space, maybe because there's some already there. + * Checking whether we might be appending a first element is a bit + * more involved. + * + * Backtrack over all whitespace. + */ + while ((--dst >= dsPtr->string) && TclIsSpaceProcM(*dst)) { + } + + /* Call again without whitespace to confound things. */ + quoteHash = !TclNeedSpace(dsPtr->string, dst+1); + } + if (!quoteHash) { + flags |= TCL_DONT_QUOTE_HASH; + } + newSize = dsPtr->length + needSpace + TclScanElement(element, -1, &flags); + if (!quoteHash) { + flags |= TCL_DONT_QUOTE_HASH; + } /* * Allocate a larger buffer for the string if the current one isn't large @@ -2873,8 +2823,8 @@ Tcl_DStringAppendElement( element = dsPtr->string + offset; } } - dst = dsPtr->string + dsPtr->length; } + dst = dsPtr->string + dsPtr->length; /* * Convert the new string to a list element and copy it into the buffer at @@ -2885,15 +2835,8 @@ Tcl_DStringAppendElement( *dst = ' '; dst++; dsPtr->length++; - - /* - * If we need a space to separate this element from preceding stuff, - * then this element will not lead a list, and need not have it's - * leading '#' quoted. - */ - - flags |= TCL_DONT_QUOTE_HASH; } + dsPtr->length += TclConvertElement(element, -1, dst, flags); dsPtr->string[dsPtr->length] = '\0'; return dsPtr->string; @@ -3520,63 +3463,69 @@ TclNeedSpace( /* * A space is needed unless either: * (a) we're at the start of the string, or - */ + * + * (NOTE: This check is now absorbed into the loop below.) + * if (end == start) { return 0; } + * + */ + /* * (b) we're at the start of a nested list-element, quoted with an open * curly brace; we can be nested arbitrarily deep, so long as the * first curly brace starts an element, so backtrack over open curly * braces that are trailing characters of the string; and - */ + * + * (NOTE: Every character our parser is looking for is a proper + * single-byte encoding of an ASCII value. It does not accept + * overlong encodings. Given that, there's no benefit using + * Tcl_UtfPrev. If it would find what we seek, so would byte-by-byte + * backward scan. Save routine call overhead and risk of wrong + * results should the behavior of Tcl_UtfPrev change in unexpected ways. + * Reconsider this if we ever start treating non-ASCII Unicode + * characters as meaningful list syntax, expanded Unicode spaces as + * element separators, for example.) + * end = Tcl_UtfPrev(end, start); while (*end == '{') { - if (end == start) { - return 0; - } - end = Tcl_UtfPrev(end, start); + if (end == start) { + return 0; + } + end = Tcl_UtfPrev(end, start); + } + + * + */ + + while ((--end >= start) && (*end == '{')) { + } + if (end < start) { + return 0; } /* * (c) the trailing character of the string is already a list-element - * separator (according to TclFindElement); that is, one of these - * characters: - * \u0009 \t TAB - * \u000A \n NEWLINE - * \u000B \v VERTICAL TAB - * \u000C \f FORM FEED - * \u000D \r CARRIAGE RETURN - * \u0020 SPACE - * with the condition that the penultimate character is not a - * backslash. + * separator, Use the same testing routine as TclFindElement to + * enforce consistency. */ - if (*end > 0x20) { + if (TclIsSpaceProcM(*end)) { + int result = 0; + /* - * Performance tweak. All ASCII spaces are <= 0x20. So get a quick - * answer for most characters before comparing against all spaces in - * the switch below. - * - * NOTE: Remove this if other Unicode spaces ever get accepted as - * list-element separators. + * Trailing whitespace might be part of a backslash escape + * sequence. Handle that possibility. */ - return 1; - } - switch (*end) { - case ' ': - case '\t': - case '\n': - case '\r': - case '\v': - case '\f': - if ((end == start) || (end[-1] != '\\')) { - return 0; + while ((--end >= start) && (*end == '\\')) { + result = !result; } + return result; } return 1; } @@ -4178,7 +4127,7 @@ TclCheckBadOctal( * zero. Try to generate a meaningful error message. */ - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } if (*p == '+' || *p == '-') { @@ -4191,7 +4140,7 @@ TclCheckBadOctal( while (isdigit(UCHAR(*p))) { /* INTL: digit. */ p++; } - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } if (*p == '\0') { |