diff options
author | dgp <dgp@users.sourceforge.net> | 2018-03-15 14:23:18 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2018-03-15 14:23:18 (GMT) |
commit | be8b7d25f6ed27943242888bf395ea2aa50632ae (patch) | |
tree | e7faf7871cc0c0180df22b5903d708fba0a73a9c /generic/tclUtil.c | |
parent | 0bef3861ff2e6a20bb908d6099a1b55ff80860e8 (diff) | |
parent | aa199edba612a516e6309290fb6dc4442a49a5ee (diff) | |
download | tcl-be8b7d25f6ed27943242888bf395ea2aa50632ae.zip tcl-be8b7d25f6ed27943242888bf395ea2aa50632ae.tar.gz tcl-be8b7d25f6ed27943242888bf395ea2aa50632ae.tar.bz2 |
merge 8.7
Diffstat (limited to 'generic/tclUtil.c')
-rw-r--r-- | generic/tclUtil.c | 255 |
1 files changed, 183 insertions, 72 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 3833e30..9136c21 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1645,11 +1645,46 @@ Tcl_Backslash( /* *---------------------------------------------------------------------- * - * TclTrimRight -- + * UtfWellFormedEnd -- + * Checks the end of utf string is malformed, if yes - wraps bytes + * to the given buffer (as well-formed NTS string). The buffer + * argument should be initialized by the caller and ready to use. + * + * Results: + * The bytes with well-formed end of the string. * - * Takes two counted strings in the Tcl encoding which must both be null - * terminated. Conceptually trims from the right side of the first string - * all characters found in the second string. + * Side effects: + * Buffer (DString) may be allocated, so must be released. + * + *---------------------------------------------------------------------- + */ + +static inline const char* +UtfWellFormedEnd( + Tcl_DString *buffer, /* Buffer used to hold well-formed string. */ + const char *bytes, /* Pointer to the beginning of the string. */ + int length) /* Length of the string. */ +{ + const char *l = bytes + length; + const char *p = Tcl_UtfPrev(l, bytes); + + if (Tcl_UtfCharComplete(p, l - p)) { + return bytes; + } + /* + * Malformed utf-8 end, be sure we've NTS to safe compare of end-character, + * avoid segfault by access violation out of range. + */ + Tcl_DStringAppend(buffer, bytes, length); + return Tcl_DStringValue(buffer); +} +/* + *---------------------------------------------------------------------- + * + * TclTrimRight -- + * Takes two counted strings in the Tcl encoding. Conceptually + * finds the sub string (offset) to trim from the right side of the + * first string all characters found in the second string. * * Results: * The number of bytes to be removed from the end of the string. @@ -1660,8 +1695,8 @@ Tcl_Backslash( *---------------------------------------------------------------------- */ -int -TclTrimRight( +static inline int +TrimRight( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ const char *trim, /* String of trim characters... */ @@ -1671,18 +1706,6 @@ TclTrimRight( int pInc; Tcl_UniChar ch1 = 0, ch2 = 0; - if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { - Tcl_Panic("TclTrimRight works only on null-terminated strings"); - } - - /* - * Empty strings -> nothing to do. - */ - - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - /* * Outer loop: iterate over string to be trimmed. */ @@ -1721,15 +1744,46 @@ TclTrimRight( return numBytes - (p - bytes); } + +int +TclTrimRight( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ +{ + int res; + Tcl_DString bytesBuf, trimBuf; + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + Tcl_DStringInit(&bytesBuf); + Tcl_DStringInit(&trimBuf); + bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); + trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); + + res = TrimRight(bytes, numBytes, trim, numTrim); + if (res > numBytes) { + res = numBytes; + } + + Tcl_DStringFree(&bytesBuf); + Tcl_DStringFree(&trimBuf); + + return res; +} /* *---------------------------------------------------------------------- * * TclTrimLeft -- * - * Takes two counted strings in the Tcl encoding which must both be null - * terminated. Conceptually trims from the left side of the first string - * all characters found in the second string. + * Takes two counted strings in the Tcl encoding. Conceptually + * finds the sub string (offset) to trim from the left side of the + * first string all characters found in the second string. * * Results: * The number of bytes to be removed from the start of the string. @@ -1740,8 +1794,8 @@ TclTrimRight( *---------------------------------------------------------------------- */ -int -TclTrimLeft( +static inline int +TrimLeft( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ const char *trim, /* String of trim characters... */ @@ -1750,18 +1804,6 @@ TclTrimLeft( const char *p = bytes; Tcl_UniChar ch1 = 0, ch2 = 0; - if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { - Tcl_Panic("TclTrimLeft works only on null-terminated strings"); - } - - /* - * Empty strings -> nothing to do. - */ - - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - /* * Outer loop: iterate over string to be trimmed. */ @@ -1796,10 +1838,99 @@ TclTrimLeft( p += pInc; numBytes -= pInc; - } while (numBytes); + } while (numBytes > 0); return p - bytes; } + +int +TclTrimLeft( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ +{ + int res; + Tcl_DString bytesBuf, trimBuf; + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + Tcl_DStringInit(&bytesBuf); + Tcl_DStringInit(&trimBuf); + bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); + trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); + + res = TrimLeft(bytes, numBytes, trim, numTrim); + if (res > numBytes) { + res = numBytes; + } + + Tcl_DStringFree(&bytesBuf); + Tcl_DStringFree(&trimBuf); + + return res; +} + +/* + *---------------------------------------------------------------------- + * + * TclTrim -- + * Finds the sub string (offset) to trim from both sides of the + * first string all characters found in the second string. + * + * Results: + * The number of bytes to be removed from the start of the string + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclTrim( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim, /* ...and its length in bytes */ + int *trimRight) /* Offset from the end of the string. */ +{ + int trimLeft; + Tcl_DString bytesBuf, trimBuf; + + *trimRight = 0; + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + Tcl_DStringInit(&bytesBuf); + Tcl_DStringInit(&trimBuf); + bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); + trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); + + trimLeft = TrimLeft(bytes, numBytes, trim, numTrim); + if (trimLeft > numBytes) { + trimLeft = numBytes; + } + numBytes -= trimLeft; + /* have to trim yet (first char was already verified within TrimLeft) */ + if (numBytes > 1) { + bytes += trimLeft; + *trimRight = TrimRight(bytes, numBytes, trim, numTrim); + if (*trimRight > numBytes) { + *trimRight = numBytes; + } + } + + Tcl_DStringFree(&bytesBuf); + Tcl_DStringFree(&trimBuf); + + return trimLeft; +} /* *---------------------------------------------------------------------- @@ -1867,30 +1998,20 @@ Tcl_Concat( result = ckalloc((unsigned) (bytesNeeded + argc)); for (p = result, i = 0; i < argc; i++) { - int trim, elemLength; + int triml, trimr, elemLength; const char *element; element = argv[i]; elemLength = strlen(argv[i]); - /* - * Trim away the leading whitespace. - */ - - trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE); - element += trim; - elemLength -= trim; - - /* - * Trim away the trailing whitespace. Do not permit trimming to expose - * a final backslash character. - */ + /* Trim away the leading/trailing whitespace. */ + triml = TclTrim(element, elemLength, CONCAT_TRIM_SET, + CONCAT_WS_SIZE, &trimr); + element += triml; + elemLength -= triml + trimr; - trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE); - trim -= trim && (element[elemLength - trim - 1] == '\\'); - elemLength -= trim; + /* Do not permit trimming to expose a final backslash character. */ + elemLength += trimr && (element[elemLength - 1] == '\\'); /* * If we're left with empty element after trimming, do nothing. @@ -2010,28 +2131,18 @@ Tcl_ConcatObj( Tcl_SetObjLength(resPtr, 0); for (i = 0; i < objc; i++) { - int trim; + int triml, trimr; element = TclGetStringFromObj(objv[i], &elemLength); - /* - * Trim away the leading whitespace. - */ - - trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE); - element += trim; - elemLength -= trim; - - /* - * Trim away the trailing whitespace. Do not permit trimming to expose - * a final backslash character. - */ + /* Trim away the leading/trailing whitespace. */ + triml = TclTrim(element, elemLength, CONCAT_TRIM_SET, + CONCAT_WS_SIZE, &trimr); + element += triml; + elemLength -= triml + trimr; - trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE); - trim -= trim && (element[elemLength - trim - 1] == '\\'); - elemLength -= trim; + /* Do not permit trimming to expose a final backslash character. */ + elemLength += trimr && (element[elemLength - 1] == '\\'); /* * If we're left with empty element after trimming, do nothing. |