summaryrefslogtreecommitdiffstats
path: root/generic/tclUtil.c
diff options
context:
space:
mode:
authorsebres <sebres@users.sourceforge.net>2018-03-13 18:52:45 (GMT)
committersebres <sebres@users.sourceforge.net>2018-03-13 18:52:45 (GMT)
commit64eaafad72c1c1c0a76fb1adffb7200f79debb04 (patch)
tree746280001dd5424d8f68eb4bd663069d458aee15 /generic/tclUtil.c
parent6c0170972f57e3d1daf5684c55eccfb2d78b386f (diff)
downloadtcl-64eaafad72c1c1c0a76fb1adffb7200f79debb04.zip
tcl-64eaafad72c1c1c0a76fb1adffb7200f79debb04.tar.gz
tcl-64eaafad72c1c1c0a76fb1adffb7200f79debb04.tar.bz2
remove unexpected panic by TclTrimLeft/TclTrimRight, handling rewritten:
- instead of check the NTS-byte after string (may cause segfault by access violation if out of range), it checks now the end-character is well-formed utf8-sequence; - both work well now on non-NTS strings (without null character at end); - additionally fixed wrong offsets (trim-length if last char malformed in case of out of range); new function TclTrim introduced (as combination of TclTrimLeft/TclTrimRight).
Diffstat (limited to 'generic/tclUtil.c')
-rw-r--r--generic/tclUtil.c228
1 files changed, 173 insertions, 55 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index d782ea1..96ab96d 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1499,9 +1499,46 @@ Tcl_Backslash(
/*
*----------------------------------------------------------------------
*
+ * TclUtfWellFormedEnd --
+ * Checks the end of utf string is malformed, if yes - wraps bytes
+ * to the given buffer (as well-formed NTS string).
+ *
+ * Results:
+ * The bytes with well-formed end of the string.
+ *
+ * Side effects:
+ * Buffer (DString) may be allocated, so must be released.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static inline const char*
+TclUtfWellFormedEnd(
+ Tcl_DString *buffer, /* Buffer used to hold well-formed string. */
+ CONST char *bytes, /* Pointer to the beginning of the string. */
+ int length) /* Length of the string. */
+{
+ CONST char *l = bytes + length;
+ CONST char *p = Tcl_UtfPrev(l, bytes);
+
+ if (Tcl_UtfCharComplete(p, l - p)) {
+ buffer->string = buffer->staticSpace;
+ return bytes;
+ }
+ /*
+ * Malformed utf-8 end, be sure we've NTS to safe compare of end-character,
+ * avoid segfault by access violation out of range.
+ */
+ Tcl_DStringInit(buffer);
+ Tcl_DStringAppend(buffer, bytes, length);
+ return Tcl_DStringValue(buffer);
+}
+/*
+ *----------------------------------------------------------------------
+ *
* TclTrimRight --
- * Takes two counted strings in the Tcl encoding which must both be
- * null terminated. Conceptually trims from the right side of the
+ * Takes two counted strings in the Tcl encoding. Conceptually
+ * finds the sub string (offset) to trim from the right side of the
* first string all characters found in the second string.
*
* Results:
@@ -1513,8 +1550,8 @@ Tcl_Backslash(
*----------------------------------------------------------------------
*/
-int
-TclTrimRight(
+static inline int
+TrimRight(
const char *bytes, /* String to be trimmed... */
int numBytes, /* ...and its length in bytes */
const char *trim, /* String of trim characters... */
@@ -1523,15 +1560,6 @@ TclTrimRight(
const char *p = bytes + numBytes;
int pInc;
- if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
- Tcl_Panic("TclTrimRight works only on null-terminated strings");
- }
-
- /* Empty strings -> nothing to do */
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
-
/* Outer loop: iterate over string to be trimmed */
do {
Tcl_UniChar ch1;
@@ -1563,13 +1591,42 @@ TclTrimRight(
return numBytes - (p - bytes);
}
+
+int
+TclTrimRight(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ int res;
+ Tcl_DString bytesBuf, trimBuf;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ bytes = TclUtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = TclUtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ res = TrimRight(bytes, numBytes, trim, numTrim);
+ if (res > numBytes) {
+ res = numBytes;
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return res;
+}
/*
*----------------------------------------------------------------------
*
* TclTrimLeft --
- * Takes two counted strings in the Tcl encoding which must both be
- * null terminated. Conceptually trims from the left side of the
+ * Takes two counted strings in the Tcl encoding. Conceptually
+ * finds the sub string (offset) to trim from the left side of the
* first string all characters found in the second string.
*
* Results:
@@ -1581,8 +1638,8 @@ TclTrimRight(
*----------------------------------------------------------------------
*/
-int
-TclTrimLeft(
+static inline int
+TrimLeft(
const char *bytes, /* String to be trimmed... */
int numBytes, /* ...and its length in bytes */
const char *trim, /* String of trim characters... */
@@ -1590,15 +1647,6 @@ TclTrimLeft(
{
const char *p = bytes;
- if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
- Tcl_Panic("TclTrimLeft works only on null-terminated strings");
- }
-
- /* Empty strings -> nothing to do */
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
-
/* Outer loop: iterate over string to be trimmed */
do {
Tcl_UniChar ch1;
@@ -1626,10 +1674,94 @@ TclTrimLeft(
p += pInc;
numBytes -= pInc;
- } while (numBytes);
+ } while (numBytes > 0);
return p - bytes;
}
+
+int
+TclTrimLeft(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ int res;
+ Tcl_DString bytesBuf, trimBuf;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ bytes = TclUtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = TclUtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ res = TrimLeft(bytes, numBytes, trim, numTrim);
+ if (res > numBytes) {
+ res = numBytes;
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return res;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrim --
+ * Finds the sub string (offset) to trim from both sides of the
+ * first string all characters found in the second string.
+ *
+ * Results:
+ * The number of bytes to be removed from the start of the string
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrim(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim, /* ...and its length in bytes */
+ int *trimRight) /* Offset from the end of the string. */
+{
+ int trimLeft;
+ Tcl_DString bytesBuf, trimBuf;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ bytes = TclUtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = TclUtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ trimLeft = TrimLeft(bytes, numBytes, trim, numTrim);
+ if (trimLeft > numBytes) {
+ trimLeft = numBytes;
+ }
+ numBytes -= trimLeft;
+ *trimRight = 0;
+ if (numBytes) {
+ bytes += trimLeft;
+ *trimRight = TrimRight(bytes, numBytes, trim, numTrim);
+ if (*trimRight > numBytes) {
+ *trimRight = numBytes;
+ }
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return trimLeft;
+}
/*
*----------------------------------------------------------------------
@@ -1687,25 +1819,18 @@ Tcl_Concat(
result = (char *) ckalloc((unsigned) (bytesNeeded + argc));
for (p = result, i = 0; i < argc; i++) {
- int trim, elemLength;
+ int triml, trimr, elemLength;
const char *element;
element = argv[i];
elemLength = strlen(argv[i]);
- /* Trim away the leading whitespace */
- trim = TclTrimLeft(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
- element += trim;
- elemLength -= trim;
-
- /*
- * Trim away the trailing whitespace. Do not permit trimming
- * to expose a final backslash character.
- */
-
- trim = TclTrimRight(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
- trim -= trim && (element[elemLength - trim - 1] == '\\');
- elemLength -= trim;
+ /* Trim away the leading/trailing whitespace. */
+ triml = TclTrim(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE, &trimr);
+ element += triml;
+ elemLength -= triml + trimr;
+ /* Do not permit trimming to expose a final backslash character. */
+ elemLength += trimr && (element[elemLength - 1] == '\\');
/* If we're left with empty element after trimming, do nothing */
if (elemLength == 0) {
@@ -1832,23 +1957,16 @@ Tcl_ConcatObj(
Tcl_SetObjLength(resPtr, 0);
for (i = 0; i < objc; i++) {
- int trim;
+ int triml, trimr;
element = TclGetStringFromObj(objv[i], &elemLength);
- /* Trim away the leading whitespace */
- trim = TclTrimLeft(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
- element += trim;
- elemLength -= trim;
-
- /*
- * Trim away the trailing whitespace. Do not permit trimming
- * to expose a final backslash character.
- */
-
- trim = TclTrimRight(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
- trim -= trim && (element[elemLength - trim - 1] == '\\');
- elemLength -= trim;
+ /* Trim away the leading/trailing whitespace. */
+ triml = TclTrim(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE, &trimr);
+ element += triml;
+ elemLength -= triml + trimr;
+ /* Do not permit trimming to expose a final backslash character. */
+ elemLength += trimr && (element[elemLength - 1] == '\\');
/* If we're left with empty element after trimming, do nothing */
if (elemLength == 0) {