summaryrefslogtreecommitdiffstats
path: root/generic/tclUtil.c
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2018-03-15 14:23:18 (GMT)
committerdgp <dgp@users.sourceforge.net>2018-03-15 14:23:18 (GMT)
commitbe8b7d25f6ed27943242888bf395ea2aa50632ae (patch)
treee7faf7871cc0c0180df22b5903d708fba0a73a9c /generic/tclUtil.c
parent0bef3861ff2e6a20bb908d6099a1b55ff80860e8 (diff)
parentaa199edba612a516e6309290fb6dc4442a49a5ee (diff)
downloadtcl-be8b7d25f6ed27943242888bf395ea2aa50632ae.zip
tcl-be8b7d25f6ed27943242888bf395ea2aa50632ae.tar.gz
tcl-be8b7d25f6ed27943242888bf395ea2aa50632ae.tar.bz2
merge 8.7
Diffstat (limited to 'generic/tclUtil.c')
-rw-r--r--generic/tclUtil.c255
1 files changed, 183 insertions, 72 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 3833e30..9136c21 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1645,11 +1645,46 @@ Tcl_Backslash(
/*
*----------------------------------------------------------------------
*
- * TclTrimRight --
+ * UtfWellFormedEnd --
+ * Checks the end of utf string is malformed, if yes - wraps bytes
+ * to the given buffer (as well-formed NTS string). The buffer
+ * argument should be initialized by the caller and ready to use.
+ *
+ * Results:
+ * The bytes with well-formed end of the string.
*
- * Takes two counted strings in the Tcl encoding which must both be null
- * terminated. Conceptually trims from the right side of the first string
- * all characters found in the second string.
+ * Side effects:
+ * Buffer (DString) may be allocated, so must be released.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static inline const char*
+UtfWellFormedEnd(
+ Tcl_DString *buffer, /* Buffer used to hold well-formed string. */
+ const char *bytes, /* Pointer to the beginning of the string. */
+ int length) /* Length of the string. */
+{
+ const char *l = bytes + length;
+ const char *p = Tcl_UtfPrev(l, bytes);
+
+ if (Tcl_UtfCharComplete(p, l - p)) {
+ return bytes;
+ }
+ /*
+ * Malformed utf-8 end, be sure we've NTS to safe compare of end-character,
+ * avoid segfault by access violation out of range.
+ */
+ Tcl_DStringAppend(buffer, bytes, length);
+ return Tcl_DStringValue(buffer);
+}
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrimRight --
+ * Takes two counted strings in the Tcl encoding. Conceptually
+ * finds the sub string (offset) to trim from the right side of the
+ * first string all characters found in the second string.
*
* Results:
* The number of bytes to be removed from the end of the string.
@@ -1660,8 +1695,8 @@ Tcl_Backslash(
*----------------------------------------------------------------------
*/
-int
-TclTrimRight(
+static inline int
+TrimRight(
const char *bytes, /* String to be trimmed... */
int numBytes, /* ...and its length in bytes */
const char *trim, /* String of trim characters... */
@@ -1671,18 +1706,6 @@ TclTrimRight(
int pInc;
Tcl_UniChar ch1 = 0, ch2 = 0;
- if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
- Tcl_Panic("TclTrimRight works only on null-terminated strings");
- }
-
- /*
- * Empty strings -> nothing to do.
- */
-
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
-
/*
* Outer loop: iterate over string to be trimmed.
*/
@@ -1721,15 +1744,46 @@ TclTrimRight(
return numBytes - (p - bytes);
}
+
+int
+TclTrimRight(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ int res;
+ Tcl_DString bytesBuf, trimBuf;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ Tcl_DStringInit(&bytesBuf);
+ Tcl_DStringInit(&trimBuf);
+ bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ res = TrimRight(bytes, numBytes, trim, numTrim);
+ if (res > numBytes) {
+ res = numBytes;
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return res;
+}
/*
*----------------------------------------------------------------------
*
* TclTrimLeft --
*
- * Takes two counted strings in the Tcl encoding which must both be null
- * terminated. Conceptually trims from the left side of the first string
- * all characters found in the second string.
+ * Takes two counted strings in the Tcl encoding. Conceptually
+ * finds the sub string (offset) to trim from the left side of the
+ * first string all characters found in the second string.
*
* Results:
* The number of bytes to be removed from the start of the string.
@@ -1740,8 +1794,8 @@ TclTrimRight(
*----------------------------------------------------------------------
*/
-int
-TclTrimLeft(
+static inline int
+TrimLeft(
const char *bytes, /* String to be trimmed... */
int numBytes, /* ...and its length in bytes */
const char *trim, /* String of trim characters... */
@@ -1750,18 +1804,6 @@ TclTrimLeft(
const char *p = bytes;
Tcl_UniChar ch1 = 0, ch2 = 0;
- if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
- Tcl_Panic("TclTrimLeft works only on null-terminated strings");
- }
-
- /*
- * Empty strings -> nothing to do.
- */
-
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
-
/*
* Outer loop: iterate over string to be trimmed.
*/
@@ -1796,10 +1838,99 @@ TclTrimLeft(
p += pInc;
numBytes -= pInc;
- } while (numBytes);
+ } while (numBytes > 0);
return p - bytes;
}
+
+int
+TclTrimLeft(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ int res;
+ Tcl_DString bytesBuf, trimBuf;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ Tcl_DStringInit(&bytesBuf);
+ Tcl_DStringInit(&trimBuf);
+ bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ res = TrimLeft(bytes, numBytes, trim, numTrim);
+ if (res > numBytes) {
+ res = numBytes;
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return res;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrim --
+ * Finds the sub string (offset) to trim from both sides of the
+ * first string all characters found in the second string.
+ *
+ * Results:
+ * The number of bytes to be removed from the start of the string
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrim(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim, /* ...and its length in bytes */
+ int *trimRight) /* Offset from the end of the string. */
+{
+ int trimLeft;
+ Tcl_DString bytesBuf, trimBuf;
+
+ *trimRight = 0;
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ Tcl_DStringInit(&bytesBuf);
+ Tcl_DStringInit(&trimBuf);
+ bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ trimLeft = TrimLeft(bytes, numBytes, trim, numTrim);
+ if (trimLeft > numBytes) {
+ trimLeft = numBytes;
+ }
+ numBytes -= trimLeft;
+ /* have to trim yet (first char was already verified within TrimLeft) */
+ if (numBytes > 1) {
+ bytes += trimLeft;
+ *trimRight = TrimRight(bytes, numBytes, trim, numTrim);
+ if (*trimRight > numBytes) {
+ *trimRight = numBytes;
+ }
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return trimLeft;
+}
/*
*----------------------------------------------------------------------
@@ -1867,30 +1998,20 @@ Tcl_Concat(
result = ckalloc((unsigned) (bytesNeeded + argc));
for (p = result, i = 0; i < argc; i++) {
- int trim, elemLength;
+ int triml, trimr, elemLength;
const char *element;
element = argv[i];
elemLength = strlen(argv[i]);
- /*
- * Trim away the leading whitespace.
- */
-
- trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- element += trim;
- elemLength -= trim;
-
- /*
- * Trim away the trailing whitespace. Do not permit trimming to expose
- * a final backslash character.
- */
+ /* Trim away the leading/trailing whitespace. */
+ triml = TclTrim(element, elemLength, CONCAT_TRIM_SET,
+ CONCAT_WS_SIZE, &trimr);
+ element += triml;
+ elemLength -= triml + trimr;
- trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- trim -= trim && (element[elemLength - trim - 1] == '\\');
- elemLength -= trim;
+ /* Do not permit trimming to expose a final backslash character. */
+ elemLength += trimr && (element[elemLength - 1] == '\\');
/*
* If we're left with empty element after trimming, do nothing.
@@ -2010,28 +2131,18 @@ Tcl_ConcatObj(
Tcl_SetObjLength(resPtr, 0);
for (i = 0; i < objc; i++) {
- int trim;
+ int triml, trimr;
element = TclGetStringFromObj(objv[i], &elemLength);
- /*
- * Trim away the leading whitespace.
- */
-
- trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- element += trim;
- elemLength -= trim;
-
- /*
- * Trim away the trailing whitespace. Do not permit trimming to expose
- * a final backslash character.
- */
+ /* Trim away the leading/trailing whitespace. */
+ triml = TclTrim(element, elemLength, CONCAT_TRIM_SET,
+ CONCAT_WS_SIZE, &trimr);
+ element += triml;
+ elemLength -= triml + trimr;
- trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- trim -= trim && (element[elemLength - trim - 1] == '\\');
- elemLength -= trim;
+ /* Do not permit trimming to expose a final backslash character. */
+ elemLength += trimr && (element[elemLength - 1] == '\\');
/*
* If we're left with empty element after trimming, do nothing.