summaryrefslogtreecommitdiffstats
path: root/generic/tclUtil.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclUtil.c')
-rw-r--r--generic/tclUtil.c506
1 files changed, 389 insertions, 117 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 0eddb00..6eab7b8 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -107,6 +107,8 @@ static Tcl_ThreadDataKey precisionKey;
static void ClearHash(Tcl_HashTable *tablePtr);
static void FreeProcessGlobalValue(ClientData clientData);
static void FreeThreadHash(ClientData clientData);
+static int GetEndOffsetFromObj(Tcl_Obj *objPtr, int endValue,
+ int *indexPtr);
static Tcl_HashTable * GetThreadHash(Tcl_ThreadDataKey *keyPtr);
static int SetEndOffsetFromAny(Tcl_Interp *interp,
Tcl_Obj *objPtr);
@@ -974,7 +976,7 @@ Tcl_ScanCountedElement(
int *flagPtr) /* Where to store information to guide
* Tcl_ConvertElement. */
{
- int flags = CONVERT_ANY;
+ char flags = CONVERT_ANY;
int numBytes = TclScanElement(src, length, &flags);
*flagPtr = flags;
@@ -1015,7 +1017,7 @@ int
TclScanElement(
const char *src, /* String to convert to Tcl list element. */
int length, /* Number of bytes in src, or -1. */
- int *flagPtr) /* Where to store information to guide
+ char *flagPtr) /* Where to store information to guide
* Tcl_ConvertElement. */
{
const char *p = src;
@@ -1547,11 +1549,10 @@ Tcl_Merge(
int argc, /* How many strings to merge. */
const char *const *argv) /* Array of string values. */
{
-#define LOCAL_SIZE 20
- int localFlags[LOCAL_SIZE], *flagPtr = NULL;
+#define LOCAL_SIZE 64
+ char localFlags[LOCAL_SIZE], *flagPtr = NULL;
int i, bytesNeeded = 0;
char *result, *dst;
- const int maxFlags = UINT_MAX / sizeof(int);
/*
* Handle empty list case first, so logic of the general case can be
@@ -1570,22 +1571,8 @@ Tcl_Merge(
if (argc <= LOCAL_SIZE) {
flagPtr = localFlags;
- } else if (argc > maxFlags) {
- /*
- * We cannot allocate a large enough flag array to format this list in
- * one pass. We could imagine converting this routine to a multi-pass
- * implementation, but for sizeof(int) == 4, the limit is a max of
- * 2^30 list elements and since each element is at least one byte
- * formatted, and requires one byte space between it and the next one,
- * that a minimum space requirement of 2^31 bytes, which is already
- * INT_MAX. If we tried to format a list of > maxFlags elements, we're
- * just going to overflow the size limits on the formatted string
- * anyway, so just issue that same panic early.
- */
-
- Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
} else {
- flagPtr = ckalloc(argc * sizeof(int));
+ flagPtr = ckalloc(argc);
}
for (i = 0; i < argc; i++) {
flagPtr[i] = ( i ? TCL_DONT_QUOTE_HASH : 0 );
@@ -1656,11 +1643,46 @@ Tcl_Backslash(
/*
*----------------------------------------------------------------------
*
- * TclTrimRight --
+ * UtfWellFormedEnd --
+ * Checks the end of utf string is malformed, if yes - wraps bytes
+ * to the given buffer (as well-formed NTS string). The buffer
+ * argument should be initialized by the caller and ready to use.
+ *
+ * Results:
+ * The bytes with well-formed end of the string.
+ *
+ * Side effects:
+ * Buffer (DString) may be allocated, so must be released.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static inline const char*
+UtfWellFormedEnd(
+ Tcl_DString *buffer, /* Buffer used to hold well-formed string. */
+ const char *bytes, /* Pointer to the beginning of the string. */
+ int length) /* Length of the string. */
+{
+ const char *l = bytes + length;
+ const char *p = Tcl_UtfPrev(l, bytes);
+
+ if (Tcl_UtfCharComplete(p, l - p)) {
+ return bytes;
+ }
+ /*
+ * Malformed utf-8 end, be sure we've NTS to safe compare of end-character,
+ * avoid segfault by access violation out of range.
+ */
+ Tcl_DStringAppend(buffer, bytes, length);
+ return Tcl_DStringValue(buffer);
+}
+/*
+ *----------------------------------------------------------------------
*
- * Takes two counted strings in the Tcl encoding which must both be null
- * terminated. Conceptually trims from the right side of the first string
- * all characters found in the second string.
+ * TclTrimRight --
+ * Takes two counted strings in the Tcl encoding. Conceptually
+ * finds the sub string (offset) to trim from the right side of the
+ * first string all characters found in the second string.
*
* Results:
* The number of bytes to be removed from the end of the string.
@@ -1671,8 +1693,8 @@ Tcl_Backslash(
*----------------------------------------------------------------------
*/
-int
-TclTrimRight(
+static inline int
+TrimRight(
const char *bytes, /* String to be trimmed... */
int numBytes, /* ...and its length in bytes */
const char *trim, /* String of trim characters... */
@@ -1681,18 +1703,6 @@ TclTrimRight(
const char *p = bytes + numBytes;
int pInc;
- if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
- Tcl_Panic("TclTrimRight works only on null-terminated strings");
- }
-
- /*
- * Empty strings -> nothing to do.
- */
-
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
-
/*
* Outer loop: iterate over string to be trimmed.
*/
@@ -1733,15 +1743,46 @@ TclTrimRight(
return numBytes - (p - bytes);
}
+
+int
+TclTrimRight(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ int res;
+ Tcl_DString bytesBuf, trimBuf;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ Tcl_DStringInit(&bytesBuf);
+ Tcl_DStringInit(&trimBuf);
+ bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ res = TrimRight(bytes, numBytes, trim, numTrim);
+ if (res > numBytes) {
+ res = numBytes;
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return res;
+}
/*
*----------------------------------------------------------------------
*
* TclTrimLeft --
*
- * Takes two counted strings in the Tcl encoding which must both be null
- * terminated. Conceptually trims from the left side of the first string
- * all characters found in the second string.
+ * Takes two counted strings in the Tcl encoding. Conceptually
+ * finds the sub string (offset) to trim from the left side of the
+ * first string all characters found in the second string.
*
* Results:
* The number of bytes to be removed from the start of the string.
@@ -1752,8 +1793,8 @@ TclTrimRight(
*----------------------------------------------------------------------
*/
-int
-TclTrimLeft(
+static inline int
+TrimLeft(
const char *bytes, /* String to be trimmed... */
int numBytes, /* ...and its length in bytes */
const char *trim, /* String of trim characters... */
@@ -1761,18 +1802,6 @@ TclTrimLeft(
{
const char *p = bytes;
- if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
- Tcl_Panic("TclTrimLeft works only on null-terminated strings");
- }
-
- /*
- * Empty strings -> nothing to do.
- */
-
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
-
/*
* Outer loop: iterate over string to be trimmed.
*/
@@ -1809,10 +1838,99 @@ TclTrimLeft(
p += pInc;
numBytes -= pInc;
- } while (numBytes);
+ } while (numBytes > 0);
return p - bytes;
}
+
+int
+TclTrimLeft(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ int res;
+ Tcl_DString bytesBuf, trimBuf;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ Tcl_DStringInit(&bytesBuf);
+ Tcl_DStringInit(&trimBuf);
+ bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ res = TrimLeft(bytes, numBytes, trim, numTrim);
+ if (res > numBytes) {
+ res = numBytes;
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return res;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrim --
+ * Finds the sub string (offset) to trim from both sides of the
+ * first string all characters found in the second string.
+ *
+ * Results:
+ * The number of bytes to be removed from the start of the string
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrim(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim, /* ...and its length in bytes */
+ int *trimRight) /* Offset from the end of the string. */
+{
+ int trimLeft;
+ Tcl_DString bytesBuf, trimBuf;
+
+ *trimRight = 0;
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ Tcl_DStringInit(&bytesBuf);
+ Tcl_DStringInit(&trimBuf);
+ bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ trimLeft = TrimLeft(bytes, numBytes, trim, numTrim);
+ if (trimLeft > numBytes) {
+ trimLeft = numBytes;
+ }
+ numBytes -= trimLeft;
+ /* have to trim yet (first char was already verified within TrimLeft) */
+ if (numBytes > 1) {
+ bytes += trimLeft;
+ *trimRight = TrimRight(bytes, numBytes, trim, numTrim);
+ if (*trimRight > numBytes) {
+ *trimRight = numBytes;
+ }
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return trimLeft;
+}
/*
*----------------------------------------------------------------------
@@ -1880,30 +1998,20 @@ Tcl_Concat(
result = ckalloc((unsigned) (bytesNeeded + argc));
for (p = result, i = 0; i < argc; i++) {
- int trim, elemLength;
+ int triml, trimr, elemLength;
const char *element;
element = argv[i];
elemLength = strlen(argv[i]);
- /*
- * Trim away the leading whitespace.
- */
+ /* Trim away the leading/trailing whitespace. */
+ triml = TclTrim(element, elemLength, CONCAT_TRIM_SET,
+ CONCAT_WS_SIZE, &trimr);
+ element += triml;
+ elemLength -= triml + trimr;
- trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- element += trim;
- elemLength -= trim;
-
- /*
- * Trim away the trailing whitespace. Do not permit trimming to expose
- * a final backslash character.
- */
-
- trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- trim -= trim && (element[elemLength - trim - 1] == '\\');
- elemLength -= trim;
+ /* Do not permit trimming to expose a final backslash character. */
+ elemLength += trimr && (element[elemLength - 1] == '\\');
/*
* If we're left with empty element after trimming, do nothing.
@@ -2023,28 +2131,18 @@ Tcl_ConcatObj(
Tcl_SetObjLength(resPtr, 0);
for (i = 0; i < objc; i++) {
- int trim;
+ int triml, trimr;
element = TclGetStringFromObj(objv[i], &elemLength);
- /*
- * Trim away the leading whitespace.
- */
+ /* Trim away the leading/trailing whitespace. */
+ triml = TclTrim(element, elemLength, CONCAT_TRIM_SET,
+ CONCAT_WS_SIZE, &trimr);
+ element += triml;
+ elemLength -= triml + trimr;
- trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- element += trim;
- elemLength -= trim;
-
- /*
- * Trim away the trailing whitespace. Do not permit trimming to expose
- * a final backslash character.
- */
-
- trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- trim -= trim && (element[elemLength - trim - 1] == '\\');
- elemLength -= trim;
+ /* Do not permit trimming to expose a final backslash character. */
+ elemLength += trimr && (element[elemLength - 1] == '\\');
/*
* If we're left with empty element after trimming, do nothing.
@@ -2545,7 +2643,8 @@ TclStringMatchObj(
udata = Tcl_GetUnicodeFromObj(strObj, &length);
uptn = Tcl_GetUnicodeFromObj(ptnObj, &plen);
match = TclUniCharMatch(udata, length, uptn, plen, flags);
- } else if (TclIsPureByteArray(strObj) && !flags) {
+ } else if (TclIsPureByteArray(strObj) && TclIsPureByteArray(ptnObj)
+ && !flags) {
unsigned char *data, *ptn;
data = Tcl_GetByteArrayFromObj(strObj, &length);
@@ -2717,7 +2816,7 @@ Tcl_DStringAppendElement(
{
char *dst = dsPtr->string + dsPtr->length;
int needSpace = TclNeedSpace(dsPtr->string, dst);
- int flags = needSpace ? TCL_DONT_QUOTE_HASH : 0;
+ char flags = needSpace ? TCL_DONT_QUOTE_HASH : 0;
int newSize = dsPtr->length + needSpace
+ TclScanElement(element, -1, &flags);
@@ -3545,22 +3644,27 @@ TclFormatInt(
*
* TclGetIntForIndex --
*
- * This function returns an integer corresponding to the list index held
- * in a Tcl object. The Tcl object's value is expected to be in the
- * format integer([+-]integer)? or the format end([+-]integer)?.
- *
- * Results:
- * The return value is normally TCL_OK, which means that the index was
- * successfully stored into the location referenced by "indexPtr". If the
- * Tcl object referenced by "objPtr" has the value "end", the value
- * stored is "endValue". If "objPtr"s values is not of one of the
- * expected formats, TCL_ERROR is returned and, if "interp" is non-NULL,
- * an error message is left in the interpreter's result object.
- *
- * Side effects:
- * The object referenced by "objPtr" might be converted to an integer,
- * wide integer, or end-based-index object.
- *
+ * Provides an integer corresponding to the list index held in a Tcl
+ * object. The string value 'objPtr' is expected have the format
+ * integer([+-]integer)? or end([+-]integer)?.
+ *
+ * Value
+ * TCL_OK
+ *
+ * The index is stored at the address given by by 'indexPtr'. If
+ * 'objPtr' has the value "end", the value stored is 'endValue'.
+ *
+ * TCL_ERROR
+ *
+ * The value of 'objPtr' does not have one of the expected formats. If
+ * 'interp' is non-NULL, an error message is left in the interpreter's
+ * result object.
+ *
+ * Effect
+ *
+ * The object referenced by 'objPtr' is converted, as needed, to an
+ * integer, wide integer, or end-based-index object.
+ *
*----------------------------------------------------------------------
*/
@@ -3584,13 +3688,7 @@ TclGetIntForIndex(
return TCL_OK;
}
- if (SetEndOffsetFromAny(NULL, objPtr) == TCL_OK) {
- /*
- * If the object is already an offset from the end of the list, or can
- * be converted to one, use it.
- */
-
- *indexPtr = endValue + objPtr->internalRep.longValue;
+ if (GetEndOffsetFromObj(objPtr, endValue, indexPtr) == TCL_OK) {
return TCL_OK;
}
@@ -3693,6 +3791,41 @@ UpdateStringOfEndOffset(
/*
*----------------------------------------------------------------------
*
+ * GetEndOffsetFromObj --
+ *
+ * Look for a string of the form "end[+-]offset" and convert it to an
+ * internal representation holding the offset.
+ *
+ * Results:
+ * Tcl return code.
+ *
+ * Side effects:
+ * May store a Tcl_ObjType.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+GetEndOffsetFromObj(
+ Tcl_Obj *objPtr, /* Pointer to the object to parse */
+ int endValue, /* The value to be stored at "indexPtr" if
+ * "objPtr" holds "end". */
+ int *indexPtr) /* Location filled in with an integer
+ * representing an index. */
+{
+ if (SetEndOffsetFromAny(NULL, objPtr) != TCL_OK) {
+ return TCL_ERROR;
+ }
+
+ /* TODO: Handle overflow cases sensibly */
+ *indexPtr = endValue + (int)objPtr->internalRep.longValue;
+ return TCL_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
* SetEndOffsetFromAny --
*
* Look for a string of the form "end[+-]offset" and convert it to an
@@ -3759,6 +3892,8 @@ SetEndOffsetFromAny(
return TCL_ERROR;
}
if (bytes[3] == '-') {
+
+ /* TODO: Review overflow concerns here! */
offset = -offset;
}
} else {
@@ -3790,6 +3925,143 @@ SetEndOffsetFromAny(
/*
*----------------------------------------------------------------------
*
+ * TclIndexEncode --
+ *
+ * Parse objPtr to determine if it is an index value. Two cases
+ * are possible. The value objPtr might be parsed as an absolute
+ * index value in the C signed int range. Note that this includes
+ * index values that are integers as presented and it includes index
+ * arithmetic expressions. The absolute index values that can be
+ * directly meaningful as an index into either a list or a string are
+ * those integer values >= TCL_INDEX_START (0)
+ * and < TCL_INDEX_AFTER (INT_MAX).
+ * The largest string supported in Tcl 8 has bytelength INT_MAX.
+ * This means the largest supported character length is also INT_MAX,
+ * and the index of the last character in a string of length INT_MAX
+ * is INT_MAX-1.
+ *
+ * Any absolute index value parsed outside that range is encoded
+ * using the before and after values passed in by the
+ * caller as the encoding to use for indices that are either
+ * less than or greater than the usable index range. TCL_INDEX_AFTER
+ * is available as a good choice for most callers to use for
+ * after. Likewise, the value TCL_INDEX_BEFORE is good for
+ * most callers to use for before. Other values are possible
+ * when the caller knows it is helpful in producing its own behavior
+ * for indices before and after the indexed item.
+ *
+ * A token can also be parsed as an end-relative index expression.
+ * All end-relative expressions that indicate an index larger
+ * than end (end+2, end--5) point beyond the end of the indexed
+ * collection, and can be encoded as after. The end-relative
+ * expressions that indicate an index less than or equal to end
+ * are encoded relative to the value TCL_INDEX_END (-2). The
+ * index "end" is encoded as -2, down to the index "end-0x7ffffffe"
+ * which is encoded as INT_MIN. Since the largest index into a
+ * string possible in Tcl 8 is 0x7ffffffe, the interpretation of
+ * "end-0x7ffffffe" for that largest string would be 0. Thus,
+ * if the tokens "end-0x7fffffff" or "end+-0x80000000" are parsed,
+ * they can be encoded with the before value.
+ *
+ * These details will require re-examination whenever string and
+ * list length limits are increased, but that will likely also
+ * mean a revised routine capable of returning Tcl_WideInt values.
+ *
+ * Returns:
+ * TCL_OK if parsing succeeded, and TCL_ERROR if it failed.
+ *
+ * Side effects:
+ * When TCL_OK is returned, the encoded index value is written
+ * to *indexPtr.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclIndexEncode(
+ Tcl_Interp *interp, /* For error reporting, may be NULL */
+ Tcl_Obj *objPtr, /* Index value to parse */
+ int before, /* Value to return for index before beginning */
+ int after, /* Value to return for index after end */
+ int *indexPtr) /* Where to write the encoded answer, not NULL */
+{
+ int idx;
+
+ if (TCL_OK == TclGetIntFromObj(NULL, objPtr, &idx)) {
+ /* We parsed a value in the range INT_MIN...INT_MAX */
+ integerEncode:
+ if (idx < TCL_INDEX_START) {
+ /* All negative absolute indices are "before the beginning" */
+ idx = before;
+ } else if (idx == INT_MAX) {
+ /* This index value is always "after the end" */
+ idx = after;
+ }
+ /* usual case, the absolute index value encodes itself */
+ } else if (TCL_OK == GetEndOffsetFromObj(objPtr, 0, &idx)) {
+ /*
+ * We parsed an end+offset index value.
+ * idx holds the offset value in the range INT_MIN...INT_MAX.
+ */
+ if (idx > 0) {
+ /*
+ * All end+postive or end-negative expressions
+ * always indicate "after the end".
+ */
+ idx = after;
+ } else if (idx < INT_MIN - TCL_INDEX_END) {
+ /* These indices always indicate "before the beginning */
+ idx = before;
+ } else {
+ /* Encoded end-positive (or end+negative) are offset */
+ idx += TCL_INDEX_END;
+ }
+
+ /* TODO: Consider flag to suppress repeated end-offset parse. */
+ } else if (TCL_OK == TclGetIntForIndexM(interp, objPtr, 0, &idx)) {
+ /*
+ * Only reach this case when the index value is a
+ * constant index arithmetic expression, and idx
+ * holds the result. Treat it the same as if it were
+ * parsed as an absolute integer value.
+ */
+ goto integerEncode;
+ } else {
+ return TCL_ERROR;
+ }
+ *indexPtr = idx;
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclIndexDecode --
+ *
+ * Decodes a value previously encoded by TclIndexEncode. The argument
+ * endValue indicates what value of "end" should be used in the
+ * decoding.
+ *
+ * Results:
+ * The decoded index value.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclIndexDecode(
+ int encoded, /* Value to decode */
+ int endValue) /* Meaning of "end" to use, > TCL_INDEX_END */
+{
+ if (encoded <= TCL_INDEX_END) {
+ return (encoded - TCL_INDEX_END) + endValue;
+ }
+ return encoded;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* TclCheckBadOctal --
*
* This function checks for a bad octal value and appends a meaningful