summaryrefslogtreecommitdiffstats
path: root/generic/tclUtil.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclUtil.c')
-rw-r--r--generic/tclUtil.c930
1 files changed, 644 insertions, 286 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index a4d523a..fa8c925 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -15,6 +15,7 @@
#include "tclInt.h"
#include "tclParse.h"
#include "tclStringTrim.h"
+#include "tommath.h"
#include <math.h>
/*
@@ -107,10 +108,11 @@ static Tcl_ThreadDataKey precisionKey;
static void ClearHash(Tcl_HashTable *tablePtr);
static void FreeProcessGlobalValue(ClientData clientData);
static void FreeThreadHash(ClientData clientData);
+static int GetEndOffsetFromObj(Tcl_Obj *objPtr,
+ Tcl_WideInt endValue, Tcl_WideInt *indexPtr);
static Tcl_HashTable * GetThreadHash(Tcl_ThreadDataKey *keyPtr);
-static int SetEndOffsetFromAny(Tcl_Interp *interp,
- Tcl_Obj *objPtr);
-static void UpdateStringOfEndOffset(Tcl_Obj *objPtr);
+static int GetWideForIndex(Tcl_Interp *interp, Tcl_Obj *objPtr,
+ Tcl_WideInt endValue, Tcl_WideInt *widePtr);
static int FindElement(Tcl_Interp *interp, const char *string,
int stringLength, const char *typeStr,
const char *typeCode, const char **elementPtr,
@@ -119,16 +121,20 @@ static int FindElement(Tcl_Interp *interp, const char *string,
/*
* The following is the Tcl object type definition for an object that
* represents a list index in the form, "end-offset". It is used as a
- * performance optimization in TclGetIntForIndex. The internal rep is an
- * integer, so no memory management is required for it.
+ * performance optimization in TclGetIntForIndex. The internal rep is
+ * stored directly in the wideValue, so no memory management is required
+ * for it. This is a caching intrep, keeping the result of a parse
+ * around. This type is only created from a pre-existing string, so an
+ * updateStringProc will never be called and need not exist. The type
+ * is unregistered, so has no need of a setFromAnyProc either.
*/
-const Tcl_ObjType tclEndOffsetType = {
+static const Tcl_ObjType endOffsetType = {
"end-offset", /* name */
NULL, /* freeIntRepProc */
NULL, /* dupIntRepProc */
- UpdateStringOfEndOffset, /* updateStringProc */
- SetEndOffsetFromAny
+ NULL, /* updateStringProc */
+ NULL /* setFromAnyProc */
};
/*
@@ -974,7 +980,7 @@ Tcl_ScanCountedElement(
int *flagPtr) /* Where to store information to guide
* Tcl_ConvertElement. */
{
- int flags = CONVERT_ANY;
+ char flags = CONVERT_ANY;
int numBytes = TclScanElement(src, length, &flags);
*flagPtr = flags;
@@ -1015,7 +1021,7 @@ int
TclScanElement(
const char *src, /* String to convert to Tcl list element. */
int length, /* Number of bytes in src, or -1. */
- int *flagPtr) /* Where to store information to guide
+ char *flagPtr) /* Where to store information to guide
* Tcl_ConvertElement. */
{
const char *p = src;
@@ -1547,11 +1553,10 @@ Tcl_Merge(
int argc, /* How many strings to merge. */
const char *const *argv) /* Array of string values. */
{
-#define LOCAL_SIZE 20
- int localFlags[LOCAL_SIZE], *flagPtr = NULL;
+#define LOCAL_SIZE 64
+ char localFlags[LOCAL_SIZE], *flagPtr = NULL;
int i, bytesNeeded = 0;
char *result, *dst;
- const int maxFlags = UINT_MAX / sizeof(int);
/*
* Handle empty list case first, so logic of the general case can be
@@ -1570,22 +1575,8 @@ Tcl_Merge(
if (argc <= LOCAL_SIZE) {
flagPtr = localFlags;
- } else if (argc > maxFlags) {
- /*
- * We cannot allocate a large enough flag array to format this list in
- * one pass. We could imagine converting this routine to a multi-pass
- * implementation, but for sizeof(int) == 4, the limit is a max of
- * 2^30 list elements and since each element is at least one byte
- * formatted, and requires one byte space between it and the next one,
- * that a minimum space requirement of 2^31 bytes, which is already
- * INT_MAX. If we tried to format a list of > maxFlags elements, we're
- * just going to overflow the size limits on the formatted string
- * anyway, so just issue that same panic early.
- */
-
- Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
} else {
- flagPtr = ckalloc(argc * sizeof(int));
+ flagPtr = ckalloc(argc);
}
for (i = 0; i < argc; i++) {
flagPtr[i] = ( i ? TCL_DONT_QUOTE_HASH : 0 );
@@ -1619,6 +1610,7 @@ Tcl_Merge(
return result;
}
+#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9
/*
*----------------------------------------------------------------------
*
@@ -1646,21 +1638,57 @@ Tcl_Backslash(
* src, unless NULL. */
{
char buf[TCL_UTF_MAX];
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
Tcl_UtfBackslash(src, readPtr, buf);
TclUtfToUniChar(buf, &ch);
return (char) ch;
}
+#endif /* !TCL_NO_DEPRECATED */
/*
*----------------------------------------------------------------------
*
- * TclTrimRight --
+ * UtfWellFormedEnd --
+ * Checks the end of utf string is malformed, if yes - wraps bytes
+ * to the given buffer (as well-formed NTS string). The buffer
+ * argument should be initialized by the caller and ready to use.
*
- * Takes two counted strings in the Tcl encoding which must both be null
- * terminated. Conceptually trims from the right side of the first string
- * all characters found in the second string.
+ * Results:
+ * The bytes with well-formed end of the string.
+ *
+ * Side effects:
+ * Buffer (DString) may be allocated, so must be released.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static inline const char*
+UtfWellFormedEnd(
+ Tcl_DString *buffer, /* Buffer used to hold well-formed string. */
+ const char *bytes, /* Pointer to the beginning of the string. */
+ int length) /* Length of the string. */
+{
+ const char *l = bytes + length;
+ const char *p = Tcl_UtfPrev(l, bytes);
+
+ if (Tcl_UtfCharComplete(p, l - p)) {
+ return bytes;
+ }
+ /*
+ * Malformed utf-8 end, be sure we've NTS to safe compare of end-character,
+ * avoid segfault by access violation out of range.
+ */
+ Tcl_DStringAppend(buffer, bytes, length);
+ return Tcl_DStringValue(buffer);
+}
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrimRight --
+ * Takes two counted strings in the Tcl encoding. Conceptually
+ * finds the sub string (offset) to trim from the right side of the
+ * first string all characters found in the second string.
*
* Results:
* The number of bytes to be removed from the end of the string.
@@ -1671,8 +1699,8 @@ Tcl_Backslash(
*----------------------------------------------------------------------
*/
-int
-TclTrimRight(
+static inline int
+TrimRight(
const char *bytes, /* String to be trimmed... */
int numBytes, /* ...and its length in bytes */
const char *trim, /* String of trim characters... */
@@ -1680,25 +1708,13 @@ TclTrimRight(
{
const char *p = bytes + numBytes;
int pInc;
-
- if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
- Tcl_Panic("TclTrimRight works only on null-terminated strings");
- }
-
- /*
- * Empty strings -> nothing to do.
- */
-
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
+ Tcl_UniChar ch1 = 0, ch2 = 0;
/*
* Outer loop: iterate over string to be trimmed.
*/
do {
- Tcl_UniChar ch1;
const char *q = trim;
int bytesLeft = numTrim;
@@ -1710,7 +1726,6 @@ TclTrimRight(
*/
do {
- Tcl_UniChar ch2;
int qInc = TclUtfToUniChar(q, &ch2);
if (ch1 == ch2) {
@@ -1733,15 +1748,46 @@ TclTrimRight(
return numBytes - (p - bytes);
}
+
+int
+TclTrimRight(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ int res;
+ Tcl_DString bytesBuf, trimBuf;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ Tcl_DStringInit(&bytesBuf);
+ Tcl_DStringInit(&trimBuf);
+ bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ res = TrimRight(bytes, numBytes, trim, numTrim);
+ if (res > numBytes) {
+ res = numBytes;
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return res;
+}
/*
*----------------------------------------------------------------------
*
* TclTrimLeft --
*
- * Takes two counted strings in the Tcl encoding which must both be null
- * terminated. Conceptually trims from the left side of the first string
- * all characters found in the second string.
+ * Takes two counted strings in the Tcl encoding. Conceptually
+ * finds the sub string (offset) to trim from the left side of the
+ * first string all characters found in the second string.
*
* Results:
* The number of bytes to be removed from the start of the string.
@@ -1752,33 +1798,21 @@ TclTrimRight(
*----------------------------------------------------------------------
*/
-int
-TclTrimLeft(
+static inline int
+TrimLeft(
const char *bytes, /* String to be trimmed... */
int numBytes, /* ...and its length in bytes */
const char *trim, /* String of trim characters... */
int numTrim) /* ...and its length in bytes */
{
const char *p = bytes;
-
- if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
- Tcl_Panic("TclTrimLeft works only on null-terminated strings");
- }
-
- /*
- * Empty strings -> nothing to do.
- */
-
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
+ Tcl_UniChar ch1 = 0, ch2 = 0;
/*
* Outer loop: iterate over string to be trimmed.
*/
do {
- Tcl_UniChar ch1;
int pInc = TclUtfToUniChar(p, &ch1);
const char *q = trim;
int bytesLeft = numTrim;
@@ -1788,7 +1822,6 @@ TclTrimLeft(
*/
do {
- Tcl_UniChar ch2;
int qInc = TclUtfToUniChar(q, &ch2);
if (ch1 == ch2) {
@@ -1809,10 +1842,99 @@ TclTrimLeft(
p += pInc;
numBytes -= pInc;
- } while (numBytes);
+ } while (numBytes > 0);
return p - bytes;
}
+
+int
+TclTrimLeft(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ int res;
+ Tcl_DString bytesBuf, trimBuf;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ Tcl_DStringInit(&bytesBuf);
+ Tcl_DStringInit(&trimBuf);
+ bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ res = TrimLeft(bytes, numBytes, trim, numTrim);
+ if (res > numBytes) {
+ res = numBytes;
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return res;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrim --
+ * Finds the sub string (offset) to trim from both sides of the
+ * first string all characters found in the second string.
+ *
+ * Results:
+ * The number of bytes to be removed from the start of the string
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrim(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim, /* ...and its length in bytes */
+ int *trimRight) /* Offset from the end of the string. */
+{
+ int trimLeft;
+ Tcl_DString bytesBuf, trimBuf;
+
+ *trimRight = 0;
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ Tcl_DStringInit(&bytesBuf);
+ Tcl_DStringInit(&trimBuf);
+ bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ trimLeft = TrimLeft(bytes, numBytes, trim, numTrim);
+ if (trimLeft > numBytes) {
+ trimLeft = numBytes;
+ }
+ numBytes -= trimLeft;
+ /* have to trim yet (first char was already verified within TrimLeft) */
+ if (numBytes > 1) {
+ bytes += trimLeft;
+ *trimRight = TrimRight(bytes, numBytes, trim, numTrim);
+ if (*trimRight > numBytes) {
+ *trimRight = numBytes;
+ }
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return trimLeft;
+}
/*
*----------------------------------------------------------------------
@@ -1880,30 +2002,20 @@ Tcl_Concat(
result = ckalloc((unsigned) (bytesNeeded + argc));
for (p = result, i = 0; i < argc; i++) {
- int trim, elemLength;
+ int triml, trimr, elemLength;
const char *element;
element = argv[i];
elemLength = strlen(argv[i]);
- /*
- * Trim away the leading whitespace.
- */
-
- trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- element += trim;
- elemLength -= trim;
-
- /*
- * Trim away the trailing whitespace. Do not permit trimming to expose
- * a final backslash character.
- */
+ /* Trim away the leading/trailing whitespace. */
+ triml = TclTrim(element, elemLength, CONCAT_TRIM_SET,
+ CONCAT_WS_SIZE, &trimr);
+ element += triml;
+ elemLength -= triml + trimr;
- trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- trim -= trim && (element[elemLength - trim - 1] == '\\');
- elemLength -= trim;
+ /* Do not permit trimming to expose a final backslash character. */
+ elemLength += trimr && (element[elemLength - 1] == '\\');
/*
* If we're left with empty element after trimming, do nothing.
@@ -2023,28 +2135,18 @@ Tcl_ConcatObj(
Tcl_SetObjLength(resPtr, 0);
for (i = 0; i < objc; i++) {
- int trim;
+ int triml, trimr;
element = TclGetStringFromObj(objv[i], &elemLength);
- /*
- * Trim away the leading whitespace.
- */
-
- trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- element += trim;
- elemLength -= trim;
-
- /*
- * Trim away the trailing whitespace. Do not permit trimming to expose
- * a final backslash character.
- */
+ /* Trim away the leading/trailing whitespace. */
+ triml = TclTrim(element, elemLength, CONCAT_TRIM_SET,
+ CONCAT_WS_SIZE, &trimr);
+ element += triml;
+ elemLength -= triml + trimr;
- trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- trim -= trim && (element[elemLength - trim - 1] == '\\');
- elemLength -= trim;
+ /* Do not permit trimming to expose a final backslash character. */
+ elemLength += trimr && (element[elemLength - 1] == '\\');
/*
* If we're left with empty element after trimming, do nothing.
@@ -2122,7 +2224,7 @@ Tcl_StringCaseMatch(
{
int p, charLen;
const char *pstart = pattern;
- Tcl_UniChar ch1, ch2;
+ Tcl_UniChar ch1 = 0, ch2 = 0;
while (1) {
p = *pattern;
@@ -2232,7 +2334,7 @@ Tcl_StringCaseMatch(
*/
if (p == '[') {
- Tcl_UniChar startChar, endChar;
+ Tcl_UniChar startChar = 0, endChar = 0;
pattern++;
if (UCHAR(*str) < 0x80) {
@@ -2545,7 +2647,8 @@ TclStringMatchObj(
udata = Tcl_GetUnicodeFromObj(strObj, &length);
uptn = Tcl_GetUnicodeFromObj(ptnObj, &plen);
match = TclUniCharMatch(udata, length, uptn, plen, flags);
- } else if (TclIsPureByteArray(strObj) && !flags) {
+ } else if (TclIsPureByteArray(strObj) && TclIsPureByteArray(ptnObj)
+ && !flags) {
unsigned char *data, *ptn;
data = Tcl_GetByteArrayFromObj(strObj, &length);
@@ -2717,7 +2820,7 @@ Tcl_DStringAppendElement(
{
char *dst = dsPtr->string + dsPtr->length;
int needSpace = TclNeedSpace(dsPtr->string, dst);
- int flags = needSpace ? TCL_DONT_QUOTE_HASH : 0;
+ char flags = needSpace ? TCL_DONT_QUOTE_HASH : 0;
int newSize = dsPtr->length + needSpace
+ TclScanElement(element, -1, &flags);
@@ -3153,7 +3256,7 @@ Tcl_PrintDouble(
int signum;
char *digits;
char *end;
- int *precisionPtr = Tcl_GetThreadData(&precisionKey, (int) sizeof(int));
+ int *precisionPtr = Tcl_GetThreadData(&precisionKey, sizeof(int));
/*
* Handle NaN.
@@ -3315,6 +3418,7 @@ Tcl_PrintDouble(
*----------------------------------------------------------------------
*/
+#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9
/* ARGSUSED */
char *
TclPrecTraceProc(
@@ -3326,7 +3430,7 @@ TclPrecTraceProc(
{
Tcl_Obj *value;
int prec;
- int *precisionPtr = Tcl_GetThreadData(&precisionKey, (int) sizeof(int));
+ int *precisionPtr = Tcl_GetThreadData(&precisionKey, sizeof(int));
/*
* If the variable is unset, then recreate the trace.
@@ -3372,6 +3476,7 @@ TclPrecTraceProc(
*precisionPtr = prec;
return NULL;
}
+#endif /* !TCL_NO_DEPRECATED)*/
/*
*----------------------------------------------------------------------
@@ -3489,9 +3594,9 @@ int
TclFormatInt(
char *buffer, /* Points to the storage into which the
* formatted characters are written. */
- long n) /* The integer to format. */
+ Tcl_WideInt n) /* The integer to format. */
{
- long intVal;
+ Tcl_WideInt intVal;
int i;
int numFormatted, j;
const char *digits = "0123456789";
@@ -3514,7 +3619,7 @@ TclFormatInt(
intVal = -n; /* [Bug 3390638] Workaround for*/
if (n == -n || intVal == n) { /* broken compiler optimizers. */
- return sprintf(buffer, "%ld", n);
+ return sprintf(buffer, "%" TCL_LL_MODIFIER "d", n);
}
/*
@@ -3551,254 +3656,507 @@ TclFormatInt(
/*
*----------------------------------------------------------------------
*
- * TclGetIntForIndex --
+ * GetWideForIndex --
*
- * This function returns an integer corresponding to the list index held
- * in a Tcl object. The Tcl object's value is expected to be in the
- * format integer([+-]integer)? or the format end([+-]integer)?.
+ * This function produces a wide integer value corresponding to the
+ * index value held in *objPtr. The parsing supports all values
+ * recognized as any size of integer, and the syntaxes end[-+]$integer
+ * and $integer[-+]$integer. The argument endValue is used to give
+ * the meaning of the literal index value "end". Index arithmetic
+ * on arguments outside the wide integer range are only accepted
+ * when interp is a working interpreter, not NULL.
*
* Results:
- * The return value is normally TCL_OK, which means that the index was
- * successfully stored into the location referenced by "indexPtr". If the
- * Tcl object referenced by "objPtr" has the value "end", the value
- * stored is "endValue". If "objPtr"s values is not of one of the
- * expected formats, TCL_ERROR is returned and, if "interp" is non-NULL,
- * an error message is left in the interpreter's result object.
+ * When parsing of *objPtr successfully recognizes an index value,
+ * TCL_OK is returned, and the wide integer value corresponding to
+ * the recognized index value is written to *widePtr. When parsing
+ * fails, TCL_ERROR is returned and error information is written to
+ * interp, if non-NULL.
*
* Side effects:
- * The object referenced by "objPtr" might be converted to an integer,
- * wide integer, or end-based-index object.
+ * The type of *objPtr may change.
*
*----------------------------------------------------------------------
*/
-int
-TclGetIntForIndex(
- Tcl_Interp *interp, /* Interpreter to use for error reporting. If
- * NULL, then no error message is left after
- * errors. */
- Tcl_Obj *objPtr, /* Points to an object containing either "end"
- * or an integer. */
- int endValue, /* The value to be stored at "indexPtr" if
- * "objPtr" holds "end". */
- int *indexPtr) /* Location filled in with an integer
- * representing an index. */
+static int
+GetWideForIndex(
+ Tcl_Interp *interp, /* Interpreter to use for error reporting. If
+ * NULL, then no error message is left after
+ * errors. */
+ Tcl_Obj *objPtr, /* Points to the value to be parsed */
+ Tcl_WideInt endValue, /* The value to be stored at *widePtr if
+ * objPtr holds "end".
+ * NOTE: this value may be negative. */
+ Tcl_WideInt *widePtr) /* Location filled in with a wide integer
+ * representing an index. */
{
- size_t length;
- char *opPtr;
- const char *bytes;
-
- if (TclGetIntFromObj(NULL, objPtr, indexPtr) == TCL_OK) {
- return TCL_OK;
+ ClientData cd;
+ const char *opPtr;
+ int numType, length, t1 = 0, t2 = 0;
+ int code = TclGetNumberFromObj(NULL, objPtr, &cd, &numType);
+
+ if (code == TCL_OK) {
+ if (numType == TCL_NUMBER_INT) {
+ /* objPtr holds an integer in the signed wide range */
+ *widePtr = (Tcl_WideInt)(*(Tcl_WideInt *)cd);
+ return TCL_OK;
+ }
+ if (numType == TCL_NUMBER_BIG) {
+ /* objPtr holds an integer outside the signed wide range */
+ /* Truncate to the signed wide range. */
+ if (mp_isneg((mp_int *)cd)) {
+ *widePtr = LLONG_MIN;
+ } else {
+ *widePtr = LLONG_MAX;
+ }
+ return TCL_OK;
+ }
+ /* Must be a double -> not a valid index */
+ goto parseError;
}
- if (SetEndOffsetFromAny(NULL, objPtr) == TCL_OK) {
- /*
- * If the object is already an offset from the end of the list, or can
- * be converted to one, use it.
- */
-
- *indexPtr = endValue + objPtr->internalRep.longValue;
+ /* objPtr does not hold a number, check the end+/- format... */
+ if (GetEndOffsetFromObj(objPtr, endValue, widePtr) == TCL_OK) {
return TCL_OK;
}
- bytes = TclGetString(objPtr);
- length = objPtr->length;
+ /* If we reach here, the string rep of objPtr exists. */
/*
- * Leading whitespace is acceptable in an index.
+ * The valid index syntax does not include any value that is
+ * a list of more than one element. This is necessary so that
+ * lists of index values can be reliably distinguished from any
+ * single index value.
*/
- while (length && TclIsSpaceProc(*bytes)) {
- bytes++;
- length--;
+ /*
+ * Quick scan to see if multi-value list is even possible.
+ * This relies on TclGetString() returning a NUL-terminated string.
+ */
+ if ((TclMaxListLength(TclGetString(objPtr), -1, NULL) > 1)
+
+ /* If it's possible, do the full list parse. */
+ && (TCL_OK == Tcl_ListObjLength(NULL, objPtr, &length))
+ && (length > 1)) {
+ goto parseError;
}
- if (TclParseNumber(NULL, NULL, NULL, bytes, length, (const char **)&opPtr,
- TCL_PARSE_INTEGER_ONLY | TCL_PARSE_NO_WHITESPACE) == TCL_OK) {
- int code, first, second;
- char savedOp = *opPtr;
+ /* Passed the list screen, so parse for index arithmetic expression */
+ if (TCL_OK == TclParseNumber(NULL, objPtr, NULL, NULL, -1, &opPtr,
+ TCL_PARSE_INTEGER_ONLY)) {
+ Tcl_WideInt w1=0, w2=0;
- if ((savedOp != '+') && (savedOp != '-')) {
- goto parseError;
- }
- if (TclIsSpaceProc(opPtr[1])) {
- goto parseError;
- }
- *opPtr = '\0';
- code = Tcl_GetInt(interp, bytes, &first);
- *opPtr = savedOp;
- if (code == TCL_ERROR) {
- goto parseError;
- }
- if (TCL_ERROR == Tcl_GetInt(interp, opPtr+1, &second)) {
- goto parseError;
- }
- if (savedOp == '+') {
- *indexPtr = first + second;
- } else {
- *indexPtr = first - second;
+ /* value starts with valid integer... */
+
+ if ((*opPtr == '-') || (*opPtr == '+')) {
+ /* ... value continues with [-+] ... */
+
+ /* Save first integer as wide if possible */
+ TclGetNumberFromObj(NULL, objPtr, &cd, &t1);
+ if (t1 == TCL_NUMBER_INT) {
+ w1 = (*(Tcl_WideInt *)cd);
+ }
+
+ if (TCL_OK == TclParseNumber(NULL, objPtr, NULL, opPtr + 1,
+ -1, NULL, TCL_PARSE_INTEGER_ONLY)) {
+ /* ... value concludes with second valid integer */
+
+ /* Save second integer as wide if possible */
+ TclGetNumberFromObj(NULL, objPtr, &cd, &t2);
+ if (t2 == TCL_NUMBER_INT) {
+ w2 = (*(Tcl_WideInt *)cd);
+ }
+ }
+ }
+ /* Clear invalid intreps left by TclParseNumber */
+ TclFreeIntRep(objPtr);
+
+ if (t1 && t2) {
+ /* We have both integer values */
+ if ((t1 == TCL_NUMBER_INT) && (t2 == TCL_NUMBER_INT)) {
+ /* Both are wide, do wide-integer math */
+ if (*opPtr == '-') {
+ if ((w2 == LLONG_MIN) && (interp != NULL)) {
+ goto extreme;
+ }
+ w2 = -w2;
+ }
+
+ if ((w1 ^ w2) < 0) {
+ /* Different signs, sum cannot overflow */
+ *widePtr = w1 + w2;
+ } else if (w1 >= 0) {
+ if (w1 < LLONG_MAX - w2) {
+ *widePtr = w1 + w2;
+ } else {
+ *widePtr = LLONG_MAX;
+ }
+ } else {
+ if (w1 > LLONG_MIN - w2) {
+ *widePtr = w1 + w2;
+ } else {
+ *widePtr = LLONG_MIN;
+ }
+ }
+ } else if (interp == NULL) {
+ /*
+ * We use an interp to do bignum index calculations.
+ * If we don't get one, call all indices with bignums errors,
+ * and rely on callers to handle it.
+ */
+ return TCL_ERROR;
+ } else {
+ /*
+ * At least one is big, do bignum math. Little reason to
+ * value performance here. Re-use code. Parse has verified
+ * objPtr is an expression. Compute it.
+ */
+
+ Tcl_Obj *sum;
+
+ extreme:
+ Tcl_ExprObj(interp, objPtr, &sum);
+ TclGetNumberFromObj(NULL, sum, &cd, &numType);
+
+ if (numType == TCL_NUMBER_INT) {
+ /* sum holds an integer in the signed wide range */
+ *widePtr = (Tcl_WideInt)(*(Tcl_WideInt *)cd);
+ } else {
+ /* sum holds an integer outside the signed wide range */
+ /* Truncate to the signed wide range. */
+ if (mp_isneg((mp_int *)cd)) {
+ *widePtr = LLONG_MIN;
+ } else {
+ *widePtr = LLONG_MAX;
+ }
+ }
+ Tcl_DecrRefCount(sum);
+ }
+ return TCL_OK;
}
- return TCL_OK;
}
- /*
- * Report a parse error.
- */
-
+ /* Report a parse error. */
parseError:
if (interp != NULL) {
- bytes = TclGetString(objPtr);
- Tcl_SetObjResult(interp, Tcl_ObjPrintf(
- "bad index \"%s\": must be integer?[+-]integer? or"
- " end?[+-]integer?", bytes));
- if (!strncmp(bytes, "end-", 4)) {
- bytes += 4;
- }
- TclCheckBadOctal(interp, bytes);
- Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX", NULL);
+ char * bytes = TclGetString(objPtr);
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "bad index \"%s\": must be integer?[+-]integer? or"
+ " end?[+-]integer?", bytes));
+ if (!strncmp(bytes, "end-", 4)) {
+ bytes += 4;
+ }
+ TclCheckBadOctal(interp, bytes);
+ Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX", NULL);
}
-
return TCL_ERROR;
}
/*
*----------------------------------------------------------------------
*
- * UpdateStringOfEndOffset --
+ * TclGetIntForIndex --
*
- * Update the string rep of a Tcl object holding an "end-offset"
- * expression.
+ * This function returns an integer corresponding to the list index held
+ * in a Tcl object. The Tcl object's value is expected to be in the
+ * format integer([+-]integer)? or the format end([+-]integer)?.
*
* Results:
- * None.
+ * The return value is normally TCL_OK, which means that the index was
+ * successfully stored into the location referenced by "indexPtr". If the
+ * Tcl object referenced by "objPtr" has the value "end", the value
+ * stored is "endValue". If "objPtr"s values is not of one of the
+ * expected formats, TCL_ERROR is returned and, if "interp" is non-NULL,
+ * an error message is left in the interpreter's result object.
*
* Side effects:
- * Stores a valid string in the object's string rep.
- *
- * This function does NOT free any earlier string rep. If it is called on an
- * object that already has a valid string rep, it will leak memory.
+ * The object referenced by "objPtr" might be converted to an integer,
+ * wide integer, or end-based-index object.
*
*----------------------------------------------------------------------
*/
-static void
-UpdateStringOfEndOffset(
- register Tcl_Obj *objPtr)
+int
+TclGetIntForIndex(
+ Tcl_Interp *interp, /* Interpreter to use for error reporting. If
+ * NULL, then no error message is left after
+ * errors. */
+ Tcl_Obj *objPtr, /* Points to an object containing either "end"
+ * or an integer. */
+ int endValue, /* The value to be stored at "indexPtr" if
+ * "objPtr" holds "end". */
+ int *indexPtr) /* Location filled in with an integer
+ * representing an index. */
{
- char buffer[TCL_INTEGER_SPACE + 5];
- register int len = 3;
+ Tcl_WideInt wide;
- memcpy(buffer, "end", 4);
- if (objPtr->internalRep.longValue != 0) {
- buffer[len++] = '-';
- len += TclFormatInt(buffer+len, -(objPtr->internalRep.longValue));
+ if (GetWideForIndex(interp, objPtr, endValue, &wide) == TCL_ERROR) {
+ return TCL_ERROR;
+ }
+ if (wide < INT_MIN) {
+ *indexPtr = INT_MIN;
+ } else if (wide > INT_MAX) {
+ *indexPtr = INT_MAX;
+ } else {
+ *indexPtr = (int) wide;
}
- objPtr->bytes = ckalloc((unsigned) len+1);
- memcpy(objPtr->bytes, buffer, (unsigned) len+1);
- objPtr->length = len;
+ return TCL_OK;
}
/*
*----------------------------------------------------------------------
*
- * SetEndOffsetFromAny --
+ * GetEndOffsetFromObj --
*
- * Look for a string of the form "end[+-]offset" and convert it to an
- * internal representation holding the offset.
+ * Look for a string of the form "end[+-]offset" and convert it to an
+ * internal representation holding the offset.
*
* Results:
- * Returns TCL_OK if ok, TCL_ERROR if the string was badly formed.
+ * Tcl return code.
*
* Side effects:
- * If interp is not NULL, stores an error message in the interpreter
- * result.
+ * May store a Tcl_ObjType.
*
*----------------------------------------------------------------------
*/
static int
-SetEndOffsetFromAny(
- Tcl_Interp *interp, /* Tcl interpreter or NULL */
- Tcl_Obj *objPtr) /* Pointer to the object to parse */
+GetEndOffsetFromObj(
+ Tcl_Obj *objPtr, /* Pointer to the object to parse */
+ Tcl_WideInt endValue, /* The value to be stored at "indexPtr" if
+ * "objPtr" holds "end". */
+ Tcl_WideInt *widePtr) /* Location filled in with an integer
+ * representing an index. */
{
- int offset; /* Offset in the "end-offset" expression */
- register const char *bytes; /* String rep of the object */
- int length; /* Length of the object's string rep */
+ Tcl_WideInt offset = 0; /* Offset in the "end-offset" expression */
- /*
- * If it's already the right type, we're fine.
- */
+ if (objPtr->typePtr != &endOffsetType) {
+ int length;
+ const char *bytes = TclGetStringFromObj(objPtr, &length);
- if (objPtr->typePtr == &tclEndOffsetType) {
- return TCL_OK;
- }
+ if ((length < 3) || (length == 4)) {
+ /* Too short to be "end" or to be "end-$integer" */
+ return TCL_ERROR;
+ }
+ if ((*bytes != 'e') || (strncmp(bytes, "end", 3) != 0)) {
+ /* Value doesn't start with "end" */
+ return TCL_ERROR;
+ }
- /*
- * Check for a string rep of the right form.
- */
+ if (length > 4) {
+ ClientData cd;
+ int t;
- bytes = TclGetStringFromObj(objPtr, &length);
- if ((*bytes != 'e') || (strncmp(bytes, "end",
- (size_t)((length > 3) ? 3 : length)) != 0)) {
- if (interp != NULL) {
- Tcl_SetObjResult(interp, Tcl_ObjPrintf(
- "bad index \"%s\": must be end?[+-]integer?", bytes));
- Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX", NULL);
- }
- return TCL_ERROR;
- }
+ /* Parse for the "end-..." or "end+..." formats */
- /*
- * Convert the string rep.
- */
+ if ((bytes[3] != '-') && (bytes[3] != '+')) {
+ /* No operator where we need one */
+ return TCL_ERROR;
+ }
+ if (TclIsSpaceProc(bytes[4])) {
+ /* Space after + or - not permitted. */
+ return TCL_ERROR;
+ }
- if (length <= 3) {
- offset = 0;
- } else if ((length > 4) && ((bytes[3] == '-') || (bytes[3] == '+'))) {
- /*
- * This is our limited string expression evaluator. Pass everything
- * after "end-" to Tcl_GetInt, then reverse for offset.
- */
+ /* Parse the integer offset */
+ if (TCL_OK != TclParseNumber(NULL, objPtr, NULL,
+ bytes+4, length-4, NULL, TCL_PARSE_INTEGER_ONLY)) {
+ /* Not a recognized integer format */
+ return TCL_ERROR;
+ }
- if (TclIsSpaceProc(bytes[4])) {
- goto badIndexFormat;
- }
- if (Tcl_GetInt(interp, bytes+4, &offset) != TCL_OK) {
- return TCL_ERROR;
- }
- if (bytes[3] == '-') {
- offset = -offset;
+ /* Got an integer offset; pull it from where parser left it. */
+ TclGetNumberFromObj(NULL, objPtr, &cd, &t);
+
+ if (t == TCL_NUMBER_BIG) {
+ /* Truncate to the signed wide range. */
+ if (mp_isneg((mp_int *)cd)) {
+ offset = (bytes[3] == '-') ? LLONG_MAX : LLONG_MIN;
+ } else {
+ offset = (bytes[3] == '-') ? LLONG_MIN : LLONG_MAX;
+ }
+ } else {
+ /* assert (t == TCL_NUMBER_INT); */
+ offset = (*(Tcl_WideInt *)cd);
+ if (bytes[3] == '-') {
+ offset = (offset == LLONG_MIN) ? LLONG_MAX : -offset;
+ }
+ }
}
+
+ /* Success. Free the old internal rep and set the new one. */
+ TclFreeIntRep(objPtr);
+ objPtr->internalRep.wideValue = offset;
+ objPtr->typePtr = &endOffsetType;
+ }
+
+ offset = objPtr->internalRep.wideValue;
+
+ if ((endValue ^ offset) < 0) {
+ /* Different signs, sum cannot overflow */
+ *widePtr = endValue + offset;
+ } else if (endValue >= 0) {
+ if (endValue < LLONG_MAX - offset) {
+ *widePtr = endValue + offset;
+ } else {
+ *widePtr = LLONG_MAX;
+ }
} else {
- /*
- * Conversion failed. Report the error.
- */
+ if (endValue > LLONG_MIN - offset) {
+ *widePtr = endValue + offset;
+ } else {
+ *widePtr = LLONG_MIN;
+ }
+ }
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclIndexEncode --
+ *
+ * Parse objPtr to determine if it is an index value. Two cases
+ * are possible. The value objPtr might be parsed as an absolute
+ * index value in the C signed int range. Note that this includes
+ * index values that are integers as presented and it includes index
+ * arithmetic expressions. The absolute index values that can be
+ * directly meaningful as an index into either a list or a string are
+ * those integer values >= TCL_INDEX_START (0)
+ * and < TCL_INDEX_AFTER (INT_MAX).
+ * The largest string supported in Tcl 8 has bytelength INT_MAX.
+ * This means the largest supported character length is also INT_MAX,
+ * and the index of the last character in a string of length INT_MAX
+ * is INT_MAX-1.
+ *
+ * Any absolute index value parsed outside that range is encoded
+ * using the before and after values passed in by the
+ * caller as the encoding to use for indices that are either
+ * less than or greater than the usable index range. TCL_INDEX_AFTER
+ * is available as a good choice for most callers to use for
+ * after. Likewise, the value TCL_INDEX_BEFORE is good for
+ * most callers to use for before. Other values are possible
+ * when the caller knows it is helpful in producing its own behavior
+ * for indices before and after the indexed item.
+ *
+ * A token can also be parsed as an end-relative index expression.
+ * All end-relative expressions that indicate an index larger
+ * than end (end+2, end--5) point beyond the end of the indexed
+ * collection, and can be encoded as after. The end-relative
+ * expressions that indicate an index less than or equal to end
+ * are encoded relative to the value TCL_INDEX_END (-2). The
+ * index "end" is encoded as -2, down to the index "end-0x7ffffffe"
+ * which is encoded as INT_MIN. Since the largest index into a
+ * string possible in Tcl 8 is 0x7ffffffe, the interpretation of
+ * "end-0x7ffffffe" for that largest string would be 0. Thus,
+ * if the tokens "end-0x7fffffff" or "end+-0x80000000" are parsed,
+ * they can be encoded with the before value.
+ *
+ * These details will require re-examination whenever string and
+ * list length limits are increased, but that will likely also
+ * mean a revised routine capable of returning Tcl_WideInt values.
+ *
+ * Returns:
+ * TCL_OK if parsing succeeded, and TCL_ERROR if it failed.
+ *
+ * Side effects:
+ * When TCL_OK is returned, the encoded index value is written
+ * to *indexPtr.
+ *
+ *----------------------------------------------------------------------
+ */
- badIndexFormat:
- if (interp != NULL) {
- Tcl_SetObjResult(interp, Tcl_ObjPrintf(
- "bad index \"%s\": must be end?[+-]integer?", bytes));
- Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX", NULL);
+int
+TclIndexEncode(
+ Tcl_Interp *interp, /* For error reporting, may be NULL */
+ Tcl_Obj *objPtr, /* Index value to parse */
+ int before, /* Value to return for index before beginning */
+ int after, /* Value to return for index after end */
+ int *indexPtr) /* Where to write the encoded answer, not NULL */
+{
+ ClientData cd;
+ Tcl_WideInt wide;
+ int idx, numType, code = TclGetNumberFromObj(NULL, objPtr, &cd, &numType);
+
+ if ((code == TCL_OK) && (numType == TCL_NUMBER_INT)) {
+ /* We parsed a value in the range LLONG_MIN...LLONG_MAX */
+ wide = (*(Tcl_WideInt *)cd);
+ integerEncode:
+ if (wide < TCL_INDEX_START) {
+ /* All negative absolute indices are "before the beginning" */
+ idx = before;
+ } else if (wide >= INT_MAX) {
+ /* This index value is always "after the end" */
+ idx = after;
+ } else {
+ idx = (int) wide;
}
+ /* usual case, the absolute index value encodes itself */
+ } else if (TCL_OK == GetEndOffsetFromObj(objPtr, 0, &wide)) {
+ /*
+ * We parsed an end+offset index value.
+ * wide holds the offset value in the range LLONG_MIN...LLONG_MAX.
+ */
+ if (wide > 0) {
+ /*
+ * All end+postive or end-negative expressions
+ * always indicate "after the end".
+ */
+ idx = after;
+ } else if (wide < INT_MIN - TCL_INDEX_END) {
+ /* These indices always indicate "before the beginning */
+ idx = before;
+ } else {
+ /* Encoded end-positive (or end+negative) are offset */
+ idx = (int)wide + TCL_INDEX_END;
+ }
+
+ /* TODO: Consider flag to suppress repeated end-offset parse. */
+ } else if (TCL_OK == GetWideForIndex(interp, objPtr, 0, &wide)) {
+ /*
+ * Only reach this case when the index value is a
+ * constant index arithmetic expression, and wide
+ * holds the result. Treat it the same as if it were
+ * parsed as an absolute integer value.
+ */
+ goto integerEncode;
+ } else {
return TCL_ERROR;
}
-
- /*
- * The conversion succeeded. Free the old internal rep and set the new
- * one.
- */
-
- TclFreeIntRep(objPtr);
- objPtr->internalRep.longValue = offset;
- objPtr->typePtr = &tclEndOffsetType;
-
+ *indexPtr = idx;
return TCL_OK;
}
/*
*----------------------------------------------------------------------
*
+ * TclIndexDecode --
+ *
+ * Decodes a value previously encoded by TclIndexEncode. The argument
+ * endValue indicates what value of "end" should be used in the
+ * decoding.
+ *
+ * Results:
+ * The decoded index value.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclIndexDecode(
+ int encoded, /* Value to decode */
+ int endValue) /* Meaning of "end" to use, > TCL_INDEX_END */
+{
+ if (encoded <= TCL_INDEX_END) {
+ return (encoded - TCL_INDEX_END) + endValue;
+ }
+ return encoded;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* TclCheckBadOctal --
*
* This function checks for a bad octal value and appends a meaningful
@@ -4027,7 +4385,7 @@ TclSetProcessGlobalValue(
Tcl_IncrRefCount(newValue);
cacheMap = GetThreadHash(&pgvPtr->key);
ClearHash(cacheMap);
- hPtr = Tcl_CreateHashEntry(cacheMap, (void *)(pgvPtr->epoch), &dummy);
+ hPtr = Tcl_CreateHashEntry(cacheMap, (void *)(size_t)(pgvPtr->epoch), &dummy);
Tcl_SetHashValue(hPtr, newValue);
Tcl_MutexUnlock(&pgvPtr->mutex);
}
@@ -4053,7 +4411,7 @@ TclGetProcessGlobalValue(
Tcl_Obj *value = NULL;
Tcl_HashTable *cacheMap;
Tcl_HashEntry *hPtr;
- size_t epoch = pgvPtr->epoch;
+ unsigned int epoch = pgvPtr->epoch;
if (pgvPtr->encoding) {
Tcl_Encoding current = Tcl_GetEncoding(NULL, NULL);
@@ -4077,7 +4435,7 @@ TclGetProcessGlobalValue(
ckfree(pgvPtr->value);
pgvPtr->value = ckalloc(Tcl_DStringLength(&newValue) + 1);
memcpy(pgvPtr->value, Tcl_DStringValue(&newValue),
- (size_t) Tcl_DStringLength(&newValue) + 1);
+ Tcl_DStringLength(&newValue) + 1);
Tcl_DStringFree(&newValue);
Tcl_FreeEncoding(pgvPtr->encoding);
pgvPtr->encoding = current;
@@ -4087,7 +4445,7 @@ TclGetProcessGlobalValue(
}
}
cacheMap = GetThreadHash(&pgvPtr->key);
- hPtr = Tcl_FindHashEntry(cacheMap, (void *) (epoch));
+ hPtr = Tcl_FindHashEntry(cacheMap, (void *)(size_t)epoch);
if (NULL == hPtr) {
int dummy;
@@ -4120,7 +4478,7 @@ TclGetProcessGlobalValue(
value = Tcl_NewStringObj(pgvPtr->value, pgvPtr->numBytes);
hPtr = Tcl_CreateHashEntry(cacheMap,
- (void *)(pgvPtr->epoch), &dummy);
+ (void *)(size_t)(pgvPtr->epoch), &dummy);
Tcl_MutexUnlock(&pgvPtr->mutex);
Tcl_SetHashValue(hPtr, value);
Tcl_IncrRefCount(value);