diff options
57 files changed, 2108 insertions, 859 deletions
diff --git a/.travis.yml b/.travis.yml index bf8ab15..50eb658 100644 --- a/.travis.yml +++ b/.travis.yml @@ -139,9 +139,9 @@ matrix: - BUILD_DIR=unix - CFGOPT="--enable-symbols=mem" # Testing on Mac, various styles - - name: "macOS/Xcode 11.3/Shared" + - name: "macOS/Xcode 11.4/Shared" os: osx - osx_image: xcode11.3 + osx_image: xcode11.4 env: - BUILD_DIR=macosx install: [] @@ -149,9 +149,9 @@ matrix: - make all # The styles=develop avoids some weird problems on OSX - make test styles=develop - - name: "macOS/Xcode 11.3/Shared/Unix-like" + - name: "macOS/Xcode 11.4/Shared/Unix-like" os: osx - osx_image: xcode11.3 + osx_image: xcode11.4 env: - BUILD_DIR=unix # Older MacOS versions @@ -441,8 +441,8 @@ matrix: before_install: - cd ${BUILD_DIR} install: - - mkdir $HOME/install - - ./configure ${CFGOPT} --prefix=$HOME/install || (cat config.log && exit 1) + - mkdir "$HOME/install dir" + - ./configure ${CFGOPT} "--prefix=$HOME/install dir" || (cat config.log && exit 1) before_script: - export ERROR_ON_FAILURES=1 script: diff --git a/ChangeLog.2000 b/ChangeLog.2000 index 5b62351..7e78c19 100644 --- a/ChangeLog.2000 +++ b/ChangeLog.2000 @@ -1779,7 +1779,7 @@ * generic/tclCmdMZ.c: Fixed infinite loop bug with [regexp -all] [Bug: 4981]. - * tests/*.test: Changed all occurances of "namespace import + * tests/*.test: Changed all occurrences of "namespace import ::tcltest" to "namespace import -force ::tcltest" [Bug: 3948]. 2000-04-09 Brent Welch <welch@scriptics.com> diff --git a/doc/ListObj.3 b/doc/ListObj.3 index dc1ba53..ab836d8 100644 --- a/doc/ListObj.3 +++ b/doc/ListObj.3 @@ -138,7 +138,9 @@ create a new value or modify an existing value to hold the \fIobjc\fR elements of the array referenced by \fIobjv\fR where each element is a pointer to a Tcl value. If \fIobjc\fR is less than or equal to zero, -they return an empty value. +they return an empty value. If \fIobjv\fR is NULL, the resulting list +contains 0 elements, with reserved space in an internal representation +for \fIobjc\fR more elements (to avoid its reallocation later). The new value's string representation is left invalid. The two procedures increment the reference counts of the elements in \fIobjc\fR since the list value now refers to them. @@ -259,25 +259,43 @@ string. The caller must not ask for the next character after the last character in the string if the string is not terminated by a null character. .PP -Given \fIsrc\fR, a pointer to some location in a UTF-8 string (or to a -null byte immediately following such a string), \fBTcl_UtfPrev\fR -returns a pointer to the closest preceding byte that starts a UTF-8 -character. -This function will not back up to a position before \fIstart\fR, -the start of the UTF-8 string. If \fIsrc\fR was already at \fIstart\fR, the -return value will be \fIstart\fR. +\fBTcl_UtfPrev\fR is used to step backward through but not beyond the +UTF-8 string that begins at \fIstart\fR. If the UTF-8 string is made +up entirely of complete and well-formed characters, and \fIsrc\fR points +to the lead byte of one of those characters (or to the location one byte +past the end of the string), then repeated calls of \fBTcl_UtfPrev\fR will +return pointers to the lead bytes of each character in the string, one +character at a time, terminating when it returns \fIstart\fR. +.PP +When the conditions of completeness and well-formedness may not be satisfied, +a more precise description of the function of \fBTcl_UtfPrev\fR is necessary. +It always returns a pointer greater than or equal to \fIstart\fR; that is, +always a pointer to a location in the string. It always returns a pointer to +a byte that begins a character when scanning for characters beginning +from \fIstart\fR. When \fIsrc\fR is greater than \fIstart\fR, it +always returns a pointer less than \fIsrc\fR and greater than or +equal to (\fIsrc\fR - \fBTCL_UTF_MAX\fR). The character that begins +at the returned pointer is the first one that either includes the +byte \fIsrc[-1]\fR, or might include it if the right trail bytes are +present at \fIsrc\fR and greater. \fBTcl_UtfPrev\fR never reads the +byte \fIsrc[0]\fR nor the byte \fIstart[-1]\fR nor the byte +\fIsrc[-\fBTCL_UTF_MAX\fI-1]\fR. .PP \fBTcl_UniCharAtIndex\fR corresponds to a C string array dereference or the -Pascal Ord() function. It returns the Tcl_UniChar represented at the +Pascal Ord() function. It returns the Unicode character represented at the specified character (not byte) \fIindex\fR in the UTF-8 string \fIsrc\fR. The source string must contain at least \fIindex\fR -characters. Behavior is undefined if a negative \fIindex\fR is given. +characters. If a negative \fIindex\fR is given or \fIindex\fR points +to the second half of a surrogate pair, it returns -1. .PP \fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not byte) \fIindex\fR in the UTF-8 string \fIsrc\fR. The source string must contain at least \fIindex\fR characters. This is equivalent to calling -\fBTcl_UtfNext\fR \fIindex\fR times. If a negative \fIindex\fR is given, -the return pointer points to the first character in the source string. +\fBTcl_UtfToUniChar\fR \fIindex\fR times, except if that would return +a pointer to the second byte of a valid 4-byte UTF-8 sequence, in which +case, \fBTcl_UtfToUniChar\fR will be called once more to find the end +of the sequence. If a negative \fIindex\fR is given, the returned pointer +points to the first character in the source string. .PP \fBTcl_UtfBackslash\fR is a utility procedure used by several of the Tcl commands. It parses a backslash sequence and stores the properly formed diff --git a/generic/tcl.h b/generic/tcl.h index 369a894..02ef01e 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2114,8 +2114,8 @@ typedef struct Tcl_EncodingType { * The maximum number of bytes that are necessary to represent a single * Unicode character in UTF-8. The valid values are 3 and 4 * (or perhaps 1 if we want to support a non-unicode enabled core). If 3, - * then Tcl_UniChar must be 2-bytes in size (UCS-2) (the default). If > 3, - * then Tcl_UniChar must be 4-bytes in size (UCS-4). At this time UCS-2 mode + * then Tcl_UniChar must be 2-bytes in size (UTF-16) (the default). If > 3, + * then Tcl_UniChar must be 4-bytes in size (UCS-4). At this time UTF-16 mode * is the default and recommended mode. */ diff --git a/generic/tclAlloc.c b/generic/tclAlloc.c index 3a76469..bc4716d 100644 --- a/generic/tclAlloc.c +++ b/generic/tclAlloc.c @@ -345,7 +345,7 @@ TclpAlloc( nextf[bucket] = overPtr->next; overPtr->overMagic0 = overPtr->overMagic1 = MAGIC; - overPtr->bucketIndex = (unsigned char) bucket; + overPtr->bucketIndex = UCHAR(bucket); #ifdef MSTATS numMallocs[bucket]++; diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 2119043..bb82fe7 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2841,7 +2841,7 @@ BinaryEncodeUu( unsigned char *data, *start, *cursor; int offset, count, rawLength, n, i, j, bits, index; int lineLength = 61; - const unsigned char SingleNewline[] = { (unsigned char) '\n' }; + const unsigned char SingleNewline[] = { UCHAR('\n') }; const unsigned char *wrapchar = SingleNewline; int wrapcharlen = sizeof(SingleNewline); enum { OPT_MAXLEN, OPT_WRAPCHAR }; diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index ffe0088..056bd47 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -211,7 +211,7 @@ Tcl_CaseObjCmd( pat = TclGetString(caseObjv[i]); for (p = pat; *p != '\0'; p++) { - if (TclIsSpaceProc(*p) || (*p == '\\')) { + if (TclIsSpaceProcM(*p) || (*p == '\\')) { break; } } diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index 94ff2cc..60331f5 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -3305,7 +3305,7 @@ Tcl_LsearchObjCmd( if (groupSize < 1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "stride length must be at least 1", -1)); - Tcl_SetErrorCode(interp, "TCL", "OPERATION", "LSORT", + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "LSEARCH", "BADSTRIDE", NULL); result = TCL_ERROR; goto done; @@ -3640,11 +3640,11 @@ Tcl_LsearchObjCmd( /* * Normally, binary search is written to stop when it finds a * match. If there are duplicates of an element in the list, - * our first match might not be the first occurance. + * our first match might not be the first occurrence. * Consider: 0 0 0 1 1 1 2 2 2 * * To maintain consistancy with standard lsearch semantics, we - * must find the leftmost occurance of the pattern in the + * must find the leftmost occurrence of the pattern in the * list. Thus we don't just stop searching here. This * variation means that a search always makes log n * comparisons (normal binary search might "get lucky" with an @@ -4697,7 +4697,7 @@ static int DictionaryCompare( const char *left, const char *right) /* The strings to compare. */ { - Tcl_UniChar uniLeft = 0, uniRight = 0, uniLeftLower, uniRightLower; + int uniLeft = 0, uniRight = 0, uniLeftLower, uniRightLower; int diff, zeros; int secondaryDiff = 0; @@ -4766,8 +4766,8 @@ DictionaryCompare( */ if ((*left != '\0') && (*right != '\0')) { - left += TclUtfToUniChar(left, &uniLeft); - right += TclUtfToUniChar(right, &uniRight); + left += TclUtfToUCS4(left, &uniLeft); + right += TclUtfToUCS4(right, &uniRight); /* * Convert both chars to lower for the comparison, because diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 1ddae0f..56df0dd 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1424,7 +1424,7 @@ StringIndexCmd( */ if (TclIsPureByteArray(objv[1])) { - unsigned char uch = (unsigned char) ch; + unsigned char uch = UCHAR(ch); Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1)); } else { @@ -1845,7 +1845,7 @@ StringIsCmd( * if it is the first "element" that has the failure. */ - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } TclNewStringObj(tmpStr, string1, p-string1); @@ -2519,12 +2519,22 @@ StringStartCmd( cur = 0; if (index > 0) { p = Tcl_UtfAtIndex(string, index); + + TclUtfToUCS4(p, &ch); for (cur = index; cur >= 0; cur--) { - TclUtfToUCS4(p, &ch); + int delta = 0; + const char *next; + if (!Tcl_UniCharIsWordChar(ch)) { break; } - p = Tcl_UtfPrev(p, string); + + next = TclUtfPrev(p, string); + do { + next += delta; + delta = TclUtfToUCS4(next, &ch); + } while (next + delta < p); + p = next; } if (cur != index) { cur += 1; diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c index e38be07..37adcef 100644 --- a/generic/tclCompCmds.c +++ b/generic/tclCompCmds.c @@ -126,9 +126,9 @@ TclCompileAppendCmd( * compiled. */ CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *varTokenPtr, *valueTokenPtr; int isScalar, localIndex, numWords, i; - DefineLineInformation; /* TIP #280 */ /* TODO: Consider support for compiling expanded args. */ numWords = parsePtr->numWords; @@ -572,10 +572,10 @@ TclCompileCatchCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ JumpFixup jumpFixup; Tcl_Token *cmdTokenPtr, *resultNameTokenPtr, *optsNameTokenPtr; int resultIndex, optsIndex, range, dropScript = 0; - DefineLineInformation; /* TIP #280 */ int depth = TclGetStackDepth(envPtr); /* @@ -1003,9 +1003,9 @@ TclCompileDictSetCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *tokenPtr; int i, dictVarIndex; - DefineLineInformation; /* TIP #280 */ Tcl_Token *varTokenPtr; /* @@ -1128,9 +1128,9 @@ TclCompileDictGetCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *tokenPtr; int i; - DefineLineInformation; /* TIP #280 */ /* * There must be at least two arguments after the command (the single-arg @@ -1164,9 +1164,9 @@ TclCompileDictGetWithDefaultCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *tokenPtr; int i; - DefineLineInformation; /* TIP #280 */ /* * There must be at least three arguments after the command. @@ -1195,9 +1195,9 @@ TclCompileDictExistsCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *tokenPtr; int i; - DefineLineInformation; /* TIP #280 */ /* * There must be at least two arguments after the command (the single-arg @@ -1232,8 +1232,8 @@ TclCompileDictUnsetCmd( * compiled. */ CompileEnv *envPtr) /* Holds resulting instructions. */ { - Tcl_Token *tokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr; int i, dictVarIndex; /* @@ -2347,13 +2347,13 @@ TclCompileErrorCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr; + /* * General syntax: [error message ?errorInfo? ?errorCode?] */ - Tcl_Token *tokenPtr; - DefineLineInformation; /* TIP #280 */ - if (parsePtr->numWords < 2 || parsePtr->numWords > 4) { return TCL_ERROR; } @@ -2464,11 +2464,11 @@ TclCompileForCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *startTokenPtr, *testTokenPtr, *nextTokenPtr, *bodyTokenPtr; JumpFixup jumpEvalCondFixup; int bodyCodeOffset, nextCodeOffset, jumpDist; int bodyRange, nextRange; - DefineLineInformation; /* TIP #280 */ if (parsePtr->numWords != 5) { return TCL_ERROR; @@ -2676,6 +2676,7 @@ CompileEachloopCmd( int collect) /* Select collecting or accumulating mode * (TCL_EACH_*) */ { + DefineLineInformation; /* TIP #280 */ Proc *procPtr = envPtr->procPtr; ForeachInfo *infoPtr=NULL; /* Points to the structure describing this * foreach command. Stored in a AuxData @@ -2685,7 +2686,6 @@ CompileEachloopCmd( int jumpBackOffset, infoIndex, range; int numWords, numLists, i, j, code = TCL_OK; Tcl_Obj *varListObj = NULL; - DefineLineInformation; /* TIP #280 */ /* * If the foreach command isn't in a procedure, don't compile it inline: @@ -3446,10 +3446,10 @@ TclPushVarName( /* * last char is ')' => potential array reference. */ - last = Tcl_UtfPrev(name + nameLen, name); + last = &name[nameLen-1]; if (*last == ')') { - for (p = name; p < last; p = Tcl_UtfNext(p)) { + for (p = name; p < last; p++) { if (*p == '(') { elName = p + 1; elNameLen = last - elName; @@ -3477,15 +3477,14 @@ TclPushVarName( } else if (interp && ((n = varTokenPtr->numComponents) > 1) && (varTokenPtr[1].type == TCL_TOKEN_TEXT) && (varTokenPtr[n].type == TCL_TOKEN_TEXT) - && (*((p = varTokenPtr[n].start + varTokenPtr[n].size)-1) == ')') - && (*Tcl_UtfPrev(p, varTokenPtr[n].start) == ')')) { + && (*(varTokenPtr[n].start + varTokenPtr[n].size - 1) == ')')) { /* * Check for parentheses inside first token. */ simpleVarName = 0; for (p = varTokenPtr[1].start, - last = p + varTokenPtr[1].size; p < last; p = Tcl_UtfNext(p)) { + last = p + varTokenPtr[1].size; p < last; p++) { if (*p == '(') { simpleVarName = 1; break; @@ -3553,7 +3552,7 @@ TclPushVarName( int hasNsQualifiers = 0; - for (p = name, last = p + nameLen-1; p < last; p = Tcl_UtfNext(p)) { + for (p = name, last = p + nameLen-1; p < last; p++) { if ((*p == ':') && (*(p+1) == ':')) { hasNsQualifiers = 1; break; diff --git a/generic/tclCompCmdsGR.c b/generic/tclCompCmdsGR.c index 59eebae..3361d7f 100644 --- a/generic/tclCompCmdsGR.c +++ b/generic/tclCompCmdsGR.c @@ -89,9 +89,9 @@ TclCompileGlobalCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *varTokenPtr; int localIndex, numWords, i; - DefineLineInformation; /* TIP #280 */ /* TODO: Consider support for compiling expanded args. */ numWords = parsePtr->numWords; @@ -170,6 +170,7 @@ TclCompileIfCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ JumpFixupArray jumpFalseFixupArray; /* Used to fix the ifFalse jump after each * test when its target PC is determined. */ @@ -185,7 +186,6 @@ TclCompileIfCmd( * "if 0 {..}" */ int boolVal; /* Value of static condition. */ int compileScripts = 1; - DefineLineInformation; /* TIP #280 */ /* * Only compile the "if" command if all arguments are simple words, in @@ -472,9 +472,9 @@ TclCompileIncrCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *varTokenPtr, *incrTokenPtr; int isScalar, localIndex, haveImmValue, immValue; - DefineLineInformation; /* TIP #280 */ if ((parsePtr->numWords != 2) && (parsePtr->numWords != 3)) { return TCL_ERROR; @@ -667,9 +667,9 @@ TclCompileInfoExistsCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *tokenPtr; int isScalar, localIndex; - DefineLineInformation; /* TIP #280 */ if (parsePtr->numWords != 2) { return TCL_ERROR; @@ -840,9 +840,9 @@ TclCompileLappendCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *varTokenPtr, *valueTokenPtr; int isScalar, localIndex, numWords, i; - DefineLineInformation; /* TIP #280 */ /* TODO: Consider support for compiling expanded args. */ numWords = parsePtr->numWords; @@ -955,9 +955,9 @@ TclCompileLassignCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *tokenPtr; int isScalar, localIndex, numWords, idx; - DefineLineInformation; /* TIP #280 */ numWords = parsePtr->numWords; @@ -1058,9 +1058,9 @@ TclCompileLindexCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *idxTokenPtr, *valTokenPtr; int i, idx, numWords = parsePtr->numWords; - DefineLineInformation; /* TIP #280 */ /* * Quit if too few args. @@ -1261,8 +1261,8 @@ TclCompileLlengthCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { - Tcl_Token *varTokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *varTokenPtr; if (parsePtr->numWords != 2) { return TCL_ERROR; @@ -1293,8 +1293,8 @@ TclCompileLrangeCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds the resulting instructions. */ { - Tcl_Token *tokenPtr, *listTokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr, *listTokenPtr; int idx1, idx2; if (parsePtr->numWords != 4) { @@ -1353,8 +1353,8 @@ TclCompileLinsertCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds the resulting instructions. */ { - Tcl_Token *tokenPtr, *listTokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr, *listTokenPtr; int idx, i; if (parsePtr->numWords < 3) { @@ -1455,8 +1455,8 @@ TclCompileLreplaceCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds the resulting instructions. */ { - Tcl_Token *tokenPtr, *listTokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr, *listTokenPtr; int idx1, idx2, i; int emptyPrefix=1, suffixStart = 0; @@ -1618,6 +1618,7 @@ TclCompileLsetCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds the resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ int tempDepth; /* Depth used for emitting one part of the * code burst. */ Tcl_Token *varTokenPtr; /* Pointer to the Tcl_Token representing the @@ -1625,7 +1626,6 @@ TclCompileLsetCmd( int localIndex; /* Index of var in local var table. */ int isScalar; /* Flag == 1 if scalar, 0 if array. */ int i; - DefineLineInformation; /* TIP #280 */ /* * Check argument count. @@ -1788,8 +1788,8 @@ TclCompileNamespaceCodeCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { - Tcl_Token *tokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr; if (parsePtr->numWords != 2) { return TCL_ERROR; @@ -1837,8 +1837,8 @@ TclCompileNamespaceOriginCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { - Tcl_Token *tokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr; if (parsePtr->numWords != 2) { return TCL_ERROR; @@ -1858,8 +1858,8 @@ TclCompileNamespaceQualifiersCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { - Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr); DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr); int off; if (parsePtr->numWords != 2) { @@ -1893,8 +1893,8 @@ TclCompileNamespaceTailCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { - Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr); DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr); JumpFixup jumpFixup; if (parsePtr->numWords != 2) { @@ -1929,9 +1929,9 @@ TclCompileNamespaceUpvarCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *tokenPtr, *otherTokenPtr, *localTokenPtr; int localIndex, numWords, i; - DefineLineInformation; /* TIP #280 */ if (envPtr->procPtr == NULL) { return TCL_ERROR; @@ -2052,11 +2052,11 @@ TclCompileRegexpCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds the resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *varTokenPtr; /* Pointer to the Tcl_Token representing the * parse of the RE or string. */ int i, len, nocase, exact, sawLast, simple; const char *str; - DefineLineInformation; /* TIP #280 */ /* * We are only interested in compiling simple regexp cases. Currently @@ -2390,6 +2390,7 @@ TclCompileReturnCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ /* * General syntax: [return ?-option value ...? ?result?] * An even number of words means an explicit result argument is present. @@ -2400,7 +2401,6 @@ TclCompileReturnCmd( int numOptionWords = numWords - 1 - explicitResult; Tcl_Obj *returnOpts, **objv; Tcl_Token *wordTokenPtr = TokenAfter(parsePtr->tokenPtr); - DefineLineInformation; /* TIP #280 */ /* * Check for special case which can always be compiled: @@ -2641,9 +2641,9 @@ TclCompileUpvarCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *tokenPtr, *otherTokenPtr, *localTokenPtr; int localIndex, numWords, i; - DefineLineInformation; /* TIP #280 */ Tcl_Obj *objPtr; if (envPtr->procPtr == NULL) { @@ -2747,9 +2747,9 @@ TclCompileVariableCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *varTokenPtr, *valueTokenPtr; int localIndex, numWords, i; - DefineLineInformation; /* TIP #280 */ numWords = parsePtr->numWords; if (numWords < 2) { diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 081b141..c8002a7 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -129,9 +129,9 @@ TclCompileSetCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *varTokenPtr, *valueTokenPtr; int isAssignment, isScalar, localIndex, numWords; - DefineLineInformation; /* TIP #280 */ numWords = parsePtr->numWords; if ((numWords != 2) && (numWords != 3)) { @@ -222,10 +222,10 @@ TclCompileStringCatCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ int i, numWords = parsePtr->numWords, numArgs; Tcl_Token *wordTokenPtr; Tcl_Obj *obj, *folded; - DefineLineInformation; /* TIP #280 */ /* Trivial case, no arg */ @@ -444,8 +444,8 @@ TclCompileStringInsertCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { - Tcl_Token *tokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr; int idx; if (parsePtr->numWords != 4) { @@ -1046,8 +1046,8 @@ TclCompileStringReplaceCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds the resulting instructions. */ { - Tcl_Token *tokenPtr, *valueTokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr, *valueTokenPtr; int first, last; if (parsePtr->numWords < 4 || parsePtr->numWords > 5) { @@ -1446,13 +1446,13 @@ TclCompileSubstCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ int numArgs = parsePtr->numWords - 1; int numOpts = numArgs - 1; int objc, flags = TCL_SUBST_ALL; Tcl_Obj **objv/*, *toSubst = NULL*/; Tcl_Token *wordTokenPtr = TokenAfter(parsePtr->tokenPtr); int code = TCL_ERROR; - DefineLineInformation; /* TIP #280 */ if (numArgs == 0) { return TCL_ERROR; @@ -1778,6 +1778,7 @@ TclCompileSwitchCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *tokenPtr; /* Pointer to tokens in command. */ int numWords; /* Number of words in command. */ @@ -1794,7 +1795,6 @@ TclCompileSwitchCmd( int foundMode = 0; /* Have we seen a mode flag yet? */ int i, valueIndex; int result = TCL_ERROR; - DefineLineInformation; /* TIP #280 */ int *clNext = envPtr->clNext; /* @@ -3610,9 +3610,9 @@ TclCompileUnsetCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *varTokenPtr; int isScalar, localIndex, flags = 1, i, varCount = 0, haveFlags = 0; - DefineLineInformation; /* TIP #280 */ /* TODO: Consider support for compiling expanded args. */ @@ -3747,13 +3747,13 @@ TclCompileWhileCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; /* TIP #280 */ Tcl_Token *testTokenPtr, *bodyTokenPtr; JumpFixup jumpEvalCondFixup; int testCodeOffset, bodyCodeOffset, jumpDist, range, code, boolVal; int loopMayEnd = 1; /* This is set to 0 if it is recognized as an * infinite loop. */ Tcl_Obj *boolObj; - DefineLineInformation; /* TIP #280 */ if (parsePtr->numWords != 3) { return TCL_ERROR; @@ -4009,8 +4009,8 @@ CompileUnaryOpCmd( int instruction, CompileEnv *envPtr) { - Tcl_Token *tokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr; if (parsePtr->numWords != 2) { return TCL_ERROR; @@ -4051,8 +4051,8 @@ CompileAssociativeBinaryOpCmd( int instruction, CompileEnv *envPtr) { - Tcl_Token *tokenPtr = parsePtr->tokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr = parsePtr->tokenPtr; int words; /* TODO: Consider support for compiling expanded args. */ @@ -4136,8 +4136,8 @@ CompileComparisonOpCmd( int instruction, CompileEnv *envPtr) { - Tcl_Token *tokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr; /* TODO: Consider support for compiling expanded args. */ if (parsePtr->numWords < 3) { @@ -4290,15 +4290,15 @@ TclCompilePowOpCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) { + DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr = parsePtr->tokenPtr; + int words; + /* * This one has its own implementation because the ** operator is the only * one with right associativity. */ - Tcl_Token *tokenPtr = parsePtr->tokenPtr; - DefineLineInformation; /* TIP #280 */ - int words; - for (words=1 ; words<parsePtr->numWords ; words++) { tokenPtr = TokenAfter(tokenPtr); CompileWord(envPtr, tokenPtr, interp, words); @@ -4491,8 +4491,8 @@ TclCompileMinusOpCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) { - Tcl_Token *tokenPtr = parsePtr->tokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr = parsePtr->tokenPtr; int words; /* TODO: Consider support for compiling expanded args. */ @@ -4536,8 +4536,8 @@ TclCompileDivOpCmd( TCL_UNUSED(Command *), CompileEnv *envPtr) { - Tcl_Token *tokenPtr = parsePtr->tokenPtr; DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr = parsePtr->tokenPtr; int words; /* TODO: Consider support for compiling expanded args. */ diff --git a/generic/tclCompile.c b/generic/tclCompile.c index 5d4555e..9d1c56d 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -1848,8 +1848,8 @@ TclCompileInvocation( int numWords, CompileEnv *envPtr) { - int wordIdx = 0, depth = TclGetStackDepth(envPtr); DefineLineInformation; + int wordIdx = 0, depth = TclGetStackDepth(envPtr); if (cmdObj) { CompileCmdLiteral(interp, cmdObj, envPtr); @@ -1892,8 +1892,8 @@ CompileExpanded( int numWords, CompileEnv *envPtr) { - int wordIdx = 0; DefineLineInformation; + int wordIdx = 0; int depth = TclGetStackDepth(envPtr); StartExpanding(envPtr); @@ -1951,8 +1951,8 @@ CompileCmdCompileProc( Command *cmdPtr, CompileEnv *envPtr) { - int unwind = 0, incrOffset = -1; DefineLineInformation; + int unwind = 0, incrOffset = -1; int depth = TclGetStackDepth(envPtr); /* diff --git a/generic/tclDate.c b/generic/tclDate.c index 341dd0f..294d4fe 100644 --- a/generic/tclDate.c +++ b/generic/tclDate.c @@ -2681,7 +2681,7 @@ TclDatelex( location->first_column = yyInput - info->dateStart; for ( ; ; ) { - while (TclIsSpaceProc(UCHAR(*yyInput))) { + while (TclIsSpaceProcM(*yyInput)) { yyInput++; } diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 890114a..c713469 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -4178,4 +4178,10 @@ extern const TclStubs *tclStubsPtr; #define Tcl_Close(interp, chan) Tcl_CloseEx(interp, chan, 0) #endif +#if defined(USE_TCL_STUBS) && (TCL_UTF_MAX > 3) +# undef Tcl_UtfCharComplete +# define Tcl_UtfCharComplete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ + ? ((length) >= 4) : tclStubsPtr->tcl_UtfCharComplete((src), (length))) +#endif + #endif /* _TCLDECLS */ diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 2e4c3f7..ae02821 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2300,7 +2300,7 @@ UtfToUtfProc( const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + int *chPtr = (int *) statePtr; if (flags & TCL_ENCODING_START) { *statePtr = 0; @@ -2321,7 +2321,7 @@ UtfToUtfProc( dstEnd = dst + dstLen - TCL_UTF_MAX; for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { - if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { + if ((src > srcClose) && (!TclUCS4Complete(src, srcEnd - src))) { /* * If there is more string to follow, this will ensure that the * last UTF-8 character in the source buffer hasn't been cut off. @@ -2341,6 +2341,7 @@ UtfToUtfProc( */ *dst++ = *src++; + *chPtr = 0; /* reset surrogate handling */ } else if (pureNullMode == 1 && UCHAR(*src) == 0xC0 && (src + 1 < srcEnd) && UCHAR(*(src+1)) == 0x80) { /* @@ -2348,24 +2349,25 @@ UtfToUtfProc( */ *dst++ = 0; + *chPtr = 0; /* reset surrogate handling */ src += 2; - } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { + } else if (!TclUCS4Complete(src, srcEnd - src)) { /* - * Always check before using TclUtfToUniChar. Not doing can so + * Always check before using TclUtfToUCS4. Not doing can so * cause it run beyond the end of the buffer! If we happen such an * incomplete char its bytes are made to represent themselves. */ - *chPtr = (unsigned char) *src; + *chPtr = UCHAR(*src); src += 1; dst += Tcl_UniCharToUtf(*chPtr, dst); } else { - src += TclUtfToUniChar(src, chPtr); + src += TclUtfToUCS4(src, chPtr); if ((*chPtr | 0x7FF) == 0xDFFF) { /* A surrogate character is detected, handle especially */ - Tcl_UniChar low = *chPtr; - size_t len = (src <= srcEnd-3) ? Tcl_UtfToUniChar(src, &low) : 0; - if (((low | 0x3FF) != 0xDFFF) || (*chPtr & 0x400)) { + int low = *chPtr; + size_t len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; + if (((low & ~0x3FF) != 0xDC00) || (*chPtr & 0x400)) { *dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF); *dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF); *dst++ = (char) ((*chPtr | 0x80) & 0xBF); @@ -2557,11 +2559,6 @@ UtfToUtf16Proc( } src += TclUtfToUniChar(src, chPtr); - /* - * Need to handle this in a way that won't cause misalignment by - * casting dst to a Tcl_UniChar. [Bug 1122671] - */ - if (clientData) { #if TCL_UTF_MAX > 3 if (*chPtr <= 0xFFFF) { diff --git a/generic/tclEnsemble.c b/generic/tclEnsemble.c index b9c71a0..3c99631 100644 --- a/generic/tclEnsemble.c +++ b/generic/tclEnsemble.c @@ -2906,6 +2906,7 @@ TclCompileEnsemble( * compiled. */ CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr); Tcl_Obj *mapObj, *subcmdObj, *targetCmdObj, *listObj, **elems; Tcl_Obj *replaced = Tcl_NewObj(), *replacement; @@ -2915,7 +2916,6 @@ TclCompileEnsemble( int ourResult = TCL_ERROR; unsigned numBytes; const char *word; - DefineLineInformation; Tcl_IncrRefCount(replaced); if (parsePtr->numWords < depth + 1) { @@ -3244,6 +3244,7 @@ TclAttemptCompileProc( Command *cmdPtr, CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; int result, i; Tcl_Token *saveTokenPtr = parsePtr->tokenPtr; int savedStackDepth = envPtr->currStackDepth; @@ -3253,7 +3254,6 @@ TclAttemptCompileProc( #ifdef TCL_COMPILE_DEBUG int savedExceptDepth = envPtr->exceptDepth; #endif - DefineLineInformation; if (cmdPtr->compileProc == NULL) { return TCL_ERROR; @@ -3377,11 +3377,11 @@ CompileToInvokedCommand( Command *cmdPtr, CompileEnv *envPtr) /* Holds resulting instructions. */ { + DefineLineInformation; Tcl_Token *tokPtr; Tcl_Obj *objPtr, **words; const char *bytes; int i, numWords, cmdLit, extraLiteralFlags = LITERAL_CMD_NAME; - DefineLineInformation; /* * Push the words of the command. Take care; the command words may be diff --git a/generic/tclEnv.c b/generic/tclEnv.c index bc4f675..96d050d 100644 --- a/generic/tclEnv.c +++ b/generic/tclEnv.c @@ -127,6 +127,17 @@ TclSetupEnv( /*msg*/ 0, /*createPart1*/ 0, /*createPart2*/ 0, &arrayPtr); TclFindArrayPtrElements(varPtr, &namesHash); +#if defined(_WIN32) + if (tenviron == NULL) { + /* + * When we are started from main(), the _wenviron array could + * be NULL and will be initialized by the first _wgetenv() call. + */ + + (void) _wgetenv(L"WINDIR"); + } +#endif + /* * Go through the environment array and transfer its values into Tcl. At * the same time, remove those elements we add/update from the hash table diff --git a/generic/tclFileName.c b/generic/tclFileName.c index 32b217f..5705a11 100644 --- a/generic/tclFileName.c +++ b/generic/tclFileName.c @@ -2055,7 +2055,7 @@ TclGlob( * SkipToChar -- * * This function traverses a glob pattern looking for the next unquoted - * occurance of the specified character at the same braces nesting level. + * occurrence of the specified character at the same braces nesting level. * * Results: * Updates stringPtr to point to the matching character, or to the end of diff --git a/generic/tclIndexObj.c b/generic/tclIndexObj.c index 5aa4d42..4749e6e 100644 --- a/generic/tclIndexObj.c +++ b/generic/tclIndexObj.c @@ -785,7 +785,7 @@ PrefixLongestObjCmd( * Adjust in case we stopped in the middle of a UTF char. */ - resultLength = Tcl_UtfPrev(&resultString[i+1], + resultLength = TclUtfPrev(&resultString[i+1], resultString) - resultString; break; } diff --git a/generic/tclInt.h b/generic/tclInt.h index 9891e62..03b2850 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3079,7 +3079,6 @@ MODULE_SCOPE void TclInitNamespaceSubsystem(void); MODULE_SCOPE void TclInitNotifier(void); MODULE_SCOPE void TclInitObjSubsystem(void); MODULE_SCOPE int TclInterpReady(Tcl_Interp *interp); -MODULE_SCOPE int TclIsSpaceProc(int byte); MODULE_SCOPE int TclIsDigitProc(int byte); MODULE_SCOPE int TclIsBareword(int byte); MODULE_SCOPE Tcl_Obj * TclJoinPath(int elements, Tcl_Obj * const objv[], @@ -3252,8 +3251,14 @@ MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCount(int ch); #if TCL_UTF_MAX > 3 # define TclUtfToUCS4 Tcl_UtfToUniChar +# define TclUCS4Complete Tcl_UtfCharComplete +# define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ + ? ((length) >= 3) : Tcl_UtfCharComplete((src), (length))) #else - MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); + MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); +# define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ + ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length))) +# define TclChar16Complete Tcl_UtfCharComplete #endif MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); @@ -3305,6 +3310,16 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); /* + * Many parsing tasks need a common definition of whitespace. + * Use this routine and macro to achieve that and place + * optimization (fragile on changes) in one place. + */ + +MODULE_SCOPE int TclIsSpaceProc(int byte); +# define TclIsSpaceProcM(byte) \ + (((byte) > 0x20) ? 0 : TclIsSpaceProc(byte)) + +/* *---------------------------------------------------------------- * Command procedures in the generic core: *---------------------------------------------------------------- @@ -4645,8 +4660,8 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[]; #if TCL_UTF_MAX > 3 #define TclUtfToUniChar(str, chPtr) \ - ((((unsigned char) *(str)) < 0x80) ? \ - ((*(chPtr) = (unsigned char) *(str)), 1) \ + (((UCHAR(*(str))) < 0x80) ? \ + ((*(chPtr) = UCHAR(*(str))), 1) \ : Tcl_UtfToUniChar(str, chPtr)) #else #define TclUtfToUniChar(str, chPtr) \ @@ -4679,6 +4694,11 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[]; (numChars) = _count; \ } while (0); +#define TclUtfPrev(src, start) \ + (((src) < (start) + 2) ? (start) : \ + ((unsigned char) *((src) - 1)) < 0x80 ? (src) - 1 : \ + Tcl_UtfPrev(src, start)) + /* *---------------------------------------------------------------- * Macro that encapsulates the logic that determines when it is safe to diff --git a/generic/tclLiteral.c b/generic/tclLiteral.c index eb76884..22eff3c 100644 --- a/generic/tclLiteral.c +++ b/generic/tclLiteral.c @@ -994,7 +994,8 @@ RebuildLiteralTable( } tablePtr->numBuckets *= 4; - tablePtr->buckets = (LiteralEntry **)ckalloc(tablePtr->numBuckets * sizeof(LiteralEntry*)); + tablePtr->buckets = (LiteralEntry **)ckalloc( + tablePtr->numBuckets * sizeof(LiteralEntry*)); for (count=tablePtr->numBuckets, newChainPtr=tablePtr->buckets; count>0 ; count--, newChainPtr++) { *newChainPtr = NULL; diff --git a/generic/tclOptimize.c b/generic/tclOptimize.c index 095e6c5..4383c62 100644 --- a/generic/tclOptimize.c +++ b/generic/tclOptimize.c @@ -34,7 +34,7 @@ static void TrimUnreachable(CompileEnv *envPtr); #define AddrLength(address) \ (tclInstructionTable[*(unsigned char *)(address)].numBytes) #define InstLength(instruction) \ - (tclInstructionTable[(unsigned char)(instruction)].numBytes) + (tclInstructionTable[UCHAR(instruction)].numBytes) /* * ---------------------------------------------------------------------- diff --git a/generic/tclParse.c b/generic/tclParse.c index dbffed0..132e804 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -128,6 +128,8 @@ static int ParseWhiteSpace(const char *src, int numBytes, int *incompletePtr, char *typePtr); static int ParseAllWhiteSpace(const char *src, int numBytes, int *incompletePtr); +static int ParseHex(const char *src, int numBytes, + int *resultPtr); /* *---------------------------------------------------------------------- @@ -701,7 +703,7 @@ TclParseAllWhiteSpace( /* *---------------------------------------------------------------------- * - * TclParseHex -- + * ParseHex -- * * Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing * \x and \u escape sequences). At most numBytes bytes are scanned. @@ -721,7 +723,7 @@ TclParseAllWhiteSpace( */ int -TclParseHex( +ParseHex( const char *src, /* First character to parse. */ int numBytes, /* Max number of byes to scan */ int *resultPtr) /* Points to storage provided by caller where @@ -845,7 +847,7 @@ TclParseBackslash( result = 0xB; break; case 'x': - count += TclParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result); + count += ParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result); if (count == 2) { /* * No hexdigits -> This is just "x". @@ -856,30 +858,30 @@ TclParseBackslash( /* * Keep only the last byte (2 hex digits). */ - result = (unsigned char) result; + result = UCHAR(result); } break; case 'u': - count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); + count += ParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); if (count == 2) { /* * No hexdigits -> This is just "u". */ result = 'u'; - } else if (((result & 0xDC00) == 0xD800) && (count == 6) + } else if (((result & 0xFC00) == 0xD800) && (count == 6) && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) { /* If high surrogate is immediately followed by a low surrogate * escape, combine them into one character. */ int low; - int count2 = TclParseHex(p+7, 4, &low); - if ((count2 == 4) && ((low & 0xDC00) == 0xDC00)) { + int count2 = ParseHex(p+7, 4, &low); + if ((count2 == 4) && ((low & 0xFC00) == 0xDC00)) { result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000; count += count2 + 2; } } break; case 'U': - count += TclParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); + count += ParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); if (count == 2) { /* * No hexdigits -> This is just "U". @@ -1761,7 +1763,7 @@ Tcl_ParseBraces( openBrace = 0; break; case '#' : - if (openBrace && TclIsSpaceProc(src[-1])) { + if (openBrace && TclIsSpaceProcM(src[-1])) { Tcl_AppendToObj(Tcl_GetObjResult(parsePtr->interp), ": possible unbalanced brace in comment", -1); goto error; diff --git a/generic/tclProc.c b/generic/tclProc.c index 5a1b589..0d67c37 100644 --- a/generic/tclProc.c +++ b/generic/tclProc.c @@ -544,7 +544,7 @@ TclCreateProc( */ argnamei = argname; - argnamelast = Tcl_UtfPrev(argname + nameLength, argname); + argnamelast = (nameLength > 0) ? (argname + nameLength - 1) : argname; while (argnamei < argnamelast) { if (*argnamei == '(') { if (*argnamelast == ')') { /* We have an array element. */ @@ -565,7 +565,7 @@ TclCreateProc( "FORMALARGUMENTFORMAT", NULL); goto procError; } - argnamei = Tcl_UtfNext(argnamei); + argnamei++; } if (precompiled) { diff --git a/generic/tclResult.c b/generic/tclResult.c index 2336aad..baecf46 100644 --- a/generic/tclResult.c +++ b/generic/tclResult.c @@ -730,6 +730,7 @@ Tcl_AppendElement( char *dst; int size; int flags; + int quoteHash = 1; /* * If the string result is empty, move the object result to the string @@ -766,9 +767,17 @@ Tcl_AppendElement( * then this element will not lead a list, and need not have it's * leading '#' quoted. */ - + quoteHash = 0; + } else { + while ((--dst >= iPtr->appendResult) && TclIsSpaceProcM(*dst)) { + } + quoteHash = !TclNeedSpace(iPtr->appendResult, dst+1); + } + dst = iPtr->appendResult + iPtr->appendUsed; + if (!quoteHash) { flags |= TCL_DONT_QUOTE_HASH; } + iPtr->appendUsed += Tcl_ConvertElement(element, dst, flags); #endif /* !TCL_NO_DEPRECATED */ } diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index c4b9211..7ef2c60 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -576,7 +576,7 @@ TclParseNumber( * I, N, and whitespace. */ - if (TclIsSpaceProc(c)) { + if (TclIsSpaceProcM(c)) { if (flags & TCL_PARSE_NO_WHITESPACE) { goto endgame; } @@ -1134,7 +1134,7 @@ TclParseNumber( } /* FALLTHROUGH */ case sNANPAREN: - if (TclIsSpaceProc(c)) { + if (TclIsSpaceProcM(c)) { break; } if (numSigDigs < 13) { @@ -1188,7 +1188,7 @@ TclParseNumber( * Accept trailing whitespace. */ - while (len != 0 && TclIsSpaceProc(*p)) { + while (len != 0 && TclIsSpaceProcM(*p)) { p++; len--; } diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index c6d5323..78e49f9 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -1151,10 +1151,7 @@ Tcl_AppendLimitedToObj( { String *stringPtr; int toCopy = 0; - - if (Tcl_IsShared(objPtr)) { - Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj"); - } + int eLen = 0; if (length < 0) { length = (bytes ? strlen(bytes) : 0); @@ -1162,6 +1159,9 @@ Tcl_AppendLimitedToObj( if (length == 0) { return; } + if (limit <= 0) { + return; + } if (length <= limit) { toCopy = length; @@ -1169,8 +1169,12 @@ Tcl_AppendLimitedToObj( if (ellipsis == NULL) { ellipsis = "..."; } - toCopy = (bytes == NULL) ? limit - : Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes; + eLen = strlen(ellipsis); + while (eLen > limit) { + eLen = TclUtfPrev(ellipsis+eLen, ellipsis) - ellipsis; + } + + toCopy = TclUtfPrev(bytes+limit+1-eLen, bytes) - bytes; } /* @@ -1179,6 +1183,10 @@ Tcl_AppendLimitedToObj( * objPtr's string rep. */ + if (Tcl_IsShared(objPtr)) { + Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj"); + } + SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); @@ -1194,9 +1202,9 @@ Tcl_AppendLimitedToObj( stringPtr = GET_STRING(objPtr); if (stringPtr->hasUnicode && stringPtr->numChars > 0) { - AppendUtfToUnicodeRep(objPtr, ellipsis, strlen(ellipsis)); + AppendUtfToUnicodeRep(objPtr, ellipsis, eLen); } else { - AppendUtfToUtfRep(objPtr, ellipsis, strlen(ellipsis)); + AppendUtfToUtfRep(objPtr, ellipsis, eLen); } } @@ -2606,7 +2614,7 @@ AppendPrintfToObjVA( * multi-byte characters. */ - q = Tcl_UtfPrev(end, bytes); + q = TclUtfPrev(end, bytes); if (!Tcl_UtfCharComplete(q, (int)(end - q))) { end = q; } diff --git a/generic/tclStringTrim.h b/generic/tclStringTrim.h index 030e4ec..7067428 100644 --- a/generic/tclStringTrim.h +++ b/generic/tclStringTrim.h @@ -28,6 +28,8 @@ MODULE_SCOPE const char tclDefaultTrimSet[]; /* * The whitespace trimming set used when [concat]enating. This is a subset of * the above, and deliberately so. + * + * TODO: Find a reasonable way to guarantee in sync with TclIsSpaceProc() */ #define CONCAT_TRIM_SET " \f\v\r\t\n" diff --git a/generic/tclTest.c b/generic/tclTest.c index 3d300cd..8cca744 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -20,7 +20,11 @@ # define USE_TCL_STUBS #endif #include "tclInt.h" -#include "tclTomMath.h" +#ifdef TCL_WITH_EXTERNAL_TOMMATH +# include "tommath.h" +#else +# include "tclTomMath.h" +#endif #include "tclOO.h" #include <math.h> @@ -316,6 +320,8 @@ static Tcl_FSListVolumesProc SimpleListVolumes; static Tcl_FSPathInFilesystemProc SimplePathInFilesystem; static Tcl_Obj * SimpleRedirect(Tcl_Obj *pathPtr); static Tcl_FSMatchInDirectoryProc SimpleMatchInDirectory; +static Tcl_ObjCmdProc TestUtfNextCmd; +static Tcl_ObjCmdProc TestUtfPrevCmd; static Tcl_ObjCmdProc TestNumUtfCharsCmd; static Tcl_ObjCmdProc TestFindFirstCmd; static Tcl_ObjCmdProc TestFindLastCmd; @@ -444,9 +450,11 @@ Tcltest_Init( if (Tcl_InitStubs(interp, "8.5-", 0) == NULL) { return TCL_ERROR; } +#ifndef TCL_WITH_EXTERNAL_TOMMATH if (Tcl_TomMath_InitStubs(interp, "8.5-") == NULL) { return TCL_ERROR; } +#endif if (Tcl_OOInitStubs(interp) == NULL) { return TCL_ERROR; } @@ -577,6 +585,10 @@ Tcltest_Init( NULL, NULL); Tcl_CreateObjCommand(interp, "testsetobjerrorcode", TestsetobjerrorcodeCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "testutfnext", + TestUtfNextCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "testutfprev", + TestUtfPrevCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testnumutfchars", TestNumUtfCharsCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testfindfirst", @@ -1512,7 +1524,7 @@ TestdelCmd( return TCL_ERROR; } - dPtr = (DelCmd*)ckalloc(sizeof(DelCmd)); + dPtr = (DelCmd *)ckalloc(sizeof(DelCmd)); dPtr->interp = interp; dPtr->deleteCmd = (char *)ckalloc(strlen(argv[3]) + 1); strcpy(dPtr->deleteCmd, argv[3]); @@ -1541,7 +1553,7 @@ static void DelDeleteProc( void *clientData) /* String command to evaluate. */ { - DelCmd *dPtr = (DelCmd *) clientData; + DelCmd *dPtr = (DelCmd *)clientData; Tcl_EvalEx(dPtr->interp, dPtr->deleteCmd, -1, 0); Tcl_ResetResult(dPtr->interp); @@ -6377,11 +6389,6 @@ TestReport( if (interp == NULL) { /* This is bad, but not much we can do about it */ } else { - /* - * No idea why I decided to program this up using the old string-based - * API, but there you go. We should convert it to objects. - */ - Tcl_Obj *savedResult; Tcl_DString ds; @@ -6808,6 +6815,107 @@ SimpleListVolumes(void) } /* + * Used to check operations of Tcl_UtfNext. + * + * Usage: testutfnext -bytestring $bytes + */ + +static int +TestUtfNextCmd( + TCL_UNUSED(void *), + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) +{ + int numBytes; + char *bytes; + const char *result, *first; + char buffer[32]; + static const char tobetested[] = "A\xA0\xC0\xC1\xC2\xD0\xE0\xE8\xF2\xF7\xF8\xFE\xFF"; + const char *p = tobetested; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "?-bytestring? bytes"); + return TCL_ERROR; + } + bytes = Tcl_GetStringFromObj(objv[1], &numBytes); + + if (numBytes > (int)sizeof(buffer) - 4) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "\"testutfnext\" can only handle %d bytes", + (int)sizeof(buffer) - 4)); + return TCL_ERROR; + } + + memcpy(buffer + 1, bytes, numBytes); + buffer[0] = buffer[numBytes + 1] = buffer[numBytes + 2] = buffer[numBytes + 3] = '\xA0'; + + first = result = Tcl_UtfNext(buffer + 1); + while ((buffer[0] = *p++) != '\0') { + /* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */ + result = Tcl_UtfNext(buffer + 1); + if (first != result) { + Tcl_AppendResult(interp, "Tcl_UtfNext is not supposed to read src[-1]", NULL); + return TCL_ERROR; + } + } + p = tobetested; + while ((buffer[numBytes + 1] = *p++) != '\0') { + /* Run Tcl_UtfNext with many more possible bytes at src[end], all should give the same result */ + result = Tcl_UtfNext(buffer + 1); + if (first != result) { + first = buffer; + break; + } + } + + Tcl_SetObjResult(interp, Tcl_NewIntObj(first - buffer - 1)); + + return TCL_OK; +} +/* + * Used to check operations of Tcl_UtfPrev. + * + * Usage: testutfprev $bytes $offset + */ + +static int +TestUtfPrevCmd( + TCL_UNUSED(void *), + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) +{ + int numBytes, offset; + char *bytes; + const char *result; + + if (objc < 2 || objc > 3) { + Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?"); + return TCL_ERROR; + } + + bytes = Tcl_GetStringFromObj(objv[1], &numBytes); + + if (objc == 3) { + if (TCL_OK != Tcl_GetIntForIndex(interp, objv[2], numBytes, &offset)) { + return TCL_ERROR; + } + if (offset < 0) { + offset = 0; + } + if (offset > numBytes) { + offset = numBytes; + } + } else { + offset = numBytes; + } + result = TclUtfPrev(bytes + offset, bytes); + Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes)); + return TCL_OK; +} + +/* * Used to check correct string-length determining in Tcl_NumUtfChars */ diff --git a/generic/tclTomMath.h b/generic/tclTomMath.h index e9257a0..0d2d320 100644 --- a/generic/tclTomMath.h +++ b/generic/tclTomMath.h @@ -2,13 +2,15 @@ #define BN_TCL_H_ #ifdef MP_NO_STDINT -#ifdef HAVE_STDINT_H -# include <stdint.h> +# ifdef HAVE_STDINT_H +# include <stdint.h> #else -# include "../compat/stdint.h" +# include "../compat/stdint.h" +# endif #endif +#ifndef BN_H_ /* If BN_H_ already defined, don't try to include tommath.h again. */ +# include "tommath.h" #endif -#include "tommath.h" #include "tclTomMathDecls.h" #endif diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 8d1371a..2eb959e 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -55,7 +55,7 @@ #define UNICODE_SELF 0x80 /* - * The following structures are used when mapping between Unicode (UCS-2) and + * The following structures are used when mapping between Unicode and * UTF-8. */ @@ -68,10 +68,41 @@ static const unsigned char totalBytes[256] = { 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* End of "continuation byte section" */ - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +#if TCL_UTF_MAX > 3 + 4,4,4,4,4, +#else + 1,1,1,1,1, +#endif + 1,1,1,1,1,1,1,1,1,1,1 }; +static const unsigned char complete[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/* End of "continuation byte section" */ + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +#if TCL_UTF_MAX > 3 + 4,4,4,4,4, +#else + 3,3,3,3,3, +#endif + 1,1,1,1,1,1,1,1,1,1,1 +}; + +/* + * Functions used only in this module. + */ + +static int Invalid(const char *src); + /* *--------------------------------------------------------------------------- * @@ -103,7 +134,63 @@ TclUtfCount( } return 3; } + +/* + *--------------------------------------------------------------------------- + * + * Invalid -- + * + * Given a pointer to a two-byte prefix of a well-formed UTF-8 byte + * sequence (a lead byte followed by a trail byte) this routine + * examines those two bytes to determine whether the sequence is + * invalid in UTF-8. This might be because it is an overlong + * encoding, or because it encodes something out of the proper range. + * + * Given a pointer to the bytes \xF8 or \xFC , this routine will + * try to read beyond the end of the "bounds" table. Callers must + * prevent this. + * + * Given a pointer to something else (an ASCII byte, a trail byte, + * or another byte that can never begin a valid byte sequence such + * as \xF5) this routine returns false. That makes the routine poorly + * named, as it does not detect and report all invalid sequences. + * + * Callers have to take care that this routine does something useful + * for their needs. + * + * Results: + * A boolean. + *--------------------------------------------------------------------------- + */ +static const unsigned char bounds[28] = { + 0x80, 0x80, /* \xC0 accepts \x80 only */ + 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, + 0x80, 0xBF, /* (\xC4 - \xDC) -- all sequences valid */ + 0xA0, 0xBF, /* \xE0\x80 through \xE0\x9F are invalid prefixes */ + 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, /* (\xE4 - \xEC) -- all valid */ + 0x90, 0xBF, /* \xF0\x80 through \xF0\x8F are invalid prefixes */ + 0x80, 0x8F /* \xF4\x90 and higher are invalid prefixes */ +}; + +static int +Invalid( + const char *src) /* Points to lead byte of a UTF-8 byte sequence */ +{ + unsigned char byte = UCHAR(*src); + int index; + + if ((byte & 0xC3) == 0xC0) { + /* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */ + index = (byte - 0xC0) >> 1; + if (UCHAR(src[1]) < bounds[index] || UCHAR(src[1]) > bounds[index+1]) { + /* Out of bounds - report invalid. */ + return 1; + } + } + return 0; +} + /* *--------------------------------------------------------------------------- * @@ -364,7 +451,7 @@ static const unsigned short cp1252[32] = { int Tcl_UtfToUniChar( const char *src, /* The UTF-8 string. */ - int *chPtr)/* Filled with the unsigned int represented by + int *chPtr)/* Filled with the Unicode character represented by * the UTF-8 string. */ { int byte; @@ -423,7 +510,7 @@ Tcl_UtfToUniChar( * represents itself. */ } - else if (byte < 0xF8) { + else if (byte < 0xF5) { if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { /* * Four-byte-character lead byte followed by three trail bytes. @@ -448,8 +535,8 @@ Tcl_UtfToUniChar( int Tcl_UtfToChar16( const char *src, /* The UTF-8 string. */ - unsigned short *chPtr)/* Filled with the unsigned short represented by - * the UTF-8 string. */ + unsigned short *chPtr)/* Filled with the Tcl_UniChar represented by + * the UTF-8 string. This could be a surrogate too. */ { unsigned short byte; @@ -457,7 +544,7 @@ Tcl_UtfToChar16( * Unroll 1 to 4 byte UTF-8 sequences. */ - byte = *((unsigned char *) src); + byte = UCHAR(*src); if (byte < 0xC0) { /* * Handles properly formed UTF-8 characters between 0x01 and 0x7F. @@ -519,20 +606,20 @@ Tcl_UtfToChar16( * represents itself. */ } - else if (byte < 0xF8) { - if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { + else if (byte < 0xF5) { + if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) { /* - * Four-byte-character lead byte followed by three trail bytes. + * Four-byte-character lead byte followed by at least two trail bytes. + * We don't test the validity of 3th trail byte, see [ed29806ba] */ - unsigned short high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) + Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) | ((src[2] & 0x3F) >> 4)) - 0x40; - if (high >= 0x400) { - /* out of range, < 0x10000 or > 0x10FFFF */ - } else { + if (high < 0x400) { /* produce high surrogate, advance source pointer */ *chPtr = 0xD800 + high; return 1; } + /* out of range, < 0x10000 or > 0x10FFFF */ } /* @@ -544,7 +631,7 @@ Tcl_UtfToChar16( *chPtr = byte; return 1; } - + /* *--------------------------------------------------------------------------- * @@ -574,8 +661,12 @@ Tcl_UtfToUniCharDString( * DString. */ { int ch = 0, *w, *wString; - const char *p, *end; + const char *p; int oldLength; + /* Pointer to the end of string. Never read endPtr[0] */ + const char *endPtr = src + length; + /* Pointer to last byte where optimization still can be used */ + const char *optPtr = endPtr - TCL_UTF_MAX; if (src == NULL) { return NULL; @@ -597,20 +688,19 @@ Tcl_UtfToUniCharDString( w = wString; p = src; - end = src + length - 4; - while (p < end) { - p += Tcl_UtfToUniChar(p, &ch); + endPtr = src + length; + optPtr = endPtr - 4; + while (p <= optPtr) { + p += TclUtfToUCS4(p, &ch); *w++ = ch; } - end += 4; - while (p < end) { - if (Tcl_UtfCharComplete(p, end-p)) { - p += Tcl_UtfToUniChar(p, &ch); - } else { - ch = UCHAR(*p++); - } + while ((p < endPtr) && TclUCS4Complete(p, endPtr-p)) { + p += TclUtfToUCS4(p, &ch); *w++ = ch; } + while (p < endPtr) { + *w++ = UCHAR(*p++); + } *w = '\0'; Tcl_DStringSetLength(dsPtr, oldLength + ((char *) w - (char *) wString)); @@ -627,10 +717,13 @@ Tcl_UtfToChar16DString( * appended to this previously initialized * DString. */ { - unsigned short ch = 0; - unsigned short *w, *wString; - const char *p, *end; + unsigned short ch = 0, *w, *wString; + const char *p; int oldLength; + /* Pointer to the end of string. Never read endPtr[0] */ + const char *endPtr = src + length; + /* Pointer to last byte where optimization still can be used */ + const char *optPtr = endPtr - TCL_UTF_MAX; if (src == NULL) { return NULL; @@ -652,19 +745,19 @@ Tcl_UtfToChar16DString( w = wString; p = src; - end = src + length - 4; - while (p < end) { + endPtr = src + length; + optPtr = endPtr - 3; + while (p <= optPtr) { p += Tcl_UtfToChar16(p, &ch); *w++ = ch; } - end += 4; - while (p < end) { - if (Tcl_UtfCharComplete(p, end-p)) { + while (p < endPtr) { + if (TclChar16Complete(p, endPtr-p)) { p += Tcl_UtfToChar16(p, &ch); + *w++ = ch; } else { - ch = UCHAR(*p++); + *w++ = UCHAR(*p++); } - *w++ = ch; } *w = '\0'; Tcl_DStringSetLength(dsPtr, @@ -672,6 +765,7 @@ Tcl_UtfToChar16DString( return wString; } + /* *--------------------------------------------------------------------------- * @@ -697,7 +791,7 @@ Tcl_UtfCharComplete( * a complete UTF-8 character. */ int length) /* Length of above string in bytes. */ { - return length >= totalBytes[(unsigned char)*src]; + return length >= complete[UCHAR(*src)]; } /* @@ -721,40 +815,51 @@ Tcl_UtfCharComplete( int Tcl_NumUtfChars( const char *src, /* The UTF-8 string to measure. */ - int length) /* The length of the string in bytes, or -1 - * for strlen(string). */ + int length) /* The length of the string in bytes, or -1 + * for strlen(string). */ { Tcl_UniChar ch = 0; int i = 0; - /* - * The separate implementations are faster. - * - * Since this is a time-sensitive function, we also do the check for the - * single-byte char case specially. - */ - if (length < 0) { - while (*src != '\0') { + /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */ + while ((*src != '\0') && (i < INT_MAX)) { src += TclUtfToUniChar(src, &ch); i++; } - if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { - const char *endPtr = src + length - 4; + /* Will return value between 0 and length. No overflow checks. */ - while (src < endPtr) { + /* Pointer to the end of string. Never read endPtr[0] */ + const char *endPtr = src + length; + /* Pointer to last byte where optimization still can be used */ + const char *optPtr = endPtr - TCL_UTF_MAX; + + /* + * Optimize away the call in this loop. Justified because... + * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr) + * By initialization above (endPtr - optPtr) = TCL_UTF_MAX + * So (endPtr - src) >= TCL_UTF_MAX, and passing that to + * Tcl_UtfCharComplete we know will cause return of 1. + */ + while (src <= optPtr + /* && Tcl_UtfCharComplete(src, endPtr - src) */ ) { src += TclUtfToUniChar(src, &ch); i++; } - endPtr += 4; - while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { - src += TclUtfToUniChar(src, &ch); + /* Loop over the remaining string where call must happen */ + while (src < endPtr) { + if (Tcl_UtfCharComplete(src, endPtr - src)) { + src += TclUtfToUniChar(src, &ch); + } else { + /* + * src points to incomplete UTF-8 sequence + * Treat first byte as character and count it + */ + src++; + } i++; } - if (src < endPtr) { - i += endPtr - src; - } } return i; } @@ -764,7 +869,7 @@ Tcl_NumUtfChars( * * Tcl_UtfFindFirst -- * - * Returns a pointer to the first occurance of the given Unicode character + * Returns a pointer to the first occurrence of the given Unicode character * in the NULL-terminated UTF-8 string. The NULL terminator is considered * part of the UTF-8 string. Equivalent to Plan 9 utfrune(). * @@ -784,9 +889,9 @@ Tcl_UtfFindFirst( int ch) /* The Unicode character to search for. */ { while (1) { - int ucs4, len = TclUtfToUCS4(src, &ucs4); + int find, len = TclUtfToUCS4(src, &find); - if (ucs4 == ch) { + if (find == ch) { return src; } if (*src == '\0') { @@ -801,7 +906,7 @@ Tcl_UtfFindFirst( * * Tcl_UtfFindLast -- * - * Returns a pointer to the last occurance of the given Unicode character + * Returns a pointer to the last occurrence of the given Unicode character * in the NULL-terminated UTF-8 string. The NULL terminator is considered * part of the UTF-8 string. Equivalent to Plan 9 utfrrune(). * @@ -823,9 +928,9 @@ Tcl_UtfFindLast( const char *last = NULL; while (1) { - int ucs4, len = TclUtfToUCS4(src, &ucs4); + int find, len = TclUtfToUCS4(src, &find); - if (ucs4 == ch) { + if (find == ch) { last = src; } if (*src == '\0') { @@ -841,9 +946,11 @@ Tcl_UtfFindLast( * * Tcl_UtfNext -- * - * Given a pointer to some current location in a UTF-8 string, move - * forward one character. The caller must ensure that they are not asking - * for the next character after the last character in the string. + * Given a pointer to some location in a UTF-8 string, Tcl_UtfNext + * returns a pointer to the next UTF-8 character in the string. + * The caller must not ask for the next character after the last + * character in the string if the string is not terminated by a null + * character. * * Results: * The return value is the pointer to the next character in the UTF-8 @@ -859,15 +966,40 @@ const char * Tcl_UtfNext( const char *src) /* The current location in the string. */ { - Tcl_UniChar ch = 0; - int len = TclUtfToUniChar(src, &ch); + int left; + const char *next; -#if TCL_UTF_MAX <= 3 - if ((ch >= 0xD800) && (len < 3)) { - len += TclUtfToUniChar(src + len, &ch); + if (((*src) & 0xC0) == 0x80) { + if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) { + ++src; + } + return src; } -#endif - return src + len; + + left = totalBytes[UCHAR(*src)]; + next = src + 1; + while (--left) { + if ((*next & 0xC0) != 0x80) { + /* + * src points to non-trail byte; We ran out of trail bytes + * before the needs of the lead byte were satisfied. + * Let the (malformed) lead byte alone be a character + */ + return src + 1; + } + next++; + } + /* + * Call Invalid() here only if required conditions are met: + * src[0] is known a lead byte. + * src[1] is known a trail byte. + * Especially important to prevent calls when src[0] == '\xF8' or '\xFC' + * See tests utf-6.37 through utf-6.43 through valgrind or similar tool. + */ + if ((next == src + 1) || Invalid(src)) { + return src + 1; + } + return next; } /* @@ -893,31 +1025,99 @@ Tcl_UtfNext( const char * Tcl_UtfPrev( - const char *src, /* The current location in the string. */ - const char *start) /* Pointer to the beginning of the string, to - * avoid going backwards too far. */ + const char *src, /* A location in a UTF-8 string. */ + const char *start) /* Pointer to the beginning of the string */ { - const char *look; - int i, byte; - - look = --src; - for (i = 0; i < 4; i++) { - if (look < start) { - if (src < start) { - src = start; - } - break; - } - byte = *((unsigned char *) look); + int trailBytesSeen = 0; /* How many trail bytes have been verified? */ + const char *fallback = src - 1; + /* If we cannot find a lead byte that might + * start a prefix of a valid UTF byte sequence, + * we will fallback to a one-byte back step */ + const char *look = fallback; + /* Start search at the fallback position */ + + /* Quick boundary case exit. */ + if (fallback <= start) { + return start; + } + + do { + unsigned char byte = UCHAR(look[0]); + if (byte < 0x80) { - break; + /* + * Single byte character. Either this is a correct previous + * character, or it is followed by at least one trail byte + * which indicates a malformed sequence. In either case the + * correct result is to return the fallback. + */ + return fallback; } if (byte >= 0xC0) { - return look; + /* Non-trail byte; May be multibyte lead. */ + + if ((trailBytesSeen == 0) + /* + * We've seen no trailing context to use to check + * anything. From what we know, this non-trail byte + * is a prefix of a previous character, and accepting + * it (the fallback) is correct. + */ + + || (trailBytesSeen >= complete[byte])) { + /* + * That is, (1 + trailBytesSeen > needed). + * We've examined more bytes than needed to complete + * this lead byte. No matter about well-formedness or + * validity, the sequence starting with this lead byte + * will never include the fallback location, so we must + * return the fallback location. See test utf-7.17 + */ + return fallback; + } + + /* + * trailBytesSeen > 0, so we can examine look[1] safely. + * Use that capability to screen out invalid sequences. + */ + + if (Invalid(look)) { + /* Reject */ + return fallback; + } + return (const char *)look; } + + /* We saw a trail byte. */ + trailBytesSeen++; + + if ((const char *)look == start) { + /* + * Do not read before the start of the string + * + * If we get here, we've examined bytes at every location + * >= start and < src and all of them are trail bytes, + * including (*start). We need to return our fallback + * and exit this loop before we run past the start of the string. + */ + return fallback; + } + + /* Continue the search backwards... */ look--; - } - return src; + } while (trailBytesSeen < TCL_UTF_MAX); + + /* + * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a + * properly formed byte sequence to find, and we can stop looking, + * accepting the fallback (for TCL_UTF_MAX > 3) or just go back as + * far as we can. + */ +#if TCL_UTF_MAX > 3 + return fallback; +#else + return src - TCL_UTF_MAX; +#endif } /* @@ -925,7 +1125,7 @@ Tcl_UtfPrev( * * Tcl_UniCharAtIndex -- * - * Returns the Tcl_UniChar represented at the specified character + * Returns the Unicode character represented at the specified character * (not byte) position in the UTF-8 string. * * Results: @@ -943,27 +1143,23 @@ Tcl_UniCharAtIndex( int index) /* The position of the desired character. */ { Tcl_UniChar ch = 0; - int fullchar = 0; -#if TCL_UTF_MAX <= 3 - int len = 0; -#endif + int i = 0; - while (index-- >= 0) { -#if TCL_UTF_MAX <= 3 - src += (len = TclUtfToUniChar(src, &ch)); -#else - src += TclUtfToUniChar(src, &ch); -#endif + if (index < 0) { + return -1; + } + while (index-- > 0) { + i = TclUtfToUniChar(src, &ch); + src += i; } - fullchar = ch; #if TCL_UTF_MAX <= 3 - if ((ch >= 0xD800) && (len < 3)) { - /* If last Tcl_UniChar was a high surrogate, combine with low surrogate */ - (void)TclUtfToUniChar(src, &ch); - fullchar = (((fullchar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; + if ((ch >= 0xD800) && (i < 3)) { + /* Index points at character following high Surrogate */ + return -1; } #endif - return fullchar; + TclUtfToUCS4(src, &i); + return i; } /* @@ -1081,8 +1277,7 @@ int Tcl_UtfToUpper( char *str) /* String to convert in place. */ { - Tcl_UniChar ch = 0; - int upChar; + int ch, upChar; char *src, *dst; int len; @@ -1092,16 +1287,8 @@ Tcl_UtfToUpper( src = dst = str; while (*src) { - len = TclUtfToUniChar(src, &ch); - upChar = ch; -#if TCL_UTF_MAX <= 3 - if ((ch >= 0xD800) && (len < 3)) { - len += TclUtfToUniChar(src + len, &ch); - /* Combine surrogates */ - upChar = (((upChar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; - } -#endif - upChar = Tcl_UniCharToUpper(upChar); + len = TclUtfToUCS4(src, &ch); + upChar = Tcl_UniCharToUpper(ch); /* * To keep badly formed Utf strings from getting inflated by the @@ -1109,7 +1296,7 @@ Tcl_UtfToUpper( * char to dst if its size is <= the original char. */ - if ((len < TclUtfCount(upChar)) || ((upChar & 0xF800) == 0xD800)) { + if ((len < TclUtfCount(upChar)) || ((upChar & ~0x7FF) == 0xD800)) { memmove(dst, src, len); dst += len; } else { @@ -1143,8 +1330,7 @@ int Tcl_UtfToLower( char *str) /* String to convert in place. */ { - Tcl_UniChar ch = 0; - int lowChar; + int ch, lowChar; char *src, *dst; int len; @@ -1154,16 +1340,8 @@ Tcl_UtfToLower( src = dst = str; while (*src) { - len = TclUtfToUniChar(src, &ch); - lowChar = ch; -#if TCL_UTF_MAX <= 3 - if ((ch >= 0xD800) && (len < 3)) { - len += TclUtfToUniChar(src + len, &ch); - /* Combine surrogates */ - lowChar = (((lowChar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; - } -#endif - lowChar = Tcl_UniCharToLower(lowChar); + len = TclUtfToUCS4(src, &ch); + lowChar = Tcl_UniCharToLower(ch); /* * To keep badly formed Utf strings from getting inflated by the @@ -1171,7 +1349,7 @@ Tcl_UtfToLower( * char to dst if its size is <= the original char. */ - if ((len < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) { + if ((len < TclUtfCount(lowChar)) || ((lowChar & ~0x7FF) == 0xD800)) { memmove(dst, src, len); dst += len; } else { @@ -1206,8 +1384,7 @@ int Tcl_UtfToTitle( char *str) /* String to convert in place. */ { - Tcl_UniChar ch = 0; - int titleChar, lowChar; + int ch, titleChar, lowChar; char *src, *dst; int len; @@ -1219,18 +1396,10 @@ Tcl_UtfToTitle( src = dst = str; if (*src) { - len = TclUtfToUniChar(src, &ch); - titleChar = ch; -#if TCL_UTF_MAX <= 3 - if ((ch >= 0xD800) && (len < 3)) { - len += TclUtfToUniChar(src + len, &ch); - /* Combine surrogates */ - titleChar = (((titleChar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; - } -#endif - titleChar = Tcl_UniCharToTitle(titleChar); + len = TclUtfToUCS4(src, &ch); + titleChar = Tcl_UniCharToTitle(ch); - if ((len < TclUtfCount(titleChar)) || ((titleChar & 0xF800) == 0xD800)) { + if ((len < TclUtfCount(titleChar)) || ((titleChar & ~0x7FF) == 0xD800)) { memmove(dst, src, len); dst += len; } else { @@ -1239,21 +1408,14 @@ Tcl_UtfToTitle( src += len; } while (*src) { - len = TclUtfToUniChar(src, &ch); + len = TclUtfToUCS4(src, &ch); lowChar = ch; -#if TCL_UTF_MAX <= 3 - if ((ch >= 0xD800) && (len < 3)) { - len += TclUtfToUniChar(src + len, &ch); - /* Combine surrogates */ - lowChar = (((lowChar & 0x3FF) << 10) | (ch & 0x3FF)) + 0x10000; - } -#endif /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { lowChar = Tcl_UniCharToLower(lowChar); } - if ((len < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) { + if ((len < TclUtfCount(lowChar)) || ((lowChar & ~0x7FF) == 0xD800)) { memmove(dst, src, len); dst += len; } else { @@ -1547,6 +1709,7 @@ Tcl_UniCharToUpper( ch -= GetDelta(info); } } + /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } @@ -1578,6 +1741,7 @@ Tcl_UniCharToLower( ch += GetDelta(info); } } + /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } @@ -1617,6 +1781,7 @@ Tcl_UniCharToTitle( ch -= GetDelta(info); } } + /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } @@ -1804,6 +1969,7 @@ Tcl_UniCharIsControl( int ch) /* Unicode character to test. */ { if (UNICODE_OUT_OF_RANGE(ch)) { + /* Clear away extension bits, if any */ ch &= 0x1FFFFF; if ((ch == 0xE0001) || ((ch >= 0xE0020) && (ch <= 0xE007F))) { return 1; @@ -1975,7 +2141,7 @@ Tcl_UniCharIsSpace( */ if (ch < 0x80) { - return TclIsSpaceProc((char) ch); + return TclIsSpaceProcM((char) ch); } else if (UNICODE_OUT_OF_RANGE(ch)) { return 0; } else if (ch == 0x0085 || ch == 0x180E || ch == 0x200B diff --git a/generic/tclUtil.c b/generic/tclUtil.c index a13b037..37b3c83 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -408,7 +408,7 @@ TclMaxListLength( * No list element before leading white space. */ - count += 1 - TclIsSpaceProc(*bytes); + count += 1 - TclIsSpaceProcM(*bytes); /* * Count white space runs as potential element separators. @@ -418,7 +418,7 @@ TclMaxListLength( if ((numBytes == -1) && (*bytes == '\0')) { break; } - if (TclIsSpaceProc(*bytes)) { + if (TclIsSpaceProcM(*bytes)) { /* * Space run started; bump count. */ @@ -427,7 +427,7 @@ TclMaxListLength( do { bytes++; numBytes -= (numBytes != -1); - } while (numBytes && TclIsSpaceProc(*bytes)); + } while (numBytes && TclIsSpaceProcM(*bytes)); if ((numBytes == 0) || ((numBytes == -1) && (*bytes == '\0'))) { break; } @@ -444,7 +444,7 @@ TclMaxListLength( * No list element following trailing white space. */ - count -= TclIsSpaceProc(bytes[-1]); + count -= TclIsSpaceProcM(bytes[-1]); done: if (endPtr) { @@ -593,7 +593,7 @@ FindElement( */ limit = (string + stringLength); - while ((p < limit) && (TclIsSpaceProc(*p))) { + while ((p < limit) && (TclIsSpaceProcM(*p))) { p++; } if (p == limit) { /* no element found */ @@ -638,7 +638,7 @@ FindElement( } else if (openBraces == 1) { size = (p - elemStart); p++; - if ((p >= limit) || TclIsSpaceProc(*p)) { + if ((p >= limit) || TclIsSpaceProcM(*p)) { goto done; } @@ -648,7 +648,7 @@ FindElement( if (interp != NULL) { p2 = p; - while ((p2 < limit) && (!TclIsSpaceProc(*p2)) + while ((p2 < limit) && (!TclIsSpaceProcM(*p2)) && (p2 < p+20)) { p2++; } @@ -683,23 +683,6 @@ FindElement( break; /* - * Space: ignore if element is in braces or quotes; otherwise - * terminate element. - */ - - case ' ': - case '\f': - case '\n': - case '\r': - case '\t': - case '\v': - if ((openBraces == 0) && !inQuotes) { - size = (p - elemStart); - goto done; - } - break; - - /* * Double-quote: if element is in quotes then terminate it. */ @@ -707,7 +690,7 @@ FindElement( if (inQuotes) { size = (p - elemStart); p++; - if ((p >= limit) || TclIsSpaceProc(*p)) { + if ((p >= limit) || TclIsSpaceProcM(*p)) { goto done; } @@ -717,7 +700,7 @@ FindElement( if (interp != NULL) { p2 = p; - while ((p2 < limit) && (!TclIsSpaceProc(*p2)) + while ((p2 < limit) && (!TclIsSpaceProcM(*p2)) && (p2 < p+20)) { p2++; } @@ -730,6 +713,20 @@ FindElement( return TCL_ERROR; } break; + + default: + if (TclIsSpaceProcM(*p)) { + /* + * Space: ignore if element is in braces or quotes; + * otherwise terminate element. + */ + if ((openBraces == 0) && !inQuotes) { + size = (p - elemStart); + goto done; + } + } + break; + } p++; } @@ -760,7 +757,7 @@ FindElement( } done: - while ((p < limit) && (TclIsSpaceProc(*p))) { + while ((p < limit) && (TclIsSpaceProcM(*p))) { p++; } *elementPtr = elemStart; @@ -1116,12 +1113,6 @@ TclScanElement( case '[': /* TYPE_SUBS */ case '$': /* TYPE_SUBS */ case ';': /* TYPE_COMMAND_END */ - case ' ': /* TYPE_SPACE */ - case '\f': /* TYPE_SPACE */ - case '\n': /* TYPE_COMMAND_END */ - case '\r': /* TYPE_SPACE */ - case '\t': /* TYPE_SPACE */ - case '\v': /* TYPE_SPACE */ forbidNone = 1; extra++; /* Escape sequences all one byte longer. */ #if COMPAT @@ -1166,6 +1157,15 @@ TclScanElement( } /* TODO: Panic on improper encoding? */ break; + default: + if (TclIsSpaceProcM(*p)) { + forbidNone = 1; + extra++; /* Escape sequences all one byte longer. */ +#if COMPAT + preferBrace = 1; +#endif + } + break; } } length -= (length > 0); @@ -1666,42 +1666,6 @@ Tcl_Backslash( /* *---------------------------------------------------------------------- * - * UtfWellFormedEnd -- - * Checks the end of utf string is malformed, if yes - wraps bytes - * to the given buffer (as well-formed NTS string). The buffer - * argument should be initialized by the caller and ready to use. - * - * Results: - * The bytes with well-formed end of the string. - * - * Side effects: - * Buffer (DString) may be allocated, so must be released. - * - *---------------------------------------------------------------------- - */ - -static inline const char* -UtfWellFormedEnd( - Tcl_DString *buffer, /* Buffer used to hold well-formed string. */ - const char *bytes, /* Pointer to the beginning of the string. */ - int length) /* Length of the string. */ -{ - const char *l = bytes + length; - const char *p = Tcl_UtfPrev(l, bytes); - - if (Tcl_UtfCharComplete(p, l - p)) { - return bytes; - } - /* - * Malformed utf-8 end, be sure we've NTS to safe compare of end-character, - * avoid segfault by access violation out of range. - */ - Tcl_DStringAppend(buffer, bytes, length); - return Tcl_DStringValue(buffer); -} -/* - *---------------------------------------------------------------------- - * * TclTrimRight -- * Takes two counted strings in the Tcl encoding. Conceptually * finds the sub string (offset) to trim from the right side of the @@ -1716,27 +1680,38 @@ UtfWellFormedEnd( *---------------------------------------------------------------------- */ -static inline int -TrimRight( - const char *bytes, /* String to be trimmed... */ - int numBytes, /* ...and its length in bytes */ - const char *trim, /* String of trim characters... */ - int numTrim) /* ...and its length in bytes */ +int +TclTrimRight( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + /* Calls to TclUtfToUniChar() in this routine + * rely on (bytes[numBytes] == '\0'). */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ + /* Calls to TclUtfToUniChar() in this routine + * rely on (trim[numTrim] == '\0'). */ { - const char *p = bytes + numBytes; - int pInc; + const char *pp, *p = bytes + numBytes; int ch1, ch2; + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + /* * Outer loop: iterate over string to be trimmed. */ do { const char *q = trim; - int bytesLeft = numTrim; + int pInc = 0, bytesLeft = numTrim; - p = Tcl_UtfPrev(p, bytes); - pInc = TclUtfToUCS4(p, &ch1); + pp = TclUtfPrev(p, bytes); + do { + pp += pInc; + pInc = TclUtfToUCS4(pp, &ch1); + } while (pp + pInc < p); /* * Inner loop: scan trim string for match to current character. @@ -1758,44 +1733,13 @@ TrimRight( * No match; trim task done; *p is last non-trimmed char. */ - p += pInc; break; } + p = pp; } while (p > bytes); return numBytes - (p - bytes); } - -int -TclTrimRight( - const char *bytes, /* String to be trimmed... */ - int numBytes, /* ...and its length in bytes */ - const char *trim, /* String of trim characters... */ - int numTrim) /* ...and its length in bytes */ -{ - int res; - Tcl_DString bytesBuf, trimBuf; - - /* Empty strings -> nothing to do */ - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - - Tcl_DStringInit(&bytesBuf); - Tcl_DStringInit(&trimBuf); - bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); - trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); - - res = TrimRight(bytes, numBytes, trim, numTrim); - if (res > numBytes) { - res = numBytes; - } - - Tcl_DStringFree(&bytesBuf); - Tcl_DStringFree(&trimBuf); - - return res; -} /* *---------------------------------------------------------------------- @@ -1815,16 +1759,25 @@ TclTrimRight( *---------------------------------------------------------------------- */ -static inline int -TrimLeft( - const char *bytes, /* String to be trimmed... */ - int numBytes, /* ...and its length in bytes */ - const char *trim, /* String of trim characters... */ - int numTrim) /* ...and its length in bytes */ +int +TclTrimLeft( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + /* Calls to TclUtfToUniChar() in this routine + * rely on (bytes[numBytes] == '\0'). */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ + /* Calls to TclUtfToUniChar() in this routine + * rely on (trim[numTrim] == '\0'). */ { const char *p = bytes; int ch1, ch2; + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + /* * Outer loop: iterate over string to be trimmed. */ @@ -1863,37 +1816,6 @@ TrimLeft( return p - bytes; } - -int -TclTrimLeft( - const char *bytes, /* String to be trimmed... */ - int numBytes, /* ...and its length in bytes */ - const char *trim, /* String of trim characters... */ - int numTrim) /* ...and its length in bytes */ -{ - int res; - Tcl_DString bytesBuf, trimBuf; - - /* Empty strings -> nothing to do */ - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - - Tcl_DStringInit(&bytesBuf); - Tcl_DStringInit(&trimBuf); - bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); - trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); - - res = TrimLeft(bytes, numBytes, trim, numTrim); - if (res > numBytes) { - res = numBytes; - } - - Tcl_DStringFree(&bytesBuf); - Tcl_DStringFree(&trimBuf); - - return res; -} /* *---------------------------------------------------------------------- @@ -1915,41 +1837,39 @@ int TclTrim( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ + /* Calls in this routine + * rely on (bytes[numBytes] == '\0'). */ const char *trim, /* String of trim characters... */ int numTrim, /* ...and its length in bytes */ - int *trimRight) /* Offset from the end of the string. */ + /* Calls in this routine + * rely on (trim[numTrim] == '\0'). */ + int *trimRightPtr) /* Offset from the end of the string. */ { - int trimLeft; - Tcl_DString bytesBuf, trimBuf; + int trimLeft = 0, trimRight = 0; - *trimRight = 0; /* Empty strings -> nothing to do */ - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - - Tcl_DStringInit(&bytesBuf); - Tcl_DStringInit(&trimBuf); - bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); - trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); - - trimLeft = TrimLeft(bytes, numBytes, trim, numTrim); - if (trimLeft > numBytes) { - trimLeft = numBytes; - } - numBytes -= trimLeft; - /* have to trim yet (first char was already verified within TrimLeft) */ - if (numBytes > 1) { - bytes += trimLeft; - *trimRight = TrimRight(bytes, numBytes, trim, numTrim); - if (*trimRight > numBytes) { - *trimRight = numBytes; + if ((numBytes > 0) && (numTrim > 0)) { + + /* When bytes is NUL-terminated, returns 0 <= trimLeft <= numBytes */ + trimLeft = TclTrimLeft(bytes, numBytes, trim, numTrim); + numBytes -= trimLeft; + + /* If we did not trim the whole string, it starts with a character + * that we will not trim. Skip over it. */ + if (numBytes > 0) { + int ch; + const char *first = bytes + trimLeft; + bytes += TclUtfToUCS4(first, &ch); + numBytes -= (bytes - first); + + if (numBytes > 0) { + /* When bytes is NUL-terminated, returns + * 0 <= trimRight <= numBytes */ + trimRight = TclTrimRight(bytes, numBytes, trim, numTrim); + } } } - - Tcl_DStringFree(&bytesBuf); - Tcl_DStringFree(&trimBuf); - + *trimRightPtr = trimRight; return trimLeft; } @@ -2242,8 +2162,7 @@ Tcl_StringCaseMatch( int nocase) /* 0 for case sensitive, 1 for insensitive */ { int p, charLen; - const char *pstart = pattern; - Tcl_UniChar ch1 = 0, ch2 = 0; + int ch1 = 0, ch2 = 0; while (1) { p = *pattern; @@ -2284,10 +2203,10 @@ Tcl_StringCaseMatch( */ if (UCHAR(*pattern) < 0x80) { - ch2 = (Tcl_UniChar) + ch2 = (int) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); } else { - Tcl_UtfToUniChar(pattern, &ch2); + TclUtfToUCS4(pattern, &ch2); if (nocase) { ch2 = Tcl_UniCharToLower(ch2); } @@ -2303,7 +2222,7 @@ Tcl_StringCaseMatch( if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while (*str) { - charLen = TclUtfToUniChar(str, &ch1); + charLen = TclUtfToUCS4(str, &ch1); if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) { break; } @@ -2317,7 +2236,7 @@ Tcl_StringCaseMatch( */ while (*str) { - charLen = TclUtfToUniChar(str, &ch1); + charLen = TclUtfToUCS4(str, &ch1); if (ch2 == ch1) { break; } @@ -2331,7 +2250,7 @@ Tcl_StringCaseMatch( if (*str == '\0') { return 0; } - str += TclUtfToUniChar(str, &ch1); + str += TclUtfToUCS4(str, &ch1); } } @@ -2342,7 +2261,7 @@ Tcl_StringCaseMatch( if (p == '?') { pattern++; - str += TclUtfToUniChar(str, &ch1); + str += TclUtfToUCS4(str, &ch1); continue; } @@ -2353,15 +2272,15 @@ Tcl_StringCaseMatch( */ if (p == '[') { - Tcl_UniChar startChar = 0, endChar = 0; + int startChar = 0, endChar = 0; pattern++; if (UCHAR(*str) < 0x80) { - ch1 = (Tcl_UniChar) + ch1 = (int) (nocase ? tolower(UCHAR(*str)) : UCHAR(*str)); str++; } else { - str += Tcl_UtfToUniChar(str, &ch1); + str += TclUtfToUCS4(str, &ch1); if (nocase) { ch1 = Tcl_UniCharToLower(ch1); } @@ -2371,11 +2290,11 @@ Tcl_StringCaseMatch( return 0; } if (UCHAR(*pattern) < 0x80) { - startChar = (Tcl_UniChar) (nocase + startChar = (int) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); pattern++; } else { - pattern += Tcl_UtfToUniChar(pattern, &startChar); + pattern += TclUtfToUCS4(pattern, &startChar); if (nocase) { startChar = Tcl_UniCharToLower(startChar); } @@ -2386,11 +2305,11 @@ Tcl_StringCaseMatch( return 0; } if (UCHAR(*pattern) < 0x80) { - endChar = (Tcl_UniChar) (nocase + endChar = (int) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); pattern++; } else { - pattern += Tcl_UtfToUniChar(pattern, &endChar); + pattern += TclUtfToUCS4(pattern, &endChar); if (nocase) { endChar = Tcl_UniCharToLower(endChar); } @@ -2407,10 +2326,13 @@ Tcl_StringCaseMatch( break; } } + /* If we reach here, we matched. Need to move past closing ] */ while (*pattern != ']') { if (*pattern == '\0') { - pattern = Tcl_UtfPrev(pattern, pstart); - break; + /* We ran out of pattern after matching something in + * (unclosed!) brackets. So long as we ran out of string + * at the same time, we have a match. Otherwise, not. */ + return (*str == '\0'); } pattern++; } @@ -2435,8 +2357,8 @@ Tcl_StringCaseMatch( * each string match. */ - str += TclUtfToUniChar(str, &ch1); - pattern += TclUtfToUniChar(pattern, &ch2); + str += TclUtfToUCS4(str, &ch1); + pattern += TclUtfToUCS4(pattern, &ch2); if (nocase) { if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) { return 0; @@ -2839,9 +2761,37 @@ Tcl_DStringAppendElement( { char *dst = dsPtr->string + dsPtr->length; int needSpace = TclNeedSpace(dsPtr->string, dst); - char flags = needSpace ? TCL_DONT_QUOTE_HASH : 0; - int newSize = dsPtr->length + needSpace - + TclScanElement(element, -1, &flags); + char flags = 0; + int quoteHash = 1, newSize; + + if (needSpace) { + /* + * If we need a space to separate the new element from something + * already ending the string, we're not appending the first element + * of any list, so we need not quote any leading hash character. + */ + quoteHash = 0; + } else { + /* + * We don't need a space, maybe because there's some already there. + * Checking whether we might be appending a first element is a bit + * more involved. + * + * Backtrack over all whitespace. + */ + while ((--dst >= dsPtr->string) && TclIsSpaceProcM(*dst)) { + } + + /* Call again without whitespace to confound things. */ + quoteHash = !TclNeedSpace(dsPtr->string, dst+1); + } + if (!quoteHash) { + flags |= TCL_DONT_QUOTE_HASH; + } + newSize = dsPtr->length + needSpace + TclScanElement(element, -1, &flags); + if (!quoteHash) { + flags |= TCL_DONT_QUOTE_HASH; + } /* * Allocate a larger buffer for the string if the current one isn't large @@ -2873,8 +2823,8 @@ Tcl_DStringAppendElement( element = dsPtr->string + offset; } } - dst = dsPtr->string + dsPtr->length; } + dst = dsPtr->string + dsPtr->length; /* * Convert the new string to a list element and copy it into the buffer at @@ -2885,15 +2835,8 @@ Tcl_DStringAppendElement( *dst = ' '; dst++; dsPtr->length++; - - /* - * If we need a space to separate this element from preceding stuff, - * then this element will not lead a list, and need not have it's - * leading '#' quoted. - */ - - flags |= TCL_DONT_QUOTE_HASH; } + dsPtr->length += TclConvertElement(element, -1, dst, flags); dsPtr->string[dsPtr->length] = '\0'; return dsPtr->string; @@ -3520,63 +3463,69 @@ TclNeedSpace( /* * A space is needed unless either: * (a) we're at the start of the string, or - */ + * + * (NOTE: This check is now absorbed into the loop below.) + * if (end == start) { return 0; } + * + */ + /* * (b) we're at the start of a nested list-element, quoted with an open * curly brace; we can be nested arbitrarily deep, so long as the * first curly brace starts an element, so backtrack over open curly * braces that are trailing characters of the string; and - */ + * + * (NOTE: Every character our parser is looking for is a proper + * single-byte encoding of an ASCII value. It does not accept + * overlong encodings. Given that, there's no benefit using + * Tcl_UtfPrev. If it would find what we seek, so would byte-by-byte + * backward scan. Save routine call overhead and risk of wrong + * results should the behavior of Tcl_UtfPrev change in unexpected ways. + * Reconsider this if we ever start treating non-ASCII Unicode + * characters as meaningful list syntax, expanded Unicode spaces as + * element separators, for example.) + * end = Tcl_UtfPrev(end, start); while (*end == '{') { - if (end == start) { - return 0; - } - end = Tcl_UtfPrev(end, start); + if (end == start) { + return 0; + } + end = Tcl_UtfPrev(end, start); + } + + * + */ + + while ((--end >= start) && (*end == '{')) { + } + if (end < start) { + return 0; } /* * (c) the trailing character of the string is already a list-element - * separator (according to TclFindElement); that is, one of these - * characters: - * \u0009 \t TAB - * \u000A \n NEWLINE - * \u000B \v VERTICAL TAB - * \u000C \f FORM FEED - * \u000D \r CARRIAGE RETURN - * \u0020 SPACE - * with the condition that the penultimate character is not a - * backslash. + * separator, Use the same testing routine as TclFindElement to + * enforce consistency. */ - if (*end > 0x20) { + if (TclIsSpaceProcM(*end)) { + int result = 0; + /* - * Performance tweak. All ASCII spaces are <= 0x20. So get a quick - * answer for most characters before comparing against all spaces in - * the switch below. - * - * NOTE: Remove this if other Unicode spaces ever get accepted as - * list-element separators. + * Trailing whitespace might be part of a backslash escape + * sequence. Handle that possibility. */ - return 1; - } - switch (*end) { - case ' ': - case '\t': - case '\n': - case '\r': - case '\v': - case '\f': - if ((end == start) || (end[-1] != '\\')) { - return 0; + while ((--end >= start) && (*end == '\\')) { + result = !result; } + return result; } return 1; } @@ -4178,7 +4127,7 @@ TclCheckBadOctal( * zero. Try to generate a meaningful error message. */ - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } if (*p == '+' || *p == '-') { @@ -4191,7 +4140,7 @@ TclCheckBadOctal( while (isdigit(UCHAR(*p))) { /* INTL: digit. */ p++; } - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } if (*p == '\0') { diff --git a/generic/tclZipfs.c b/generic/tclZipfs.c index ed7aad6..9e1dee4 100644 --- a/generic/tclZipfs.c +++ b/generic/tclZipfs.c @@ -2244,16 +2244,15 @@ ZipAddFile( return TCL_ERROR; } ch = (int) (r * 256); - kvbuf[i + 12] = (unsigned char) zencode(keys, crc32tab, ch, tmp); + kvbuf[i + 12] = UCHAR(zencode(keys, crc32tab, ch, tmp)); } Tcl_ResetResult(interp); init_keys(passwd, keys, crc32tab); for (i = 0; i < 12 - 2; i++) { - kvbuf[i] = (unsigned char) - zencode(keys, crc32tab, kvbuf[i + 12], tmp); + kvbuf[i] = UCHAR(zencode(keys, crc32tab, kvbuf[i + 12], tmp)); } - kvbuf[i++] = (unsigned char) zencode(keys, crc32tab, crc >> 16, tmp); - kvbuf[i++] = (unsigned char) zencode(keys, crc32tab, crc >> 24, tmp); + kvbuf[i++] = UCHAR(zencode(keys, crc32tab, crc >> 16, tmp)); + kvbuf[i++] = UCHAR(zencode(keys, crc32tab, crc >> 24, tmp)); len = Tcl_Write(out, (char *) kvbuf, 12); memset(kvbuf, 0, 24); if (len != 12) { diff --git a/generic/tclZlib.c b/generic/tclZlib.c index 19ea77b..319d1d1 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -3884,7 +3884,7 @@ ResultGenerate( if (((flush == Z_SYNC_FLUSH) && (e == Z_BUF_ERROR)) || (e == Z_STREAM_END) - || (e == Z_OK && cd->inStream.avail_out == 0)) { + || (e == Z_OK && written == 0)) { return TCL_OK; } diff --git a/tests/binary.test b/tests/binary.test index a777b2a..b06afe0 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -16,6 +16,7 @@ if {[lsearch [namespace children] ::tcltest] == -1} { } testConstraint bigEndian [expr {$tcl_platform(byteOrder) eq "bigEndian"}] testConstraint littleEndian [expr {$tcl_platform(byteOrder) eq "littleEndian"}] +testConstraint testbytestring [llength [info commands testbytestring]] # Big test for correct ordering of data in [expr] proc testIEEE {} { @@ -2941,7 +2942,19 @@ test binary-79.2 {Tcl_SetByteArrayLength} testsetbytearraylength { testsetbytearraylength [string cat \u0141 B C] 1 } A - +test binary-80.1 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring "\u4E4E" +} -result "expected byte sequence but character 0 was '\u4E4E' (U+004E4E)" +test binary-80.2 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring [testbytestring "\x00\xA0\xA0\xA0\xE4\xB9\x8E"] +} -result "expected byte sequence but character 4 was '\u4E4E' (U+004E4E)" +test binary-80.3 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring [testbytestring "\xC0\x80\xA0\xA0\xA0\xE4\xB9\x8E"] +} -result "expected byte sequence but character 4 was '\u4E4E' (U+004E4E)" +test binary-80.4 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring [testbytestring "\xC0\x80\xA0\xA0\xA0\xF0\x9F\x98\x81"] +} -result "expected byte sequence but character 4 was '\U01F601' (U+01F601)" + # ---------------------------------------------------------------------- # cleanup diff --git a/tests/clock.test b/tests/clock.test index 55607ce..f9db14b 100644 --- a/tests/clock.test +++ b/tests/clock.test @@ -35025,6 +35025,24 @@ test clock-30.8 {clock add months, negative} { set x4 [clock format $f4 -format %Y-%m-%d -timezone :UTC] list $x1 $x2 $x3 $x4 } {2000-02-29 2000-01-31 1999-12-31 1999-11-30} +test clock-30.8a {clock add months, negative, over threshold of a year} { + set t [clock scan 2019-01-31 -format %Y-%m-%d -gmt 1] + list [clock format [clock add $t -1 month -gmt 1] -format %Y-%m-%d -gmt 1] \ + [clock format [clock add $t -2 month -gmt 1] -format %Y-%m-%d -gmt 1] \ + [clock format [clock add $t -3 month -gmt 1] -format %Y-%m-%d -gmt 1] \ + [clock format [clock add $t -4 month -gmt 1] -format %Y-%m-%d -gmt 1] +} {2018-12-31 2018-11-30 2018-10-31 2018-09-30} +test clock-30.8b {clock add months, negative, over threshold of a year} { + set t [clock scan 2000-01-28 -format %Y-%m-%d -gmt 1] + for {set i 1} {$i < 24} {incr i 1} { + set f1 [clock add $t -$i month -gmt 1] + set f2 [clock add $f1 $i month -gmt 1] + if {$f2 != $t} { + error "\[clock add $t -$i month -gmt 1\] does not consider\ + \[clock add $f1 $i month -gmt 1\] != $t" + } + } +} {} test clock-30.9 {clock add days} { set t [clock scan {2000-01-01 12:34:56} -format {%Y-%m-%d %H:%M:%S} \ -timezone :UTC] diff --git a/tests/dstring.test b/tests/dstring.test index 06121a3..5feb355 100644 --- a/tests/dstring.test +++ b/tests/dstring.test @@ -180,16 +180,37 @@ test dstring-2.12 {appending list elements} -constraints testdstring -setup { } -cleanup { testdstring free } -result {x #} -test dstring-2.13 {appending list elements} -constraints testdstring -body { - # This test shows lack of sophistication in Tcl_DStringAppendElement's - # decision about whether #-quoting can be disabled. +test dstring-2.13 {appending list elements} -constraints testdstring -setup { testdstring free +} -body { + # This test checks the sophistication in Tcl_DStringAppendElement's + # decision about whether #-quoting can be disabled. testdstring append "x " -1 testdstring element # testdstring get } -cleanup { testdstring free -} -result {x {#}} +} -result {x #} +test dstring-2.14 {appending list elements} -constraints testdstring -setup { + testdstring free +} -body { + testdstring append " " -1 + testdstring element # + testdstring get +} -cleanup { + testdstring free +} -result { {#}} +test dstring-2.15 {appending list elements} -constraints testdstring -setup { + testdstring free +} -body { + # This test checks the sophistication in Tcl_DStringAppendElement's + # decision about whether #-quoting can be disabled. + testdstring append "x " -1 + testdstring element # + testdstring get +} -cleanup { + testdstring free +} -result {x #} test dstring-3.1 {nested sublists} -constraints testdstring -setup { testdstring free @@ -306,10 +327,11 @@ test dstring-3.9 {appending list elements} -constraints testdstring -setup { } -cleanup { testdstring free } -result {x {x #}} -test dstring-3.10 {appending list elements} -constraints testdstring -body { - # This test shows lack of sophistication in Tcl_DStringAppendElement's - # decision about whether #-quoting can be disabled. +test dstring-3.10 {appending list elements} -constraints testdstring -setup { testdstring free +} -body { + # This test checks the sophistication in Tcl_DStringAppendElement's + # decision about whether #-quoting can be disabled. testdstring append x -1 testdstring start testdstring append "x " -1 @@ -318,7 +340,33 @@ test dstring-3.10 {appending list elements} -constraints testdstring -body { testdstring get } -cleanup { testdstring free -} -result {x {x {#}}} +} -result {x {x #}} +test dstring-3.11 {appending list elements} -constraints testdstring -setup { + testdstring free +} -body { + testdstring append x -1 + testdstring start + testdstring append " " -1 + testdstring element # + testdstring end + testdstring get +} -cleanup { + testdstring free +} -result {x { {#}}} +test dstring-3.12 {appending list elements} -constraints testdstring -setup { + testdstring free +} -body { + # This test checks the sophistication in Tcl_DStringAppendElement's + # decision about whether #-quoting can be disabled. + testdstring append x -1 + testdstring start + testdstring append "x " -1 + testdstring element # + testdstring end + testdstring get +} -cleanup { + testdstring free +} -result {x {x #}} test dstring-4.1 {truncation} -constraints testdstring -setup { testdstring free diff --git a/tests/encoding.test b/tests/encoding.test index 664a041..f483160 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -283,16 +283,16 @@ test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding} # OpenEncodingFile is fully tested by the rest of the tests in this file. test encoding-12.1 {LoadTableEncoding: normal encoding} { - set x [encoding convertto iso8859-3 \u120] - append x [encoding convertto iso8859-3 \ud5] - append x [encoding convertfrom iso8859-3 \xd5] + set x [encoding convertto iso8859-3 \u0120] + append x [encoding convertto iso8859-3 \xD5] + append x [encoding convertfrom iso8859-3 \xD5] } "\xd5?\u120" test encoding-12.2 {LoadTableEncoding: single-byte encoding} { set x [encoding convertto iso8859-3 ab\u0120g] - append x [encoding convertfrom iso8859-3 ab\xd5g] + append x [encoding convertfrom iso8859-3 ab\xD5g] } "ab\xd5gab\u120g" test encoding-12.3 {LoadTableEncoding: multi-byte encoding} { - set x [encoding convertto shiftjis ab\u4e4eg] + set x [encoding convertto shiftjis ab\u4E4Eg] append x [encoding convertfrom shiftjis ab\x8c\xc1g] } "ab\x8c\xc1gab\u4e4eg" test encoding-12.4 {LoadTableEncoding: double-byte encoding} { @@ -391,7 +391,12 @@ test encoding-15.15 {UtfToUtfProc low surrogate character output} { binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 eda882} -test encoding-15.16 {UtfToUtfProc emoji character output} { +test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { + set x \xF0\xA0\xA1\xC2 + set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2] + list [string length $x] $y +} "4 \xF0\xA0\xA1\xC2" +test encoding-15.17 {UtfToUtfProc emoji character output} { set x \U1F602 set y [encoding convertto utf-8 \U1F602] binary scan $y H* z diff --git a/tests/lsearch.test b/tests/lsearch.test index b188924..2086615 100644 --- a/tests/lsearch.test +++ b/tests/lsearch.test @@ -149,14 +149,14 @@ test lsearch-5.2 {binary search} { } set res } $decreasingIntegers -test lsearch-5.3 {binary search finds leftmost occurances} { +test lsearch-5.3 {binary search finds leftmost occurrences} { set res {} for {set i 0} {$i < 10} {incr i} { lappend res [lsearch -integer -sorted $repeatingIncreasingIntegers $i] } set res } [list 0 5 10 15 20 25 30 35 40 45] -test lsearch-5.4 {binary search -decreasing finds leftmost occurances} { +test lsearch-5.4 {binary search -decreasing finds leftmost occurrences} { set res {} for {set i 9} {$i >= 0} {incr i -1} { lappend res [lsearch -sorted -integer -decreasing \ diff --git a/tests/main.test b/tests/main.test index 5b43b43..0398d36 100644 --- a/tests/main.test +++ b/tests/main.test @@ -613,7 +613,7 @@ namespace eval ::tcl::test::main { variable wait chan event $f readable \ [list set [namespace which -variable wait] "child exit"] - set id [after 2000 [list set [namespace which -variable wait] timeout]] + set id [after 5000 [list set [namespace which -variable wait] timeout]] vwait [namespace which -variable wait] after cancel $id set wait @@ -636,7 +636,7 @@ namespace eval ::tcl::test::main { variable wait chan event $f readable \ [list set [namespace which -variable wait] "child exit"] - set id [after 2000 [list set [namespace which -variable wait] timeout]] + set id [after 5000 [list set [namespace which -variable wait] timeout]] vwait [namespace which -variable wait] after cancel $id set wait diff --git a/tests/reg.test b/tests/reg.test index b9dc538..dabd3bc 100644 --- a/tests/reg.test +++ b/tests/reg.test @@ -626,6 +626,7 @@ expectMatch 13.14 P "a\\rb" "a\rb" "a\rb" expectMatch 13.15 P "a\\tb" "a\tb" "a\tb" expectMatch 13.16 P "a\\u0008x" "a\bx" "a\bx" expectMatch 13.17 P {a\u008x} "a\bx" "a\bx" +expectError 13.17.1 - {a\ux} EESCAPE expectMatch 13.18 P "a\\u00088x" "a\b8x" "a\b8x" expectMatch 13.19 P "a\\U00000008x" "a\bx" "a\bx" expectMatch 13.20 P {a\U0000008x} "a\bx" "a\bx" diff --git a/tests/string.test b/tests/string.test index aca3570..12821c0 100644 --- a/tests/string.test +++ b/tests/string.test @@ -31,7 +31,8 @@ proc makeShared {s} {uplevel 1 [list lappend copy $s]; return $s} testConstraint testobj [expr {[info commands testobj] ne {}}] testConstraint testindexobj [expr {[info commands testindexobj] ne {}}] testConstraint testevalex [expr {[info commands testevalex] ne {}}] -testConstraint tip389 [expr {[string length \U010000] == 2}] +testConstraint utf16 [expr {[string length \U010000] == 2}] +testConstraint testbytestring [llength [info commands testbytestring]] # Used for constraining memory leak tests testConstraint memory [llength [info commands memory]] @@ -505,7 +506,7 @@ test string-5.19.$noComp {string index, bytearray object out of bounds} { test string-5.20.$noComp {string index, bytearray object out of bounds} -body { run {string index [binary format I* {0x50515253 0x52}] 20} } -result {} -test string-5.21.$noComp {string index, surrogates, bug [11ae2be95dac9417]} -constraints {tip389} -body { +test string-5.21.$noComp {string index, surrogates, bug [11ae2be95dac9417]} -constraints utf16 -body { run {list [string index a\U100000b 1] [string index a\U100000b 2] [string index a\U100000b 3]} } -result [list \U100000 {} b] @@ -1502,7 +1503,7 @@ test string-12.22.$noComp {string range, shimmering binary/index} { binary scan $s a* x run {string range $s $s end} } 000000001 -test string-12.23.$noComp {string range, surrogates, bug [11ae2be95dac9417]} tip389 { +test string-12.23.$noComp {string range, surrogates, bug [11ae2be95dac9417]} utf16 { run {list [string range a\U100000b 1 1] [string range a\U100000b 2 2] [string range a\U100000b 3 3]} } [list \U100000 {} b] @@ -1743,7 +1744,7 @@ test string-17.7.$noComp {string totitle, unicode} { test string-17.8.$noComp {string totitle, compiled} { lindex [run {string totitle [list aa bb [list cc]]}] 0 } Aa -test string-17.9.$noComp {string totitle, surrogates, bug [11ae2be95dac9417]} tip389 { +test string-17.9.$noComp {string totitle, surrogates, bug [11ae2be95dac9417]} utf16 { run {list [string totitle a\U118c0c 1 1] [string totitle a\U118c0c 2 2] \ [string totitle a\U118c0c 3 3]} } [list a\U118a0c a\U118c0C a\U118c0C] @@ -1813,6 +1814,34 @@ test string-20.5.$noComp {string trimright} { test string-20.6.$noComp {string trimright, unicode default} { run {string trimright ABC\u1361\x85\x00\xA0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u2028\u2029\u202F\u205F\u3000} } ABC\u1361 +test string-20.7.$noComp {string trim on not valid utf-8 sequence (consider NTS as continuation char), bug [c61818e4c9]} {testbytestring} { + set result {} + set a [testbytestring \xC0\x80\xA0] + set b foo$a + set m [list \x00 U \xA0 V [testbytestring \xA0] W] + lappend result [string map $m $b] + lappend result [string map $m [run {string trimright $b x}]] + lappend result [string map $m [run {string trimright $b \x00}]] + lappend result [string map $m [run {string trimleft $b fox}]] + lappend result [string map $m [run {string trimleft $b fo\x00}]] + lappend result [string map $m [run {string trim $b fox}]] + lappend result [string map $m [run {string trim $b fo\x00}]] +} [list {*}[lrepeat 3 fooUV] {*}[lrepeat 2 UV V]] +test string-20.8.$noComp {[c61818e4c9] [string trimright] fails when UtfPrev is ok} {testbytestring} { + set result {} + set a [testbytestring \xE8\xA0] + set b foo$a + set m [list \xE8 U \xA0 V [testbytestring \xE8] W [testbytestring \xA0] X]] + lappend result [string map $m $b] + lappend result [string map $m [run {string trimright $b x}]] + lappend result [string map $m [run {string trimright $b \xE8}]] + lappend result [string map $m [run {string trimright $b [testbytestring \xE8]}]] + lappend result [string map $m [run {string trimright $b \xA0}]] + lappend result [string map $m [run {string trimright $b [testbytestring \xA0]}]] + lappend result [string map $m [run {string trimright $b \xE8\xA0}]] + lappend result [string map $m [run {string trimright $b [testbytestring \xE8\xA0]}]] + lappend result [string map $m [run {string trimright $b \u0000}]] +} [list {*}[lrepeat 4 fooUV] {*}[lrepeat 2 fooU] {*}[lrepeat 2 foo] fooUV] test string-21.1.$noComp {string wordend} -body { list [catch {run {string wordend a}} msg] $msg @@ -1859,7 +1888,7 @@ test string-21.14.$noComp {string wordend, unicode} -body { test string-21.15.$noComp {string wordend, unicode} -body { run {string wordend "\U1D7CA\U1D7CA abc" 0} } -result 2 -test string-21.16.$noComp {string wordend, unicode} -constraints tip389 -body { +test string-21.16.$noComp {string wordend, unicode} -constraints utf16 -body { run {string wordend "\U1D7CA\U1D7CA abc" 10} } -result 8 @@ -1902,10 +1931,15 @@ test string-22.12.$noComp {string wordstart, unicode} -body { test string-22.13.$noComp {string wordstart, unicode} -body { run {string wordstart "\uC700\uC700 abc" 8} } -result 3 -test string-22.14.$noComp {string wordstart, unicode} -body { +test string-22.14.$noComp {string wordstart, invalid UTF-8} -constraints testbytestring -body { + # See Bug c61818e4c9 + set demo [testbytestring "abc def\xE0\xA9ghi"] + run {string index $demo [string wordstart $demo 10]} +} -result g +test string-22.15.$noComp {string wordstart, unicode} -body { run {string wordstart "\U1D7CA\U1D7CA abc" 0} } -result 0 -test string-22.15.$noComp {string wordstart, unicode} -constraints tip389 -body { +test string-22.16.$noComp {string wordstart, unicode} -constraints utf16 -body { run {string wordstart "\U1D7CA\U1D7CA abc" 10} } -result 5 diff --git a/tests/utf.test b/tests/utf.test index 507c6f9..fdbc4e1 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -16,184 +16,945 @@ if {[lsearch [namespace children] ::tcltest] == -1} { ::tcltest::loadTestedCommands catch [list package require -exact Tcltest [info patchlevel]] +testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] +testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] +testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}] +testConstraint ucs4 [expr {[testConstraint fullutf] + && [string length [format %c 0x10000]] == 1}] +testConstraint ucs2_utf16 [expr {![testConstraint ucs4]}] + +testConstraint Uesc [expr {"\U0041" eq "A"}] +testConstraint pre388 [expr {"\x741" eq "A"}] +testConstraint pairsTo4bytes [expr {[llength [info commands teststringbytes]] + && [string length [teststringbytes \uD83D\uDCA9]] == 4}] + testConstraint testbytestring [llength [info commands testbytestring]] +testConstraint testfindfirst [llength [info commands testfindfirst]] +testConstraint testfindlast [llength [info commands testfindlast]] +testConstraint testnumutfchars [llength [info commands testnumutfchars]] +testConstraint teststringobj [llength [info commands teststringobj]] +testConstraint testutfnext [llength [info commands testutfnext]] +testConstraint testutfprev [llength [info commands testutfprev]] -catch {unset x} +testConstraint tip413 [expr {[string trim \x00] eq {}}] -# Some tests require support for 4-byte UTF-8 sequences -testConstraint tip389 [expr {[string length \U010000] == 2}] +catch {unset x} test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} testbytestring { - expr {"\x01" eq [testbytestring "\x01"]} + expr {"\x01" eq [testbytestring \x01]} } 1 test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring { - expr {"\x00" eq [testbytestring "\xC0\x80"]} + expr {"\x00" eq [testbytestring \xC0\x80]} } 1 test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} testbytestring { - expr {"\xE0" eq [testbytestring "\xC3\xA0"]} + expr {"\xE0" eq [testbytestring \xC3\xA0]} } 1 test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} testbytestring { - expr {"\u4E4E" eq [testbytestring "\xE4\xB9\x8E"]} + expr {"\u4E4E" eq [testbytestring \xE4\xB9\x8E]} } 1 test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring { - expr {[format %c 0x110000] eq [testbytestring "\xEF\xBF\xBD"]} + expr {[format %c 0x110000] eq [testbytestring \xEF\xBF\xBD]} } 1 test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { - expr {[format %c -1] eq [testbytestring "\xEF\xBF\xBD"]} + expr {[format %c -1] eq [testbytestring \xEF\xBF\xBD]} +} 1 +test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {fullutf Uesc testbytestring} { + expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]} } 1 -test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} -constraints testbytestring -body { - expr {"\U014E4E" eq [testbytestring "\xF0\x94\xB9\x8E"]} -} -result 1 +test utf-1.7.1 {Tcl_UniCharToUtf: 4 byte sequences} {ucs2 Uesc testbytestring} { + expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]} +} 0 test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring { - expr {"\uD842" eq [testbytestring "\xED\xA1\x82"]} + expr {"\uD842" eq [testbytestring \xED\xA1\x82]} } 1 test utf-1.9 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { - expr {"\uDC42" eq [testbytestring "\xED\xB1\x82"]} + expr {"\uDC42" eq [testbytestring \xED\xB1\x82]} } 1 test utf-1.10 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring { - expr {[format %c 0xD842] eq [testbytestring "\xED\xA1\x82"]} + expr {[format %c 0xD842] eq [testbytestring \xED\xA1\x82]} } 1 test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { - expr {[format %c 0xDC42] eq [testbytestring "\xED\xB1\x82"]} + expr {[format %c 0xDC42] eq [testbytestring \xED\xB1\x82]} } 1 -test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} testbytestring { - expr {"\uD842\uDC42" eq [testbytestring "\xF0\xA0\xA1\x82"]} +test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} { + expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]} } 1 -test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} testbytestring { - expr {"\UD842" eq [testbytestring "\xEF\xBF\xBD"]} +test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc testbytestring} { + expr {"\UD842" eq [testbytestring \xEF\xBF\xBD]} } 1 test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" -} {3} +} 3 test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring { - string length [testbytestring "\x82\x83\x84"] -} {3} + string length [testbytestring \x82\x83\x84] +} 3 test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} testbytestring { - string length [testbytestring "\xC2"] -} {1} -test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} testbytestring { - string length [testbytestring "\xC2\xA2"] -} {1} + string length [testbytestring \xC2] +} 1 +test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} { + string length \xA2 +} 1 test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} testbytestring { - string length [testbytestring "\xE2"] -} {1} + string length [testbytestring \xE2] +} 1 test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestring { - string length [testbytestring "\xE2\xA2"] -} {2} + string length [testbytestring \xE2\xA2] +} 2 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { - string length [testbytestring "\xE4\xB9\x8E"] -} {1} -test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {tip389 testbytestring} -body { - string length [testbytestring "\xF0\x90\x80\x80"] -} -result {2} -test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {tip389 testbytestring} -body { - string length [testbytestring "\xF4\x8F\xBF\xBF"] -} -result {2} + string length [testbytestring \xE4\xB9\x8E] +} 1 +test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2_utf16} { + string length [testbytestring \xF0\x90\x80\x80] +} 2 +test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} { + string length [testbytestring \xF0\x90\x80\x80] +} 1 +test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { + string length [testbytestring \xF4\x8F\xBF\xBF] +} 2 +test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc ucs4} { + string length \U10FFFF +} 1 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { - string length [testbytestring "\xF0\x8F\xBF\xBF"] -} {4} + string length [testbytestring \xF0\x8F\xBF\xBF] +} 4 test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring { - string length [testbytestring "\xF4\x90\x80\x80"] -} {4} + # Would decode to U+110000 but that is outside the Unicode range. + string length [testbytestring \xF4\x90\x80\x80] +} 4 test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring { - string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"] -} {5} + string length [testbytestring \xF8\xA2\xA2\xA2\xA2] +} 5 test utf-3.1 {Tcl_UtfCharComplete} { } {} -testConstraint testnumutfchars [llength [info commands testnumutfchars]] -testConstraint testfindfirst [llength [info commands testfindfirst]] -testConstraint testfindlast [llength [info commands testfindlast]] - test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { testnumutfchars "" -} {0} -test utf-4.2 {Tcl_NumUtfChars: length 1} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC2\xA2"] -} {1} +} 0 +test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars { + testnumutfchars \xA2 +} 1 test utf-4.3 {Tcl_NumUtfChars: long string} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] -} {7} -test utf-4.4 {Tcl_NumUtfChars: #u0000} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC0\x80"] -} {1} + testnumutfchars abc\xA2[testbytestring \xE4\xB9\x8E\xA2\x4E] +} 7 +test utf-4.4 {Tcl_NumUtfChars: #x00} testnumutfchars { + testnumutfchars \x00 +} 1 test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { testnumutfchars "" 0 -} {0} +} 0 test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC2\xA2"] end -} {1} + testnumutfchars \xA2 end +} 1 test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "abc\xC2\xA2\xE4\xB9\x8E\uA2\x4E"] end -} {7} -test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xC0\x80"] end -} {1} + testnumutfchars abc\xA2[testbytestring \xE4\xB9\x8E\xA2\x4E] end +} 7 +test utf-4.8 {Tcl_NumUtfChars: #x00, calc len} testnumutfchars { + testnumutfchars \x00 end +} 1 # Bug [2738427]: Tcl_NumUtfChars(...) no overflow check test utf-4.9 {Tcl_NumUtfChars: #u20AC, calc len, incomplete} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\xE2\x82\xAC"] end-1 -} {2} -test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring "\x00"] end+1 -} {2} + testnumutfchars [testbytestring \xE2\x82\xAC] end-1 +} 2 +test utf-4.10 {Tcl_NumUtfChars: #x00, calc len, overcomplete} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring \x00] end+1 +} 2 test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring \xf0\x9f\x92\xa9] end-1 -} {3} -test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { - testnumutfchars [testbytestring \xf0\x9f\x92\xa9] end -} {2} + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end-1 +} 3 +test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end +} 2 +test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end +} 1 +test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} { + testnumutfchars foobar[testbytestring \xF2\xC2\xA0] end +} 8 +test utf-4.14 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { + testnumutfchars [testbytestring \xF4\x90\x80\x80] end-1 +} 3 test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { - testfindfirst [testbytestring "abcbc"] 98 -} {bcbc} + testfindfirst [testbytestring abcbc] 98 +} bcbc test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { - testfindlast [testbytestring "abcbc"] 98 -} {bc} + testfindlast [testbytestring abcbc] 98 +} bc -test utf-6.1 {Tcl_UtfNext} { -} {} +test utf-6.1 {Tcl_UtfNext} {testutfnext testbytestring} { + # This takes the pointer one past the terminating NUL. + # This is really an invalid call. + testutfnext [testbytestring \x00] +} 1 +test utf-6.2 {Tcl_UtfNext} testutfnext { + testutfnext A +} 1 +test utf-6.3 {Tcl_UtfNext} testutfnext { + testutfnext AA +} 1 +test utf-6.4 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext A[testbytestring \xA0] +} 1 +test utf-6.5 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext A[testbytestring \xD0] +} 1 +test utf-6.6 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext A[testbytestring \xE8] +} 1 +test utf-6.7 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext A[testbytestring \xF2] +} 1 +test utf-6.8 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext A[testbytestring \xF8] +} 1 +test utf-6.9 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\x00] +} 1 +test utf-6.10 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0]G +} 1 +test utf-6.11 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xA0\x00] +} 2 +test utf-6.12 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xD0] +} 1 +test utf-6.13 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xE8] +} 1 +test utf-6.14 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xF2] +} 1 +test utf-6.15 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xF8] +} 1 +test utf-6.16 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\x00] +} 1 +test utf-6.17 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0]G +} 1 +test utf-6.18 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0] +} 2 +test utf-6.19 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xD0] +} 1 +test utf-6.20 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xE8] +} 1 +test utf-6.21 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xF2] +} 1 +test utf-6.22 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xF8] +} 1 +test utf-6.23 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8] +} -1 +test utf-6.24 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8]G +} 1 +test utf-6.25 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xA0\x00] +} 1 +test utf-6.26 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xD0] +} 1 +test utf-6.27 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xE8] +} 1 +test utf-6.28 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xF2] +} 1 +test utf-6.29 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xF8] +} 1 +test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { + testutfnext [testbytestring \xF2] +} 1 +test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { + testutfnext [testbytestring \xF2] +} -1 +test utf-6.31 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2]G +} 1 +test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { + testutfnext [testbytestring \xF2\xA0] +} 1 +test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { + testutfnext [testbytestring \xF2\xA0] +} -1 +test utf-6.33 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xD0] +} 1 +test utf-6.34 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xE8] +} 1 +test utf-6.35 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xF2] +} 1 +test utf-6.36 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xF8] +} 1 +test utf-6.37 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8] +} 1 +test utf-6.38 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8]G +} 1 +test utf-6.39 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8\xA0] +} 1 +test utf-6.40 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8\xD0] +} 1 +test utf-6.41 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8\xE8] +} 1 +test utf-6.42 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8\xF2] +} 1 +test utf-6.43 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF8\xF8] +} 1 +test utf-6.44 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0]G +} 2 +test utf-6.45 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0\xA0] +} 2 +test utf-6.46 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0\xD0] +} 2 +test utf-6.47 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0\xE8] +} 2 +test utf-6.48 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0\xF2] +} 2 +test utf-6.49 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xD0\xA0\xF8] +} 2 +test utf-6.50 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xA0]G +} 1 +test utf-6.51 {Tcl_UtfNext} testutfnext { + testutfnext \u8820 +} 3 +test utf-6.52 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xA0\xD0] +} 1 +test utf-6.53 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xA0\xE8] +} 1 +test utf-6.54 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xA0\xF2] +} 1 +test utf-6.55 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xE8\xA0\xF8] +} 1 +test utf-6.56 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0]G +} 1 +test utf-6.57 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\x00] +} 1 +test utf-6.58 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xD0] +} 1 +test utf-6.59 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xE8] +} 1 +test utf-6.60 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xF2] +} 1 +test utf-6.61 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xF8] +} 1 +test utf-6.62 {Tcl_UtfNext} testutfnext { + testutfnext \u8820G +} 3 +test utf-6.63 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext \u8820[testbytestring \xA0] +} 3 +test utf-6.64 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext \u8820[testbytestring \xD0] +} 3 +test utf-6.65 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext \u8820[testbytestring \xE8] +} 3 +test utf-6.66 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext \u8820[testbytestring \xF2] +} 3 +test utf-6.67 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext \u8820[testbytestring \xF8] +} 3 +test utf-6.68 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0]G +} 1 +test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0] +} 1 +test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0] +} 4 +test utf-6.70 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xD0] +} 1 +test utf-6.71 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xE8] +} 1 +test utf-6.72 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xF2] +} 1 +test utf-6.73 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xF8] +} 1 +test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G +} 1 +test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G +} 4 +test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] +} 1 +test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] +} 4 +test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0] +} 1 +test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0] +} 4 +test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8] +} 1 +test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8] +} 4 +test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2] +} 1 +test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2] +} 4 +test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8] +} 1 +test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8] +} 4 +test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { + testutfnext \x00 +} 2 +test utf-6.81 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xC0\x81] +} 1 +test utf-6.82 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xC1\x80] +} 1 +test utf-6.83 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xC2\x80] +} 2 +test utf-6.84 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xE0\x80\x80] +} 1 +test utf-6.85 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xE0\xA0\x80] +} 3 +test utf-6.86 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xF0\x80\x80\x80] +} 1 +test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2_utf16} { + testutfnext [testbytestring \xF0\x90\x80\x80] +} 1 +test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs4} { + testutfnext [testbytestring \xF0\x90\x80\x80] +} 4 +test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xA0\x00] +} 2 +test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \x80\x80\x00] +} 2 +test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2_utf16} { + testutfnext [testbytestring \xF4\x8F\xBF\xBF] +} 1 +test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs4} { + testutfnext [testbytestring \xF4\x8F\xBF\xBF] +} 4 +test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} { + testutfnext [testbytestring \xF4\x90\x80\x80] +} 1 +test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xA0\xA0] +} 3 +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \x80\x80\x80] +} 3 +test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xA0\xA0\xA0] +} 3 +test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \x80\x80\x80\x80] +} 3 -test utf-7.1 {Tcl_UtfPrev} { -} {} +test utf-7.1 {Tcl_UtfPrev} testutfprev { + testutfprev {} +} 0 +test utf-7.2 {Tcl_UtfPrev} testutfprev { + testutfprev A +} 0 +test utf-7.3 {Tcl_UtfPrev} testutfprev { + testutfprev AA +} 1 +test utf-7.4 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8] +} 1 +test utf-7.4.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0\xA0\xA0] 2 +} 1 +test utf-7.4.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xF8\xA0\xA0] 2 +} 1 +test utf-7.5 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF2] +} 1 +test utf-7.5.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 2 +} 1 +test utf-7.5.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF2\xF8\xA0\xA0] 2 +} 1 +test utf-7.6 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE8] +} 1 +test utf-7.6.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A\u8820[testbytestring \xA0] 2 +} 1 +test utf-7.6.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE8\xF8\xA0\xA0] 2 +} 1 +test utf-7.7 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0] +} 1 +test utf-7.7.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0\xA0\xA0] 2 +} 1 +test utf-7.7.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xF8\xA0\xA0] 2 +} 1 +test utf-7.8 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xA0] +} 1 +test utf-7.8.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 2 +} 1 +test utf-7.8.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xA0\xF8\xA0\xA0] 2 +} 1 +test utf-7.9 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0] +} 2 +test utf-7.9.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0\xA0\xA0] 3 +} 2 +test utf-7.9.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0\xF8\xA0] 3 +} 2 +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { + testutfprev A[testbytestring \xF2\xA0] +} 2 +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { + testutfprev A[testbytestring \xF2\xA0] +} 1 +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 +} 2 +test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 +} 1 +test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { + testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 +} 2 +test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { + testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 +} 1 +test utf-7.11 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE8\xA0] +} 1 +test utf-7.11.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A\u8820[testbytestring \xA0] 3 +} 1 +test utf-7.11.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE8\xA0\xF8\xA0] 3 +} 1 +test utf-7.11.3 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE8\xA0\xF8] 3 +} 1 +test utf-7.12 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0] +} 1 +test utf-7.12.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0\xA0\xA0] 3 +} 1 +test utf-7.12.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0\xF8\xA0] 3 +} 1 +test utf-7.13 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xA0\xA0] +} 2 +test utf-7.13.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 3 +} 2 +test utf-7.13.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xA0\xA0\xF8\xA0] 3 +} 2 +test utf-7.14 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0\xA0] +} 3 +test utf-7.14.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0\xA0\xA0] 4 +} 3 +test utf-7.14.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF8\xA0\xA0\xF8] 4 +} 3 +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { + testutfprev A[testbytestring \xF2\xA0\xA0] +} 3 +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { + testutfprev A[testbytestring \xF2\xA0\xA0] +} 1 +test utf-7.15.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 +} 3 +test utf-7.15.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 +} 1 +test utf-7.15.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { + testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 +} 3 +test utf-7.15.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { + testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 +} 1 +test utf-7.16 {Tcl_UtfPrev} testutfprev { + testutfprev A\u8820 +} 1 +test utf-7.16.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A\u8820[testbytestring \xA0] 4 +} 1 +test utf-7.16.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A\u8820[testbytestring \xF8] 4 +} 1 +test utf-7.17 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0\xA0] +} 3 +test utf-7.17.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0\xA0\xA0] 4 +} 3 +test utf-7.17.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xD0\xA0\xA0\xF8] 4 +} 3 +test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xA0\xA0\xA0] +} 1 +test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4 +} 1 +test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4 +} 1 +test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF8\xA0\xA0\xA0] +} 2 +test utf-7.20 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] +} 2 +test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A\u8820[testbytestring \xA0] +} 2 +test utf-7.22 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xD0\xA0\xA0\xA0] +} 2 +test utf-7.23 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xA0\xA0\xA0\xA0] +} 2 +test utf-7.24 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xC0\x81] +} 2 +test utf-7.25 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xC0\x81] 2 +} 1 +test utf-7.26 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\x80\x80] +} 3 +test utf-7.27 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\x80] +} 2 +test utf-7.27.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\x80\x80] 3 +} 2 +test utf-7.28 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0] +} 1 +test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\x80\x80] 2 +} 1 +test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF0\x80\x80\x80] +} 2 +test utf-7.30 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF0\x80\x80\x80] 4 +} 3 +test utf-7.31 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF0\x80\x80\x80] 3 +} 2 +test utf-7.32 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF0\x80\x80\x80] 2 +} 1 +test utf-7.33 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\x00 +} 1 +test utf-7.34 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xC1\x80] +} 2 +test utf-7.35 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xC2\x80] +} 1 +test utf-7.36 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\xA0\x80] +} 1 +test utf-7.37 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\xA0\x80] 3 +} 1 +test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xE0\xA0\x80] 2 +} 1 +test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF0\x90\x80\x80] +} 2 +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { + testutfprev A[testbytestring \xF0\x90\x80\x80] 4 +} 3 +test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { + testutfprev A[testbytestring \xF0\x90\x80\x80] 4 +} 1 +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { + testutfprev A[testbytestring \xF0\x90\x80\x80] 3 +} 2 +test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { + testutfprev A[testbytestring \xF0\x90\x80\x80] 3 +} 1 +test utf-7.42 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF0\x90\x80\x80] 2 +} 1 +test utf-7.43 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} { + testutfprev [testbytestring \xA0] +} 0 +test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} { + testutfprev [testbytestring \xA0\xA0] +} 1 +test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} { + testutfprev [testbytestring \xA0\xA0\xA0] +} 2 +test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring utf16} { + testutfprev [testbytestring \xA0\xA0\xA0\xA0] +} 1 +test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev testbytestring} { + testutfprev [testbytestring \xE8\xA0] +} 0 +test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} testutfprev { + testutfprev \u8820 2 +} 0 +test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev testbytestring} { + testutfprev [testbytestring \xE8\xA0\x00] 2 +} 0 +test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF4\x8F\xBF\xBF] +} 2 +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { + testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 +} 3 +test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { + testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 +} 1 +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { + testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 +} 2 +test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { + testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 +} 1 +test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 2 +} 1 +test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF4\x90\x80\x80] +} 2 +test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF4\x90\x80\x80] 4 +} 3 +test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF4\x90\x80\x80] 3 +} 2 +test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF4\x90\x80\x80] 2 +} 1 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 -} {a} +} a test utf-8.2 {Tcl_UniCharAtIndex: index = 0} { string index \u4E4E\u25A 0 -} "\u4E4E" +} \u4E4E test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { string index abcd 2 -} {c} +} c test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 -} "\uFF" -test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} { +} \xFF +test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { string index \uD842 0 -} "\uD842" +} \uD842 +test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 { + string index \uD842 0 +} \uD842 +test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} utf16 { + string index \uD842 0 +} \uD842 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { string index \uDC42 0 -} "\uDC42" +} \uDC42 +test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { + string index \uD83D\uDE00G 0 +} \uD83D +test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { + string index \uD83D\uDE00G 0 +} \U1F600 +test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 { + string index \uD83D\uDE00G 0 +} \U1F600 +test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { + string index \uD83D\uDE00G 1 +} \uDE00 +test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { + string index \uD83D\uDE00G 1 +} G +test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 { + string index \uD83D\uDE00G 1 +} {} +test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { + string index \uD83D\uDE00G 2 +} G +test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { + string index \uD83D\uDE00G 2 +} {} +test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 { + string index \uD83D\uDE00G 2 +} G +test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { + string index \U1F600G 0 +} \uFFFD +test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { + string index \U1F600G 0 +} \U1F600 +test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc utf16} { + string index \U1F600G 0 +} \U1F600 +test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { + string index \U1F600G 1 +} G +test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { + string index \U1F600G 1 +} G +test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc utf16} { + string index \U1F600G 1 +} {} +test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { + string index \U1F600G 2 +} {} +test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { + string index \U1F600G 2 +} {} +test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} {Uesc utf16} { + string index \U1F600G 2 +} G test utf-9.1 {Tcl_UtfAtIndex: index = 0} { string range abcd 0 2 -} {abc} +} abc test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 -} "\u25A\xFF\u543kl" - +} \u25A\xFF\u543kl +test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { + string range \uD83D\uDE00G 0 0 +} \uD83D +test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { + string range \uD83D\uDE00G 0 0 +} \U1F600 +test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { + string range \uD83D\uDE00G 0 0 +} \U1F600 +test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { + string range \uD83D\uDE00G 1 1 +} \uDE00 +test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { + string range \uD83D\uDE00G 1 1 +} G +test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { + string range \uD83D\uDE00G 1 1 +} {} +test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { + string range \uD83D\uDE00G 2 2 +} G +test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { + string range \uD83D\uDE00G 2 2 +} {} +test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { + string range \uD83D\uDE00G 2 2 +} G +test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs2} { + string range \U1f600G 0 0 +} \uFFFD +test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs4} { + string range \U1f600G 0 0 +} \U1F600 +test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc utf16} { + string range \U1f600G 0 0 +} \U1F600 +test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} { + string range \U1f600G 1 1 +} G +test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs4} { + string range \U1f600G 1 1 +} G +test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc utf16} { + string range \U1f600G 1 1 +} {} +test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} { + string range \U1f600G 2 2 +} {} +test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs4} { + string range \U1f600G 2 2 +} {} +test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc utf16} { + string range \U1f600G 2 2 +} G test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { set x \n } { } test utf-10.2 {Tcl_UtfBackslash: \u subst} testbytestring { - expr {"\uA2" eq [testbytestring "\xC2\xA2"]} + expr {"\uA2" eq [testbytestring \xC2\xA2]} } 1 test utf-10.3 {Tcl_UtfBackslash: longer \u subst} testbytestring { - expr {"\u4E21" eq [testbytestring "\xE4\xB8\xA1"]} + expr {"\u4E21" eq [testbytestring \xE4\xB8\xA1]} } 1 test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring { expr {"\u4E2k" eq "[testbytestring \xD3\xA2]k"} @@ -201,15 +962,16 @@ test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} testbytestring { test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring { expr {"\u4E216" eq "[testbytestring \xE4\xB8\xA1]6"} } 1 -test utf-10.6 {Tcl_UtfBackslash: stops after 5 hex chars} testbytestring { +test utf-10.6 {Tcl_UtfBackslash: stops after 5 hex chars} {Uesc fullutf testbytestring} { expr {"\U1E2165" eq "[testbytestring \xF0\x9E\x88\x96]5"} } 1 -test utf-10.7 {Tcl_UtfBackslash: stops after 6 hex chars} testbytestring { +test utf-10.7 {Tcl_UtfBackslash: stops after 6 hex chars} {Uesc fullutf testbytestring} { expr {"\U10E2165" eq "[testbytestring \xF4\x8E\x88\x96]5"} } 1 -proc bsCheck {char num} { + +proc bsCheck {char num {constraints {}}} { global errNum - test utf-10.$errNum {backslash substitution} { + test utf-10.$errNum {backslash substitution} $constraints { scan $char %c value set value } $num @@ -244,7 +1006,8 @@ bsCheck \x 120 bsCheck \xa 10 bsCheck \xA 10 bsCheck \x41 65 -bsCheck \x541 84 +bsCheck \x541 65 pre388 ;# == \x41 +bsCheck \x541 84 !pre388 ;# == \x54 1 bsCheck \u 117 bsCheck \uk 117 bsCheck \u41 65 @@ -253,24 +1016,25 @@ bsCheck \uA 10 bsCheck \340 224 bsCheck \uA1 161 bsCheck \u4E21 20001 -bsCheck \741 60 -bsCheck \U 85 -bsCheck \Uk 85 -bsCheck \U41 65 -bsCheck \Ua 10 -bsCheck \UA 10 -bsCheck \Ua1 161 -bsCheck \U4E21 20001 -bsCheck \U004E21 20001 -bsCheck \U00004E21 20001 -bsCheck \U0000004E21 78 -bsCheck \U00110000 69632 -bsCheck \U01100000 69632 -bsCheck \U11000000 69632 -bsCheck \U0010FFFF 1114111 -bsCheck \U010FFFF0 1114111 -bsCheck \U10FFFF00 1114111 -bsCheck \UFFFFFFFF 1048575 +bsCheck \741 225 pre388 ;# == \341 +bsCheck \741 60 !pre388 ;# == \74 1 +bsCheck \U 85 +bsCheck \Uk 85 +bsCheck \U41 65 Uesc +bsCheck \Ua 10 Uesc +bsCheck \UA 10 Uesc +bsCheck \UA1 161 Uesc +bsCheck \U4E21 20001 Uesc +bsCheck \U004E21 20001 Uesc +bsCheck \U00004E21 20001 Uesc +bsCheck \U0000004E21 78 Uesc +bsCheck \U00110000 69632 {Uesc fullutf} +bsCheck \U01100000 69632 {Uesc fullutf} +bsCheck \U11000000 69632 {Uesc fullutf} +bsCheck \U0010FFFF 1114111 {Uesc fullutf} +bsCheck \U010FFFF0 1114111 {Uesc fullutf} +bsCheck \U10FFFF00 1114111 {Uesc fullutf} +bsCheck \UFFFFFFFF 1048575 {Uesc fullutf} test utf-11.1 {Tcl_UtfToUpper} { string toupper {} @@ -279,15 +1043,21 @@ test utf-11.2 {Tcl_UtfToUpper} { string toupper abc } ABC test utf-11.3 {Tcl_UtfToUpper} { - string toupper \xE3AB -} \xC3AB + string toupper \xE3gh +} \xC3GH test utf-11.4 {Tcl_UtfToUpper} { - string toupper \u01E3AB -} \u01E2AB + string toupper \u01E3gh +} \u01E2GH test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { string toupper \u10D0\u1C90 } \u1C90\u1C90 -test utf-11.6 {Tcl_UtfToUpper low/high surrogate)} { +test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} {Uesc fullutf} { + string toupper \U10428 +} \U10400 +test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} fullutf { + string toupper \uD801\uDC28 +} \uD801\uDC00 +test utf-11.8 {Tcl_UtfToUpper low/high surrogate)} { string toupper \uDC24\uD824 } \uDC24\uD824 @@ -298,17 +1068,23 @@ test utf-12.2 {Tcl_UtfToLower} { string tolower ABC } abc test utf-12.3 {Tcl_UtfToLower} { - string tolower \xC3AB -} \xE3ab + string tolower \xC3GH +} \xE3gh test utf-12.4 {Tcl_UtfToLower} { - string tolower \u01E2AB -} \u01E3ab + string tolower \u01E2GH +} \u01E3gh test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower \u10D0\u1C90 } \u10D0\u10D0 -test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} { +test utf-12.6 {Tcl_UtfToLower low/high surrogate)} { string tolower \uDC24\uD824 } \uDC24\uD824 +test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} {Uesc fullutf} { + string tolower \U10400 +} \U10428 +test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} fullutf { + string tolower \uD801\uDC00 +} \uD801\uDC28 test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -317,8 +1093,8 @@ test utf-13.2 {Tcl_UtfToTitle} { string totitle abc } Abc test utf-13.3 {Tcl_UtfToTitle} { - string totitle \xE3AB -} \xC3ab + string totitle \xE3GH +} \xC3gh test utf-13.4 {Tcl_UtfToTitle} { string totitle \u01F3AB } \u01F2ab @@ -331,6 +1107,12 @@ test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} { string totitle \uDC24\uD824 } \uDC24\uD824 +test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} {Uesc fullutf} { + string totitle \U10428\U10400 +} \U10400\U10428 +test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} fullutf { + string totitle \uD801\uDC28\uD801\uDC00 +} \uD801\uDC00\uD801\uDC28 test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b @@ -359,8 +1141,8 @@ test utf-16.1 {Tcl_UniCharToLower, negative delta} { string tolower aA } aa test utf-16.2 {Tcl_UniCharToLower, positive delta} { - string tolower \u0178\xFF\uA78D\u01C5\U10400 -} \xFF\xFF\u0265\u01C6\U10428 + string tolower \u0178\xFF\uA78D\u01C5 +} \xFF\xFF\u0265\u01C6 test utf-17.1 {Tcl_UniCharToLower, no delta} { string tolower ! @@ -388,13 +1170,23 @@ test utf-19.1 {TclUniCharLen} -body { unset -nocomplain foo } -result {1 4} -test utf-20.1 {TclUniCharNcmp} { -} {} +test utf-20.1 {TclUniCharNcmp} ucs4 { + string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0] +} -1 +test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} knownBug { + set one [format %c 0xFFFF] + set two [format %c 0x10000] + set first [string compare $one $two] + string range $one 0 0 + string range $two 0 0 + set second [string compare $one $two] + expr {($first == $second) ? "agree" : "disagree"} +} agree test utf-21.1 {TclUniCharIsAlnum} { # this returns 1 with Unicode 7 compliance string is alnum \u1040\u021F\u0220 -} {1} +} 1 test utf-21.2 {unicode alnum char in regc_locale.c} { # this returns 1 with Unicode 7 compliance list [regexp {^[[:alnum:]]+$} \u1040\u021F\u0220] [regexp {^\w+$} \u1040\u021F\u0220_\u203F\u2040\u2054\uFE33\uFE34\uFE4D\uFE4E\uFE4F\uFF3F] @@ -406,39 +1198,39 @@ test utf-21.3 {unicode print char in regc_locale.c} { test utf-21.4 {TclUniCharIsGraph} { # [Bug 3464428] string is graph \u0120 -} {1} +} 1 test utf-21.5 {unicode graph char in regc_locale.c} { # [Bug 3464428] regexp {^[[:graph:]]+$} \u0120 -} {1} +} 1 test utf-21.6 {TclUniCharIsGraph} { # [Bug 3464428] string is graph \xA0 -} {0} +} 0 test utf-21.7 {unicode graph char in regc_locale.c} { # [Bug 3464428] regexp {[[:graph:]]} \x20\xA0\u2028\u2029 -} {0} +} 0 test utf-21.8 {TclUniCharIsPrint} { # [Bug 3464428] string is print \x09 -} {0} +} 0 test utf-21.9 {unicode print char in regc_locale.c} { # [Bug 3464428] regexp {[[:print:]]} \x09 -} {0} +} 0 test utf-21.10 {unicode print char in regc_locale.c} { # [Bug 3464428] regexp {[[:print:]]} \x09 -} {0} +} 0 test utf-21.11 {TclUniCharIsControl} { # [Bug 3464428] string is control \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF -} {1} +} 1 test utf-21.12 {unicode control char in regc_locale.c} { # [Bug 3464428], [Bug a876646efe] regexp {^[[:cntrl:]]*$} \x00\x1F\xAD\u0605\u061C\u180E\u2066\uFEFF -} {1} +} 1 test utf-22.1 {TclUniCharIsWordChar} { string wordend "xyz123_bar fg" 0 @@ -450,93 +1242,84 @@ test utf-22.2 {TclUniCharIsWordChar} { test utf-23.1 {TclUniCharIsAlpha} { # this returns 1 with Unicode 7 compliance string is alpha \u021F\u0220\u037F\u052F -} {1} +} 1 test utf-23.2 {unicode alpha char in regc_locale.c} { # this returns 1 with Unicode 7 compliance regexp {^[[:alpha:]]+$} \u021F\u0220\u037F\u052F -} {1} +} 1 test utf-24.1 {TclUniCharIsDigit} { # this returns 1 with Unicode 7 compliance string is digit \u1040\uABF0 -} {1} +} 1 test utf-24.2 {unicode digit char in regc_locale.c} { # this returns 1 with Unicode 7 compliance list [regexp {^[[:digit:]]+$} \u1040\uABF0] [regexp {^\d+$} \u1040\uABF0] } {1 1} test utf-24.3 {TclUniCharIsSpace} { + # this returns 1 with Unicode 7 compliance + string is space \u1680\u180E\u202F +} 1 +test utf-24.4 {unicode space char in regc_locale.c} { + # this returns 1 with Unicode 7 compliance + list [regexp {^[[:space:]]+$} \u1680\u180E\u202F] [regexp {^\s+$} \u1680\u180E\u202F] +} {1 1} +test utf-24.5 {TclUniCharIsSpace} tip413 { # this returns 1 with Unicode 7/TIP 413 compliance string is space \x85\u1680\u180E\u200B\u202F\u2060 -} {1} -test utf-24.4 {unicode space char in regc_locale.c} { +} 1 +test utf-24.6 {unicode space char in regc_locale.c} tip413 { # this returns 1 with Unicode 7/TIP 413 compliance list [regexp {^[[:space:]]+$} \x85\u1680\u180E\u200B\u202F\u2060] [regexp {^\s+$} \x85\u1680\u180E\u200B\u202F\u2060] } {1 1} -testConstraint teststringobj [llength [info commands teststringobj]] - -test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \ - -setup { - testobj freeallvars - } \ - -body { - teststringobj set 1 a - teststringobj set 2 b - teststringobj maxchars 1 - teststringobj maxchars 2 - string compare -nocase [teststringobj get 1] [teststringobj get 2] - } \ - -cleanup { +proc UniCharCaseCmpTest {order one two {constraints {}}} { + variable count + test utf-25.$count {Tcl_UniCharNcasecmp} -setup { testobj freeallvars - } \ - -result -1 -test utf-25.2 {Tcl_UniCharNcasecmp} -constraints teststringobj \ - -setup { + } -constraints [linsert $constraints 0 teststringobj] -cleanup { testobj freeallvars - } \ - -body { - teststringobj set 1 b - teststringobj set 2 a + } -body { + teststringobj set 1 $one + teststringobj set 2 $two teststringobj maxchars 1 teststringobj maxchars 2 - string compare -nocase [teststringobj get 1] [teststringobj get 2] - } \ - -cleanup { - testobj freeallvars - } \ - -result 1 -test utf-25.3 {Tcl_UniCharNcasecmp} -constraints teststringobj \ - -setup { - testobj freeallvars - } \ - -body { - teststringobj set 1 B - teststringobj set 2 a - teststringobj maxchars 1 - teststringobj maxchars 2 - string compare -nocase [teststringobj get 1] [teststringobj get 2] - } \ - -cleanup { - testobj freeallvars - } \ - -result 1 + set result [string compare -nocase [teststringobj get 1] [teststringobj get 2]] + if {$result eq [string map {< -1 = 0 > 1} $order]} { + set result ok + } else { + set result "'$one' should be $order '$two' (no case)" + } + set result + } -result ok + incr count +} +variable count 1 +UniCharCaseCmpTest < a b +UniCharCaseCmpTest > b a +UniCharCaseCmpTest > B a +UniCharCaseCmpTest > aBcB abca +UniCharCaseCmpTest < \uFFFF [format %c 0x10000] ucs4 +UniCharCaseCmpTest < \uFFFF \U10000 {Uesc ucs4} +UniCharCaseCmpTest > [format %c 0x10000] \uFFFF ucs4 +UniCharCaseCmpTest > \U10000 \uFFFF {Uesc ucs4} -test utf-25.4 {Tcl_UniCharNcasecmp} -constraints teststringobj \ - -setup { - testobj freeallvars - } \ - -body { - teststringobj set 1 aBcB - teststringobj set 2 abca - teststringobj maxchars 1 - teststringobj maxchars 2 - string compare -nocase [teststringobj get 1] [teststringobj get 2] - } \ - -cleanup { - testobj freeallvars - } \ - -result 1 + +test utf-26.1 {Tcl_UniCharDString} -setup { + testobj freeallvars +} -constraints {teststringobj testbytestring} -cleanup { + testobj freeallvars +} -body { + teststringobj set 1 foo + teststringobj maxchars 1 + teststringobj append 1 [testbytestring barsoom\xF2\xC2\x80] 10 + scan [string index [teststringobj get 1] 11] %c +} -result 128 + + +unset count +rename UniCharCaseCmpTest {} # cleanup ::tcltest::cleanupTests diff --git a/tests/util.test b/tests/util.test index 35bb12d..1d8162c 100644 --- a/tests/util.test +++ b/tests/util.test @@ -387,6 +387,10 @@ test util-5.50 {Tcl_StringMatch} { test util-5.51 {Tcl_StringMatch} { Wrapper_Tcl_StringMatch "" "" } 1 +test util-5.52 {Tcl_StringMatch} { + Wrapper_Tcl_StringMatch \[a\u0000 a\x80 +} 0 + test util-6.1 {Tcl_PrintDouble - using tcl_precision} -constraints precision -setup { set old_precision $::tcl_precision @@ -512,25 +516,64 @@ test util-8.4 {TclNeedSpace - correct UTF8 handling} testdstring { llength [testdstring get] } 2 test util-8.5 {TclNeedSpace - correct UTF8 handling} testdstring { - # Note that in this test TclNeedSpace actually gets it wrong, - # claiming we need a space when we really do not. Extra space - # between list elements is harmless though, and better to have - # extra space in really weird string reps of lists, than to - # invest the effort required to make TclNeedSpace foolproof. testdstring free testdstring append {\\ } -1 testdstring element foo list [llength [testdstring get]] [string length [testdstring get]] -} {2 7} +} {2 6} test util-8.6 {TclNeedSpace - correct UTF8 handling} testdstring { - # Another example of TclNeedSpace harmlessly getting it wrong. testdstring free testdstring append {\\ } -1 testdstring append \{ -1 testdstring element foo testdstring append \} -1 list [llength [testdstring get]] [string length [testdstring get]] -} {2 9} +} {2 8} +test util-8.7 {TclNeedSpace - watch out for escaped space} { + testdstring free + testdstring append {\ } -1 + testdstring start + testdstring end + + # Should make {\ {}} + list [llength [testdstring get]] [string index [testdstring get] 3] +} {2 \{} +test util-8.8 {TclNeedSpace - watch out for escaped space} { + testdstring free + testdstring append {\\ } -1 + testdstring start + testdstring end + + # Should make {\\ {}} + list [llength [testdstring get]] [string index [testdstring get] 3] +} {2 \{} +test util-8.9 {TclNeedSpace - watch out for escaped space} { + testdstring free + testdstring append {\\\ } -1 + testdstring start + testdstring end + + # Should make {\\\ {}} + list [llength [testdstring get]] [string index [testdstring get] 5] +} {2 \{} +test util-8.10 {TclNeedSpace - watch out for escaped space} { + testdstring free + testdstring append {\\\\\\\ } -1 + testdstring start + testdstring end + + # Should make {\\\\\\\ {}} + list [llength [testdstring get]] [string index [testdstring get] 9] +} {2 \{} +test util-8.11 {TclNeedSpace - watch out for escaped space} { + testdstring free + testdstring append {\\\\\\\\ } -1 + testdstring start + testdstring end + + # Should make {\\\\\\\\ {}} + list [llength [testdstring get]] [string index [testdstring get] 9] +} {2 \{} test util-9.0.0 {Tcl_GetIntForIndex} { string index abcd 0 diff --git a/tools/mkdepend.tcl b/tools/mkdepend.tcl index ecb2206..3d96a5e 100644 --- a/tools/mkdepend.tcl +++ b/tools/mkdepend.tcl @@ -98,7 +98,7 @@ proc readDepends {chan} { } else { # don't include ourselves as a dependency of ourself. if {![string compare $fname $target]} {continue} - # store in an array so multiple occurances are not counted. + # store in an array so multiple occurrences are not counted. set depends($target|$fname) "" } } diff --git a/unix/Makefile.in b/unix/Makefile.in index 6654f25..08219ca 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -114,11 +114,6 @@ LDFLAGS_DEBUG = @LDFLAGS_DEBUG@ LDFLAGS_OPTIMIZE = @LDFLAGS_OPTIMIZE@ LDFLAGS = @LDFLAGS_DEFAULT@ @LDFLAGS@ -# To disable ANSI-C procedure prototypes reverse the comment characters on the -# following lines: -PROTO_FLAGS = -#PROTO_FLAGS = -DNO_PROTOTYPE - # If you use the setenv, putenv, or unsetenv procedures to modify environment # variables in your application and you'd like those modifications to appear # in the "env" Tcl variable, switch the comments on the two lines below so @@ -282,7 +277,7 @@ VALGRINDARGS = --tool=memcheck --num-callers=24 \ STUB_CC_SWITCHES = -I"${BUILD_DIR}" -I${UNIX_DIR} -I${GENERIC_DIR} -I${TOMMATH_DIR} \ ${CFLAGS} ${CFLAGS_WARNING} ${SHLIB_CFLAGS} \ - ${AC_FLAGS} ${PROTO_FLAGS} ${ENV_FLAGS} ${EXTRA_CFLAGS} \ + ${AC_FLAGS} ${ENV_FLAGS} ${EXTRA_CFLAGS} \ @EXTRA_CC_SWITCHES@ CC_SWITCHES = $(STUB_CC_SWITCHES) ${NO_DEPRECATED_FLAGS} -DMP_FIXED_CUTOFFS -DMP_NO_STDINT @@ -292,7 +287,7 @@ APP_CC_SWITCHES = $(CC_SWITCHES) @EXTRA_APP_CC_SWITCHES@ LIBS = @TCL_LIBS@ DEPEND_SWITCHES = ${CFLAGS} -I${UNIX_DIR} -I${GENERIC_DIR} \ - ${AC_FLAGS} ${PROTO_FLAGS} ${EXTRA_CFLAGS} @EXTRA_CC_SWITCHES@ + ${AC_FLAGS} ${EXTRA_CFLAGS} @EXTRA_CC_SWITCHES@ TCLSH_OBJS = tclAppInit.o diff --git a/unix/configure b/unix/configure index 6018e6f..874c945 100755 --- a/unix/configure +++ b/unix/configure @@ -5524,7 +5524,7 @@ fi SHLIB_LD='${CC} -shared' if test $doRpath = yes; then : - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' fi LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} ;; @@ -5614,7 +5614,7 @@ esac if test $doRpath = yes; then : - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}' fi ;; @@ -5632,7 +5632,7 @@ esac if test $doRpath = yes; then : - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}' fi if test "$GCC" = yes; then : @@ -5669,7 +5669,7 @@ esac if test $doRpath = yes; then : - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}' fi @@ -5709,7 +5709,7 @@ fi LDFLAGS="$LDFLAGS -Wl,--export-dynamic" if test $doRpath = yes; then : - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' fi LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} if test "`uname -m`" = "alpha"; then : @@ -5776,8 +5776,8 @@ fi LD_FLAGS="-Wl,--export-dynamic" if test $doRpath = yes; then : - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' - LD_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' + LD_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' fi ;; OpenBSD-*) @@ -5796,7 +5796,7 @@ fi DL_LIBS="" if test $doRpath = yes; then : - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' fi LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} SHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}.so.${SHLIB_VERSION}' @@ -5820,7 +5820,7 @@ fi LDFLAGS="$LDFLAGS -export-dynamic" if test $doRpath = yes; then : - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' fi LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} # The -pthread needs to go in the CFLAGS, not LIBS @@ -5838,8 +5838,8 @@ fi DL_LIBS="" if test $doRpath = yes; then : - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' - LD_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' + LD_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' fi # The -pthread needs to go in the LDFLAGS, not LIBS LIBS=`echo $LIBS | sed s/-pthread//` @@ -6191,7 +6191,7 @@ fi DL_LIBS="" if test $doRpath = yes; then : - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}' fi if test "$GCC" = yes; then : diff --git a/unix/tcl.m4 b/unix/tcl.m4 index 271bd03..0a2920b 100644 --- a/unix/tcl.m4 +++ b/unix/tcl.m4 @@ -1180,7 +1180,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ do64bit_ok=yes SHLIB_LD='${CC} -shared' AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}']) + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"']) LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} ;; *) @@ -1215,7 +1215,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_LIBS="" AC_LIBOBJ(mkstemp) AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}']) ;; IRIX-6.*) @@ -1226,7 +1226,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_LIBS="" AC_LIBOBJ(mkstemp) AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}']) AS_IF([test "$GCC" = yes], [ CFLAGS="$CFLAGS -mabi=n32" @@ -1252,7 +1252,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_LIBS="" AC_LIBOBJ(mkstemp) AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}']) # Check to enable 64-bit flags for compiler/linker @@ -1283,7 +1283,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_LIBS="-ldl" LDFLAGS="$LDFLAGS -Wl,--export-dynamic" AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}']) + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"']) LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} AS_IF([test "`uname -m`" = "alpha"], [CFLAGS="$CFLAGS -mieee"]) AS_IF([test $do64bit = yes], [ @@ -1315,8 +1315,8 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_LIBS="-mshared -ldl" LD_FLAGS="-Wl,--export-dynamic" AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' - LD_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}']) + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' + LD_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"']) ;; OpenBSD-*) arch=`arch -s` @@ -1333,7 +1333,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_OBJS="tclLoadDl.o" DL_LIBS="" AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}']) + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"']) LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} SHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}.so.${SHLIB_VERSION}' LDFLAGS="-Wl,-export-dynamic" @@ -1355,7 +1355,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_LIBS="" LDFLAGS="$LDFLAGS -export-dynamic" AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}']) + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"']) LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS} # The -pthread needs to go in the CFLAGS, not LIBS LIBS=`echo $LIBS | sed s/-pthread//` @@ -1371,8 +1371,8 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_OBJS="tclLoadDl.o" DL_LIBS="" AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' - LD_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}']) + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' + LD_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"']) # The -pthread needs to go in the LDFLAGS, not LIBS LIBS=`echo $LIBS | sed s/-pthread//` CFLAGS="$CFLAGS $PTHREAD_CFLAGS" @@ -1540,7 +1540,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ DL_OBJS="tclLoadDl.o" DL_LIBS="" AS_IF([test $doRpath = yes], [ - CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}' + CC_SEARCH_FLAGS='"-Wl,-rpath,${LIB_RUNTIME_DIR}"' LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}']) AS_IF([test "$GCC" = yes], [CFLAGS="$CFLAGS -mieee"], [ CFLAGS="$CFLAGS -DHAVE_TZSET -std1 -ieee"]) diff --git a/unix/tclUnixFile.c b/unix/tclUnixFile.c index 602ca63..fdf7904 100644 --- a/unix/tclUnixFile.c +++ b/unix/tclUnixFile.c @@ -105,7 +105,7 @@ TclpFindExecutable( */ while (1) { - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } name = p; diff --git a/unix/tclUnixSock.c b/unix/tclUnixSock.c index b707be4..cb20166 100644 --- a/unix/tclUnixSock.c +++ b/unix/tclUnixSock.c @@ -1009,7 +1009,7 @@ TcpThreadActionProc( if (GOT_BITS(statePtr->flags, TCP_ASYNC_CONNECT)) { /* * Async-connecting socket must get reassigned handler if it have been - * transferred to another thread. Remove the handler if the socket is + * transferred to another thread. Remove the handler if the socket is * not managed by this thread anymore and create new handler (TSD related) * so the callback will run in the correct thread, bug [f583715154]. */ diff --git a/win/tclAppInit.c b/win/tclAppInit.c index 105af65..59c21e5 100644 --- a/win/tclAppInit.c +++ b/win/tclAppInit.c @@ -87,7 +87,7 @@ MODULE_SCOPE int TCL_LOCAL_MAIN_HOOK(int *argc, TCHAR ***argv); int main( int argc, /* Number of command-line arguments. */ - TCL_UNUSED(char **)) + char **argv1) { TCHAR **argv; TCHAR *p; @@ -112,6 +112,7 @@ _tmain( * Get our args from the c-runtime. Ignore command line. */ + (void)argv1; setargv(&argc, &argv); #endif diff --git a/win/tclWinInit.c b/win/tclWinInit.c index 44067aa..420e324 100644 --- a/win/tclWinInit.c +++ b/win/tclWinInit.c @@ -624,15 +624,8 @@ TclpSetVariables( *---------------------------------------------------------------------- */ -#if defined(_WIN32) -# define tenviron _wenviron -# define tenviron2utfdstr(string, len, dsPtr) (Tcl_DStringInit(dsPtr), \ - (char *)Tcl_Char16ToUtfDString((const unsigned short *)(string), ((((len) + 2) >> 1) - 1), (dsPtr))) -#else -# define tenviron environ -# define tenviron2utfdstr(tenvstr, len, dstr) \ - Tcl_ExternalToUtfDString(NULL, tenvstr, len, dstr) -#endif +# define tenviron2utfdstr(string, len, dsPtr) \ + (char *)Tcl_Char16ToUtfDString((const unsigned short *)(string), ((((len) + 2) >> 1) - 1), (dsPtr)) int TclpFindVariable( @@ -644,7 +637,8 @@ TclpFindVariable( * searches). */ { int i, length, result = -1; - const char *env, *p1, *p2; + const WCHAR *env; + const char *p1, *p2; char *envUpper, *nameUpper; Tcl_DString envString; @@ -658,16 +652,17 @@ TclpFindVariable( Tcl_UtfToUpper(nameUpper); Tcl_DStringInit(&envString); - for (i = 0, env = (const char *)tenviron[i]; + for (i = 0, env = _wenviron[i]; env != NULL; - i++, env = (const char *)tenviron[i]) { + i++, env = _wenviron[i]) { /* * Chop the env string off after the equal sign, then Convert the name * to all upper case, so we do not have to convert all the characters * after the equal sign. */ - envUpper = tenviron2utfdstr(env, -1, &envString); + Tcl_DStringInit(&envString); + envUpper = Tcl_WCharToUtfDString(env, -1, &envString); p1 = strchr(envUpper, '='); if (p1 == NULL) { continue; diff --git a/win/tclWinSock.c b/win/tclWinSock.c index 87e0dc6..48b3cee 100644 --- a/win/tclWinSock.c +++ b/win/tclWinSock.c @@ -372,7 +372,7 @@ InitializeHostName( Tcl_DString ds; Tcl_DStringInit(&ds); - if (GetComputerNameExW(ComputerNameDnsFullyQualified, wbuf, &length) != 0) { + if (GetComputerNameExW(ComputerNamePhysicalDnsFullyQualified, wbuf, &length) != 0) { /* * Convert string from native to UTF then change to lowercase. */ diff --git a/win/tclWinTest.c b/win/tclWinTest.c index 5841509..91a3010 100644 --- a/win/tclWinTest.c +++ b/win/tclWinTest.c @@ -13,7 +13,11 @@ # define USE_TCL_STUBS #endif #include "tclInt.h" -#include "tclTomMath.h" +#ifdef TCL_WITH_EXTERNAL_TOMMATH +# include "tommath.h" +#else +# include "tclTomMath.h" +#endif /* * For TestplatformChmod on Windows diff --git a/win/tclWinTime.c b/win/tclWinTime.c index 277b57f..ef2b9d2 100644 --- a/win/tclWinTime.c +++ b/win/tclWinTime.c @@ -1364,7 +1364,7 @@ TclpGmtime( #if defined(_WIN64) || defined(_USE_64BIT_TIME_T) || (defined(_MSC_VER) && _MSC_VER < 1400) return gmtime(timePtr); #else - return _gmtime32((CONST __time32_t *)timePtr); + return _gmtime32((const __time32_t *)timePtr); #endif } @@ -1399,7 +1399,7 @@ TclpLocaltime( #if defined(_WIN64) || defined(_USE_64BIT_TIME_T) || (defined(_MSC_VER) && _MSC_VER < 1400) return localtime(timePtr); #else - return _localtime32((CONST __time32_t *)timePtr); + return _localtime32((const __time32_t *)timePtr); #endif } #endif /* TCL_NO_DEPRECATED */ |