diff options
-rw-r--r-- | doc/Utf.3 | 28 | ||||
-rw-r--r-- | generic/tclCmdAH.c | 2 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 16 | ||||
-rw-r--r-- | generic/tclDate.c | 2 | ||||
-rw-r--r-- | generic/tclInt.h | 11 | ||||
-rw-r--r-- | generic/tclParse.c | 2 | ||||
-rw-r--r-- | generic/tclResult.c | 11 | ||||
-rw-r--r-- | generic/tclStrToD.c | 6 | ||||
-rw-r--r-- | generic/tclStringObj.c | 24 | ||||
-rw-r--r-- | generic/tclStringTrim.h | 2 | ||||
-rw-r--r-- | generic/tclTest.c | 434 | ||||
-rw-r--r-- | generic/tclUtf.c | 65 | ||||
-rw-r--r-- | generic/tclUtil.c | 413 | ||||
-rw-r--r-- | tests/dstring.test | 64 | ||||
-rw-r--r-- | tests/encoding.test | 30 | ||||
-rw-r--r-- | tests/string.test | 34 | ||||
-rw-r--r-- | tests/utf.test | 163 | ||||
-rw-r--r-- | tests/util.test | 59 | ||||
-rw-r--r-- | unix/tclUnixFile.c | 2 |
19 files changed, 841 insertions, 527 deletions
@@ -223,13 +223,27 @@ string. The caller must not ask for the next character after the last character in the string if the string is not terminated by a null character. .PP -Given \fIsrc\fR, a pointer to some location in a UTF-8 string (or to a -null byte immediately following such a string), \fBTcl_UtfPrev\fR -returns a pointer to the closest preceding byte that starts a UTF-8 -character. -This function will not back up to a position before \fIstart\fR, -the start of the UTF-8 string. If \fIsrc\fR was already at \fIstart\fR, the -return value will be \fIstart\fR. +\fBTcl_UtfPrev\fR is used to step backward through but not beyond the +UTF-8 string that begins at \fIstart\fR. If the UTF-8 string is made +up entirely of complete and well-formed characters, and \fIsrc\fR points +to the lead byte of one of those characters (or to the location one byte +past the end of the string), then repeated calls of \fBTcl_UtfPrev\fR will +return pointers to the lead bytes of each character in the string, one +character at a time, terminating when it returns \fIstart\fR. +.PP +When the conditions of completeness and well-formedness may not be satisfied, +a more precise description of the function of \fBTcl_UtfPrev\fR is necessary. +It always returns a pointer greater than or equal to \fIstart\fR; that is, +always a pointer to a location in the string. It always returns a pointer to +a byte that begins a character when scanning for characters beginning +from \fIstart\fR. When \fIsrc\fR is greater than \fIstart\fR, it +always returns a pointer less than \fIsrc\fR and greater than or +equal to (\fIsrc\fR - \fBTCL_UTF_MAX\fR). The character that begins +at the returned pointer is the first one that either includes the +byte \fIsrc[-1]\fR, or might include it if the right trail bytes are +present at \fIsrc\fR and greater. \fBTcl_UtfPrev\fR never reads the +byte \fIsrc[0]\fR nor the byte \fIstart[-1]\fR nor the byte +\fIsrc[-\fBTCL_UTF_MAX\fI-1]\fR. .PP \fBTcl_UniCharAtIndex\fR corresponds to a C string array dereference or the Pascal Ord() function. It returns the Tcl_UniChar represented at the diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 40a10ba..46ee157 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -228,7 +228,7 @@ Tcl_CaseObjCmd( pat = TclGetString(caseObjv[i]); for (p = pat; *p != '\0'; p++) { - if (TclIsSpaceProc(*p) || (*p == '\\')) { + if (TclIsSpaceProcM(*p) || (*p == '\\')) { break; } } diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 23370a8..6b59e52 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1738,7 +1738,7 @@ StringIsCmd( * if it is the first "element" that has the failure. */ - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } TclNewStringObj(tmpStr, string1, p-string1); @@ -2477,12 +2477,22 @@ StringStartCmd( cur = 0; if (index > 0) { p = Tcl_UtfAtIndex(string, index); + + TclUtfToUniChar(p, &ch); for (cur = index; cur >= 0; cur--) { - TclUtfToUniChar(p, &ch); + int delta = 0; + const char *next; + if (!Tcl_UniCharIsWordChar(ch)) { break; } - p = Tcl_UtfPrev(p, string); + + next = Tcl_UtfPrev(p, string); + do { + next += delta; + delta = TclUtfToUniChar(next, &ch); + } while (next + delta < p); + p = next; } if (cur != index) { cur += 1; diff --git a/generic/tclDate.c b/generic/tclDate.c index 717a1b3..ea1f0f3 100644 --- a/generic/tclDate.c +++ b/generic/tclDate.c @@ -2680,7 +2680,7 @@ TclDatelex( location->first_column = yyInput - info->dateStart; for ( ; ; ) { - while (TclIsSpaceProc(*yyInput)) { + while (TclIsSpaceProcM(*yyInput)) { yyInput++; } diff --git a/generic/tclInt.h b/generic/tclInt.h index 74b2cc9..a7df6ab 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3025,7 +3025,6 @@ MODULE_SCOPE void TclInitNotifier(void); MODULE_SCOPE void TclInitObjSubsystem(void); MODULE_SCOPE void TclInitSubsystems(void); MODULE_SCOPE int TclInterpReady(Tcl_Interp *interp); -MODULE_SCOPE int TclIsSpaceProc(int byte); MODULE_SCOPE int TclIsBareword(int byte); MODULE_SCOPE Tcl_Obj * TclJoinPath(int elements, Tcl_Obj * const objv[], int forceRelative); @@ -3234,6 +3233,16 @@ MODULE_SCOPE void * TclpThreadGetMasterTSD(void *tsdKeyPtr); MODULE_SCOPE void TclErrorStackResetIf(Tcl_Interp *interp, const char *msg, int length); /* + * Many parsing tasks need a common definition of whitespace. + * Use this routine and macro to achieve that and place + * optimization (fragile on changes) in one place. + */ + +MODULE_SCOPE int TclIsSpaceProc(int byte); +# define TclIsSpaceProcM(byte) \ + (((byte) > 0x20) ? 0 : TclIsSpaceProc(byte)) + +/* *---------------------------------------------------------------- * Command procedures in the generic core: *---------------------------------------------------------------- diff --git a/generic/tclParse.c b/generic/tclParse.c index 03cee64..4d7e6b8 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -1830,7 +1830,7 @@ Tcl_ParseBraces( openBrace = 0; break; case '#' : - if (openBrace && TclIsSpaceProc(src[-1])) { + if (openBrace && TclIsSpaceProcM(src[-1])) { Tcl_AppendToObj(Tcl_GetObjResult(parsePtr->interp), ": possible unbalanced brace in comment", -1); goto error; diff --git a/generic/tclResult.c b/generic/tclResult.c index aceb2f5..caad71e 100644 --- a/generic/tclResult.c +++ b/generic/tclResult.c @@ -725,6 +725,7 @@ Tcl_AppendElement( char *dst; int size; int flags; + int quoteHash = 1; /* * If the string result is empty, move the object result to the string @@ -761,9 +762,17 @@ Tcl_AppendElement( * then this element will not lead a list, and need not have it's * leading '#' quoted. */ - + quoteHash = 0; + } else { + while ((--dst >= iPtr->appendResult) && TclIsSpaceProcM(*dst)) { + } + quoteHash = !TclNeedSpace(iPtr->appendResult, dst+1); + } + dst = iPtr->appendResult + iPtr->appendUsed; + if (!quoteHash) { flags |= TCL_DONT_QUOTE_HASH; } + iPtr->appendUsed += Tcl_ConvertElement(element, dst, flags); } diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index 66c8f19..749abcf 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -568,7 +568,7 @@ TclParseNumber( * I, N, and whitespace. */ - if (TclIsSpaceProc(c)) { + if (TclIsSpaceProcM(c)) { if (flags & TCL_PARSE_NO_WHITESPACE) { goto endgame; } @@ -1095,7 +1095,7 @@ TclParseNumber( } /* FALLTHROUGH */ case sNANPAREN: - if (TclIsSpaceProc(c)) { + if (TclIsSpaceProcM(c)) { break; } if (numSigDigs < 13) { @@ -1149,7 +1149,7 @@ TclParseNumber( * Accept trailing whitespace. */ - while (len != 0 && TclIsSpaceProc(*p)) { + while (len != 0 && TclIsSpaceProcM(*p)) { p++; len--; } diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index d4f45d7..84a1339 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -1151,10 +1151,7 @@ Tcl_AppendLimitedToObj( { String *stringPtr; int toCopy = 0; - - if (Tcl_IsShared(objPtr)) { - Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj"); - } + int eLen = 0; if (length < 0) { length = (bytes ? strlen(bytes) : 0); @@ -1162,6 +1159,9 @@ Tcl_AppendLimitedToObj( if (length == 0) { return; } + if (limit <= 0) { + return; + } if (length <= limit) { toCopy = length; @@ -1169,8 +1169,12 @@ Tcl_AppendLimitedToObj( if (ellipsis == NULL) { ellipsis = "..."; } - toCopy = (bytes == NULL) ? limit - : Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes; + eLen = strlen(ellipsis); + while (eLen > limit) { + eLen = Tcl_UtfPrev(ellipsis+eLen, ellipsis) - ellipsis; + } + + toCopy = Tcl_UtfPrev(bytes+limit+1-eLen, bytes) - bytes; } /* @@ -1179,6 +1183,10 @@ Tcl_AppendLimitedToObj( * objPtr's string rep. */ + if (Tcl_IsShared(objPtr)) { + Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj"); + } + SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); @@ -1194,9 +1202,9 @@ Tcl_AppendLimitedToObj( stringPtr = GET_STRING(objPtr); if (stringPtr->hasUnicode && stringPtr->numChars > 0) { - AppendUtfToUnicodeRep(objPtr, ellipsis, strlen(ellipsis)); + AppendUtfToUnicodeRep(objPtr, ellipsis, eLen); } else { - AppendUtfToUtfRep(objPtr, ellipsis, strlen(ellipsis)); + AppendUtfToUtfRep(objPtr, ellipsis, eLen); } } diff --git a/generic/tclStringTrim.h b/generic/tclStringTrim.h index 030e4ec..7067428 100644 --- a/generic/tclStringTrim.h +++ b/generic/tclStringTrim.h @@ -28,6 +28,8 @@ MODULE_SCOPE const char tclDefaultTrimSet[]; /* * The whitespace trimming set used when [concat]enating. This is a subset of * the above, and deliberately so. + * + * TODO: Find a reasonable way to guarantee in sync with TclIsSpaceProc() */ #define CONCAT_TRIM_SET " \f\v\r\t\n" diff --git a/generic/tclTest.c b/generic/tclTest.c index e187ec2..2a43d91 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -98,7 +98,7 @@ static Tcl_Trace cmdTrace; * TestdelCmd: */ -typedef struct DelCmd { +typedef struct { Tcl_Interp *interp; /* Interpreter in which command exists. */ char *deleteCmd; /* Script to execute when command is deleted. * Malloc'ed. */ @@ -109,7 +109,7 @@ typedef struct DelCmd { * command. */ -typedef struct TclEncoding { +typedef struct { Tcl_Interp *interp; char *toUtfCmd; char *fromUtfCmd; @@ -132,7 +132,7 @@ static int exitMainLoop = 0; * Event structure used in testing the event queue management procedures. */ -typedef struct TestEvent { +typedef struct { Tcl_Event header; /* Header common to all events */ Tcl_Interp *interp; /* Interpreter that will handle the event */ Tcl_Obj *command; /* Command to evaluate when the event occurs */ @@ -164,10 +164,8 @@ static void CleanupTestSetassocdataTests( ClientData clientData, Tcl_Interp *interp); static void CmdDelProc1(ClientData clientData); static void CmdDelProc2(ClientData clientData); -static int CmdProc1(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); -static int CmdProc2(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); +static Tcl_CmdProc CmdProc1; +static Tcl_CmdProc CmdProc2; static void CmdTraceDeleteProc( ClientData clientData, Tcl_Interp *interp, int level, char *command, Tcl_CmdProc *cmdProc, @@ -177,16 +175,11 @@ static void CmdTraceProc(ClientData clientData, Tcl_Interp *interp, int level, char *command, Tcl_CmdProc *cmdProc, ClientData cmdClientData, int argc, const char *argv[]); -static int CreatedCommandProc( - ClientData clientData, Tcl_Interp *interp, - int argc, const char **argv); -static int CreatedCommandProc2( - ClientData clientData, Tcl_Interp *interp, - int argc, const char **argv); +static Tcl_CmdProc CreatedCommandProc; +static Tcl_CmdProc CreatedCommandProc2; static void DelCallbackProc(ClientData clientData, Tcl_Interp *interp); -static int DelCmdProc(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); +static Tcl_CmdProc DelCmdProc; static void DelDeleteProc(ClientData clientData); static void EncodingFreeProc(ClientData clientData); static int EncodingToUtfProc(ClientData clientData, @@ -201,15 +194,10 @@ static int EncodingFromUtfProc(ClientData clientData, int *dstCharsPtr); static void ExitProcEven(ClientData clientData); static void ExitProcOdd(ClientData clientData); -static int GetTimesObjCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_ObjCmdProc GetTimesObjCmd; static void MainLoop(void); -static int NoopCmd(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); -static int NoopObjCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_CmdProc NoopCmd; +static Tcl_ObjCmdProc NoopObjCmd; static int ObjTraceProc(ClientData clientData, Tcl_Interp *interp, int level, const char *command, Tcl_Command commandToken, int objc, @@ -218,167 +206,81 @@ static void ObjTraceDeleteProc(ClientData clientData); static void PrintParse(Tcl_Interp *interp, Tcl_Parse *parsePtr); static void SpecialFree(char *blockPtr); static int StaticInitProc(Tcl_Interp *interp); -static int TestasyncCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestbumpinterpepochObjCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestpurebytesobjObjCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestsetbytearraylengthObjCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestbytestringObjCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestcmdinfoCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestcmdtokenCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestcmdtraceCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestconcatobjCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestcreatecommandCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestdcallCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestdelCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestdelassocdataCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestdoubledigitsObjCmd(ClientData dummy, - Tcl_Interp* interp, - int objc, Tcl_Obj* const objv[]); -static int TestdstringCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestencodingObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestevalexObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestevalobjvObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TesteventObjCmd(ClientData unused, - Tcl_Interp *interp, int argc, - Tcl_Obj *const objv[]); +static Tcl_CmdProc TestasyncCmd; +static Tcl_ObjCmdProc TestbumpinterpepochObjCmd; +static Tcl_ObjCmdProc TestsetbytearraylengthObjCmd; +static Tcl_ObjCmdProc TestpurebytesobjObjCmd; +static Tcl_ObjCmdProc TestbytestringObjCmd; +static Tcl_ObjCmdProc TeststringbytesObjCmd; +static Tcl_CmdProc TestcmdinfoCmd; +static Tcl_CmdProc TestcmdtokenCmd; +static Tcl_CmdProc TestcmdtraceCmd; +static Tcl_CmdProc TestconcatobjCmd; +static Tcl_CmdProc TestcreatecommandCmd; +static Tcl_CmdProc TestdcallCmd; +static Tcl_CmdProc TestdelCmd; +static Tcl_CmdProc TestdelassocdataCmd; +static Tcl_ObjCmdProc TestdoubledigitsObjCmd; +static Tcl_CmdProc TestdstringCmd; +static Tcl_ObjCmdProc TestencodingObjCmd; +static Tcl_ObjCmdProc TestevalexObjCmd; +static Tcl_ObjCmdProc TestevalobjvObjCmd; +static Tcl_ObjCmdProc TesteventObjCmd; static int TesteventProc(Tcl_Event *event, int flags); static int TesteventDeleteProc(Tcl_Event *event, ClientData clientData); -static int TestexithandlerCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestexprlongCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestexprlongobjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestexprdoubleCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestexprdoubleobjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestexprparserObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestexprstringCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestfileCmd(ClientData dummy, - Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); -static int TestfilelinkCmd(ClientData dummy, - Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); -static int TestfeventCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestgetassocdataCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestgetintCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestgetplatformCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestgetvarfullnameCmd( - ClientData dummy, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int TestinterpdeleteCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestlinkCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestlocaleCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_CmdProc TestexithandlerCmd; +static Tcl_CmdProc TestexprlongCmd; +static Tcl_ObjCmdProc TestexprlongobjCmd; +static Tcl_CmdProc TestexprdoubleCmd; +static Tcl_ObjCmdProc TestexprdoubleobjCmd; +static Tcl_ObjCmdProc TestexprparserObjCmd; +static Tcl_CmdProc TestexprstringCmd; +static Tcl_ObjCmdProc TestfileCmd; +static Tcl_ObjCmdProc TestfilelinkCmd; +static Tcl_CmdProc TestfeventCmd; +static Tcl_CmdProc TestgetassocdataCmd; +static Tcl_CmdProc TestgetintCmd; +static Tcl_CmdProc TestgetplatformCmd; +static Tcl_ObjCmdProc TestgetvarfullnameCmd; +static Tcl_CmdProc TestinterpdeleteCmd; +static Tcl_CmdProc TestlinkCmd; +static Tcl_ObjCmdProc TestlocaleCmd; static int TestMathFunc(ClientData clientData, Tcl_Interp *interp, Tcl_Value *args, Tcl_Value *resultPtr); static int TestMathFunc2(ClientData clientData, Tcl_Interp *interp, Tcl_Value *args, Tcl_Value *resultPtr); -static int TestmainthreadCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestsetmainloopCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestexitmainloopCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestpanicCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestparseargsCmd(ClientData dummy, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int TestparserObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestparsevarObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestparsevarnameObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestregexpObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestreturnObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_CmdProc TestmainthreadCmd; +static Tcl_CmdProc TestsetmainloopCmd; +static Tcl_CmdProc TestexitmainloopCmd; +static Tcl_CmdProc TestpanicCmd; +static Tcl_ObjCmdProc TestparseargsCmd; +static Tcl_ObjCmdProc TestparserObjCmd; +static Tcl_ObjCmdProc TestparsevarObjCmd; +static Tcl_ObjCmdProc TestparsevarnameObjCmd; +static Tcl_ObjCmdProc TestregexpObjCmd; +static Tcl_ObjCmdProc TestreturnObjCmd; static void TestregexpXflags(const char *string, int length, int *cflagsPtr, int *eflagsPtr); -static int TestsaveresultCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_ObjCmdProc TestsaveresultCmd; static void TestsaveresultFree(char *blockPtr); -static int TestsetassocdataCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestsetCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int Testset2Cmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestseterrorcodeCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestsetobjerrorcodeCmd( - ClientData dummy, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int TestsetplatformCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TeststaticpkgCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TesttranslatefilenameCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestupvarCmd(ClientData dummy, - Tcl_Interp *interp, int argc, const char **argv); -static int TestWrongNumArgsObjCmd( - ClientData clientData, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int TestGetIndexFromObjStructObjCmd( - ClientData clientData, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int TestChannelCmd(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); -static int TestChannelEventCmd(ClientData clientData, - Tcl_Interp *interp, int argc, const char **argv); -static int TestFilesystemObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestSimpleFilesystemObjCmd( - ClientData dummy, Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_CmdProc TestsetassocdataCmd; +static Tcl_CmdProc TestsetCmd; +static Tcl_CmdProc Testset2Cmd; +static Tcl_CmdProc TestseterrorcodeCmd; +static Tcl_ObjCmdProc TestsetobjerrorcodeCmd; +static Tcl_CmdProc TestsetplatformCmd; +static Tcl_CmdProc TeststaticpkgCmd; +static Tcl_CmdProc TesttranslatefilenameCmd; +static Tcl_CmdProc TestupvarCmd; +static Tcl_ObjCmdProc TestWrongNumArgsObjCmd; +static Tcl_ObjCmdProc TestGetIndexFromObjStructObjCmd; +static Tcl_CmdProc TestChannelCmd; +static Tcl_CmdProc TestChannelEventCmd; +static Tcl_ObjCmdProc TestFilesystemObjCmd; +static Tcl_ObjCmdProc TestSimpleFilesystemObjCmd; static void TestReport(const char *cmd, Tcl_Obj *arg1, Tcl_Obj *arg2); static Tcl_Obj * TestReportGetNativePath(Tcl_Obj *pathPtr); @@ -413,33 +315,18 @@ static Tcl_FSListVolumesProc SimpleListVolumes; static Tcl_FSPathInFilesystemProc SimplePathInFilesystem; static Tcl_Obj * SimpleRedirect(Tcl_Obj *pathPtr); static Tcl_FSMatchInDirectoryProc SimpleMatchInDirectory; -static int TestNumUtfCharsCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestFindFirstCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestFindLastCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestHashSystemHashCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_ObjCmdProc TestUtfPrevCmd; +static Tcl_ObjCmdProc TestNumUtfCharsCmd; +static Tcl_ObjCmdProc TestFindFirstCmd; +static Tcl_ObjCmdProc TestFindLastCmd; +static Tcl_ObjCmdProc TestHashSystemHashCmd; static Tcl_NRPostProc NREUnwind_callback; -static int TestNREUnwind(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestNRELevels(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestInterpResolverCmd(ClientData clientData, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_ObjCmdProc TestNREUnwind; +static Tcl_ObjCmdProc TestNRELevels; +static Tcl_ObjCmdProc TestInterpResolverCmd; #if defined(HAVE_CPUID) || defined(_WIN32) -static int TestcpuidCmd(ClientData dummy, - Tcl_Interp* interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_ObjCmdProc TestcpuidCmd; #endif static const Tcl_Filesystem testReportingFilesystem = { @@ -549,8 +436,7 @@ Tcltest_Init( { Tcl_ValueType t3ArgTypes[2]; - Tcl_Obj *listPtr; - Tcl_Obj **objv; + Tcl_Obj **objv, *objPtr; int objc, index; static const char *const specialOptions[] = { "-appinitprocerror", "-appinitprocdeleteinterp", @@ -582,6 +468,7 @@ Tcltest_Init( Tcl_CreateObjCommand(interp, "testpurebytesobj", TestpurebytesobjObjCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testsetbytearraylength", TestsetbytearraylengthObjCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testbytestring", TestbytestringObjCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "teststringbytes", TeststringbytesObjCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testwrongnumargs", TestWrongNumArgsObjCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testfilesystem", TestFilesystemObjCmd, @@ -685,6 +572,8 @@ Tcltest_Init( NULL, NULL); Tcl_CreateObjCommand(interp, "testsetobjerrorcode", TestsetobjerrorcodeCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "testutfprev", + TestUtfPrevCmd, (ClientData) 0, NULL); Tcl_CreateObjCommand(interp, "testnumutfchars", TestNumUtfCharsCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testfindfirst", @@ -708,7 +597,7 @@ Tcltest_Init( NULL, NULL); #if defined(HAVE_CPUID) || defined(_WIN32) Tcl_CreateObjCommand(interp, "testcpuid", TestcpuidCmd, - (ClientData) 0, NULL); + NULL, NULL); #endif t3ArgTypes[0] = TCL_EITHER; t3ArgTypes[1] = TCL_EITHER; @@ -738,9 +627,9 @@ Tcltest_Init( * Check for special options used in ../tests/main.test */ - listPtr = Tcl_GetVar2Ex(interp, "argv", NULL, TCL_GLOBAL_ONLY); - if (listPtr != NULL) { - if (Tcl_ListObjGetElements(interp, listPtr, &objc, &objv) != TCL_OK) { + objPtr = Tcl_GetVar2Ex(interp, "argv", NULL, TCL_GLOBAL_ONLY); + if (objPtr != NULL) { + if (Tcl_ListObjGetElements(interp, objPtr, &objc, &objv) != TCL_OK) { return TCL_ERROR; } if (objc && (Tcl_GetIndexFromObj(NULL, objv[0], specialOptions, NULL, @@ -831,18 +720,19 @@ TestasyncCmd( TestAsyncHandler *asyncPtr, *prevPtr; int id, code; static int nextId = 1; + (void)dummy; if (argc < 2) { wrongNumArgs: - Tcl_SetResult(interp, "wrong # args", TCL_STATIC); + Tcl_AppendResult(interp, "wrong # args", NULL); return TCL_ERROR; } if (strcmp(argv[1], "create") == 0) { if (argc != 3) { goto wrongNumArgs; } - asyncPtr = ckalloc(sizeof(TestAsyncHandler)); - asyncPtr->command = ckalloc(strlen(argv[2]) + 1); + asyncPtr = (TestAsyncHandler *)ckalloc(sizeof(TestAsyncHandler)); + asyncPtr->command = (char *)ckalloc(strlen(argv[2]) + 1); strcpy(asyncPtr->command, argv[2]); Tcl_MutexLock(&asyncTestMutex); asyncPtr->id = nextId; @@ -924,7 +814,7 @@ TestasyncCmd( if (Tcl_CreateThread(&threadID, AsyncThreadProc, INT2PTR(id), TCL_THREAD_STACK_DEFAULT, TCL_THREAD_NOFLAGS) != TCL_OK) { - Tcl_SetResult(interp, "can't create thread", TCL_STATIC); + Tcl_AppendResult(interp, "can't create thread", NULL); Tcl_MutexUnlock(&asyncTestMutex); return TCL_ERROR; } @@ -1039,6 +929,7 @@ TestbumpinterpepochObjCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { Interp *iPtr = (Interp *)interp; + if (objc != 1) { Tcl_WrongNumArgs(interp, 1, objv, ""); return TCL_ERROR; @@ -1074,6 +965,7 @@ TestcmdinfoCmd( const char **argv) /* Argument strings. */ { Tcl_CmdInfo info; + (void)dummy; if (argc != 3) { Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0], @@ -1089,7 +981,7 @@ TestcmdinfoCmd( Tcl_DStringResult(interp, &delString); } else if (strcmp(argv[1], "get") == 0) { if (Tcl_GetCommandInfo(interp, argv[2], &info) ==0) { - Tcl_SetResult(interp, "??", TCL_STATIC); + Tcl_AppendResult(interp, "??", NULL); return TCL_OK; } if (info.proc == CmdProc1) { @@ -4990,6 +4882,41 @@ NoopObjCmd( /* *---------------------------------------------------------------------- * + * TeststringbytesObjCmd -- + * Returns bytearray value of the bytes in argument string rep + * + * Results: + * Returns the TCL_OK result code. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static int +TeststringbytesObjCmd( + ClientData dummy, + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* The argument objects. */ +{ + int n; + const unsigned char *p; + (void)dummy; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "value"); + return TCL_ERROR; + } + p = (const unsigned char *)Tcl_GetStringFromObj(objv[1], &n); + Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(p, n)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * * TestpurebytesobjObjCmd -- * * This object-based procedure constructs a pure bytes object @@ -5010,12 +4937,13 @@ NoopObjCmd( static int TestpurebytesobjObjCmd( - ClientData unused, /* Not used. */ + ClientData dummy, /* Not used. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* The argument objects. */ { Tcl_Obj *objPtr; + (void)dummy; if (objc > 2) { Tcl_WrongNumArgs(interp, 1, objv, "?string?"); @@ -5030,7 +4958,7 @@ TestpurebytesobjObjCmd( if (objc == 2) { const char *s = Tcl_GetString(objv[1]); objPtr->length = objv[1]->length; - objPtr->bytes = ckalloc(objPtr->length + 1); + objPtr->bytes = (char *)ckalloc(objPtr->length + 1); memcpy(objPtr->bytes, s, objPtr->length); objPtr->bytes[objPtr->length] = 0; } @@ -5057,13 +4985,14 @@ TestpurebytesobjObjCmd( static int TestsetbytearraylengthObjCmd( - ClientData unused, /* Not used. */ + ClientData dummy, /* Not used. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* The argument objects. */ { int n; Tcl_Obj *obj = NULL; + (void)dummy; if (objc != 3) { Tcl_WrongNumArgs(interp, 1, objv, "value length"); @@ -5101,18 +5030,20 @@ TestsetbytearraylengthObjCmd( static int TestbytestringObjCmd( - ClientData unused, /* Not used. */ + ClientData dummy, /* Not used. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* The argument objects. */ { - int n; + int n = 0; const char *p; + (void)dummy; if (objc != 2) { Tcl_WrongNumArgs(interp, 1, objv, "bytearray"); return TCL_ERROR; } + p = (const char *)Tcl_GetByteArrayFromObj(objv[1], &n); Tcl_SetObjResult(interp, Tcl_NewStringObj(p, n)); return TCL_OK; @@ -5135,11 +5066,10 @@ TestbytestringObjCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int TestsetCmd( ClientData data, /* Additional flags for Get/SetVar2. */ - register Tcl_Interp *interp,/* Current interpreter. */ + Tcl_Interp *interp,/* Current interpreter. */ int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { @@ -5171,7 +5101,7 @@ TestsetCmd( static int Testset2Cmd( ClientData data, /* Additional flags for Get/SetVar2. */ - register Tcl_Interp *interp,/* Current interpreter. */ + Tcl_Interp *interp,/* Current interpreter. */ int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { @@ -5222,7 +5152,7 @@ Testset2Cmd( static int TestsaveresultCmd( ClientData dummy, /* Not used. */ - register Tcl_Interp *interp,/* Current interpreter. */ + Tcl_Interp *interp,/* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* The argument objects. */ { @@ -5355,7 +5285,7 @@ TestsaveresultFree( static int TestmainthreadCmd( ClientData dummy, /* Not used. */ - register Tcl_Interp *interp,/* Current interpreter. */ + Tcl_Interp *interp,/* Current interpreter. */ int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { @@ -5416,7 +5346,7 @@ MainLoop(void) static int TestsetmainloopCmd( ClientData dummy, /* Not used. */ - register Tcl_Interp *interp,/* Current interpreter. */ + Tcl_Interp *interp,/* Current interpreter. */ int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { @@ -5445,7 +5375,7 @@ TestsetmainloopCmd( static int TestexitmainloopCmd( ClientData dummy, /* Not used. */ - register Tcl_Interp *interp,/* Current interpreter. */ + Tcl_Interp *interp,/* Current interpreter. */ int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { @@ -6794,6 +6724,55 @@ SimpleListVolumes(void) } /* + * Used to check operations of Tcl_UtfPrev. + * + * Usage: testutfprev $bytes $offset + */ + +static int +TestUtfPrevCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) +{ + int numBytes, offset; + char *bytes; + const char *result; + Tcl_Obj *copy; + + if (objc < 2 || objc > 3) { + Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?"); + return TCL_ERROR; + } + + bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes); + + if (objc == 3) { + if (TCL_OK != Tcl_GetIntFromObj(interp, objv[2], &offset)) { + return TCL_ERROR; + } + if (offset < 0) { + offset = 0; + } + if (offset > numBytes) { + offset = numBytes; + } + } else { + offset = numBytes; + } + copy = Tcl_DuplicateObj(objv[1]); + bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1); + bytes[numBytes] = '\0'; + + result = Tcl_UtfPrev(bytes + offset, bytes); + + Tcl_DecrRefCount(copy); + Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes)); + return TCL_OK; +} + +/* * Used to check correct string-length determining in Tcl_NumUtfChars */ @@ -6928,7 +6907,7 @@ TestcpuidCmd( static int TestHashSystemHashCmd( - ClientData clientData, + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) @@ -6940,6 +6919,7 @@ TestHashSystemHashCmd( Tcl_HashTable hash; Tcl_HashEntry *hPtr; int i, isNew, limit = 100; + (void)dummy; if (objc>1 && Tcl_GetIntFromObj(interp, objv[1], &limit)!=TCL_OK) { return TCL_ERROR; @@ -7009,6 +6989,8 @@ TestgetintCmd( int argc, const char **argv) { + (void)dummy; + if (argc < 2) { Tcl_SetResult(interp, "wrong # args", TCL_STATIC); return TCL_ERROR; @@ -7033,6 +7015,7 @@ NREUnwind_callback( int result) { int none; + (void)result; if (data[0] == INT2PTR(-1)) { Tcl_NRAddCallback(interp, NREUnwind_callback, &none, INT2PTR(-1), @@ -7055,11 +7038,15 @@ NREUnwind_callback( static int TestNREUnwind( - ClientData clientData, + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) { + (void)dummy; + (void)objc; + (void)objv; + /* * Insure that callbacks effectively run at the proper level during the * unwinding of the NRE stack. @@ -7073,7 +7060,7 @@ TestNREUnwind( static int TestNRELevels( - ClientData clientData, + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) @@ -7084,6 +7071,9 @@ TestNRELevels( Tcl_Obj *levels[6]; int i = 0; NRE_callback *cbPtr = iPtr->execEnvPtr->callbackPtr; + (void)dummy; + (void)objc; + (void)objv; if (refDepth == NULL) { refDepth = &depth; diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 12d764c..3377b70 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -64,6 +64,17 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 +}; + +static const unsigned char complete[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, #if TCL_UTF_MAX > 4 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -497,7 +508,7 @@ Tcl_UtfCharComplete( * a complete UTF-8 character. */ int length) /* Length of above string in bytes. */ { - return length >= totalBytes[(unsigned char)*src]; + return length >= complete[(unsigned char)*src]; } /* @@ -675,15 +686,43 @@ Tcl_UtfNext( * * Tcl_UtfPrev -- * - * Given a pointer to some current location in a UTF-8 string, move - * backwards one character. This works correctly when the pointer is in - * the middle of a UTF-8 character. + * The aim of this routine is to provide a way to move backward + * through a UTF-8 string. The caller is expected to pass non-NULL + * pointer arguments start and src. start points to the beginning + * of a string, and src >= start points to a location within (or just + * past the end) of the string. This routine always returns a + * pointer within the string (>= start). When (src == start), it + * returns start. When (src > start), it returns a pointer (< src) + * and (>= src - TCL_UTF_MAX). Subject to these constraints, the + * routine returns a pointer to the earliest byte in the string that + * starts a character when characters are read starting at start and + * that character might include the byte src[-1]. The routine will + * examine only those bytes in the range that might be returned. + * It will not examine the byte *src, and because of that cannot + * determine for certain in all circumstances whether the character + * that begins with the returned pointer will or will not include + * the byte src[-1]. In the scenario, where src points to the end of + * a buffer being filled, the returned pointer point to either the + * final complete character in the string or to the earliest byte + * that might start an incomplete character waiting for more bytes to + * complete. + * + * Because this routine always returns a value < src until the point + * it is forced to return start, it is useful as a backward iterator + * through a string that will always make progress and always be + * prevented from running past the beginning of the string. + * + * In a string where all characters are complete and properly formed, + * and the value of src points to the first byte of a character, + * repeated Tcl_UtfPrev calls will step to the starting bytes of + * characters, one character at a time. Within those limitations, + * Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot + * be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and + * the caller will have to take greater care. * * Results: - * The return value is a pointer to the previous character in the UTF-8 - * string. If the current location was already at the beginning of the - * string, the return value will also be a pointer to the beginning of - * the string. + * A pointer to the start of a character in the string as described + * above. * * Side effects: * None. @@ -693,9 +732,8 @@ Tcl_UtfNext( const char * Tcl_UtfPrev( - const char *src, /* The current location in the string. */ - const char *start) /* Pointer to the beginning of the string, to - * avoid going backwards too far. */ + const char *src, /* A location in a UTF-8 string. */ + const char *start) /* Pointer to the beginning of the string */ { const char *look; int i, byte; @@ -713,6 +751,9 @@ Tcl_UtfPrev( break; } if (byte >= 0xC0) { + if (totalBytes[byte] <= i) { + break; + } return look; } look--; @@ -1692,7 +1733,7 @@ Tcl_UniCharIsSpace( */ if (ch < 0x80) { - return TclIsSpaceProc((char) ch); + return TclIsSpaceProcM((char) ch); #if TCL_UTF_MAX > 3 } else if (UNICODE_OUT_OF_RANGE(ch)) { return 0; diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 4a20c3e..d7d6134 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -404,7 +404,7 @@ TclMaxListLength( * No list element before leading white space. */ - count += 1 - TclIsSpaceProc(*bytes); + count += 1 - TclIsSpaceProcM(*bytes); /* * Count white space runs as potential element separators. @@ -414,7 +414,7 @@ TclMaxListLength( if ((numBytes == -1) && (*bytes == '\0')) { break; } - if (TclIsSpaceProc(*bytes)) { + if (TclIsSpaceProcM(*bytes)) { /* * Space run started; bump count. */ @@ -423,7 +423,7 @@ TclMaxListLength( do { bytes++; numBytes -= (numBytes != -1); - } while (numBytes && TclIsSpaceProc(*bytes)); + } while (numBytes && TclIsSpaceProcM(*bytes)); if ((numBytes == 0) || ((numBytes == -1) && (*bytes == '\0'))) { break; } @@ -440,7 +440,7 @@ TclMaxListLength( * No list element following trailing white space. */ - count -= TclIsSpaceProc(bytes[-1]); + count -= TclIsSpaceProcM(bytes[-1]); done: if (endPtr) { @@ -589,7 +589,7 @@ FindElement( */ limit = (string + stringLength); - while ((p < limit) && (TclIsSpaceProc(*p))) { + while ((p < limit) && (TclIsSpaceProcM(*p))) { p++; } if (p == limit) { /* no element found */ @@ -634,7 +634,7 @@ FindElement( } else if (openBraces == 1) { size = (p - elemStart); p++; - if ((p >= limit) || TclIsSpaceProc(*p)) { + if ((p >= limit) || TclIsSpaceProcM(*p)) { goto done; } @@ -644,7 +644,7 @@ FindElement( if (interp != NULL) { p2 = p; - while ((p2 < limit) && (!TclIsSpaceProc(*p2)) + while ((p2 < limit) && (!TclIsSpaceProcM(*p2)) && (p2 < p+20)) { p2++; } @@ -679,23 +679,6 @@ FindElement( break; /* - * Space: ignore if element is in braces or quotes; otherwise - * terminate element. - */ - - case ' ': - case '\f': - case '\n': - case '\r': - case '\t': - case '\v': - if ((openBraces == 0) && !inQuotes) { - size = (p - elemStart); - goto done; - } - break; - - /* * Double-quote: if element is in quotes then terminate it. */ @@ -703,7 +686,7 @@ FindElement( if (inQuotes) { size = (p - elemStart); p++; - if ((p >= limit) || TclIsSpaceProc(*p)) { + if ((p >= limit) || TclIsSpaceProcM(*p)) { goto done; } @@ -713,7 +696,7 @@ FindElement( if (interp != NULL) { p2 = p; - while ((p2 < limit) && (!TclIsSpaceProc(*p2)) + while ((p2 < limit) && (!TclIsSpaceProcM(*p2)) && (p2 < p+20)) { p2++; } @@ -726,6 +709,20 @@ FindElement( return TCL_ERROR; } break; + + default: + if (TclIsSpaceProcM(*p)) { + /* + * Space: ignore if element is in braces or quotes; + * otherwise terminate element. + */ + if ((openBraces == 0) && !inQuotes) { + size = (p - elemStart); + goto done; + } + } + break; + } p++; } @@ -756,7 +753,7 @@ FindElement( } done: - while ((p < limit) && (TclIsSpaceProc(*p))) { + while ((p < limit) && (TclIsSpaceProcM(*p))) { p++; } *elementPtr = elemStart; @@ -1112,12 +1109,6 @@ TclScanElement( case '[': /* TYPE_SUBS */ case '$': /* TYPE_SUBS */ case ';': /* TYPE_COMMAND_END */ - case ' ': /* TYPE_SPACE */ - case '\f': /* TYPE_SPACE */ - case '\n': /* TYPE_COMMAND_END */ - case '\r': /* TYPE_SPACE */ - case '\t': /* TYPE_SPACE */ - case '\v': /* TYPE_SPACE */ forbidNone = 1; extra++; /* Escape sequences all one byte longer. */ #if COMPAT @@ -1162,6 +1153,15 @@ TclScanElement( } /* TODO: Panic on improper encoding? */ break; + default: + if (TclIsSpaceProcM(*p)) { + forbidNone = 1; + extra++; /* Escape sequences all one byte longer. */ +#if COMPAT + preferBrace = 1; +#endif + } + break; } } length -= (length > 0); @@ -1660,42 +1660,6 @@ Tcl_Backslash( /* *---------------------------------------------------------------------- * - * UtfWellFormedEnd -- - * Checks the end of utf string is malformed, if yes - wraps bytes - * to the given buffer (as well-formed NTS string). The buffer - * argument should be initialized by the caller and ready to use. - * - * Results: - * The bytes with well-formed end of the string. - * - * Side effects: - * Buffer (DString) may be allocated, so must be released. - * - *---------------------------------------------------------------------- - */ - -static inline const char* -UtfWellFormedEnd( - Tcl_DString *buffer, /* Buffer used to hold well-formed string. */ - const char *bytes, /* Pointer to the beginning of the string. */ - int length) /* Length of the string. */ -{ - const char *l = bytes + length; - const char *p = Tcl_UtfPrev(l, bytes); - - if (Tcl_UtfCharComplete(p, l - p)) { - return bytes; - } - /* - * Malformed utf-8 end, be sure we've NTS to safe compare of end-character, - * avoid segfault by access violation out of range. - */ - Tcl_DStringAppend(buffer, bytes, length); - return Tcl_DStringValue(buffer); -} -/* - *---------------------------------------------------------------------- - * * TclTrimRight -- * Takes two counted strings in the Tcl encoding. Conceptually * finds the sub string (offset) to trim from the right side of the @@ -1710,15 +1674,23 @@ UtfWellFormedEnd( *---------------------------------------------------------------------- */ -static inline int -TrimRight( - const char *bytes, /* String to be trimmed... */ - int numBytes, /* ...and its length in bytes */ - const char *trim, /* String of trim characters... */ - int numTrim) /* ...and its length in bytes */ +int +TclTrimRight( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + /* Calls to TclUtfToUniChar() in this routine + * rely on (bytes[numBytes] == '\0'). */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ + /* Calls to TclUtfToUniChar() in this routine + * rely on (trim[numTrim] == '\0'). */ { - const char *p = bytes + numBytes; - int pInc; + const char *pp, *p = bytes + numBytes; + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } Tcl_UniChar ch1 = 0, ch2 = 0; /* @@ -1727,10 +1699,13 @@ TrimRight( do { const char *q = trim; - int bytesLeft = numTrim; + int pInc = 0, bytesLeft = numTrim; - p = Tcl_UtfPrev(p, bytes); - pInc = TclUtfToUniChar(p, &ch1); + pp = Tcl_UtfPrev(p, bytes); + do { + pp += pInc; + pInc = TclUtfToUniChar(pp, &ch1); + } while (pp + pInc < p); /* * Inner loop: scan trim string for match to current character. @@ -1752,44 +1727,13 @@ TrimRight( * No match; trim task done; *p is last non-trimmed char. */ - p += pInc; break; } + p = pp; } while (p > bytes); return numBytes - (p - bytes); } - -int -TclTrimRight( - const char *bytes, /* String to be trimmed... */ - int numBytes, /* ...and its length in bytes */ - const char *trim, /* String of trim characters... */ - int numTrim) /* ...and its length in bytes */ -{ - int res; - Tcl_DString bytesBuf, trimBuf; - - /* Empty strings -> nothing to do */ - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - - Tcl_DStringInit(&bytesBuf); - Tcl_DStringInit(&trimBuf); - bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); - trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); - - res = TrimRight(bytes, numBytes, trim, numTrim); - if (res > numBytes) { - res = numBytes; - } - - Tcl_DStringFree(&bytesBuf); - Tcl_DStringFree(&trimBuf); - - return res; -} /* *---------------------------------------------------------------------- @@ -1809,21 +1753,30 @@ TclTrimRight( *---------------------------------------------------------------------- */ -static inline int -TrimLeft( - const char *bytes, /* String to be trimmed... */ - int numBytes, /* ...and its length in bytes */ - const char *trim, /* String of trim characters... */ - int numTrim) /* ...and its length in bytes */ +int +TclTrimLeft( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + /* Calls to TclUtfToUniChar() in this routine + * rely on (bytes[numBytes] == '\0'). */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ + /* Calls to TclUtfToUniChar() in this routine + * rely on (trim[numTrim] == '\0'). */ { const char *p = bytes; - Tcl_UniChar ch1 = 0, ch2 = 0; + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } /* * Outer loop: iterate over string to be trimmed. */ do { + Tcl_UniChar ch1 = 0; int pInc = TclUtfToUniChar(p, &ch1); const char *q = trim; int bytesLeft = numTrim; @@ -1833,6 +1786,7 @@ TrimLeft( */ do { + Tcl_UniChar ch2 = 0; int qInc = TclUtfToUniChar(q, &ch2); if (ch1 == ch2) { @@ -1857,37 +1811,6 @@ TrimLeft( return p - bytes; } - -int -TclTrimLeft( - const char *bytes, /* String to be trimmed... */ - int numBytes, /* ...and its length in bytes */ - const char *trim, /* String of trim characters... */ - int numTrim) /* ...and its length in bytes */ -{ - int res; - Tcl_DString bytesBuf, trimBuf; - - /* Empty strings -> nothing to do */ - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - - Tcl_DStringInit(&bytesBuf); - Tcl_DStringInit(&trimBuf); - bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); - trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); - - res = TrimLeft(bytes, numBytes, trim, numTrim); - if (res > numBytes) { - res = numBytes; - } - - Tcl_DStringFree(&bytesBuf); - Tcl_DStringFree(&trimBuf); - - return res; -} /* *---------------------------------------------------------------------- @@ -1909,41 +1832,38 @@ int TclTrim( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ + /* Calls in this routine + * rely on (bytes[numBytes] == '\0'). */ const char *trim, /* String of trim characters... */ int numTrim, /* ...and its length in bytes */ - int *trimRight) /* Offset from the end of the string. */ + /* Calls in this routine + * rely on (trim[numTrim] == '\0'). */ + int *trimRightPtr) /* Offset from the end of the string. */ { - int trimLeft; - Tcl_DString bytesBuf, trimBuf; + int trimLeft = 0, trimRight = 0; - *trimRight = 0; /* Empty strings -> nothing to do */ - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - - Tcl_DStringInit(&bytesBuf); - Tcl_DStringInit(&trimBuf); - bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); - trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); - - trimLeft = TrimLeft(bytes, numBytes, trim, numTrim); - if (trimLeft > numBytes) { - trimLeft = numBytes; - } - numBytes -= trimLeft; - /* have to trim yet (first char was already verified within TrimLeft) */ - if (numBytes > 1) { - bytes += trimLeft; - *trimRight = TrimRight(bytes, numBytes, trim, numTrim); - if (*trimRight > numBytes) { - *trimRight = numBytes; + if ((numBytes > 0) && (numTrim > 0)) { + + /* When bytes is NUL-terminated, returns 0 <= trimLeft <= numBytes */ + trimLeft = TclTrimLeft(bytes, numBytes, trim, numTrim); + numBytes -= trimLeft; + + /* If we did not trim the whole string, it starts with a character + * that we will not trim. Skip over it. */ + if (numBytes > 0) { + const char *first = bytes + trimLeft; + bytes = Tcl_UtfNext(first); + numBytes -= (bytes - first); + + if (numBytes > 0) { + /* When bytes is NUL-terminated, returns + * 0 <= trimRight <= numBytes */ + trimRight = TclTrimRight(bytes, numBytes, trim, numTrim); + } } } - - Tcl_DStringFree(&bytesBuf); - Tcl_DStringFree(&trimBuf); - + *trimRightPtr = trimRight; return trimLeft; } @@ -2234,7 +2154,6 @@ Tcl_StringCaseMatch( int nocase) /* 0 for case sensitive, 1 for insensitive */ { int p, charLen; - const char *pstart = pattern; Tcl_UniChar ch1 = 0, ch2 = 0; while (1) { @@ -2399,10 +2318,13 @@ Tcl_StringCaseMatch( break; } } + /* If we reach here, we matched. Need to move past closing ] */ while (*pattern != ']') { if (*pattern == '\0') { - pattern = Tcl_UtfPrev(pattern, pstart); - break; + /* We ran out of pattern after matching something in + * (unclosed!) brackets. So long as we ran out of string + * at the same time, we have a match. Otherwise, not. */ + return (*str == '\0'); } pattern++; } @@ -2831,9 +2753,37 @@ Tcl_DStringAppendElement( { char *dst = dsPtr->string + dsPtr->length; int needSpace = TclNeedSpace(dsPtr->string, dst); - char flags = needSpace ? TCL_DONT_QUOTE_HASH : 0; - int newSize = dsPtr->length + needSpace - + TclScanElement(element, -1, &flags); + char flags = 0; + int quoteHash = 1, newSize; + + if (needSpace) { + /* + * If we need a space to separate the new element from something + * already ending the string, we're not appending the first element + * of any list, so we need not quote any leading hash character. + */ + quoteHash = 0; + } else { + /* + * We don't need a space, maybe because there's some already there. + * Checking whether we might be appending a first element is a bit + * more involved. + * + * Backtrack over all whitespace. + */ + while ((--dst >= dsPtr->string) && TclIsSpaceProcM(*dst)) { + } + + /* Call again without whitespace to confound things. */ + quoteHash = !TclNeedSpace(dsPtr->string, dst+1); + } + if (!quoteHash) { + flags |= TCL_DONT_QUOTE_HASH; + } + newSize = dsPtr->length + needSpace + TclScanElement(element, -1, &flags); + if (!quoteHash) { + flags |= TCL_DONT_QUOTE_HASH; + } /* * Allocate a larger buffer for the string if the current one isn't large @@ -2865,8 +2815,8 @@ Tcl_DStringAppendElement( element = dsPtr->string + offset; } } - dst = dsPtr->string + dsPtr->length; } + dst = dsPtr->string + dsPtr->length; /* * Convert the new string to a list element and copy it into the buffer at @@ -2877,15 +2827,8 @@ Tcl_DStringAppendElement( *dst = ' '; dst++; dsPtr->length++; - - /* - * If we need a space to separate this element from preceding stuff, - * then this element will not lead a list, and need not have it's - * leading '#' quoted. - */ - - flags |= TCL_DONT_QUOTE_HASH; } + dsPtr->length += TclConvertElement(element, -1, dst, flags); dsPtr->string[dsPtr->length] = '\0'; return dsPtr->string; @@ -3505,63 +3448,69 @@ TclNeedSpace( /* * A space is needed unless either: * (a) we're at the start of the string, or - */ + * + * (NOTE: This check is now absorbed into the loop below.) + * if (end == start) { return 0; } + * + */ + /* * (b) we're at the start of a nested list-element, quoted with an open * curly brace; we can be nested arbitrarily deep, so long as the * first curly brace starts an element, so backtrack over open curly * braces that are trailing characters of the string; and - */ + * + * (NOTE: Every character our parser is looking for is a proper + * single-byte encoding of an ASCII value. It does not accept + * overlong encodings. Given that, there's no benefit using + * Tcl_UtfPrev. If it would find what we seek, so would byte-by-byte + * backward scan. Save routine call overhead and risk of wrong + * results should the behavior of Tcl_UtfPrev change in unexpected ways. + * Reconsider this if we ever start treating non-ASCII Unicode + * characters as meaningful list syntax, expanded Unicode spaces as + * element separators, for example.) + * end = Tcl_UtfPrev(end, start); while (*end == '{') { - if (end == start) { - return 0; - } - end = Tcl_UtfPrev(end, start); + if (end == start) { + return 0; + } + end = Tcl_UtfPrev(end, start); + } + + * + */ + + while ((--end >= start) && (*end == '{')) { + } + if (end < start) { + return 0; } /* * (c) the trailing character of the string is already a list-element - * separator (according to TclFindElement); that is, one of these - * characters: - * \u0009 \t TAB - * \u000A \n NEWLINE - * \u000B \v VERTICAL TAB - * \u000C \f FORM FEED - * \u000D \r CARRIAGE RETURN - * \u0020 SPACE - * with the condition that the penultimate character is not a - * backslash. + * separator, Use the same testing routine as TclFindElement to + * enforce consistency. */ - if (*end > 0x20) { + if (TclIsSpaceProcM(*end)) { + int result = 0; + /* - * Performance tweak. All ASCII spaces are <= 0x20. So get a quick - * answer for most characters before comparing against all spaces in - * the switch below. - * - * NOTE: Remove this if other Unicode spaces ever get accepted as - * list-element separators. + * Trailing whitespace might be part of a backslash escape + * sequence. Handle that possibility. */ - return 1; - } - switch (*end) { - case ' ': - case '\t': - case '\n': - case '\r': - case '\v': - case '\f': - if ((end == start) || (end[-1] != '\\')) { - return 0; + while ((--end >= start) && (*end == '\\')) { + result = !result; } + return result; } return 1; } @@ -3689,7 +3638,7 @@ TclGetIntForIndex( * Leading whitespace is acceptable in an index. */ - while (length && TclIsSpaceProc(*bytes)) { + while (length && TclIsSpaceProcM(*bytes)) { bytes++; length--; } @@ -3702,7 +3651,7 @@ TclGetIntForIndex( if ((savedOp != '+') && (savedOp != '-')) { goto parseError; } - if (TclIsSpaceProc(opPtr[1])) { + if (TclIsSpaceProcM(opPtr[1])) { goto parseError; } *opPtr = '\0'; @@ -3876,7 +3825,7 @@ SetEndOffsetFromAny( * after "end-" to Tcl_GetInt, then reverse for offset. */ - if (TclIsSpaceProc(bytes[4])) { + if (TclIsSpaceProcM(bytes[4])) { goto badIndexFormat; } if (Tcl_GetInt(interp, bytes+4, &offset) != TCL_OK) { @@ -4081,7 +4030,7 @@ TclCheckBadOctal( * zero. Try to generate a meaningful error message. */ - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } if (*p == '+' || *p == '-') { @@ -4094,7 +4043,7 @@ TclCheckBadOctal( while (isdigit(UCHAR(*p))) { /* INTL: digit. */ p++; } - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } if (*p == '\0') { diff --git a/tests/dstring.test b/tests/dstring.test index 06121a3..5feb355 100644 --- a/tests/dstring.test +++ b/tests/dstring.test @@ -180,16 +180,37 @@ test dstring-2.12 {appending list elements} -constraints testdstring -setup { } -cleanup { testdstring free } -result {x #} -test dstring-2.13 {appending list elements} -constraints testdstring -body { - # This test shows lack of sophistication in Tcl_DStringAppendElement's - # decision about whether #-quoting can be disabled. +test dstring-2.13 {appending list elements} -constraints testdstring -setup { testdstring free +} -body { + # This test checks the sophistication in Tcl_DStringAppendElement's + # decision about whether #-quoting can be disabled. testdstring append "x " -1 testdstring element # testdstring get } -cleanup { testdstring free -} -result {x {#}} +} -result {x #} +test dstring-2.14 {appending list elements} -constraints testdstring -setup { + testdstring free +} -body { + testdstring append " " -1 + testdstring element # + testdstring get +} -cleanup { + testdstring free +} -result { {#}} +test dstring-2.15 {appending list elements} -constraints testdstring -setup { + testdstring free +} -body { + # This test checks the sophistication in Tcl_DStringAppendElement's + # decision about whether #-quoting can be disabled. + testdstring append "x " -1 + testdstring element # + testdstring get +} -cleanup { + testdstring free +} -result {x #} test dstring-3.1 {nested sublists} -constraints testdstring -setup { testdstring free @@ -306,10 +327,11 @@ test dstring-3.9 {appending list elements} -constraints testdstring -setup { } -cleanup { testdstring free } -result {x {x #}} -test dstring-3.10 {appending list elements} -constraints testdstring -body { - # This test shows lack of sophistication in Tcl_DStringAppendElement's - # decision about whether #-quoting can be disabled. +test dstring-3.10 {appending list elements} -constraints testdstring -setup { testdstring free +} -body { + # This test checks the sophistication in Tcl_DStringAppendElement's + # decision about whether #-quoting can be disabled. testdstring append x -1 testdstring start testdstring append "x " -1 @@ -318,7 +340,33 @@ test dstring-3.10 {appending list elements} -constraints testdstring -body { testdstring get } -cleanup { testdstring free -} -result {x {x {#}}} +} -result {x {x #}} +test dstring-3.11 {appending list elements} -constraints testdstring -setup { + testdstring free +} -body { + testdstring append x -1 + testdstring start + testdstring append " " -1 + testdstring element # + testdstring end + testdstring get +} -cleanup { + testdstring free +} -result {x { {#}}} +test dstring-3.12 {appending list elements} -constraints testdstring -setup { + testdstring free +} -body { + # This test checks the sophistication in Tcl_DStringAppendElement's + # decision about whether #-quoting can be disabled. + testdstring append x -1 + testdstring start + testdstring append "x " -1 + testdstring element # + testdstring end + testdstring get +} -cleanup { + testdstring free +} -result {x {x #}} test dstring-4.1 {truncation} -constraints testdstring -setup { testdstring free diff --git a/tests/encoding.test b/tests/encoding.test index 21edf79..6fef748 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -34,6 +34,8 @@ proc runtests {} { # Some tests require the testencoding command testConstraint testencoding [llength [info commands testencoding]] +testConstraint testbytestring [llength [info commands testbytestring]] +testConstraint teststringbytes [llength [info commands teststringbytes]] testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}] testConstraint exec [llength [info commands exec]] testConstraint testgetdefenc [llength [info commands testgetdefenc]] @@ -315,24 +317,20 @@ test encoding-14.1 {BinaryProc} { test encoding-15.1 {UtfToUtfProc} { encoding convertto utf-8 \xa3 } "\xc2\xa3" -test encoding-15.2 {UtfToUtfProc null character output} { - set x \u0000 - set y [encoding convertto utf-8 \u0000] - set y [encoding convertfrom identity $y] - binary scan $y H* z - list [string bytelength $x] [string bytelength $y] $z -} {2 1 00} -test encoding-15.3 {UtfToUtfProc null character input} { - set x [encoding convertfrom identity \x00] - set y [encoding convertfrom utf-8 $x] - binary scan [encoding convertto identity $y] H* z - list [string bytelength $x] [string bytelength $y] $z -} {1 2 c080} -test encoding-15.4 {UtfToUtfProc emoji character input} { +test encoding-15.2 {UtfToUtfProc null character output} testbytestring { + binary scan [testbytestring [encoding convertto utf-8 \u0000]] H* z + set z +} 00 +test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { + set y [encoding convertfrom utf-8 [encoding convertto utf-8 \u0000]] + binary scan [teststringbytes $y] H* z + set z +} c080 +test encoding-15.4 {UtfToUtfProc emoji character input} -body { set x \xED\xA0\xBD\xED\xB8\x82 set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82] list [string length $x] $y -} "6 \uD83D\uDE02" +} -result "6 \uD83D\uDE02" test encoding-15.5 {UtfToUtfProc emoji character input} { set x \xF0\x9F\x98\x82 set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82] @@ -716,7 +714,7 @@ test encoding-28.0 {all encodings load} -body { encoding convertto $name $string # discard the cached internal representation of Tcl_Encoding - # Unfortunately, without this, encoding 2-1 fails. + # Unfortunately, without this, encoding 2-1 fails. llength $name } return $count diff --git a/tests/string.test b/tests/string.test index f5defd9..e4c39d2 100644 --- a/tests/string.test +++ b/tests/string.test @@ -25,6 +25,7 @@ catch [list package require -exact Tcltest [info patchlevel]] testConstraint testobj [expr {[info commands testobj] != {}}] testConstraint testindexobj [expr {[info commands testindexobj] != {}}] testConstraint tip389 [expr {[string length \U010000] == 2}] +testConstraint testbytestring [llength [info commands testbytestring]] # Used for constraining memory leak tests testConstraint memory [llength [info commands memory]] @@ -1561,6 +1562,34 @@ test string-20.5 {string trimright} { test string-20.6 {string trimright, unicode default} { string trimright ABC\u1361\u0085\x00\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u2028\u2029\u202F\u205F\u3000 } ABC\u1361 +test string-20.7 {string trim on not valid utf-8 sequence (consider NTS as continuation char), bug [c61818e4c9]} testbytestring { + set result {} + set a [testbytestring \xc0\x80\xA0] + set b foo$a + set m [list \u0000 U \xA0 V [testbytestring \xA0] W] + lappend result [string map $m $b] + lappend result [string map $m [string trimright $b x]] + lappend result [string map $m [string trimright $b \u0000]] + lappend result [string map $m [string trimleft $b fox]] + lappend result [string map $m [string trimleft $b fo\u0000]] + lappend result [string map $m [string trim $b fox]] + lappend result [string map $m [string trim $b fo\u0000]] +} [list {*}[lrepeat 3 fooUV] {*}[lrepeat 2 UV V]] +test string-20.8 {[c61818e4c9] [string trimright] fails when UtfPrev is ok} testbytestring { + set result {} + set a [testbytestring \xE8\xA0] + set b foo$a + set m [list \xE8 U \xA0 V [testbytestring \xE8] W [testbytestring \xA0] X]] + lappend result [string map $m $b] + lappend result [string map $m [string trimright $b x]] + lappend result [string map $m [string trimright $b \xE8]] + lappend result [string map $m [string trimright $b [bytestring \xE8]]] + lappend result [string map $m [string trimright $b \xA0]] + lappend result [string map $m [string trimright $b [bytestring \xA0]]] + lappend result [string map $m [string trimright $b \xE8\xA0]] + lappend result [string map $m [string trimright $b [bytestring \xE8\xA0]]] + lappend result [string map $m [string trimright $b \u0000]] +} [list {*}[lrepeat 4 fooUV] {*}[lrepeat 2 fooU] {*}[lrepeat 2 foo] fooUV] test string-21.1 {string wordend} { list [catch {string wordend a} msg] $msg @@ -1644,6 +1673,11 @@ test string-22.12 {string wordstart, unicode} { test string-22.13 {string wordstart, unicode} { string wordstart "\uC700\uC700 abc" 8 } 3 +test string-22.14 {string wordstart, invalid UTF-8} testbytestring { + # See Bug c61818e4c9 + set demo [testbytestring "abc def\xE0\xA9ghi"] + string index $demo [string wordstart $demo 10] +} g test string-23.0 {string is boolean, Bug 1187123} testindexobj { set x 5 diff --git a/tests/utf.test b/tests/utf.test index 1a879ad..1b7b409 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -156,8 +156,167 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} { test utf-6.1 {Tcl_UtfNext} { } {} -test utf-7.1 {Tcl_UtfPrev} { -} {} +testConstraint testutfprev [llength [info commands testutfprev]] + +test utf-7.1 {Tcl_UtfPrev} testutfprev { + testutfprev {} +} 0 +test utf-7.2 {Tcl_UtfPrev} testutfprev { + testutfprev A +} 0 +test utf-7.3 {Tcl_UtfPrev} testutfprev { + testutfprev AA +} 1 +test utf-7.4 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF8 +} 1 +test utf-7.4.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF8\xA0\xA0\xA0 2 +} 1 +test utf-7.4.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF8\xF8\xA0\xA0 2 +} 1 +test utf-7.5 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF4 +} 1 +test utf-7.5.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF4\xA0\xA0\xA0 2 +} 1 +test utf-7.5.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF4\xF8\xA0\xA0 2 +} 1 +test utf-7.6 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8 +} 1 +test utf-7.6.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8\xA0\xA0\xA0 2 +} 1 +test utf-7.6.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8\xF8\xA0\xA0 2 +} 1 +test utf-7.7 {Tcl_UtfPrev} testutfprev { + testutfprev A\xD0 +} 1 +test utf-7.7.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xD0\xA0\xA0\xA0 2 +} 1 +test utf-7.7.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xD0\xF8\xA0\xA0 2 +} 1 +test utf-7.8 {Tcl_UtfPrev} testutfprev { + testutfprev A\xA0 +} 1 +test utf-7.8.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xA0\xA0\xA0\xA0 2 +} 1 +test utf-7.8.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xA0\xF8\xA0\xA0 2 +} 1 +test utf-7.9 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF8\xA0 +} 2 +test utf-7.9.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF8\xA0\xA0\xA0 3 +} 2 +test utf-7.9.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF8\xA0\xF8\xA0 3 +} 2 +test utf-7.10 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF4\xA0 +} 1 +test utf-7.10.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF4\xA0\xA0\xA0 3 +} 1 +test utf-7.10.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF4\xA0\xF8\xA0 3 +} 1 +test utf-7.11 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8\xA0 +} 1 +test utf-7.11.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8\xA0\xA0\xA0 3 +} 1 +test utf-7.11.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8\xA0\xF8\xA0 3 +} 1 +test utf-7.12 {Tcl_UtfPrev} testutfprev { + testutfprev A\xD0\xA0 +} 1 +test utf-7.12.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xD0\xA0\xA0\xA0 3 +} 1 +test utf-7.12.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xD0\xA0\xF8\xA0 3 +} 1 +test utf-7.13 {Tcl_UtfPrev} testutfprev { + testutfprev A\xA0\xA0 +} 2 +test utf-7.13.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xA0\xA0\xA0\xA0 3 +} 2 +test utf-7.13.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xA0\xA0\xF8\xA0 3 +} 2 +test utf-7.14 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF8\xA0\xA0 +} 3 +test utf-7.14.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF8\xA0\xA0\xA0 4 +} 3 +test utf-7.14.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF8\xA0\xA0\xF8 4 +} 3 +test utf-7.15 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF4\xA0\xA0 +} 1 +test utf-7.15.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF4\xA0\xA0\xA0 4 +} 1 +test utf-7.15.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF4\xA0\xA0\xF8 4 +} 1 +test utf-7.16 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8\xA0\xA0 +} 1 +test utf-7.16.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8\xA0\xA0\xA0 4 +} 1 +test utf-7.16.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8\xA0\xA0\xF8 4 +} 1 +test utf-7.17 {Tcl_UtfPrev} testutfprev { + testutfprev A\xD0\xA0\xA0 +} 3 +test utf-7.17.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xD0\xA0\xA0\xA0 4 +} 3 +test utf-7.17.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xD0\xA0\xA0\xF8 4 +} 3 +test utf-7.18 {Tcl_UtfPrev} testutfprev { + testutfprev A\xA0\xA0\xA0 +} 3 +test utf-7.18.1 {Tcl_UtfPrev} testutfprev { + testutfprev A\xA0\xA0\xA0\xA0 4 +} 3 +test utf-7.18.2 {Tcl_UtfPrev} testutfprev { + testutfprev A\xA0\xA0\xA0\xF8 4 +} 3 +test utf-7.19 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF8\xA0\xA0\xA0 +} 4 +test utf-7.20 {Tcl_UtfPrev} testutfprev { + testutfprev A\xF4\xA0\xA0\xA0 +} 1 +test utf-7.21 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8\xA0\xA0\xA0 +} 4 +test utf-7.22 {Tcl_UtfPrev} testutfprev { + testutfprev A\xD0\xA0\xA0\xA0 +} 4 +test utf-7.23 {Tcl_UtfPrev} testutfprev { + testutfprev A\xA0\xA0\xA0\xA0 +} 4 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 diff --git a/tests/util.test b/tests/util.test index 2ac11bf..f5a59ee 100644 --- a/tests/util.test +++ b/tests/util.test @@ -383,6 +383,10 @@ test util-5.50 {Tcl_StringMatch} { test util-5.51 {Tcl_StringMatch} { Wrapper_Tcl_StringMatch "" "" } 1 +test util-5.52 {Tcl_StringMatch} { + Wrapper_Tcl_StringMatch \[a\u0000 a\x80 +} 0 + test util-6.1 {Tcl_PrintDouble - using tcl_precision} -setup { set old_precision $::tcl_precision @@ -508,25 +512,64 @@ test util-8.4 {TclNeedSpace - correct UTF8 handling} testdstring { llength [testdstring get] } 2 test util-8.5 {TclNeedSpace - correct UTF8 handling} testdstring { - # Note that in this test TclNeedSpace actually gets it wrong, - # claiming we need a space when we really do not. Extra space - # between list elements is harmless though, and better to have - # extra space in really weird string reps of lists, than to - # invest the effort required to make TclNeedSpace foolproof. testdstring free testdstring append {\\ } -1 testdstring element foo list [llength [testdstring get]] [string length [testdstring get]] -} {2 7} +} {2 6} test util-8.6 {TclNeedSpace - correct UTF8 handling} testdstring { - # Another example of TclNeedSpace harmlessly getting it wrong. testdstring free testdstring append {\\ } -1 testdstring append \{ -1 testdstring element foo testdstring append \} -1 list [llength [testdstring get]] [string length [testdstring get]] -} {2 9} +} {2 8} +test util-8.7 {TclNeedSpace - watch out for escaped space} { + testdstring free + testdstring append {\ } -1 + testdstring start + testdstring end + + # Should make {\ {}} + list [llength [testdstring get]] [string index [testdstring get] 3] +} {2 \{} +test util-8.8 {TclNeedSpace - watch out for escaped space} { + testdstring free + testdstring append {\\ } -1 + testdstring start + testdstring end + + # Should make {\\ {}} + list [llength [testdstring get]] [string index [testdstring get] 3] +} {2 \{} +test util-8.9 {TclNeedSpace - watch out for escaped space} { + testdstring free + testdstring append {\\\ } -1 + testdstring start + testdstring end + + # Should make {\\\ {}} + list [llength [testdstring get]] [string index [testdstring get] 5] +} {2 \{} +test util-8.10 {TclNeedSpace - watch out for escaped space} { + testdstring free + testdstring append {\\\\\\\ } -1 + testdstring start + testdstring end + + # Should make {\\\\\\\ {}} + list [llength [testdstring get]] [string index [testdstring get] 9] +} {2 \{} +test util-8.11 {TclNeedSpace - watch out for escaped space} { + testdstring free + testdstring append {\\\\\\\\ } -1 + testdstring start + testdstring end + + # Should make {\\\\\\\\ {}} + list [llength [testdstring get]] [string index [testdstring get] 9] +} {2 \{} test util-9.0.0 {TclGetIntForIndex} { string index abcd 0 diff --git a/unix/tclUnixFile.c b/unix/tclUnixFile.c index 5684b16..f70ce6a 100644 --- a/unix/tclUnixFile.c +++ b/unix/tclUnixFile.c @@ -98,7 +98,7 @@ TclpFindExecutable( */ while (1) { - while (TclIsSpaceProc(*p)) { + while (TclIsSpaceProcM(*p)) { p++; } name = p; |