summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--doc/Utf.328
-rw-r--r--generic/tclCmdAH.c2
-rw-r--r--generic/tclCmdMZ.c16
-rw-r--r--generic/tclDate.c2
-rw-r--r--generic/tclInt.h11
-rw-r--r--generic/tclParse.c2
-rw-r--r--generic/tclResult.c11
-rw-r--r--generic/tclStrToD.c6
-rw-r--r--generic/tclStringObj.c24
-rw-r--r--generic/tclStringTrim.h2
-rw-r--r--generic/tclTest.c434
-rw-r--r--generic/tclUtf.c65
-rw-r--r--generic/tclUtil.c413
-rw-r--r--tests/dstring.test64
-rw-r--r--tests/encoding.test30
-rw-r--r--tests/string.test34
-rw-r--r--tests/utf.test163
-rw-r--r--tests/util.test59
-rw-r--r--unix/tclUnixFile.c2
19 files changed, 841 insertions, 527 deletions
diff --git a/doc/Utf.3 b/doc/Utf.3
index c00033f..d01721c 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -223,13 +223,27 @@ string. The caller must not ask for the next character after the last
character in the string if the string is not terminated by a null
character.
.PP
-Given \fIsrc\fR, a pointer to some location in a UTF-8 string (or to a
-null byte immediately following such a string), \fBTcl_UtfPrev\fR
-returns a pointer to the closest preceding byte that starts a UTF-8
-character.
-This function will not back up to a position before \fIstart\fR,
-the start of the UTF-8 string. If \fIsrc\fR was already at \fIstart\fR, the
-return value will be \fIstart\fR.
+\fBTcl_UtfPrev\fR is used to step backward through but not beyond the
+UTF-8 string that begins at \fIstart\fR. If the UTF-8 string is made
+up entirely of complete and well-formed characters, and \fIsrc\fR points
+to the lead byte of one of those characters (or to the location one byte
+past the end of the string), then repeated calls of \fBTcl_UtfPrev\fR will
+return pointers to the lead bytes of each character in the string, one
+character at a time, terminating when it returns \fIstart\fR.
+.PP
+When the conditions of completeness and well-formedness may not be satisfied,
+a more precise description of the function of \fBTcl_UtfPrev\fR is necessary.
+It always returns a pointer greater than or equal to \fIstart\fR; that is,
+always a pointer to a location in the string. It always returns a pointer to
+a byte that begins a character when scanning for characters beginning
+from \fIstart\fR. When \fIsrc\fR is greater than \fIstart\fR, it
+always returns a pointer less than \fIsrc\fR and greater than or
+equal to (\fIsrc\fR - \fBTCL_UTF_MAX\fR). The character that begins
+at the returned pointer is the first one that either includes the
+byte \fIsrc[-1]\fR, or might include it if the right trail bytes are
+present at \fIsrc\fR and greater. \fBTcl_UtfPrev\fR never reads the
+byte \fIsrc[0]\fR nor the byte \fIstart[-1]\fR nor the byte
+\fIsrc[-\fBTCL_UTF_MAX\fI-1]\fR.
.PP
\fBTcl_UniCharAtIndex\fR corresponds to a C string array dereference or the
Pascal Ord() function. It returns the Tcl_UniChar represented at the
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c
index 40a10ba..46ee157 100644
--- a/generic/tclCmdAH.c
+++ b/generic/tclCmdAH.c
@@ -228,7 +228,7 @@ Tcl_CaseObjCmd(
pat = TclGetString(caseObjv[i]);
for (p = pat; *p != '\0'; p++) {
- if (TclIsSpaceProc(*p) || (*p == '\\')) {
+ if (TclIsSpaceProcM(*p) || (*p == '\\')) {
break;
}
}
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 23370a8..6b59e52 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1738,7 +1738,7 @@ StringIsCmd(
* if it is the first "element" that has the failure.
*/
- while (TclIsSpaceProc(*p)) {
+ while (TclIsSpaceProcM(*p)) {
p++;
}
TclNewStringObj(tmpStr, string1, p-string1);
@@ -2477,12 +2477,22 @@ StringStartCmd(
cur = 0;
if (index > 0) {
p = Tcl_UtfAtIndex(string, index);
+
+ TclUtfToUniChar(p, &ch);
for (cur = index; cur >= 0; cur--) {
- TclUtfToUniChar(p, &ch);
+ int delta = 0;
+ const char *next;
+
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
- p = Tcl_UtfPrev(p, string);
+
+ next = Tcl_UtfPrev(p, string);
+ do {
+ next += delta;
+ delta = TclUtfToUniChar(next, &ch);
+ } while (next + delta < p);
+ p = next;
}
if (cur != index) {
cur += 1;
diff --git a/generic/tclDate.c b/generic/tclDate.c
index 717a1b3..ea1f0f3 100644
--- a/generic/tclDate.c
+++ b/generic/tclDate.c
@@ -2680,7 +2680,7 @@ TclDatelex(
location->first_column = yyInput - info->dateStart;
for ( ; ; ) {
- while (TclIsSpaceProc(*yyInput)) {
+ while (TclIsSpaceProcM(*yyInput)) {
yyInput++;
}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 74b2cc9..a7df6ab 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3025,7 +3025,6 @@ MODULE_SCOPE void TclInitNotifier(void);
MODULE_SCOPE void TclInitObjSubsystem(void);
MODULE_SCOPE void TclInitSubsystems(void);
MODULE_SCOPE int TclInterpReady(Tcl_Interp *interp);
-MODULE_SCOPE int TclIsSpaceProc(int byte);
MODULE_SCOPE int TclIsBareword(int byte);
MODULE_SCOPE Tcl_Obj * TclJoinPath(int elements, Tcl_Obj * const objv[],
int forceRelative);
@@ -3234,6 +3233,16 @@ MODULE_SCOPE void * TclpThreadGetMasterTSD(void *tsdKeyPtr);
MODULE_SCOPE void TclErrorStackResetIf(Tcl_Interp *interp, const char *msg, int length);
/*
+ * Many parsing tasks need a common definition of whitespace.
+ * Use this routine and macro to achieve that and place
+ * optimization (fragile on changes) in one place.
+ */
+
+MODULE_SCOPE int TclIsSpaceProc(int byte);
+# define TclIsSpaceProcM(byte) \
+ (((byte) > 0x20) ? 0 : TclIsSpaceProc(byte))
+
+/*
*----------------------------------------------------------------
* Command procedures in the generic core:
*----------------------------------------------------------------
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 03cee64..4d7e6b8 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -1830,7 +1830,7 @@ Tcl_ParseBraces(
openBrace = 0;
break;
case '#' :
- if (openBrace && TclIsSpaceProc(src[-1])) {
+ if (openBrace && TclIsSpaceProcM(src[-1])) {
Tcl_AppendToObj(Tcl_GetObjResult(parsePtr->interp),
": possible unbalanced brace in comment", -1);
goto error;
diff --git a/generic/tclResult.c b/generic/tclResult.c
index aceb2f5..caad71e 100644
--- a/generic/tclResult.c
+++ b/generic/tclResult.c
@@ -725,6 +725,7 @@ Tcl_AppendElement(
char *dst;
int size;
int flags;
+ int quoteHash = 1;
/*
* If the string result is empty, move the object result to the string
@@ -761,9 +762,17 @@ Tcl_AppendElement(
* then this element will not lead a list, and need not have it's
* leading '#' quoted.
*/
-
+ quoteHash = 0;
+ } else {
+ while ((--dst >= iPtr->appendResult) && TclIsSpaceProcM(*dst)) {
+ }
+ quoteHash = !TclNeedSpace(iPtr->appendResult, dst+1);
+ }
+ dst = iPtr->appendResult + iPtr->appendUsed;
+ if (!quoteHash) {
flags |= TCL_DONT_QUOTE_HASH;
}
+
iPtr->appendUsed += Tcl_ConvertElement(element, dst, flags);
}
diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c
index 66c8f19..749abcf 100644
--- a/generic/tclStrToD.c
+++ b/generic/tclStrToD.c
@@ -568,7 +568,7 @@ TclParseNumber(
* I, N, and whitespace.
*/
- if (TclIsSpaceProc(c)) {
+ if (TclIsSpaceProcM(c)) {
if (flags & TCL_PARSE_NO_WHITESPACE) {
goto endgame;
}
@@ -1095,7 +1095,7 @@ TclParseNumber(
}
/* FALLTHROUGH */
case sNANPAREN:
- if (TclIsSpaceProc(c)) {
+ if (TclIsSpaceProcM(c)) {
break;
}
if (numSigDigs < 13) {
@@ -1149,7 +1149,7 @@ TclParseNumber(
* Accept trailing whitespace.
*/
- while (len != 0 && TclIsSpaceProc(*p)) {
+ while (len != 0 && TclIsSpaceProcM(*p)) {
p++;
len--;
}
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index d4f45d7..84a1339 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1151,10 +1151,7 @@ Tcl_AppendLimitedToObj(
{
String *stringPtr;
int toCopy = 0;
-
- if (Tcl_IsShared(objPtr)) {
- Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj");
- }
+ int eLen = 0;
if (length < 0) {
length = (bytes ? strlen(bytes) : 0);
@@ -1162,6 +1159,9 @@ Tcl_AppendLimitedToObj(
if (length == 0) {
return;
}
+ if (limit <= 0) {
+ return;
+ }
if (length <= limit) {
toCopy = length;
@@ -1169,8 +1169,12 @@ Tcl_AppendLimitedToObj(
if (ellipsis == NULL) {
ellipsis = "...";
}
- toCopy = (bytes == NULL) ? limit
- : Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes;
+ eLen = strlen(ellipsis);
+ while (eLen > limit) {
+ eLen = Tcl_UtfPrev(ellipsis+eLen, ellipsis) - ellipsis;
+ }
+
+ toCopy = Tcl_UtfPrev(bytes+limit+1-eLen, bytes) - bytes;
}
/*
@@ -1179,6 +1183,10 @@ Tcl_AppendLimitedToObj(
* objPtr's string rep.
*/
+ if (Tcl_IsShared(objPtr)) {
+ Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj");
+ }
+
SetStringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
@@ -1194,9 +1202,9 @@ Tcl_AppendLimitedToObj(
stringPtr = GET_STRING(objPtr);
if (stringPtr->hasUnicode && stringPtr->numChars > 0) {
- AppendUtfToUnicodeRep(objPtr, ellipsis, strlen(ellipsis));
+ AppendUtfToUnicodeRep(objPtr, ellipsis, eLen);
} else {
- AppendUtfToUtfRep(objPtr, ellipsis, strlen(ellipsis));
+ AppendUtfToUtfRep(objPtr, ellipsis, eLen);
}
}
diff --git a/generic/tclStringTrim.h b/generic/tclStringTrim.h
index 030e4ec..7067428 100644
--- a/generic/tclStringTrim.h
+++ b/generic/tclStringTrim.h
@@ -28,6 +28,8 @@ MODULE_SCOPE const char tclDefaultTrimSet[];
/*
* The whitespace trimming set used when [concat]enating. This is a subset of
* the above, and deliberately so.
+ *
+ * TODO: Find a reasonable way to guarantee in sync with TclIsSpaceProc()
*/
#define CONCAT_TRIM_SET " \f\v\r\t\n"
diff --git a/generic/tclTest.c b/generic/tclTest.c
index e187ec2..2a43d91 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -98,7 +98,7 @@ static Tcl_Trace cmdTrace;
* TestdelCmd:
*/
-typedef struct DelCmd {
+typedef struct {
Tcl_Interp *interp; /* Interpreter in which command exists. */
char *deleteCmd; /* Script to execute when command is deleted.
* Malloc'ed. */
@@ -109,7 +109,7 @@ typedef struct DelCmd {
* command.
*/
-typedef struct TclEncoding {
+typedef struct {
Tcl_Interp *interp;
char *toUtfCmd;
char *fromUtfCmd;
@@ -132,7 +132,7 @@ static int exitMainLoop = 0;
* Event structure used in testing the event queue management procedures.
*/
-typedef struct TestEvent {
+typedef struct {
Tcl_Event header; /* Header common to all events */
Tcl_Interp *interp; /* Interpreter that will handle the event */
Tcl_Obj *command; /* Command to evaluate when the event occurs */
@@ -164,10 +164,8 @@ static void CleanupTestSetassocdataTests(
ClientData clientData, Tcl_Interp *interp);
static void CmdDelProc1(ClientData clientData);
static void CmdDelProc2(ClientData clientData);
-static int CmdProc1(ClientData clientData,
- Tcl_Interp *interp, int argc, const char **argv);
-static int CmdProc2(ClientData clientData,
- Tcl_Interp *interp, int argc, const char **argv);
+static Tcl_CmdProc CmdProc1;
+static Tcl_CmdProc CmdProc2;
static void CmdTraceDeleteProc(
ClientData clientData, Tcl_Interp *interp,
int level, char *command, Tcl_CmdProc *cmdProc,
@@ -177,16 +175,11 @@ static void CmdTraceProc(ClientData clientData,
Tcl_Interp *interp, int level, char *command,
Tcl_CmdProc *cmdProc, ClientData cmdClientData,
int argc, const char *argv[]);
-static int CreatedCommandProc(
- ClientData clientData, Tcl_Interp *interp,
- int argc, const char **argv);
-static int CreatedCommandProc2(
- ClientData clientData, Tcl_Interp *interp,
- int argc, const char **argv);
+static Tcl_CmdProc CreatedCommandProc;
+static Tcl_CmdProc CreatedCommandProc2;
static void DelCallbackProc(ClientData clientData,
Tcl_Interp *interp);
-static int DelCmdProc(ClientData clientData,
- Tcl_Interp *interp, int argc, const char **argv);
+static Tcl_CmdProc DelCmdProc;
static void DelDeleteProc(ClientData clientData);
static void EncodingFreeProc(ClientData clientData);
static int EncodingToUtfProc(ClientData clientData,
@@ -201,15 +194,10 @@ static int EncodingFromUtfProc(ClientData clientData,
int *dstCharsPtr);
static void ExitProcEven(ClientData clientData);
static void ExitProcOdd(ClientData clientData);
-static int GetTimesObjCmd(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
+static Tcl_ObjCmdProc GetTimesObjCmd;
static void MainLoop(void);
-static int NoopCmd(ClientData clientData,
- Tcl_Interp *interp, int argc, const char **argv);
-static int NoopObjCmd(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
+static Tcl_CmdProc NoopCmd;
+static Tcl_ObjCmdProc NoopObjCmd;
static int ObjTraceProc(ClientData clientData,
Tcl_Interp *interp, int level, const char *command,
Tcl_Command commandToken, int objc,
@@ -218,167 +206,81 @@ static void ObjTraceDeleteProc(ClientData clientData);
static void PrintParse(Tcl_Interp *interp, Tcl_Parse *parsePtr);
static void SpecialFree(char *blockPtr);
static int StaticInitProc(Tcl_Interp *interp);
-static int TestasyncCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestbumpinterpepochObjCmd(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestpurebytesobjObjCmd(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestsetbytearraylengthObjCmd(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestbytestringObjCmd(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestcmdinfoCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestcmdtokenCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestcmdtraceCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestconcatobjCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestcreatecommandCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestdcallCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestdelCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestdelassocdataCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestdoubledigitsObjCmd(ClientData dummy,
- Tcl_Interp* interp,
- int objc, Tcl_Obj* const objv[]);
-static int TestdstringCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestencodingObjCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestevalexObjCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestevalobjvObjCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TesteventObjCmd(ClientData unused,
- Tcl_Interp *interp, int argc,
- Tcl_Obj *const objv[]);
+static Tcl_CmdProc TestasyncCmd;
+static Tcl_ObjCmdProc TestbumpinterpepochObjCmd;
+static Tcl_ObjCmdProc TestsetbytearraylengthObjCmd;
+static Tcl_ObjCmdProc TestpurebytesobjObjCmd;
+static Tcl_ObjCmdProc TestbytestringObjCmd;
+static Tcl_ObjCmdProc TeststringbytesObjCmd;
+static Tcl_CmdProc TestcmdinfoCmd;
+static Tcl_CmdProc TestcmdtokenCmd;
+static Tcl_CmdProc TestcmdtraceCmd;
+static Tcl_CmdProc TestconcatobjCmd;
+static Tcl_CmdProc TestcreatecommandCmd;
+static Tcl_CmdProc TestdcallCmd;
+static Tcl_CmdProc TestdelCmd;
+static Tcl_CmdProc TestdelassocdataCmd;
+static Tcl_ObjCmdProc TestdoubledigitsObjCmd;
+static Tcl_CmdProc TestdstringCmd;
+static Tcl_ObjCmdProc TestencodingObjCmd;
+static Tcl_ObjCmdProc TestevalexObjCmd;
+static Tcl_ObjCmdProc TestevalobjvObjCmd;
+static Tcl_ObjCmdProc TesteventObjCmd;
static int TesteventProc(Tcl_Event *event, int flags);
static int TesteventDeleteProc(Tcl_Event *event,
ClientData clientData);
-static int TestexithandlerCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestexprlongCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestexprlongobjCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestexprdoubleCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestexprdoubleobjCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestexprparserObjCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestexprstringCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestfileCmd(ClientData dummy,
- Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]);
-static int TestfilelinkCmd(ClientData dummy,
- Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]);
-static int TestfeventCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestgetassocdataCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestgetintCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestgetplatformCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestgetvarfullnameCmd(
- ClientData dummy, Tcl_Interp *interp,
- int objc, Tcl_Obj *const objv[]);
-static int TestinterpdeleteCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestlinkCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestlocaleCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
+static Tcl_CmdProc TestexithandlerCmd;
+static Tcl_CmdProc TestexprlongCmd;
+static Tcl_ObjCmdProc TestexprlongobjCmd;
+static Tcl_CmdProc TestexprdoubleCmd;
+static Tcl_ObjCmdProc TestexprdoubleobjCmd;
+static Tcl_ObjCmdProc TestexprparserObjCmd;
+static Tcl_CmdProc TestexprstringCmd;
+static Tcl_ObjCmdProc TestfileCmd;
+static Tcl_ObjCmdProc TestfilelinkCmd;
+static Tcl_CmdProc TestfeventCmd;
+static Tcl_CmdProc TestgetassocdataCmd;
+static Tcl_CmdProc TestgetintCmd;
+static Tcl_CmdProc TestgetplatformCmd;
+static Tcl_ObjCmdProc TestgetvarfullnameCmd;
+static Tcl_CmdProc TestinterpdeleteCmd;
+static Tcl_CmdProc TestlinkCmd;
+static Tcl_ObjCmdProc TestlocaleCmd;
static int TestMathFunc(ClientData clientData,
Tcl_Interp *interp, Tcl_Value *args,
Tcl_Value *resultPtr);
static int TestMathFunc2(ClientData clientData,
Tcl_Interp *interp, Tcl_Value *args,
Tcl_Value *resultPtr);
-static int TestmainthreadCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestsetmainloopCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestexitmainloopCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestpanicCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestparseargsCmd(ClientData dummy, Tcl_Interp *interp,
- int objc, Tcl_Obj *const objv[]);
-static int TestparserObjCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestparsevarObjCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestparsevarnameObjCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestregexpObjCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestreturnObjCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
+static Tcl_CmdProc TestmainthreadCmd;
+static Tcl_CmdProc TestsetmainloopCmd;
+static Tcl_CmdProc TestexitmainloopCmd;
+static Tcl_CmdProc TestpanicCmd;
+static Tcl_ObjCmdProc TestparseargsCmd;
+static Tcl_ObjCmdProc TestparserObjCmd;
+static Tcl_ObjCmdProc TestparsevarObjCmd;
+static Tcl_ObjCmdProc TestparsevarnameObjCmd;
+static Tcl_ObjCmdProc TestregexpObjCmd;
+static Tcl_ObjCmdProc TestreturnObjCmd;
static void TestregexpXflags(const char *string,
int length, int *cflagsPtr, int *eflagsPtr);
-static int TestsaveresultCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
+static Tcl_ObjCmdProc TestsaveresultCmd;
static void TestsaveresultFree(char *blockPtr);
-static int TestsetassocdataCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestsetCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int Testset2Cmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestseterrorcodeCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestsetobjerrorcodeCmd(
- ClientData dummy, Tcl_Interp *interp,
- int objc, Tcl_Obj *const objv[]);
-static int TestsetplatformCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TeststaticpkgCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TesttranslatefilenameCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestupvarCmd(ClientData dummy,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestWrongNumArgsObjCmd(
- ClientData clientData, Tcl_Interp *interp,
- int objc, Tcl_Obj *const objv[]);
-static int TestGetIndexFromObjStructObjCmd(
- ClientData clientData, Tcl_Interp *interp,
- int objc, Tcl_Obj *const objv[]);
-static int TestChannelCmd(ClientData clientData,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestChannelEventCmd(ClientData clientData,
- Tcl_Interp *interp, int argc, const char **argv);
-static int TestFilesystemObjCmd(ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestSimpleFilesystemObjCmd(
- ClientData dummy, Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
+static Tcl_CmdProc TestsetassocdataCmd;
+static Tcl_CmdProc TestsetCmd;
+static Tcl_CmdProc Testset2Cmd;
+static Tcl_CmdProc TestseterrorcodeCmd;
+static Tcl_ObjCmdProc TestsetobjerrorcodeCmd;
+static Tcl_CmdProc TestsetplatformCmd;
+static Tcl_CmdProc TeststaticpkgCmd;
+static Tcl_CmdProc TesttranslatefilenameCmd;
+static Tcl_CmdProc TestupvarCmd;
+static Tcl_ObjCmdProc TestWrongNumArgsObjCmd;
+static Tcl_ObjCmdProc TestGetIndexFromObjStructObjCmd;
+static Tcl_CmdProc TestChannelCmd;
+static Tcl_CmdProc TestChannelEventCmd;
+static Tcl_ObjCmdProc TestFilesystemObjCmd;
+static Tcl_ObjCmdProc TestSimpleFilesystemObjCmd;
static void TestReport(const char *cmd, Tcl_Obj *arg1,
Tcl_Obj *arg2);
static Tcl_Obj * TestReportGetNativePath(Tcl_Obj *pathPtr);
@@ -413,33 +315,18 @@ static Tcl_FSListVolumesProc SimpleListVolumes;
static Tcl_FSPathInFilesystemProc SimplePathInFilesystem;
static Tcl_Obj * SimpleRedirect(Tcl_Obj *pathPtr);
static Tcl_FSMatchInDirectoryProc SimpleMatchInDirectory;
-static int TestNumUtfCharsCmd(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestFindFirstCmd(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestFindLastCmd(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestHashSystemHashCmd(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
+static Tcl_ObjCmdProc TestUtfPrevCmd;
+static Tcl_ObjCmdProc TestNumUtfCharsCmd;
+static Tcl_ObjCmdProc TestFindFirstCmd;
+static Tcl_ObjCmdProc TestFindLastCmd;
+static Tcl_ObjCmdProc TestHashSystemHashCmd;
static Tcl_NRPostProc NREUnwind_callback;
-static int TestNREUnwind(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestNRELevels(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int TestInterpResolverCmd(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
+static Tcl_ObjCmdProc TestNREUnwind;
+static Tcl_ObjCmdProc TestNRELevels;
+static Tcl_ObjCmdProc TestInterpResolverCmd;
#if defined(HAVE_CPUID) || defined(_WIN32)
-static int TestcpuidCmd(ClientData dummy,
- Tcl_Interp* interp, int objc,
- Tcl_Obj *const objv[]);
+static Tcl_ObjCmdProc TestcpuidCmd;
#endif
static const Tcl_Filesystem testReportingFilesystem = {
@@ -549,8 +436,7 @@ Tcltest_Init(
{
Tcl_ValueType t3ArgTypes[2];
- Tcl_Obj *listPtr;
- Tcl_Obj **objv;
+ Tcl_Obj **objv, *objPtr;
int objc, index;
static const char *const specialOptions[] = {
"-appinitprocerror", "-appinitprocdeleteinterp",
@@ -582,6 +468,7 @@ Tcltest_Init(
Tcl_CreateObjCommand(interp, "testpurebytesobj", TestpurebytesobjObjCmd, NULL, NULL);
Tcl_CreateObjCommand(interp, "testsetbytearraylength", TestsetbytearraylengthObjCmd, NULL, NULL);
Tcl_CreateObjCommand(interp, "testbytestring", TestbytestringObjCmd, NULL, NULL);
+ Tcl_CreateObjCommand(interp, "teststringbytes", TeststringbytesObjCmd, NULL, NULL);
Tcl_CreateObjCommand(interp, "testwrongnumargs", TestWrongNumArgsObjCmd,
NULL, NULL);
Tcl_CreateObjCommand(interp, "testfilesystem", TestFilesystemObjCmd,
@@ -685,6 +572,8 @@ Tcltest_Init(
NULL, NULL);
Tcl_CreateObjCommand(interp, "testsetobjerrorcode",
TestsetobjerrorcodeCmd, NULL, NULL);
+ Tcl_CreateObjCommand(interp, "testutfprev",
+ TestUtfPrevCmd, (ClientData) 0, NULL);
Tcl_CreateObjCommand(interp, "testnumutfchars",
TestNumUtfCharsCmd, NULL, NULL);
Tcl_CreateObjCommand(interp, "testfindfirst",
@@ -708,7 +597,7 @@ Tcltest_Init(
NULL, NULL);
#if defined(HAVE_CPUID) || defined(_WIN32)
Tcl_CreateObjCommand(interp, "testcpuid", TestcpuidCmd,
- (ClientData) 0, NULL);
+ NULL, NULL);
#endif
t3ArgTypes[0] = TCL_EITHER;
t3ArgTypes[1] = TCL_EITHER;
@@ -738,9 +627,9 @@ Tcltest_Init(
* Check for special options used in ../tests/main.test
*/
- listPtr = Tcl_GetVar2Ex(interp, "argv", NULL, TCL_GLOBAL_ONLY);
- if (listPtr != NULL) {
- if (Tcl_ListObjGetElements(interp, listPtr, &objc, &objv) != TCL_OK) {
+ objPtr = Tcl_GetVar2Ex(interp, "argv", NULL, TCL_GLOBAL_ONLY);
+ if (objPtr != NULL) {
+ if (Tcl_ListObjGetElements(interp, objPtr, &objc, &objv) != TCL_OK) {
return TCL_ERROR;
}
if (objc && (Tcl_GetIndexFromObj(NULL, objv[0], specialOptions, NULL,
@@ -831,18 +720,19 @@ TestasyncCmd(
TestAsyncHandler *asyncPtr, *prevPtr;
int id, code;
static int nextId = 1;
+ (void)dummy;
if (argc < 2) {
wrongNumArgs:
- Tcl_SetResult(interp, "wrong # args", TCL_STATIC);
+ Tcl_AppendResult(interp, "wrong # args", NULL);
return TCL_ERROR;
}
if (strcmp(argv[1], "create") == 0) {
if (argc != 3) {
goto wrongNumArgs;
}
- asyncPtr = ckalloc(sizeof(TestAsyncHandler));
- asyncPtr->command = ckalloc(strlen(argv[2]) + 1);
+ asyncPtr = (TestAsyncHandler *)ckalloc(sizeof(TestAsyncHandler));
+ asyncPtr->command = (char *)ckalloc(strlen(argv[2]) + 1);
strcpy(asyncPtr->command, argv[2]);
Tcl_MutexLock(&asyncTestMutex);
asyncPtr->id = nextId;
@@ -924,7 +814,7 @@ TestasyncCmd(
if (Tcl_CreateThread(&threadID, AsyncThreadProc,
INT2PTR(id), TCL_THREAD_STACK_DEFAULT,
TCL_THREAD_NOFLAGS) != TCL_OK) {
- Tcl_SetResult(interp, "can't create thread", TCL_STATIC);
+ Tcl_AppendResult(interp, "can't create thread", NULL);
Tcl_MutexUnlock(&asyncTestMutex);
return TCL_ERROR;
}
@@ -1039,6 +929,7 @@ TestbumpinterpepochObjCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
Interp *iPtr = (Interp *)interp;
+
if (objc != 1) {
Tcl_WrongNumArgs(interp, 1, objv, "");
return TCL_ERROR;
@@ -1074,6 +965,7 @@ TestcmdinfoCmd(
const char **argv) /* Argument strings. */
{
Tcl_CmdInfo info;
+ (void)dummy;
if (argc != 3) {
Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
@@ -1089,7 +981,7 @@ TestcmdinfoCmd(
Tcl_DStringResult(interp, &delString);
} else if (strcmp(argv[1], "get") == 0) {
if (Tcl_GetCommandInfo(interp, argv[2], &info) ==0) {
- Tcl_SetResult(interp, "??", TCL_STATIC);
+ Tcl_AppendResult(interp, "??", NULL);
return TCL_OK;
}
if (info.proc == CmdProc1) {
@@ -4990,6 +4882,41 @@ NoopObjCmd(
/*
*----------------------------------------------------------------------
*
+ * TeststringbytesObjCmd --
+ * Returns bytearray value of the bytes in argument string rep
+ *
+ * Results:
+ * Returns the TCL_OK result code.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+TeststringbytesObjCmd(
+ ClientData dummy,
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* The argument objects. */
+{
+ int n;
+ const unsigned char *p;
+ (void)dummy;
+
+ if (objc != 2) {
+ Tcl_WrongNumArgs(interp, 1, objv, "value");
+ return TCL_ERROR;
+ }
+ p = (const unsigned char *)Tcl_GetStringFromObj(objv[1], &n);
+ Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(p, n));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* TestpurebytesobjObjCmd --
*
* This object-based procedure constructs a pure bytes object
@@ -5010,12 +4937,13 @@ NoopObjCmd(
static int
TestpurebytesobjObjCmd(
- ClientData unused, /* Not used. */
+ ClientData dummy, /* Not used. */
Tcl_Interp *interp, /* Current interpreter. */
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* The argument objects. */
{
Tcl_Obj *objPtr;
+ (void)dummy;
if (objc > 2) {
Tcl_WrongNumArgs(interp, 1, objv, "?string?");
@@ -5030,7 +4958,7 @@ TestpurebytesobjObjCmd(
if (objc == 2) {
const char *s = Tcl_GetString(objv[1]);
objPtr->length = objv[1]->length;
- objPtr->bytes = ckalloc(objPtr->length + 1);
+ objPtr->bytes = (char *)ckalloc(objPtr->length + 1);
memcpy(objPtr->bytes, s, objPtr->length);
objPtr->bytes[objPtr->length] = 0;
}
@@ -5057,13 +4985,14 @@ TestpurebytesobjObjCmd(
static int
TestsetbytearraylengthObjCmd(
- ClientData unused, /* Not used. */
+ ClientData dummy, /* Not used. */
Tcl_Interp *interp, /* Current interpreter. */
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* The argument objects. */
{
int n;
Tcl_Obj *obj = NULL;
+ (void)dummy;
if (objc != 3) {
Tcl_WrongNumArgs(interp, 1, objv, "value length");
@@ -5101,18 +5030,20 @@ TestsetbytearraylengthObjCmd(
static int
TestbytestringObjCmd(
- ClientData unused, /* Not used. */
+ ClientData dummy, /* Not used. */
Tcl_Interp *interp, /* Current interpreter. */
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* The argument objects. */
{
- int n;
+ int n = 0;
const char *p;
+ (void)dummy;
if (objc != 2) {
Tcl_WrongNumArgs(interp, 1, objv, "bytearray");
return TCL_ERROR;
}
+
p = (const char *)Tcl_GetByteArrayFromObj(objv[1], &n);
Tcl_SetObjResult(interp, Tcl_NewStringObj(p, n));
return TCL_OK;
@@ -5135,11 +5066,10 @@ TestbytestringObjCmd(
*----------------------------------------------------------------------
*/
- /* ARGSUSED */
static int
TestsetCmd(
ClientData data, /* Additional flags for Get/SetVar2. */
- register Tcl_Interp *interp,/* Current interpreter. */
+ Tcl_Interp *interp,/* Current interpreter. */
int argc, /* Number of arguments. */
const char **argv) /* Argument strings. */
{
@@ -5171,7 +5101,7 @@ TestsetCmd(
static int
Testset2Cmd(
ClientData data, /* Additional flags for Get/SetVar2. */
- register Tcl_Interp *interp,/* Current interpreter. */
+ Tcl_Interp *interp,/* Current interpreter. */
int argc, /* Number of arguments. */
const char **argv) /* Argument strings. */
{
@@ -5222,7 +5152,7 @@ Testset2Cmd(
static int
TestsaveresultCmd(
ClientData dummy, /* Not used. */
- register Tcl_Interp *interp,/* Current interpreter. */
+ Tcl_Interp *interp,/* Current interpreter. */
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* The argument objects. */
{
@@ -5355,7 +5285,7 @@ TestsaveresultFree(
static int
TestmainthreadCmd(
ClientData dummy, /* Not used. */
- register Tcl_Interp *interp,/* Current interpreter. */
+ Tcl_Interp *interp,/* Current interpreter. */
int argc, /* Number of arguments. */
const char **argv) /* Argument strings. */
{
@@ -5416,7 +5346,7 @@ MainLoop(void)
static int
TestsetmainloopCmd(
ClientData dummy, /* Not used. */
- register Tcl_Interp *interp,/* Current interpreter. */
+ Tcl_Interp *interp,/* Current interpreter. */
int argc, /* Number of arguments. */
const char **argv) /* Argument strings. */
{
@@ -5445,7 +5375,7 @@ TestsetmainloopCmd(
static int
TestexitmainloopCmd(
ClientData dummy, /* Not used. */
- register Tcl_Interp *interp,/* Current interpreter. */
+ Tcl_Interp *interp,/* Current interpreter. */
int argc, /* Number of arguments. */
const char **argv) /* Argument strings. */
{
@@ -6794,6 +6724,55 @@ SimpleListVolumes(void)
}
/*
+ * Used to check operations of Tcl_UtfPrev.
+ *
+ * Usage: testutfprev $bytes $offset
+ */
+
+static int
+TestUtfPrevCmd(
+ ClientData clientData,
+ Tcl_Interp *interp,
+ int objc,
+ Tcl_Obj *const objv[])
+{
+ int numBytes, offset;
+ char *bytes;
+ const char *result;
+ Tcl_Obj *copy;
+
+ if (objc < 2 || objc > 3) {
+ Tcl_WrongNumArgs(interp, 1, objv, "bytes ?offset?");
+ return TCL_ERROR;
+ }
+
+ bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes);
+
+ if (objc == 3) {
+ if (TCL_OK != Tcl_GetIntFromObj(interp, objv[2], &offset)) {
+ return TCL_ERROR;
+ }
+ if (offset < 0) {
+ offset = 0;
+ }
+ if (offset > numBytes) {
+ offset = numBytes;
+ }
+ } else {
+ offset = numBytes;
+ }
+ copy = Tcl_DuplicateObj(objv[1]);
+ bytes = (char *) Tcl_SetByteArrayLength(copy, numBytes+1);
+ bytes[numBytes] = '\0';
+
+ result = Tcl_UtfPrev(bytes + offset, bytes);
+
+ Tcl_DecrRefCount(copy);
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(result - bytes));
+ return TCL_OK;
+}
+
+/*
* Used to check correct string-length determining in Tcl_NumUtfChars
*/
@@ -6928,7 +6907,7 @@ TestcpuidCmd(
static int
TestHashSystemHashCmd(
- ClientData clientData,
+ ClientData dummy,
Tcl_Interp *interp,
int objc,
Tcl_Obj *const objv[])
@@ -6940,6 +6919,7 @@ TestHashSystemHashCmd(
Tcl_HashTable hash;
Tcl_HashEntry *hPtr;
int i, isNew, limit = 100;
+ (void)dummy;
if (objc>1 && Tcl_GetIntFromObj(interp, objv[1], &limit)!=TCL_OK) {
return TCL_ERROR;
@@ -7009,6 +6989,8 @@ TestgetintCmd(
int argc,
const char **argv)
{
+ (void)dummy;
+
if (argc < 2) {
Tcl_SetResult(interp, "wrong # args", TCL_STATIC);
return TCL_ERROR;
@@ -7033,6 +7015,7 @@ NREUnwind_callback(
int result)
{
int none;
+ (void)result;
if (data[0] == INT2PTR(-1)) {
Tcl_NRAddCallback(interp, NREUnwind_callback, &none, INT2PTR(-1),
@@ -7055,11 +7038,15 @@ NREUnwind_callback(
static int
TestNREUnwind(
- ClientData clientData,
+ ClientData dummy,
Tcl_Interp *interp,
int objc,
Tcl_Obj *const objv[])
{
+ (void)dummy;
+ (void)objc;
+ (void)objv;
+
/*
* Insure that callbacks effectively run at the proper level during the
* unwinding of the NRE stack.
@@ -7073,7 +7060,7 @@ TestNREUnwind(
static int
TestNRELevels(
- ClientData clientData,
+ ClientData dummy,
Tcl_Interp *interp,
int objc,
Tcl_Obj *const objv[])
@@ -7084,6 +7071,9 @@ TestNRELevels(
Tcl_Obj *levels[6];
int i = 0;
NRE_callback *cbPtr = iPtr->execEnvPtr->callbackPtr;
+ (void)dummy;
+ (void)objc;
+ (void)objv;
if (refDepth == NULL) {
refDepth = &depth;
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 12d764c..3377b70 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -64,6 +64,17 @@ static const unsigned char totalBytes[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
+};
+
+static const unsigned char complete[256] = {
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
#if TCL_UTF_MAX > 4
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -497,7 +508,7 @@ Tcl_UtfCharComplete(
* a complete UTF-8 character. */
int length) /* Length of above string in bytes. */
{
- return length >= totalBytes[(unsigned char)*src];
+ return length >= complete[(unsigned char)*src];
}
/*
@@ -675,15 +686,43 @@ Tcl_UtfNext(
*
* Tcl_UtfPrev --
*
- * Given a pointer to some current location in a UTF-8 string, move
- * backwards one character. This works correctly when the pointer is in
- * the middle of a UTF-8 character.
+ * The aim of this routine is to provide a way to move backward
+ * through a UTF-8 string. The caller is expected to pass non-NULL
+ * pointer arguments start and src. start points to the beginning
+ * of a string, and src >= start points to a location within (or just
+ * past the end) of the string. This routine always returns a
+ * pointer within the string (>= start). When (src == start), it
+ * returns start. When (src > start), it returns a pointer (< src)
+ * and (>= src - TCL_UTF_MAX). Subject to these constraints, the
+ * routine returns a pointer to the earliest byte in the string that
+ * starts a character when characters are read starting at start and
+ * that character might include the byte src[-1]. The routine will
+ * examine only those bytes in the range that might be returned.
+ * It will not examine the byte *src, and because of that cannot
+ * determine for certain in all circumstances whether the character
+ * that begins with the returned pointer will or will not include
+ * the byte src[-1]. In the scenario, where src points to the end of
+ * a buffer being filled, the returned pointer point to either the
+ * final complete character in the string or to the earliest byte
+ * that might start an incomplete character waiting for more bytes to
+ * complete.
+ *
+ * Because this routine always returns a value < src until the point
+ * it is forced to return start, it is useful as a backward iterator
+ * through a string that will always make progress and always be
+ * prevented from running past the beginning of the string.
+ *
+ * In a string where all characters are complete and properly formed,
+ * and the value of src points to the first byte of a character,
+ * repeated Tcl_UtfPrev calls will step to the starting bytes of
+ * characters, one character at a time. Within those limitations,
+ * Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot
+ * be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and
+ * the caller will have to take greater care.
*
* Results:
- * The return value is a pointer to the previous character in the UTF-8
- * string. If the current location was already at the beginning of the
- * string, the return value will also be a pointer to the beginning of
- * the string.
+ * A pointer to the start of a character in the string as described
+ * above.
*
* Side effects:
* None.
@@ -693,9 +732,8 @@ Tcl_UtfNext(
const char *
Tcl_UtfPrev(
- const char *src, /* The current location in the string. */
- const char *start) /* Pointer to the beginning of the string, to
- * avoid going backwards too far. */
+ const char *src, /* A location in a UTF-8 string. */
+ const char *start) /* Pointer to the beginning of the string */
{
const char *look;
int i, byte;
@@ -713,6 +751,9 @@ Tcl_UtfPrev(
break;
}
if (byte >= 0xC0) {
+ if (totalBytes[byte] <= i) {
+ break;
+ }
return look;
}
look--;
@@ -1692,7 +1733,7 @@ Tcl_UniCharIsSpace(
*/
if (ch < 0x80) {
- return TclIsSpaceProc((char) ch);
+ return TclIsSpaceProcM((char) ch);
#if TCL_UTF_MAX > 3
} else if (UNICODE_OUT_OF_RANGE(ch)) {
return 0;
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 4a20c3e..d7d6134 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -404,7 +404,7 @@ TclMaxListLength(
* No list element before leading white space.
*/
- count += 1 - TclIsSpaceProc(*bytes);
+ count += 1 - TclIsSpaceProcM(*bytes);
/*
* Count white space runs as potential element separators.
@@ -414,7 +414,7 @@ TclMaxListLength(
if ((numBytes == -1) && (*bytes == '\0')) {
break;
}
- if (TclIsSpaceProc(*bytes)) {
+ if (TclIsSpaceProcM(*bytes)) {
/*
* Space run started; bump count.
*/
@@ -423,7 +423,7 @@ TclMaxListLength(
do {
bytes++;
numBytes -= (numBytes != -1);
- } while (numBytes && TclIsSpaceProc(*bytes));
+ } while (numBytes && TclIsSpaceProcM(*bytes));
if ((numBytes == 0) || ((numBytes == -1) && (*bytes == '\0'))) {
break;
}
@@ -440,7 +440,7 @@ TclMaxListLength(
* No list element following trailing white space.
*/
- count -= TclIsSpaceProc(bytes[-1]);
+ count -= TclIsSpaceProcM(bytes[-1]);
done:
if (endPtr) {
@@ -589,7 +589,7 @@ FindElement(
*/
limit = (string + stringLength);
- while ((p < limit) && (TclIsSpaceProc(*p))) {
+ while ((p < limit) && (TclIsSpaceProcM(*p))) {
p++;
}
if (p == limit) { /* no element found */
@@ -634,7 +634,7 @@ FindElement(
} else if (openBraces == 1) {
size = (p - elemStart);
p++;
- if ((p >= limit) || TclIsSpaceProc(*p)) {
+ if ((p >= limit) || TclIsSpaceProcM(*p)) {
goto done;
}
@@ -644,7 +644,7 @@ FindElement(
if (interp != NULL) {
p2 = p;
- while ((p2 < limit) && (!TclIsSpaceProc(*p2))
+ while ((p2 < limit) && (!TclIsSpaceProcM(*p2))
&& (p2 < p+20)) {
p2++;
}
@@ -679,23 +679,6 @@ FindElement(
break;
/*
- * Space: ignore if element is in braces or quotes; otherwise
- * terminate element.
- */
-
- case ' ':
- case '\f':
- case '\n':
- case '\r':
- case '\t':
- case '\v':
- if ((openBraces == 0) && !inQuotes) {
- size = (p - elemStart);
- goto done;
- }
- break;
-
- /*
* Double-quote: if element is in quotes then terminate it.
*/
@@ -703,7 +686,7 @@ FindElement(
if (inQuotes) {
size = (p - elemStart);
p++;
- if ((p >= limit) || TclIsSpaceProc(*p)) {
+ if ((p >= limit) || TclIsSpaceProcM(*p)) {
goto done;
}
@@ -713,7 +696,7 @@ FindElement(
if (interp != NULL) {
p2 = p;
- while ((p2 < limit) && (!TclIsSpaceProc(*p2))
+ while ((p2 < limit) && (!TclIsSpaceProcM(*p2))
&& (p2 < p+20)) {
p2++;
}
@@ -726,6 +709,20 @@ FindElement(
return TCL_ERROR;
}
break;
+
+ default:
+ if (TclIsSpaceProcM(*p)) {
+ /*
+ * Space: ignore if element is in braces or quotes;
+ * otherwise terminate element.
+ */
+ if ((openBraces == 0) && !inQuotes) {
+ size = (p - elemStart);
+ goto done;
+ }
+ }
+ break;
+
}
p++;
}
@@ -756,7 +753,7 @@ FindElement(
}
done:
- while ((p < limit) && (TclIsSpaceProc(*p))) {
+ while ((p < limit) && (TclIsSpaceProcM(*p))) {
p++;
}
*elementPtr = elemStart;
@@ -1112,12 +1109,6 @@ TclScanElement(
case '[': /* TYPE_SUBS */
case '$': /* TYPE_SUBS */
case ';': /* TYPE_COMMAND_END */
- case ' ': /* TYPE_SPACE */
- case '\f': /* TYPE_SPACE */
- case '\n': /* TYPE_COMMAND_END */
- case '\r': /* TYPE_SPACE */
- case '\t': /* TYPE_SPACE */
- case '\v': /* TYPE_SPACE */
forbidNone = 1;
extra++; /* Escape sequences all one byte longer. */
#if COMPAT
@@ -1162,6 +1153,15 @@ TclScanElement(
}
/* TODO: Panic on improper encoding? */
break;
+ default:
+ if (TclIsSpaceProcM(*p)) {
+ forbidNone = 1;
+ extra++; /* Escape sequences all one byte longer. */
+#if COMPAT
+ preferBrace = 1;
+#endif
+ }
+ break;
}
}
length -= (length > 0);
@@ -1660,42 +1660,6 @@ Tcl_Backslash(
/*
*----------------------------------------------------------------------
*
- * UtfWellFormedEnd --
- * Checks the end of utf string is malformed, if yes - wraps bytes
- * to the given buffer (as well-formed NTS string). The buffer
- * argument should be initialized by the caller and ready to use.
- *
- * Results:
- * The bytes with well-formed end of the string.
- *
- * Side effects:
- * Buffer (DString) may be allocated, so must be released.
- *
- *----------------------------------------------------------------------
- */
-
-static inline const char*
-UtfWellFormedEnd(
- Tcl_DString *buffer, /* Buffer used to hold well-formed string. */
- const char *bytes, /* Pointer to the beginning of the string. */
- int length) /* Length of the string. */
-{
- const char *l = bytes + length;
- const char *p = Tcl_UtfPrev(l, bytes);
-
- if (Tcl_UtfCharComplete(p, l - p)) {
- return bytes;
- }
- /*
- * Malformed utf-8 end, be sure we've NTS to safe compare of end-character,
- * avoid segfault by access violation out of range.
- */
- Tcl_DStringAppend(buffer, bytes, length);
- return Tcl_DStringValue(buffer);
-}
-/*
- *----------------------------------------------------------------------
- *
* TclTrimRight --
* Takes two counted strings in the Tcl encoding. Conceptually
* finds the sub string (offset) to trim from the right side of the
@@ -1710,15 +1674,23 @@ UtfWellFormedEnd(
*----------------------------------------------------------------------
*/
-static inline int
-TrimRight(
- const char *bytes, /* String to be trimmed... */
- int numBytes, /* ...and its length in bytes */
- const char *trim, /* String of trim characters... */
- int numTrim) /* ...and its length in bytes */
+int
+TclTrimRight(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ /* Calls to TclUtfToUniChar() in this routine
+ * rely on (bytes[numBytes] == '\0'). */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+ /* Calls to TclUtfToUniChar() in this routine
+ * rely on (trim[numTrim] == '\0'). */
{
- const char *p = bytes + numBytes;
- int pInc;
+ const char *pp, *p = bytes + numBytes;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
Tcl_UniChar ch1 = 0, ch2 = 0;
/*
@@ -1727,10 +1699,13 @@ TrimRight(
do {
const char *q = trim;
- int bytesLeft = numTrim;
+ int pInc = 0, bytesLeft = numTrim;
- p = Tcl_UtfPrev(p, bytes);
- pInc = TclUtfToUniChar(p, &ch1);
+ pp = Tcl_UtfPrev(p, bytes);
+ do {
+ pp += pInc;
+ pInc = TclUtfToUniChar(pp, &ch1);
+ } while (pp + pInc < p);
/*
* Inner loop: scan trim string for match to current character.
@@ -1752,44 +1727,13 @@ TrimRight(
* No match; trim task done; *p is last non-trimmed char.
*/
- p += pInc;
break;
}
+ p = pp;
} while (p > bytes);
return numBytes - (p - bytes);
}
-
-int
-TclTrimRight(
- const char *bytes, /* String to be trimmed... */
- int numBytes, /* ...and its length in bytes */
- const char *trim, /* String of trim characters... */
- int numTrim) /* ...and its length in bytes */
-{
- int res;
- Tcl_DString bytesBuf, trimBuf;
-
- /* Empty strings -> nothing to do */
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
-
- Tcl_DStringInit(&bytesBuf);
- Tcl_DStringInit(&trimBuf);
- bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
- trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
-
- res = TrimRight(bytes, numBytes, trim, numTrim);
- if (res > numBytes) {
- res = numBytes;
- }
-
- Tcl_DStringFree(&bytesBuf);
- Tcl_DStringFree(&trimBuf);
-
- return res;
-}
/*
*----------------------------------------------------------------------
@@ -1809,21 +1753,30 @@ TclTrimRight(
*----------------------------------------------------------------------
*/
-static inline int
-TrimLeft(
- const char *bytes, /* String to be trimmed... */
- int numBytes, /* ...and its length in bytes */
- const char *trim, /* String of trim characters... */
- int numTrim) /* ...and its length in bytes */
+int
+TclTrimLeft(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ /* Calls to TclUtfToUniChar() in this routine
+ * rely on (bytes[numBytes] == '\0'). */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+ /* Calls to TclUtfToUniChar() in this routine
+ * rely on (trim[numTrim] == '\0'). */
{
const char *p = bytes;
- Tcl_UniChar ch1 = 0, ch2 = 0;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
/*
* Outer loop: iterate over string to be trimmed.
*/
do {
+ Tcl_UniChar ch1 = 0;
int pInc = TclUtfToUniChar(p, &ch1);
const char *q = trim;
int bytesLeft = numTrim;
@@ -1833,6 +1786,7 @@ TrimLeft(
*/
do {
+ Tcl_UniChar ch2 = 0;
int qInc = TclUtfToUniChar(q, &ch2);
if (ch1 == ch2) {
@@ -1857,37 +1811,6 @@ TrimLeft(
return p - bytes;
}
-
-int
-TclTrimLeft(
- const char *bytes, /* String to be trimmed... */
- int numBytes, /* ...and its length in bytes */
- const char *trim, /* String of trim characters... */
- int numTrim) /* ...and its length in bytes */
-{
- int res;
- Tcl_DString bytesBuf, trimBuf;
-
- /* Empty strings -> nothing to do */
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
-
- Tcl_DStringInit(&bytesBuf);
- Tcl_DStringInit(&trimBuf);
- bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
- trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
-
- res = TrimLeft(bytes, numBytes, trim, numTrim);
- if (res > numBytes) {
- res = numBytes;
- }
-
- Tcl_DStringFree(&bytesBuf);
- Tcl_DStringFree(&trimBuf);
-
- return res;
-}
/*
*----------------------------------------------------------------------
@@ -1909,41 +1832,38 @@ int
TclTrim(
const char *bytes, /* String to be trimmed... */
int numBytes, /* ...and its length in bytes */
+ /* Calls in this routine
+ * rely on (bytes[numBytes] == '\0'). */
const char *trim, /* String of trim characters... */
int numTrim, /* ...and its length in bytes */
- int *trimRight) /* Offset from the end of the string. */
+ /* Calls in this routine
+ * rely on (trim[numTrim] == '\0'). */
+ int *trimRightPtr) /* Offset from the end of the string. */
{
- int trimLeft;
- Tcl_DString bytesBuf, trimBuf;
+ int trimLeft = 0, trimRight = 0;
- *trimRight = 0;
/* Empty strings -> nothing to do */
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
-
- Tcl_DStringInit(&bytesBuf);
- Tcl_DStringInit(&trimBuf);
- bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
- trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
-
- trimLeft = TrimLeft(bytes, numBytes, trim, numTrim);
- if (trimLeft > numBytes) {
- trimLeft = numBytes;
- }
- numBytes -= trimLeft;
- /* have to trim yet (first char was already verified within TrimLeft) */
- if (numBytes > 1) {
- bytes += trimLeft;
- *trimRight = TrimRight(bytes, numBytes, trim, numTrim);
- if (*trimRight > numBytes) {
- *trimRight = numBytes;
+ if ((numBytes > 0) && (numTrim > 0)) {
+
+ /* When bytes is NUL-terminated, returns 0 <= trimLeft <= numBytes */
+ trimLeft = TclTrimLeft(bytes, numBytes, trim, numTrim);
+ numBytes -= trimLeft;
+
+ /* If we did not trim the whole string, it starts with a character
+ * that we will not trim. Skip over it. */
+ if (numBytes > 0) {
+ const char *first = bytes + trimLeft;
+ bytes = Tcl_UtfNext(first);
+ numBytes -= (bytes - first);
+
+ if (numBytes > 0) {
+ /* When bytes is NUL-terminated, returns
+ * 0 <= trimRight <= numBytes */
+ trimRight = TclTrimRight(bytes, numBytes, trim, numTrim);
+ }
}
}
-
- Tcl_DStringFree(&bytesBuf);
- Tcl_DStringFree(&trimBuf);
-
+ *trimRightPtr = trimRight;
return trimLeft;
}
@@ -2234,7 +2154,6 @@ Tcl_StringCaseMatch(
int nocase) /* 0 for case sensitive, 1 for insensitive */
{
int p, charLen;
- const char *pstart = pattern;
Tcl_UniChar ch1 = 0, ch2 = 0;
while (1) {
@@ -2399,10 +2318,13 @@ Tcl_StringCaseMatch(
break;
}
}
+ /* If we reach here, we matched. Need to move past closing ] */
while (*pattern != ']') {
if (*pattern == '\0') {
- pattern = Tcl_UtfPrev(pattern, pstart);
- break;
+ /* We ran out of pattern after matching something in
+ * (unclosed!) brackets. So long as we ran out of string
+ * at the same time, we have a match. Otherwise, not. */
+ return (*str == '\0');
}
pattern++;
}
@@ -2831,9 +2753,37 @@ Tcl_DStringAppendElement(
{
char *dst = dsPtr->string + dsPtr->length;
int needSpace = TclNeedSpace(dsPtr->string, dst);
- char flags = needSpace ? TCL_DONT_QUOTE_HASH : 0;
- int newSize = dsPtr->length + needSpace
- + TclScanElement(element, -1, &flags);
+ char flags = 0;
+ int quoteHash = 1, newSize;
+
+ if (needSpace) {
+ /*
+ * If we need a space to separate the new element from something
+ * already ending the string, we're not appending the first element
+ * of any list, so we need not quote any leading hash character.
+ */
+ quoteHash = 0;
+ } else {
+ /*
+ * We don't need a space, maybe because there's some already there.
+ * Checking whether we might be appending a first element is a bit
+ * more involved.
+ *
+ * Backtrack over all whitespace.
+ */
+ while ((--dst >= dsPtr->string) && TclIsSpaceProcM(*dst)) {
+ }
+
+ /* Call again without whitespace to confound things. */
+ quoteHash = !TclNeedSpace(dsPtr->string, dst+1);
+ }
+ if (!quoteHash) {
+ flags |= TCL_DONT_QUOTE_HASH;
+ }
+ newSize = dsPtr->length + needSpace + TclScanElement(element, -1, &flags);
+ if (!quoteHash) {
+ flags |= TCL_DONT_QUOTE_HASH;
+ }
/*
* Allocate a larger buffer for the string if the current one isn't large
@@ -2865,8 +2815,8 @@ Tcl_DStringAppendElement(
element = dsPtr->string + offset;
}
}
- dst = dsPtr->string + dsPtr->length;
}
+ dst = dsPtr->string + dsPtr->length;
/*
* Convert the new string to a list element and copy it into the buffer at
@@ -2877,15 +2827,8 @@ Tcl_DStringAppendElement(
*dst = ' ';
dst++;
dsPtr->length++;
-
- /*
- * If we need a space to separate this element from preceding stuff,
- * then this element will not lead a list, and need not have it's
- * leading '#' quoted.
- */
-
- flags |= TCL_DONT_QUOTE_HASH;
}
+
dsPtr->length += TclConvertElement(element, -1, dst, flags);
dsPtr->string[dsPtr->length] = '\0';
return dsPtr->string;
@@ -3505,63 +3448,69 @@ TclNeedSpace(
/*
* A space is needed unless either:
* (a) we're at the start of the string, or
- */
+ *
+ * (NOTE: This check is now absorbed into the loop below.)
+ *
if (end == start) {
return 0;
}
+ *
+ */
+
/*
* (b) we're at the start of a nested list-element, quoted with an open
* curly brace; we can be nested arbitrarily deep, so long as the
* first curly brace starts an element, so backtrack over open curly
* braces that are trailing characters of the string; and
- */
+ *
+ * (NOTE: Every character our parser is looking for is a proper
+ * single-byte encoding of an ASCII value. It does not accept
+ * overlong encodings. Given that, there's no benefit using
+ * Tcl_UtfPrev. If it would find what we seek, so would byte-by-byte
+ * backward scan. Save routine call overhead and risk of wrong
+ * results should the behavior of Tcl_UtfPrev change in unexpected ways.
+ * Reconsider this if we ever start treating non-ASCII Unicode
+ * characters as meaningful list syntax, expanded Unicode spaces as
+ * element separators, for example.)
+ *
end = Tcl_UtfPrev(end, start);
while (*end == '{') {
- if (end == start) {
- return 0;
- }
- end = Tcl_UtfPrev(end, start);
+ if (end == start) {
+ return 0;
+ }
+ end = Tcl_UtfPrev(end, start);
+ }
+
+ *
+ */
+
+ while ((--end >= start) && (*end == '{')) {
+ }
+ if (end < start) {
+ return 0;
}
/*
* (c) the trailing character of the string is already a list-element
- * separator (according to TclFindElement); that is, one of these
- * characters:
- * \u0009 \t TAB
- * \u000A \n NEWLINE
- * \u000B \v VERTICAL TAB
- * \u000C \f FORM FEED
- * \u000D \r CARRIAGE RETURN
- * \u0020 SPACE
- * with the condition that the penultimate character is not a
- * backslash.
+ * separator, Use the same testing routine as TclFindElement to
+ * enforce consistency.
*/
- if (*end > 0x20) {
+ if (TclIsSpaceProcM(*end)) {
+ int result = 0;
+
/*
- * Performance tweak. All ASCII spaces are <= 0x20. So get a quick
- * answer for most characters before comparing against all spaces in
- * the switch below.
- *
- * NOTE: Remove this if other Unicode spaces ever get accepted as
- * list-element separators.
+ * Trailing whitespace might be part of a backslash escape
+ * sequence. Handle that possibility.
*/
- return 1;
- }
- switch (*end) {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- case '\v':
- case '\f':
- if ((end == start) || (end[-1] != '\\')) {
- return 0;
+ while ((--end >= start) && (*end == '\\')) {
+ result = !result;
}
+ return result;
}
return 1;
}
@@ -3689,7 +3638,7 @@ TclGetIntForIndex(
* Leading whitespace is acceptable in an index.
*/
- while (length && TclIsSpaceProc(*bytes)) {
+ while (length && TclIsSpaceProcM(*bytes)) {
bytes++;
length--;
}
@@ -3702,7 +3651,7 @@ TclGetIntForIndex(
if ((savedOp != '+') && (savedOp != '-')) {
goto parseError;
}
- if (TclIsSpaceProc(opPtr[1])) {
+ if (TclIsSpaceProcM(opPtr[1])) {
goto parseError;
}
*opPtr = '\0';
@@ -3876,7 +3825,7 @@ SetEndOffsetFromAny(
* after "end-" to Tcl_GetInt, then reverse for offset.
*/
- if (TclIsSpaceProc(bytes[4])) {
+ if (TclIsSpaceProcM(bytes[4])) {
goto badIndexFormat;
}
if (Tcl_GetInt(interp, bytes+4, &offset) != TCL_OK) {
@@ -4081,7 +4030,7 @@ TclCheckBadOctal(
* zero. Try to generate a meaningful error message.
*/
- while (TclIsSpaceProc(*p)) {
+ while (TclIsSpaceProcM(*p)) {
p++;
}
if (*p == '+' || *p == '-') {
@@ -4094,7 +4043,7 @@ TclCheckBadOctal(
while (isdigit(UCHAR(*p))) { /* INTL: digit. */
p++;
}
- while (TclIsSpaceProc(*p)) {
+ while (TclIsSpaceProcM(*p)) {
p++;
}
if (*p == '\0') {
diff --git a/tests/dstring.test b/tests/dstring.test
index 06121a3..5feb355 100644
--- a/tests/dstring.test
+++ b/tests/dstring.test
@@ -180,16 +180,37 @@ test dstring-2.12 {appending list elements} -constraints testdstring -setup {
} -cleanup {
testdstring free
} -result {x #}
-test dstring-2.13 {appending list elements} -constraints testdstring -body {
- # This test shows lack of sophistication in Tcl_DStringAppendElement's
- # decision about whether #-quoting can be disabled.
+test dstring-2.13 {appending list elements} -constraints testdstring -setup {
testdstring free
+} -body {
+ # This test checks the sophistication in Tcl_DStringAppendElement's
+ # decision about whether #-quoting can be disabled.
testdstring append "x " -1
testdstring element #
testdstring get
} -cleanup {
testdstring free
-} -result {x {#}}
+} -result {x #}
+test dstring-2.14 {appending list elements} -constraints testdstring -setup {
+ testdstring free
+} -body {
+ testdstring append " " -1
+ testdstring element #
+ testdstring get
+} -cleanup {
+ testdstring free
+} -result { {#}}
+test dstring-2.15 {appending list elements} -constraints testdstring -setup {
+ testdstring free
+} -body {
+ # This test checks the sophistication in Tcl_DStringAppendElement's
+ # decision about whether #-quoting can be disabled.
+ testdstring append "x " -1
+ testdstring element #
+ testdstring get
+} -cleanup {
+ testdstring free
+} -result {x #}
test dstring-3.1 {nested sublists} -constraints testdstring -setup {
testdstring free
@@ -306,10 +327,11 @@ test dstring-3.9 {appending list elements} -constraints testdstring -setup {
} -cleanup {
testdstring free
} -result {x {x #}}
-test dstring-3.10 {appending list elements} -constraints testdstring -body {
- # This test shows lack of sophistication in Tcl_DStringAppendElement's
- # decision about whether #-quoting can be disabled.
+test dstring-3.10 {appending list elements} -constraints testdstring -setup {
testdstring free
+} -body {
+ # This test checks the sophistication in Tcl_DStringAppendElement's
+ # decision about whether #-quoting can be disabled.
testdstring append x -1
testdstring start
testdstring append "x " -1
@@ -318,7 +340,33 @@ test dstring-3.10 {appending list elements} -constraints testdstring -body {
testdstring get
} -cleanup {
testdstring free
-} -result {x {x {#}}}
+} -result {x {x #}}
+test dstring-3.11 {appending list elements} -constraints testdstring -setup {
+ testdstring free
+} -body {
+ testdstring append x -1
+ testdstring start
+ testdstring append " " -1
+ testdstring element #
+ testdstring end
+ testdstring get
+} -cleanup {
+ testdstring free
+} -result {x { {#}}}
+test dstring-3.12 {appending list elements} -constraints testdstring -setup {
+ testdstring free
+} -body {
+ # This test checks the sophistication in Tcl_DStringAppendElement's
+ # decision about whether #-quoting can be disabled.
+ testdstring append x -1
+ testdstring start
+ testdstring append "x " -1
+ testdstring element #
+ testdstring end
+ testdstring get
+} -cleanup {
+ testdstring free
+} -result {x {x #}}
test dstring-4.1 {truncation} -constraints testdstring -setup {
testdstring free
diff --git a/tests/encoding.test b/tests/encoding.test
index 21edf79..6fef748 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -34,6 +34,8 @@ proc runtests {} {
# Some tests require the testencoding command
testConstraint testencoding [llength [info commands testencoding]]
+testConstraint testbytestring [llength [info commands testbytestring]]
+testConstraint teststringbytes [llength [info commands teststringbytes]]
testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}]
testConstraint exec [llength [info commands exec]]
testConstraint testgetdefenc [llength [info commands testgetdefenc]]
@@ -315,24 +317,20 @@ test encoding-14.1 {BinaryProc} {
test encoding-15.1 {UtfToUtfProc} {
encoding convertto utf-8 \xa3
} "\xc2\xa3"
-test encoding-15.2 {UtfToUtfProc null character output} {
- set x \u0000
- set y [encoding convertto utf-8 \u0000]
- set y [encoding convertfrom identity $y]
- binary scan $y H* z
- list [string bytelength $x] [string bytelength $y] $z
-} {2 1 00}
-test encoding-15.3 {UtfToUtfProc null character input} {
- set x [encoding convertfrom identity \x00]
- set y [encoding convertfrom utf-8 $x]
- binary scan [encoding convertto identity $y] H* z
- list [string bytelength $x] [string bytelength $y] $z
-} {1 2 c080}
-test encoding-15.4 {UtfToUtfProc emoji character input} {
+test encoding-15.2 {UtfToUtfProc null character output} testbytestring {
+ binary scan [testbytestring [encoding convertto utf-8 \u0000]] H* z
+ set z
+} 00
+test encoding-15.3 {UtfToUtfProc null character input} teststringbytes {
+ set y [encoding convertfrom utf-8 [encoding convertto utf-8 \u0000]]
+ binary scan [teststringbytes $y] H* z
+ set z
+} c080
+test encoding-15.4 {UtfToUtfProc emoji character input} -body {
set x \xED\xA0\xBD\xED\xB8\x82
set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82]
list [string length $x] $y
-} "6 \uD83D\uDE02"
+} -result "6 \uD83D\uDE02"
test encoding-15.5 {UtfToUtfProc emoji character input} {
set x \xF0\x9F\x98\x82
set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82]
@@ -716,7 +714,7 @@ test encoding-28.0 {all encodings load} -body {
encoding convertto $name $string
# discard the cached internal representation of Tcl_Encoding
- # Unfortunately, without this, encoding 2-1 fails.
+ # Unfortunately, without this, encoding 2-1 fails.
llength $name
}
return $count
diff --git a/tests/string.test b/tests/string.test
index f5defd9..e4c39d2 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -25,6 +25,7 @@ catch [list package require -exact Tcltest [info patchlevel]]
testConstraint testobj [expr {[info commands testobj] != {}}]
testConstraint testindexobj [expr {[info commands testindexobj] != {}}]
testConstraint tip389 [expr {[string length \U010000] == 2}]
+testConstraint testbytestring [llength [info commands testbytestring]]
# Used for constraining memory leak tests
testConstraint memory [llength [info commands memory]]
@@ -1561,6 +1562,34 @@ test string-20.5 {string trimright} {
test string-20.6 {string trimright, unicode default} {
string trimright ABC\u1361\u0085\x00\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u2028\u2029\u202F\u205F\u3000
} ABC\u1361
+test string-20.7 {string trim on not valid utf-8 sequence (consider NTS as continuation char), bug [c61818e4c9]} testbytestring {
+ set result {}
+ set a [testbytestring \xc0\x80\xA0]
+ set b foo$a
+ set m [list \u0000 U \xA0 V [testbytestring \xA0] W]
+ lappend result [string map $m $b]
+ lappend result [string map $m [string trimright $b x]]
+ lappend result [string map $m [string trimright $b \u0000]]
+ lappend result [string map $m [string trimleft $b fox]]
+ lappend result [string map $m [string trimleft $b fo\u0000]]
+ lappend result [string map $m [string trim $b fox]]
+ lappend result [string map $m [string trim $b fo\u0000]]
+} [list {*}[lrepeat 3 fooUV] {*}[lrepeat 2 UV V]]
+test string-20.8 {[c61818e4c9] [string trimright] fails when UtfPrev is ok} testbytestring {
+ set result {}
+ set a [testbytestring \xE8\xA0]
+ set b foo$a
+ set m [list \xE8 U \xA0 V [testbytestring \xE8] W [testbytestring \xA0] X]]
+ lappend result [string map $m $b]
+ lappend result [string map $m [string trimright $b x]]
+ lappend result [string map $m [string trimright $b \xE8]]
+ lappend result [string map $m [string trimright $b [bytestring \xE8]]]
+ lappend result [string map $m [string trimright $b \xA0]]
+ lappend result [string map $m [string trimright $b [bytestring \xA0]]]
+ lappend result [string map $m [string trimright $b \xE8\xA0]]
+ lappend result [string map $m [string trimright $b [bytestring \xE8\xA0]]]
+ lappend result [string map $m [string trimright $b \u0000]]
+} [list {*}[lrepeat 4 fooUV] {*}[lrepeat 2 fooU] {*}[lrepeat 2 foo] fooUV]
test string-21.1 {string wordend} {
list [catch {string wordend a} msg] $msg
@@ -1644,6 +1673,11 @@ test string-22.12 {string wordstart, unicode} {
test string-22.13 {string wordstart, unicode} {
string wordstart "\uC700\uC700 abc" 8
} 3
+test string-22.14 {string wordstart, invalid UTF-8} testbytestring {
+ # See Bug c61818e4c9
+ set demo [testbytestring "abc def\xE0\xA9ghi"]
+ string index $demo [string wordstart $demo 10]
+} g
test string-23.0 {string is boolean, Bug 1187123} testindexobj {
set x 5
diff --git a/tests/utf.test b/tests/utf.test
index 1a879ad..1b7b409 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -156,8 +156,167 @@ test utf-5.2 {Tcl_UtfFindLast} {testfindlast testbytestring} {
test utf-6.1 {Tcl_UtfNext} {
} {}
-test utf-7.1 {Tcl_UtfPrev} {
-} {}
+testConstraint testutfprev [llength [info commands testutfprev]]
+
+test utf-7.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev {}
+} 0
+test utf-7.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A
+} 0
+test utf-7.3 {Tcl_UtfPrev} testutfprev {
+ testutfprev AA
+} 1
+test utf-7.4 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF8
+} 1
+test utf-7.4.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF8\xA0\xA0\xA0 2
+} 1
+test utf-7.4.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF8\xF8\xA0\xA0 2
+} 1
+test utf-7.5 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF4
+} 1
+test utf-7.5.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF4\xA0\xA0\xA0 2
+} 1
+test utf-7.5.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF4\xF8\xA0\xA0 2
+} 1
+test utf-7.6 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xE8
+} 1
+test utf-7.6.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xE8\xA0\xA0\xA0 2
+} 1
+test utf-7.6.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xE8\xF8\xA0\xA0 2
+} 1
+test utf-7.7 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xD0
+} 1
+test utf-7.7.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xD0\xA0\xA0\xA0 2
+} 1
+test utf-7.7.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xD0\xF8\xA0\xA0 2
+} 1
+test utf-7.8 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xA0
+} 1
+test utf-7.8.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xA0\xA0\xA0\xA0 2
+} 1
+test utf-7.8.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xA0\xF8\xA0\xA0 2
+} 1
+test utf-7.9 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF8\xA0
+} 2
+test utf-7.9.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF8\xA0\xA0\xA0 3
+} 2
+test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF8\xA0\xF8\xA0 3
+} 2
+test utf-7.10 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF4\xA0
+} 1
+test utf-7.10.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF4\xA0\xA0\xA0 3
+} 1
+test utf-7.10.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF4\xA0\xF8\xA0 3
+} 1
+test utf-7.11 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xE8\xA0
+} 1
+test utf-7.11.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xE8\xA0\xA0\xA0 3
+} 1
+test utf-7.11.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xE8\xA0\xF8\xA0 3
+} 1
+test utf-7.12 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xD0\xA0
+} 1
+test utf-7.12.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xD0\xA0\xA0\xA0 3
+} 1
+test utf-7.12.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xD0\xA0\xF8\xA0 3
+} 1
+test utf-7.13 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xA0\xA0
+} 2
+test utf-7.13.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xA0\xA0\xA0\xA0 3
+} 2
+test utf-7.13.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xA0\xA0\xF8\xA0 3
+} 2
+test utf-7.14 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF8\xA0\xA0
+} 3
+test utf-7.14.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF8\xA0\xA0\xA0 4
+} 3
+test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF8\xA0\xA0\xF8 4
+} 3
+test utf-7.15 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF4\xA0\xA0
+} 1
+test utf-7.15.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF4\xA0\xA0\xA0 4
+} 1
+test utf-7.15.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF4\xA0\xA0\xF8 4
+} 1
+test utf-7.16 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xE8\xA0\xA0
+} 1
+test utf-7.16.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xE8\xA0\xA0\xA0 4
+} 1
+test utf-7.16.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xE8\xA0\xA0\xF8 4
+} 1
+test utf-7.17 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xD0\xA0\xA0
+} 3
+test utf-7.17.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xD0\xA0\xA0\xA0 4
+} 3
+test utf-7.17.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xD0\xA0\xA0\xF8 4
+} 3
+test utf-7.18 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xA0\xA0\xA0
+} 3
+test utf-7.18.1 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xA0\xA0\xA0\xA0 4
+} 3
+test utf-7.18.2 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xA0\xA0\xA0\xF8 4
+} 3
+test utf-7.19 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF8\xA0\xA0\xA0
+} 4
+test utf-7.20 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xF4\xA0\xA0\xA0
+} 1
+test utf-7.21 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xE8\xA0\xA0\xA0
+} 4
+test utf-7.22 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xD0\xA0\xA0\xA0
+} 4
+test utf-7.23 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xA0\xA0\xA0\xA0
+} 4
test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
string index abcd 0
diff --git a/tests/util.test b/tests/util.test
index 2ac11bf..f5a59ee 100644
--- a/tests/util.test
+++ b/tests/util.test
@@ -383,6 +383,10 @@ test util-5.50 {Tcl_StringMatch} {
test util-5.51 {Tcl_StringMatch} {
Wrapper_Tcl_StringMatch "" ""
} 1
+test util-5.52 {Tcl_StringMatch} {
+ Wrapper_Tcl_StringMatch \[a\u0000 a\x80
+} 0
+
test util-6.1 {Tcl_PrintDouble - using tcl_precision} -setup {
set old_precision $::tcl_precision
@@ -508,25 +512,64 @@ test util-8.4 {TclNeedSpace - correct UTF8 handling} testdstring {
llength [testdstring get]
} 2
test util-8.5 {TclNeedSpace - correct UTF8 handling} testdstring {
- # Note that in this test TclNeedSpace actually gets it wrong,
- # claiming we need a space when we really do not. Extra space
- # between list elements is harmless though, and better to have
- # extra space in really weird string reps of lists, than to
- # invest the effort required to make TclNeedSpace foolproof.
testdstring free
testdstring append {\\ } -1
testdstring element foo
list [llength [testdstring get]] [string length [testdstring get]]
-} {2 7}
+} {2 6}
test util-8.6 {TclNeedSpace - correct UTF8 handling} testdstring {
- # Another example of TclNeedSpace harmlessly getting it wrong.
testdstring free
testdstring append {\\ } -1
testdstring append \{ -1
testdstring element foo
testdstring append \} -1
list [llength [testdstring get]] [string length [testdstring get]]
-} {2 9}
+} {2 8}
+test util-8.7 {TclNeedSpace - watch out for escaped space} {
+ testdstring free
+ testdstring append {\ } -1
+ testdstring start
+ testdstring end
+
+ # Should make {\ {}}
+ list [llength [testdstring get]] [string index [testdstring get] 3]
+} {2 \{}
+test util-8.8 {TclNeedSpace - watch out for escaped space} {
+ testdstring free
+ testdstring append {\\ } -1
+ testdstring start
+ testdstring end
+
+ # Should make {\\ {}}
+ list [llength [testdstring get]] [string index [testdstring get] 3]
+} {2 \{}
+test util-8.9 {TclNeedSpace - watch out for escaped space} {
+ testdstring free
+ testdstring append {\\\ } -1
+ testdstring start
+ testdstring end
+
+ # Should make {\\\ {}}
+ list [llength [testdstring get]] [string index [testdstring get] 5]
+} {2 \{}
+test util-8.10 {TclNeedSpace - watch out for escaped space} {
+ testdstring free
+ testdstring append {\\\\\\\ } -1
+ testdstring start
+ testdstring end
+
+ # Should make {\\\\\\\ {}}
+ list [llength [testdstring get]] [string index [testdstring get] 9]
+} {2 \{}
+test util-8.11 {TclNeedSpace - watch out for escaped space} {
+ testdstring free
+ testdstring append {\\\\\\\\ } -1
+ testdstring start
+ testdstring end
+
+ # Should make {\\\\\\\\ {}}
+ list [llength [testdstring get]] [string index [testdstring get] 9]
+} {2 \{}
test util-9.0.0 {TclGetIntForIndex} {
string index abcd 0
diff --git a/unix/tclUnixFile.c b/unix/tclUnixFile.c
index 5684b16..f70ce6a 100644
--- a/unix/tclUnixFile.c
+++ b/unix/tclUnixFile.c
@@ -98,7 +98,7 @@ TclpFindExecutable(
*/
while (1) {
- while (TclIsSpaceProc(*p)) {
+ while (TclIsSpaceProcM(*p)) {
p++;
}
name = p;