From 8a8e70d5a6a4db9ed214aa5d0b81bf0a63f910cd Mon Sep 17 00:00:00 2001 From: sebres Date: Wed, 30 Jan 2019 20:32:18 +0000 Subject: =?UTF-8?q?fixes=20utf-8=20compatibility=20of=20proc/lambda=20argu?= =?UTF-8?q?ments=20(regression=20on=20compiled=20locals/variables=20contai?= =?UTF-8?q?ning=20utf-8=20characters)=20-=20CompiledLocal::nameLength=20is?= =?UTF-8?q?=20length=20in=20bytes=20not=20in=20chars=20everywhere=20in=20t?= =?UTF-8?q?cl;=20simplest=20example:=20=20=20%=20apply=20{{=E2=82=AC}=20{?= =?UTF-8?q?=20set=20"=E2=82=AC"=20}}=201;=20#=20or=20apply=20[list=20\u20a?= =?UTF-8?q?c=20{=20set=20\u20ac=20}]=201=20=20=20can't=20read=20"=E2=82=AC?= =?UTF-8?q?":=20no=20such=20variable?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generic/tclInt.h | 2 +- generic/tclProc.c | 25 +++++++++++-------------- tests/var.test | 22 ++++++++++++++++++++++ 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index 0b67709..5b0206f 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -902,7 +902,7 @@ typedef struct CompiledLocal { /* Next compiler-recognized local variable for * this procedure, or NULL if this is the last * local. */ - int nameLength; /* The number of characters in local + int nameLength; /* The number of bytes in local * variable's name. Used to speed up variable * lookups. */ int frameIndex; /* Index in the array of compiler-assigned diff --git a/generic/tclProc.c b/generic/tclProc.c index 533b817..f9869b8 100644 --- a/generic/tclProc.c +++ b/generic/tclProc.c @@ -474,7 +474,7 @@ TclCreateProc( } for (i = 0; i < numArgs; i++) { - int fieldCount, nameLength, valueLength; + int fieldCount, nameLength; Tcl_Obj **fieldValues; /* @@ -504,20 +504,14 @@ TclCreateProc( goto procError; } - argname = Tcl_GetStringFromObj(fieldValues[0], &plen); - nameLength = Tcl_NumUtfChars(argname, plen); - if (fieldCount == 2) { - const char * value = TclGetString(fieldValues[1]); - valueLength = Tcl_NumUtfChars(value, fieldValues[1]->length); - } else { - valueLength = 0; - } + argname = Tcl_GetStringFromObj(fieldValues[0], &nameLength); /* * Check that the formal parameter name is a scalar. */ argnamei = argname; + plen = nameLength; argnamelast = argname[plen-1]; while (plen--) { if (argnamei[0] == '(') { @@ -554,7 +548,7 @@ TclCreateProc( */ if ((localPtr->nameLength != nameLength) - || (Tcl_UtfNcmp(localPtr->name, argname, nameLength)) + || (memcmp(localPtr->name, argname, nameLength) != 0) || (localPtr->frameIndex != i) || !(localPtr->flags & VAR_ARGUMENT) || (localPtr->defValuePtr == NULL && fieldCount == 2) @@ -572,12 +566,15 @@ TclCreateProc( */ if (localPtr->defValuePtr != NULL) { - int tmpLength; + int tmpLength, valueLength; const char *tmpPtr = TclGetStringFromObj(localPtr->defValuePtr, &tmpLength); + const char *value = TclGetStringFromObj(fieldValues[1], + &valueLength); - if ((valueLength != tmpLength) || - Tcl_UtfNcmp(Tcl_GetString(fieldValues[1]), tmpPtr, tmpLength)) { + if ((valueLength != tmpLength) + || memcmp(value, tmpPtr, tmpLength) != 0 + ) { errorObj = Tcl_ObjPrintf( "procedure \"%s\": formal parameter \"" ,procName); Tcl_AppendObjToObj(errorObj, fieldValues[0]); @@ -626,7 +623,7 @@ TclCreateProc( if ((i == numArgs - 1) && (localPtr->nameLength == 4) && (localPtr->name[0] == 'a') - && (strcmp(localPtr->name, "args") == 0)) { + && (memcmp(localPtr->name, "args", 4) == 0)) { localPtr->flags |= VAR_IS_ARGS; } } diff --git a/tests/var.test b/tests/var.test index 8d86fce..32388a2 100644 --- a/tests/var.test +++ b/tests/var.test @@ -201,6 +201,28 @@ test var-1.18 {TclLookupVar, resurrect array element via upvar to deleted array: test var-1.19 {TclLookupVar, right error message when parsing variable name} -body { [format set] thisvar(doesntexist) } -returnCodes error -result {can't read "thisvar(doesntexist)": no such variable} +test var-1.20 {TclLookupVar, regression on utf-8 variable names} -setup { + proc p [list \u20ac \xe4] {info vars} +} -body { + # test variable with non-ascii name is available (euro and a-uml chars here): + list \ + [p 1 2] \ + [apply [list [list \u20ac \xe4] {info vars}] 1 2] \ + [apply [list [list [list \u20ac \u20ac] [list \xe4 \xe4]] {info vars}]] \ +} -cleanup { + rename p {} +} -result [lrepeat 3 [list \u20ac \xe4]] +test var-1.21 {TclLookupVar, regression on utf-8 variable names} -setup { + proc p [list [list \u20ac v\u20ac] [list \xe4 v\xe4]] {list [set \u20ac] [set \xe4]} +} -body { + # test variable with non-ascii name (and default) is resolvable (euro and a-uml chars here): + list \ + [p] \ + [apply [list [list \u20ac \xe4] {list [set \u20ac] [set \xe4]}] v\u20ac v\xe4] \ + [apply [list [list [list \u20ac v\u20ac] [list \xe4 v\xe4]] {list [set \u20ac] [set \xe4]}]] \ +} -cleanup { + rename p {} +} -result [lrepeat 3 [list v\u20ac v\xe4]] test var-2.1 {Tcl_LappendObjCmd, create var if new} { catch {unset x} -- cgit v0.12 From fc5dc8e81101cd4ed4970ab6451792f7fa975811 Mon Sep 17 00:00:00 2001 From: sebres Date: Thu, 31 Jan 2019 12:52:56 +0000 Subject: code review with small amend (note nameLength is number of bytes, the argument may be utf-8 as well as not necessarily a NTS, so access of char after end may cause segfault). --- generic/tclInt.h | 5 ++--- generic/tclProc.c | 33 ++++++++++++++++----------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index 5b0206f..b5f9f76 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -902,9 +902,8 @@ typedef struct CompiledLocal { /* Next compiler-recognized local variable for * this procedure, or NULL if this is the last * local. */ - int nameLength; /* The number of bytes in local - * variable's name. Used to speed up variable - * lookups. */ + int nameLength; /* The number of bytes in local variable's name. + * Among others used to speed up var lookups. */ int frameIndex; /* Index in the array of compiler-assigned * variables in the procedure call frame. */ int flags; /* Flag bits for the local variable. Same as diff --git a/generic/tclProc.c b/generic/tclProc.c index f9869b8..03cb0f0 100644 --- a/generic/tclProc.c +++ b/generic/tclProc.c @@ -371,11 +371,9 @@ TclCreateProc( Interp *iPtr = (Interp *) interp; register Proc *procPtr; - int i, result, numArgs, plen; - const char *bytes, *argname, *argnamei; - char argnamelast; + int i, result, numArgs; register CompiledLocal *localPtr = NULL; - Tcl_Obj *defPtr, *errorObj, **argArray; + Tcl_Obj **argArray; int precompiled = 0; if (bodyPtr->typePtr == &tclProcBodyType) { @@ -412,6 +410,7 @@ TclCreateProc( */ if (Tcl_IsShared(bodyPtr)) { + const char *bytes; int length; Tcl_Obj *sharedBodyPtr = bodyPtr; @@ -474,6 +473,7 @@ TclCreateProc( } for (i = 0; i < numArgs; i++) { + const char *argname, *argnamei, *argnamelast; int fieldCount, nameLength; Tcl_Obj **fieldValues; @@ -487,7 +487,7 @@ TclCreateProc( goto procError; } if (fieldCount > 2) { - errorObj = Tcl_NewStringObj( + Tcl_Obj *errorObj = Tcl_NewStringObj( "too many fields in argument specifier \"", -1); Tcl_AppendObjToObj(errorObj, argArray[i]); Tcl_AppendToObj(errorObj, "\"", -1); @@ -511,11 +511,10 @@ TclCreateProc( */ argnamei = argname; - plen = nameLength; - argnamelast = argname[plen-1]; - while (plen--) { - if (argnamei[0] == '(') { - if (argnamelast == ')') { /* We have an array element. */ + argnamelast = Tcl_UtfPrev(argname + nameLength, argname); + while (argnamei < argnamelast) { + if (*argnamei == '(') { + if (*argnamelast == ')') { /* We have an array element. */ Tcl_SetObjResult(interp, Tcl_ObjPrintf( "formal parameter \"%s\" is an array element", Tcl_GetString(fieldValues[0]))); @@ -523,8 +522,9 @@ TclCreateProc( "FORMALARGUMENTFORMAT", NULL); goto procError; } - } else if ((argnamei[0] == ':') && (argnamei[1] == ':')) { - errorObj = Tcl_NewStringObj("formal parameter \"", -1); + } else if (*argnamei == ':' && *(argnamei+1) == ':') { + Tcl_Obj *errorObj = Tcl_NewStringObj( + "formal parameter \"", -1); Tcl_AppendObjToObj(errorObj, fieldValues[0]); Tcl_AppendToObj(errorObj, "\" is not a simple name", -1); Tcl_SetObjResult(interp, errorObj); @@ -575,8 +575,8 @@ TclCreateProc( if ((valueLength != tmpLength) || memcmp(value, tmpPtr, tmpLength) != 0 ) { - errorObj = Tcl_ObjPrintf( - "procedure \"%s\": formal parameter \"" ,procName); + Tcl_Obj *errorObj = Tcl_ObjPrintf( + "procedure \"%s\": formal parameter \"", procName); Tcl_AppendObjToObj(errorObj, fieldValues[0]); Tcl_AppendToObj(errorObj, "\" has " "default value inconsistent with precompiled body", -1); @@ -641,9 +641,8 @@ TclCreateProc( localPtr = procPtr->firstLocalPtr; procPtr->firstLocalPtr = localPtr->nextPtr; - defPtr = localPtr->defValuePtr; - if (defPtr != NULL) { - Tcl_DecrRefCount(defPtr); + if (localPtr->defValuePtr != NULL) { + Tcl_DecrRefCount(localPtr->defValuePtr); } ckfree(localPtr); -- cgit v0.12 From 272e429c2c0bc13b8284eae3a420a69c6140d38d Mon Sep 17 00:00:00 2001 From: sebres Date: Thu, 31 Jan 2019 16:22:45 +0000 Subject: TclPushVarName: rewritten iterators over token-strings considering utf-8 multi-bytes. --- generic/tclCompCmds.c | 62 +++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c index 838e9d7..d8f0aeb 100644 --- a/generic/tclCompCmds.c +++ b/generic/tclCompCmds.c @@ -3408,10 +3408,10 @@ TclPushVarName( int *isScalarPtr) /* Must not be NULL. */ { register const char *p; - const char *name, *elName; - register int i, n; + const char *last, *name, *elName; + register int n; Tcl_Token *elemTokenPtr = NULL; - int nameChars, elNameChars, simpleVarName, localIndex; + int nameLen, elNameLen, simpleVarName, localIndex; int elemTokenCount = 0, allocedTokens = 0, removedParen = 0; /* @@ -3424,7 +3424,7 @@ TclPushVarName( simpleVarName = 0; name = elName = NULL; - nameChars = elNameChars = 0; + nameLen = elNameLen = 0; localIndex = -1; if (varTokenPtr->type == TCL_TOKEN_SIMPLE_WORD) { @@ -3436,22 +3436,25 @@ TclPushVarName( simpleVarName = 1; name = varTokenPtr[1].start; - nameChars = varTokenPtr[1].size; - if (name[nameChars-1] == ')') { + nameLen = varTokenPtr[1].size; + if (name[nameLen-1] == ')') { /* * last char is ')' => potential array reference. */ - - for (i=0,p=name ; itype = TCL_TOKEN_TEXT; elemTokenPtr->start = elName; - elemTokenPtr->size = elNameChars; + elemTokenPtr->size = elNameLen; elemTokenPtr->numComponents = 0; elemTokenCount = 1; } @@ -3469,21 +3472,22 @@ TclPushVarName( } else if (interp && ((n = varTokenPtr->numComponents) > 1) && (varTokenPtr[1].type == TCL_TOKEN_TEXT) && (varTokenPtr[n].type == TCL_TOKEN_TEXT) - && (varTokenPtr[n].start[varTokenPtr[n].size - 1] == ')')) { + && (*((p = varTokenPtr[n].start + varTokenPtr[n].size)-1) == ')') + && (*Tcl_UtfPrev(p, varTokenPtr[n].start) == ')')) { /* * Check for parentheses inside first token. */ simpleVarName = 0; - for (i = 0, p = varTokenPtr[1].start; - i < varTokenPtr[1].size; i++, p++) { + for (p = varTokenPtr[1].start, + last = p + varTokenPtr[1].size; p < last; p = Tcl_UtfNext(p)) { if (*p == '(') { simpleVarName = 1; break; } } if (simpleVarName) { - int remainingChars; + int remainingLen; /* * Check the last token: if it is just ')', do not count it. @@ -3499,13 +3503,13 @@ TclPushVarName( } name = varTokenPtr[1].start; - nameChars = p - varTokenPtr[1].start; + nameLen = p - varTokenPtr[1].start; elName = p + 1; - remainingChars = (varTokenPtr[2].start - p) - 1; - elNameChars = (varTokenPtr[n].start-p) + varTokenPtr[n].size - 1; + remainingLen = (varTokenPtr[2].start - p) - 1; + elNameLen = (varTokenPtr[n].start-p) + varTokenPtr[n].size - 1; if (!(flags & TCL_NO_ELEMENT)) { - if (remainingChars) { + if (remainingLen) { /* * Make a first token with the extra characters in the first * token. @@ -3515,7 +3519,7 @@ TclPushVarName( allocedTokens = 1; elemTokenPtr->type = TCL_TOKEN_TEXT; elemTokenPtr->start = elName; - elemTokenPtr->size = remainingChars; + elemTokenPtr->size = remainingLen; elemTokenPtr->numComponents = 0; elemTokenCount = n; @@ -3544,8 +3548,8 @@ TclPushVarName( int hasNsQualifiers = 0; - for (i = 0, p = name; i < nameChars; i++, p++) { - if ((*p == ':') && ((i+1) < nameChars) && (*(p+1) == ':')) { + for (p = name, last = p + nameLen-1; p < last; p = Tcl_UtfNext(p)) { + if ((*p == ':') && (*(p+1) == ':')) { hasNsQualifiers = 1; break; } @@ -3558,7 +3562,7 @@ TclPushVarName( */ if (!hasNsQualifiers) { - localIndex = TclFindCompiledLocal(name, nameChars, 1, envPtr); + localIndex = TclFindCompiledLocal(name, nameLen, 1, envPtr); if ((flags & TCL_NO_LARGE_INDEX) && (localIndex > 255)) { /* * We'll push the name. @@ -3568,7 +3572,7 @@ TclPushVarName( } } if (interp && localIndex < 0) { - PushLiteral(envPtr, name, nameChars); + PushLiteral(envPtr, name, nameLen); } /* @@ -3577,7 +3581,7 @@ TclPushVarName( */ if (elName != NULL && !(flags & TCL_NO_ELEMENT)) { - if (elNameChars) { + if (elNameLen) { TclCompileTokens(interp, elemTokenPtr, elemTokenCount, envPtr); } else { -- cgit v0.12