From 72c1f925db451c491dbe57887feed9b5460dd24e Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 1 Dec 2016 20:13:10 +0000 Subject: Reports from NSF that command epoch bumping isn't properly timed. http://paste.tclers.tk/4030 Since the epoch should bump to indicate when Tcl_FindCommand() would produce a different result from the cached value, the bump ought to be connected to the state change that would have that effect. This checkin appears to be the more correct answer, and it makes the Delete path get into agreement with the Rename path. Review would be good. --- generic/tclBasic.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/generic/tclBasic.c b/generic/tclBasic.c index c1dd52d..686c292 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -3024,13 +3024,6 @@ Tcl_DeleteCommandFromToken( Tcl_Command importCmd; /* - * Bump the command epoch counter. This will invalidate all cached - * references that point to this command. - */ - - cmdPtr->cmdEpoch++; - - /* * The code here is tricky. We can't delete the hash table entry before * invoking the deletion callback because there are cases where the * deletion callback needs to invoke the command (e.g. object systems such @@ -3052,6 +3045,14 @@ Tcl_DeleteCommandFromToken( Tcl_DeleteHashEntry(cmdPtr->hPtr); cmdPtr->hPtr = NULL; } + + /* + * Bump the command epoch counter. This will invalidate all cached + * references that point to this command. + */ + + cmdPtr->cmdEpoch++; + return 0; } @@ -3154,6 +3155,14 @@ Tcl_DeleteCommandFromToken( if (cmdPtr->hPtr != NULL) { Tcl_DeleteHashEntry(cmdPtr->hPtr); cmdPtr->hPtr = NULL; + /* + * Bump the command epoch counter. This will invalidate all cached + * references that point to this command. + */ + + cmdPtr->cmdEpoch++; + + cmdPtr->cmdEpoch++; } /* -- cgit v0.12 From 674aeae3b03a081aff35f8b1bba05e8e82040692 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 2 Dec 2016 13:26:08 +0000 Subject: Remove dup line. --- generic/tclBasic.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/generic/tclBasic.c b/generic/tclBasic.c index 686c292..81b3513 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -3155,14 +3155,13 @@ Tcl_DeleteCommandFromToken( if (cmdPtr->hPtr != NULL) { Tcl_DeleteHashEntry(cmdPtr->hPtr); cmdPtr->hPtr = NULL; - /* - * Bump the command epoch counter. This will invalidate all cached - * references that point to this command. - */ - cmdPtr->cmdEpoch++; + /* + * Bump the command epoch counter. This will invalidate all cached + * references that point to this command. + */ - cmdPtr->cmdEpoch++; + cmdPtr->cmdEpoch++; } /* -- cgit v0.12 From 0714cd9321bd746c492495904d66208bad3f7520 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 2 Dec 2016 18:18:59 +0000 Subject: Added long comment explaining history and work in progress making bytearray interfaces usable. --- generic/tclBinary.c | 109 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 87 insertions(+), 22 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index cb4534d..4d063b2 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -155,28 +155,93 @@ static const EnsembleImplMap decodeMap[] = { }; /* - * The following object type represents an array of bytes. An array of bytes - * is not equivalent to an internationalized string. Conceptually, a string is - * an array of 16-bit quantities organized as a sequence of properly formed - * UTF-8 characters, while a ByteArray is an array of 8-bit quantities. - * Accessor functions are provided to convert a ByteArray to a String or a - * String to a ByteArray. Two or more consecutive bytes in an array of bytes - * may look like a single UTF-8 character if the array is casually treated as - * a string. But obtaining the String from a ByteArray is guaranteed to - * produced properly formed UTF-8 sequences so that there is a one-to-one map - * between bytes and characters. - * - * Converting a ByteArray to a String proceeds by casting each byte in the - * array to a 16-bit quantity, treating that number as a Unicode character, - * and storing the UTF-8 version of that Unicode character in the String. For - * ByteArrays consisting entirely of values 1..127, the corresponding String - * representation is the same as the ByteArray representation. - * - * Converting a String to a ByteArray proceeds by getting the Unicode - * representation of each character in the String, casting it to a byte by - * truncating the upper 8 bits, and then storing the byte in the ByteArray. - * Converting from ByteArray to String and back to ByteArray is not lossy, but - * converting an arbitrary String to a ByteArray may be. + * The following object types represent an array of bytes. The intent is + * to allow arbitrary binary data to pass through Tcl as a Tcl value + * without loss or damage. Such values are useful for things like + * encoded strings or Tk images to name just two. + * + * It's strange to have two Tcl_ObjTypes in place for this task when + * one would do, so a bit of detail and history how we got to this point + * and where we might go from here. + * + * A bytearray is an ordered sequence of bytes. Each byte is an integer + * value in the range [0-255]. To be a Tcl value type, we need a way to + * encode each value in the value set as a Tcl string. The simplest + * encoding is to represent each byte value as the same codepoint value. + * A bytearray of N bytes is encoded into a Tcl string of N characters + * where the codepoint of each character is the value of corresponding byte. + * This approach creates a one-to-one map between all bytearray values + * and a subset of Tcl string values. + * + * When converting a Tcl string value to the bytearray internal rep, the + * question arises what to do with strings outside that subset? That is, + * those Tcl strings containing at least one codepoint greater than 255? + * The obviously correct answer is to raise an error! That string value + * does not represent any valid bytearray value. Full Stop. The + * setFromAnyProc signature has a completion code return value for just + * this reason, to reject invalid inputs. + * + * Unfortunately this was not the path taken by the authors of the + * original tclByteArrayType. They chose to accept all Tcl string values + * as acceptable string encodings of the bytearray values that result + * from masking away the high bits of any codepoint value at all. This + * meant that every bytearray value had multiple accepted string + * representations. + * + * The implications of this choice are truly ugly. When a Tcl value has + * a string representation, we are required to accept that as the true + * value. Bytearray values that possess a string representation cannot + * be processed as bytearrays because we cannot know which true value + * that bytearray represents. The consequence is that we drag around + * an internal rep that we cannot make any use of. This painful price + * is extracted at any point after a string rep happens to be generated + * for the value. This happens even when the troublesome codepoints + * outside the byte range never show up. This happens rather routinely + * in normal Tcl operations unless we burden the script writer with the + * cognitive burden of avoiding it. The price is also paid by callers + * of the C interface. The routine + * + * unsigned char *Tcl_GetByteArrayFromObj(objPtr, lenPtr) + * + * has a guarantee to always return a non-NULL value, but that value + * points to a byte sequence that cannot be used by the caller to + * process the Tcl value absent some sideband testing that objPtr + * is "pure". Tcl offers no public interface to perform this test, + * so callers either break encapsulation or are unavoidably buggy. Tcl + * has defined a public interface that cannot be used correctly. The + * Tcl source code itself suffers the same problem, and has been buggy, + * but progressively less so as more and more portions of the code have + * been retrofitted with the required "purity testing". The set of values + * able to pass the purity test can be increased via the introduction of + * a "canonical" flag marker, but the only way the broken interface itself + * can be discarded is to start over and define the Tcl_ObjType properly. + * Bytearrays should simply be usable as bytearrays without a kabuki + * dance of testing. + * + * The Tcl_ObjType "properByteArrayType" is (nearly) a correct + * implementation of bytearrays. Any Tcl value with the type + * properByteArrayType can have its bytearray value fetched and + * used with confidence that acting on that value is equivalent to + * acting on the true Tcl string value. This still implies a side + * testing burden -- past mistakes will not let us avoid that + * immediately, but it is at least a conventional test of type, and + * can be implemented entirely by examining the objPtr fields, with + * no need to query the intrep, as a canonical flag would require. + * + * Until Tcl_GetByteArrayFromObj() and Tcl_SetByteArrayLength() can + * be revised to admit the possibility of returning NULL when the true + * value is not a valid bytearray, we need a mechanism to retain + * compatibility with the deployed callers of the broken interface. + * That's what the retained "tclByteArrayType" provides. In those + * unusual circumstances where we convert an invalid bytearray value + * to a bytearray type, it is to this legacy type. Essentially any + * time this legacy type gets used, it's a signal of a bug being ignored. + * A TIP should be drafted to remove this connection to the broken past + * so that Tcl 9 will no longer have any trace of it. Prescribing a + * migration path will be the key element of that work. The internal + * changes now in place are the limit of what can be done short of + * interface repair. They provide a great expansion of the histories + * over which bytearray values can be useful in the meanwhile. */ static const Tcl_ObjType properByteArrayType = { -- cgit v0.12