summaryrefslogtreecommitdiffstats
path: root/generic/tclBinary.c
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2021-11-08 23:13:03 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2021-11-08 23:13:03 (GMT)
commit37295b4c1cb1855a1b41ee04672cf25df3e56f9d (patch)
treed1407a7ce174b1cc48e6e088e9d98f7cbf142673 /generic/tclBinary.c
parentd29c3178051f09e66db3204230d26fa28364fdd4 (diff)
parentfa56430c8b7ebbd159d6e3b45ecfd04f2a193d9b (diff)
downloadtcl-37295b4c1cb1855a1b41ee04672cf25df3e56f9d.zip
tcl-37295b4c1cb1855a1b41ee04672cf25df3e56f9d.tar.gz
tcl-37295b4c1cb1855a1b41ee04672cf25df3e56f9d.tar.bz2
Merge 8.7
Diffstat (limited to 'generic/tclBinary.c')
-rw-r--r--generic/tclBinary.c267
1 files changed, 136 insertions, 131 deletions
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 633bc1e..a586f18 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -2,7 +2,7 @@
* tclBinary.c --
*
* This file contains the implementation of the "binary" Tcl built-in
- * command and the Tcl binary data object.
+ * command and the Tcl value internal representation for binary data.
*
* Copyright © 1997 Sun Microsystems, Inc.
* Copyright © 1998-1999 Scriptics Corporation.
@@ -75,31 +75,15 @@ static int NeedReversing(int format);
static void CopyNumber(const void *from, void *to,
unsigned length, int type);
/* Binary ensemble commands */
-static int BinaryFormatCmd(ClientData clientData,
- Tcl_Interp *interp,
- int objc, Tcl_Obj *const objv[]);
-static int BinaryScanCmd(ClientData clientData,
- Tcl_Interp *interp,
- int objc, Tcl_Obj *const objv[]);
+static Tcl_ObjCmdProc BinaryFormatCmd;
+static Tcl_ObjCmdProc BinaryScanCmd;
/* Binary encoding sub-ensemble commands */
-static int BinaryEncodeHex(ClientData clientData,
- Tcl_Interp *interp,
- int objc, Tcl_Obj *const objv[]);
-static int BinaryDecodeHex(ClientData clientData,
- Tcl_Interp *interp,
- int objc, Tcl_Obj *const objv[]);
-static int BinaryEncode64(ClientData clientData,
- Tcl_Interp *interp,
- int objc, Tcl_Obj *const objv[]);
-static int BinaryDecode64(ClientData clientData,
- Tcl_Interp *interp,
- int objc, Tcl_Obj *const objv[]);
-static int BinaryEncodeUu(ClientData clientData,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *const objv[]);
-static int BinaryDecodeUu(ClientData clientData,
- Tcl_Interp *interp,
- int objc, Tcl_Obj *const objv[]);
+static Tcl_ObjCmdProc BinaryEncodeHex;
+static Tcl_ObjCmdProc BinaryDecodeHex;
+static Tcl_ObjCmdProc BinaryEncode64;
+static Tcl_ObjCmdProc BinaryDecode64;
+static Tcl_ObjCmdProc BinaryEncodeUu;
+static Tcl_ObjCmdProc BinaryDecodeUu;
/*
* The following tables are used by the binary encoders
@@ -165,12 +149,11 @@ static const EnsembleImplMap decodeMap[] = {
* images to name just two.
*
* It's strange to have two Tcl_ObjTypes in place for this task when one would
- * do, so a bit of detail and history how we got to this point and where we
- * might go from here.
+ * do, so a bit of detail and history will aid understanding.
*
* A bytearray is an ordered sequence of bytes. Each byte is an integer value
* in the range [0-255]. To be a Tcl value type, we need a way to encode each
- * value in the value set as a Tcl string. The simplest encoding is to
+ * value in the value set as a Tcl string. A simple encoding is to
* represent each byte value as the same codepoint value. A bytearray of N
* bytes is encoded into a Tcl string of N characters where the codepoint of
* each character is the value of corresponding byte. This approach creates a
@@ -181,9 +164,7 @@ static const EnsembleImplMap decodeMap[] = {
* question arises what to do with strings outside that subset? That is,
* those Tcl strings containing at least one codepoint greater than 255? The
* obviously correct answer is to raise an error! That string value does not
- * represent any valid bytearray value. Full Stop. The setFromAnyProc
- * signature has a completion code return value for just this reason, to
- * reject invalid inputs.
+ * represent any valid bytearray value.
*
* Unfortunately this was not the path taken by the authors of the original
* tclByteArrayType. They chose to accept all Tcl string values as acceptable
@@ -191,33 +172,10 @@ static const EnsembleImplMap decodeMap[] = {
* high bits of any codepoint value at all. This meant that every bytearray
* value had multiple accepted string representations.
*
- * The implications of this choice are truly ugly. When a Tcl value has a
- * string representation, we are required to accept that as the true value.
- * Bytearray values that possess a string representation cannot be processed
- * as bytearrays because we cannot know which true value that bytearray
- * represents. The consequence is that we drag around an internal rep that we
- * cannot make any use of. This painful price is extracted at any point after
- * a string rep happens to be generated for the value. This happens even when
- * the troublesome codepoints outside the byte range never show up. This
- * happens rather routinely in normal Tcl operations unless we burden the
- * script writer with the cognitive burden of avoiding it. The price is also
- * paid by callers of the C interface. The routine
- *
- * unsigned char *Tcl_GetByteArrayFromObj(objPtr, lenPtr)
- *
- * has a guarantee to always return a non-NULL value, but that value points to
- * a byte sequence that cannot be used by the caller to process the Tcl value
- * absent some sideband testing that objPtr is "pure". Tcl offers no public
- * interface to perform this test, so callers either break encapsulation or
- * are unavoidably buggy. Tcl has defined a public interface that cannot be
- * used correctly. The Tcl source code itself suffers the same problem, and
- * has been buggy, but progressively less so as more and more portions of the
- * code have been retrofitted with the required "purity testing". The set of
- * values able to pass the purity test can be increased via the introduction
- * of a "canonical" flag marker, but the only way the broken interface itself
- * can be discarded is to start over and define the Tcl_ObjType properly.
- * Bytearrays should simply be usable as bytearrays without a kabuki dance of
- * testing.
+ * The implications of this choice are truly ugly, and motivated the proposal
+ * of TIP 568 to migrate away from it and to the more sensible design where
+ * each bytearray value has only one string representation. Full details are
+ * recorded in that TIP for those who seek them.
*
* The Tcl_ObjType "properByteArrayType" is (nearly) a correct implementation
* of bytearrays. Any Tcl value with the type properByteArrayType can have
@@ -226,21 +184,24 @@ static const EnsembleImplMap decodeMap[] = {
* implies a side testing burden -- past mistakes will not let us avoid that
* immediately, but it is at least a conventional test of type, and can be
* implemented entirely by examining the objPtr fields, with no need to query
- * the internalrep, as a canonical flag would require.
- *
- * Until Tcl_GetByteArrayFromObj() and Tcl_SetByteArrayLength() can be revised
- * to admit the possibility of returning NULL when the true value is not a
- * valid bytearray, we need a mechanism to retain compatibility with the
- * deployed callers of the broken interface. That's what the retained
- * "tclByteArrayType" provides. In those unusual circumstances where we
- * convert an invalid bytearray value to a bytearray type, it is to this
- * legacy type. Essentially any time this legacy type gets used, it's a
- * signal of a bug being ignored. A TIP should be drafted to remove this
- * connection to the broken past so that Tcl 9 will no longer have any trace
- * of it. Prescribing a migration path will be the key element of that work.
- * The internal changes now in place are the limit of what can be done short
- * of interface repair. They provide a great expansion of the histories over
- * which bytearray values can be useful in the meanwhile.
+ * the internalrep, as a canonical flag would require. This benefit is made
+ * available to extensions through the public routine Tcl_GetBytesFromObj(),
+ * first available in Tcl 8.7.
+ *
+ * The public routines Tcl_GetByteArrayFromObj() and Tcl_SetByteArrayLength()
+ * must continue to follow their documented behavior through the 8.* series of
+ * releases. To support that legacy operation, we need a mechanism to retain
+ * compatibility with the deployed callers of the broken interface. That's
+ * what the retained "tclByteArrayType" provides. In those unusual
+ * circumstances where we convert an invalid bytearray value to a bytearray
+ * type, it is to this legacy type. Essentially any time this legacy type
+ * shows up, it's a signal of a bug being ignored.
+ *
+ * In Tcl 9, the incompatibility in the behavior of these public routines
+ * has been approved, and the legacy internal rep is no longer retained.
+ * The internal changes seen below are the limit of what can be done
+ * in a Tcl 8.* release. They provide a great expansion of the histories
+ * over which bytearray values can be useful.
*/
static const Tcl_ObjType properByteArrayType = {
@@ -267,15 +228,16 @@ const Tcl_ObjType tclByteArrayType = {
*/
typedef struct ByteArray {
- unsigned int bad; /* Index of the character that is a nonbyte.
- * If all characters are bytes, bad = used,
- * though then we should never read it. */
+ unsigned int bad; /* Index of first character that is a nonbyte.
+ * If all characters are bytes, bad = used. */
unsigned int used; /* The number of bytes used in the byte
- * array. */
- unsigned int allocated; /* The amount of space actually allocated
- * minus 1 byte. */
- unsigned char bytes[TCLFLEXARRAY]; /* The array of bytes. The actual size of this
- * field depends on the 'allocated' field
+ * array. Must be <= allocated. The bytes
+ * used to store the value are indexed from
+ * 0 to used-1. */
+ unsigned int allocated; /* The number of bytes of space allocated. */
+ unsigned char bytes[TCLFLEXARRAY];
+ /* The array of bytes. The actual size of this
+ * field is stored in the 'allocated' field
* above. */
} ByteArray;
@@ -301,7 +263,7 @@ TclIsPureByteArray(
* from the given array of bytes.
*
* Results:
- * The newly create object is returned. This object will have no initial
+ * The newly created object is returned. This object has no initial
* string representation. The returned object has a ref count of 0.
*
* Side effects:
@@ -316,16 +278,16 @@ Tcl_Obj *
Tcl_NewByteArrayObj(
const unsigned char *bytes, /* The array of bytes used to initialize the
* new object. */
- int length) /* Length of the array of bytes, which must be
- * >= 0. */
+ int numBytes) /* Number of bytes in the array,
+ * must be >= 0. */
{
#ifdef TCL_MEM_DEBUG
- return Tcl_DbNewByteArrayObj(bytes, length, "unknown", 0);
+ return Tcl_DbNewByteArrayObj(bytes, numBytes, "unknown", 0);
#else /* if not TCL_MEM_DEBUG */
Tcl_Obj *objPtr;
TclNewObj(objPtr);
- Tcl_SetByteArrayObj(objPtr, bytes, length);
+ Tcl_SetByteArrayObj(objPtr, bytes, numBytes);
return objPtr;
#endif /* TCL_MEM_DEBUG */
}
@@ -346,7 +308,7 @@ Tcl_NewByteArrayObj(
* result of calling Tcl_NewByteArrayObj.
*
* Results:
- * The newly create object is returned. This object will have no initial
+ * The newly created object is returned. This object has no initial
* string representation. The returned object has a ref count of 0.
*
* Side effects:
@@ -360,8 +322,8 @@ Tcl_Obj *
Tcl_DbNewByteArrayObj(
const unsigned char *bytes, /* The array of bytes used to initialize the
* new object. */
- int length, /* Length of the array of bytes, which must be
- * >= 0. */
+ int numBytes, /* Number of bytes in the array,
+ * must be >= 0. */
const char *file, /* The name of the source file calling this
* procedure; used for debugging. */
int line) /* Line number in the source file; used for
@@ -370,7 +332,7 @@ Tcl_DbNewByteArrayObj(
Tcl_Obj *objPtr;
TclDbNewObj(objPtr, file, line);
- Tcl_SetByteArrayObj(objPtr, bytes, length);
+ Tcl_SetByteArrayObj(objPtr, bytes, numBytes);
return objPtr;
}
#else /* if not TCL_MEM_DEBUG */
@@ -378,12 +340,12 @@ Tcl_Obj *
Tcl_DbNewByteArrayObj(
const unsigned char *bytes, /* The array of bytes used to initialize the
* new object. */
- int length, /* Length of the array of bytes, which must be
- * >= 0. */
+ int numBytes, /* Number of bytes in the array,
+ * must be >= 0. */
TCL_UNUSED(const char *) /*file*/,
TCL_UNUSED(int) /*line*/)
{
- return Tcl_NewByteArrayObj(bytes, length);
+ return Tcl_NewByteArrayObj(bytes, numBytes);
}
#endif /* TCL_MEM_DEBUG */
@@ -409,9 +371,9 @@ void
Tcl_SetByteArrayObj(
Tcl_Obj *objPtr, /* Object to initialize as a ByteArray. */
const unsigned char *bytes, /* The array of bytes to use as the new value.
- * May be NULL even if length > 0. */
- int length) /* Length of the array of bytes, which must
- * be >= 0. */
+ * May be NULL even if numBytes > 0. */
+ int numBytes) /* Number of bytes in the array,
+ * must be >= 0. */
{
ByteArray *byteArrayPtr;
Tcl_ObjInternalRep ir;
@@ -421,16 +383,14 @@ Tcl_SetByteArrayObj(
}
TclInvalidateStringRep(objPtr);
- if (length < 0) {
- length = 0;
- }
- byteArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length));
- byteArrayPtr->bad = length;
- byteArrayPtr->used = length;
- byteArrayPtr->allocated = length;
+ assert(numBytes >= 0);
+ byteArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(numBytes));
+ byteArrayPtr->bad = numBytes;
+ byteArrayPtr->used = numBytes;
+ byteArrayPtr->allocated = numBytes;
- if ((bytes != NULL) && (length > 0)) {
- memcpy(byteArrayPtr->bytes, bytes, length);
+ if ((bytes != NULL) && (numBytes > 0)) {
+ memcpy(byteArrayPtr->bytes, bytes, numBytes);
}
SET_BYTEARRAY(&ir, byteArrayPtr);
@@ -440,16 +400,16 @@ Tcl_SetByteArrayObj(
/*
*----------------------------------------------------------------------
*
- * TclGetBytesFromObj --
+ * Tcl_GetBytesFromObj/TclGetBytesFromObj --
*
* Attempt to extract the value from objPtr in the representation
* of a byte sequence. On success return the extracted byte sequence.
- * On failures, return NULL and record error message and code in
+ * On failure, return NULL and record error message and code in
* interp (if not NULL).
*
* Results:
- * Pointer to array of bytes, or NULL. representing the ByteArray object.
- * Writes number of bytes in array to *lengthPtr.
+ * NULL or pointer to array of bytes representing the ByteArray object.
+ * Writes number of bytes in array to *numBytesPtr.
*
*----------------------------------------------------------------------
*/
@@ -458,8 +418,47 @@ unsigned char *
TclGetBytesFromObj(
Tcl_Interp *interp, /* For error reporting */
Tcl_Obj *objPtr, /* Value to extract from */
- int *lengthPtr) /* If non-NULL, filled with length of the
- * array of bytes in the ByteArray object. */
+ int *numBytesPtr) /* If non-NULL, write the number of bytes
+ * in the array here */
+{
+ ByteArray *baPtr;
+ const Tcl_ObjInternalRep *irPtr = TclFetchInternalRep(objPtr, &properByteArrayType);
+
+ if (irPtr == NULL) {
+ SetByteArrayFromAny(NULL, objPtr);
+ irPtr = TclFetchInternalRep(objPtr, &properByteArrayType);
+ if (irPtr == NULL) {
+ if (interp) {
+ const char *nonbyte;
+ int ucs4;
+
+ irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType);
+ baPtr = GET_BYTEARRAY(irPtr);
+ nonbyte = Tcl_UtfAtIndex(Tcl_GetString(objPtr), baPtr->bad);
+ TclUtfToUCS4(nonbyte, &ucs4);
+
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "expected byte sequence but character %d "
+ "was '%1s' (U+%06X)", baPtr->bad, nonbyte, ucs4));
+ Tcl_SetErrorCode(interp, "TCL", "VALUE", "BYTES", NULL);
+ }
+ return NULL;
+ }
+ }
+ baPtr = GET_BYTEARRAY(irPtr);
+
+ if (numBytesPtr != NULL) {
+ *numBytesPtr = baPtr->used;
+ }
+ return baPtr->bytes;
+}
+#undef Tcl_GetBytesFromObj
+unsigned char *
+Tcl_GetBytesFromObj(
+ Tcl_Interp *interp, /* For error reporting */
+ Tcl_Obj *objPtr, /* Value to extract from */
+ size_t *numBytesPtr) /* If non-NULL, write the number of bytes
+ * in the array here */
{
ByteArray *baPtr;
const Tcl_ObjInternalRep *irPtr = TclFetchInternalRep(objPtr, &properByteArrayType);
@@ -487,8 +486,8 @@ TclGetBytesFromObj(
}
baPtr = GET_BYTEARRAY(irPtr);
- if (lengthPtr != NULL) {
- *lengthPtr = baPtr->used;
+ if (numBytesPtr != NULL) {
+ *numBytesPtr = baPtr->used;
}
return baPtr->bytes;
}
@@ -515,12 +514,12 @@ TclGetBytesFromObj(
unsigned char *
Tcl_GetByteArrayFromObj(
Tcl_Obj *objPtr, /* The ByteArray object. */
- int *lengthPtr) /* If non-NULL, filled with length of the
- * array of bytes in the ByteArray object. */
+ int *numBytesPtr) /* If non-NULL, write the number of bytes
+ * in the array here */
{
ByteArray *baPtr;
const Tcl_ObjInternalRep *irPtr;
- unsigned char *result = TclGetBytesFromObj(NULL, objPtr, lengthPtr);
+ unsigned char *result = TclGetBytesFromObj(NULL, objPtr, numBytesPtr);
if (result) {
return result;
@@ -531,8 +530,8 @@ Tcl_GetByteArrayFromObj(
baPtr = GET_BYTEARRAY(irPtr);
- if (lengthPtr != NULL) {
- *lengthPtr = baPtr->used;
+ if (numBytesPtr != NULL) {
+ *numBytesPtr = baPtr->used;
}
return (unsigned char *) baPtr->bytes;
}
@@ -540,12 +539,12 @@ Tcl_GetByteArrayFromObj(
unsigned char *
TclGetByteArrayFromObj(
Tcl_Obj *objPtr, /* The ByteArray object. */
- size_t *lengthPtr) /* If non-NULL, filled with length of the
- * array of bytes in the ByteArray object. */
+ size_t *numBytesPtr) /* If non-NULL, write the number of bytes
+ * in the array here */
{
ByteArray *baPtr;
const Tcl_ObjInternalRep *irPtr;
- unsigned char *result = TclGetBytesFromObj(NULL, objPtr, (int *)NULL);
+ unsigned char *result = Tcl_GetBytesFromObj(NULL, objPtr, numBytesPtr);
if (result) {
return result;
@@ -556,11 +555,12 @@ TclGetByteArrayFromObj(
baPtr = GET_BYTEARRAY(irPtr);
- if (lengthPtr != NULL) {
+ if (numBytesPtr != NULL) {
#if TCL_MAJOR_VERSION > 8
- *lengthPtr = baPtr->used;
+ *numBytesPtr = baPtr->used;
#else
- *lengthPtr = ((size_t)(unsigned)(baPtr->used + 1)) - 1;
+ /* TODO: What's going on here? Document or eliminate. */
+ *numBytesPtr = ((size_t)(unsigned)(baPtr->used + 1)) - 1;
#endif
}
return baPtr->bytes;
@@ -591,14 +591,14 @@ TclGetByteArrayFromObj(
unsigned char *
Tcl_SetByteArrayLength(
Tcl_Obj *objPtr, /* The ByteArray object. */
- int length) /* New length for internal byte array. */
+ int numBytes) /* Number of bytes in resized array */
{
ByteArray *byteArrayPtr;
unsigned newLength;
Tcl_ObjInternalRep *irPtr;
- assert(length >= 0);
- newLength = (unsigned int)length;
+ assert(numBytes >= 0);
+ newLength = (unsigned int)numBytes;
if (Tcl_IsShared(objPtr)) {
Tcl_Panic("%s called with shared object", "Tcl_SetByteArrayLength");
@@ -616,6 +616,9 @@ Tcl_SetByteArrayLength(
}
}
+ /* Note that during truncation, the implementation does not free
+ * memory that is no longer needed. */
+
byteArrayPtr = GET_BYTEARRAY(irPtr);
if (newLength > byteArrayPtr->allocated) {
byteArrayPtr = (ByteArray *)ckrealloc(byteArrayPtr, BYTEARRAY_SIZE(newLength));
@@ -668,6 +671,9 @@ SetByteArrayFromAny(
length = bad = objPtr->length;
srcEnd = src + length;
+ /* Note the allocation is over-sized, possibly by a factor of four,
+ * or even a factor of two with a proper byte array value. */
+
byteArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length));
for (dst = byteArrayPtr->bytes; src < srcEnd; ) {
src += TclUtfToUniChar(src, &ch);
@@ -836,7 +842,6 @@ UpdateStringOfByteArray(
for (i = 0; i < length; i++) {
dst += Tcl_UniCharToUtf(src[i], dst);
}
- (void) Tcl_InitStringRep(objPtr, NULL, size);
}
}
@@ -1503,7 +1508,7 @@ BinaryFormatCmd(
*----------------------------------------------------------------------
*/
-int
+static int
BinaryScanCmd(
TCL_UNUSED(ClientData),
Tcl_Interp *interp, /* Current interpreter. */
@@ -2123,7 +2128,7 @@ CopyNumber(
*
* FormatNumber --
*
- * This routine is called by Tcl_BinaryObjCmd to format a number into a
+ * This routine is called by BinaryFormatCmd to format a number into a
* location pointed at by cursor.
*
* Results:
@@ -2292,7 +2297,7 @@ FormatNumber(
*
* ScanNumber --
*
- * This routine is called by Tcl_BinaryObjCmd to scan a number out of a
+ * This routine is called by BinaryScanCmd to scan a number out of a
* buffer.
*
* Results: