diff options
-rw-r--r-- | doc/ByteArrObj.3 | 176 | ||||
-rw-r--r-- | doc/binary.n | 11 | ||||
-rw-r--r-- | generic/tclBinary.c | 196 | ||||
-rw-r--r-- | generic/tclDecls.h | 4 |
4 files changed, 181 insertions, 206 deletions
diff --git a/doc/ByteArrObj.3 b/doc/ByteArrObj.3 index 053401a..7fdaea6 100644 --- a/doc/ByteArrObj.3 +++ b/doc/ByteArrObj.3 @@ -4,87 +4,147 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -.TH Tcl_ByteArrayObj 3 8.1 Tcl "Tcl Library Procedures" +.TH Tcl_ByteArrayObj 3 9.0 Tcl "Tcl Library Procedures" .so man.macros .BS .SH NAME -Tcl_NewByteArrayObj, Tcl_SetByteArrayObj, Tcl_GetByteArrayFromObj, Tcl_SetByteArrayLength \- manipulate Tcl values as a arrays of bytes +Tcl_NewByteArrayObj, Tcl_SetByteArrayObj, Tcl_GetBytesFromObj, Tcl_GetByteArrayFromObj, Tcl_SetByteArrayLength \- manipulate a Tcl value as an array of bytes .SH SYNOPSIS .nf \fB#include <tcl.h>\fR .sp Tcl_Obj * -\fBTcl_NewByteArrayObj\fR(\fIbytes, length\fR) +\fBTcl_NewByteArrayObj\fR(\fIbytes, numBytes\fR) .sp void -\fBTcl_SetByteArrayObj\fR(\fIobjPtr, bytes, length\fR) +\fBTcl_SetByteArrayObj\fR(\fIobjPtr, bytes, numBytes\fR) .sp +.VS TIP568 unsigned char * -\fBTcl_GetByteArrayFromObj\fR(\fIobjPtr, lengthPtr\fR) +\fBTcl_GetBytesFromObj\fR(\fIinterp, objPtr, numBytesPtr\fR) +.VE TIP568 .sp unsigned char * -\fBTcl_SetByteArrayLength\fR(\fIobjPtr, length\fR) +\fBTcl_GetByteArrayFromObj\fR(\fIobjPtr, numBytesPtr\fR) +.sp +unsigned char * +\fBTcl_SetByteArrayLength\fR(\fIobjPtr, numBytes\fR) .SH ARGUMENTS -.AS "const unsigned char" *lengthPtr in/out +.AS "const unsigned char" *numBytesPtr in/out .AP "const unsigned char" *bytes in The array of bytes used to initialize or set a byte-array value. May be NULL -even if \fIlength\fR is non-zero. -.AP size_t length in -The length of the array of bytes. +even if \fInumBytes\fR is non-zero. +.AP size_t numBytes in +The number of bytes in the array. .AP Tcl_Obj *objPtr in/out -For \fBTcl_SetByteArrayObj\fR, this points to the value to be converted to -byte-array type. For \fBTcl_GetByteArrayFromObj\fR and -\fBTcl_SetByteArrayLength\fR, this points to the value from which to get -the byte-array value; if \fIobjPtr\fR does not already point to a byte-array -value, it will be converted to one. -.AP size_t | int *lengthPtr out -Filled with the length of the array of bytes in the value. -May be (int *)NULL when not used. +For \fBTcl_SetByteArrayObj\fR, this points to an unshared value to be +overwritten by a byte-array value. For \fBTcl_GetBytesFromObj\fR, +\fBTcl_GetByteArrayFromObj\fR and \fBTcl_SetByteArrayLength\fR, this points +to the value from which to extract an array of bytes. +.AP Tcl_Interp *interp in +Interpreter to use for error reporting. +.AP "size_t | int" *numBytesPtr out +Points to space where the number of bytes in the array may be written. +Caller may pass NULL when it does not need this information. .BE .SH DESCRIPTION .PP -These procedures are used to create, modify, and read Tcl byte-array values -from C code. Byte-array values are typically used to hold the -results of binary IO operations or data structures created with the -\fBbinary\fR command. In Tcl, an array of bytes is not equivalent to a -string. Conceptually, a string is an array of Unicode characters, while a -byte-array is an array of 8-bit quantities with no implicit meaning. -Accessor functions are provided to get the string representation of a -byte-array or to convert an arbitrary value to a byte-array. Obtaining the +These routines are used to create, modify, store, transfer, and retrieve +arbitrary binary data in Tcl values. Specifically, data that can be +represented as a sequence of arbitrary byte values is supported. +This includes data read from binary channels, values created by the +\fBbinary\fR command, encrypted data, or other information representable as +a finite byte sequence. +.PP +A byte is an 8-bit quantity with no inherent meaning. When the 8 bits are +interpreted as an integer value, the range of possible values is (0-255). +The C type best suited to store a byte is the \fBunsigned char\fR. +An \fBunsigned char\fR array of size \fIN\fR stores an aribtrary binary +value of size \fIN\fR bytes. We call this representation a byte-array. +Here we document the routines that allow us to operate on Tcl values as +byte-arrays. +.PP +All Tcl values must correspond to a string representation. +When a byte-array value must be processed as a string, the sequence +of \fIN\fR bytes is transformed into the corresponding sequence +of \fIN\fR characters, where each byte value transforms to the same +character codepoint value in the range (U+0000 - U+00FF). Obtaining the string representation of a byte-array value (by calling -\fBTcl_GetStringFromObj\fR) produces a properly formed UTF-8 sequence with a -one-to-one mapping between the bytes in the internal representation and the -UTF-8 characters in the string representation. +\fBTcl_GetStringFromObj\fR) produces this string in Tcl's usual +Modified UTF-8 encoding. .PP -\fBTcl_NewByteArrayObj\fR and \fBTcl_SetByteArrayObj\fR will -create a new value of byte-array type or modify an existing value to have a -byte-array type. Both of these procedures set the value's type to be -byte-array and set the value's internal representation to a copy of the -array of bytes given by \fIbytes\fR. \fBTcl_NewByteArrayObj\fR returns a -pointer to a newly allocated value with a reference count of zero. -\fBTcl_SetByteArrayObj\fR invalidates any old string representation and, if -the value is not already a byte-array value, frees any old internal -representation. If \fIbytes\fR is NULL then the new byte array contains -arbitrary values. +\fBTcl_NewByteArrayObj\fR and \fBTcl_SetByteArrayObj\fR +create a new value or overwrite an existing unshared value, respectively, +to hold a byte-array value of \fInumBytes\fR bytes. When a caller +passes a non-NULL value of \fIbytes\fR, it must point to memory from +which \fInumBytes\fR bytes can be read. These routines +allocate \fInumBytes\fR bytes of memory, copy \fInumBytes\fR +bytes from \fIbytes\fR into it, and keep the result in the internal +representation of the new or overwritten value. +When the caller passes a NULL value of \fIbytes\fR, the data copying +step is skipped, and the bytes stored in the value are undefined. +A \fIbytes\fR value of NULL is useful only when the caller will arrange +to write known contents into the byte-array through a pointer retrieved +by a call to one of the routines explained below. \fBTcl_NewByteArrayObj\fR +returns a pointer to the created value with a reference count of zero. +\fBTcl_SetByteArrayObj\fR overwrites and invalidates any old contents +of the unshared \fIobjPtr\fR as appropriate, and keeps its reference +count (0 or 1) unchanged. The value produced by these routines has no +string representation. Any memory allocation failure may cause a panic. .PP -\fBTcl_GetByteArrayFromObj\fR converts a Tcl value to byte-array type and -returns a pointer to the value's new internal representation as an array of -bytes. The length of this array is stored in \fIlengthPtr\fR if -\fIlengthPtr\fR is non-NULL. The storage for the array of bytes is owned by -the value and should not be freed. The contents of the array may be -modified by the caller only if the value is not shared and the caller -invalidates the string representation. +\fBTcl_GetBytesFromObj\fR performs the opposite function of +\fBTcl_SetByteArrayObj\fR, providing access to read a byte-array from +a Tcl value that was previously written into it. When \fIobjPtr\fR +is a value previously produced by \fBTcl_NewByteArrayObj\fR or +\fBTcl_SetByteArrayObj\fR, then \fBTcl_GetBytesFromObj\fR returns +a pointer to the byte-array kept in the value's internal representation. +If the caller provides a non-NULL value for \fInumBytesPtr\fR, it must +point to memory where \fBTcl_GetBytesFromObj\fR can write the number +of bytes in the value's internal byte-array. With both pieces of +information, the caller is able to retrieve any information about the +contents of that byte-array that it seeks. When \fIobjPtr\fR does +not already contain an internal byte-array, \fBTcl_GetBytesFromObj\fR +will try to create one from the value's string representation. Any +string value that does not include any character codepoints outside +the range (U+0000 - U+00FF) will successfully translate to a unique +byte-array value. With the created byte-array, the routine returns +as before. For any string representation which does contain +a forbidden character codepoint, the conversion fails, and +\fBTcl_GetBytesFromObj\fR returns NULL to signal that failure. On +failure, nothing will be written to \fInumBytesPtr\fR, and if +the \fIinterp\fR argument is non-NULL, then error messages and +codes are left in it recording the error. .PP -\fBTcl_SetByteArrayLength\fR converts the Tcl value to byte-array type -and changes the length of the value's internal representation as an -array of bytes. If \fIlength\fR is greater than the space currently -allocated for the array, the array is reallocated to the new length; the -newly allocated bytes at the end of the array have arbitrary values. If -\fIlength\fR is less than the space currently allocated for the array, -the length of array is reduced to the new length. The return value is a -pointer to the value's new array of bytes. - +\fBTcl_GetByteArrayFromObj\fR performs exactly the same function as +\fBTcl_GetBytesFromObj\fR does when called with the \fIinterp\fR +argument passed the value NULL. This is incompatible with the +way \fBTcl_GetByteArrayFromObj\fR functioned in Tcl 8. +\fBTcl_GetBytesFromObj\fR is the more capable interface and should +usually be favored for use over \fBTcl_GetByteArrayFromObj\fR. +.PP +On success, both \fBTcl_GetByteFromObj\fR and \fBTcl_GetByteArrayFromObj\fR +return a pointer into the internal representation of a \fBTcl_Obj\fR. +That pointer must not be freed by the caller, and should not be retained +for use beyond the known time the internal representation of the value +has not been disturbed. The pointer may be used to overwrite the byte +contents of the internal representation, so long as the value is unshared +and any string representation is invalidated. +.PP +\fBTcl_SetByteArrayLength\fR enables a caller to change the size of a +byte-array in the internal representation of an unshared \fIobjPtr\fR to +become \fInumBytes\fR bytes. This is most often useful after the +bytes of the internal byte-array have been directly overwritten and it +has been discovered that the required size differs from the first +estimate used in the allocation. \fBTcl_SetByteArrayLength\fR returns +a pointer to the resized byte-array. Because resizing the byte-array +changes the internal representation, \fBTcl_SetByteArrayLength\fR +also invalidates any string representation in \fIobjPtr\fR. If resizing +grows the byte-array, the new byte values are undefined. If \fIobjPtr\fR +does not already possess an internal byte-array, one is produced in the +same way that \fBTcl_GetBytesFromObj\fR does, also returning NULL +when any characters of the value in \fIobjPtr\fR (up to +\fInumBytes\fR of them) are not valid bytes. .SH "REFERENCE COUNT MANAGEMENT" .PP \fBTcl_NewByteArrayObj\fR always returns a zero-reference object, much @@ -94,11 +154,11 @@ like \fBTcl_NewObj\fR. reference count of their \fIobjPtr\fR arguments, but do require that the object be unshared. .PP -\fBTcl_GetByteArrayFromObj\fR does not modify the reference count of its -\fIobjPtr\fR argument; it only reads. +\fBTcl_GetBytesFromObj\fR and \fBTcl_GetByteArrayFromObj\fR do not modify +the reference count of \fIobjPtr\fR; they only read. .SH "SEE ALSO" Tcl_GetStringFromObj, Tcl_NewObj, Tcl_IncrRefCount, Tcl_DecrRefCount .SH KEYWORDS -value, binary data, byte array, utf, unicode, internationalization +value, binary data, byte array, utf, unicode diff --git a/doc/binary.n b/doc/binary.n index 9ab694e..1a62ad6 100644 --- a/doc/binary.n +++ b/doc/binary.n @@ -44,8 +44,9 @@ the range \eu0000\-\eu00FF. When encoding binary data as a readable string, the starting binary data is passed to the \fBbinary encode\fR command, together with the name of the encoding to use and any encoding-specific options desired. Data which has been -encoded can be converted back to binary form using \fBbinary decode\fR. The -following formats and options are supported. +encoded can be converted back to binary form using \fBbinary decode\fR. +The \fBbinary encode\fR command raises an error if the \fIdata\fR argument +is not binary data. The following formats and options are supported. .TP \fBbase64\fR . @@ -607,9 +608,9 @@ will return .PP The \fBbinary scan\fR command parses fields from a binary string, returning the number of conversions performed. \fIString\fR gives the -input bytes to be parsed (one byte per character, and characters not -representable as a byte have their high bits chopped) -and \fIformatString\fR indicates how to parse it. +input bytes to be parsed and \fIformatString\fR indicates how to parse it. +An error is raised if \fIstring\fR is anything other than a valid binary +data value. Each \fIvarName\fR gives the name of a variable; when a field is scanned from \fIstring\fR the result is assigned to the corresponding variable. diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 0486521..af16ca0 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -140,84 +140,21 @@ static const EnsembleImplMap decodeMap[] = { }; /* - * The following object types represent an array of bytes. The intent is to + * The following Tcl_ObjType represents an array of bytes. The intent is to * allow arbitrary binary data to pass through Tcl as a Tcl value without loss * or damage. Such values are useful for things like encoded strings or Tk * images to name just two. * * A bytearray is an ordered sequence of bytes. Each byte is an integer value * in the range [0-255]. To be a Tcl value type, we need a way to encode each - * value in the value set as a Tcl string. The simplest encoding is to + * value in the value set as a Tcl string. A simple encoding is to * represent each byte value as the same codepoint value. A bytearray of N * bytes is encoded into a Tcl string of N characters where the codepoint of * each character is the value of corresponding byte. This approach creates a * one-to-one map between all bytearray values and a subset of Tcl string - * values. - * - * When converting a Tcl string value to the bytearray internal rep, the - * question arises what to do with strings outside that subset? That is, - * those Tcl strings containing at least one codepoint greater than 255? The - * obviously correct answer is to raise an error! That string value does not - * represent any valid bytearray value. Full Stop. The setFromAnyProc - * signature has a completion code return value for just this reason, to - * reject invalid inputs. - * - * Unfortunately this was not the path taken by the authors of the original - * tclByteArrayType. They chose to accept all Tcl string values as acceptable - * string encodings of the bytearray values that result from masking away the - * high bits of any codepoint value at all. This meant that every bytearray - * value had multiple accepted string representations. - * - * The implications of this choice are truly ugly. When a Tcl value has a - * string representation, we are required to accept that as the true value. - * Bytearray values that possess a string representation cannot be processed - * as bytearrays because we cannot know which true value that bytearray - * represents. The consequence is that we drag around an internal rep that we - * cannot make any use of. This painful price is extracted at any point after - * a string rep happens to be generated for the value. This happens even when - * the troublesome codepoints outside the byte range never show up. This - * happens rather routinely in normal Tcl operations unless we burden the - * script writer with the cognitive burden of avoiding it. The price is also - * paid by callers of the C interface. The routine - * - * unsigned char *Tcl_GetByteArrayFromObj(objPtr, lenPtr) - * - * has a guarantee to always return a non-NULL value, but that value points to - * a byte sequence that cannot be used by the caller to process the Tcl value - * absent some sideband testing that objPtr is "pure". Tcl offers no public - * interface to perform this test, so callers either break encapsulation or - * are unavoidably buggy. Tcl has defined a public interface that cannot be - * used correctly. The Tcl source code itself suffers the same problem, and - * has been buggy, but progressively less so as more and more portions of the - * code have been retrofitted with the required "purity testing". The set of - * values able to pass the purity test can be increased via the introduction - * of a "canonical" flag marker, but the only way the broken interface itself - * can be discarded is to start over and define the Tcl_ObjType properly. - * Bytearrays should simply be usable as bytearrays without a kabuki dance of - * testing. - * - * The Tcl_ObjType "properByteArrayType" is (nearly) a correct implementation - * of bytearrays. Any Tcl value with the type properByteArrayType can have - * its bytearray value fetched and used with confidence that acting on that - * value is equivalent to acting on the true Tcl string value. This still - * implies a side testing burden -- past mistakes will not let us avoid that - * immediately, but it is at least a conventional test of type, and can be - * implemented entirely by examining the objPtr fields, with no need to query - * the intrep, as a canonical flag would require. - * - * Until Tcl_GetByteArrayFromObj() and Tcl_SetByteArrayLength() can be revised - * to admit the possibility of returning NULL when the true value is not a - * valid bytearray, we need a mechanism to retain compatibility with the - * deployed callers of the broken interface. That's what the retained - * "tclByteArrayType" provides. In those unusual circumstances where we - * convert an invalid bytearray value to a bytearray type, it is to this - * legacy type. Essentially any time this legacy type gets used, it's a - * signal of a bug being ignored. A TIP should be drafted to remove this - * connection to the broken past so that Tcl 9 will no longer have any trace - * of it. Prescribing a migration path will be the key element of that work. - * The internal changes now in place are the limit of what can be done short - * of interface repair. They provide a great expansion of the histories over - * which bytearray values can be useful in the meanwhile. + * values. Tcl string values outside that subset do no represent any valid + * bytearray value. Attempts to treat those values as bytearrays will lead + * to errors. See TIP 568 for how this differs from Tcl 8. */ static const Tcl_ObjType properByteArrayType = { @@ -282,15 +219,15 @@ Tcl_Obj * Tcl_NewByteArrayObj( const unsigned char *bytes, /* The array of bytes used to initialize the * new object. */ - size_t length) /* Length of the array of bytes */ + size_t numBytes) /* Number of bytes in the array */ { #ifdef TCL_MEM_DEBUG - return Tcl_DbNewByteArrayObj(bytes, length, "unknown", 0); + return Tcl_DbNewByteArrayObj(bytes, numBytes, "unknown", 0); #else /* if not TCL_MEM_DEBUG */ Tcl_Obj *objPtr; TclNewObj(objPtr); - Tcl_SetByteArrayObj(objPtr, bytes, length); + Tcl_SetByteArrayObj(objPtr, bytes, numBytes); return objPtr; #endif /* TCL_MEM_DEBUG */ } @@ -311,7 +248,7 @@ Tcl_NewByteArrayObj( * result of calling Tcl_NewByteArrayObj. * * Results: - * The newly create object is returned. This object will have no initial + * The newly created object is returned. This object has no initial * string representation. The returned object has a ref count of 0. * * Side effects: @@ -325,7 +262,7 @@ Tcl_Obj * Tcl_DbNewByteArrayObj( const unsigned char *bytes, /* The array of bytes used to initialize the * new object. */ - size_t length, /* Length of the array of bytes. */ + size_t numBytes, /* Number of bytes in the array */ const char *file, /* The name of the source file calling this * procedure; used for debugging. */ int line) /* Line number in the source file; used for @@ -334,7 +271,7 @@ Tcl_DbNewByteArrayObj( Tcl_Obj *objPtr; TclDbNewObj(objPtr, file, line); - Tcl_SetByteArrayObj(objPtr, bytes, length); + Tcl_SetByteArrayObj(objPtr, bytes, numBytes); return objPtr; } #else /* if not TCL_MEM_DEBUG */ @@ -342,12 +279,11 @@ Tcl_Obj * Tcl_DbNewByteArrayObj( const unsigned char *bytes, /* The array of bytes used to initialize the * new object. */ - size_t length, /* Length of the array of bytes, which must be - * >= 0. */ + size_t numBytes, /* Number of bytes in the array */ TCL_UNUSED(const char *) /*file*/, TCL_UNUSED(int) /*line*/) { - return Tcl_NewByteArrayObj(bytes, length); + return Tcl_NewByteArrayObj(bytes, numBytes); } #endif /* TCL_MEM_DEBUG */ @@ -373,9 +309,8 @@ void Tcl_SetByteArrayObj( Tcl_Obj *objPtr, /* Object to initialize as a ByteArray. */ const unsigned char *bytes, /* The array of bytes to use as the new value. - * May be NULL even if length > 0. */ - size_t length) /* Length of the array of bytes, which must - * be >= 0. */ + * May be NULL even if numBytes > 0. */ + size_t numBytes) /* Number of bytes in the array */ { ByteArray *byteArrayPtr; Tcl_ObjInternalRep ir; @@ -385,12 +320,12 @@ Tcl_SetByteArrayObj( } TclInvalidateStringRep(objPtr); - byteArrayPtr = (ByteArray *)Tcl_Alloc(BYTEARRAY_SIZE(length)); - byteArrayPtr->used = length; - byteArrayPtr->allocated = length; + byteArrayPtr = (ByteArray *)Tcl_Alloc(BYTEARRAY_SIZE(numBytes)); + byteArrayPtr->used = numBytes; + byteArrayPtr->allocated = numBytes; - if ((bytes != NULL) && (length > 0)) { - memcpy(byteArrayPtr->bytes, bytes, length); + if ((bytes != NULL) && (numBytes > 0)) { + memcpy(byteArrayPtr->bytes, bytes, numBytes); } SET_BYTEARRAY(&ir, byteArrayPtr); @@ -408,8 +343,8 @@ Tcl_SetByteArrayObj( * interp (if not NULL). * * Results: - * Pointer to array of bytes, or NULL. representing the ByteArray object. - * Writes number of bytes in array to *lengthPtr. + * NULL or pointer to array of bytes representing the ByteArray object. + * Writes number of bytes in array to *numBytesPtr. * *---------------------------------------------------------------------- */ @@ -419,11 +354,12 @@ unsigned char * Tcl_GetBytesFromObj( Tcl_Interp *interp, /* For error reporting */ Tcl_Obj *objPtr, /* Value to extract from */ - size_t *lengthPtr) /* If non-NULL, filled with length of the - * array of bytes in the ByteArray object. */ + size_t *numBytesPtr) /* If non-NULL, write the number of bytes + * in the array here */ { ByteArray *baPtr; - const Tcl_ObjInternalRep *irPtr = TclFetchInternalRep(objPtr, &properByteArrayType); + const Tcl_ObjInternalRep *irPtr + = TclFetchInternalRep(objPtr, &properByteArrayType); if (irPtr == NULL) { if (TCL_ERROR == SetByteArrayFromAny(interp, TCL_INDEX_NONE, objPtr)) { @@ -433,8 +369,8 @@ Tcl_GetBytesFromObj( } baPtr = GET_BYTEARRAY(irPtr); - if (lengthPtr != NULL) { - *lengthPtr = baPtr->used; + if (numBytesPtr != NULL) { + *numBytesPtr = baPtr->used; } return baPtr->bytes; } @@ -443,26 +379,25 @@ unsigned char * TclGetBytesFromObj( Tcl_Interp *interp, /* For error reporting */ Tcl_Obj *objPtr, /* Value to extract from */ - int *lengthPtr) /* If non-NULL, filled with length of the - * array of bytes in the ByteArray object. */ + int *numBytesPtr) /* If non-NULL, write the number of bytes + * in the array here */ { size_t numBytes = 0; unsigned char *bytes = Tcl_GetBytesFromObj(interp, objPtr, &numBytes); - if (lengthPtr) { + if (bytes && numBytesPtr) { if (numBytes > INT_MAX) { - /* Caller asked for an int length, but true length is outside - * the int range. This case will be developed out of existence - * in Tcl 9. As interim measure, fail. */ + /* Caller asked for numBytes to be written to an int, but the + * value is outside the int range. */ if (interp) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "byte sequence length exceeds INT_MAX", -1)); + Tcl_SetErrorCode(interp, "TCL", "API", "OUTDATED", NULL); } - *lengthPtr = 0; return NULL; } else { - *lengthPtr = (int) numBytes; + *numBytesPtr = (int) numBytes; } } return bytes; @@ -490,42 +425,19 @@ TclGetBytesFromObj( unsigned char * TclGetByteArrayFromObj( Tcl_Obj *objPtr, /* The ByteArray object. */ - int *lengthPtr) /* If non-NULL, filled with length of the - * array of bytes in the ByteArray object. */ + int *numBytesPtr) /* If non-NULL, write the number of bytes + * in the array here */ { - size_t numBytes = 0; - unsigned char *bytes = Tcl_GetBytesFromObj(NULL, objPtr, &numBytes); - - /* Macro TclGetByteArrayFromObj passes NULL for lengthPtr as - * a trick to get around changing size. */ - if (lengthPtr) { - if (numBytes > INT_MAX) { - /* Caller asked for an int length, but true length is outside - * the int range. */ - *lengthPtr = 0; - return NULL; - } else { - *lengthPtr = (int) numBytes; - } - } - return bytes; + return TclGetBytesFromObj(NULL, objPtr, numBytesPtr); } unsigned char * Tcl_GetByteArrayFromObj( Tcl_Obj *objPtr, /* The ByteArray object. */ - size_t *lengthPtr) /* If non-NULL, filled with length of the - * array of bytes in the ByteArray object. */ + size_t *numBytesPtr) /* If non-NULL, write the number of bytes + * in the array here */ { - size_t numBytes = 0; - unsigned char *bytes = Tcl_GetBytesFromObj(NULL, objPtr, &numBytes); - - /* Macro TclGetByteArrayFromObj passes NULL for lengthPtr as - * a trick to get around changing size. */ - if (lengthPtr) { - *lengthPtr = numBytes; - } - return bytes; + return Tcl_GetBytesFromObj(NULL, objPtr, numBytesPtr); } /* @@ -553,7 +465,7 @@ Tcl_GetByteArrayFromObj( unsigned char * Tcl_SetByteArrayLength( Tcl_Obj *objPtr, /* The ByteArray object. */ - size_t length) /* New length for internal byte array. */ + size_t numBytes) /* Number of bytes in resized array */ { ByteArray *byteArrayPtr; Tcl_ObjInternalRep *irPtr; @@ -564,20 +476,21 @@ Tcl_SetByteArrayLength( irPtr = TclFetchInternalRep(objPtr, &properByteArrayType); if (irPtr == NULL) { - if (TCL_ERROR == SetByteArrayFromAny(NULL, length, objPtr)) { + if (TCL_ERROR == SetByteArrayFromAny(NULL, numBytes, objPtr)) { return NULL; } irPtr = TclFetchInternalRep(objPtr, &properByteArrayType); } byteArrayPtr = GET_BYTEARRAY(irPtr); - if (length > byteArrayPtr->allocated) { - byteArrayPtr = (ByteArray *)Tcl_Realloc(byteArrayPtr, BYTEARRAY_SIZE(length)); - byteArrayPtr->allocated = length; + if (numBytes > byteArrayPtr->allocated) { + byteArrayPtr = (ByteArray *)Tcl_Realloc(byteArrayPtr, + BYTEARRAY_SIZE(numBytes)); + byteArrayPtr->allocated = numBytes; SET_BYTEARRAY(irPtr, byteArrayPtr); } TclInvalidateStringRep(objPtr); - byteArrayPtr->used = length; + byteArrayPtr->used = numBytes; return byteArrayPtr->bytes; } @@ -645,7 +558,7 @@ MakeByteArray( *dst++ = UCHAR(ch); } byteArrayPtr->used = dst - byteArrayPtr->bytes; - byteArrayPtr->allocated = length; + byteArrayPtr->allocated = numBytes; *byteArrayPtrPtr = byteArrayPtr; return proper; @@ -873,9 +786,14 @@ TclAppendBytesToByteArray( } byteArrayPtr = GET_BYTEARRAY(irPtr); + /* Size limit check now commented out. Used to protect calls to + * Tcl_*Alloc*() limited by unsigned int arguments. + * if (len > UINT_MAX - byteArrayPtr->used) { Tcl_Panic("max size for a Tcl value (%u bytes) exceeded", UINT_MAX); } + * + */ needed = byteArrayPtr->used + len; /* @@ -899,11 +817,7 @@ TclAppendBytesToByteArray( * Try to allocate double the increment that is needed (plus). */ - size_t limit = UINT_MAX - needed; - size_t extra = len + TCL_MIN_GROWTH; - size_t growth = (extra > limit) ? limit : extra; - - attempt = needed + growth; + attempt = needed + len + TCL_MIN_GROWTH; ptr = (ByteArray *)Tcl_AttemptRealloc(byteArrayPtr, BYTEARRAY_SIZE(attempt)); } if (ptr == NULL) { diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 71e49c4..459ddc5 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -3839,14 +3839,14 @@ extern const TclStubs *tclStubsPtr; #define Tcl_GetStringFromObj(objPtr, sizePtr) \ (sizeof(*sizePtr) <= sizeof(int) ? tclStubsPtr->tclGetStringFromObj(objPtr, (int *)sizePtr) : tclStubsPtr->tcl_GetStringFromObj(objPtr, (size_t *)sizePtr)) #define Tcl_GetByteArrayFromObj(objPtr, sizePtr) \ - (sizeof(*sizePtr) <= sizeof(int) ? tclStubsPtr->tclGetByteArrayFromObj(objPtr, (int *)sizePtr) : tclStubsPtr->tcl_GetByteArrayFromObj(objPtr, (size_t *)sizePtr)) + (sizeof(*sizePtr) <= sizeof(int) ? tclStubsPtr->tclGetBytesFromObj(NULL, objPtr, (int *)sizePtr) : tclStubsPtr->tcl_GetBytesFromObj(NULL, objPtr, (size_t *)sizePtr)) #define Tcl_GetUnicodeFromObj(objPtr, sizePtr) \ (sizeof(*sizePtr) <= sizeof(int) ? tclStubsPtr->tclGetUnicodeFromObj(objPtr, (int *)sizePtr) : tclStubsPtr->tcl_GetUnicodeFromObj(objPtr, (size_t *)sizePtr)) #else #define Tcl_GetStringFromObj(objPtr, sizePtr) \ (sizeof(*sizePtr) <= sizeof(int) ? (TclGetStringFromObj)(objPtr, (int *)sizePtr) : (Tcl_GetStringFromObj)(objPtr, (size_t *)sizePtr)) #define Tcl_GetByteArrayFromObj(objPtr, sizePtr) \ - (sizeof(*sizePtr) <= sizeof(int) ? (TclGetByteArrayFromObj)(objPtr, (int *)sizePtr) : Tcl_GetByteArrayFromObj(objPtr, (size_t *)sizePtr)) + (sizeof(*sizePtr) <= sizeof(int) ? (TclGetBytesFromObj)(NULL, objPtr, (int *)sizePtr) : Tcl_GetBytesFromObj(NULL, objPtr, (size_t *)sizePtr)) #define Tcl_GetUnicodeFromObj(objPtr, sizePtr) \ (sizeof(*sizePtr) <= sizeof(int) ? (TclGetUnicodeFromObj)(objPtr, (int *)sizePtr) : Tcl_GetUnicodeFromObj(objPtr, (size_t *)sizePtr)) #endif |