diff options
Diffstat (limited to 'generic/tclBinary.c')
-rw-r--r-- | generic/tclBinary.c | 735 |
1 files changed, 513 insertions, 222 deletions
diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 5d317fa..5ac08e9 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -2,19 +2,20 @@ * tclBinary.c -- * * This file contains the implementation of the "binary" Tcl built-in - * command and the Tcl binary data object. + * command and the Tcl value internal representation for binary data. * - * Copyright (c) 1997 by Sun Microsystems, Inc. - * Copyright (c) 1998-1999 by Scriptics Corporation. + * Copyright © 1997 Sun Microsystems, Inc. + * Copyright © 1998-1999 Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ #include "tclInt.h" -#include "tommath.h" +#include "tclTomMath.h" #include <math.h> +#include <assert.h> /* * The following constants are used by GetFormatSpec to indicate various @@ -56,9 +57,12 @@ static void DupByteArrayInternalRep(Tcl_Obj *srcPtr, Tcl_Obj *copyPtr); +static void DupProperByteArrayInternalRep(Tcl_Obj *srcPtr, + Tcl_Obj *copyPtr); static int FormatNumber(Tcl_Interp *interp, int type, Tcl_Obj *src, unsigned char **cursorPtr); static void FreeByteArrayInternalRep(Tcl_Obj *objPtr); +static void FreeProperByteArrayInternalRep(Tcl_Obj *objPtr); static int GetFormatSpec(const char **formatPtr, char *cmdPtr, int *countPtr, int *flagsPtr); static Tcl_Obj * ScanNumber(unsigned char *buffer, int type, @@ -139,35 +143,80 @@ static const EnsembleImplMap decodeMap[] = { }; /* - * The following object type represents an array of bytes. An array of bytes - * is not equivalent to an internationalized string. Conceptually, a string is - * an array of 16-bit quantities organized as a sequence of properly formed - * UTF-8 characters, while a ByteArray is an array of 8-bit quantities. - * Accessor functions are provided to convert a ByteArray to a String or a - * String to a ByteArray. Two or more consecutive bytes in an array of bytes - * may look like a single UTF-8 character if the array is casually treated as - * a string. But obtaining the String from a ByteArray is guaranteed to - * produced properly formed UTF-8 sequences so that there is a one-to-one map - * between bytes and characters. - * - * Converting a ByteArray to a String proceeds by casting each byte in the - * array to a 16-bit quantity, treating that number as a Unicode character, - * and storing the UTF-8 version of that Unicode character in the String. For - * ByteArrays consisting entirely of values 1..127, the corresponding String - * representation is the same as the ByteArray representation. - * - * Converting a String to a ByteArray proceeds by getting the Unicode - * representation of each character in the String, casting it to a byte by - * truncating the upper 8 bits, and then storing the byte in the ByteArray. - * Converting from ByteArray to String and back to ByteArray is not lossy, but - * converting an arbitrary String to a ByteArray may be. + * The following object types represent an array of bytes. The intent is to + * allow arbitrary binary data to pass through Tcl as a Tcl value without loss + * or damage. Such values are useful for things like encoded strings or Tk + * images to name just two. + * + * It's strange to have two Tcl_ObjTypes in place for this task when one would + * do, so a bit of detail and history will aid understanding. + * + * A bytearray is an ordered sequence of bytes. Each byte is an integer value + * in the range [0-255]. To be a Tcl value type, we need a way to encode each + * value in the value set as a Tcl string. A simple encoding is to + * represent each byte value as the same codepoint value. A bytearray of N + * bytes is encoded into a Tcl string of N characters where the codepoint of + * each character is the value of corresponding byte. This approach creates a + * one-to-one map between all bytearray values and a subset of Tcl string + * values. + * + * When converting a Tcl string value to the bytearray internal rep, the + * question arises what to do with strings outside that subset? That is, + * those Tcl strings containing at least one codepoint greater than 255? The + * obviously correct answer is to raise an error! That string value does not + * represent any valid bytearray value. + * + * Unfortunately this was not the path taken by the authors of the original + * tclByteArrayType. They chose to accept all Tcl string values as acceptable + * string encodings of the bytearray values that result from masking away the + * high bits of any codepoint value at all. This meant that every bytearray + * value had multiple accepted string representations. + * + * The implications of this choice are truly ugly, and motivated the proposal + * of TIP 568 to migrate away from it and to the more sensible design where + * each bytearray value has only one string representation. Full details are + * recorded in that TIP for those who seek them. + * + * The Tcl_ObjType "properByteArrayType" is (nearly) a correct implementation + * of bytearrays. Any Tcl value with the type properByteArrayType can have + * its bytearray value fetched and used with confidence that acting on that + * value is equivalent to acting on the true Tcl string value. This still + * implies a side testing burden -- past mistakes will not let us avoid that + * immediately, but it is at least a conventional test of type, and can be + * implemented entirely by examining the objPtr fields, with no need to query + * the internalrep, as a canonical flag would require. This benefit is made + * available to extensions through the public routine Tcl_GetBytesFromObj(), + * first available in Tcl 8.7. + * + * The public routines Tcl_GetByteArrayFromObj() and Tcl_SetByteArrayLength() + * must continue to follow their documented behavior through the 8.* series of + * releases. To support that legacy operation, we need a mechanism to retain + * compatibility with the deployed callers of the broken interface. That's + * what the retained "tclByteArrayType" provides. In those unusual + * circumstances where we convert an invalid bytearray value to a bytearray + * type, it is to this legacy type. Essentially any time this legacy type + * shows up, it's a signal of a bug being ignored. + * + * In Tcl 9, the incompatibility in the behavior of these public routines + * has been approved, and the legacy internal rep is no longer retained. + * The internal changes seen below are the limit of what can be done + * in a Tcl 8.* release. They provide a great expansion of the histories + * over which bytearray values can be useful. */ +static const Tcl_ObjType properByteArrayType = { + "bytearray", + FreeProperByteArrayInternalRep, + DupProperByteArrayInternalRep, + UpdateStringOfByteArray, + NULL +}; + const Tcl_ObjType tclByteArrayType = { "bytearray", FreeByteArrayInternalRep, DupByteArrayInternalRep, - UpdateStringOfByteArray, + NULL, SetByteArrayFromAny }; @@ -179,22 +228,31 @@ const Tcl_ObjType tclByteArrayType = { */ typedef struct ByteArray { - int used; /* The number of bytes used in the byte - * array. */ - int allocated; /* The amount of space actually allocated - * minus 1 byte. */ - unsigned char bytes[TCLFLEXARRAY]; /* The array of bytes. The actual size of this - * field depends on the 'allocated' field + unsigned int bad; /* Index of first character that is a nonbyte. + * If all characters are bytes, bad = used. */ + unsigned int used; /* The number of bytes used in the byte + * array. Must be <= allocated. The bytes + * used to store the value are indexed from + * 0 to used-1. */ + unsigned int allocated; /* The number of bytes of space allocated. */ + unsigned char bytes[TCLFLEXARRAY]; + /* The array of bytes. The actual size of this + * field is stored in the 'allocated' field * above. */ } ByteArray; #define BYTEARRAY_SIZE(len) \ - (((unsigned)TclOffset(ByteArray, bytes) + (len))) -#define GET_BYTEARRAY(objPtr) \ - ((ByteArray *) (objPtr)->internalRep.twoPtrValue.ptr1) -#define SET_BYTEARRAY(objPtr, baPtr) \ - (objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (baPtr) - + (offsetof(ByteArray, bytes) + (len)) +#define GET_BYTEARRAY(irPtr) ((ByteArray *) (irPtr)->twoPtrValue.ptr1) +#define SET_BYTEARRAY(irPtr, baPtr) \ + (irPtr)->twoPtrValue.ptr1 = (void *) (baPtr) + +int +TclIsPureByteArray( + Tcl_Obj * objPtr) +{ + return TclHasInternalRep(objPtr, &properByteArrayType); +} /* *---------------------------------------------------------------------- @@ -205,7 +263,7 @@ typedef struct ByteArray { * from the given array of bytes. * * Results: - * The newly create object is returned. This object will have no initial + * The newly created object is returned. This object has no initial * string representation. The returned object has a ref count of 0. * * Side effects: @@ -220,16 +278,16 @@ Tcl_Obj * Tcl_NewByteArrayObj( const unsigned char *bytes, /* The array of bytes used to initialize the * new object. */ - int length) /* Length of the array of bytes, which must be - * >= 0. */ + int numBytes) /* Number of bytes in the array, + * must be >= 0. */ { #ifdef TCL_MEM_DEBUG - return Tcl_DbNewByteArrayObj(bytes, length, "unknown", 0); + return Tcl_DbNewByteArrayObj(bytes, numBytes, "unknown", 0); #else /* if not TCL_MEM_DEBUG */ Tcl_Obj *objPtr; TclNewObj(objPtr); - Tcl_SetByteArrayObj(objPtr, bytes, length); + Tcl_SetByteArrayObj(objPtr, bytes, numBytes); return objPtr; #endif /* TCL_MEM_DEBUG */ } @@ -250,7 +308,7 @@ Tcl_NewByteArrayObj( * result of calling Tcl_NewByteArrayObj. * * Results: - * The newly create object is returned. This object will have no initial + * The newly created object is returned. This object has no initial * string representation. The returned object has a ref count of 0. * * Side effects: @@ -259,27 +317,37 @@ Tcl_NewByteArrayObj( *---------------------------------------------------------------------- */ +#ifdef TCL_MEM_DEBUG Tcl_Obj * Tcl_DbNewByteArrayObj( const unsigned char *bytes, /* The array of bytes used to initialize the * new object. */ - int length, /* Length of the array of bytes, which must be - * >= 0. */ + int numBytes, /* Number of bytes in the array, + * must be >= 0. */ const char *file, /* The name of the source file calling this * procedure; used for debugging. */ int line) /* Line number in the source file; used for * debugging. */ { -#ifdef TCL_MEM_DEBUG Tcl_Obj *objPtr; TclDbNewObj(objPtr, file, line); - Tcl_SetByteArrayObj(objPtr, bytes, length); + Tcl_SetByteArrayObj(objPtr, bytes, numBytes); return objPtr; +} #else /* if not TCL_MEM_DEBUG */ - return Tcl_NewByteArrayObj(bytes, length); -#endif /* TCL_MEM_DEBUG */ +Tcl_Obj * +Tcl_DbNewByteArrayObj( + const unsigned char *bytes, /* The array of bytes used to initialize the + * new object. */ + int numBytes, /* Number of bytes in the array, + * must be >= 0. */ + TCL_UNUSED(const char *) /*file*/, + TCL_UNUSED(int) /*line*/) +{ + return Tcl_NewByteArrayObj(bytes, numBytes); } +#endif /* TCL_MEM_DEBUG */ /* *--------------------------------------------------------------------------- @@ -303,36 +371,131 @@ void Tcl_SetByteArrayObj( Tcl_Obj *objPtr, /* Object to initialize as a ByteArray. */ const unsigned char *bytes, /* The array of bytes to use as the new value. - * May be NULL even if length > 0. */ - int length) /* Length of the array of bytes, which must - * be >= 0. */ + * May be NULL even if numBytes > 0. */ + int numBytes) /* Number of bytes in the array, + * must be >= 0. */ { ByteArray *byteArrayPtr; + Tcl_ObjInternalRep ir; if (Tcl_IsShared(objPtr)) { Tcl_Panic("%s called with shared object", "Tcl_SetByteArrayObj"); } - TclFreeIntRep(objPtr); TclInvalidateStringRep(objPtr); - if (length < 0) { - length = 0; + assert(numBytes >= 0); + byteArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(numBytes)); + byteArrayPtr->bad = numBytes; + byteArrayPtr->used = numBytes; + byteArrayPtr->allocated = numBytes; + + if ((bytes != NULL) && (numBytes > 0)) { + memcpy(byteArrayPtr->bytes, bytes, numBytes); } - byteArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length)); - byteArrayPtr->used = length; - byteArrayPtr->allocated = length; + SET_BYTEARRAY(&ir, byteArrayPtr); - if ((bytes != NULL) && (length > 0)) { - memcpy(byteArrayPtr->bytes, bytes, length); + Tcl_StoreInternalRep(objPtr, &properByteArrayType, &ir); +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_GetBytesFromObj/TclGetBytesFromObj -- + * + * Attempt to extract the value from objPtr in the representation + * of a byte sequence. On success return the extracted byte sequence. + * On failure, return NULL and record error message and code in + * interp (if not NULL). + * + * Results: + * NULL or pointer to array of bytes representing the ByteArray object. + * Writes number of bytes in array to *numBytesPtr. + * + *---------------------------------------------------------------------- + */ + +unsigned char * +TclGetBytesFromObj( + Tcl_Interp *interp, /* For error reporting */ + Tcl_Obj *objPtr, /* Value to extract from */ + int *numBytesPtr) /* If non-NULL, write the number of bytes + * in the array here */ +{ + ByteArray *baPtr; + const Tcl_ObjInternalRep *irPtr = TclFetchInternalRep(objPtr, &properByteArrayType); + + if (irPtr == NULL) { + SetByteArrayFromAny(NULL, objPtr); + irPtr = TclFetchInternalRep(objPtr, &properByteArrayType); + if (irPtr == NULL) { + if (interp) { + const char *nonbyte; + int ucs4; + + irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); + baPtr = GET_BYTEARRAY(irPtr); + nonbyte = Tcl_UtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); + TclUtfToUCS4(nonbyte, &ucs4); + + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "expected byte sequence but character %d " + "was '%1s' (U+%06X)", baPtr->bad, nonbyte, ucs4)); + Tcl_SetErrorCode(interp, "TCL", "VALUE", "BYTES", NULL); + } + return NULL; + } + } + baPtr = GET_BYTEARRAY(irPtr); + + if (numBytesPtr != NULL) { + *numBytesPtr = baPtr->used; + } + return baPtr->bytes; +} +#undef Tcl_GetBytesFromObj +unsigned char * +Tcl_GetBytesFromObj( + Tcl_Interp *interp, /* For error reporting */ + Tcl_Obj *objPtr, /* Value to extract from */ + size_t *numBytesPtr) /* If non-NULL, write the number of bytes + * in the array here */ +{ + ByteArray *baPtr; + const Tcl_ObjInternalRep *irPtr = TclFetchInternalRep(objPtr, &properByteArrayType); + + if (irPtr == NULL) { + SetByteArrayFromAny(NULL, objPtr); + irPtr = TclFetchInternalRep(objPtr, &properByteArrayType); + if (irPtr == NULL) { + if (interp) { + const char *nonbyte; + int ucs4; + + irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); + baPtr = GET_BYTEARRAY(irPtr); + nonbyte = Tcl_UtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); + TclUtfToUCS4(nonbyte, &ucs4); + + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "expected byte sequence but character %d " + "was '%1s' (U+%06X)", baPtr->bad, nonbyte, ucs4)); + Tcl_SetErrorCode(interp, "TCL", "VALUE", "BYTES", NULL); + } + return NULL; + } + } + baPtr = GET_BYTEARRAY(irPtr); + + if (numBytesPtr != NULL) { + *numBytesPtr = baPtr->used; } - objPtr->typePtr = &tclByteArrayType; - SET_BYTEARRAY(objPtr, byteArrayPtr); + return baPtr->bytes; } /* *---------------------------------------------------------------------- * - * Tcl_GetByteArrayFromObj -- + * Tcl_GetByteArrayFromObj/TclGetByteArrayFromObj -- * * Attempt to get the array of bytes from the Tcl object. If the object * is not already a ByteArray object, an attempt will be made to convert @@ -347,24 +510,61 @@ Tcl_SetByteArrayObj( *---------------------------------------------------------------------- */ +#undef Tcl_GetByteArrayFromObj unsigned char * Tcl_GetByteArrayFromObj( Tcl_Obj *objPtr, /* The ByteArray object. */ - int *lengthPtr) /* If non-NULL, filled with length of the - * array of bytes in the ByteArray object. */ + int *numBytesPtr) /* If non-NULL, write the number of bytes + * in the array here */ { ByteArray *baPtr; + const Tcl_ObjInternalRep *irPtr; + unsigned char *result = TclGetBytesFromObj(NULL, objPtr, numBytesPtr); - if (objPtr->typePtr != &tclByteArrayType) { - SetByteArrayFromAny(NULL, objPtr); + if (result) { + return result; } - baPtr = GET_BYTEARRAY(objPtr); - if (lengthPtr != NULL) { - *lengthPtr = baPtr->used; + irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); + assert(irPtr != NULL); + + baPtr = GET_BYTEARRAY(irPtr); + + if (numBytesPtr != NULL) { + *numBytesPtr = baPtr->used; } return (unsigned char *) baPtr->bytes; } + +unsigned char * +TclGetByteArrayFromObj( + Tcl_Obj *objPtr, /* The ByteArray object. */ + size_t *numBytesPtr) /* If non-NULL, write the number of bytes + * in the array here */ +{ + ByteArray *baPtr; + const Tcl_ObjInternalRep *irPtr; + unsigned char *result = Tcl_GetBytesFromObj(NULL, objPtr, numBytesPtr); + + if (result) { + return result; + } + + irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); + assert(irPtr != NULL); + + baPtr = GET_BYTEARRAY(irPtr); + + if (numBytesPtr != NULL) { +#if TCL_MAJOR_VERSION > 8 + *numBytesPtr = baPtr->used; +#else + /* TODO: What's going on here? Document or eliminate. */ + *numBytesPtr = ((size_t)(unsigned int)(baPtr->used + 1)) - 1; +#endif + } + return baPtr->bytes; +} /* *---------------------------------------------------------------------- @@ -391,25 +591,44 @@ Tcl_GetByteArrayFromObj( unsigned char * Tcl_SetByteArrayLength( Tcl_Obj *objPtr, /* The ByteArray object. */ - int length) /* New length for internal byte array. */ + int numBytes) /* Number of bytes in resized array */ { ByteArray *byteArrayPtr; + unsigned newLength; + Tcl_ObjInternalRep *irPtr; + + assert(numBytes >= 0); + newLength = (unsigned int)numBytes; if (Tcl_IsShared(objPtr)) { Tcl_Panic("%s called with shared object", "Tcl_SetByteArrayLength"); } - if (objPtr->typePtr != &tclByteArrayType) { - SetByteArrayFromAny(NULL, objPtr); + + irPtr = TclFetchInternalRep(objPtr, &properByteArrayType); + if (irPtr == NULL) { + irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); + if (irPtr == NULL) { + SetByteArrayFromAny(NULL, objPtr); + irPtr = TclFetchInternalRep(objPtr, &properByteArrayType); + if (irPtr == NULL) { + irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); + } + } } - byteArrayPtr = GET_BYTEARRAY(objPtr); - if (length > byteArrayPtr->allocated) { - byteArrayPtr = (ByteArray *)ckrealloc(byteArrayPtr, BYTEARRAY_SIZE(length)); - byteArrayPtr->allocated = length; - SET_BYTEARRAY(objPtr, byteArrayPtr); + /* Note that during truncation, the implementation does not free + * memory that is no longer needed. */ + + byteArrayPtr = GET_BYTEARRAY(irPtr); + if (newLength > byteArrayPtr->allocated) { + byteArrayPtr = (ByteArray *)ckrealloc(byteArrayPtr, BYTEARRAY_SIZE(newLength)); + byteArrayPtr->allocated = newLength; + SET_BYTEARRAY(irPtr, byteArrayPtr); } TclInvalidateStringRep(objPtr); - byteArrayPtr->used = length; + objPtr->typePtr = &properByteArrayType; + byteArrayPtr->bad = newLength; + byteArrayPtr->used = newLength; return byteArrayPtr->bytes; } @@ -431,32 +650,51 @@ Tcl_SetByteArrayLength( static int SetByteArrayFromAny( - Tcl_Interp *interp, /* Not used. */ + TCL_UNUSED(Tcl_Interp *), Tcl_Obj *objPtr) /* The object to convert to type ByteArray. */ { - int length; + size_t length, bad; const char *src, *srcEnd; unsigned char *dst; - ByteArray *byteArrayPtr; Tcl_UniChar ch = 0; + ByteArray *byteArrayPtr; + Tcl_ObjInternalRep ir; + + if (TclHasInternalRep(objPtr, &properByteArrayType)) { + return TCL_OK; + } + if (TclHasInternalRep(objPtr, &tclByteArrayType)) { + return TCL_OK; + } + + src = TclGetString(objPtr); + length = bad = objPtr->length; + srcEnd = src + length; - if (objPtr->typePtr != &tclByteArrayType) { - src = TclGetStringFromObj(objPtr, &length); - srcEnd = src + length; + /* Note the allocation is over-sized, possibly by a factor of four, + * or even a factor of two with a proper byte array value. */ - byteArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length)); - for (dst = byteArrayPtr->bytes; src < srcEnd; ) { - src += TclUtfToUniChar(src, &ch); - *dst++ = UCHAR(ch); + byteArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length)); + for (dst = byteArrayPtr->bytes; src < srcEnd; ) { + src += TclUtfToUniChar(src, &ch); + if ((bad == length) && (ch > 255)) { + bad = dst - byteArrayPtr->bytes; } + *dst++ = UCHAR(ch); + } - byteArrayPtr->used = dst - byteArrayPtr->bytes; - byteArrayPtr->allocated = length; + SET_BYTEARRAY(&ir, byteArrayPtr); + byteArrayPtr->allocated = length; + byteArrayPtr->used = dst - byteArrayPtr->bytes; - TclFreeIntRep(objPtr); - objPtr->typePtr = &tclByteArrayType; - SET_BYTEARRAY(objPtr, byteArrayPtr); + if (bad == length) { + byteArrayPtr->bad = byteArrayPtr->used; + Tcl_StoreInternalRep(objPtr, &properByteArrayType, &ir); + } else { + byteArrayPtr->bad = bad; + Tcl_StoreInternalRep(objPtr, &tclByteArrayType, &ir); } + return TCL_OK; } @@ -481,8 +719,14 @@ static void FreeByteArrayInternalRep( Tcl_Obj *objPtr) /* Object with internal rep to free. */ { - ckfree(GET_BYTEARRAY(objPtr)); - objPtr->typePtr = NULL; + ckfree(GET_BYTEARRAY(TclFetchInternalRep(objPtr, &tclByteArrayType))); +} + +static void +FreeProperByteArrayInternalRep( + Tcl_Obj *objPtr) /* Object with internal rep to free. */ +{ + ckfree(GET_BYTEARRAY(TclFetchInternalRep(objPtr, &properByteArrayType))); } /* @@ -507,19 +751,43 @@ DupByteArrayInternalRep( Tcl_Obj *srcPtr, /* Object with internal rep to copy. */ Tcl_Obj *copyPtr) /* Object with internal rep to set. */ { - int length; + unsigned int length; ByteArray *srcArrayPtr, *copyArrayPtr; + Tcl_ObjInternalRep ir; - srcArrayPtr = GET_BYTEARRAY(srcPtr); + srcArrayPtr = GET_BYTEARRAY(TclFetchInternalRep(srcPtr, &tclByteArrayType)); length = srcArrayPtr->used; copyArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length)); + copyArrayPtr->bad = srcArrayPtr->bad; copyArrayPtr->used = length; copyArrayPtr->allocated = length; memcpy(copyArrayPtr->bytes, srcArrayPtr->bytes, length); - SET_BYTEARRAY(copyPtr, copyArrayPtr); - copyPtr->typePtr = &tclByteArrayType; + SET_BYTEARRAY(&ir, copyArrayPtr); + Tcl_StoreInternalRep(copyPtr, &tclByteArrayType, &ir); +} + +static void +DupProperByteArrayInternalRep( + Tcl_Obj *srcPtr, /* Object with internal rep to copy. */ + Tcl_Obj *copyPtr) /* Object with internal rep to set. */ +{ + unsigned int length; + ByteArray *srcArrayPtr, *copyArrayPtr; + Tcl_ObjInternalRep ir; + + srcArrayPtr = GET_BYTEARRAY(TclFetchInternalRep(srcPtr, &properByteArrayType)); + length = srcArrayPtr->used; + + copyArrayPtr = (ByteArray *)ckalloc(BYTEARRAY_SIZE(length)); + copyArrayPtr->bad = length; + copyArrayPtr->used = length; + copyArrayPtr->allocated = length; + memcpy(copyArrayPtr->bytes, srcArrayPtr->bytes, length); + + SET_BYTEARRAY(&ir, copyArrayPtr); + Tcl_StoreInternalRep(copyPtr, &properByteArrayType, &ir); } /* @@ -527,9 +795,7 @@ DupByteArrayInternalRep( * * UpdateStringOfByteArray -- * - * Update the string representation for a ByteArray data object. Note: - * This procedure does not invalidate an existing old string rep so - * storage will be lost if this has not already been done. + * Update the string representation for a ByteArray data object. * * Results: * None. @@ -538,9 +804,6 @@ DupByteArrayInternalRep( * The object's string is set to a valid string that results from the * ByteArray-to-string conversion. * - * The object becomes a string object -- the internal rep is discarded - * and the typePtr becomes NULL. - * *---------------------------------------------------------------------- */ @@ -549,41 +812,36 @@ UpdateStringOfByteArray( Tcl_Obj *objPtr) /* ByteArray object whose string rep to * update. */ { - int i, length, size; - unsigned char *src; - char *dst; - ByteArray *byteArrayPtr; - - byteArrayPtr = GET_BYTEARRAY(objPtr); - src = byteArrayPtr->bytes; - length = byteArrayPtr->used; + const Tcl_ObjInternalRep *irPtr = TclFetchInternalRep(objPtr, &properByteArrayType); + ByteArray *byteArrayPtr = GET_BYTEARRAY(irPtr); + unsigned char *src = byteArrayPtr->bytes; + unsigned int i, length = byteArrayPtr->used; + unsigned int size = length; /* * How much space will string rep need? */ - size = length; - for (i = 0; i < length && size >= 0; i++) { + for (i = 0; i < length && size <= INT_MAX; i++) { if ((src[i] == 0) || (src[i] > 127)) { size++; } } - if (size < 0) { + if (size > INT_MAX) { Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); } - dst = (char *)ckalloc(size + 1); - objPtr->bytes = dst; - objPtr->length = size; - if (size == length) { - memcpy(dst, src, size); - dst[size] = '\0'; + char *dst = Tcl_InitStringRep(objPtr, (char *)src, size); + + TclOOM(dst, size); } else { + char *dst = Tcl_InitStringRep(objPtr, NULL, size); + + TclOOM(dst, size); for (i = 0; i < length; i++) { dst += Tcl_UniCharToUtf(src[i], dst); } - *dst = '\0'; } } @@ -613,7 +871,8 @@ TclAppendBytesToByteArray( int len) { ByteArray *byteArrayPtr; - int needed; + unsigned int length, needed; + Tcl_ObjInternalRep *irPtr; if (Tcl_IsShared(objPtr)) { Tcl_Panic("%s called with shared object","TclAppendBytesToByteArray"); @@ -629,23 +888,34 @@ TclAppendBytesToByteArray( return; } - if (objPtr->typePtr != &tclByteArrayType) { - SetByteArrayFromAny(NULL, objPtr); + + length = (unsigned int) len; + + irPtr = TclFetchInternalRep(objPtr, &properByteArrayType); + if (irPtr == NULL) { + irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); + if (irPtr == NULL) { + SetByteArrayFromAny(NULL, objPtr); + irPtr = TclFetchInternalRep(objPtr, &properByteArrayType); + if (irPtr == NULL) { + irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); + } + } } - byteArrayPtr = GET_BYTEARRAY(objPtr); + byteArrayPtr = GET_BYTEARRAY(irPtr); - if (len > INT_MAX - byteArrayPtr->used) { + if (length > INT_MAX - byteArrayPtr->used) { Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); } - needed = byteArrayPtr->used + len; + needed = byteArrayPtr->used + length; /* * If we need to, resize the allocated space in the byte array. */ if (needed > byteArrayPtr->allocated) { ByteArray *ptr = NULL; - int attempt; + unsigned int attempt; if (needed <= INT_MAX/2) { /* @@ -661,7 +931,7 @@ TclAppendBytesToByteArray( */ unsigned int limit = INT_MAX - needed; - unsigned int extra = len + TCL_MIN_GROWTH; + unsigned int extra = length + TCL_MIN_GROWTH; int growth = (int) ((extra > limit) ? limit : extra); attempt = needed + growth; @@ -677,14 +947,15 @@ TclAppendBytesToByteArray( } byteArrayPtr = ptr; byteArrayPtr->allocated = attempt; - SET_BYTEARRAY(objPtr, byteArrayPtr); + SET_BYTEARRAY(irPtr, byteArrayPtr); } if (bytes) { - memcpy(byteArrayPtr->bytes + byteArrayPtr->used, bytes, len); + memcpy(byteArrayPtr->bytes + byteArrayPtr->used, bytes, length); } - byteArrayPtr->used += len; + byteArrayPtr->used += length; TclInvalidateStringRep(objPtr); + objPtr->typePtr = &properByteArrayType; } /* @@ -734,7 +1005,7 @@ TclInitBinaryCmd( static int BinaryFormatCmd( - ClientData dummy, /* Not used. */ + TCL_UNUSED(ClientData), Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ @@ -1206,11 +1477,11 @@ BinaryFormatCmd( badField: { - int ch; - char buf[8] = ""; + Tcl_UniChar ch = 0; + char buf[5] = ""; - TclUtfToUCS4(errorString, &ch); - buf[TclUCS4ToUtf(ch, buf)] = '\0'; + TclUtfToUniChar(errorString, &ch); + buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; Tcl_SetObjResult(interp, Tcl_ObjPrintf( "bad field specifier \"%s\"", buf)); return TCL_ERROR; @@ -1239,7 +1510,7 @@ BinaryFormatCmd( static int BinaryScanCmd( - ClientData dummy, /* Not used. */ + TCL_UNUSED(ClientData), Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ @@ -1257,9 +1528,8 @@ BinaryScanCmd( unsigned char *buffer; /* Start of result buffer. */ const char *errorString; const char *str; - int offset, size, length; + int offset, size, length, i; - int i; Tcl_Obj *valuePtr, *elementPtr; Tcl_HashTable numberCacheHash; Tcl_HashTable *numberCachePtr; @@ -1283,7 +1553,8 @@ BinaryScanCmd( } switch (cmd) { case 'a': - case 'A': { + case 'A': + case 'C': { unsigned char *src; if (arg >= objc) { @@ -1305,10 +1576,18 @@ BinaryScanCmd( size = count; /* - * Trim trailing nulls and spaces, if necessary. + * Apply C string semantics or trim trailing + * nulls and spaces, if necessary. */ - if (cmd == 'A') { + if (cmd == 'C') { + for (i = 0; i < size; i++) { + if (src[i] == '\0') { + size = i; + break; + } + } + } else if (cmd == 'A') { while (size > 0) { if (src[size - 1] != '\0' && src[size - 1] != ' ') { break; @@ -1561,7 +1840,7 @@ BinaryScanCmd( */ done: - Tcl_SetObjResult(interp, Tcl_NewLongObj(arg - 3)); + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(arg - 3)); DeleteScanNumberCache(numberCachePtr); return TCL_OK; @@ -1576,11 +1855,11 @@ BinaryScanCmd( badField: { - int ch; - char buf[8] = ""; + Tcl_UniChar ch = 0; + char buf[5] = ""; - TclUtfToUCS4(errorString, &ch); - buf[TclUCS4ToUtf(ch, buf)] = '\0'; + TclUtfToUniChar(errorString, &ch); + buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; Tcl_SetObjResult(interp, Tcl_ObjPrintf( "bad field specifier \"%s\"", buf)); return TCL_ERROR; @@ -1849,7 +2128,7 @@ CopyNumber( * * FormatNumber -- * - * This routine is called by Tcl_BinaryObjCmd to format a number into a + * This routine is called by BinaryFormatCmd to format a number into a * location pointed at by cursor. * * Results: @@ -1869,7 +2148,6 @@ FormatNumber( Tcl_Obj *src, /* Number to format. */ unsigned char **cursorPtr) /* Pointer to index into destination buffer. */ { - long value; double dvalue; Tcl_WideInt wvalue; float fvalue; @@ -1885,10 +2163,11 @@ FormatNumber( */ if (Tcl_GetDoubleFromObj(interp, src, &dvalue) != TCL_OK) { - if (src->typePtr != &tclDoubleType) { + const Tcl_ObjInternalRep *irPtr = TclFetchInternalRep(src, &tclDoubleType); + if (irPtr == NULL) { return TCL_ERROR; } - dvalue = src->internalRep.doubleValue; + dvalue = irPtr->doubleValue; } CopyNumber(&dvalue, *cursorPtr, sizeof(double), type); *cursorPtr += sizeof(double); @@ -1904,10 +2183,12 @@ FormatNumber( */ if (Tcl_GetDoubleFromObj(interp, src, &dvalue) != TCL_OK) { - if (src->typePtr != &tclDoubleType) { + const Tcl_ObjInternalRep *irPtr = TclFetchInternalRep(src, &tclDoubleType); + + if (irPtr == NULL) { return TCL_ERROR; } - dvalue = src->internalRep.doubleValue; + dvalue = irPtr->doubleValue; } /* @@ -1931,7 +2212,7 @@ FormatNumber( case 'w': case 'W': case 'm': - if (Tcl_GetWideIntFromObj(interp, src, &wvalue) != TCL_OK) { + if (TclGetWideBitsFromObj(interp, src, &wvalue) != TCL_OK) { return TCL_ERROR; } if (NeedReversing(type)) { @@ -1961,19 +2242,19 @@ FormatNumber( case 'i': case 'I': case 'n': - if (TclGetLongFromObj(interp, src, &value) != TCL_OK) { + if (TclGetWideBitsFromObj(interp, src, &wvalue) != TCL_OK) { return TCL_ERROR; } if (NeedReversing(type)) { - *(*cursorPtr)++ = UCHAR(value); - *(*cursorPtr)++ = UCHAR(value >> 8); - *(*cursorPtr)++ = UCHAR(value >> 16); - *(*cursorPtr)++ = UCHAR(value >> 24); + *(*cursorPtr)++ = UCHAR(wvalue); + *(*cursorPtr)++ = UCHAR(wvalue >> 8); + *(*cursorPtr)++ = UCHAR(wvalue >> 16); + *(*cursorPtr)++ = UCHAR(wvalue >> 24); } else { - *(*cursorPtr)++ = UCHAR(value >> 24); - *(*cursorPtr)++ = UCHAR(value >> 16); - *(*cursorPtr)++ = UCHAR(value >> 8); - *(*cursorPtr)++ = UCHAR(value); + *(*cursorPtr)++ = UCHAR(wvalue >> 24); + *(*cursorPtr)++ = UCHAR(wvalue >> 16); + *(*cursorPtr)++ = UCHAR(wvalue >> 8); + *(*cursorPtr)++ = UCHAR(wvalue); } return TCL_OK; @@ -1983,15 +2264,15 @@ FormatNumber( case 's': case 'S': case 't': - if (TclGetLongFromObj(interp, src, &value) != TCL_OK) { + if (TclGetWideBitsFromObj(interp, src, &wvalue) != TCL_OK) { return TCL_ERROR; } if (NeedReversing(type)) { - *(*cursorPtr)++ = UCHAR(value); - *(*cursorPtr)++ = UCHAR(value >> 8); + *(*cursorPtr)++ = UCHAR(wvalue); + *(*cursorPtr)++ = UCHAR(wvalue >> 8); } else { - *(*cursorPtr)++ = UCHAR(value >> 8); - *(*cursorPtr)++ = UCHAR(value); + *(*cursorPtr)++ = UCHAR(wvalue >> 8); + *(*cursorPtr)++ = UCHAR(wvalue); } return TCL_OK; @@ -1999,10 +2280,10 @@ FormatNumber( * 8-bit integer values. */ case 'c': - if (TclGetLongFromObj(interp, src, &value) != TCL_OK) { + if (TclGetWideBitsFromObj(interp, src, &wvalue) != TCL_OK) { return TCL_ERROR; } - *(*cursorPtr)++ = UCHAR(value); + *(*cursorPtr)++ = UCHAR(wvalue); return TCL_OK; default: @@ -2016,7 +2297,7 @@ FormatNumber( * * ScanNumber -- * - * This routine is called by Tcl_BinaryObjCmd to scan a number out of a + * This routine is called by BinaryScanCmd to scan a number out of a * buffer. * * Results: @@ -2128,7 +2409,7 @@ ScanNumber( returnNumericObject: if (*numberCachePtrPtr == NULL) { - return Tcl_NewLongObj(value); + return Tcl_NewWideIntObj(value); } else { Tcl_HashTable *tablePtr = *numberCachePtrPtr; Tcl_HashEntry *hPtr; @@ -2139,8 +2420,9 @@ ScanNumber( return (Tcl_Obj *)Tcl_GetHashValue(hPtr); } if (tablePtr->numEntries <= BINARY_SCAN_MAX_CACHE) { - Tcl_Obj *objPtr = Tcl_NewLongObj(value); + Tcl_Obj *objPtr; + TclNewIntObj(objPtr, value); Tcl_IncrRefCount(objPtr); Tcl_SetHashValue(hPtr, objPtr); return objPtr; @@ -2157,7 +2439,7 @@ ScanNumber( DeleteScanNumberCache(tablePtr); *numberCachePtrPtr = NULL; - return Tcl_NewLongObj(value); + return Tcl_NewWideIntObj(value); } /* @@ -2191,8 +2473,9 @@ ScanNumber( Tcl_Obj *bigObj = NULL; mp_int big; - TclBNInitBignumFromWideUInt(&big, uwvalue); - bigObj = Tcl_NewBignumObj(&big); + if (mp_init_u64(&big, uwvalue) == MP_OKAY) { + bigObj = Tcl_NewBignumObj(&big); + } return bigObj; } return Tcl_NewWideIntObj((Tcl_WideInt) uwvalue); @@ -2304,7 +2587,7 @@ DeleteScanNumberCache( static int BinaryEncodeHex( - ClientData clientData, + TCL_UNUSED(ClientData), Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) @@ -2348,7 +2631,7 @@ BinaryEncodeHex( static int BinaryDecodeHex( - ClientData clientData, + TCL_UNUSED(ClientData), Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) @@ -2356,8 +2639,8 @@ BinaryDecodeHex( Tcl_Obj *resultObj = NULL; unsigned char *data, *datastart, *dataend; unsigned char *begin, *cursor, c; - int i, index, value, size, pure, count = 0, cut = 0, strict = 0; - Tcl_UniChar ch = 0; + int i, index, value, size, pure = 1, count = 0, cut = 0, strict = 0; + int ucs4; enum {OPT_STRICT }; static const char *const optStrings[] = { "-strict", NULL }; @@ -2378,9 +2661,12 @@ BinaryDecodeHex( } TclNewObj(resultObj); - pure = TclIsPureByteArray(objv[objc - 1]); - datastart = data = pure ? Tcl_GetByteArrayFromObj(objv[objc - 1], &count) - : (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count); + data = TclGetBytesFromObj(NULL, objv[objc - 1], &count); + if (data == NULL) { + pure = 0; + data = (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count); + } + datastart = data; dataend = data + count; size = (count + 1) / 2; begin = cursor = Tcl_SetByteArrayLength(resultObj, size); @@ -2393,7 +2679,7 @@ BinaryDecodeHex( } c = *data++; - if (!isxdigit((int) c)) { + if (!isxdigit(UCHAR(c))) { if (strict || !TclIsSpaceProc(c)) { goto badChar; } @@ -2426,14 +2712,14 @@ BinaryDecodeHex( badChar: if (pure) { - ch = c; + ucs4 = c; } else { - TclUtfToUniChar((const char *)(data - 1), &ch); + TclUtfToUCS4((const char *)(data - 1), &ucs4); } TclDecrRefCount(resultObj); Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "invalid hexadecimal digit \"%c\" at position %d", - ch, (int) (data - datastart - 1))); + "invalid hexadecimal digit \"%c\" (U+%06X) at position %d", + ucs4, ucs4, (int) (data - datastart - 1))); Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL); return TCL_ERROR; } @@ -2469,7 +2755,7 @@ BinaryDecodeHex( static int BinaryEncode64( - ClientData clientData, + TCL_UNUSED(ClientData), Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) @@ -2507,12 +2793,11 @@ BinaryEncode64( } break; case OPT_WRAPCHAR: - purewrap = TclIsPureByteArray(objv[i + 1]); - if (purewrap) { - wrapchar = (const char *) Tcl_GetByteArrayFromObj( - objv[i + 1], &wrapcharlen); - } else { - wrapchar = Tcl_GetStringFromObj(objv[i + 1], &wrapcharlen); + wrapchar = (const char *)TclGetBytesFromObj(NULL, + objv[i + 1], &wrapcharlen); + if (wrapchar == NULL) { + purewrap = 0; + wrapchar = TclGetStringFromObj(objv[i + 1], &wrapcharlen); } break; } @@ -2592,7 +2877,7 @@ BinaryEncode64( static int BinaryEncodeUu( - ClientData clientData, + TCL_UNUSED(ClientData), Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) @@ -2741,7 +3026,7 @@ BinaryEncodeUu( static int BinaryDecodeUu( - ClientData clientData, + TCL_UNUSED(ClientData), Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) @@ -2749,9 +3034,9 @@ BinaryDecodeUu( Tcl_Obj *resultObj = NULL; unsigned char *data, *datastart, *dataend; unsigned char *begin, *cursor; - int i, index, size, pure, count = 0, strict = 0, lineLen; + int i, index, size, pure = 1, count = 0, strict = 0, lineLen; unsigned char c; - Tcl_UniChar ch = 0; + int ucs4; enum { OPT_STRICT }; static const char *const optStrings[] = { "-strict", NULL }; @@ -2772,9 +3057,12 @@ BinaryDecodeUu( } TclNewObj(resultObj); - pure = TclIsPureByteArray(objv[objc - 1]); - datastart = data = pure ? Tcl_GetByteArrayFromObj(objv[objc - 1], &count) - : (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count); + data = TclGetBytesFromObj(NULL, objv[objc - 1], &count); + if (data == NULL) { + pure = 0; + data = (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count); + } + datastart = data; dataend = data + count; size = ((count + 3) & ~3) * 3 / 4; begin = cursor = Tcl_SetByteArrayLength(resultObj, size); @@ -2881,13 +3169,13 @@ BinaryDecodeUu( badUu: if (pure) { - ch = c; + ucs4 = c; } else { - TclUtfToUniChar((const char *)(data - 1), &ch); + TclUtfToUCS4((const char *)(data - 1), &ucs4); } Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "invalid uuencode character \"%c\" at position %d", - ch, (int) (data - datastart - 1))); + "invalid uuencode character \"%c\" (U+%06X) at position %d", + ucs4, ucs4, (int) (data - datastart - 1))); Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL); TclDecrRefCount(resultObj); return TCL_ERROR; @@ -2911,7 +3199,7 @@ BinaryDecodeUu( static int BinaryDecode64( - ClientData clientData, + TCL_UNUSED(ClientData), Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) @@ -2920,9 +3208,9 @@ BinaryDecode64( unsigned char *data, *datastart, *dataend, c = '\0'; unsigned char *begin = NULL; unsigned char *cursor = NULL; - int pure, strict = 0; + int pure = 1, strict = 0; int i, index, size, cut = 0, count = 0; - Tcl_UniChar ch = 0; + int ucs4; enum { OPT_STRICT }; static const char *const optStrings[] = { "-strict", NULL }; @@ -2943,9 +3231,12 @@ BinaryDecode64( } TclNewObj(resultObj); - pure = TclIsPureByteArray(objv[objc - 1]); - datastart = data = pure ? Tcl_GetByteArrayFromObj(objv[objc - 1], &count) - : (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count); + data = TclGetBytesFromObj(NULL, objv[objc - 1], &count); + if (data == NULL) { + pure = 0; + data = (unsigned char *) TclGetStringFromObj(objv[objc - 1], &count); + } + datastart = data; dataend = data + count; size = ((count + 3) & ~3) * 3 / 4; begin = cursor = Tcl_SetByteArrayLength(resultObj, size); @@ -3047,19 +3338,19 @@ BinaryDecode64( bad64: if (pure) { - ch = c; + ucs4 = c; } else { /* The decoder is byte-oriented. If we saw a byte that's not a * valid member of the base64 alphabet, it could be the lead byte * of a multi-byte character. */ /* Safe because we know data is NUL-terminated */ - TclUtfToUniChar((const char *)(data - 1), &ch); + TclUtfToUCS4((const char *)(data - 1), &ucs4); } Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "invalid base64 character \"%c\" at position %d", ch, - (int) (data - datastart - 1))); + "invalid base64 character \"%c\" (U+%06X) at position %d", + ucs4, ucs4, (int) (data - datastart - 1))); Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL); TclDecrRefCount(resultObj); return TCL_ERROR; |