From d281007655b47cbc0a3449aaa0c605bce151fec4 Mon Sep 17 00:00:00 2001 From: dkf Date: Tue, 22 Mar 2016 20:30:08 +0000 Subject: Factor out string internal rep definition so fix for [1af8de570511] is less awful. --- generic/tclExecute.c | 8 ++--- generic/tclStringObj.c | 55 +--------------------------- generic/tclStringRep.h | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 58 deletions(-) create mode 100644 generic/tclStringRep.h diff --git a/generic/tclExecute.c b/generic/tclExecute.c index c43cc40..d4077f5 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -19,6 +19,7 @@ #include "tclCompile.h" #include "tclOOInt.h" #include "tommath.h" +#include "tclStringRep.h" #include #include @@ -5739,14 +5740,13 @@ TEBCresume( /* * Flush the info in the string internal rep that refers to the - * about-to-be-invalidated UTF-8 rep. This sets the 'allocated' - * field of the String structure to 0 to indicate that a new - * buffer needs to be allocated. This assumes that the value is + * about-to-be-invalidated UTF-8 rep. This indicates that a new + * buffer needs to be allocated, and assumes that the value is * already of tclStringTypePtr type, which should be true provided * we call it after Tcl_GetUnicodeFromObj. */ #define MarkStringInternalRepForFlush(objPtr) \ - (((int *) ((objPtr)->internalRep.twoPtrValue.ptr1))[1] = 0) + (GET_STRING(objPtr)->allocated = 0) if (Tcl_IsShared(valuePtr)) { objResultPtr = Tcl_DuplicateObj(valuePtr); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 8d70d20..11a57e9 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -36,6 +36,7 @@ #include "tclInt.h" #include "tommath.h" +#include "tclStringRep.h" /* * Set COMPAT to 1 to restore the shimmering patterns to those of Tcl 8.5. 
@@ -89,60 +90,6 @@ const Tcl_ObjType tclStringType = { UpdateStringOfString, /* updateStringProc */ SetStringFromAny /* setFromAnyProc */ }; - -/* - * The following structure is the internal rep for a String object. It keeps - * track of how much memory has been used and how much has been allocated for - * the Unicode and UTF string to enable growing and shrinking of the UTF and - * Unicode reps of the String object with fewer mallocs. To optimize string - * length and indexing operations, this structure also stores the number of - * characters (same of UTF and Unicode!) once that value has been computed. - * - * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16 - * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This - * can be officially modified by altering the definition of Tcl_UniChar in - * tcl.h, but do not do that unless you are sure what you're doing! - */ - -typedef struct String { - int numChars; /* The number of chars in the string. -1 means - * this value has not been calculated. >= 0 - * means that there is a valid Unicode rep, or - * that the number of UTF bytes == the number - * of chars. */ - int allocated; /* The amount of space actually allocated for - * the UTF string (minus 1 byte for the - * termination char). */ - int maxChars; /* Max number of chars that can fit in the - * space allocated for the unicode array. */ - int hasUnicode; /* Boolean determining whether the string has - * a Unicode representation. */ - Tcl_UniChar unicode[1]; /* The array of Unicode chars. The actual size - * of this field depends on the 'maxChars' - * field above. 
*/ -} String; - -#define STRING_MAXCHARS \ - (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar)) -#define STRING_SIZE(numChars) \ - (sizeof(String) + ((numChars) * sizeof(Tcl_UniChar))) -#define stringCheckLimits(numChars) \ - if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \ - Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \ - STRING_MAXCHARS); \ - } -#define stringAttemptAlloc(numChars) \ - (String *) attemptckalloc((unsigned) STRING_SIZE(numChars) ) -#define stringAlloc(numChars) \ - (String *) ckalloc((unsigned) STRING_SIZE(numChars) ) -#define stringRealloc(ptr, numChars) \ - (String *) ckrealloc((ptr), (unsigned) STRING_SIZE(numChars) ) -#define stringAttemptRealloc(ptr, numChars) \ - (String *) attemptckrealloc((ptr), (unsigned) STRING_SIZE(numChars) ) -#define GET_STRING(objPtr) \ - ((String *) (objPtr)->internalRep.twoPtrValue.ptr1) -#define SET_STRING(objPtr, stringPtr) \ - ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr)) /* * TCL STRING GROWTH ALGORITHM diff --git a/generic/tclStringRep.h b/generic/tclStringRep.h new file mode 100644 index 0000000..227e6bc --- /dev/null +++ b/generic/tclStringRep.h @@ -0,0 +1,97 @@ +/* + * tclStringRep.h -- + * + * This file contains the definition of the Unicode string internal + * representation and macros to access it. + * + * A Unicode string is an internationalized string. Conceptually, a + * Unicode string is an array of 16-bit quantities, each corresponding + * to one properly formed UTF-8 character. There is a one-to-one + * map between Unicode and UTF characters. Because Unicode characters + * have a fixed width, operations such as indexing operate on Unicode + * data. The String object is optimized for the case where each UTF char + * in a string is only one byte. In this case, we store the value of + * numChars, but we don't store the Unicode data (unless Tcl_GetUnicode + * is explicitly called). + * + * The String object type stores one or both formats. 
The default + * behavior is to store UTF. Once Unicode is calculated by a function, it + * is stored in the internal rep for future access (without an additional + * O(n) cost). + * + * To allow many appends to be done to an object without constantly + * reallocating the space for the string or Unicode representation, we + * allocate double the space for the string or Unicode and use the + * internal representation to keep track of how much space is used vs. + * allocated. + * + * Copyright (c) 1995-1997 Sun Microsystems, Inc. + * Copyright (c) 1999 by Scriptics Corporation. + * + * See the file "license.terms" for information on usage and redistribution of + * this file, and for a DISCLAIMER OF ALL WARRANTIES. + */ + +/* + * The following structure is the internal rep for a String object. It keeps + * track of how much memory has been used and how much has been allocated for + * the Unicode and UTF string to enable growing and shrinking of the UTF and + * Unicode reps of the String object with fewer mallocs. To optimize string + * length and indexing operations, this structure also stores the number of + * characters (same for UTF and Unicode!) once that value has been computed. + * + * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16 + * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This + * can be officially modified by altering the definition of Tcl_UniChar in + * tcl.h, but do not do that unless you are sure what you're doing! + */ + +typedef struct String { + int numChars; /* The number of chars in the string. -1 means + * this value has not been calculated. >= 0 + * means that there is a valid Unicode rep, or + * that the number of UTF bytes == the number + * of chars. */ + int allocated; /* The amount of space actually allocated for + * the UTF string (minus 1 byte for the + * termination char). */ + int maxChars; /* Max number of chars that can fit in the + * space allocated for the unicode array. 
*/ + int hasUnicode; /* Boolean determining whether the string has + * a Unicode representation. */ + Tcl_UniChar unicode[1]; /* The array of Unicode chars. The actual size + * of this field depends on the 'maxChars' + * field above. */ +} String; + +#define STRING_MAXCHARS \ + (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar)) +#define STRING_SIZE(numChars) \ + (sizeof(String) + ((numChars) * sizeof(Tcl_UniChar))) +#define stringCheckLimits(numChars) \ + do { \ + if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \ + Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \ + STRING_MAXCHARS); \ + } \ + } while (0) +#define stringAttemptAlloc(numChars) \ + (String *) attemptckalloc((unsigned) STRING_SIZE(numChars)) +#define stringAlloc(numChars) \ + (String *) ckalloc((unsigned) STRING_SIZE(numChars)) +#define stringRealloc(ptr, numChars) \ + (String *) ckrealloc((ptr), (unsigned) STRING_SIZE(numChars)) +#define stringAttemptRealloc(ptr, numChars) \ + (String *) attemptckrealloc((ptr), (unsigned) STRING_SIZE(numChars)) +#define GET_STRING(objPtr) \ + ((String *) (objPtr)->internalRep.twoPtrValue.ptr1) +#define SET_STRING(objPtr, stringPtr) \ + ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr)) + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ -- cgit v0.12