diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2016-03-22 20:30:08 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2016-03-22 20:30:08 (GMT) |
commit | d281007655b47cbc0a3449aaa0c605bce151fec4 (patch) | |
tree | 0233f4d33fa80369a2d2d3159f58d8659e4e27dc /generic/tclStringRep.h | |
parent | f0d0319ba6c340e680ec612d980061897bf07b4f (diff) | |
download | tcl-d281007655b47cbc0a3449aaa0c605bce151fec4.zip tcl-d281007655b47cbc0a3449aaa0c605bce151fec4.tar.gz tcl-d281007655b47cbc0a3449aaa0c605bce151fec4.tar.bz2 |
Factor out string internal rep definition so fix for [1af8de570511] is less awful.
Diffstat (limited to 'generic/tclStringRep.h')
-rw-r--r-- | generic/tclStringRep.h | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/generic/tclStringRep.h b/generic/tclStringRep.h new file mode 100644 index 0000000..227e6bc --- /dev/null +++ b/generic/tclStringRep.h @@ -0,0 +1,97 @@ +/* + * tclStringRep.h -- + * + * This file contains the definition of the Unicode string internal + * representation and macros to access it. + * + * A Unicode string is an internationalized string. Conceptually, a + * Unicode string is an array of 16-bit quantities organized as a + * sequence of properly formed UTF-8 characters. There is a one-to-one + * map between Unicode and UTF characters. Because Unicode characters + * have a fixed width, operations such as indexing operate on Unicode + * data. The String object is optimized for the case where each UTF char + * in a string is only one byte. In this case, we store the value of + * numChars, but we don't store the Unicode data (unless Tcl_GetUnicode + * is explicitly called). + * + * The String object type stores one or both formats. The default + * behavior is to store UTF. Once Unicode is calculated by a function, it + * is stored in the internal rep for future access (without an additional + * O(n) cost). + * + * To allow many appends to be done to an object without constantly + * reallocating the space for the string or Unicode representation, we + * allocate double the space for the string or Unicode and use the + * internal representation to keep track of how much space is used vs. + * allocated. + * + * Copyright (c) 1995-1997 Sun Microsystems, Inc. + * Copyright (c) 1999 by Scriptics Corporation. + * + * See the file "license.terms" for information on usage and redistribution of + * this file, and for a DISCLAIMER OF ALL WARRANTIES. + */ + +/* + * The following structure is the internal rep for a String object. It keeps + * track of how much memory has been used and how much has been allocated for + * the Unicode and UTF string to enable growing and shrinking of the UTF and + * Unicode reps of the String object with fewer mallocs. 
To optimize string + * length and indexing operations, this structure also stores the number of + * characters (same for UTF and Unicode!) once that value has been computed. + * + * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16 + * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This + * can be officially modified by altering the definition of Tcl_UniChar in + * tcl.h, but do not do that unless you are sure what you're doing! + */ + +typedef struct String { + int numChars; /* The number of chars in the string. -1 means + * this value has not been calculated. >= 0 + * means that there is a valid Unicode rep, or + * that the number of UTF bytes == the number + * of chars. */ + int allocated; /* The amount of space actually allocated for + * the UTF string (minus 1 byte for the + * termination char). */ + int maxChars; /* Max number of chars that can fit in the + * space allocated for the unicode array. */ + int hasUnicode; /* Boolean determining whether the string has + * a Unicode representation. */ + Tcl_UniChar unicode[1]; /* The array of Unicode chars. The actual size + * of this field depends on the 'maxChars' + * field above. 
*/ +} String; + +#define STRING_MAXCHARS \ + (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar)) +#define STRING_SIZE(numChars) \ + (sizeof(String) + ((numChars) * sizeof(Tcl_UniChar))) +#define stringCheckLimits(numChars) \ + do { \ + if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \ + Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \ + STRING_MAXCHARS); \ + } \ + } while (0) +#define stringAttemptAlloc(numChars) \ + (String *) attemptckalloc((unsigned) STRING_SIZE(numChars)) +#define stringAlloc(numChars) \ + (String *) ckalloc((unsigned) STRING_SIZE(numChars)) +#define stringRealloc(ptr, numChars) \ + (String *) ckrealloc((ptr), (unsigned) STRING_SIZE(numChars)) +#define stringAttemptRealloc(ptr, numChars) \ + (String *) attemptckrealloc((ptr), (unsigned) STRING_SIZE(numChars)) +#define GET_STRING(objPtr) \ + ((String *) (objPtr)->internalRep.twoPtrValue.ptr1) +#define SET_STRING(objPtr, stringPtr) \ + ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr)) + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ |