diff options
Diffstat (limited to 'generic/tclStringRep.h')
-rw-r--r-- | generic/tclStringRep.h | 48 |
1 files changed, 13 insertions, 35 deletions
diff --git a/generic/tclStringRep.h b/generic/tclStringRep.h index d0c76cb..768c1ee 100644 --- a/generic/tclStringRep.h +++ b/generic/tclStringRep.h @@ -1,29 +1,12 @@ /* * tclStringRep.h -- * - * This file contains the definition of the Unicode string internal - * representation and macros to access it. + * This file contains the definition of internal representations of a string + * and macros to access it. * - * A Unicode string is an internationalized string. Conceptually, a - * Unicode string is an array of 16-bit quantities organized as a - * sequence of properly formed UTF-8 characters. There is a one-to-one - * map between Unicode and UTF characters. Because Unicode characters - * have a fixed width, operations such as indexing operate on Unicode - * data. The String object is optimized for the case where each UTF char - * in a string is only one byte. In this case, we store the value of - * numChars, but we don't store the Unicode data (unless Tcl_GetUnicode - * is explicitly called). - * - * The String object type stores one or both formats. The default - * behavior is to store UTF. Once Unicode is calculated by a function, it - * is stored in the internal rep for future access (without an additional - * O(n) cost). - * - * To allow many appends to be done to an object without constantly - * reallocating the space for the string or Unicode representation, we - * allocate double the space for the string or Unicode and use the - * internal representation to keep track of how much space is used vs. - * allocated. + * Conceptually, a string is a sequence of Unicode code points. Internally + * it may be stored in an encoding form such as a modified version of UTF-8 + * or UTF-16 (when TCL_UTF_MAX=3) or UTF-32. * * Copyright (c) 1995-1997 Sun Microsystems, Inc. * Copyright (c) 1999 by Scriptics Corporation. @@ -39,15 +22,10 @@ /* * The following structure is the internal rep for a String object. It keeps * track of how much memory has been used and how much has been allocated for - * the Unicode and UTF string to enable growing and shrinking of the UTF and - * Unicode reps of the String object with fewer mallocs. To optimize string + * the various representations to enable growing and shrinking of + * the String object with fewer mallocs. To optimize string * length and indexing operations, this structure also stores the number of - * characters (same of UTF and Unicode!) once that value has been computed. - * - * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16 - * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This - * can be officially modified by altering the definition of Tcl_UniChar in - * tcl.h, but do not do that unless you are sure what you're doing! + * code points (independent of encoding form) once that value has been computed. */ typedef struct { @@ -57,15 +35,15 @@ typedef struct { * Unicode rep, or that the number of UTF bytes == * the number of chars. */ Tcl_Size allocated; /* The amount of space actually allocated for - * the UTF string (minus 1 byte for the + * the UTF-8 string (minus 1 byte for the * termination char). */ Tcl_Size maxChars; /* Max number of chars that can fit in the * space allocated for the Unicode array. */ int hasUnicode; /* Boolean determining whether the string has - * a Unicode representation. */ - Tcl_UniChar unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size - * of this field depends on the 'maxChars' - * field above. */ + * a Tcl_UniChar representation. */ + Tcl_UniChar unicode[TCLFLEXARRAY]; /* The array of Tcl_UniChar units. + * The actual size of this field depends on + * the maxChars field above. */ } String; /* Limit on string lengths. The -1 because limit does not include the nul */ |