summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2016-03-22 20:30:08 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2016-03-22 20:30:08 (GMT)
commitd281007655b47cbc0a3449aaa0c605bce151fec4 (patch)
tree0233f4d33fa80369a2d2d3159f58d8659e4e27dc /generic
parentf0d0319ba6c340e680ec612d980061897bf07b4f (diff)
downloadtcl-d281007655b47cbc0a3449aaa0c605bce151fec4.zip
tcl-d281007655b47cbc0a3449aaa0c605bce151fec4.tar.gz
tcl-d281007655b47cbc0a3449aaa0c605bce151fec4.tar.bz2
Factor out string internal rep definition so fix for [1af8de570511] is less awful.
Diffstat (limited to 'generic')
-rw-r--r--generic/tclExecute.c8
-rw-r--r--generic/tclStringObj.c55
-rw-r--r--generic/tclStringRep.h97
3 files changed, 102 insertions, 58 deletions
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index c43cc40..d4077f5 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -19,6 +19,7 @@
#include "tclCompile.h"
#include "tclOOInt.h"
#include "tommath.h"
+#include "tclStringRep.h"
#include <math.h>
#include <assert.h>
@@ -5739,14 +5740,13 @@ TEBCresume(
/*
* Flush the info in the string internal rep that refers to the
- * about-to-be-invalidated UTF-8 rep. This sets the 'allocated'
- * field of the String structure to 0 to indicate that a new
- * buffer needs to be allocated. This assumes that the value is
+ * about-to-be-invalidated UTF-8 rep. This indicates that a new
+ * buffer needs to be allocated, and assumes that the value is
* already of tclStringTypePtr type, which should be true provided
* we call it after Tcl_GetUnicodeFromObj.
*/
#define MarkStringInternalRepForFlush(objPtr) \
- (((int *) ((objPtr)->internalRep.twoPtrValue.ptr1))[1] = 0)
+ (GET_STRING(objPtr)->allocated = 0)
if (Tcl_IsShared(valuePtr)) {
objResultPtr = Tcl_DuplicateObj(valuePtr);
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 8d70d20..11a57e9 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -36,6 +36,7 @@
#include "tclInt.h"
#include "tommath.h"
+#include "tclStringRep.h"
/*
* Set COMPAT to 1 to restore the shimmering patterns to those of Tcl 8.5.
@@ -89,60 +90,6 @@ const Tcl_ObjType tclStringType = {
UpdateStringOfString, /* updateStringProc */
SetStringFromAny /* setFromAnyProc */
};
-
-/*
- * The following structure is the internal rep for a String object. It keeps
- * track of how much memory has been used and how much has been allocated for
- * the Unicode and UTF string to enable growing and shrinking of the UTF and
- * Unicode reps of the String object with fewer mallocs. To optimize string
- * length and indexing operations, this structure also stores the number of
- * characters (same of UTF and Unicode!) once that value has been computed.
- *
- * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16
- * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This
- * can be officially modified by altering the definition of Tcl_UniChar in
- * tcl.h, but do not do that unless you are sure what you're doing!
- */
-
-typedef struct String {
- int numChars; /* The number of chars in the string. -1 means
- * this value has not been calculated. >= 0
- * means that there is a valid Unicode rep, or
- * that the number of UTF bytes == the number
- * of chars. */
- int allocated; /* The amount of space actually allocated for
- * the UTF string (minus 1 byte for the
- * termination char). */
- int maxChars; /* Max number of chars that can fit in the
- * space allocated for the unicode array. */
- int hasUnicode; /* Boolean determining whether the string has
- * a Unicode representation. */
- Tcl_UniChar unicode[1]; /* The array of Unicode chars. The actual size
- * of this field depends on the 'maxChars'
- * field above. */
-} String;
-
-#define STRING_MAXCHARS \
- (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar))
-#define STRING_SIZE(numChars) \
- (sizeof(String) + ((numChars) * sizeof(Tcl_UniChar)))
-#define stringCheckLimits(numChars) \
- if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \
- Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \
- STRING_MAXCHARS); \
- }
-#define stringAttemptAlloc(numChars) \
- (String *) attemptckalloc((unsigned) STRING_SIZE(numChars) )
-#define stringAlloc(numChars) \
- (String *) ckalloc((unsigned) STRING_SIZE(numChars) )
-#define stringRealloc(ptr, numChars) \
- (String *) ckrealloc((ptr), (unsigned) STRING_SIZE(numChars) )
-#define stringAttemptRealloc(ptr, numChars) \
- (String *) attemptckrealloc((ptr), (unsigned) STRING_SIZE(numChars) )
-#define GET_STRING(objPtr) \
- ((String *) (objPtr)->internalRep.twoPtrValue.ptr1)
-#define SET_STRING(objPtr, stringPtr) \
- ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr))
/*
* TCL STRING GROWTH ALGORITHM
diff --git a/generic/tclStringRep.h b/generic/tclStringRep.h
new file mode 100644
index 0000000..227e6bc
--- /dev/null
+++ b/generic/tclStringRep.h
@@ -0,0 +1,97 @@
+/*
+ * tclStringRep.h --
+ *
+ * This file contains the definition of the Unicode string internal
+ * representation and macros to access it.
+ *
+ * A Unicode string is an internationalized string. Conceptually, a
+ * Unicode string is an array of 16-bit quantities organized as a
+ * sequence of properly formed UTF-8 characters. There is a one-to-one
+ * map between Unicode and UTF characters. Because Unicode characters
+ * have a fixed width, operations such as indexing operate on Unicode
+ * data. The String object is optimized for the case where each UTF char
+ * in a string is only one byte. In this case, we store the value of
+ * numChars, but we don't store the Unicode data (unless Tcl_GetUnicode
+ * is explicitly called).
+ *
+ * The String object type stores one or both formats. The default
+ * behavior is to store UTF. Once Unicode is calculated by a function, it
+ * is stored in the internal rep for future access (without an additional
+ * O(n) cost).
+ *
+ * To allow many appends to be done to an object without constantly
+ * reallocating the space for the string or Unicode representation, we
+ * allocate double the space for the string or Unicode and use the
+ * internal representation to keep track of how much space is used vs.
+ * allocated.
+ *
+ * Copyright (c) 1995-1997 Sun Microsystems, Inc.
+ * Copyright (c) 1999 by Scriptics Corporation.
+ *
+ * See the file "license.terms" for information on usage and redistribution of
+ * this file, and for a DISCLAIMER OF ALL WARRANTIES.
+ */
+
+/*
+ * The following structure is the internal rep for a String object. It keeps
+ * track of how much memory has been used and how much has been allocated for
+ * the Unicode and UTF string to enable growing and shrinking of the UTF and
+ * Unicode reps of the String object with fewer mallocs. To optimize string
+ * length and indexing operations, this structure also stores the number of
+ * characters (same of UTF and Unicode!) once that value has been computed.
+ *
+ * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16
+ * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This
+ * can be officially modified by altering the definition of Tcl_UniChar in
+ * tcl.h, but do not do that unless you are sure what you're doing!
+ */
+
+typedef struct String {
+ int numChars; /* The number of chars in the string. -1 means
+ * this value has not been calculated. >= 0
+ * means that there is a valid Unicode rep, or
+ * that the number of UTF bytes == the number
+ * of chars. */
+ int allocated; /* The amount of space actually allocated for
+ * the UTF string (minus 1 byte for the
+ * termination char). */
+ int maxChars; /* Max number of chars that can fit in the
+ * space allocated for the unicode array. */
+ int hasUnicode; /* Boolean determining whether the string has
+ * a Unicode representation. */
+ Tcl_UniChar unicode[1]; /* The array of Unicode chars. The actual size
+ * of this field depends on the 'maxChars'
+ * field above. */
+} String;
+
+#define STRING_MAXCHARS \
+ (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar))
+#define STRING_SIZE(numChars) \
+ (sizeof(String) + ((numChars) * sizeof(Tcl_UniChar)))
+#define stringCheckLimits(numChars) \
+ do { \
+ if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \
+ Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \
+ STRING_MAXCHARS); \
+ } \
+ } while (0)
+#define stringAttemptAlloc(numChars) \
+ (String *) attemptckalloc((unsigned) STRING_SIZE(numChars))
+#define stringAlloc(numChars) \
+ (String *) ckalloc((unsigned) STRING_SIZE(numChars))
+#define stringRealloc(ptr, numChars) \
+ (String *) ckrealloc((ptr), (unsigned) STRING_SIZE(numChars))
+#define stringAttemptRealloc(ptr, numChars) \
+ (String *) attemptckrealloc((ptr), (unsigned) STRING_SIZE(numChars))
+#define GET_STRING(objPtr) \
+ ((String *) (objPtr)->internalRep.twoPtrValue.ptr1)
+#define SET_STRING(objPtr, stringPtr) \
+ ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr))
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */