summaryrefslogtreecommitdiffstats
path: root/generic/tclUtil.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclUtil.c')
-rw-r--r--generic/tclUtil.c3817
1 files changed, 2697 insertions, 1120 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index b327b99..5119456 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1,118 +1,466 @@
-/*
+/*
* tclUtil.c --
*
- * This file contains utility procedures that are used by many Tcl
+ * This file contains utility functions that are used by many Tcl
* commands.
*
* Copyright (c) 1987-1993 The Regents of the University of California.
* Copyright (c) 1994-1998 Sun Microsystems, Inc.
- * Copyright (c) 2001 by Kevin B. Kenny. All rights reserved.
+ * Copyright (c) 2001 by Kevin B. Kenny. All rights reserved.
*
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+ * See the file "license.terms" for information on usage and redistribution of
+ * this file, and for a DISCLAIMER OF ALL WARRANTIES.
*/
#include "tclInt.h"
-#include "tclPort.h"
+#include <float.h>
+#include <math.h>
/*
- * The following variable holds the full path name of the binary
- * from which this application was executed, or NULL if it isn't
- * know. The value of the variable is set by the procedure
- * Tcl_FindExecutable. The storage space is dynamically allocated.
+ * The absolute pathname of the executable in which this Tcl library is
+ * running.
*/
-char *tclExecutableName = NULL;
-char *tclNativeExecutableName = NULL;
+static ProcessGlobalValue executableName = {
+ 0, 0, NULL, NULL, NULL, NULL, NULL
+};
/*
- * The following values are used in the flags returned by Tcl_ScanElement
- * and used by Tcl_ConvertElement. The value TCL_DONT_USE_BRACES is also
- * defined in tcl.h; make sure its value doesn't overlap with any of the
- * values below.
- *
- * TCL_DONT_USE_BRACES - 1 means the string mustn't be enclosed in
- * braces (e.g. it contains unmatched braces,
- * or ends in a backslash character, or user
- * just doesn't want braces); handle all
- * special characters by adding backslashes.
- * USE_BRACES - 1 means the string contains a special
- * character that can be handled simply by
- * enclosing the entire argument in braces.
- * BRACES_UNMATCHED - 1 means that braces aren't properly matched
- * in the argument.
+ * The following values are used in the flags arguments of Tcl*Scan*Element and
+ * Tcl*Convert*Element. The values TCL_DONT_USE_BRACES and TCL_DONT_QUOTE_HASH
+ * are defined in tcl.h, like so:
+ *
+#define TCL_DONT_USE_BRACES 1
+#define TCL_DONT_QUOTE_HASH 8
+ *
+ * Those are public flag bits which callers of the public routines
+ * Tcl_Convert*Element() can use to indicate:
+ *
+ * TCL_DONT_USE_BRACES - 1 means the caller is insisting that brace
+ * quoting not be used when converting the list
+ * element.
+ * TCL_DONT_QUOTE_HASH - 1 means the caller insists that a leading hash
+ * character ('#') should *not* be quoted. This
+ * is appropriate when the caller can guarantee
+ * the element is not the first element of a
+ * list, so [eval] cannot mis-parse the element
+ * as a comment.
+ *
+ * The remaining values which can be carried by the flags of these routines
+ * are for internal use only. Make sure they do not overlap with the public
+ * values above.
+ *
+ * The Tcl*Scan*Element() routines make a determination which of 4 modes of
+ * conversion is most appropriate for Tcl*Convert*Element() to perform, and
+ * sets two bits of the flags value to indicate the mode selected.
+ *
+ * CONVERT_NONE The element needs no quoting. Its literal string
+ * is suitable as is.
+ * CONVERT_BRACE The conversion should be enclosing the literal string
+ * in braces.
+ * CONVERT_ESCAPE The conversion should be using backslashes to escape
+ * any characters in the string that require it.
+ * CONVERT_MASK A mask value used to extract the conversion mode from
+ * the flags argument.
+ * Also indicates a strange conversion mode where all
+ * special characters are escaped with backslashes
+ * *except for braces*. This is a strange and unnecessary
+ * case, but it's part of the historical way in which
+ * lists have been formatted in Tcl. To experiment with
+ * removing this case, set the value of COMPAT to 0.
+ *
+ * One last flag value is used only by callers of TclScanElement(). The flag
+ * value produced by a call to Tcl*Scan*Element() will never leave this bit
+ * set.
+ *
+ * CONVERT_ANY The caller of TclScanElement() declares it can make
+ * no promise about what public flags will be passed to
+ * the matching call of TclConvertElement(). As such,
+ * TclScanElement() has to determine the worst case
+ * destination buffer length over all possibilities, and
+ * in other cases this means an overestimate of the
+ * required size.
+ *
+ * For more details, see the comments on the Tcl*Scan*Element and
+ * Tcl*Convert*Element routines.
*/
-#define USE_BRACES 2
-#define BRACES_UNMATCHED 4
+#define COMPAT 1
+#define CONVERT_NONE 0
+#define CONVERT_BRACE 2
+#define CONVERT_ESCAPE 4
+#define CONVERT_MASK (CONVERT_BRACE | CONVERT_ESCAPE)
+#define CONVERT_ANY 16
/*
- * The following values determine the precision used when converting
- * floating-point values to strings. This information is linked to all
- * of the tcl_precision variables in all interpreters via the procedure
- * TclPrecTraceProc.
+ * The following key is used by Tcl_PrintDouble and TclPrecTraceProc to
+ * access the precision to be used for double formatting.
*/
-static char precisionString[10] = "12";
- /* The string value of all the tcl_precision
- * variables. */
-static char precisionFormat[10] = "%.12g";
- /* The format string actually used in calls
- * to sprintf. */
-TCL_DECLARE_MUTEX(precisionMutex)
+static Tcl_ThreadDataKey precisionKey;
/*
- * Prototypes for procedures defined later in this file.
+ * Prototypes for functions defined later in this file.
*/
-static void UpdateStringOfEndOffset _ANSI_ARGS_((Tcl_Obj* objPtr));
-static int SetEndOffsetFromAny _ANSI_ARGS_((Tcl_Interp* interp,
- Tcl_Obj* objPtr));
+static void ClearHash(Tcl_HashTable *tablePtr);
+static void FreeProcessGlobalValue(ClientData clientData);
+static void FreeThreadHash(ClientData clientData);
+static Tcl_HashTable * GetThreadHash(Tcl_ThreadDataKey *keyPtr);
+static int SetEndOffsetFromAny(Tcl_Interp* interp,
+ Tcl_Obj* objPtr);
+static void UpdateStringOfEndOffset(Tcl_Obj* objPtr);
/*
- * The following is the Tcl object type definition for an object
- * that represents a list index in the form, "end-offset". It is
- * used as a performance optimization in TclGetIntForIndex. The
- * internal rep is an integer, so no memory management is required
- * for it.
+ * The following is the Tcl object type definition for an object that
+ * represents a list index in the form, "end-offset". It is used as a
+ * performance optimization in TclGetIntForIndex. The internal rep is an
+ * integer, so no memory management is required for it.
*/
Tcl_ObjType tclEndOffsetType = {
"end-offset", /* name */
- (Tcl_FreeInternalRepProc*) NULL, /* freeIntRepProc */
- (Tcl_DupInternalRepProc*) NULL, /* dupIntRepProc */
+ NULL, /* freeIntRepProc */
+ NULL, /* dupIntRepProc */
UpdateStringOfEndOffset, /* updateStringProc */
- SetEndOffsetFromAny
+ SetEndOffsetFromAny
};
+
+/*
+ * * STRING REPRESENTATION OF LISTS * * *
+ *
+ * The next several routines implement the conversions of strings to and
+ * from Tcl lists. To understand their operation, the rules of parsing
+ * and generating the string representation of lists must be known. Here
+ * we describe them in one place.
+ *
+ * A list is made up of zero or more elements. Any string is a list if
+ * it is made up of alternating substrings of element-separating ASCII
+ * whitespace and properly formatted elements.
+ *
+ * The ASCII characters which can make up the whitespace between list
+ * elements are:
+ *
+ * \u0009 \t TAB
+ * \u000A \n NEWLINE
+ * \u000B \v VERTICAL TAB
+ * \u000C \f FORM FEED
+ * \u000D \r CARRIAGE RETURN
+ * \u0020 SPACE
+ *
+ * NOTE: differences between this and other places where Tcl defines a role
+ * for "whitespace".
+ *
+ * * Unlike command parsing, here NEWLINE is just another whitespace
+ * character; its role as a command terminator in a script has no
+ * importance here.
+ *
+ * * Unlike command parsing, the BACKSLASH NEWLINE sequence is not
+ * considered to be a whitespace character.
+ *
+ * * Other Unicode whitespace characters (recognized by
+ * [string is space] or Tcl_UniCharIsSpace()) do not play any role
+ * as element separators in Tcl lists.
+ *
+ * * The NUL byte ought not appear, as it is not in strings properly
+ * encoded for Tcl, but if it is present, it is not treated as
+ * separating whitespace, or a string terminator. It is just
+ * another character in a list element.
+ *
+ * The interpretaton of a formatted substring as a list element follows
+ * rules similar to the parsing of the words of a command in a Tcl script.
+ * Backslash substitution plays a key role, and is defined exactly as it is
+ * in command parsing. The same routine, TclParseBackslash() is used in both
+ * command parsing and list parsing.
+ *
+ * NOTE: This means that if and when backslash substitution rules ever
+ * change for command parsing, the interpretation of strings as lists also
+ * changes.
+ *
+ * Backslash substitution replaces an "escape sequence" of one or more
+ * characters starting with
+ * \u005c \ BACKSLASH
+ * with a single character. The one character escape sequent case happens
+ * only when BACKSLASH is the last character in the string. In all other
+ * cases, the escape sequence is at least two characters long.
+ *
+ * The formatted substrings are interpreted as element values according to
+ * the following cases:
+ *
+ * * If the first character of a formatted substring is
+ * \u007b { OPEN BRACE
+ * then the end of the substring is the matching
+ * \u007d } CLOSE BRACE
+ * character, where matching is determined by counting nesting levels,
+ * and not including any brace characters that are contained within a
+ * backslash escape sequence in the nesting count. Having found the
+ * matching brace, all characters between the braces are the string
+ * value of the element. If no matching close brace is found before the
+ * end of the string, the string is not a Tcl list. If the character
+ * following the close brace is not an element separating whitespace
+ * character, or the end of the string, then the string is not a Tcl list.
+ *
+ * NOTE: this differs from a brace-quoted word in the parsing of a
+ * Tcl command only in its treatment of the backslash-newline sequence.
+ * In a list element, the literal characters in the backslash-newline
+ * sequence become part of the element value. In a script word,
+ * conversion to a single SPACE character is done.
+ *
+ * NOTE: Most list element values can be represented by a formatted
+ * substring using brace quoting. The exceptions are any element value
+ * that includes an unbalanced brace not in a backslash escape sequence,
+ * and any value that ends with a backslash not itself in a backslash
+ * escape sequence.
+ *
+ * * If the first character of a formatted substring is
+ * \u0022 " QUOTE
+ * then the end of the substring is the next QUOTE character, not counting
+ * any QUOTE characters that are contained within a backslash escape
+ * sequence. If no next QUOTE is found before the end of the string, the
+ * string is not a Tcl list. If the character following the closing QUOTE
+ * is not an element separating whitespace character, or the end of the
+ * string, then the string is not a Tcl list. Having found the limits
+ * of the substring, the element value is produced by performing backslash
+ * substitution on the character sequence between the open and close QUOTEs.
+ *
+ * NOTE: Any element value can be represented by this style of formatting,
+ * given suitable choice of backslash escape sequences.
+ *
+ * * All other formatted substrings are terminated by the next element
+ * separating whitespace character in the string. Having found the limits
+ * of the substring, the element value is produced by performing backslash
+ * substitution on it.
+ *
+ * NOTE: Any element value can be represented by this style of formatting,
+ * given suitable choice of backslash escape sequences, with one exception.
+ * The empty string cannot be represented as a list element without the use
+ * of either braces or quotes to delimit it.
+ *
+ * This collection of parsing rules is implemented in the routine
+ * TclFindElement().
+ *
+ * In order to produce lists that can be parsed by these rules, we need
+ * the ability to distinguish between characters that are part of a list
+ * element value from characters providing syntax that define the structure
+ * of the list. This means that our code that generates lists must at a
+ * minimum be able to produce escape sequences for the 10 characters
+ * identified above that have significance to a list parser.
+ *
+ * * * CANONICAL LISTS * * * * *
+ *
+ * In addition to the basic rules for parsing strings into Tcl lists, there
+ * are additional properties to be met by the set of list values that are
+ * generated by Tcl. Such list values are often said to be in "canonical
+ * form":
+ *
+ * * When any canonical list is evaluated as a Tcl script, it is a script
+ * of either zero commands (an empty list) or exactly one command. The
+ * command word is exactly the first element of the list, and each argument
+ * word is exactly one of the following elements of the list. This means
+ * that any characters that have special meaning during script evaluation
+ * need special treatment when canonical lists are produced:
+ *
+ * * Whitespace between elements may not include NEWLINE.
+ * * The command terminating character,
+ * \u003b ; SEMICOLON
+ * must be BRACEd, QUOTEd, or escaped so that it does not terminate
+ * the command prematurely.
+ * * Any of the characters that begin substitutions in scripts,
+ * \u0024 $ DOLLAR
+ * \u005b [ OPEN BRACKET
+ * \u005c \ BACKSLASH
+ * need to be BRACEd or escaped.
+ * * In any list where the first character of the first element is
+ * \u0023 # HASH
+ * that HASH character must be BRACEd, QUOTEd, or escaped so that it
+ * does not convert the command into a comment.
+ * * Any list element that contains the character sequence
+ * BACKSLASH NEWLINE cannot be formatted with BRACEs. The
+ * BACKSLASH character must be represented by an escape
+ * sequence, and unless QUOTEs are used, the NEWLINE must
+ * be as well.
+ *
+ * * It is also guaranteed that one can use a canonical list as a building
+ * block of a larger script within command substitution, as in this example:
+ * set script "puts \[[list $cmd $arg]]"; eval $script
+ * To support this usage, any appearance of the character
+ * \u005d ] CLOSE BRACKET
+ * in a list element must be BRACEd, QUOTEd, or escaped.
+ *
+ * * Finally it is guaranteed that enclosing a canonical list in braces
+ * produces a new value that is also a canonical list. This new list has
+ * length 1, and its only element is the original canonical list. This
+ * same guarantee also makes it possible to construct scripts where an
+ * argument word is given a list value by enclosing the canonical form
+ * of that list in braces:
+ * set script "puts {[list $one $two $three]}"; eval $script
+ * This sort of coding was once fairly common, though it's become more
+ * idiomatic to see the following instead:
+ * set script [list puts [list $one $two $three]]; eval $script
+ * In order to support this guarantee, every canonical list must have
+ * balance when counting those braces that are not in escape sequences.
+ *
+ * Within these constraints, the canonical list generation routines
+ * TclScanElement() and TclConvertElement() attempt to generate the string
+ * for any list that is easiest to read. When an element value is itself
+ * acceptable as the formatted substring, it is usually used (CONVERT_NONE).
+ * When some quoting or escaping is required, use of BRACEs (CONVERT_BRACE)
+ * is usually preferred over the use of escape sequences (CONVERT_ESCAPE).
+ * There are some exceptions to both of these preferences for reasons of
+ * code simplicity, efficiency, and continuation of historical habits.
+ * Canonical lists never use the QUOTE formatting to delimit their elements
+ * because that form of quoting does not nest, which makes construction of
+ * nested lists far too much trouble. Canonical lists always use only a
+ * single SPACE character for element-separating whitespace.
+ *
+ * * * FUTURE CONSIDERATIONS * * *
+ *
+ * When a list element requires quoting or escaping due to a CLOSE BRACKET
+ * character or an internal QUOTE character, a strange formatting mode is
+ * recommended. For example, if the value "a{b]c}d" is converted by the
+ * usual modes:
+ *
+ * CONVERT_BRACE: a{b]c}d => {a{b]c}d}
+ * CONVERT_ESCAPE: a{b]c}d => a\{b\]c\}d
+ *
+ * we get perfectly usable formatted list elements. However, this is not
+ * what Tcl releases have been producing. Instead, we have:
+ *
+ * CONVERT_MASK: a{b]c}d => a{b\]c}d
+ *
+ * where the CLOSE BRACKET is escaped, but the BRACEs are not. The same
+ * effect can be seen replacing ] with " in this example. There does not
+ * appear to be any functional or aesthetic purpose for this strange
+ * additional mode. The sole purpose I can see for preserving it is to
+ * keep generating the same formatted lists programmers have become accustomed
+ * to, and perhaps written tests to expect. That is, compatibility only.
+ * The additional code complexity required to support this mode is significant.
+ * The lines of code supporting it are delimited in the routines below with
+ * #if COMPAT directives. This makes it easy to experiment with eliminating
+ * this formatting mode simply with "#define COMPAT 0" above. I believe
+ * this is worth considering.
+ *
+ * Another consideration is the treatment of QUOTE characters in list elements.
+ * TclConvertElement() must have the ability to produce the escape sequence
+ * \" so that when a list element begins with a QUOTE we do not confuse
+ * that first character with a QUOTE used as list syntax to define list
+ * structure. However, that is the only place where QUOTE characters need
+ * quoting. In this way, handling QUOTE could really be much more like
+ * the way we handle HASH which also needs quoting and escaping only in
+ * particular situations. Following up this could increase the set of
+ * list elements that can use the CONVERT_NONE formatting mode.
+ *
+ * More speculative is that the demands of canonical list form require brace
+ * balance for the list as a whole, while the current implementation achieves
+ * this by establishing brace balance for every element.
+ *
+ * Finally, a reminder that the rules for parsing and formatting lists are
+ * closely tied together with the rules for parsing and evaluating scripts,
+ * and will need to evolve in sync.
+ */
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclMaxListLength --
+ *
+ * Given 'bytes' pointing to 'numBytes' bytes, scan through them and
+ * count the number of whitespace runs that could be list element
+ * separators. If 'numBytes' is -1, scan to the terminating '\0'.
+ * Not a full list parser. Typically used to get a quick and dirty
+ * overestimate of length size in order to allocate space for an
+ * actual list parser to operate with.
+ *
+ * Results:
+ * Returns the largest number of list elements that could possibly
+ * be in this string, interpreted as a Tcl list. If 'endPtr' is not
+ * NULL, writes a pointer to the end of the string scanned there.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclMaxListLength(
+ CONST char *bytes,
+ int numBytes,
+ CONST char **endPtr)
+{
+ int count = 0;
+
+ if ((numBytes == 0) || ((numBytes == -1) && (*bytes == '\0'))) {
+ /* Empty string case - quick exit */
+ goto done;
+ }
+ /* No list element before leading white space */
+ count += 1 - TclIsSpaceProc(*bytes);
+
+ /* Count white space runs as potential element separators */
+ while (numBytes) {
+ if ((numBytes == -1) && (*bytes == '\0')) {
+ break;
+ }
+ if (TclIsSpaceProc(*bytes)) {
+ /* Space run started; bump count */
+ count++;
+ do {
+ bytes++;
+ numBytes -= (numBytes != -1);
+ } while (numBytes && TclIsSpaceProc(*bytes));
+ if ((numBytes == 0) || ((numBytes == -1) && (*bytes == '\0'))) {
+ break;
+ }
+ /* (*bytes) is non-space; return to counting state */
+ }
+ bytes++;
+ numBytes -= (numBytes != -1);
+ }
+
+ /* No list element following trailing white space */
+ count -= TclIsSpaceProc(bytes[-1]);
+
+ done:
+ if (endPtr) {
+ *endPtr = bytes;
+ }
+ return count;
+}
/*
*----------------------------------------------------------------------
*
* TclFindElement --
*
- * Given a pointer into a Tcl list, locate the first (or next)
- * element in the list.
+ * Given a pointer into a Tcl list, locate the first (or next) element in
+ * the list.
*
* Results:
- * The return value is normally TCL_OK, which means that the
- * element was successfully located. If TCL_ERROR is returned
- * it means that list didn't have proper list structure;
- * the interp's result contains a more detailed error message.
+ * The return value is normally TCL_OK, which means that the element was
+ * successfully located. If TCL_ERROR is returned it means that list
+ * didn't have proper list structure; the interp's result contains a more
+ * detailed error message.
*
* If TCL_OK is returned, then *elementPtr will be set to point to the
* first element of list, and *nextPtr will be set to point to the
* character just after any white space following the last character
- * that's part of the element. If this is the last argument in the
- * list, then *nextPtr will point just after the last character in the
- * list (i.e., at the character at list+listLength). If sizePtr is
- * non-NULL, *sizePtr is filled in with the number of characters in the
- * element. If the element is in braces, then *elementPtr will point
- * to the character after the opening brace and *sizePtr will not
- * include either of the braces. If there isn't an element in the list,
- * *sizePtr will be zero, and both *elementPtr and *termPtr will point
- * just after the last character in the list. Note: this procedure does
- * NOT collapse backslash sequences.
+ * that's part of the element. If this is the last argument in the list,
+ * then *nextPtr will point just after the last character in the list
+ * (i.e., at the character at list+listLength). If sizePtr is non-NULL,
+ * *sizePtr is filled in with the number of bytes in the element. If
+ * the element is in braces, then *elementPtr will point to the character
+ * after the opening brace and *sizePtr will not include either of the
+ * braces. If there isn't an element in the list, *sizePtr will be zero,
+ * and both *elementPtr and *nextPtr will point just after the last
+ * character in the list. If literalPtr is non-NULL, *literalPtr is set
+ * to a boolean value indicating whether the substring returned as
+ * the values of **elementPtr and *sizePtr is the literal value of
+ * a list element. If not, a call to TclCopyAndCollapse() is needed
+ * to produce the actual value of the list element. Note: this function
+ * does NOT collapse backslash sequences, but uses *literalPtr to tell
+ * callers when it is required for them to do so.
*
* Side effects:
* None.
@@ -121,25 +469,27 @@ Tcl_ObjType tclEndOffsetType = {
*/
int
-TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,
- bracePtr)
- Tcl_Interp *interp; /* Interpreter to use for error reporting.
- * If NULL, then no error message is left
- * after errors. */
- CONST char *list; /* Points to the first byte of a string
+TclFindElement(
+ Tcl_Interp *interp, /* Interpreter to use for error reporting. If
+ * NULL, then no error message is left after
+ * errors. */
+ CONST char *list, /* Points to the first byte of a string
* containing a Tcl list with zero or more
* elements (possibly in braces). */
- int listLength; /* Number of bytes in the list's string. */
- CONST char **elementPtr; /* Where to put address of first significant
+ int listLength, /* Number of bytes in the list's string. */
+ CONST char **elementPtr, /* Where to put address of first significant
* character in first element of list. */
- CONST char **nextPtr; /* Fill in with location of character just
+ CONST char **nextPtr, /* Fill in with location of character just
* after all white space following end of
* argument (next arg or end of list). */
- int *sizePtr; /* If non-zero, fill in with size of
+ int *sizePtr, /* If non-zero, fill in with size of
* element. */
- int *bracePtr; /* If non-zero, fill in with non-zero/zero
- * to indicate that arg was/wasn't
- * in braces. */
+ int *literalPtr) /* If non-zero, fill in with non-zero/zero to
+ * indicate that the substring of *sizePtr
+ * bytes starting at **elementPtr is/is not
+ * the literal list element and therefore
+ * does not/does require a call to
+ * TclCopyAndCollapse() by the caller. */
{
CONST char *p = list;
CONST char *elemStart; /* Points to first byte of first element. */
@@ -148,16 +498,17 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,
int inQuotes = 0;
int size = 0; /* lint. */
int numChars;
+ int literal = 1;
CONST char *p2;
-
+
/*
- * Skim off leading white space and check for an opening brace or
- * quote. We treat embedded NULLs in the list as bytes belonging to
- * a list element.
+ * Skim off leading white space and check for an opening brace or quote.
+ * We treat embedded NULLs in the list as bytes belonging to a list
+ * element.
*/
limit = (list + listLength);
- while ((p < limit) && (isspace(UCHAR(*p)))) { /* INTL: ISO space. */
+ while ((p < limit) && (TclIsSpaceProc(*p))) {
p++;
}
if (p == limit) { /* no element found */
@@ -173,9 +524,6 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,
p++;
}
elemStart = p;
- if (bracePtr != 0) {
- *bracePtr = openBraces;
- }
/*
* Find element's end (a space, close brace, or the end of the string).
@@ -183,123 +531,119 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,
while (p < limit) {
switch (*p) {
-
/*
* Open brace: don't treat specially unless the element is in
* braces. In this case, keep a nesting count.
*/
- case '{':
- if (openBraces != 0) {
- openBraces++;
- }
- break;
+ case '{':
+ if (openBraces != 0) {
+ openBraces++;
+ }
+ break;
/*
* Close brace: if element is in braces, keep nesting count and
* quit when the last close brace is seen.
*/
- case '}':
- if (openBraces > 1) {
- openBraces--;
- } else if (openBraces == 1) {
- size = (p - elemStart);
- p++;
- if ((p >= limit)
- || isspace(UCHAR(*p))) { /* INTL: ISO space. */
- goto done;
- }
+ case '}':
+ if (openBraces > 1) {
+ openBraces--;
+ } else if (openBraces == 1) {
+ size = (p - elemStart);
+ p++;
+ if ((p >= limit) || TclIsSpaceProc(*p)) {
+ goto done;
+ }
- /*
- * Garbage after the closing brace; return an error.
- */
-
- if (interp != NULL) {
- char buf[100];
-
- p2 = p;
- while ((p2 < limit)
- && (!isspace(UCHAR(*p2))) /* INTL: ISO space. */
- && (p2 < p+20)) {
- p2++;
- }
- sprintf(buf,
- "list element in braces followed by \"%.*s\" instead of space",
- (int) (p2-p), p);
- Tcl_SetResult(interp, buf, TCL_VOLATILE);
+ /*
+ * Garbage after the closing brace; return an error.
+ */
+
+ if (interp != NULL) {
+ p2 = p;
+ while ((p2 < limit) && (!TclIsSpaceProc(*p2))
+ && (p2 < p+20)) {
+ p2++;
}
- return TCL_ERROR;
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "list element in braces followed by \"%.*s\" "
+ "instead of space", (int) (p2-p), p));
}
- break;
+ return TCL_ERROR;
+ }
+ break;
/*
- * Backslash: skip over everything up to the end of the
- * backslash sequence.
+ * Backslash: skip over everything up to the end of the backslash
+ * sequence.
*/
- case '\\': {
- TclParseBackslash(p, limit - p, &numChars, NULL);
- p += (numChars - 1);
- break;
+ case '\\':
+ if (openBraces == 0) {
+ /*
+ * A backslash sequence not within a brace quoted element
+ * means the value of the element is different from the
+ * substring we are parsing. A call to TclCopyAndCollapse()
+ * is needed to produce the element value. Inform the caller.
+ */
+ literal = 0;
}
+ TclParseBackslash(p, limit - p, &numChars, NULL);
+ p += (numChars - 1);
+ break;
/*
* Space: ignore if element is in braces or quotes; otherwise
* terminate element.
*/
- case ' ':
- case '\f':
- case '\n':
- case '\r':
- case '\t':
- case '\v':
- if ((openBraces == 0) && !inQuotes) {
- size = (p - elemStart);
- goto done;
- }
- break;
+ case ' ':
+ case '\f':
+ case '\n':
+ case '\r':
+ case '\t':
+ case '\v':
+ if ((openBraces == 0) && !inQuotes) {
+ size = (p - elemStart);
+ goto done;
+ }
+ break;
/*
* Double-quote: if element is in quotes then terminate it.
*/
- case '"':
- if (inQuotes) {
- size = (p - elemStart);
- p++;
- if ((p >= limit)
- || isspace(UCHAR(*p))) { /* INTL: ISO space */
- goto done;
- }
+ case '"':
+ if (inQuotes) {
+ size = (p - elemStart);
+ p++;
+ if ((p >= limit) || TclIsSpaceProc(*p)) {
+ goto done;
+ }
- /*
- * Garbage after the closing quote; return an error.
- */
-
- if (interp != NULL) {
- char buf[100];
-
- p2 = p;
- while ((p2 < limit)
- && (!isspace(UCHAR(*p2))) /* INTL: ISO space */
- && (p2 < p+20)) {
- p2++;
- }
- sprintf(buf,
- "list element in quotes followed by \"%.*s\" %s",
- (int) (p2-p), p, "instead of space");
- Tcl_SetResult(interp, buf, TCL_VOLATILE);
+ /*
+ * Garbage after the closing quote; return an error.
+ */
+
+ if (interp != NULL) {
+ p2 = p;
+ while ((p2 < limit) && (!TclIsSpaceProc(*p2))
+ && (p2 < p+20)) {
+ p2++;
}
- return TCL_ERROR;
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "list element in quotes followed by \"%.*s\" "
+ "instead of space", (int) (p2-p), p));
}
- break;
+ return TCL_ERROR;
+ }
+ break;
}
p++;
}
-
/*
* End of list: terminate element.
*/
@@ -321,8 +665,8 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,
size = (p - elemStart);
}
- done:
- while ((p < limit) && (isspace(UCHAR(*p)))) { /* INTL: ISO space. */
+ done:
+ while ((p < limit) && (TclIsSpaceProc(*p))) {
p++;
}
*elementPtr = elemStart;
@@ -330,6 +674,9 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,
if (sizePtr != 0) {
*sizePtr = size;
}
+ if (literalPtr != 0) {
+ *literalPtr = literal;
+ }
return TCL_OK;
}
@@ -338,7 +685,7 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,
*
* TclCopyAndCollapse --
*
- * Copy a string and eliminate any backslashes that aren't in braces.
+ * Copy a string and substitute all backslash escape sequences
*
* Results:
* Count bytes get copied from src to dst. Along the way, backslash
@@ -353,10 +700,10 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,
*/
int
-TclCopyAndCollapse(count, src, dst)
- int count; /* Number of bytes to copy from src. */
- CONST char *src; /* Copy from here... */
- char *dst; /* ... to here. */
+TclCopyAndCollapse(
+ int count, /* Number of byte to copy from src. */
+ CONST char *src, /* Copy from here... */
+ char *dst) /* ... to here. */
{
int newCount = 0;
@@ -390,21 +737,19 @@ TclCopyAndCollapse(count, src, dst)
* Splits a list up into its constituent fields.
*
* Results
- * The return value is normally TCL_OK, which means that
- * the list was successfully split up. If TCL_ERROR is
- * returned, it means that "list" didn't have proper list
- * structure; the interp's result will contain a more detailed
- * error message.
- *
- * *argvPtr will be filled in with the address of an array
- * whose elements point to the elements of list, in order.
- * *argcPtr will get filled in with the number of valid elements
- * in the array. A single block of memory is dynamically allocated
- * to hold both the argv array and a copy of the list (with
- * backslashes and braces removed in the standard way).
- * The caller must eventually free this memory by calling free()
- * on *argvPtr. Note: *argvPtr and *argcPtr are only modified
- * if the procedure returns normally.
+ * The return value is normally TCL_OK, which means that the list was
+ * successfully split up. If TCL_ERROR is returned, it means that "list"
+ * didn't have proper list structure; the interp's result will contain a
+ * more detailed error message.
+ *
+ * *argvPtr will be filled in with the address of an array whose elements
+ * point to the elements of list, in order. *argcPtr will get filled in
+ * with the number of valid elements in the array. A single block of
+ * memory is dynamically allocated to hold both the argv array and a copy
+ * of the list (with backslashes and braces removed in the standard way).
+ * The caller must eventually free this memory by calling free() on
+ * *argvPtr. Note: *argvPtr and *argcPtr are only modified if the
+ * function returns normally.
*
* Side effects:
* Memory is allocated.
@@ -413,54 +758,40 @@ TclCopyAndCollapse(count, src, dst)
*/
int
-Tcl_SplitList(interp, list, argcPtr, argvPtr)
- Tcl_Interp *interp; /* Interpreter to use for error reporting.
- * If NULL, no error message is left. */
- CONST char *list; /* Pointer to string with list structure. */
- int *argcPtr; /* Pointer to location to fill in with
- * the number of elements in the list. */
- CONST char ***argvPtr; /* Pointer to place to store pointer to
- * array of pointers to list elements. */
+Tcl_SplitList(
+ Tcl_Interp *interp, /* Interpreter to use for error reporting. If
+ * NULL, no error message is left. */
+ CONST char *list, /* Pointer to string with list structure. */
+ int *argcPtr, /* Pointer to location to fill in with the
+ * number of elements in the list. */
+ CONST char ***argvPtr) /* Pointer to place to store pointer to array
+ * of pointers to list elements. */
{
- CONST char **argv;
- CONST char *l;
+ CONST char **argv, *end, *element;
char *p;
- int length, size, i, result, elSize, brace;
- CONST char *element;
+ int length, size, i, result, elSize;
/*
- * Figure out how much space to allocate. There must be enough
- * space for both the array of pointers and also for a copy of
- * the list. To estimate the number of pointers needed, count
- * the number of space characters in the list.
+ * Allocate enough space to work in. A (CONST char *) for each
+ * (possible) list element plus one more for terminating NULL,
+ * plus as many bytes as in the original string value, plus one
+ * more for a terminating '\0'. Space used to hold element separating
+ * white space in the original string gets re-purposed to hold '\0'
+ * characters in the argv array.
*/
- for (size = 2, l = list; *l != 0; l++) {
- if (isspace(UCHAR(*l))) { /* INTL: ISO space. */
- size++;
- /* Consecutive space can only count as a single list delimiter */
- while (1) {
- char next = *(l + 1);
- if (next == '\0') {
- break;
- }
- ++l;
- if (isspace(UCHAR(next))) {
- continue;
- }
- break;
- }
- }
- }
- length = l - list;
+ size = TclMaxListLength(list, -1, &end) + 1;
+ length = end - list;
argv = (CONST char **) ckalloc((unsigned)
((size * sizeof(char *)) + length + 1));
+
for (i = 0, p = ((char *) argv) + size*sizeof(char *);
*list != 0; i++) {
CONST char *prevList = list;
-
- result = TclFindElement(interp, list, length, &element,
- &list, &elSize, &brace);
+ int literal;
+
+ result = TclFindElement(interp, list, length, &element, &list,
+ &elSize, &literal);
length -= (list - prevList);
if (result != TCL_OK) {
ckfree((char *) argv);
@@ -478,14 +809,13 @@ Tcl_SplitList(interp, list, argcPtr, argvPtr)
return TCL_ERROR;
}
argv[i] = p;
- if (brace) {
- memcpy((VOID *) p, (VOID *) element, (size_t) elSize);
+ if (literal) {
+ memcpy(p, element, (size_t) elSize);
p += elSize;
*p = 0;
p++;
} else {
- TclCopyAndCollapse(elSize, element, p);
- p += elSize+1;
+ p += 1 + TclCopyAndCollapse(elSize, element, p);
}
}
@@ -500,17 +830,15 @@ Tcl_SplitList(interp, list, argcPtr, argvPtr)
*
* Tcl_ScanElement --
*
- * This procedure is a companion procedure to Tcl_ConvertElement.
- * It scans a string to see what needs to be done to it (e.g. add
- * backslashes or enclosing braces) to make the string into a
- * valid Tcl list element.
+ * This function is a companion function to Tcl_ConvertElement. It scans
+ * a string to see what needs to be done to it (e.g. add backslashes or
+ * enclosing braces) to make the string into a valid Tcl list element.
*
* Results:
- * The return value is an overestimate of the number of characters
- * that will be needed by Tcl_ConvertElement to produce a valid
- * list element from string. The word at *flagPtr is filled in
- * with a value needed by Tcl_ConvertElement when doing the actual
- * conversion.
+ * The return value is an overestimate of the number of bytes that
+ * will be needed by Tcl_ConvertElement to produce a valid list element
+ * from src. The word at *flagPtr is filled in with a value needed by
+ * Tcl_ConvertElement when doing the actual conversion.
*
* Side effects:
* None.
@@ -519,12 +847,12 @@ Tcl_SplitList(interp, list, argcPtr, argvPtr)
*/
int
-Tcl_ScanElement(string, flagPtr)
- register CONST char *string; /* String to convert to list element. */
- register int *flagPtr; /* Where to store information to guide
- * Tcl_ConvertCountedElement. */
+Tcl_ScanElement(
+ register CONST char *src, /* String to convert to list element. */
+ register int *flagPtr) /* Where to store information to guide
+ * Tcl_ConvertCountedElement. */
{
- return Tcl_ScanCountedElement(string, -1, flagPtr);
+ return Tcl_ScanCountedElement(src, -1, flagPtr);
}
/*
@@ -532,19 +860,17 @@ Tcl_ScanElement(string, flagPtr)
*
* Tcl_ScanCountedElement --
*
- * This procedure is a companion procedure to
- * Tcl_ConvertCountedElement. It scans a string to see what
- * needs to be done to it (e.g. add backslashes or enclosing
- * braces) to make the string into a valid Tcl list element.
- * If length is -1, then the string is scanned up to the first
- * null byte.
+ * This function is a companion function to Tcl_ConvertCountedElement. It
+ * scans a string to see what needs to be done to it (e.g. add
+ * backslashes or enclosing braces) to make the string into a valid Tcl
+ * list element. If length is -1, then the string is scanned from src up
+ * to the first null byte.
*
* Results:
- * The return value is an overestimate of the number of characters
- * that will be needed by Tcl_ConvertCountedElement to produce a
- * valid list element from string. The word at *flagPtr is
- * filled in with a value needed by Tcl_ConvertCountedElement
- * when doing the actual conversion.
+ * The return value is an overestimate of the number of bytes that
+ * will be needed by Tcl_ConvertCountedElement to produce a valid list
+ * element from src. The word at *flagPtr is filled in with a value
+ * needed by Tcl_ConvertCountedElement when doing the actual conversion.
*
* Side effects:
* None.
@@ -553,115 +879,266 @@ Tcl_ScanElement(string, flagPtr)
*/
int
-Tcl_ScanCountedElement(string, length, flagPtr)
- CONST char *string; /* String to convert to Tcl list element. */
- int length; /* Number of bytes in string, or -1. */
- int *flagPtr; /* Where to store information to guide
+Tcl_ScanCountedElement(
+ CONST char *src, /* String to convert to Tcl list element. */
+ int length, /* Number of bytes in src, or -1. */
+ int *flagPtr) /* Where to store information to guide
* Tcl_ConvertElement. */
{
- int flags, nestingLevel;
- register CONST char *p, *lastChar;
+ int flags = CONVERT_ANY;
+ int numBytes = TclScanElement(src, length, &flags);
- /*
- * This procedure and Tcl_ConvertElement together do two things:
- *
- * 1. They produce a proper list, one that will yield back the
- * argument strings when evaluated or when disassembled with
- * Tcl_SplitList. This is the most important thing.
- *
- * 2. They try to produce legible output, which means minimizing the
- * use of backslashes (using braces instead). However, there are
- * some situations where backslashes must be used (e.g. an element
- * like "{abc": the leading brace will have to be backslashed.
- * For each element, one of three things must be done:
- *
- * (a) Use the element as-is (it doesn't contain any special
- * characters). This is the most desirable option.
- *
- * (b) Enclose the element in braces, but leave the contents alone.
- * This happens if the element contains embedded space, or if it
- * contains characters with special interpretation ($, [, ;, or \),
- * or if it starts with a brace or double-quote, or if there are
- * no characters in the element.
- *
- * (c) Don't enclose the element in braces, but add backslashes to
- * prevent special interpretation of special characters. This is a
- * last resort used when the argument would normally fall under case
- * (b) but contains unmatched braces. It also occurs if the last
- * character of the argument is a backslash or if the element contains
- * a backslash followed by newline.
- *
- * The procedure figures out how many bytes will be needed to store
- * the result (actually, it overestimates). It also collects information
- * about the element in the form of a flags word.
- *
- * Note: list elements produced by this procedure and
- * Tcl_ConvertCountedElement must have the property that they can be
- * enclosing in curly braces to make sub-lists. This means, for
- * example, that we must not leave unmatched curly braces in the
- * resulting list element. This property is necessary in order for
- * procedures like Tcl_DStringStartSublist to work.
- */
+ *flagPtr = flags;
+ return numBytes;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclScanElement --
+ *
+ * This function is a companion function to TclConvertElement. It
+ * scans a string to see what needs to be done to it (e.g. add
+ * backslashes or enclosing braces) to make the string into a valid Tcl
+ * list element. If length is -1, then the string is scanned from src up
+ * to the first null byte. A NULL value for src is treated as an
+ * empty string. The incoming value of *flagPtr is a report from the
+ * caller what additional flags it will pass to TclConvertElement().
+ *
+ * Results:
+ * The recommended formatting mode for the element is determined and
+ * a value is written to *flagPtr indicating that recommendation. This
+ * recommendation is combined with the incoming flag values in *flagPtr
+ * set by the caller to determine how many bytes will be needed by
+ * TclConvertElement() in which to write the formatted element following
+ * the recommendation modified by the flag values. This number of bytes
+ * is the return value of the routine. In some situations it may be
+ * an overestimate, but so long as the caller passes the same flags
+ * to TclConvertElement(), it will be large enough.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
- nestingLevel = 0;
- flags = 0;
- if (string == NULL) {
- string = "";
- }
- if (length == -1) {
- length = strlen(string);
+int
+TclScanElement(
+ CONST char *src, /* String to convert to Tcl list element. */
+ int length, /* Number of bytes in src, or -1. */
+ int *flagPtr) /* Where to store information to guide
+ * Tcl_ConvertElement. */
+{
+ CONST char *p = src;
+ int nestingLevel = 0; /* Brace nesting count */
+ int forbidNone = 0; /* Do not permit CONVERT_NONE mode. Something
+ needs protection or escape. */
+ int requireEscape = 0; /* Force use of CONVERT_ESCAPE mode. For some
+ * reason bare or brace-quoted form fails. */
+ int extra = 0; /* Count of number of extra bytes needed for
+ * formatted element, assuming we use escape
+ * sequences in formatting. */
+ int bytesNeeded; /* Buffer length computed to complete the
+ * element formatting in the selected mode. */
+#if COMPAT
+ int preferEscape = 0; /* Use preferences to track whether to use */
+ int preferBrace = 0; /* CONVERT_MASK mode. */
+ int braceCount = 0; /* Count of all braces '{' '}' seen. */
+#endif
+
+ if ((p == NULL) || (length == 0) || ((*p == '\0') && (length == -1))) {
+ /* Empty string element must be brace quoted. */
+ *flagPtr = CONVERT_BRACE;
+ return 2;
}
- lastChar = string + length;
- p = string;
- if ((p == lastChar) || (*p == '{') || (*p == '"')) {
- flags |= USE_BRACES;
+
+ if ((*p == '{') || (*p == '"')) {
+ /*
+ * Must escape or protect so leading character of value is not
+ * misinterpreted as list element delimiting syntax.
+ */
+ forbidNone = 1;
+#if COMPAT
+ preferBrace = 1;
+#endif
}
- for ( ; p < lastChar; p++) {
+
+ while (length) {
switch (*p) {
- case '{':
- nestingLevel++;
- break;
- case '}':
- nestingLevel--;
- if (nestingLevel < 0) {
- flags |= TCL_DONT_USE_BRACES|BRACES_UNMATCHED;
- }
- break;
- case '[':
- case '$':
- case ';':
- case ' ':
- case '\f':
- case '\n':
- case '\r':
- case '\t':
- case '\v':
- flags |= USE_BRACES;
+ case '{':
+#if COMPAT
+ braceCount++;
+#endif
+ extra++; /* Escape '{' => '\{' */
+ nestingLevel++;
+ break;
+ case '}':
+#if COMPAT
+ braceCount++;
+#endif
+ extra++; /* Escape '}' => '\}' */
+ nestingLevel--;
+ if (nestingLevel < 0) {
+ /* Unbalanced braces! Cannot format with brace quoting. */
+ requireEscape = 1;
+ }
+ break;
+ case ']':
+ case '"':
+#if COMPAT
+ forbidNone = 1;
+ extra++; /* Escapes all just prepend a backslash */
+ preferEscape = 1;
+ break;
+#else
+ /* FLOW THROUGH */
+#endif
+ case '[':
+ case '$':
+ case ';':
+ case ' ':
+ case '\f':
+ case '\n':
+ case '\r':
+ case '\t':
+ case '\v':
+ forbidNone = 1;
+ extra++; /* Escape sequences all one byte longer. */
+#if COMPAT
+ preferBrace = 1;
+#endif
+ break;
+ case '\\':
+ extra++; /* Escape '\' => '\\' */
+ if ((length == 1) || ((length == -1) && (p[1] == '\0'))) {
+ /* Final backslash. Cannot format with brace quoting. */
+ requireEscape = 1;
break;
- case '\\':
- if ((p+1 == lastChar) || (p[1] == '\n')) {
- flags = TCL_DONT_USE_BRACES | BRACES_UNMATCHED;
- } else {
- int size;
-
- TclParseBackslash(p, lastChar - p, &size, NULL);
- p += size-1;
- flags |= USE_BRACES;
- }
+ }
+ if (p[1] == '\n') {
+ extra++; /* Escape newline => '\n', one byte longer */
+ /* Backslash newline sequence. Brace quoting not permitted. */
+ requireEscape = 1;
+ length -= (length > 0);
+ p++;
break;
+ }
+ if ((p[1] == '{') || (p[1] == '}') || (p[1] == '\\')) {
+ extra++; /* Escape sequences all one byte longer. */
+ length -= (length > 0);
+ p++;
+ }
+ forbidNone = 1;
+#if COMPAT
+ preferBrace = 1;
+#endif
+ break;
+ case '\0':
+ if (length == -1) {
+ goto endOfString;
+ }
+ /* TODO: Panic on improper encoding? */
+ break;
}
+ length -= (length > 0);
+ p++;
}
+
+ endOfString:
if (nestingLevel != 0) {
- flags = TCL_DONT_USE_BRACES | BRACES_UNMATCHED;
+ /* Unbalanced braces! Cannot format with brace quoting. */
+ requireEscape = 1;
}
- *flagPtr = flags;
- /*
- * Allow enough space to backslash every character plus leave
- * two spaces for braces.
- */
+ /* We need at least as many bytes as are in the element value... */
+ bytesNeeded = p - src;
- return 2*(p-string) + 2;
+ if (requireEscape) {
+ /*
+ * We must use escape sequences. Add all the extra bytes needed
+ * to have room to create them.
+ */
+ bytesNeeded += extra;
+ /* Make room to escape leading #, if needed. */
+ if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) {
+ bytesNeeded++;
+ }
+ *flagPtr = CONVERT_ESCAPE;
+ goto overflowCheck;
+ }
+ if (*flagPtr & CONVERT_ANY) {
+ /*
+ * The caller has not let us know what flags it will pass to
+ * TclConvertElement() so compute the max size we might need for
+ * any possible choice. Normally the formatting using escape
+ * sequences is the longer one, and a minimum "extra" value of 2
+ * makes sure we don't request too small a buffer in those edge
+ * cases where that's not true.
+ */
+ if (extra < 2) {
+ extra = 2;
+ }
+ *flagPtr &= ~CONVERT_ANY;
+ *flagPtr |= TCL_DONT_USE_BRACES;
+ }
+ if (forbidNone) {
+ /* We must request some form of quoting of escaping... */
+#if COMPAT
+ if (preferEscape && !preferBrace) {
+ /*
+ * If we are quoting solely due to ] or internal " characters
+ * use the CONVERT_MASK mode where we escape all special
+ * characters except for braces. "extra" counted space needed
+ * to escape braces too, so substract "braceCount" to get our
+ * actual needs.
+ */
+ bytesNeeded += (extra - braceCount);
+ /* Make room to escape leading #, if needed. */
+ if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) {
+ bytesNeeded++;
+ }
+ /*
+ * If the caller reports it will direct TclConvertElement() to
+ * use full escapes on the element, add back the bytes needed to
+ * escape the braces.
+ */
+ if (*flagPtr & TCL_DONT_USE_BRACES) {
+ bytesNeeded += braceCount;
+ }
+ *flagPtr = CONVERT_MASK;
+ goto overflowCheck;
+ }
+#endif
+ if (*flagPtr & TCL_DONT_USE_BRACES) {
+ /*
+ * If the caller reports it will direct TclConvertElement() to
+ * use escapes, add the extra bytes needed to have room for them.
+ */
+ bytesNeeded += extra;
+ /* Make room to escape leading #, if needed. */
+ if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) {
+ bytesNeeded++;
+ }
+ } else {
+ /* Add 2 bytes for room for the enclosing braces. */
+ bytesNeeded += 2;
+ }
+ *flagPtr = CONVERT_BRACE;
+ goto overflowCheck;
+ }
+
+ /* So far, no need to quote or escape anything. */
+ if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) {
+ /* If we need to quote a leading #, make room to enclose in braces. */
+ bytesNeeded += 2;
+ }
+ *flagPtr = CONVERT_NONE;
+
+ overflowCheck:
+ if (bytesNeeded < 0) {
+ Tcl_Panic("TclScanElement: string length overflow");
+ }
+ return bytesNeeded;
}
/*
@@ -669,16 +1146,15 @@ Tcl_ScanCountedElement(string, length, flagPtr)
*
* Tcl_ConvertElement --
*
- * This is a companion procedure to Tcl_ScanElement. Given
- * the information produced by Tcl_ScanElement, this procedure
- * converts a string to a list element equal to that string.
+ * This is a companion function to Tcl_ScanElement. Given the information
+ * produced by Tcl_ScanElement, this function converts a string to a list
+ * element equal to that string.
*
* Results:
- * Information is copied to *dst in the form of a list element
- * identical to src (i.e. if Tcl_SplitList is applied to dst it
- * will produce a string identical to src). The return value is
- * a count of the number of characters copied (not including the
- * terminating NULL character).
+ * Information is copied to *dst in the form of a list element identical
+ * to src (i.e. if Tcl_SplitList is applied to dst it will produce a
+ * string identical to src). The return value is a count of the number of
+ * characters copied (not including the terminating NULL character).
*
* Side effects:
* None.
@@ -687,10 +1163,10 @@ Tcl_ScanCountedElement(string, length, flagPtr)
*/
int
-Tcl_ConvertElement(src, dst, flags)
- register CONST char *src; /* Source information for list element. */
- register char *dst; /* Place to put list-ified element. */
- register int flags; /* Flags produced by Tcl_ScanElement. */
+Tcl_ConvertElement(
+ register CONST char *src, /* Source information for list element. */
+ register char *dst, /* Place to put list-ified element. */
+ register int flags) /* Flags produced by Tcl_ScanElement. */
{
return Tcl_ConvertCountedElement(src, -1, dst, flags);
}
@@ -700,17 +1176,15 @@ Tcl_ConvertElement(src, dst, flags)
*
* Tcl_ConvertCountedElement --
*
- * This is a companion procedure to Tcl_ScanCountedElement. Given
- * the information produced by Tcl_ScanCountedElement, this
- * procedure converts a string to a list element equal to that
- * string.
+ * This is a companion function to Tcl_ScanCountedElement. Given the
+ * information produced by Tcl_ScanCountedElement, this function converts
+ * a string to a list element equal to that string.
*
* Results:
- * Information is copied to *dst in the form of a list element
- * identical to src (i.e. if Tcl_SplitList is applied to dst it
- * will produce a string identical to src). The return value is
- * a count of the number of characters copied (not including the
- * terminating NULL character).
+ * Information is copied to *dst in the form of a list element identical
+ * to src (i.e. if Tcl_SplitList is applied to dst it will produce a
+ * string identical to src). The return value is a count of the number of
+ * characters copied (not including the terminating NULL character).
*
* Side effects:
* None.
@@ -719,118 +1193,177 @@ Tcl_ConvertElement(src, dst, flags)
*/
int
-Tcl_ConvertCountedElement(src, length, dst, flags)
- register CONST char *src; /* Source information for list element. */
- int length; /* Number of bytes in src, or -1. */
- char *dst; /* Place to put list-ified element. */
- int flags; /* Flags produced by Tcl_ScanElement. */
+Tcl_ConvertCountedElement(
+ register CONST char *src, /* Source information for list element. */
+ int length, /* Number of bytes in src, or -1. */
+ char *dst, /* Place to put list-ified element. */
+ int flags) /* Flags produced by Tcl_ScanElement. */
{
- register char *p = dst;
- register CONST char *lastChar;
+ int numBytes = TclConvertElement(src, length, dst, flags);
+ dst[numBytes] = '\0';
+ return numBytes;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclConvertElement --
+ *
+ * This is a companion function to TclScanElement. Given the
+ * information produced by TclScanElement, this function converts
+ * a string to a list element equal to that string.
+ *
+ * Results:
+ * Information is copied to *dst in the form of a list element identical
+ * to src (i.e. if Tcl_SplitList is applied to dst it will produce a
+ * string identical to src). The return value is a count of the number of
+ * characters copied (not including the terminating NULL character).
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
- /*
- * See the comment block at the beginning of the Tcl_ScanElement
- * code for details of how this works.
- */
+int TclConvertElement(
+ register CONST char *src, /* Source information for list element. */
+ int length, /* Number of bytes in src, or -1. */
+ char *dst, /* Place to put list-ified element. */
+ int flags) /* Flags produced by Tcl_ScanElement. */
+{
+ int conversion = flags & CONVERT_MASK;
+ char *p = dst;
- if (src && length == -1) {
- length = strlen(src);
+ /* Let the caller demand we use escape sequences rather than braces. */
+ if ((flags & TCL_DONT_USE_BRACES) && (conversion & CONVERT_BRACE)) {
+ conversion = CONVERT_ESCAPE;
}
- if ((src == NULL) || (length == 0)) {
- p[0] = '{';
- p[1] = '}';
- p[2] = 0;
- return 2;
+
+ /* No matter what the caller demands, empty string must be braced! */
+ if ((src == NULL) || (length == 0) || ((*src == '\0') && (length == -1))) {
+ src = tclEmptyStringRep;
+ length = 0;
+ conversion = CONVERT_BRACE;
+ }
+
+ /* Escape leading hash as needed and requested. */
+ if ((*src == '#') && !(flags & TCL_DONT_QUOTE_HASH)) {
+ if (conversion == CONVERT_ESCAPE) {
+ p[0] = '\\';
+ p[1] = '#';
+ p += 2;
+ src++;
+ length -= (length > 0);
+ } else {
+ conversion = CONVERT_BRACE;
+ }
}
- lastChar = src + length;
- if ((flags & USE_BRACES) && !(flags & TCL_DONT_USE_BRACES)) {
+
+ /* No escape or quoting needed. Copy the literal string value. */
+ if (conversion == CONVERT_NONE) {
+ if (length == -1) {
+ /* TODO: INT_MAX overflow? */
+ while (*src) {
+ *p++ = *src++;
+ }
+ return p - dst;
+ } else {
+ memcpy(dst, src, length);
+ return length;
+ }
+ }
+
+ /* Formatted string is original string enclosed in braces. */
+ if (conversion == CONVERT_BRACE) {
*p = '{';
p++;
- for ( ; src != lastChar; src++, p++) {
- *p = *src;
+ if (length == -1) {
+ /* TODO: INT_MAX overflow? */
+ while (*src) {
+ *p++ = *src++;
+ }
+ } else {
+ memcpy(p, src, length);
+ p += length;
}
*p = '}';
p++;
- } else {
- if (*src == '{') {
- /*
- * Can't have a leading brace unless the whole element is
- * enclosed in braces. Add a backslash before the brace.
- * Furthermore, this may destroy the balance between open
- * and close braces, so set BRACES_UNMATCHED.
- */
+ return p - dst;
+ }
- p[0] = '\\';
- p[1] = '{';
- p += 2;
- src++;
- flags |= BRACES_UNMATCHED;
- }
- for (; src != lastChar; src++) {
- switch (*src) {
- case ']':
- case '[':
- case '$':
- case ';':
- case ' ':
- case '\\':
- case '"':
- *p = '\\';
- p++;
- break;
- case '{':
- case '}':
- /*
- * It may not seem necessary to backslash braces, but
- * it is. The reason for this is that the resulting
- * list element may actually be an element of a sub-list
- * enclosed in braces (e.g. if Tcl_DStringStartSublist
- * has been invoked), so there may be a brace mismatch
- * if the braces aren't backslashed.
- */
-
- if (flags & BRACES_UNMATCHED) {
- *p = '\\';
- p++;
- }
- break;
- case '\f':
- *p = '\\';
- p++;
- *p = 'f';
- p++;
- continue;
- case '\n':
- *p = '\\';
- p++;
- *p = 'n';
- p++;
- continue;
- case '\r':
- *p = '\\';
- p++;
- *p = 'r';
- p++;
- continue;
- case '\t':
- *p = '\\';
- p++;
- *p = 't';
- p++;
- continue;
- case '\v':
- *p = '\\';
- p++;
- *p = 'v';
- p++;
- continue;
+ /* conversion == CONVERT_ESCAPE or CONVERT_MASK */
+
+ /* Formatted string is original string converted to escape sequences. */
+ for ( ; length; src++, length -= (length > 0)) {
+ switch (*src) {
+ case ']':
+ case '[':
+ case '$':
+ case ';':
+ case ' ':
+ case '\\':
+ case '"':
+ *p = '\\';
+ p++;
+ break;
+ case '{':
+ case '}':
+#if COMPAT
+ if (conversion == CONVERT_ESCAPE) {
+#endif
+ *p = '\\';
+ p++;
+#if COMPAT
}
- *p = *src;
+#endif
+ break;
+ case '\f':
+ *p = '\\';
+ p++;
+ *p = 'f';
+ p++;
+ continue;
+ case '\n':
+ *p = '\\';
+ p++;
+ *p = 'n';
+ p++;
+ continue;
+ case '\r':
+ *p = '\\';
p++;
+ *p = 'r';
+ p++;
+ continue;
+ case '\t':
+ *p = '\\';
+ p++;
+ *p = 't';
+ p++;
+ continue;
+ case '\v':
+ *p = '\\';
+ p++;
+ *p = 'v';
+ p++;
+ continue;
+ case '\0':
+ if (length == -1) {
+ return p - dst;
+ }
+ /*
+ * If we reach this point, there's an embedded NULL in the
+ * string range being processed, which should not happen when
+ * the encoding rules for Tcl strings are properly followed.
+ * If the day ever comes when we stop tolerating such things,
+ * this is where to put the Tcl_Panic().
+ */
+ break;
}
+ *p = *src;
+ p++;
}
- *p = '\0';
- return p-dst;
+ return p - dst;
}
/*
@@ -838,15 +1371,14 @@ Tcl_ConvertCountedElement(src, length, dst, flags)
*
* Tcl_Merge --
*
- * Given a collection of strings, merge them together into a
- * single string that has proper Tcl list structured (i.e.
- * Tcl_SplitList may be used to retrieve strings equal to the
- * original elements, and Tcl_Eval will parse the string back
- * into its original elements).
+ * Given a collection of strings, merge them together into a single
+ * string that has proper Tcl list structured (i.e. Tcl_SplitList may be
+ * used to retrieve strings equal to the original elements, and Tcl_Eval
+ * will parse the string back into its original elements).
*
* Results:
- * The return value is the address of a dynamically-allocated
- * string containing the merged list.
+ * The return value is the address of a dynamically-allocated string
+ * containing the merged list.
*
* Side effects:
* None.
@@ -855,16 +1387,25 @@ Tcl_ConvertCountedElement(src, length, dst, flags)
*/
char *
-Tcl_Merge(argc, argv)
- int argc; /* How many strings to merge. */
- CONST char * CONST *argv; /* Array of string values. */
+Tcl_Merge(
+ int argc, /* How many strings to merge. */
+ CONST char * CONST *argv) /* Array of string values. */
{
# define LOCAL_SIZE 20
- int localFlags[LOCAL_SIZE], *flagPtr;
- int numChars;
- char *result;
- char *dst;
- int i;
+ int localFlags[LOCAL_SIZE], *flagPtr = NULL;
+ int i, bytesNeeded = 0;
+ char *result, *dst;
+ const int maxFlags = UINT_MAX / sizeof(int);
+
+ if (argc == 0) {
+ /*
+ * Handle empty list case first, so logic of the general case
+ * can be simpler.
+ */
+ result = ckalloc(1);
+ result[0] = '\0';
+ return result;
+ }
/*
* Pass 1: estimate space, gather flags.
@@ -872,31 +1413,48 @@ Tcl_Merge(argc, argv)
if (argc <= LOCAL_SIZE) {
flagPtr = localFlags;
+ } else if (argc > maxFlags) {
+ /*
+ * We cannot allocate a large enough flag array to format this
+ * list in one pass. We could imagine converting this routine
+ * to a multi-pass implementation, but for sizeof(int) == 4,
+ * the limit is a max of 2^30 list elements and since each element
+ * is at least one byte formatted, and requires one byte space
+ * between it and the next one, that a minimum space requirement
+ * of 2^31 bytes, which is already INT_MAX. If we tried to format
+ * a list of > maxFlags elements, we're just going to overflow
+ * the size limits on the formatted string anyway, so just issue
+ * that same panic early.
+ */
+ Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
} else {
flagPtr = (int *) ckalloc((unsigned) argc*sizeof(int));
}
- numChars = 1;
for (i = 0; i < argc; i++) {
- numChars += Tcl_ScanElement(argv[i], &flagPtr[i]) + 1;
+ flagPtr[i] = ( i ? TCL_DONT_QUOTE_HASH : 0 );
+ bytesNeeded += TclScanElement(argv[i], -1, &flagPtr[i]);
+ if (bytesNeeded < 0) {
+ Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
+ }
+ }
+ if (bytesNeeded > INT_MAX - argc + 1) {
+ Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
}
+ bytesNeeded += argc;
/*
* Pass two: copy into the result area.
*/
- result = (char *) ckalloc((unsigned) numChars);
+ result = ckalloc((unsigned) bytesNeeded);
dst = result;
for (i = 0; i < argc; i++) {
- numChars = Tcl_ConvertElement(argv[i], dst, flagPtr[i]);
- dst += numChars;
+ flagPtr[i] |= ( i ? TCL_DONT_QUOTE_HASH : 0 );
+ dst += TclConvertElement(argv[i], -1, dst, flagPtr[i]);
*dst = ' ';
dst++;
}
- if (dst == result) {
- *dst = 0;
- } else {
- dst[-1] = 0;
- }
+ dst[-1] = 0;
if (flagPtr != localFlags) {
ckfree((char *) flagPtr);
@@ -912,10 +1470,10 @@ Tcl_Merge(argc, argv)
* Figure out how to handle a backslash sequence.
*
* Results:
- * The return value is the character that should be substituted
- * in place of the backslash sequence that starts at src. If
- * readPtr isn't NULL then it is filled in with a count of the
- * number of characters in the backslash sequence.
+ * The return value is the character that should be substituted in place
+ * of the backslash sequence that starts at src. If readPtr isn't NULL
+ * then it is filled in with a count of the number of characters in the
+ * backslash sequence.
*
* Side effects:
* None.
@@ -924,11 +1482,11 @@ Tcl_Merge(argc, argv)
*/
char
-Tcl_Backslash(src, readPtr)
- CONST char *src; /* Points to the backslash character of
- * a backslash sequence. */
- int *readPtr; /* Fill in with number of characters read
- * from src, unless NULL. */
+Tcl_Backslash(
+ CONST char *src, /* Points to the backslash character of a
+ * backslash sequence. */
+ int *readPtr) /* Fill in with number of characters read from
+ * src, unless NULL. */
{
char buf[TCL_UTF_MAX];
Tcl_UniChar ch;
@@ -941,73 +1499,228 @@ Tcl_Backslash(src, readPtr)
/*
*----------------------------------------------------------------------
*
+ * TclTrimRight --
+ * Takes two counted strings in the Tcl encoding which must both be
+ * null terminated. Conceptually trims from the right side of the
+ * first string all characters found in the second string.
+ *
+ * Results:
+ * The number of bytes to be removed from the end of the string.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrimRight(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ const char *p = bytes + numBytes;
+ int pInc;
+
+ if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
+ Tcl_Panic("TclTrimRight works only on null-terminated strings");
+ }
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ /* Outer loop: iterate over string to be trimmed */
+ do {
+ Tcl_UniChar ch1;
+ const char *q = trim;
+ int bytesLeft = numTrim;
+
+ p = Tcl_UtfPrev(p, bytes);
+ pInc = TclUtfToUniChar(p, &ch1);
+
+ /* Inner loop: scan trim string for match to current character */
+ do {
+ Tcl_UniChar ch2;
+ int qInc = TclUtfToUniChar(q, &ch2);
+
+ if (ch1 == ch2) {
+ break;
+ }
+
+ q += qInc;
+ bytesLeft -= qInc;
+ } while (bytesLeft);
+
+ if (bytesLeft == 0) {
+ /* No match; trim task done; *p is last non-trimmed char */
+ p += pInc;
+ break;
+ }
+ } while (p > bytes);
+
+ return numBytes - (p - bytes);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrimLeft --
+ * Takes two counted strings in the Tcl encoding which must both be
+ * null terminated. Conceptually trims from the left side of the
+ * first string all characters found in the second string.
+ *
+ * Results:
+ * The number of bytes to be removed from the start of the string.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrimLeft(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ const char *p = bytes;
+
+ if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
+ Tcl_Panic("TclTrimLeft works only on null-terminated strings");
+ }
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ /* Outer loop: iterate over string to be trimmed */
+ do {
+ Tcl_UniChar ch1;
+ int pInc = TclUtfToUniChar(p, &ch1);
+ const char *q = trim;
+ int bytesLeft = numTrim;
+
+ /* Inner loop: scan trim string for match to current character */
+ do {
+ Tcl_UniChar ch2;
+ int qInc = TclUtfToUniChar(q, &ch2);
+
+ if (ch1 == ch2) {
+ break;
+ }
+
+ q += qInc;
+ bytesLeft -= qInc;
+ } while (bytesLeft);
+
+ if (bytesLeft == 0) {
+ /* No match; trim task done; *p is first non-trimmed char */
+ break;
+ }
+
+ p += pInc;
+ numBytes -= pInc;
+ } while (numBytes);
+
+ return p - bytes;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_Concat --
*
* Concatenate a set of strings into a single large string.
*
* Results:
- * The return value is dynamically-allocated string containing
- * a concatenation of all the strings in argv, with spaces between
- * the original argv elements.
+ * The return value is dynamically-allocated string containing a
+ * concatenation of all the strings in argv, with spaces between the
+ * original argv elements.
*
* Side effects:
- * Memory is allocated for the result; the caller is responsible
- * for freeing the memory.
+ * Memory is allocated for the result; the caller is responsible for
+ * freeing the memory.
*
*----------------------------------------------------------------------
*/
+/* The whitespace characters trimmed during [concat] operations */
+#define CONCAT_WS " \f\v\r\t\n"
+#define CONCAT_WS_SIZE (int) (sizeof(CONCAT_WS "") - 1)
+
char *
-Tcl_Concat(argc, argv)
- int argc; /* Number of strings to concatenate. */
- CONST char * CONST *argv; /* Array of strings to concatenate. */
+Tcl_Concat(
+ int argc, /* Number of strings to concatenate. */
+ CONST char * CONST *argv) /* Array of strings to concatenate. */
{
- int totalSize, i;
- char *p;
- char *result;
+ int i, needSpace = 0, bytesNeeded = 0;
+ char *result, *p;
- for (totalSize = 1, i = 0; i < argc; i++) {
- totalSize += strlen(argv[i]) + 1;
- }
- result = (char *) ckalloc((unsigned) totalSize);
+ /* Dispose of the empty result corner case first to simplify later code */
if (argc == 0) {
- *result = '\0';
+ result = (char *) ckalloc(1);
+ result[0] = '\0';
return result;
}
- for (p = result, i = 0; i < argc; i++) {
- CONST char *element;
- int length;
+ /* First allocate the result buffer at the size required */
+ for (i = 0; i < argc; i++) {
+ bytesNeeded += strlen(argv[i]);
+ if (bytesNeeded < 0) {
+ Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded");
+ }
+ }
+ if (bytesNeeded + argc - 1 < 0) {
/*
- * Clip white space off the front and back of the string
- * to generate a neater result, and ignore any empty
- * elements.
+ * Panic test could be tighter, but not going to bother for
+ * this legacy routine.
*/
+ Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded");
+ }
+ /* All element bytes + (argc - 1) spaces + 1 terminating NULL */
+ result = (char *) ckalloc((unsigned) (bytesNeeded + argc));
+ for (p = result, i = 0; i < argc; i++) {
+ int trim, elemLength;
+ const char *element;
+
element = argv[i];
- while (isspace(UCHAR(*element))) { /* INTL: ISO space. */
- element++;
- }
- for (length = strlen(element);
- (length > 0)
- && (isspace(UCHAR(element[length-1]))) /* INTL: ISO space. */
- && ((length < 2) || (element[length-2] != '\\'));
- length--) {
- /* Null loop body. */
- }
- if (length == 0) {
+ elemLength = strlen(argv[i]);
+
+ /* Trim away the leading whitespace */
+ trim = TclTrimLeft(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
+ element += trim;
+ elemLength -= trim;
+
+ /*
+ * Trim away the trailing whitespace. Do not permit trimming
+ * to expose a final backslash character.
+ */
+
+ trim = TclTrimRight(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
+ trim -= trim && (element[elemLength - trim - 1] == '\\');
+ elemLength -= trim;
+
+ /* If we're left with empty element after trimming, do nothing */
+ if (elemLength == 0) {
continue;
}
- memcpy((VOID *) p, (VOID *) element, (size_t) length);
- p += length;
- *p = ' ';
- p++;
- }
- if (p != result) {
- p[-1] = 0;
- } else {
- *p = 0;
+
+ /* Append to the result with space if needed */
+ if (needSpace) {
+ *p++ = ' ';
+ }
+ memcpy(p, element, (size_t) elemLength);
+ p += elemLength;
+ needSpace = 1;
}
+ *p = '\0';
return result;
}
@@ -1020,8 +1733,8 @@ Tcl_Concat(argc, argv)
* object with spaces between the original strings.
*
* Results:
- * The return value is a new string object containing a concatenation
- * of the strings in objv. Its ref count is zero.
+ * The return value is a new string object containing a concatenation of
+ * the strings in objv. Its ref count is zero.
*
* Side effects:
* A new object is created.
@@ -1030,27 +1743,29 @@ Tcl_Concat(argc, argv)
*/
Tcl_Obj *
-Tcl_ConcatObj(objc, objv)
- int objc; /* Number of objects to concatenate. */
- Tcl_Obj *CONST objv[]; /* Array of objects to concatenate. */
+Tcl_ConcatObj(
+ int objc, /* Number of objects to concatenate. */
+ Tcl_Obj *CONST objv[]) /* Array of objects to concatenate. */
{
- int allocSize, finalSize, length, elemLength, i;
- char *p;
- char *element;
- char *concatStr;
- Tcl_Obj *objPtr;
+ int i, elemLength, needSpace = 0, bytesNeeded = 0;
+ const char *element;
+ Tcl_Obj *objPtr, *resPtr;
/*
- * Check first to see if all the items are of list type. If so,
- * we will concat them together as lists, and return a list object.
- * This is only valid when the lists have no current string
- * representation, since we don't know what the original type was.
- * An original string rep may have lost some whitespace info when
- * converted which could be important.
+ * Check first to see if all the items are of list type or empty. If so,
+ * we will concat them together as lists, and return a list object. This
+ * is only valid when the lists are in canonical form.
*/
+
for (i = 0; i < objc; i++) {
+ int length;
+
objPtr = objv[i];
- if ((objPtr->typePtr != &tclListType) || (objPtr->bytes != NULL)) {
+ if (TclListObjIsCanonical(objPtr)) {
+ continue;
+ }
+ Tcl_GetStringFromObj(objPtr, &length);
+ if (length > 0) {
break;
}
}
@@ -1058,92 +1773,90 @@ Tcl_ConcatObj(objc, objv)
Tcl_Obj **listv;
int listc;
- objPtr = Tcl_NewListObj(0, NULL);
+ resPtr = NULL;
for (i = 0; i < objc; i++) {
/*
- * Tcl_ListObjAppendList could be used here, but this saves
- * us a bit of type checking (since we've already done it)
- * Use of INT_MAX tells us to always put the new stuff on
- * the end. It will be set right in Tcl_ListObjReplace.
+ * Tcl_ListObjAppendList could be used here, but this saves us a
+ * bit of type checking (since we've already done it). Use of
+ * INT_MAX tells us to always put the new stuff on the end. It
+ * will be set right in Tcl_ListObjReplace.
+ * Note that all objs at this point are either lists or have an
+ * empty string rep.
*/
- Tcl_ListObjGetElements(NULL, objv[i], &listc, &listv);
- Tcl_ListObjReplace(NULL, objPtr, INT_MAX, 0, listc, listv);
- }
- return objPtr;
- }
- allocSize = 0;
- for (i = 0; i < objc; i++) {
- objPtr = objv[i];
- element = Tcl_GetStringFromObj(objPtr, &length);
- if ((element != NULL) && (length > 0)) {
- allocSize += (length + 1);
+ objPtr = objv[i];
+ if (objPtr->bytes && objPtr->length == 0) {
+ continue;
+ }
+ TclListObjGetElements(NULL, objPtr, &listc, &listv);
+ if (listc) {
+ if (resPtr) {
+ Tcl_ListObjReplace(NULL, resPtr, INT_MAX, 0, listc, listv);
+ } else {
+ resPtr = TclListObjCopy(NULL, objPtr);
+ }
+ }
}
- }
- if (allocSize == 0) {
- allocSize = 1; /* enough for the NULL byte at end */
+ if (!resPtr) {
+ resPtr = Tcl_NewObj();
+ }
+ return resPtr;
}
/*
- * Allocate storage for the concatenated result. Note that allocSize
- * is one more than the total number of characters, and so includes
- * room for the terminating NULL byte.
+ * Something cannot be determined to be safe, so build the concatenation
+ * the slow way, using the string representations.
*/
-
- concatStr = (char *) ckalloc((unsigned) allocSize);
+ /* First try to pre-allocate the size required */
+ for (i = 0; i < objc; i++) {
+ element = TclGetStringFromObj(objv[i], &elemLength);
+ bytesNeeded += elemLength;
+ if (bytesNeeded < 0) {
+ break;
+ }
+ }
/*
- * Now concatenate the elements. Clip white space off the front and back
- * to generate a neater result, and ignore any empty elements. Also put
- * a null byte at the end.
+ * Does not matter if this fails, will simply try later to build up
+ * the string with each Append reallocating as needed with the usual
+ * string append algorithm. When that fails it will report the error.
*/
+ TclNewObj(resPtr);
+ Tcl_AttemptSetObjLength(resPtr, bytesNeeded + objc - 1);
+ Tcl_SetObjLength(resPtr, 0);
- finalSize = 0;
- if (objc == 0) {
- *concatStr = '\0';
- } else {
- p = concatStr;
- for (i = 0; i < objc; i++) {
- objPtr = objv[i];
- element = Tcl_GetStringFromObj(objPtr, &elemLength);
- while ((elemLength > 0) && (UCHAR(*element) < 127)
- && isspace(UCHAR(*element))) { /* INTL: ISO C space. */
- element++;
- elemLength--;
- }
+ for (i = 0; i < objc; i++) {
+ int trim;
+
+ element = TclGetStringFromObj(objv[i], &elemLength);
- /*
- * Trim trailing white space. But, be careful not to trim
- * a space character if it is preceded by a backslash: in
- * this case it could be significant.
- */
+ /* Trim away the leading whitespace */
+ trim = TclTrimLeft(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
+ element += trim;
+ elemLength -= trim;
- while ((elemLength > 0) && (UCHAR(element[elemLength-1]) < 127)
- && isspace(UCHAR(element[elemLength-1])) /* INTL: ISO C space. */
- && ((elemLength < 2) || (element[elemLength-2] != '\\'))) {
- elemLength--;
- }
- if (elemLength == 0) {
- continue; /* nothing left of this element */
- }
- memcpy((VOID *) p, (VOID *) element, (size_t) elemLength);
- p += elemLength;
- *p = ' ';
- p++;
- finalSize += (elemLength + 1);
- }
- if (p != concatStr) {
- p[-1] = 0;
- finalSize -= 1; /* we overwrote the final ' ' */
- } else {
- *p = 0;
- }
+ /*
+ * Trim away the trailing whitespace. Do not permit trimming
+ * to expose a final backslash character.
+ */
+
+ trim = TclTrimRight(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
+ trim -= trim && (element[elemLength - trim - 1] == '\\');
+ elemLength -= trim;
+
+ /* If we're left with empty element after trimming, do nothing */
+ if (elemLength == 0) {
+ continue;
+ }
+
+ /* Append to the result with space if needed */
+ if (needSpace) {
+ Tcl_AppendToObj(resPtr, " ", 1);
+ }
+ Tcl_AppendToObj(resPtr, element, elemLength);
+ needSpace = 1;
}
-
- TclNewObj(objPtr);
- objPtr->bytes = concatStr;
- objPtr->length = finalSize;
- return objPtr;
+ return resPtr;
}
/*
@@ -1154,10 +1867,9 @@ Tcl_ConcatObj(objc, objv)
* See if a particular string matches a particular pattern.
*
* Results:
- * The return value is 1 if string matches pattern, and
- * 0 otherwise. The matching operation permits the following
- * special characters in the pattern: *?\[] (see the manual
- * entry for details on what these mean).
+ * The return value is 1 if string matches pattern, and 0 otherwise. The
+ * matching operation permits the following special characters in the
+ * pattern: *?\[] (see the manual entry for details on what these mean).
*
* Side effects:
* None.
@@ -1166,12 +1878,12 @@ Tcl_ConcatObj(objc, objv)
*/
int
-Tcl_StringMatch(string, pattern)
- CONST char *string; /* String. */
- CONST char *pattern; /* Pattern, which may contain special
+Tcl_StringMatch(
+ CONST char *str, /* String. */
+ CONST char *pattern) /* Pattern, which may contain special
* characters. */
{
- return Tcl_StringCaseMatch(string, pattern, 0);
+ return Tcl_StringCaseMatch(str, pattern, 0);
}
/*
@@ -1179,14 +1891,13 @@ Tcl_StringMatch(string, pattern)
*
* Tcl_StringCaseMatch --
*
- * See if a particular string matches a particular pattern.
- * Allows case insensitivity.
+ * See if a particular string matches a particular pattern. Allows case
+ * insensitivity.
*
* Results:
- * The return value is 1 if string matches pattern, and
- * 0 otherwise. The matching operation permits the following
- * special characters in the pattern: *?\[] (see the manual
- * entry for details on what these mean).
+ * The return value is 1 if string matches pattern, and 0 otherwise. The
+ * matching operation permits the following special characters in the
+ * pattern: *?\[] (see the manual entry for details on what these mean).
*
* Side effects:
* None.
@@ -1195,127 +1906,133 @@ Tcl_StringMatch(string, pattern)
*/
int
-Tcl_StringCaseMatch(string, pattern, nocase)
- CONST char *string; /* String. */
- CONST char *pattern; /* Pattern, which may contain special
+Tcl_StringCaseMatch(
+ CONST char *str, /* String. */
+ CONST char *pattern, /* Pattern, which may contain special
* characters. */
- int nocase; /* 0 for case sensitive, 1 for insensitive */
+ int nocase) /* 0 for case sensitive, 1 for insensitive */
{
int p, charLen;
CONST char *pstart = pattern;
Tcl_UniChar ch1, ch2;
-
+
while (1) {
p = *pattern;
-
+
/*
- * See if we're at the end of both the pattern and the string. If
- * so, we succeeded. If we're at the end of the pattern but not at
- * the end of the string, we failed.
+ * See if we're at the end of both the pattern and the string. If so,
+ * we succeeded. If we're at the end of the pattern but not at the end
+ * of the string, we failed.
*/
-
+
if (p == '\0') {
- return (*string == '\0');
+ return (*str == '\0');
}
- if ((*string == '\0') && (p != '*')) {
+ if ((*str == '\0') && (p != '*')) {
return 0;
}
/*
- * Check for a "*" as the next pattern character. It matches
- * any substring. We handle this by calling ourselves
- * recursively for each postfix of string, until either we
- * match or we reach the end of the string.
+ * Check for a "*" as the next pattern character. It matches any
+ * substring. We handle this by calling ourselves recursively for each
+ * postfix of string, until either we match or we reach the end of the
+ * string.
*/
-
+
if (p == '*') {
/*
* Skip all successive *'s in the pattern
*/
+
while (*(++pattern) == '*') {}
p = *pattern;
if (p == '\0') {
return 1;
}
+
/*
* This is a special case optimization for single-byte utf.
*/
+
if (UCHAR(*pattern) < 0x80) {
ch2 = (Tcl_UniChar)
- (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
+ (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
} else {
Tcl_UtfToUniChar(pattern, &ch2);
if (nocase) {
ch2 = Tcl_UniCharToLower(ch2);
}
}
+
while (1) {
/*
* Optimization for matching - cruise through the string
* quickly if the next char in the pattern isn't a special
* character
*/
+
if ((p != '[') && (p != '?') && (p != '\\')) {
if (nocase) {
- while (*string) {
- charLen = TclUtfToUniChar(string, &ch1);
+ while (*str) {
+ charLen = TclUtfToUniChar(str, &ch1);
if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {
break;
}
- string += charLen;
+ str += charLen;
}
} else {
/*
* There's no point in trying to make this code
- * shorter, as the number of bytes you want to
- * compare each time is non-constant.
+ * shorter, as the number of bytes you want to compare
+ * each time is non-constant.
*/
- while (*string) {
- charLen = TclUtfToUniChar(string, &ch1);
+
+ while (*str) {
+ charLen = TclUtfToUniChar(str, &ch1);
if (ch2 == ch1) {
break;
}
- string += charLen;
+ str += charLen;
}
}
}
- if (Tcl_StringCaseMatch(string, pattern, nocase)) {
+ if (Tcl_StringCaseMatch(str, pattern, nocase)) {
return 1;
}
- if (*string == '\0') {
+ if (*str == '\0') {
return 0;
}
- string += TclUtfToUniChar(string, &ch1);
+ str += TclUtfToUniChar(str, &ch1);
}
}
/*
- * Check for a "?" as the next pattern character. It matches
- * any single character.
+ * Check for a "?" as the next pattern character. It matches any
+ * single character.
*/
if (p == '?') {
pattern++;
- string += TclUtfToUniChar(string, &ch1);
+ str += TclUtfToUniChar(str, &ch1);
continue;
}
/*
- * Check for a "[" as the next pattern character. It is followed
- * by a list of characters that are acceptable, or by a range
- * (two characters separated by "-").
+ * Check for a "[" as the next pattern character. It is followed by a
+ * list of characters that are acceptable, or by a range (two
+ * characters separated by "-").
*/
if (p == '[') {
Tcl_UniChar startChar, endChar;
pattern++;
- if (UCHAR(*string) < 0x80) {
+ if (UCHAR(*str) < 0x80) {
ch1 = (Tcl_UniChar)
- (nocase ? tolower(UCHAR(*string)) : UCHAR(*string));
- string++;
+ (nocase ? tolower(UCHAR(*str)) : UCHAR(*str));
+ str++;
} else {
- string += Tcl_UtfToUniChar(string, &ch1);
+ str += Tcl_UtfToUniChar(str, &ch1);
if (nocase) {
ch1 = Tcl_UniCharToLower(ch1);
}
@@ -1325,8 +2042,8 @@ Tcl_StringCaseMatch(string, pattern, nocase)
return 0;
}
if (UCHAR(*pattern) < 0x80) {
- startChar = (Tcl_UniChar)
- (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
+ startChar = (Tcl_UniChar) (nocase
+ ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
pattern++;
} else {
pattern += Tcl_UtfToUniChar(pattern, &startChar);
@@ -1340,9 +2057,8 @@ Tcl_StringCaseMatch(string, pattern, nocase)
return 0;
}
if (UCHAR(*pattern) < 0x80) {
- endChar = (Tcl_UniChar)
- (nocase ? tolower(UCHAR(*pattern))
- : UCHAR(*pattern));
+ endChar = (Tcl_UniChar) (nocase
+ ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
pattern++;
} else {
pattern += Tcl_UtfToUniChar(pattern, &endChar);
@@ -1374,8 +2090,8 @@ Tcl_StringCaseMatch(string, pattern, nocase)
}
/*
- * If the next pattern character is '\', just strip off the '\'
- * so we do exact matching on the character that follows.
+ * If the next pattern character is '\', just strip off the '\' so we
+ * do exact matching on the character that follows.
*/
if (p == '\\') {
@@ -1386,11 +2102,11 @@ Tcl_StringCaseMatch(string, pattern, nocase)
}
/*
- * There's no special character. Just make sure that the next
- * bytes of each string match.
+ * There's no special character. Just make sure that the next bytes of
+ * each string match.
*/
- string += TclUtfToUniChar(string, &ch1);
+ str += TclUtfToUniChar(str, &ch1);
pattern += TclUtfToUniChar(pattern, &ch2);
if (nocase) {
if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {
@@ -1405,14 +2121,16 @@ Tcl_StringCaseMatch(string, pattern, nocase)
/*
*----------------------------------------------------------------------
*
- * TclMatchIsTrivial --
+ * TclByteArrayMatch --
*
- * Test whether a particular glob pattern is a trivial pattern.
- * (i.e. where matching is the same as equality testing).
+ * See if a particular string matches a particular pattern. Does not
+ * allow for case insensitivity.
+ * Parallels tclUtf.c:TclUniCharMatch, adjusted for char* and sans nocase.
*
* Results:
- * A boolean indicating whether the pattern is free of all of the
- * glob special chars.
+ * The return value is 1 if string matches pattern, and 0 otherwise. The
+ * matching operation permits the following special characters in the
+ * pattern: *?\[] (see the manual entry for details on what these mean).
*
* Side effects:
* None.
@@ -1421,32 +2139,222 @@ Tcl_StringCaseMatch(string, pattern, nocase)
*/
int
-TclMatchIsTrivial(pattern)
- CONST char *pattern;
+TclByteArrayMatch(
+ const unsigned char *string, /* String. */
+ int strLen, /* Length of String */
+ const unsigned char *pattern, /* Pattern, which may contain special
+ * characters. */
+ int ptnLen, /* Length of Pattern */
+ int flags)
{
- CONST char *p = pattern;
+ const unsigned char *stringEnd, *patternEnd;
+ unsigned char p;
+
+ stringEnd = string + strLen;
+ patternEnd = pattern + ptnLen;
while (1) {
- switch (*p++) {
- case '\0':
- return 1;
- case '*':
- case '?':
- case '[':
- case '\\':
+ /*
+ * See if we're at the end of both the pattern and the string. If so,
+ * we succeeded. If we're at the end of the pattern but not at the end
+ * of the string, we failed.
+ */
+
+ if (pattern == patternEnd) {
+ return (string == stringEnd);
+ }
+ p = *pattern;
+ if ((string == stringEnd) && (p != '*')) {
+ return 0;
+ }
+
+ /*
+ * Check for a "*" as the next pattern character. It matches any
+ * substring. We handle this by skipping all the characters up to the
+ * next matching one in the pattern, and then calling ourselves
+ * recursively for each postfix of string, until either we match or we
+ * reach the end of the string.
+ */
+
+ if (p == '*') {
+ /*
+ * Skip all successive *'s in the pattern.
+ */
+
+ while ((++pattern < patternEnd) && (*pattern == '*')) {
+ /* empty body */
+ }
+ if (pattern == patternEnd) {
+ return 1;
+ }
+ p = *pattern;
+ while (1) {
+ /*
+ * Optimization for matching - cruise through the string
+ * quickly if the next char in the pattern isn't a special
+ * character.
+ */
+
+ if ((p != '[') && (p != '?') && (p != '\\')) {
+ while ((string < stringEnd) && (p != *string)) {
+ string++;
+ }
+ }
+ if (TclByteArrayMatch(string, stringEnd - string,
+ pattern, patternEnd - pattern, 0)) {
+ return 1;
+ }
+ if (string == stringEnd) {
+ return 0;
+ }
+ string++;
+ }
+ }
+
+ /*
+ * Check for a "?" as the next pattern character. It matches any
+ * single character.
+ */
+
+ if (p == '?') {
+ pattern++;
+ string++;
+ continue;
+ }
+
+ /*
+ * Check for a "[" as the next pattern character. It is followed by a
+ * list of characters that are acceptable, or by a range (two
+ * characters separated by "-").
+ */
+
+ if (p == '[') {
+ unsigned char ch1, startChar, endChar;
+
+ pattern++;
+ ch1 = *string;
+ string++;
+ while (1) {
+ if ((*pattern == ']') || (pattern == patternEnd)) {
+ return 0;
+ }
+ startChar = *pattern;
+ pattern++;
+ if (*pattern == '-') {
+ pattern++;
+ if (pattern == patternEnd) {
+ return 0;
+ }
+ endChar = *pattern;
+ pattern++;
+ if (((startChar <= ch1) && (ch1 <= endChar))
+ || ((endChar <= ch1) && (ch1 <= startChar))) {
+ /*
+ * Matches ranges of form [a-z] or [z-a].
+ */
+ break;
+ }
+ } else if (startChar == ch1) {
+ break;
+ }
+ }
+ while (*pattern != ']') {
+ if (pattern == patternEnd) {
+ pattern--;
+ break;
+ }
+ pattern++;
+ }
+ pattern++;
+ continue;
+ }
+
+ /*
+ * If the next pattern character is '\', just strip off the '\' so we
+ * do exact matching on the character that follows.
+ */
+
+ if (p == '\\') {
+ if (++pattern == patternEnd) {
+ return 0;
+ }
+ }
+
+ /*
+ * There's no special character. Just make sure that the next bytes of
+ * each string match.
+ */
+
+ if (*string != *pattern) {
return 0;
}
+ string++;
+ pattern++;
}
}
/*
*----------------------------------------------------------------------
*
+ * TclStringMatchObj --
+ *
+ * See if a particular string matches a particular pattern.
+ * Allows case insensitivity. This is the generic multi-type handler
+ * for the various matching algorithms.
+ *
+ * Results:
+ * The return value is 1 if string matches pattern, and 0 otherwise. The
+ * matching operation permits the following special characters in the
+ * pattern: *?\[] (see the manual entry for details on what these mean).
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclStringMatchObj(
+ Tcl_Obj *strObj, /* string object. */
+ Tcl_Obj *ptnObj, /* pattern object. */
+ int flags) /* Only TCL_MATCH_NOCASE should be passed or 0. */
+{
+ int match, length, plen;
+
+ /*
+ * Promote based on the type of incoming object.
+ * XXX: Currently doesn't take advantage of exact-ness that
+ * XXX: TclReToGlob tells us about
+ trivial = nocase ? 0 : TclMatchIsTrivial(TclGetString(ptnObj));
+ */
+
+ if ((strObj->typePtr == &tclStringType)) {
+ Tcl_UniChar *udata, *uptn;
+
+ udata = Tcl_GetUnicodeFromObj(strObj, &length);
+ uptn = Tcl_GetUnicodeFromObj(ptnObj, &plen);
+ match = TclUniCharMatch(udata, length, uptn, plen, flags);
+ } else if ((strObj->typePtr == &tclByteArrayType) && !flags) {
+ unsigned char *data, *ptn;
+
+ data = Tcl_GetByteArrayFromObj(strObj, &length);
+ ptn = Tcl_GetByteArrayFromObj(ptnObj, &plen);
+ match = TclByteArrayMatch(data, length, ptn, plen, 0);
+ } else {
+ match = Tcl_StringCaseMatch(TclGetString(strObj),
+ TclGetString(ptnObj), flags);
+ }
+ return match;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_DStringInit --
*
- * Initializes a dynamic string, discarding any previous contents
- * of the string (Tcl_DStringFree should have been called already
- * if the dynamic string was previously in use).
+ * Initializes a dynamic string, discarding any previous contents of the
+ * string (Tcl_DStringFree should have been called already if the dynamic
+ * string was previously in use).
*
* Results:
* None.
@@ -1458,8 +2366,8 @@ TclMatchIsTrivial(pattern)
*/
void
-Tcl_DStringInit(dsPtr)
- Tcl_DString *dsPtr; /* Pointer to structure for dynamic string. */
+Tcl_DStringInit(
+ Tcl_DString *dsPtr) /* Pointer to structure for dynamic string. */
{
dsPtr->string = dsPtr->staticSpace;
dsPtr->length = 0;
@@ -1472,66 +2380,63 @@ Tcl_DStringInit(dsPtr)
*
* Tcl_DStringAppend --
*
- * Append more characters to the current value of a dynamic string.
+ * Append more bytes to the current value of a dynamic string.
*
* Results:
* The return value is a pointer to the dynamic string's new value.
*
* Side effects:
- * Length bytes from string (or all of string if length is less
- * than zero) are added to the current value of the string. Memory
- * gets reallocated if needed to accomodate the string's new size.
+ * Length bytes from "bytes" (or all of "bytes" if length is less than
+ * zero) are added to the current value of the string. Memory gets
+ * reallocated if needed to accomodate the string's new size.
*
*----------------------------------------------------------------------
*/
char *
-Tcl_DStringAppend(dsPtr, string, length)
- Tcl_DString *dsPtr; /* Structure describing dynamic string. */
- CONST char *string; /* String to append. If length is -1 then
- * this must be null-terminated. */
- int length; /* Number of characters from string to
- * append. If < 0, then append all of string,
- * up to null at end. */
+Tcl_DStringAppend(
+ Tcl_DString *dsPtr, /* Structure describing dynamic string. */
+ CONST char *bytes, /* String to append. If length is -1 then this
+ * must be null-terminated. */
+ int length) /* Number of bytes from "bytes" to append. If
+ * < 0, then append all of bytes, up to null
+ * at end. */
{
int newSize;
char *dst;
CONST char *end;
if (length < 0) {
- length = strlen(string);
+ length = strlen(bytes);
}
newSize = length + dsPtr->length;
/*
- * Allocate a larger buffer for the string if the current one isn't
- * large enough. Allocate extra space in the new buffer so that there
- * will be room to grow before we have to allocate again.
+ * Allocate a larger buffer for the string if the current one isn't large
+ * enough. Allocate extra space in the new buffer so that there will be
+ * room to grow before we have to allocate again.
*/
if (newSize >= dsPtr->spaceAvl) {
dsPtr->spaceAvl = newSize * 2;
if (dsPtr->string == dsPtr->staticSpace) {
- char *newString;
+ char *newString = ckalloc((unsigned) dsPtr->spaceAvl);
- newString = (char *) ckalloc((unsigned) dsPtr->spaceAvl);
- memcpy((VOID *) newString, (VOID *) dsPtr->string,
- (size_t) dsPtr->length);
+ memcpy(newString, dsPtr->string, (size_t) dsPtr->length);
dsPtr->string = newString;
} else {
- dsPtr->string = (char *) ckrealloc((VOID *) dsPtr->string,
+ dsPtr->string = ckrealloc((void *) dsPtr->string,
(size_t) dsPtr->spaceAvl);
}
}
/*
- * Copy the new string into the buffer at the end of the old
- * one.
+ * Copy the new string into the buffer at the end of the old one.
*/
- for (dst = dsPtr->string + dsPtr->length, end = string+length;
- string < end; string++, dst++) {
- *dst = *string;
+ for (dst = dsPtr->string + dsPtr->length, end = bytes+length;
+ bytes < end; bytes++, dst++) {
+ *dst = *bytes;
}
*dst = '\0';
dsPtr->length += length;
@@ -1549,62 +2454,67 @@ Tcl_DStringAppend(dsPtr, string, length)
* The return value is a pointer to the dynamic string's new value.
*
* Side effects:
- * String is reformatted as a list element and added to the current
- * value of the string. Memory gets reallocated if needed to
- * accomodate the string's new size.
+ * String is reformatted as a list element and added to the current value
+ * of the string. Memory gets reallocated if needed to accomodate the
+ * string's new size.
*
*----------------------------------------------------------------------
*/
char *
-Tcl_DStringAppendElement(dsPtr, string)
- Tcl_DString *dsPtr; /* Structure describing dynamic string. */
- CONST char *string; /* String to append. Must be
+Tcl_DStringAppendElement(
+ Tcl_DString *dsPtr, /* Structure describing dynamic string. */
+ CONST char *element) /* String to append. Must be
* null-terminated. */
{
- int newSize, flags, strSize;
- char *dst;
-
- strSize = ((string == NULL) ? 0 : strlen(string));
- newSize = Tcl_ScanCountedElement(string, strSize, &flags)
- + dsPtr->length + 1;
+ char *dst = dsPtr->string + dsPtr->length;
+ int needSpace = TclNeedSpace(dsPtr->string, dst);
+ int flags = needSpace ? TCL_DONT_QUOTE_HASH : 0;
+ int newSize = dsPtr->length + needSpace
+ + TclScanElement(element, -1, &flags);
/*
- * Allocate a larger buffer for the string if the current one isn't
- * large enough. Allocate extra space in the new buffer so that there
- * will be room to grow before we have to allocate again.
- * SPECIAL NOTE: must use memcpy, not strcpy, to copy the string
- * to a larger buffer, since there may be embedded NULLs in the
- * string in some cases.
+ * Allocate a larger buffer for the string if the current one isn't large
+ * enough. Allocate extra space in the new buffer so that there will be
+ * room to grow before we have to allocate again. SPECIAL NOTE: must use
+ * memcpy, not strcpy, to copy the string to a larger buffer, since there
+ * may be embedded NULLs in the string in some cases.
*/
if (newSize >= dsPtr->spaceAvl) {
dsPtr->spaceAvl = newSize * 2;
if (dsPtr->string == dsPtr->staticSpace) {
- char *newString;
+ char *newString = ckalloc((unsigned) dsPtr->spaceAvl);
- newString = (char *) ckalloc((unsigned) dsPtr->spaceAvl);
- memcpy((VOID *) newString, (VOID *) dsPtr->string,
- (size_t) dsPtr->length);
+ memcpy(newString, dsPtr->string, (size_t) dsPtr->length);
dsPtr->string = newString;
} else {
- dsPtr->string = (char *) ckrealloc((VOID *) dsPtr->string,
+ dsPtr->string = (char *) ckrealloc((void *) dsPtr->string,
(size_t) dsPtr->spaceAvl);
}
+ dst = dsPtr->string + dsPtr->length;
}
/*
- * Convert the new string to a list element and copy it into the
- * buffer at the end, with a space, if needed.
+ * Convert the new string to a list element and copy it into the buffer at
+ * the end, with a space, if needed.
*/
- dst = dsPtr->string + dsPtr->length;
- if (TclNeedSpace(dsPtr->string, dst)) {
+ if (needSpace) {
*dst = ' ';
dst++;
dsPtr->length++;
+
+ /*
+ * If we need a space to separate this element from preceding stuff,
+ * then this element will not lead a list, and need not have it's
+ * leading '#' quoted.
+ */
+
+ flags |= TCL_DONT_QUOTE_HASH;
}
- dsPtr->length += Tcl_ConvertCountedElement(string, strSize, dst, flags);
+ dsPtr->length += TclConvertElement(element, -1, dst, flags);
+ dsPtr->string[dsPtr->length] = '\0';
return dsPtr->string;
}
@@ -1613,25 +2523,24 @@ Tcl_DStringAppendElement(dsPtr, string)
*
* Tcl_DStringSetLength --
*
- * Change the length of a dynamic string. This can cause the
- * string to either grow or shrink, depending on the value of
- * length.
+ * Change the length of a dynamic string. This can cause the string to
+ * either grow or shrink, depending on the value of length.
*
* Results:
* None.
*
* Side effects:
- * The length of dsPtr is changed to length and a null byte is
- * stored at that position in the string. If length is larger
- * than the space allocated for dsPtr, then a panic occurs.
+ * The length of dsPtr is changed to length and a null byte is stored at
+ * that position in the string. If length is larger than the space
+ * allocated for dsPtr, then a panic occurs.
*
*----------------------------------------------------------------------
*/
void
-Tcl_DStringSetLength(dsPtr, length)
- Tcl_DString *dsPtr; /* Structure describing dynamic string. */
- int length; /* New length for dynamic string. */
+Tcl_DStringSetLength(
+ Tcl_DString *dsPtr, /* Structure describing dynamic string. */
+ int length) /* New length for dynamic string. */
{
int newsize;
@@ -1640,15 +2549,15 @@ Tcl_DStringSetLength(dsPtr, length)
}
if (length >= dsPtr->spaceAvl) {
/*
- * There are two interesting cases here. In the first case, the user
- * may be trying to allocate a large buffer of a specific size. It
+ * There are two interesting cases here. In the first case, the user
+ * may be trying to allocate a large buffer of a specific size. It
* would be wasteful to overallocate that buffer, so we just allocate
- * enough for the requested size plus the trailing null byte. In the
+ * enough for the requested size plus the trailing null byte. In the
* second case, we are growing the buffer incrementally, so we need
- * behavior similar to Tcl_DStringAppend. The requested length will
- * usually be a small delta above the current spaceAvl, so we'll end up
- * doubling the old size. This won't grow the buffer quite as quickly,
- * but it should be close enough.
+ * behavior similar to Tcl_DStringAppend. The requested length will
+ * usually be a small delta above the current spaceAvl, so we'll end
+ * up doubling the old size. This won't grow the buffer quite as
+ * quickly, but it should be close enough.
*/
newsize = dsPtr->spaceAvl * 2;
@@ -1658,14 +2567,12 @@ Tcl_DStringSetLength(dsPtr, length)
dsPtr->spaceAvl = length + 1;
}
if (dsPtr->string == dsPtr->staticSpace) {
- char *newString;
+ char *newString = ckalloc((unsigned) dsPtr->spaceAvl);
- newString = (char *) ckalloc((unsigned) dsPtr->spaceAvl);
- memcpy((VOID *) newString, (VOID *) dsPtr->string,
- (size_t) dsPtr->length);
+ memcpy(newString, dsPtr->string, (size_t) dsPtr->length);
dsPtr->string = newString;
} else {
- dsPtr->string = (char *) ckrealloc((VOID *) dsPtr->string,
+ dsPtr->string = (char *) ckrealloc((void *) dsPtr->string,
(size_t) dsPtr->spaceAvl);
}
}
@@ -1678,21 +2585,22 @@ Tcl_DStringSetLength(dsPtr, length)
*
* Tcl_DStringFree --
*
- * Frees up any memory allocated for the dynamic string and
- * reinitializes the string to an empty state.
+ * Frees up any memory allocated for the dynamic string and reinitializes
+ * the string to an empty state.
*
* Results:
* None.
*
* Side effects:
- * The previous contents of the dynamic string are lost, and
- * the new value is an empty string.
+ * The previous contents of the dynamic string are lost, and the new
+ * value is an empty string.
*
- *---------------------------------------------------------------------- */
+ *----------------------------------------------------------------------
+ */
void
-Tcl_DStringFree(dsPtr)
- Tcl_DString *dsPtr; /* Structure describing dynamic string. */
+Tcl_DStringFree(
+ Tcl_DString *dsPtr) /* Structure describing dynamic string. */
{
if (dsPtr->string != dsPtr->staticSpace) {
ckfree(dsPtr->string);
@@ -1708,29 +2616,28 @@ Tcl_DStringFree(dsPtr)
*
* Tcl_DStringResult --
*
- * This procedure moves the value of a dynamic string into an
- * interpreter as its string result. Afterwards, the dynamic string
- * is reset to an empty string.
+ * This function moves the value of a dynamic string into an interpreter
+ * as its string result. Afterwards, the dynamic string is reset to an
+ * empty string.
*
* Results:
* None.
*
* Side effects:
- * The string is "moved" to interp's result, and any existing
- * string result for interp is freed. dsPtr is reinitialized to
- * an empty string.
+ * The string is "moved" to interp's result, and any existing string
+ * result for interp is freed. dsPtr is reinitialized to an empty string.
*
*----------------------------------------------------------------------
*/
void
-Tcl_DStringResult(interp, dsPtr)
- Tcl_Interp *interp; /* Interpreter whose result is to be reset. */
- Tcl_DString *dsPtr; /* Dynamic string that is to become the
+Tcl_DStringResult(
+ Tcl_Interp *interp, /* Interpreter whose result is to be reset. */
+ Tcl_DString *dsPtr) /* Dynamic string that is to become the
* result of interp. */
{
Tcl_ResetResult(interp);
-
+
if (dsPtr->string != dsPtr->staticSpace) {
interp->result = dsPtr->string;
interp->freeProc = TCL_DYNAMIC;
@@ -1740,7 +2647,7 @@ Tcl_DStringResult(interp, dsPtr)
} else {
Tcl_SetResult(interp, dsPtr->string, TCL_VOLATILE);
}
-
+
dsPtr->string = dsPtr->staticSpace;
dsPtr->length = 0;
dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE;
@@ -1752,14 +2659,14 @@ Tcl_DStringResult(interp, dsPtr)
*
* Tcl_DStringGetResult --
*
- * This procedure moves an interpreter's result into a dynamic string.
+ * This function moves an interpreter's result into a dynamic string.
*
* Results:
* None.
*
* Side effects:
- * The interpreter's string result is cleared, and the previous
- * contents of dsPtr are freed.
+ * The interpreter's string result is cleared, and the previous contents
+ * of dsPtr are freed.
*
* If the string result is empty, the object result is moved to the
* string result, then the object result is reset.
@@ -1768,26 +2675,23 @@ Tcl_DStringResult(interp, dsPtr)
*/
void
-Tcl_DStringGetResult(interp, dsPtr)
- Tcl_Interp *interp; /* Interpreter whose result is to be reset. */
- Tcl_DString *dsPtr; /* Dynamic string that is to become the
- * result of interp. */
+Tcl_DStringGetResult(
+ Tcl_Interp *interp, /* Interpreter whose result is to be reset. */
+ Tcl_DString *dsPtr) /* Dynamic string that is to become the result
+ * of interp. */
{
Interp *iPtr = (Interp *) interp;
-
+
if (dsPtr->string != dsPtr->staticSpace) {
ckfree(dsPtr->string);
}
/*
- * If the string result is empty, move the object result to the
- * string result, then reset the object result.
+ * If the string result is empty, move the object result to the string
+ * result, then reset the object result.
*/
- if (*(iPtr->result) == 0) {
- Tcl_SetResult(interp, TclGetString(Tcl_GetObjResult(interp)),
- TCL_VOLATILE);
- }
+ (void) Tcl_GetStringResult(interp);
dsPtr->length = strlen(iPtr->result);
if (iPtr->freeProc != NULL) {
@@ -1796,7 +2700,7 @@ Tcl_DStringGetResult(interp, dsPtr)
dsPtr->spaceAvl = dsPtr->length+1;
} else {
dsPtr->string = (char *) ckalloc((unsigned) (dsPtr->length+1));
- strcpy(dsPtr->string, iPtr->result);
+ memcpy(dsPtr->string, iPtr->result, (unsigned) dsPtr->length+1);
(*iPtr->freeProc)(iPtr->result);
}
dsPtr->spaceAvl = dsPtr->length+1;
@@ -1809,9 +2713,9 @@ Tcl_DStringGetResult(interp, dsPtr)
dsPtr->string = (char *) ckalloc((unsigned) (dsPtr->length + 1));
dsPtr->spaceAvl = dsPtr->length + 1;
}
- strcpy(dsPtr->string, iPtr->result);
+ memcpy(dsPtr->string, iPtr->result, (unsigned) dsPtr->length+1);
}
-
+
iPtr->result = iPtr->resultSpace;
iPtr->resultSpace[0] = 0;
}
@@ -1821,9 +2725,9 @@ Tcl_DStringGetResult(interp, dsPtr)
*
* Tcl_DStringStartSublist --
*
- * This procedure adds the necessary information to a dynamic
- * string (e.g. " {" to start a sublist. Future element
- * appends will be in the sublist rather than the main list.
+ * This function adds the necessary information to a dynamic string
+ * (e.g. " {") to start a sublist. Future element appends will be in the
+ * sublist rather than the main list.
*
* Results:
* None.
@@ -1835,8 +2739,8 @@ Tcl_DStringGetResult(interp, dsPtr)
*/
void
-Tcl_DStringStartSublist(dsPtr)
- Tcl_DString *dsPtr; /* Dynamic string. */
+Tcl_DStringStartSublist(
+ Tcl_DString *dsPtr) /* Dynamic string. */
{
if (TclNeedSpace(dsPtr->string, dsPtr->string + dsPtr->length)) {
Tcl_DStringAppend(dsPtr, " {", -1);
@@ -1850,10 +2754,9 @@ Tcl_DStringStartSublist(dsPtr)
*
* Tcl_DStringEndSublist --
*
- * This procedure adds the necessary characters to a dynamic
- * string to end a sublist (e.g. "}"). Future element appends
- * will be in the enclosing (sub)list rather than the current
- * sublist.
+ * This function adds the necessary characters to a dynamic string to end
+ * a sublist (e.g. "}"). Future element appends will be in the enclosing
+ * (sub)list rather than the current sublist.
*
* Results:
* None.
@@ -1865,8 +2768,8 @@ Tcl_DStringStartSublist(dsPtr)
*/
void
-Tcl_DStringEndSublist(dsPtr)
- Tcl_DString *dsPtr; /* Dynamic string. */
+Tcl_DStringEndSublist(
+ Tcl_DString *dsPtr) /* Dynamic string. */
{
Tcl_DStringAppend(dsPtr, "}", -1);
}
@@ -1876,14 +2779,14 @@ Tcl_DStringEndSublist(dsPtr)
*
* Tcl_PrintDouble --
*
- * Given a floating-point value, this procedure converts it to
- * an ASCII string using.
+ * Given a floating-point value, this function converts it to an ASCII
+ * string using.
*
* Results:
- * The ASCII equivalent of "value" is written at "dst". It is
- * written using the current precision, and it is guaranteed to
- * contain a decimal point or exponent, so that it looks like
- * a floating-point value and not an integer.
+ * The ASCII equivalent of "value" is written at "dst". It is written
+ * using the current precision, and it is guaranteed to contain a decimal
+ * point or exponent, so that it looks like a floating-point value and
+ * not an integer.
*
* Side effects:
* None.
@@ -1892,43 +2795,158 @@ Tcl_DStringEndSublist(dsPtr)
*/
void
-Tcl_PrintDouble(interp, value, dst)
- Tcl_Interp *interp; /* Interpreter whose tcl_precision
- * variable used to be used to control
- * printing. It's ignored now. */
- double value; /* Value to print as string. */
- char *dst; /* Where to store converted value;
- * must have at least TCL_DOUBLE_SPACE
- * characters. */
+Tcl_PrintDouble(
+ Tcl_Interp *interp, /* Interpreter whose tcl_precision variable
+ * used to be used to control printing. It's
+ * ignored now. */
+ double value, /* Value to print as string. */
+ char *dst) /* Where to store converted value; must have
+ * at least TCL_DOUBLE_SPACE characters. */
{
char *p, c;
- Tcl_UniChar ch;
+ int exponent;
+ int signum;
+ char* digits;
+ char* end;
- Tcl_MutexLock(&precisionMutex);
- sprintf(dst, precisionFormat, value);
- Tcl_MutexUnlock(&precisionMutex);
+ int *precisionPtr = Tcl_GetThreadData(&precisionKey, (int)sizeof(int));
/*
- * If the ASCII result looks like an integer, add ".0" so that it
- * doesn't look like an integer anymore. This prevents floating-point
- * values from being converted to integers unintentionally.
- * Check for ASCII specifically to speed up the function.
- */
+ * Handle NaN.
+ */
- for (p = dst; *p != 0; ) {
- if (UCHAR(*p) < 0x80) {
- c = *p++;
- } else {
- p += Tcl_UtfToUniChar(p, &ch);
- c = UCHAR(ch);
+ if (TclIsNaN(value)) {
+ TclFormatNaN(value, dst);
+ return;
}
- if ((c == '.') || isalpha(UCHAR(c))) { /* INTL: ISO only. */
+
+ /*
+ * Handle infinities.
+ */
+
+ if (TclIsInfinite(value)) {
+ /*
+ * Remember to copy the terminating NUL too.
+ */
+
+ if (value < 0) {
+ memcpy(dst, "-Inf", 5);
+ } else {
+ memcpy(dst, "Inf", 4);
+ }
return;
}
+
+ /*
+ * Ordinary (normal and denormal) values.
+ */
+
+ if (*precisionPtr == 0) {
+ digits = TclDoubleDigits(value, -1, TCL_DD_SHORTEST,
+ &exponent, &signum, &end);
+ } else {
+ /*
+ * There are at least two possible interpretations for tcl_precision.
+ *
+ * The first is, "choose the decimal representation having
+ * $tcl_precision digits of significance that is nearest to the
+ * given number, breaking ties by rounding to even, and then
+ * trimming trailing zeros." This gives the greatest possible
+ * precision in the decimal string, but offers the anomaly that
+ * [expr 0.1] will be "0.10000000000000001".
+ *
+ * The second is "choose the decimal representation having at
+ * most $tcl_precision digits of significance that is nearest
+ * to the given number. If no such representation converts
+ * exactly to the given number, choose the one that is closest,
+ * breaking ties by rounding to even. If more than one such
+ * representation converts exactly to the given number, choose
+ * the shortest, breaking ties in favour of the nearest, breaking
+ * remaining ties in favour of the one ending in an even digit."
+ *
+ * Tcl 8.4 implements the first of these, which gives rise to
+ * anomalies in formatting:
+ *
+ * % expr 0.1
+ * 0.10000000000000001
+ * % expr 0.01
+ * 0.01
+ * % expr 1e-7
+ * 9.9999999999999995e-08
+ *
+ * For human readability, it appears better to choose the second rule,
+ * and let [expr 0.1] return 0.1. But for 8.4 compatibility, we
+ * prefer the first (the recommended zero value for tcl_precision
+ * avoids the problem entirely).
+ *
+ * Uncomment TCL_DD_SHORTEN_FLAG in the next call to prefer the
+ * method that allows floating point values to be shortened if
+ * it can be done without loss of precision.
+ */
+
+ digits = TclDoubleDigits(value, *precisionPtr,
+ TCL_DD_E_FORMAT /* | TCL_DD_SHORTEN_FLAG */,
+ &exponent, &signum, &end);
+ }
+ if (signum) {
+ *dst++ = '-';
+ }
+ p = digits;
+ if (exponent < -4 || exponent > 16) {
+ /*
+ * E format for numbers < 1e-3 or >= 1e17.
+ */
+
+ *dst++ = *p++;
+ c = *p;
+ if (c != '\0') {
+ *dst++ = '.';
+ while (c != '\0') {
+ *dst++ = c;
+ c = *++p;
+ }
+ }
+ /*
+ * Tcl 8.4 appears to format with at least a two-digit exponent; \
+ * preserve that behaviour when tcl_precision != 0
+ */
+ if (*precisionPtr == 0) {
+ sprintf(dst, "e%+d", exponent);
+ } else {
+ sprintf(dst, "e%+03d", exponent);
+ }
+ } else {
+ /*
+ * F format for others.
+ */
+
+ if (exponent < 0) {
+ *dst++ = '0';
+ }
+ c = *p;
+ while (exponent-- >= 0) {
+ if (c != '\0') {
+ *dst++ = c;
+ c = *++p;
+ } else {
+ *dst++ = '0';
+ }
+ }
+ *dst++ = '.';
+ if (c == '\0') {
+ *dst++ = '0';
+ } else {
+ while (++exponent < -1) {
+ *dst++ = '0';
+ }
+ while (c != '\0') {
+ *dst++ = c;
+ c = *++p;
+ }
+ }
+ *dst++ = '\0';
}
- p[0] = '.';
- p[1] = '0';
- p[2] = 0;
+ ckfree(digits);
}
/*
@@ -1936,33 +2954,33 @@ Tcl_PrintDouble(interp, value, dst)
*
* TclPrecTraceProc --
*
- * This procedure is invoked whenever the variable "tcl_precision"
- * is written.
+ * This function is invoked whenever the variable "tcl_precision" is
+ * written.
*
* Results:
- * Returns NULL if all went well, or an error message if the
- * new value for the variable doesn't make sense.
+ * Returns NULL if all went well, or an error message if the new value
+ * for the variable doesn't make sense.
*
* Side effects:
- * If the new value doesn't make sense then this procedure
- * undoes the effect of the variable modification. Otherwise
- * it modifies the format string that's used by Tcl_PrintDouble.
+ * If the new value doesn't make sense then this function undoes the
+ * effect of the variable modification. Otherwise it modifies the format
+ * string that's used by Tcl_PrintDouble.
*
*----------------------------------------------------------------------
*/
/* ARGSUSED */
char *
-TclPrecTraceProc(clientData, interp, name1, name2, flags)
- ClientData clientData; /* Not used. */
- Tcl_Interp *interp; /* Interpreter containing variable. */
- CONST char *name1; /* Name of variable. */
- CONST char *name2; /* Second part of variable name. */
- int flags; /* Information about what happened. */
+TclPrecTraceProc(
+ ClientData clientData, /* Not used. */
+ Tcl_Interp *interp, /* Interpreter containing variable. */
+ CONST char *name1, /* Name of variable. */
+ CONST char *name2, /* Second part of variable name. */
+ int flags) /* Information about what happened. */
{
- CONST char *value;
- char *end;
+ Tcl_Obj* value;
int prec;
+ int *precisionPtr = Tcl_GetThreadData(&precisionKey, (int) sizeof(int));
/*
* If the variable is unset, then recreate the trace.
@@ -1974,54 +2992,39 @@ TclPrecTraceProc(clientData, interp, name1, name2, flags)
TCL_GLOBAL_ONLY|TCL_TRACE_READS|TCL_TRACE_WRITES
|TCL_TRACE_UNSETS, TclPrecTraceProc, clientData);
}
- return (char *) NULL;
+ return NULL;
}
/*
- * When the variable is read, reset its value from our shared
- * value. This is needed in case the variable was modified in
- * some other interpreter so that this interpreter's value is
- * out of date.
+ * When the variable is read, reset its value from our shared value. This
+ * is needed in case the variable was modified in some other interpreter
+ * so that this interpreter's value is out of date.
*/
- Tcl_MutexLock(&precisionMutex);
if (flags & TCL_TRACE_READS) {
- Tcl_SetVar2(interp, name1, name2, precisionString,
+ Tcl_SetVar2Ex(interp, name1, name2, Tcl_NewIntObj(*precisionPtr),
flags & TCL_GLOBAL_ONLY);
- Tcl_MutexUnlock(&precisionMutex);
- return (char *) NULL;
+ return NULL;
}
/*
- * The variable is being written. Check the new value and disallow
- * it if it isn't reasonable or if this is a safe interpreter (we
- * don't want safe interpreters messing up the precision of other
- * interpreters).
+ * The variable is being written. Check the new value and disallow it if
+ * it isn't reasonable or if this is a safe interpreter (we don't want
+ * safe interpreters messing up the precision of other interpreters).
*/
if (Tcl_IsSafe(interp)) {
- Tcl_SetVar2(interp, name1, name2, precisionString,
- flags & TCL_GLOBAL_ONLY);
- Tcl_MutexUnlock(&precisionMutex);
return "can't modify precision from a safe interpreter";
}
- value = Tcl_GetVar2(interp, name1, name2, flags & TCL_GLOBAL_ONLY);
- if (value == NULL) {
- value = "";
- }
- prec = strtoul(value, &end, 10);
- if ((prec <= 0) || (prec > TCL_MAX_PREC) || (prec > 100) ||
- (end == value) || (*end != 0)) {
- Tcl_SetVar2(interp, name1, name2, precisionString,
- flags & TCL_GLOBAL_ONLY);
- Tcl_MutexUnlock(&precisionMutex);
+ value = Tcl_GetVar2Ex(interp, name1, name2, flags & TCL_GLOBAL_ONLY);
+ if (value == NULL
+ || Tcl_GetIntFromObj((Tcl_Interp*) NULL, value, &prec) != TCL_OK
+ || prec < 0 || prec > TCL_MAX_PREC) {
return "improper value for precision";
}
- TclFormatInt(precisionString, prec);
- sprintf(precisionFormat, "%%.%dg", prec);
- Tcl_MutexUnlock(&precisionMutex);
- return (char *) NULL;
+ *precisionPtr = prec;
+ return NULL;
}
/*
@@ -2029,9 +3032,8 @@ TclPrecTraceProc(clientData, interp, name1, name2, flags)
*
* TclNeedSpace --
*
- * This procedure checks to see whether it is appropriate to
- * add a space before appending a new list element to an
- * existing string.
+ * This function checks to see whether it is appropriate to add a space
+ * before appending a new list element to an existing string.
*
* Results:
* The return value is 1 if a space is appropriate, 0 otherwise.
@@ -2043,24 +3045,25 @@ TclPrecTraceProc(clientData, interp, name1, name2, flags)
*/
int
-TclNeedSpace(start, end)
- CONST char *start; /* First character in string. */
- CONST char *end; /* End of string (place where space will
- * be added, if appropriate). */
+TclNeedSpace(
+ CONST char *start, /* First character in string. */
+ CONST char *end) /* End of string (place where space will be
+ * added, if appropriate). */
{
/*
- * A space is needed unless either
+ * A space is needed unless either:
* (a) we're at the start of the string, or
*/
+
if (end == start) {
return 0;
}
/*
- * (b) we're at the start of a nested list-element, quoted with an
- * open curly brace; we can be nested arbitrarily deep, so long
- * as the first curly brace starts an element, so backtrack over
- * open curly braces that are trailing characters of the string; and
+ * (b) we're at the start of a nested list-element, quoted with an open
+ * curly brace; we can be nested arbitrarily deep, so long as the
+ * first curly brace starts an element, so backtrack over open curly
+ * braces that are trailing characters of the string; and
*/
end = Tcl_UtfPrev(end, start);
@@ -2073,39 +3076,39 @@ TclNeedSpace(start, end)
/*
* (c) the trailing character of the string is already a list-element
- * separator (according to TclFindElement); that is, one of these
- * characters:
- * \u0009 \t TAB
- * \u000A \n NEWLINE
- * \u000B \v VERTICAL TAB
- * \u000C \f FORM FEED
- * \u000D \r CARRIAGE RETURN
- * \u0020 SPACE
- * with the condition that the penultimate character is not a
- * backslash.
+ * separator (according to TclFindElement); that is, one of these
+ * characters:
+ * \u0009 \t TAB
+ * \u000A \n NEWLINE
+ * \u000B \v VERTICAL TAB
+ * \u000C \f FORM FEED
+ * \u000D \r CARRIAGE RETURN
+ * \u0020 SPACE
+ * with the condition that the penultimate character is not a
+ * backslash.
*/
if (*end > 0x20) {
/*
- * Performance tweak. All ASCII spaces are <= 0x20. So get
- * a quick answer for most characters before comparing against
- * all spaces in the switch below.
+ * Performance tweak. All ASCII spaces are <= 0x20. So get a quick
+ * answer for most characters before comparing against all spaces in
+ * the switch below.
*
- * NOTE: Remove this if other Unicode spaces ever get accepted
- * as list-element separators.
+ * NOTE: Remove this if other Unicode spaces ever get accepted as
+ * list-element separators.
*/
return 1;
}
switch (*end) {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- case '\v':
- case '\f':
- if ((end == start) || (end[-1] != '\\')) {
- return 0;
- }
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ case '\v':
+ case '\f':
+ if ((end == start) || (end[-1] != '\\')) {
+ return 0;
+ }
}
return 1;
}
@@ -2120,7 +3123,9 @@ TclNeedSpace(start, end)
* inserted at the start of the buffer. A null character is inserted at
* the end of the formatted characters. It is the caller's
* responsibility to ensure that enough storage is available. This
- * procedure has the effect of sprintf(buffer, "%d", n) but is faster.
+ * procedure has the effect of sprintf(buffer, "%ld", n) but is faster
+ * as proven in benchmarks. This is key to UpdateStringOfInt, which
+ * is a common path for a lot of code (e.g. int-indexed arrays).
*
* Results:
* An integer representing the number of characters formatted, not
@@ -2162,8 +3167,7 @@ TclFormatInt(buffer, n)
intVal = -n; /* [Bug 3390638] Workaround for*/
if (n == -n || intVal == n) { /* broken compiler optimizers. */
- sprintf(buffer, "%ld", n);
- return strlen(buffer);
+ return sprintf(buffer, "%ld", n);
}
/*
@@ -2199,133 +3203,119 @@ TclFormatInt(buffer, n)
/*
*----------------------------------------------------------------------
*
- * TclLooksLikeInt --
- *
- * This procedure decides whether the leading characters of a
- * string look like an integer or something else (such as a
- * floating-point number or string).
- *
- * Results:
- * The return value is 1 if the leading characters of p look
- * like a valid Tcl integer. If they look like a floating-point
- * number (e.g. "e01" or "2.4"), or if they don't look like a
- * number at all, then 0 is returned.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------
- */
-
-int
-TclLooksLikeInt(bytes, length)
- register CONST char *bytes; /* Points to first byte of the string. */
- int length; /* Number of bytes in the string. If < 0
- * bytes up to the first null byte are
- * considered (if they may appear in an
- * integer). */
-{
- register CONST char *p;
-
- if ((bytes == NULL) && (length > 0)) {
- Tcl_Panic("TclLooksLikeInt: cannot scan %d bytes from NULL", length);
- }
-
- if (length < 0) {
- length = (bytes? strlen(bytes) : 0);
- }
-
- p = bytes;
- while (length && isspace(UCHAR(*p))) { /* INTL: ISO space. */
- length--; p++;
- }
- if (length == 0) {
- return 0;
- }
- if ((*p == '+') || (*p == '-')) {
- p++; length--;
- }
-
- return (0 != TclParseInteger(p, length));
-}
-
-/*
- *----------------------------------------------------------------------
- *
* TclGetIntForIndex --
*
- * This procedure returns an integer corresponding to the list index
- * held in a Tcl object. The Tcl object's value is expected to be
- * either an integer or a string of the form "end([+-]integer)?".
+ * This function returns an integer corresponding to the list index held
+ * in a Tcl object. The Tcl object's value is expected to be in the
+ * format integer([+-]integer)? or the format end([+-]integer)?.
*
* Results:
* The return value is normally TCL_OK, which means that the index was
- * successfully stored into the location referenced by "indexPtr". If
- * the Tcl object referenced by "objPtr" has the value "end", the
- * value stored is "endValue". If "objPtr"s values is not of the form
- * "end([+-]integer)?" and
- * can not be converted to an integer, TCL_ERROR is returned and, if
- * "interp" is non-NULL, an error message is left in the interpreter's
- * result object.
+ * successfully stored into the location referenced by "indexPtr". If the
+ * Tcl object referenced by "objPtr" has the value "end", the value
+ * stored is "endValue". If "objPtr"s values is not of one of the
+ * expected formats, TCL_ERROR is returned and, if "interp" is non-NULL,
+ * an error message is left in the interpreter's result object.
*
* Side effects:
- * The object referenced by "objPtr" might be converted to an
- * integer, wide integer, or end-based-index object.
+ * The object referenced by "objPtr" might be converted to an integer,
+ * wide integer, or end-based-index object.
*
*----------------------------------------------------------------------
*/
int
-TclGetIntForIndex(interp, objPtr, endValue, indexPtr)
- Tcl_Interp *interp; /* Interpreter to use for error reporting.
- * If NULL, then no error message is left
- * after errors. */
- Tcl_Obj *objPtr; /* Points to an object containing either
- * "end" or an integer. */
- int endValue; /* The value to be stored at "indexPtr" if
+TclGetIntForIndex(
+ Tcl_Interp *interp, /* Interpreter to use for error reporting. If
+ * NULL, then no error message is left after
+ * errors. */
+ Tcl_Obj *objPtr, /* Points to an object containing either "end"
+ * or an integer. */
+ int endValue, /* The value to be stored at "indexPtr" if
* "objPtr" holds "end". */
- int *indexPtr; /* Location filled in with an integer
+ int *indexPtr) /* Location filled in with an integer
* representing an index. */
{
- if (Tcl_GetIntFromObj(NULL, objPtr, indexPtr) == TCL_OK) {
+ int length;
+ char *opPtr, *bytes;
+
+ if (TclGetIntFromObj(NULL, objPtr, indexPtr) == TCL_OK) {
return TCL_OK;
}
if (SetEndOffsetFromAny(NULL, objPtr) == TCL_OK) {
/*
- * If the object is already an offset from the end of the
- * list, or can be converted to one, use it.
+ * If the object is already an offset from the end of the list, or can
+ * be converted to one, use it.
*/
*indexPtr = endValue + objPtr->internalRep.longValue;
+ return TCL_OK;
+ }
+
+ bytes = TclGetStringFromObj(objPtr, &length);
+
+ /*
+ * Leading whitespace is acceptable in an index.
+ */
+
+ while (length && TclIsSpaceProc(*bytes)) {
+ bytes++;
+ length--;
+ }
+
+ if (TclParseNumber(NULL, NULL, NULL, bytes, length, (const char **)&opPtr,
+ TCL_PARSE_INTEGER_ONLY | TCL_PARSE_NO_WHITESPACE) == TCL_OK) {
+ int code, first, second;
+ char savedOp = *opPtr;
+
+ if ((savedOp != '+') && (savedOp != '-')) {
+ goto parseError;
+ }
+ if (TclIsSpaceProc(opPtr[1])) {
+ goto parseError;
+ }
+ *opPtr = '\0';
+ code = Tcl_GetInt(interp, bytes, &first);
+ *opPtr = savedOp;
+ if (code == TCL_ERROR) {
+ goto parseError;
+ }
+ if (TCL_ERROR == Tcl_GetInt(interp, opPtr+1, &second)) {
+ goto parseError;
+ }
+ if (savedOp == '+') {
+ *indexPtr = first + second;
+ } else {
+ *indexPtr = first - second;
+ }
+ return TCL_OK;
+ }
+
+ /*
+ * Report a parse error.
+ */
+
+ parseError:
+ if (interp != NULL) {
+ char *bytes = Tcl_GetString(objPtr);
- } else {
/*
- * Report a parse error.
+ * The result might not be empty; this resets it which should be both
+ * a cheap operation, and of little problem because this is an
+ * error-generation path anyway.
*/
- if (interp != NULL) {
- char *bytes = Tcl_GetString(objPtr);
- /*
- * The result might not be empty; this resets it which
- * should be both a cheap operation, and of little problem
- * because this is an error-generation path anyway.
- */
- Tcl_ResetResult(interp);
- Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
- "bad index \"", bytes,
- "\": must be integer or end?-integer?",
- (char *) NULL);
- if (!strncmp(bytes, "end-", 3)) {
- bytes += 3;
- }
- TclCheckBadOctal(interp, bytes);
+ Tcl_ResetResult(interp);
+ Tcl_AppendResult(interp, "bad index \"", bytes,
+ "\": must be integer?[+-]integer? or end?[+-]integer?", NULL);
+ if (!strncmp(bytes, "end-", 4)) {
+ bytes += 4;
}
-
- return TCL_ERROR;
+ TclCheckBadOctal(interp, bytes);
}
-
- return TCL_OK;
+
+ return TCL_ERROR;
}
/*
@@ -2342,16 +3332,15 @@ TclGetIntForIndex(interp, objPtr, endValue, indexPtr)
* Side effects:
* Stores a valid string in the object's string rep.
*
- * This procedure does NOT free any earlier string rep. If it is
- * called on an object that already has a valid string rep, it will
- * leak memory.
+ * This function does NOT free any earlier string rep. If it is called on an
+ * object that already has a valid string rep, it will leak memory.
*
*----------------------------------------------------------------------
*/
static void
-UpdateStringOfEndOffset(objPtr)
- register Tcl_Obj* objPtr;
+UpdateStringOfEndOffset(
+ register Tcl_Obj* objPtr)
{
char buffer[TCL_INTEGER_SPACE + sizeof("end") + 1];
register int len;
@@ -2362,8 +3351,8 @@ UpdateStringOfEndOffset(objPtr)
buffer[len++] = '-';
len += TclFormatInt(buffer+len, -(objPtr->internalRep.longValue));
}
- objPtr->bytes = ckalloc((unsigned) (len+1));
- strcpy(objPtr->bytes, buffer);
+ objPtr->bytes = ckalloc((unsigned) len+1);
+ memcpy(objPtr->bytes, buffer, (unsigned) len+1);
objPtr->length = len;
}
@@ -2372,100 +3361,104 @@ UpdateStringOfEndOffset(objPtr)
*
* SetEndOffsetFromAny --
*
- * Look for a string of the form "end-offset" and convert it
- * to an internal representation holding the offset.
+ * Look for a string of the form "end[+-]offset" and convert it to an
+ * internal representation holding the offset.
*
* Results:
* Returns TCL_OK if ok, TCL_ERROR if the string was badly formed.
*
* Side effects:
- * If interp is not NULL, stores an error message in the
- * interpreter result.
+ * If interp is not NULL, stores an error message in the interpreter
+ * result.
*
*----------------------------------------------------------------------
*/
static int
-SetEndOffsetFromAny(interp, objPtr)
- Tcl_Interp* interp; /* Tcl interpreter or NULL */
- Tcl_Obj* objPtr; /* Pointer to the object to parse */
+SetEndOffsetFromAny(
+ Tcl_Interp *interp, /* Tcl interpreter or NULL */
+ Tcl_Obj *objPtr) /* Pointer to the object to parse */
{
int offset; /* Offset in the "end-offset" expression */
- Tcl_ObjType* oldTypePtr = objPtr->typePtr;
- /* Old internal rep type of the object */
register char* bytes; /* String rep of the object */
int length; /* Length of the object's string rep */
- /* If it's already the right type, we're fine. */
+ /*
+ * If it's already the right type, we're fine.
+ */
if (objPtr->typePtr == &tclEndOffsetType) {
return TCL_OK;
}
- /* Check for a string rep of the right form. */
+ /*
+ * Check for a string rep of the right form.
+ */
- bytes = Tcl_GetStringFromObj(objPtr, &length);
+ bytes = TclGetStringFromObj(objPtr, &length);
if ((*bytes != 'e') || (strncmp(bytes, "end",
(size_t)((length > 3) ? 3 : length)) != 0)) {
if (interp != NULL) {
Tcl_ResetResult(interp);
- Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
- "bad index \"", bytes,
- "\": must be end?-integer?",
- (char*) NULL);
+ Tcl_AppendResult(interp, "bad index \"", bytes,
+ "\": must be end?[+-]integer?", NULL);
}
return TCL_ERROR;
}
- /* Convert the string rep */
+ /*
+ * Convert the string rep.
+ */
if (length <= 3) {
offset = 0;
- } else if ((length > 4) && (bytes[3] == '-')) {
+ } else if ((length > 4) && ((bytes[3] == '-') || (bytes[3] == '+'))) {
/*
- * This is our limited string expression evaluator. Pass everything
+ * This is our limited string expression evaluator. Pass everything
* after "end-" to Tcl_GetInt, then reverse for offset.
*/
+
+ if (TclIsSpaceProc(bytes[4])) {
+ return TCL_ERROR;
+ }
if (Tcl_GetInt(interp, bytes+4, &offset) != TCL_OK) {
return TCL_ERROR;
}
- offset = -offset;
+ if (bytes[3] == '-') {
+ offset = -offset;
+ }
} else {
/*
- * Conversion failed. Report the error.
+ * Conversion failed. Report the error.
*/
+
if (interp != NULL) {
Tcl_ResetResult(interp);
- Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
- "bad index \"", bytes,
- "\": must be integer or end?-integer?",
- (char *) NULL);
+ Tcl_AppendResult(interp, "bad index \"", bytes,
+ "\": must be end?[+-]integer?", NULL);
}
return TCL_ERROR;
}
/*
- * The conversion succeeded. Free the old internal rep and set
- * the new one.
+ * The conversion succeeded. Free the old internal rep and set the new
+ * one.
*/
- if ((oldTypePtr != NULL) && (oldTypePtr->freeIntRepProc != NULL)) {
- oldTypePtr->freeIntRepProc(objPtr);
- }
-
+ TclFreeIntRep(objPtr);
objPtr->internalRep.longValue = offset;
objPtr->typePtr = &tclEndOffsetType;
return TCL_OK;
-}
+}
/*
*----------------------------------------------------------------------
*
* TclCheckBadOctal --
*
- * This procedure checks for a bad octal value and appends a
- * meaningful error to the interp's result.
+ * This function checks for a bad octal value and appends a meaningful
+ * error to the interp's result.
*
* Results:
* 1 if the argument was a bad octal, else 0.
@@ -2477,41 +3470,48 @@ SetEndOffsetFromAny(interp, objPtr)
*/
int
-TclCheckBadOctal(interp, value)
- Tcl_Interp *interp; /* Interpreter to use for error reporting.
- * If NULL, then no error message is left
- * after errors. */
- CONST char *value; /* String to check. */
+TclCheckBadOctal(
+ Tcl_Interp *interp, /* Interpreter to use for error reporting. If
+ * NULL, then no error message is left after
+ * errors. */
+ CONST char *value) /* String to check. */
{
register CONST char *p = value;
/*
- * A frequent mistake is invalid octal values due to an unwanted
- * leading zero. Try to generate a meaningful error message.
+ * A frequent mistake is invalid octal values due to an unwanted leading
+ * zero. Try to generate a meaningful error message.
*/
- while (isspace(UCHAR(*p))) { /* INTL: ISO space. */
+ while (TclIsSpaceProc(*p)) {
p++;
}
if (*p == '+' || *p == '-') {
p++;
}
if (*p == '0') {
+ if ((p[1] == 'o') || p[1] == 'O') {
+ p+=2;
+ }
while (isdigit(UCHAR(*p))) { /* INTL: digit. */
p++;
}
- while (isspace(UCHAR(*p))) { /* INTL: ISO space. */
+ while (TclIsSpaceProc(*p)) {
p++;
}
if (*p == '\0') {
- /* Reached end of string */
+ /*
+ * Reached end of string.
+ */
+
if (interp != NULL) {
/*
- * Don't reset the result here because we want this result
- * to be added to an existing error message as extra info.
+ * Don't reset the result here because we want this result to
+ * be added to an existing error message as extra info.
*/
+
Tcl_AppendResult(interp, " (looks like invalid octal number)",
- (char *) NULL);
+ NULL);
}
return 1;
}
@@ -2522,28 +3522,353 @@ TclCheckBadOctal(interp, value)
/*
*----------------------------------------------------------------------
*
+ * ClearHash --
+ *
+ * Remove all the entries in the hash table *tablePtr.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+ClearHash(
+ Tcl_HashTable *tablePtr)
+{
+ Tcl_HashSearch search;
+ Tcl_HashEntry *hPtr;
+
+ for (hPtr = Tcl_FirstHashEntry(tablePtr, &search); hPtr != NULL;
+ hPtr = Tcl_NextHashEntry(&search)) {
+ Tcl_Obj *objPtr = (Tcl_Obj *) Tcl_GetHashValue(hPtr);
+ Tcl_DecrRefCount(objPtr);
+ Tcl_DeleteHashEntry(hPtr);
+ }
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * GetThreadHash --
+ *
+ * Get a thread-specific (Tcl_HashTable *) associated with a thread data
+ * key.
+ *
+ * Results:
+ * The Tcl_HashTable * corresponding to *keyPtr.
+ *
+ * Side effects:
+ * The first call on a keyPtr in each thread creates a new Tcl_HashTable,
+ * and registers a thread exit handler to dispose of it.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static Tcl_HashTable *
+GetThreadHash(
+ Tcl_ThreadDataKey *keyPtr)
+{
+ Tcl_HashTable **tablePtrPtr = (Tcl_HashTable **)
+ Tcl_GetThreadData(keyPtr, (int) sizeof(Tcl_HashTable *));
+
+ if (NULL == *tablePtrPtr) {
+ *tablePtrPtr = (Tcl_HashTable *)ckalloc(sizeof(Tcl_HashTable));
+ Tcl_CreateThreadExitHandler(FreeThreadHash, (ClientData)*tablePtrPtr);
+ Tcl_InitHashTable(*tablePtrPtr, TCL_ONE_WORD_KEYS);
+ }
+ return *tablePtrPtr;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * FreeThreadHash --
+ *
+ * Thread exit handler used by GetThreadHash to dispose of a thread hash
+ * table.
+ *
+ * Side effects:
+ * Frees a Tcl_HashTable.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+FreeThreadHash(
+ ClientData clientData)
+{
+ Tcl_HashTable *tablePtr = (Tcl_HashTable *) clientData;
+
+ ClearHash(tablePtr);
+ Tcl_DeleteHashTable(tablePtr);
+ ckfree((char *) tablePtr);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * FreeProcessGlobalValue --
+ *
+ * Exit handler used by Tcl(Set|Get)ProcessGlobalValue to cleanup a
+ * ProcessGlobalValue at exit.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+FreeProcessGlobalValue(
+ ClientData clientData)
+{
+ ProcessGlobalValue *pgvPtr = (ProcessGlobalValue *) clientData;
+
+ pgvPtr->epoch++;
+ pgvPtr->numBytes = 0;
+ ckfree(pgvPtr->value);
+ pgvPtr->value = NULL;
+ if (pgvPtr->encoding) {
+ Tcl_FreeEncoding(pgvPtr->encoding);
+ pgvPtr->encoding = NULL;
+ }
+ Tcl_MutexFinalize(&pgvPtr->mutex);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclSetProcessGlobalValue --
+ *
+ * Utility routine to set a global value shared by all threads in the
+ * process while keeping a thread-local copy as well.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+TclSetProcessGlobalValue(
+ ProcessGlobalValue *pgvPtr,
+ Tcl_Obj *newValue,
+ Tcl_Encoding encoding)
+{
+ CONST char *bytes;
+ Tcl_HashTable *cacheMap;
+ Tcl_HashEntry *hPtr;
+ int dummy;
+
+ Tcl_MutexLock(&pgvPtr->mutex);
+
+ /*
+ * Fill the global string value.
+ */
+
+ pgvPtr->epoch++;
+ if (NULL != pgvPtr->value) {
+ ckfree(pgvPtr->value);
+ } else {
+ Tcl_CreateExitHandler(FreeProcessGlobalValue, (ClientData) pgvPtr);
+ }
+ bytes = Tcl_GetStringFromObj(newValue, &pgvPtr->numBytes);
+ pgvPtr->value = ckalloc((unsigned) pgvPtr->numBytes + 1);
+ memcpy(pgvPtr->value, bytes, (unsigned) pgvPtr->numBytes + 1);
+ if (pgvPtr->encoding) {
+ Tcl_FreeEncoding(pgvPtr->encoding);
+ }
+ pgvPtr->encoding = encoding;
+
+ /*
+ * Fill the local thread copy directly with the Tcl_Obj value to avoid
+ * loss of the intrep. Increment newValue refCount early to handle case
+ * where we set a PGV to itself.
+ */
+
+ Tcl_IncrRefCount(newValue);
+ cacheMap = GetThreadHash(&pgvPtr->key);
+ ClearHash(cacheMap);
+ hPtr = Tcl_CreateHashEntry(cacheMap,
+ (char *) INT2PTR(pgvPtr->epoch), &dummy);
+ Tcl_SetHashValue(hPtr, (ClientData) newValue);
+ Tcl_MutexUnlock(&pgvPtr->mutex);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclGetProcessGlobalValue --
+ *
+ * Retrieve a global value shared among all threads of the process,
+ * preferring a thread-local copy as long as it remains valid.
+ *
+ * Results:
+ * Returns a (Tcl_Obj *) that holds a copy of the global value.
+ *
+ *----------------------------------------------------------------------
+ */
+
+Tcl_Obj *
+TclGetProcessGlobalValue(
+ ProcessGlobalValue *pgvPtr)
+{
+ Tcl_Obj *value = NULL;
+ Tcl_HashTable *cacheMap;
+ Tcl_HashEntry *hPtr;
+ int epoch = pgvPtr->epoch;
+
+ if (pgvPtr->encoding) {
+ Tcl_Encoding current = Tcl_GetEncoding(NULL, NULL);
+
+ if (pgvPtr->encoding != current) {
+ /*
+ * The system encoding has changed since the master string value
+ * was saved. Convert the master value to be based on the new
+ * system encoding.
+ */
+
+ Tcl_DString native, newValue;
+
+ Tcl_MutexLock(&pgvPtr->mutex);
+ pgvPtr->epoch++;
+ epoch = pgvPtr->epoch;
+ Tcl_UtfToExternalDString(pgvPtr->encoding, pgvPtr->value,
+ pgvPtr->numBytes, &native);
+ Tcl_ExternalToUtfDString(current, Tcl_DStringValue(&native),
+ Tcl_DStringLength(&native), &newValue);
+ Tcl_DStringFree(&native);
+ ckfree(pgvPtr->value);
+ pgvPtr->value = ckalloc((unsigned int)
+ Tcl_DStringLength(&newValue) + 1);
+ memcpy(pgvPtr->value, Tcl_DStringValue(&newValue),
+ (size_t) Tcl_DStringLength(&newValue) + 1);
+ Tcl_DStringFree(&newValue);
+ Tcl_FreeEncoding(pgvPtr->encoding);
+ pgvPtr->encoding = current;
+ Tcl_MutexUnlock(&pgvPtr->mutex);
+ } else {
+ Tcl_FreeEncoding(current);
+ }
+ }
+ cacheMap = GetThreadHash(&pgvPtr->key);
+ hPtr = Tcl_FindHashEntry(cacheMap, (char *) INT2PTR(epoch));
+ if (NULL == hPtr) {
+ int dummy;
+
+ /*
+ * No cache for the current epoch - must be a new one.
+ *
+ * First, clear the cacheMap, as anything in it must refer to some
+ * expired epoch.
+ */
+
+ ClearHash(cacheMap);
+
+ /*
+ * If no thread has set the shared value, call the initializer.
+ */
+
+ Tcl_MutexLock(&pgvPtr->mutex);
+ if ((NULL == pgvPtr->value) && (pgvPtr->proc)) {
+ pgvPtr->epoch++;
+ (*(pgvPtr->proc))(&pgvPtr->value, &pgvPtr->numBytes,
+ &pgvPtr->encoding);
+ if (pgvPtr->value == NULL) {
+ Tcl_Panic("PGV Initializer did not initialize");
+ }
+ Tcl_CreateExitHandler(FreeProcessGlobalValue, (ClientData)pgvPtr);
+ }
+
+ /*
+ * Store a copy of the shared value in our epoch-indexed cache.
+ */
+
+ value = Tcl_NewStringObj(pgvPtr->value, pgvPtr->numBytes);
+ hPtr = Tcl_CreateHashEntry(cacheMap,
+ (char *) INT2PTR(pgvPtr->epoch), &dummy);
+ Tcl_MutexUnlock(&pgvPtr->mutex);
+ Tcl_SetHashValue(hPtr, (ClientData) value);
+ Tcl_IncrRefCount(value);
+ }
+ return (Tcl_Obj *) Tcl_GetHashValue(hPtr);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclSetObjNameOfExecutable --
+ *
+ * This function stores the absolute pathname of the executable file
+ * (normally as computed by TclpFindExecutable).
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Stores the executable name.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+TclSetObjNameOfExecutable(
+ Tcl_Obj *name,
+ Tcl_Encoding encoding)
+{
+ TclSetProcessGlobalValue(&executableName, name, encoding);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclGetObjNameOfExecutable --
+ *
+ * This function retrieves the absolute pathname of the application in
+ * which the Tcl library is running, usually as previously stored by
+ * TclpFindExecutable(). This function call is the C API equivalent to
+ * the "info nameofexecutable" command.
+ *
+ * Results:
+ * A pointer to an "fsPath" Tcl_Obj, or to an empty Tcl_Obj if the
+ * pathname of the application is unknown.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+Tcl_Obj *
+TclGetObjNameOfExecutable(void)
+{
+ return TclGetProcessGlobalValue(&executableName);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_GetNameOfExecutable --
*
- * This procedure simply returns a pointer to the internal full
- * path name of the executable file as computed by
- * Tcl_FindExecutable. This procedure call is the C API
- * equivalent to the "info nameofexecutable" command.
+ * This function retrieves the absolute pathname of the application in
+ * which the Tcl library is running, and returns it in string form.
+ *
+ * The returned string belongs to Tcl and should be copied if the caller
+ * plans to keep it, to guard against it becoming invalid.
*
* Results:
- * A pointer to the internal string or NULL if the internal full
- * path name has not been computed or unknown.
+ * A pointer to the internal string or NULL if the internal full path
+ * name has not been computed or unknown.
*
* Side effects:
- * The object referenced by "objPtr" might be converted to an
- * integer object.
+ * None.
*
*----------------------------------------------------------------------
*/
CONST char *
-Tcl_GetNameOfExecutable()
+Tcl_GetNameOfExecutable(void)
{
- return tclExecutableName;
+ int numBytes;
+ const char *bytes =
+ Tcl_GetStringFromObj(TclGetObjNameOfExecutable(), &numBytes);
+
+ if (numBytes == 0) {
+ return NULL;
+ }
+ return bytes;
}
/*
@@ -2551,7 +3876,9 @@ Tcl_GetNameOfExecutable()
*
* TclpGetTime --
*
- * Deprecated synonym for Tcl_GetTime.
+ * Deprecated synonym for Tcl_GetTime. This function is provided for the
+ * benefit of extensions written before Tcl_GetTime was exported from the
+ * library.
*
* Results:
* None.
@@ -2559,15 +3886,265 @@ Tcl_GetNameOfExecutable()
* Side effects:
* Stores current time in the buffer designated by "timePtr"
*
- * This procedure is provided for the benefit of extensions written
- * before Tcl_GetTime was exported from the library.
- *
*----------------------------------------------------------------------
*/
void
-TclpGetTime(timePtr)
- Tcl_Time* timePtr;
+TclpGetTime(
+ Tcl_Time *timePtr)
{
Tcl_GetTime(timePtr);
}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclGetPlatform --
+ *
+ * This is a kludge that allows the test library to get access the
+ * internal tclPlatform variable.
+ *
+ * Results:
+ * Returns a pointer to the tclPlatform variable.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+TclPlatformType *
+TclGetPlatform(void)
+{
+ return &tclPlatform;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclReToGlob --
+ *
+ * Attempt to convert a regular expression to an equivalent glob pattern.
+ *
+ * Results:
+ * Returns TCL_OK on success, TCL_ERROR on failure. If interp is not
+ * NULL, an error message is placed in the result. On success, the
+ * DString will contain an exact equivalent glob pattern. The caller is
+ * responsible for calling Tcl_DStringFree on success. If exactPtr is not
+ * NULL, it will be 1 if an exact match qualifies.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclReToGlob(
+ Tcl_Interp *interp,
+ const char *reStr,
+ int reStrLen,
+ Tcl_DString *dsPtr,
+ int *exactPtr)
+{
+ int anchorLeft, anchorRight, lastIsStar, numStars;
+ char *dsStr, *dsStrStart, *msg;
+ const char *p, *strEnd;
+
+ strEnd = reStr + reStrLen;
+ Tcl_DStringInit(dsPtr);
+
+ /*
+ * "***=xxx" == "*xxx*", watch for glob-sensitive chars.
+ */
+
+ if ((reStrLen >= 4) && (memcmp("***=", reStr, 4) == 0)) {
+ /*
+ * At most, the glob pattern has length 2*reStrLen + 2 to
+ * backslash escape every character and have * at each end.
+ */
+ Tcl_DStringSetLength(dsPtr, 2*reStrLen + 2);
+ dsStr = dsStrStart = Tcl_DStringValue(dsPtr);
+ *dsStr++ = '*';
+ for (p = reStr + 4; p < strEnd; p++) {
+ switch (*p) {
+ case '\\': case '*': case '[': case ']': case '?':
+ /* Only add \ where necessary for glob */
+ *dsStr++ = '\\';
+ /* fall through */
+ default:
+ *dsStr++ = *p;
+ break;
+ }
+ }
+ *dsStr++ = '*';
+ Tcl_DStringSetLength(dsPtr, dsStr - dsStrStart);
+ if (exactPtr) {
+ *exactPtr = 0;
+ }
+ return TCL_OK;
+ }
+
+ /*
+ * At most, the glob pattern has length reStrLen + 2 to account
+ * for possible * at each end.
+ */
+
+ Tcl_DStringSetLength(dsPtr, reStrLen + 2);
+ dsStr = dsStrStart = Tcl_DStringValue(dsPtr);
+
+ /*
+ * Check for anchored REs (ie ^foo$), so we can use string equal if
+ * possible. Do not alter the start of str so we can free it correctly.
+ *
+ * Keep track of the last char being an unescaped star to prevent
+ * multiple instances. Simpler than checking that the last star
+ * may be escaped.
+ */
+
+ msg = NULL;
+ p = reStr;
+ anchorRight = 0;
+ lastIsStar = 0;
+ numStars = 0;
+
+ if (*p == '^') {
+ anchorLeft = 1;
+ p++;
+ } else {
+ anchorLeft = 0;
+ *dsStr++ = '*';
+ lastIsStar = 1;
+ }
+
+ for ( ; p < strEnd; p++) {
+ switch (*p) {
+ case '\\':
+ p++;
+ switch (*p) {
+ case 'a':
+ *dsStr++ = '\a';
+ break;
+ case 'b':
+ *dsStr++ = '\b';
+ break;
+ case 'f':
+ *dsStr++ = '\f';
+ break;
+ case 'n':
+ *dsStr++ = '\n';
+ break;
+ case 'r':
+ *dsStr++ = '\r';
+ break;
+ case 't':
+ *dsStr++ = '\t';
+ break;
+ case 'v':
+ *dsStr++ = '\v';
+ break;
+ case 'B': case '\\':
+ *dsStr++ = '\\';
+ *dsStr++ = '\\';
+ anchorLeft = 0; /* prevent exact match */
+ break;
+ case '*': case '[': case ']': case '?':
+ /* Only add \ where necessary for glob */
+ *dsStr++ = '\\';
+ anchorLeft = 0; /* prevent exact match */
+ /* fall through */
+ case '{': case '}': case '(': case ')': case '+':
+ case '.': case '|': case '^': case '$':
+ *dsStr++ = *p;
+ break;
+ default:
+ msg = "invalid escape sequence";
+ goto invalidGlob;
+ }
+ break;
+ case '.':
+ anchorLeft = 0; /* prevent exact match */
+ if (p+1 < strEnd) {
+ if (p[1] == '*') {
+ p++;
+ if (!lastIsStar) {
+ *dsStr++ = '*';
+ lastIsStar = 1;
+ numStars++;
+ }
+ continue;
+ } else if (p[1] == '+') {
+ p++;
+ *dsStr++ = '?';
+ *dsStr++ = '*';
+ lastIsStar = 1;
+ numStars++;
+ continue;
+ }
+ }
+ *dsStr++ = '?';
+ break;
+ case '$':
+ if (p+1 != strEnd) {
+ msg = "$ not anchor";
+ goto invalidGlob;
+ }
+ anchorRight = 1;
+ break;
+ case '*': case '+': case '?': case '|': case '^':
+ case '{': case '}': case '(': case ')': case '[': case ']':
+ msg = "unhandled RE special char";
+ goto invalidGlob;
+ break;
+ default:
+ *dsStr++ = *p;
+ break;
+ }
+ lastIsStar = 0;
+ }
+ if (numStars > 1) {
+ /*
+ * Heuristic: if >1 non-anchoring *, the risk is large that glob
+ * matching is slower than the RE engine, so report invalid.
+ */
+ msg = "excessive recursive glob backtrack potential";
+ goto invalidGlob;
+ }
+
+ if (!anchorRight && !lastIsStar) {
+ *dsStr++ = '*';
+ }
+ Tcl_DStringSetLength(dsPtr, dsStr - dsStrStart);
+
+ if (exactPtr) {
+ *exactPtr = (anchorLeft && anchorRight);
+ }
+
+#if 0
+ fprintf(stderr, "INPUT RE '%.*s' OUTPUT GLOB '%s' anchor %d:%d \n",
+ reStrLen, reStr,
+ Tcl_DStringValue(dsPtr), anchorLeft, anchorRight);
+ fflush(stderr);
+#endif
+ return TCL_OK;
+
+ invalidGlob:
+#if 0
+ fprintf(stderr, "INPUT RE '%.*s' NO OUTPUT GLOB %s (%c)\n",
+ reStrLen, reStr, msg, *p);
+ fflush(stderr);
+#endif
+ if (interp != NULL) {
+ Tcl_AppendResult(interp, msg, NULL);
+ }
+ Tcl_DStringFree(dsPtr);
+ return TCL_ERROR;
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */