summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorapnadkarni <apnmbx-wits@yahoo.com>2023-03-01 07:19:07 (GMT)
committerapnadkarni <apnmbx-wits@yahoo.com>2023-03-01 07:19:07 (GMT)
commit9522e3788042247359e535da5bf235eac4c37541 (patch)
tree804fe277e92a7b078ef1528f9675a77a350ab892
parent00e1068c039491b579117c6b38d7d415cb345e68 (diff)
parent62d9150ef53073a2ab29f2ba1b46551f273e56ec (diff)
downloadtcl-9522e3788042247359e535da5bf235eac4c37541.zip
tcl-9522e3788042247359e535da5bf235eac4c37541.tar.gz
tcl-9522e3788042247359e535da5bf235eac4c37541.tar.bz2
Merge 9.0
-rw-r--r--doc/Tcl.n316
-rw-r--r--generic/tcl.h14
-rw-r--r--generic/tclDecls.h2
-rw-r--r--generic/tclEncoding.c123
-rw-r--r--generic/tclIO.c127
-rw-r--r--generic/tclIO.h2
-rw-r--r--generic/tclIOCmd.c6
-rw-r--r--generic/tclTest.c22
-rw-r--r--generic/tclUtil.c4
-rw-r--r--generic/tclZlib.c41
-rw-r--r--tests/dstring.test32
-rw-r--r--tests/encoding.test26
-rw-r--r--tests/io.test125
-rw-r--r--tests/zlib.test32
-rw-r--r--unix/tclAppInit.c11
-rw-r--r--win/tclAppInit.c7
16 files changed, 532 insertions, 358 deletions
diff --git a/doc/Tcl.n b/doc/Tcl.n
index 8e0b342..99af4df 100644
--- a/doc/Tcl.n
+++ b/doc/Tcl.n
@@ -1,6 +1,7 @@
'\"
'\" Copyright (c) 1993 The Regents of the University of California.
'\" Copyright (c) 1994-1996 Sun Microsystems, Inc.
+'\" Copyright (c) 2023 Nathan Coulter
'\"
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
@@ -16,176 +17,152 @@ Summary of Tcl language syntax.
.SH DESCRIPTION
.PP
The following rules define the syntax and semantics of the Tcl language:
-.IP "[1] \fBCommands.\fR"
-A Tcl script is a string containing one or more commands.
-Semi-colons and newlines are command separators unless quoted as
-described below.
-Close brackets are command terminators during command substitution
-(see below) unless quoted.
-.IP "[2] \fBEvaluation.\fR"
-A command is evaluated in two steps.
-First, the Tcl interpreter breaks the command into \fIwords\fR
-and performs substitutions as described below.
-These substitutions are performed in the same way for all
-commands.
-Secondly, the first word is used to locate a routine to
-carry out the command, and the remaining words of the command are
-passed to that routine.
-The routine is free to interpret each of its words
-in any way it likes, such as an integer, variable name, list,
-or Tcl script.
-Different commands interpret their words differently.
-.IP "[3] \fBWords.\fR"
-Words of a command are separated by white space (except for
-newlines, which are command separators).
-.IP "[4] \fBDouble quotes.\fR"
-If the first character of a word is double-quote
+.
+.IP "[1] \fBScript.\fR"
+A script is composed of zero or more commands delimited by semi-colons or
+newlines.
+.IP "[2] \fBCommand.\fR"
+A command is composed of zero or more words delimited by whitespace. The
+replacement for a substitution is included verbatim in the word. For example, a
+space in the replacement is included in the word rather than becoming a
+delimiter, and \fI\\\\\fR becomes a single backslash in the word. Each word is
+processed from left to right and each substitution is performed as soon as it
+is complete.
+For example, the command
+.RS
+.PP
+.CS
+set y [set x 0][incr x][incr x]
+.CE
+.PP
+is composed of three words, and sets the value of \fIy\fR to \fI012\fR.
+.PP
+If hash
+.PQ #
+is the first character of what would otherwise be the first word of a command,
+all characters up to the next newline are ignored.
+.RE
+.
+.IP "[3] \fBBraced word.\fR"
+If a word is enclosed in braces
+.PQ {
+and
+.PQ } ""
+, the braces are removed and the enclosed characters become the word. No
+substitutions are performed. Nested pairs of braces may occur within the word.
+A brace preceded by an odd number of backslashes is not considered part of a
+pair, and neither brace nor the backslashes are removed from the word.
+.
+.IP "[4] \fBQuoted word.\fR"
+If a word is enclosed in double quotes
.PQ \N'34'
-then the word is terminated by the next double-quote character.
-If semi-colons, close brackets, or white space characters
-(including newlines) appear between the quotes then they are treated
-as ordinary characters and included in the word.
-Command substitution, variable substitution, and backslash substitution
-are performed on the characters between the quotes as described below.
-The double-quotes are not retained as part of the word.
-.IP "[5] \fBArgument expansion.\fR"
-If a word starts with the string
-.QW {*}
-followed by a non-whitespace character, then the leading
+, the double quotes are removed and the enclosed characters become the word.
+Substitutions are performed.
+.
+.IP "[5] \fBList.\fR"
+A list has the form of a single command. Newline is whitespace, and semicolon
+has no special interpretation. There is no script evaluation so there is no
+argument expansion, variable substitution, or command substitution: Dollar-sign
+and open bracket have no special interpretation, and what would be argument
+expansion in a script is invalid in a list.
+.
+.IP "[6] \fBArgument expansion.\fR"
+If
.QW {*}
-is removed and the rest of the word is parsed and substituted as any other
-word. After substitution, the word is parsed as a list (without command or
-variable substitutions; backslash substitutions are performed as is normal for
-a list and individual internal words may be surrounded by either braces or
-double-quote characters), and its words are added to the command being
-substituted. For instance,
-.QW "cmd a {*}{b [c]} d {*}{$e f {g h}}"
+prefixes a word, it is removed. After any remaining enclosing braces or quotes
+are processed and applicable substitutions performed, the word, which must
+be a list, is removed from the command, and in its place each word in the
+list becomes an additional word in the command. For example,
+.CS
+cmd a {*}{b [c]} d {*}{$e f {g h}}
+.CE
is equivalent to
-.QW "cmd a b {[c]} d {$e} f {g h}" .
-.IP "[6] \fBBraces.\fR"
-If the first character of a word is an open brace
-.PQ {
-and rule [5] does not apply, then
-the word is terminated by the matching close brace
-.PQ } "" .
-Braces nest within the word: for each additional open
-brace there must be an additional close brace (however,
-if an open brace or close brace within the word is
-quoted with a backslash then it is not counted in locating the
-matching close brace).
-No substitutions are performed on the characters between the
-braces except for backslash-newline substitutions described
-below, nor do semi-colons, newlines, close brackets,
-or white space receive any special interpretation.
-The word will consist of exactly the characters between the
-outer braces, not including the braces themselves.
-.IP "[7] \fBCommand substitution.\fR"
-If a word contains an open bracket
+.CS
+cmd a b {[c]} d {$e} f {g h} .
+.CE
+.
+.IP "[7] \fBEvaluation.\fR"
+To evaluate a script, an interpreter evaluates each successive command. The
+first word identifies a procedure, and the remaining words are passed to that
+procedure for further evaluation. The procedure interprets each argument in
+its own way, e.g. as an integer, variable name, list, mathematical expression,
+script, or in some other arbitrary way. The result of the last command is the
+result of the script.
+.
+.IP "[8] \fBCommand substitution.\fR"
+Each pair of brackets
.PQ [
-then Tcl performs \fIcommand substitution\fR.
-To do this it invokes the Tcl interpreter recursively to process
-the characters following the open bracket as a Tcl script.
-The script may contain any number of commands and must be terminated
-by a close bracket
-.PQ ] "" .
-The result of the script (i.e. the result of its last command) is
-substituted into the word in place of the brackets and all of the
-characters between them.
-There may be any number of command substitutions in a single word.
-Command substitution is not performed on words enclosed in braces.
-.IP "[8] \fBVariable substitution.\fR"
-If a word contains a dollar-sign
+and
+.PQ ] ""
+encloses a script and is replaced by the result of that script.
+.IP "[9] \fBVariable substitution.\fR"
+Each of the following forms begins with dollar sign
.PQ $
-followed by one of the forms
-described below, then Tcl performs \fIvariable
-substitution\fR: the dollar-sign and the following characters are
-replaced in the word by the value of a variable.
-Variable substitution may take any of the following forms:
+and is replaced by the value of the identified variable. \fIname\fR names the
+variable and is composed of ASCII letters (\fBA\fR\(en\fBZ\fR and
+\fBa\fR\(en\fBz\fR), digits (\fB0\fR\(en\fB9\fR), underscores, or namespace
+delimiters (two or more colons). \fIindex\fR is the name of an individual
+variable within an array variable, and may be empty.
.RS
.TP 15
\fB$\fIname\fR
.
-\fIName\fR is the name of a scalar variable; the name is a sequence
-of one or more characters that are a letter, digit, underscore,
-or namespace separators (two or more colons).
-Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR,
-\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR).
+\fIname\fR may not be empty.
+
.TP 15
\fB$\fIname\fB(\fIindex\fB)\fR
.
-\fIName\fR gives the name of an array variable and \fIindex\fR gives
-the name of an element within that array.
-\fIName\fR must contain only letters, digits, underscores, and
-namespace separators, and may be an empty string.
-Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR,
-\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR).
-Command substitutions, variable substitutions, and backslash
-substitutions are performed on the characters of \fIindex\fR.
+\fIname\fR may be empty. Substitutions are performed on \fIindex\fR.
.TP 15
\fB${\fIname\fB}\fR
+\fIname\fR may be empty.
+.TP 15
+\fB${\fIname(index)\fB}\fR
.
-\fIName\fR is the name of a scalar variable or array element. It may contain
-any characters whatsoever except for close braces. It indicates an array
-element if \fIname\fR is in the form
-.QW \fIarrayName\fB(\fIindex\fB)\fR
-where \fIarrayName\fR does not contain any open parenthesis characters,
-.QW \fB(\fR ,
-or close brace characters,
-.QW \fB}\fR ,
-and \fIindex\fR can be any sequence of characters except for close brace
-characters. No further
-substitutions are performed during the parsing of \fIname\fR.
-.PP
-There may be any number of variable substitutions in a single word.
-Variable substitution is not performed on words enclosed in braces.
-.PP
-Note that variables may contain character sequences other than those listed
-above, but in that case other mechanisms must be used to access them (e.g.,
-via the \fBset\fR command's single-argument form).
+\fIname\fR may be empty. No substitutions are performed.
.RE
-.IP "[9] \fBBackslash substitution.\fR"
-If a backslash
+Variables that are not accessible through one of the forms above may be
+accessed through other mechanisms, e.g. the \fBset\fR command.
+.IP "[10] \fBBackslash substitution.\fR"
+Each backslash
.PQ \e
-appears within a word then \fIbackslash substitution\fR occurs.
-In all cases but those described below the backslash is dropped and
-the following character is treated as an ordinary
-character and included in the word.
-This allows characters such as double quotes, close brackets,
-and dollar signs to be included in words without triggering
-special processing.
-The following table lists the backslash sequences that are
-handled specially, along with the value that replaces each sequence.
+that is not part of one of the forms listed below is removed, and the next
+character is included in the word verbatim, which allows the inclusion of
+characters that would normally be interpreted, namely whitespace, braces,
+brackets, double quote, dollar sign, and backslash. The following sequences
+are replaced as described:
+.RS
+.RS
.RS
.TP 7
\e\fBa\fR
-Audible alert (bell) (Unicode U+000007).
+Audible alert (bell) (U+7).
.TP 7
\e\fBb\fR
-Backspace (Unicode U+000008).
+Backspace (U+8).
.TP 7
\e\fBf\fR
-Form feed (Unicode U+00000C).
+Form feed (U+C).
.TP 7
\e\fBn\fR
-Newline (Unicode U+00000A).
+Newline (U+A).
.TP 7
\e\fBr\fR
-Carriage-return (Unicode U+00000D).
+Carriage-return (U+D).
.TP 7
\e\fBt\fR
-Tab (Unicode U+000009).
+Tab (U+9).
.TP 7
\e\fBv\fR
-Vertical tab (Unicode U+00000B).
+Vertical tab (U+B).
.TP 7
\e\fB<newline>\fIwhiteSpace\fR
.
-A single space character replaces the backslash, newline, and all spaces
-and tabs after the newline. This backslash sequence is unique in that it
-is replaced in a separate pre-pass before the command is actually parsed.
-This means that it will be replaced even when it occurs between braces,
-and the resulting space will be treated as a word separator if it is not
-in braces or quotes.
+Newline preceded by an odd number of backslashes, along with the consecutive
+spaces and tabs that immediately follow it, is replaced by a single space.
+Because this happens before the command is split into words, it occurs even
+within braced words, and if the resulting space may subsequently be treated as
+a word delimiter.
.TP 7
\e\e
Backslash
@@ -193,77 +170,30 @@ Backslash
.TP 7
\e\fIooo\fR
.
-The digits \fIooo\fR (one, two, or three of them) give a eight-bit octal
-value for the Unicode character that will be inserted, in the range
-\fI000\fR\(en\fI377\fR (i.e., the range U+000000\(enU+0000FF).
-The parser will stop just before this range overflows, or when
-the maximum of three digits is reached. The upper bits of the Unicode
-character will be 0.
+Up to three octal digits form an eight-bit value for a Unicode character in the
+range \fI0\fR\(en\fI377\fR, i.e. U+0\(enU+FF. Only the digits that result in a
+number in this range are consumed.
.TP 7
\e\fBx\fIhh\fR
.
-The hexadecimal digits \fIhh\fR (one or two of them) give an eight-bit
-hexadecimal value for the Unicode character that will be inserted. The upper
-bits of the Unicode character will be 0 (i.e., the character will be in the
-range U+000000\(enU+0000FF).
+Up to two hexadecimal digits form an eight-bit value for a Unicode character in
+the range \fI0\fR\(en\fIFF\fR.
.TP 7
\e\fBu\fIhhhh\fR
.
-The hexadecimal digits \fIhhhh\fR (one, two, three, or four of them) give a
-sixteen-bit hexadecimal value for the Unicode character that will be
-inserted. The upper bits of the Unicode character will be 0 (i.e., the
-character will be in the range U+000000\(enU+00FFFF).
+Up to four hexadecimal digits form a 16-bit value for a Unicode character in
+the range \fI0\fR\(en\fIFFFF\fR.
.TP 7
\e\fBU\fIhhhhhhhh\fR
.
-The hexadecimal digits \fIhhhhhhhh\fR (one up to eight of them) give a
-twenty-one-bit hexadecimal value for the Unicode character that will be
-inserted, in the range U+000000\(enU+10FFFF. The parser will stop just
-before this range overflows, or when the maximum of eight digits
-is reached. The upper bits of the Unicode character will be 0.
+Up to eight hexadecimal digits form a 21-bit value for a Unicode character in
+the range \fI0\fR\(en\fI10FFFF\fR. Only the digits that result in a number in
+this range are consumed.
.RE
-.PP
-Backslash substitution is not performed on words enclosed in braces,
-except for backslash-newline as described above.
.RE
-.IP "[10] \fBComments.\fR"
-If a hash character
-.PQ #
-appears at a point where Tcl is
-expecting the first character of the first word of a command,
-then the hash character and the characters that follow it, up
-through the next newline, are treated as a comment and ignored.
-The comment character only has significance when it appears
-at the beginning of a command.
-.IP "[11] \fBOrder of substitution.\fR"
-Each character is processed exactly once by the Tcl interpreter
-as part of creating the words of a command.
-For example, if variable substitution occurs then no further
-substitutions are performed on the value of the variable; the
-value is inserted into the word verbatim.
-If command substitution occurs then the nested command is
-processed entirely by the recursive call to the Tcl interpreter;
-no substitutions are performed before making the recursive
-call and no additional substitutions are performed on the result
-of the nested script.
-.RS
.PP
-Substitutions take place from left to right, and each substitution is
-evaluated completely before attempting to evaluate the next. Thus, a
-sequence like
-.PP
-.CS
-set y [set x 0][incr x][incr x]
-.CE
-.PP
-will always set the variable \fIy\fR to the value, \fI012\fR.
.RE
-.IP "[12] \fBSubstitution and word boundaries.\fR"
-Substitutions do not affect the word boundaries of a command,
-except for argument expansion as specified in rule [5].
-For example, during variable substitution the entire value of
-the variable becomes part of a single word, even if the variable's
-value contains spaces.
+.
.SH KEYWORDS
backslash, command, comment, script, substitution, variable
'\" Local Variables:
diff --git a/generic/tcl.h b/generic/tcl.h
index 6040099..2d2849f 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -311,10 +311,10 @@ typedef unsigned TCL_WIDE_INT_TYPE Tcl_WideUInt;
#define Tcl_WideAsDouble(val) ((double)((Tcl_WideInt)(val)))
#define Tcl_DoubleAsWide(val) ((Tcl_WideInt)((double)(val)))
-#if TCL_MAJOR_VERSION > 8
-typedef size_t Tcl_Size;
-#else
+#if TCL_MAJOR_VERSION < 9
typedef int Tcl_Size;
+#else
+typedef size_t Tcl_Size;
#endif
#ifdef _WIN32
@@ -452,17 +452,17 @@ typedef void (Tcl_ThreadCreateProc) (void *clientData);
#if TCL_MAJOR_VERSION > 8
typedef struct Tcl_RegExpIndices {
- size_t start; /* Character offset of first character in
+ Tcl_Size start; /* Character offset of first character in
* match. */
- size_t end; /* Character offset of first character after
+ Tcl_Size end; /* Character offset of first character after
* the match. */
} Tcl_RegExpIndices;
typedef struct Tcl_RegExpInfo {
- size_t nsubs; /* Number of subexpressions in the compiled
+ Tcl_Size nsubs; /* Number of subexpressions in the compiled
* expression. */
Tcl_RegExpIndices *matches; /* Array of nsubs match offset pairs. */
- size_t extendStart; /* The offset at which a subsequent match
+ Tcl_Size extendStart; /* The offset at which a subsequent match
* might begin. */
} Tcl_RegExpInfo;
#else
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index bdc094d..d8d8ddb 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -4233,7 +4233,7 @@ extern const TclStubs *tclStubsPtr;
#define Tcl_GlobalEvalObj(interp, objPtr) \
Tcl_EvalObjEx(interp, objPtr, TCL_EVAL_GLOBAL)
-#if defined(TCL_8_COMPAT) && !defined(BUILD_tcl)
+#if defined(TCL_8_COMPAT) && !defined(BUILD_tcl) && TCL_MAJOR_VERSION > 8
# ifdef USE_TCL_STUBS
# undef Tcl_Gets
# undef Tcl_GetsObj
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index a208270..b2b319d 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -1209,8 +1209,8 @@ Tcl_ExternalToUtfDStringEx(
char *dst;
Tcl_EncodingState state;
const Encoding *encodingPtr;
- int result, soFar, srcRead, dstWrote, dstChars;
- Tcl_Size dstLen;
+ int result;
+ Tcl_Size dstLen, soFar;
const char *srcStart = src;
/* DO FIRST - Must always be initialized before returning */
@@ -1239,18 +1239,40 @@ Tcl_ExternalToUtfDStringEx(
srcLen = encodingPtr->lengthProc(src);
}
- flags |= TCL_ENCODING_START | TCL_ENCODING_END;
+ flags |= TCL_ENCODING_START;
if (encodingPtr->toUtfProc == UtfToUtfProc) {
flags |= ENCODING_INPUT;
}
while (1) {
- result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
- flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars);
- soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
+ int srcChunkLen, srcChunkRead;
+ int dstChunkLen, dstChunkWrote, dstChunkChars;
+
+ if (srcLen > INT_MAX) {
+ srcChunkLen = INT_MAX;
+ } else {
+ srcChunkLen = srcLen;
+ flags |= TCL_ENCODING_END; /* Last chunk */
+ }
+ dstChunkLen = dstLen > INT_MAX ? INT_MAX : dstLen;
- src += srcRead;
- if (result != TCL_CONVERT_NOSPACE) {
+ result = encodingPtr->toUtfProc(encodingPtr->clientData, src,
+ srcChunkLen, flags, &state, dst, dstChunkLen,
+ &srcChunkRead, &dstChunkWrote, &dstChunkChars);
+ soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr);
+
+ src += srcChunkRead;
+
+ /*
+ * Keep looping in two case -
+ * - our destination buffer did not have enough room
+ * - we had not passed in all the data and error indicated fragment
+ * of a multibyte character
+ * In both cases we have to grow buffer, move the input source pointer
+ * and loop. Otherwise, return the result we got.
+ */
+ if ((result != TCL_CONVERT_NOSPACE) &&
+ !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) {
Tcl_Size nBytesProcessed = (src - srcStart);
Tcl_DStringSetLength(dstPtr, soFar);
@@ -1277,17 +1299,17 @@ Tcl_ExternalToUtfDStringEx(
}
}
return result;
- }
+ }
- /* Expand space and continue */
- flags &= ~TCL_ENCODING_START;
- srcLen -= srcRead;
- if (Tcl_DStringLength(dstPtr) == 0) {
- Tcl_DStringSetLength(dstPtr, dstLen);
- }
- Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);
- dst = Tcl_DStringValue(dstPtr) + soFar;
- dstLen = Tcl_DStringLength(dstPtr) - soFar - 1;
+ flags &= ~TCL_ENCODING_START;
+ srcLen -= srcChunkRead;
+
+ if (Tcl_DStringLength(dstPtr) == 0) {
+ Tcl_DStringSetLength(dstPtr, dstLen);
+ }
+ Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);
+ dst = Tcl_DStringValue(dstPtr) + soFar;
+ dstLen = Tcl_DStringLength(dstPtr) - soFar - 1;
}
}
@@ -1315,7 +1337,7 @@ Tcl_ExternalToUtf(
Tcl_Encoding encoding, /* The encoding for the source string, or NULL
* for the default system encoding. */
const char *src, /* Source string in specified encoding. */
- Tcl_Size srcLen, /* Source string length in bytes, or < 0 for
+ Tcl_Size srcLen, /* Source string length in bytes, or TCL_INDEX_NONE for
* encoding-specific string length. */
int flags, /* Conversion control flags. */
Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
@@ -1360,6 +1382,13 @@ Tcl_ExternalToUtf(
flags |= TCL_ENCODING_START | TCL_ENCODING_END;
statePtr = &state;
}
+ if (srcLen > INT_MAX) {
+ srcLen = INT_MAX;
+ flags &= ~TCL_ENCODING_END;
+ }
+ if (dstLen > INT_MAX) {
+ dstLen = INT_MAX;
+ }
if (srcReadPtr == NULL) {
srcReadPtr = &srcRead;
}
@@ -1502,9 +1531,9 @@ Tcl_UtfToExternalDStringEx(
char *dst;
Tcl_EncodingState state;
const Encoding *encodingPtr;
- int result, soFar, srcRead, dstWrote, dstChars;
+ int result;
+ Tcl_Size dstLen, soFar;
const char *srcStart = src;
- Tcl_Size dstLen;
/* DO FIRST - must always be initialized on return */
Tcl_DStringInit(dstPtr);
@@ -1531,19 +1560,41 @@ Tcl_UtfToExternalDStringEx(
} else if (srcLen == TCL_INDEX_NONE) {
srcLen = strlen(src);
}
-
- flags |= TCL_ENCODING_START | TCL_ENCODING_END;
+ flags |= TCL_ENCODING_START;
while (1) {
+ int srcChunkLen, srcChunkRead;
+ int dstChunkLen, dstChunkWrote, dstChunkChars;
+
+ if (srcLen > INT_MAX) {
+ srcChunkLen = INT_MAX;
+ } else {
+ srcChunkLen = srcLen;
+ flags |= TCL_ENCODING_END; /* Last chunk */
+ }
+ dstChunkLen = dstLen > INT_MAX ? INT_MAX : dstLen;
+
result = encodingPtr->fromUtfProc(encodingPtr->clientData, src,
- srcLen, flags, &state, dst, dstLen,
- &srcRead, &dstWrote, &dstChars);
- soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
+ srcChunkLen, flags, &state, dst, dstChunkLen,
+ &srcChunkRead, &dstChunkWrote, &dstChunkChars);
+ soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr);
+
+ /* Move past the part processed in this go around */
+ src += srcChunkRead;
- src += srcRead;
- if (result != TCL_CONVERT_NOSPACE) {
+ /*
+ * Keep looping in two case -
+ * - our destination buffer did not have enough room
+ * - we had not passed in all the data and error indicated fragment
+ * of a multibyte character
+ * In both cases we have to grow buffer, move the input source pointer
+ * and loop. Otherwise, return the result we got.
+ */
+ if ((result != TCL_CONVERT_NOSPACE) &&
+ !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) {
Tcl_Size nBytesProcessed = (src - srcStart);
- int i = soFar + encodingPtr->nullSize - 1;
- while (i >= soFar) {
+ size_t i = soFar + encodingPtr->nullSize - 1;
+ /* Loop as DStringSetLength only stores one nul byte at a time */
+ while (i+1 >= soFar+1) {
Tcl_DStringSetLength(dstPtr, i--);
}
if (errorLocPtr) {
@@ -1576,7 +1627,8 @@ Tcl_UtfToExternalDStringEx(
}
flags &= ~TCL_ENCODING_START;
- srcLen -= srcRead;
+ srcLen -= srcChunkRead;
+
if (Tcl_DStringLength(dstPtr) == 0) {
Tcl_DStringSetLength(dstPtr, dstLen);
}
@@ -1610,7 +1662,7 @@ Tcl_UtfToExternal(
Tcl_Encoding encoding, /* The encoding for the converted string, or
* NULL for the default system encoding. */
const char *src, /* Source string in UTF-8. */
- Tcl_Size srcLen, /* Source string length in bytes, or < 0 for
+ Tcl_Size srcLen, /* Source string length in bytes, or TCL_INDEX_NONE for
* strlen(). */
int flags, /* Conversion control flags. */
Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
@@ -1652,6 +1704,13 @@ Tcl_UtfToExternal(
flags |= TCL_ENCODING_START | TCL_ENCODING_END;
statePtr = &state;
}
+ if (srcLen > INT_MAX) {
+ srcLen = INT_MAX;
+ flags &= ~TCL_ENCODING_END;
+ }
+ if (dstLen > INT_MAX) {
+ dstLen = INT_MAX;
+ }
if (srcReadPtr == NULL) {
srcReadPtr = &srcRead;
}
diff --git a/generic/tclIO.c b/generic/tclIO.c
index 6d6a935..dc0ce7d 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -102,7 +102,7 @@ typedef struct CopyState {
Tcl_WideInt total; /* Total bytes transferred (written). */
Tcl_Interp *interp; /* Interp that started the copy. */
Tcl_Obj *cmdPtr; /* Command to be invoked at completion. */
- size_t bufSize; /* Size of appended buffer. */
+ Tcl_Size bufSize; /* Size of appended buffer. */
char buffer[TCLFLEXARRAY]; /* Copy buffer, this must be the last
* field. */
} CopyState;
@@ -151,7 +151,7 @@ typedef struct CloseCallback {
* Static functions in this file:
*/
-static ChannelBuffer * AllocChannelBuffer(size_t length);
+static ChannelBuffer * AllocChannelBuffer(Tcl_Size length);
static void PreserveChannelBuffer(ChannelBuffer *bufPtr);
static void ReleaseChannelBuffer(ChannelBuffer *bufPtr);
static int IsShared(ChannelBuffer *bufPtr);
@@ -191,9 +191,9 @@ static int DetachChannel(Tcl_Interp *interp, Tcl_Channel chan);
static void DiscardInputQueued(ChannelState *statePtr,
int discardSavedBuffers);
static void DiscardOutputQueued(ChannelState *chanPtr);
-static int DoRead(Channel *chanPtr, char *dst, size_t bytesToRead,
+static int DoRead(Channel *chanPtr, char *dst, Tcl_Size bytesToRead,
int allowShortReads);
-static int DoReadChars(Channel *chan, Tcl_Obj *objPtr, size_t toRead,
+static int DoReadChars(Channel *chan, Tcl_Obj *objPtr, Tcl_Size toRead,
int appendFlag);
static int FilterInputBytes(Channel *chanPtr,
GetsState *statePtr);
@@ -237,7 +237,7 @@ static int WillRead(Channel *chanPtr);
* short description of what the macro does.
*
* --------------------------------------------------------------------------
- * size_t BytesLeft(ChannelBuffer *bufPtr)
+ * Tcl_Size BytesLeft(ChannelBuffer *bufPtr)
*
* Returns the number of bytes of data remaining in the buffer.
*
@@ -2502,10 +2502,10 @@ Tcl_RemoveChannelMode(
static ChannelBuffer *
AllocChannelBuffer(
- size_t length) /* Desired length of channel buffer. */
+ Tcl_Size length) /* Desired length of channel buffer. */
{
ChannelBuffer *bufPtr;
- size_t n;
+ Tcl_Size n;
n = length + CHANNELBUFFER_HEADER_SIZE + BUFFER_PADDING + BUFFER_PADDING;
bufPtr = (ChannelBuffer *)Tcl_Alloc(n);
@@ -4037,11 +4037,11 @@ Tcl_ClearChannelHandlers(
*----------------------------------------------------------------------
*/
-size_t
+Tcl_Size
Tcl_Write(
Tcl_Channel chan, /* The channel to buffer output for. */
const char *src, /* Data to queue in output buffer. */
- size_t srcLen) /* Length of data in bytes, or -1 for
+ Tcl_Size srcLen) /* Length of data in bytes, or TCL_INDEX_NONE for
* strlen(). */
{
/*
@@ -4091,18 +4091,18 @@ Tcl_Write(
*----------------------------------------------------------------------
*/
-size_t
+Tcl_Size
Tcl_WriteRaw(
Tcl_Channel chan, /* The channel to buffer output for. */
const char *src, /* Data to queue in output buffer. */
- size_t srcLen) /* Length of data in bytes, or -1 for
+ Tcl_Size srcLen) /* Length of data in bytes, or TCL_INDEX_NONE for
* strlen(). */
{
Channel *chanPtr = ((Channel *) chan);
ChannelState *statePtr = chanPtr->state;
/* State info for channel */
int errorCode;
- size_t written;
+ Tcl_Size written;
if (CheckChannelErrors(statePtr, TCL_WRITABLE | CHANNEL_RAW_MODE) != 0) {
return TCL_INDEX_NONE;
@@ -4148,17 +4148,17 @@ Tcl_WriteRaw(
*----------------------------------------------------------------------
*/
-size_t
+Tcl_Size
Tcl_WriteChars(
Tcl_Channel chan, /* The channel to buffer output for. */
const char *src, /* UTF-8 characters to queue in output
* buffer. */
- size_t len) /* Length of string in bytes, or TCL_INDEX_NONE for
+ Tcl_Size len) /* Length of string in bytes, or TCL_INDEX_NONE for
* strlen(). */
{
Channel *chanPtr = (Channel *) chan;
ChannelState *statePtr = chanPtr->state; /* State info for channel */
- size_t result;
+ Tcl_Size result;
Tcl_Obj *objPtr;
if (CheckChannelErrors(statePtr, TCL_WRITABLE) != 0) {
@@ -4223,7 +4223,7 @@ Tcl_WriteChars(
*----------------------------------------------------------------------
*/
-size_t
+Tcl_Size
Tcl_WriteObj(
Tcl_Channel chan, /* The channel to buffer output for. */
Tcl_Obj *objPtr) /* The object to write. */
@@ -4235,7 +4235,6 @@ Tcl_WriteObj(
Channel *chanPtr;
ChannelState *statePtr; /* State info for channel */
const char *src;
- size_t srcLen = 0;
statePtr = ((Channel *) chan)->state;
chanPtr = statePtr->topChanPtr;
@@ -4243,21 +4242,38 @@ Tcl_WriteObj(
if (CheckChannelErrors(statePtr, TCL_WRITABLE) != 0) {
return TCL_INDEX_NONE;
}
- if (statePtr->encoding == NULL) {
- size_t result;
+ Tcl_Size srcLen;
+ if (statePtr->encoding == NULL) {
src = (char *) Tcl_GetByteArrayFromObj(objPtr, &srcLen);
if (src == NULL) {
Tcl_SetErrno(EILSEQ);
- result = TCL_INDEX_NONE;
- } else {
- result = WriteBytes(chanPtr, src, srcLen);
+ return TCL_INDEX_NONE;
}
- return result;
} else {
src = Tcl_GetStringFromObj(objPtr, &srcLen);
- return WriteChars(chanPtr, src, srcLen);
}
+
+ size_t totalWritten = 0;
+ /*
+ * Note original code always called WriteChars even if srcLen 0
+ * so we will too.
+ */
+ do {
+ int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen;
+ int written;
+ if (statePtr->encoding == NULL) {
+ written = WriteBytes(chanPtr, src, chunkSize);
+ } else {
+ written = WriteChars(chanPtr, src, chunkSize);
+ }
+ if (written < 0) {
+ return TCL_INDEX_NONE;
+ }
+ totalWritten += written;
+ srcLen -= chunkSize;
+ } while (srcLen);
+ return totalWritten;
}
static void
@@ -4539,7 +4555,7 @@ Write(
*---------------------------------------------------------------------------
*/
-size_t
+Tcl_Size
Tcl_Gets(
Tcl_Channel chan, /* Channel from which to read. */
Tcl_DString *lineRead) /* The line read will be appended to this
@@ -4548,7 +4564,7 @@ Tcl_Gets(
* for managing the storage. */
{
Tcl_Obj *objPtr;
- size_t charsStored;
+ Tcl_Size charsStored;
TclNewObj(objPtr);
charsStored = Tcl_GetsObj(chan, objPtr);
@@ -4582,7 +4598,7 @@ Tcl_Gets(
*---------------------------------------------------------------------------
*/
-size_t
+Tcl_Size
Tcl_GetsObj(
Tcl_Channel chan, /* Channel from which to read. */
Tcl_Obj *objPtr) /* The line read will be appended to this
@@ -4594,7 +4610,7 @@ Tcl_GetsObj(
/* State info for channel */
ChannelBuffer *bufPtr;
int inEofChar, skip, copiedTotal, oldFlags, oldRemoved;
- size_t oldLength;
+ Tcl_Size oldLength;
Tcl_Encoding encoding;
char *dst, *dstEnd, *eol, *eof;
Tcl_EncodingState oldState;
@@ -4749,7 +4765,7 @@ Tcl_GetsObj(
*/
if (eol >= dstEnd) {
- size_t offset;
+ Tcl_Size offset;
if (eol != eof) {
offset = eol - objPtr->bytes;
@@ -5019,7 +5035,7 @@ TclGetsObjBinary(
/* State info for channel */
ChannelBuffer *bufPtr;
int inEofChar, skip, copiedTotal, oldFlags, oldRemoved;
- size_t rawLen, byteLen = 0, oldLength;
+ Tcl_Size rawLen, byteLen = 0, oldLength;
int eolChar;
unsigned char *dst, *dstEnd, *eol, *eof, *byteArray;
@@ -5287,7 +5303,7 @@ FreeBinaryEncoding(
}
static Tcl_Encoding
-GetBinaryEncoding()
+GetBinaryEncoding(void)
{
ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
@@ -5670,11 +5686,11 @@ CommonGetsCleanup(
*----------------------------------------------------------------------
*/
-size_t
+Tcl_Size
Tcl_Read(
Tcl_Channel chan, /* The channel from which to read. */
char *dst, /* Where to store input read. */
- size_t bytesToRead) /* Maximum number of bytes to read. */
+ Tcl_Size bytesToRead) /* Maximum number of bytes to read. */
{
Channel *chanPtr = (Channel *) chan;
ChannelState *statePtr = chanPtr->state;
@@ -5715,11 +5731,11 @@ Tcl_Read(
*----------------------------------------------------------------------
*/
-size_t
+Tcl_Size
Tcl_ReadRaw(
Tcl_Channel chan, /* The channel from which to read. */
char *readBuf, /* Where to store input read. */
- size_t bytesToRead) /* Maximum number of bytes to read. */
+ Tcl_Size bytesToRead) /* Maximum number of bytes to read. */
{
Channel *chanPtr = (Channel *) chan;
ChannelState *statePtr = chanPtr->state;
@@ -5798,8 +5814,8 @@ Tcl_ReadRaw(
}
} else if (nread > 0) {
/*
- * Successful read (short is OK) - add to bytes copied.
- */
+ * Successful read (short is OK) - add to bytes copied.
+ */
copied += nread;
} else {
@@ -5833,11 +5849,11 @@ Tcl_ReadRaw(
*---------------------------------------------------------------------------
*/
-size_t
+Tcl_Size
Tcl_ReadChars(
Tcl_Channel chan, /* The channel to read. */
Tcl_Obj *objPtr, /* Input data is stored in this object. */
- size_t toRead, /* Maximum number of characters to store, or
+ Tcl_Size toRead, /* Maximum number of characters to store, or
* TCL_INDEX_NONE to read all available data (up to EOF or
* when channel blocks). */
int appendFlag) /* If non-zero, data read from the channel
@@ -5893,7 +5909,7 @@ static int
DoReadChars(
Channel *chanPtr, /* The channel to read. */
Tcl_Obj *objPtr, /* Input data is stored in this object. */
- size_t toRead, /* Maximum number of characters to store, or
+ Tcl_Size toRead, /* Maximum number of characters to store, or
* TCL_INDEX_NONE to read all available data (up to EOF or
* when channel blocks). */
int appendFlag) /* If non-zero, data read from the channel
@@ -6185,7 +6201,7 @@ ReadChars(
int savedIEFlags = statePtr->inputEncodingFlags;
int savedFlags = statePtr->flags;
char *dst, *src = RemovePoint(bufPtr);
- size_t numBytes;
+ Tcl_Size numBytes;
int srcLen = BytesLeft(bufPtr);
/*
@@ -6728,11 +6744,11 @@ TranslateInputEOL(
*----------------------------------------------------------------------
*/
-size_t
+Tcl_Size
Tcl_Ungets(
Tcl_Channel chan, /* The channel for which to add the input. */
const char *str, /* The input itself. */
- size_t len, /* The length of the input. */
+ Tcl_Size len, /* The length of the input. */
int atEnd) /* If non-zero, add at end of queue; otherwise
* add at head of queue. */
{
@@ -7480,7 +7496,11 @@ Tcl_Eof(
ChannelState *statePtr = ((Channel *) chan)->state;
/* State of real channel structure. */
- return (GotFlag(statePtr, CHANNEL_EOF) && !GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) ? 1 : 0;
+ if (GotFlag(statePtr, CHANNEL_NONBLOCKING|CHANNEL_FCOPY)
+ && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) {
+ return 0;
+ }
+ return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0;
}
/*
@@ -7650,7 +7670,7 @@ Tcl_ChannelBuffered(
void
Tcl_SetChannelBufferSize(
Tcl_Channel chan, /* The channel whose buffer size to set. */
- size_t sz) /* The size to set. */
+ Tcl_Size sz) /* The size to set. */
{
ChannelState *statePtr; /* State of real channel structure. */
@@ -7704,7 +7724,7 @@ Tcl_SetChannelBufferSize(
*----------------------------------------------------------------------
*/
-size_t
+Tcl_Size
Tcl_GetChannelBufferSize(
Tcl_Channel chan) /* The channel for which to find the buffer
* size. */
@@ -7756,7 +7776,7 @@ Tcl_BadChannelOption(
const char *genericopt =
"blocking buffering buffersize encoding encodingprofile eofchar translation";
const char **argv;
- size_t argc, i;
+ Tcl_Size argc, i;
Tcl_DString ds;
Tcl_Obj *errObj;
@@ -8035,7 +8055,7 @@ Tcl_SetChannelOption(
ChannelState *statePtr = chanPtr->state;
/* State info for channel */
size_t len; /* Length of optionName string. */
- size_t argc;
+ Tcl_Size argc;
const char **argv = NULL;
/*
@@ -9585,7 +9605,7 @@ CopyData(
Tcl_Channel inChan, outChan;
ChannelState *inStatePtr, *outStatePtr;
int result = TCL_OK, size;
- size_t sizeb;
+ Tcl_Size sizeb;
Tcl_WideInt total;
const char *buffer;
int inBinary, outBinary, sameEncoding;
@@ -9607,6 +9627,7 @@ CopyData(
* the bottom of the stack.
*/
+ SetFlag(inStatePtr, CHANNEL_FCOPY);
inBinary = (inStatePtr->encoding == NULL);
outBinary = (outStatePtr->encoding == NULL);
sameEncoding = inStatePtr->encoding == outStatePtr->encoding
@@ -9722,6 +9743,7 @@ CopyData(
TclDecrRefCount(bufObj);
bufObj = NULL;
}
+ ResetFlag(inStatePtr, CHANNEL_FCOPY);
return TCL_OK;
}
}
@@ -9813,6 +9835,7 @@ CopyData(
TclDecrRefCount(bufObj);
bufObj = NULL;
}
+ ResetFlag(inStatePtr, CHANNEL_FCOPY);
return TCL_OK;
}
@@ -9835,6 +9858,7 @@ CopyData(
TclDecrRefCount(bufObj);
bufObj = NULL;
}
+ ResetFlag(inStatePtr, CHANNEL_FCOPY);
return TCL_OK;
}
} /* while */
@@ -9887,6 +9911,7 @@ CopyData(
}
}
}
+ ResetFlag(inStatePtr, CHANNEL_FCOPY);
return result;
}
@@ -9923,7 +9948,7 @@ static int
DoRead(
Channel *chanPtr, /* The channel from which to read. */
char *dst, /* Where to store input read. */
- size_t bytesToRead, /* Maximum number of bytes to read. */
+ Tcl_Size bytesToRead, /* Maximum number of bytes to read. */
int allowShortReads) /* Allow half-blocking (pipes,sockets) */
{
ChannelState *statePtr = chanPtr->state;
@@ -11021,7 +11046,7 @@ FixLevelCode(
Tcl_Obj *msg)
{
int explicitResult, numOptions, lcn;
- size_t lc;
+ Tcl_Size lc;
Tcl_Obj **lv, **lvn;
int res, i, j, val, lignore, cignore;
int newlevel = -1, newcode = -1;
diff --git a/generic/tclIO.h b/generic/tclIO.h
index a050010..8f0ef8a 100644
--- a/generic/tclIO.h
+++ b/generic/tclIO.h
@@ -235,6 +235,8 @@ typedef struct ChannelState {
* flushed after every newline. */
#define CHANNEL_UNBUFFERED (1<<5) /* Output to the channel must always
* be flushed immediately. */
+#define CHANNEL_FCOPY (1<<6) /* Channel is currently doing an fcopy
+ * mode. */
#define BG_FLUSH_SCHEDULED (1<<7) /* A background flush of the queued
* output buffers has been
* scheduled. */
diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c
index 4ce27bb..197ca32 100644
--- a/generic/tclIOCmd.c
+++ b/generic/tclIOCmd.c
@@ -106,7 +106,7 @@ Tcl_PutsObjCmd(
Tcl_Obj *string; /* String to write. */
Tcl_Obj *chanObjPtr = NULL; /* channel object. */
int newline; /* Add a newline at end? */
- int result; /* Result of puts operation. */
+ size_t result; /* Result of puts operation. */
int mode; /* Mode in which channel is opened. */
switch (objc) {
@@ -163,12 +163,12 @@ Tcl_PutsObjCmd(
TclChannelPreserve(chan);
result = Tcl_WriteObj(chan, string);
- if (result == -1) {
+ if (result == TCL_INDEX_NONE) {
goto error;
}
if (newline != 0) {
result = Tcl_WriteChars(chan, "\n", 1);
- if (result == -1) {
+ if (result == TCL_INDEX_NONE) {
goto error;
}
}
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 652c5aa..b6c7f77 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -4105,7 +4105,7 @@ TestregexpObjCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
int i, indices, stringLength, match, about;
- size_t ii;
+ Tcl_Size ii;
int hasxflags, cflags, eflags;
Tcl_RegExp regExpr;
const char *string;
@@ -4217,7 +4217,7 @@ TestregexpObjCmd(
if (objc > 2 && (cflags&REG_EXPECT) && indices) {
const char *varName;
const char *value;
- size_t start, end;
+ Tcl_Size start, end;
char resinfo[TCL_INTEGER_SPACE * 2];
varName = Tcl_GetString(objv[2]);
@@ -4257,11 +4257,11 @@ TestregexpObjCmd(
Tcl_RegExpGetInfo(regExpr, &info);
for (i = 0; i < objc; i++) {
- size_t start, end;
+ Tcl_Size start, end;
Tcl_Obj *newPtr, *varPtr, *valuePtr;
varPtr = objv[i];
- ii = ((cflags&REG_EXPECT) && i == objc-1) ? TCL_INDEX_NONE : (size_t)i;
+ ii = ((cflags&REG_EXPECT) && i == objc-1) ? TCL_INDEX_NONE : (Tcl_Size)i;
if (indices) {
Tcl_Obj *objs[2];
@@ -6476,10 +6476,10 @@ static int
TestWrongNumArgsObjCmd(
TCL_UNUSED(void *),
Tcl_Interp *interp, /* Current interpreter. */
- size_t objc, /* Number of arguments. */
+ Tcl_Size objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- size_t i, length;
+ Tcl_Size i, length;
const char *msg;
if (objc + 1 < 4) {
@@ -7187,7 +7187,7 @@ TestUtfPrevCmd(
int objc,
Tcl_Obj *const objv[])
{
- size_t numBytes, offset;
+ Tcl_Size numBytes, offset;
char *bytes;
const char *result;
@@ -7228,7 +7228,7 @@ TestNumUtfCharsCmd(
Tcl_Obj *const objv[])
{
if (objc > 1) {
- size_t numBytes, len, limit = TCL_INDEX_NONE;
+ Tcl_Size numBytes, len, limit = TCL_INDEX_NONE;
const char *bytes = Tcl_GetStringFromObj(objv[1], &numBytes);
if (objc > 2) {
@@ -7296,7 +7296,7 @@ TestGetIntForIndexCmd(
int objc,
Tcl_Obj *const objv[])
{
- size_t result;
+ Tcl_Size result;
Tcl_WideInt endvalue;
if (objc != 3) {
@@ -7415,7 +7415,7 @@ TestHashSystemHashCmd(
Tcl_SetHashValue(hPtr, INT2PTR(i+42));
}
- if (hash.numEntries != (size_t)limit) {
+ if (hash.numEntries != (Tcl_Size)limit) {
Tcl_AppendResult(interp, "unexpected maximal size", NULL);
Tcl_DeleteHashTable(&hash);
return TCL_ERROR;
@@ -8175,7 +8175,7 @@ static int
InterpCompiledVarResolver(
TCL_UNUSED(Tcl_Interp *),
const char *name,
- TCL_UNUSED(size_t) /*length*/,
+ TCL_UNUSED(Tcl_Size) /*length*/,
TCL_UNUSED(Tcl_Namespace *),
Tcl_ResolvedVarInfo **rPtr)
{
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 3abd615..0ebfb1d 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1143,13 +1143,13 @@ TclScanElement(
*/
requireEscape = 1;
- length -= (length > 0);
+ length -= (length+1 > 1);
p++;
break;
}
if ((p[1] == '{') || (p[1] == '}') || (p[1] == '\\')) {
extra++; /* Escape sequences all one byte longer. */
- length -= (length > 0);
+ length -= (length+1 > 1);
p++;
}
forbidNone = 1;
diff --git a/generic/tclZlib.c b/generic/tclZlib.c
index dc7c3f3..1df84d7 100644
--- a/generic/tclZlib.c
+++ b/generic/tclZlib.c
@@ -443,10 +443,21 @@ GenerateHeader(
if (GetValue(interp, dictObj, "comment", &value) != TCL_OK) {
goto error;
} else if (value != NULL) {
- valueStr = Tcl_GetStringFromObj(value, &length);
- Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL,
+ Tcl_EncodingState state;
+ valueStr = Tcl_GetStringFromObj(value, &len);
+ result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, len,
+ TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_PROFILE_STRICT, &state,
headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len,
NULL);
+ if (result != TCL_OK) {
+ if (result == TCL_CONVERT_UNKNOWN) {
+ Tcl_AppendResult(interp, "Comment contains characters > 0xFF", NULL);
+ } else {
+ Tcl_AppendResult(interp, "Comment too large for zip", NULL);
+ }
+ result = TCL_ERROR;
+ goto error;
+ }
headerPtr->nativeCommentBuf[len] = '\0';
headerPtr->header.comment = (Bytef *) headerPtr->nativeCommentBuf;
if (extraSizePtr != NULL) {
@@ -464,9 +475,21 @@ GenerateHeader(
if (GetValue(interp, dictObj, "filename", &value) != TCL_OK) {
goto error;
} else if (value != NULL) {
- valueStr = Tcl_GetStringFromObj(value, &length);
- Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL,
- headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, NULL);
+ Tcl_EncodingState state;
+ valueStr = Tcl_GetStringFromObj(value, &len);
+ result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, len,
+ TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_PROFILE_STRICT, &state,
+ headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len,
+ NULL);
+ if (result != TCL_OK) {
+ if (result == TCL_CONVERT_UNKNOWN) {
+ Tcl_AppendResult(interp, "Filename contains characters > 0xFF", NULL);
+ } else {
+ Tcl_AppendResult(interp, "Filename too large for zip", NULL);
+ }
+ result = TCL_ERROR;
+ goto error;
+ }
headerPtr->nativeFilenameBuf[len] = '\0';
headerPtr->header.name = (Bytef *) headerPtr->nativeFilenameBuf;
if (extraSizePtr != NULL) {
@@ -547,8 +570,8 @@ ExtractHeader(
}
}
- Tcl_ExternalToUtfDStringEx(NULL, latin1enc, (char *) headerPtr->comment, -1,
- TCL_ENCODING_PROFILE_TCL8, &tmp, NULL);
+ Tcl_ExternalToUtfDString(latin1enc, (char *) headerPtr->comment, -1,
+ &tmp);
SetValue(dictObj, "comment", Tcl_DStringToObj(&tmp));
}
SetValue(dictObj, "crc", Tcl_NewBooleanObj(headerPtr->hcrc));
@@ -564,8 +587,8 @@ ExtractHeader(
}
}
- Tcl_ExternalToUtfDStringEx(NULL, latin1enc, (char *) headerPtr->name, -1,
- TCL_ENCODING_PROFILE_TCL8, &tmp, NULL);
+ Tcl_ExternalToUtfDString(latin1enc, (char *) headerPtr->name, -1,
+ &tmp);
SetValue(dictObj, "filename", Tcl_DStringToObj(&tmp));
}
if (headerPtr->os != 255) {
diff --git a/tests/dstring.test b/tests/dstring.test
index 314cee8..23863d0 100644
--- a/tests/dstring.test
+++ b/tests/dstring.test
@@ -211,6 +211,38 @@ test dstring-2.15 {appending list elements} -constraints testdstring -setup {
} -cleanup {
testdstring free
} -result {x #}
+test dstring-2.16 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup {
+ testdstring free
+} -body {
+ testdstring element "\\\n"; # Will setfault
+ testdstring get
+} -cleanup {
+ testdstring free
+} -result \\\\\\n
+test dstring-2.17 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup {
+ testdstring free
+} -body {
+ testdstring element "\\\{"; # Will setfault
+ testdstring get
+} -cleanup {
+ testdstring free
+} -result [list [list \{]]
+test dstring-2.18 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup {
+ testdstring free
+} -body {
+ testdstring element "\\\}"; # Will setfault
+ testdstring get
+} -cleanup {
+ testdstring free
+} -result [list [list \}]]
+test dstring-2.19 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup {
+ testdstring free
+} -body {
+ testdstring element "\\\\"; # Will setfault
+ testdstring get
+} -cleanup {
+ testdstring free
+} -result [list [list \\]]
test dstring-3.1 {nested sublists} -constraints testdstring -setup {
testdstring free
diff --git a/tests/encoding.test b/tests/encoding.test
index 2deda8d..215b5c8 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -1046,6 +1046,32 @@ test encoding-29.0 {get encoding nul terminator lengths} -constraints {
[testencoding nullength ksc5601]
} -result {1 2 4 2 2}
+test encoding-30.0 {encoding convertto large strings UINT_MAX} -constraints {
+ perf
+} -body {
+ # Test to ensure not misinterpreted as -1
+ list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertto ascii $s]]
+} -result {4294967295 1}
+
+test encoding-30.1 {encoding convertto large strings > 4GB} -constraints {
+ perf
+} -body {
+ list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertto ascii $s]]
+} -result {4294967296 1}
+
+test encoding-30.2 {encoding convertfrom large strings UINT_MAX} -constraints {
+ perf
+} -body {
+ # Test to ensure not misinterpreted as -1
+ list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertfrom ascii $s]]
+} -result {4294967295 1}
+
+test encoding-30.3 {encoding convertfrom large strings > 4GB} -constraints {
+ perf
+} -body {
+ list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertfrom ascii $s]]
+} -result {4294967296 1}
+
# cleanup
namespace delete ::tcl::test::encoding
diff --git a/tests/io.test b/tests/io.test
index b0142dd..04d3720 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -36,6 +36,7 @@ namespace eval ::tcl::test::io {
}
source [file join [file dirname [info script]] tcltests.tcl]
+testConstraint pointerIs64bit [expr {$::tcl_platform(pointerSize) >= 8}]
testConstraint testbytestring [llength [info commands testbytestring]]
testConstraint testchannel [llength [info commands testchannel]]
testConstraint testfevent [llength [info commands testfevent]]
@@ -194,6 +195,47 @@ test io-1.9 {Tcl_WriteChars: WriteChars} {
set sizes
} {19 19 19 19 19}
+test io-1.10 {WriteChars: large file (> INT_MAX). Bug 3d01d51bc4} -constraints {
+ pointerIs64bit perf
+} -setup {
+ set tmpfile [file join [temporaryDirectory] io-1.10.tmp]
+} -cleanup {
+ file delete $tmpfile
+} -body {
+ set fd [open $tmpfile w]
+ puts -nonewline $fd [string repeat A 0x80000000]
+ close $fd
+ # TODO - Should really read it back in but large reads are not currently working!
+ file size $tmpfile
+} -result 2147483648
+test io-1.11 {WriteChars: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints {
+ pointerIs64bit perf
+} -setup {
+ set tmpfile [file join [temporaryDirectory] io-1.11.tmp]
+} -cleanup {
+ file delete $tmpfile
+} -body {
+ set fd [open $tmpfile w]
+ puts -nonewline $fd [string repeat A 0x100000000]
+ close $fd
+ # TODO - Should really read it back in but large reads are not currently working!
+ file size $tmpfile
+} -result 4294967296
+test io-1.12 {WriteChars: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints {
+ pointerIs64bit perf
+} -setup {
+ set tmpfile [file join [temporaryDirectory] io-1.12.tmp]
+} -cleanup {
+ file delete $tmpfile
+} -body {
+ set fd [open $tmpfile w]
+ # *Exactly* UINT_MAX - separate bug from the general large file tests
+ puts -nonewline $fd [string repeat A 0xffffffff]
+ close $fd
+ # TODO - Should really read it back in but large reads are not currently working!
+ file size $tmpfile
+} -result 4294967295
+
test io-2.1 {WriteBytes} {
# loop until all bytes are written
@@ -236,6 +278,47 @@ test io-2.4 {WriteBytes: reset sawLF after each buffer} {
lappend x [contents $path(test1)]
} [list "abcdefg\nhijklmno" "abcdefg\nhijklmnopqrstuvwxyz"]
+test io-2.5 {WriteBytes: large file (> INT_MAX). Bug 3d01d51bc4} -constraints {
+ pointerIs64bit perf
+} -setup {
+ set tmpfile [file join [temporaryDirectory] io-2.5.tmp]
+} -cleanup {
+ file delete $tmpfile
+} -body {
+ set fd [open $tmpfile wb]
+ puts -nonewline $fd [string repeat A 0x80000000]
+ close $fd
+ # TODO - Should really read it back in but large reads are not currently working!
+ file size $tmpfile
+} -result 2147483648
+test io-2.6 {WriteBytes: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints {
+ pointerIs64bit perf
+} -setup {
+ set tmpfile [file join [temporaryDirectory] io-2.6.tmp]
+} -cleanup {
+ file delete $tmpfile
+} -body {
+ set fd [open $tmpfile wb]
+ puts -nonewline $fd [string repeat A 0x100000000]
+ close $fd
+ # TODO - Should really read it back in but large reads are not currently working!
+ file size $tmpfile
+} -result 4294967296
+test io-2.7 {WriteBytes: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints {
+ pointerIs64bit perf
+} -setup {
+ set tmpfile [file join [temporaryDirectory] io-2.7.tmp]
+} -cleanup {
+ file delete $tmpfile
+} -body {
+ set fd [open $tmpfile wb]
+ # *Exactly* UINT_MAX - separate bug from the general large file tests
+ puts -nonewline $fd [string repeat A 0xffffffff]
+ close $fd
+ # TODO - Should really read it back in but large reads are not currently working!
+ file size $tmpfile
+} -result 4294967295
+
test io-3.1 {WriteChars: compatibility with WriteBytes} {
# loop until all bytes are written
@@ -9140,48 +9223,6 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -s
removeFile io-75.5
} -result 4181
-test io-75.6 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup {
- set fn [makeFile {} io-75.6]
- set f [open $fn w+]
- fconfigure $f -encoding binary
- # \x81 is invalid in utf-8
- puts -nonewline $f A\x81
- flush $f
- seek $f 0
- fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict
-} -body {
- set d [read $f]
- binary scan $d H* hd
- lappend hd [catch {read $f} msg]
- close $f
- lappend hd $msg
-} -cleanup {
- removeFile io-75.6
-} -match glob -result {41 1 {error reading "*": illegal byte sequence}}
-
-test io-75.7 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup {
- set fn [makeFile {} io-75.7]
- set f [open $fn w+]
- fconfigure $f -encoding binary
- # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later.
- puts -nonewline $f A\xA1\x1A
- flush $f
- seek $f 0
- fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict
-} -body {
- set d [read $f]
- binary scan $d H* hd
- lappend hd [eof $f]
- lappend hd [catch {read $f} msg]
- lappend hd $msg
- fconfigure $f -encoding iso8859-1
- lappend hd [read $f];# We changed encoding, so now we can read the \xA1
- close $f
- set hd
-} -cleanup {
- removeFile io-75.7
-} -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ยก}
-
test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup {
set fn [makeFile {} io-75.8]
set f [open $fn w+]
diff --git a/tests/zlib.test b/tests/zlib.test
index 7e11634..ae7dd6d 100644
--- a/tests/zlib.test
+++ b/tests/zlib.test
@@ -486,6 +486,38 @@ test zlib-8.18 {Bug dd260aaf: fconfigure} -setup {
catch {close $inSide}
catch {close $outSide}
} -result {{one two} {one two}}
+test zlib-8.19 {zlib transformation, bug f9eafc3886} -constraints zlib -setup {
+ set file [makeFile {} test.gz]
+} -body {
+ set f [zlib push gzip [open $file w] -header [list comment [string repeat A 500]]]
+} -cleanup {
+ catch {close $f}
+ removeFile $file
+} -returnCodes 1 -result {Comment too large for zip}
+test zlib-8.20 {zlib transformation, bug f9eafc3886} -constraints zlib -setup {
+ set file [makeFile {} test.gz]
+} -body {
+ set f [zlib push gzip [open $file w] -header [list filename [string repeat A 5000]]]
+} -cleanup {
+ catch {close $f}
+ removeFile $file
+} -returnCodes 1 -result {Filename too large for zip}
+test zlib-8.21 {zlib transformation, bug f9eafc3886} -constraints zlib -setup {
+ set file [makeFile {} test.gz]
+} -body {
+ set f [zlib push gzip [open $file w] -header [list comment \u100]]
+} -cleanup {
+ catch {close $f}
+ removeFile $file
+} -returnCodes 1 -result {Comment contains characters > 0xFF}
+test zlib-8.22 {zlib transformation, bug f9eafc3886} -constraints zlib -setup {
+ set file [makeFile {} test.gz]
+} -body {
+ set f [zlib push gzip [open $file w] -header [list filename \u100]]
+} -cleanup {
+ catch {close $f}
+ removeFile $file
+} -returnCodes 1 -result {Filename contains characters > 0xFF}
test zlib-9.1 "check fcopy with push" -constraints zlib -setup {
set sfile [makeFile {} testsrc.gz]
diff --git a/unix/tclAppInit.c b/unix/tclAppInit.c
index 05d25de..e3d95bc 100644
--- a/unix/tclAppInit.c
+++ b/unix/tclAppInit.c
@@ -158,15 +158,16 @@ Tcl_AppInit(
* is the name of the application. If this line is deleted then no
* user-specific startup file will be run under any conditions.
*/
-
#ifdef DJGPP
- Tcl_ObjSetVar2(interp, Tcl_NewStringObj("tcl_rcFileName", TCL_INDEX_NONE), NULL,
- Tcl_NewStringObj("~/tclsh.rc", TCL_INDEX_NONE), TCL_GLOBAL_ONLY);
+#define INITFILENAME "tclshrc.tcl"
#else
- Tcl_ObjSetVar2(interp, Tcl_NewStringObj("tcl_rcFileName", TCL_INDEX_NONE), NULL,
- Tcl_NewStringObj("~/.tclshrc", TCL_INDEX_NONE), TCL_GLOBAL_ONLY);
+#define INITFILENAME ".tclshrc"
#endif
+ (void)Tcl_EvalEx(interp,
+ "set tcl_rcFileName [file tildeexpand ~/" INITFILENAME "]",
+ -1,
+ TCL_EVAL_GLOBAL);
return TCL_OK;
}
diff --git a/win/tclAppInit.c b/win/tclAppInit.c
index 30127fd..077500a 100644
--- a/win/tclAppInit.c
+++ b/win/tclAppInit.c
@@ -215,8 +215,11 @@ Tcl_AppInit(
* user-specific startup file will be run under any conditions.
*/
- Tcl_ObjSetVar2(interp, Tcl_NewStringObj("tcl_rcFileName", TCL_INDEX_NONE), NULL,
- Tcl_NewStringObj("~/tclshrc.tcl", TCL_INDEX_NONE), TCL_GLOBAL_ONLY);
+ (void)Tcl_EvalEx(interp,
+ "set tcl_rcFileName [file tildeexpand ~/tclshrc.tcl]",
+ -1,
+ TCL_EVAL_GLOBAL);
+
return TCL_OK;
}