diff options
author | apnadkarni <apnmbx-wits@yahoo.com> | 2023-03-01 07:19:07 (GMT) |
---|---|---|
committer | apnadkarni <apnmbx-wits@yahoo.com> | 2023-03-01 07:19:07 (GMT) |
commit | 9522e3788042247359e535da5bf235eac4c37541 (patch) | |
tree | 804fe277e92a7b078ef1528f9675a77a350ab892 | |
parent | 00e1068c039491b579117c6b38d7d415cb345e68 (diff) | |
parent | 62d9150ef53073a2ab29f2ba1b46551f273e56ec (diff) | |
download | tcl-9522e3788042247359e535da5bf235eac4c37541.zip tcl-9522e3788042247359e535da5bf235eac4c37541.tar.gz tcl-9522e3788042247359e535da5bf235eac4c37541.tar.bz2 |
Merge 9.0
-rw-r--r-- | doc/Tcl.n | 316 | ||||
-rw-r--r-- | generic/tcl.h | 14 | ||||
-rw-r--r-- | generic/tclDecls.h | 2 | ||||
-rw-r--r-- | generic/tclEncoding.c | 123 | ||||
-rw-r--r-- | generic/tclIO.c | 127 | ||||
-rw-r--r-- | generic/tclIO.h | 2 | ||||
-rw-r--r-- | generic/tclIOCmd.c | 6 | ||||
-rw-r--r-- | generic/tclTest.c | 22 | ||||
-rw-r--r-- | generic/tclUtil.c | 4 | ||||
-rw-r--r-- | generic/tclZlib.c | 41 | ||||
-rw-r--r-- | tests/dstring.test | 32 | ||||
-rw-r--r-- | tests/encoding.test | 26 | ||||
-rw-r--r-- | tests/io.test | 125 | ||||
-rw-r--r-- | tests/zlib.test | 32 | ||||
-rw-r--r-- | unix/tclAppInit.c | 11 | ||||
-rw-r--r-- | win/tclAppInit.c | 7 |
16 files changed, 532 insertions, 358 deletions
@@ -1,6 +1,7 @@ '\" '\" Copyright (c) 1993 The Regents of the University of California. '\" Copyright (c) 1994-1996 Sun Microsystems, Inc. +'\" Copyright (c) 2023 Nathan Coulter '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -16,176 +17,152 @@ Summary of Tcl language syntax. .SH DESCRIPTION .PP The following rules define the syntax and semantics of the Tcl language: -.IP "[1] \fBCommands.\fR" -A Tcl script is a string containing one or more commands. -Semi-colons and newlines are command separators unless quoted as -described below. -Close brackets are command terminators during command substitution -(see below) unless quoted. -.IP "[2] \fBEvaluation.\fR" -A command is evaluated in two steps. -First, the Tcl interpreter breaks the command into \fIwords\fR -and performs substitutions as described below. -These substitutions are performed in the same way for all -commands. -Secondly, the first word is used to locate a routine to -carry out the command, and the remaining words of the command are -passed to that routine. -The routine is free to interpret each of its words -in any way it likes, such as an integer, variable name, list, -or Tcl script. -Different commands interpret their words differently. -.IP "[3] \fBWords.\fR" -Words of a command are separated by white space (except for -newlines, which are command separators). -.IP "[4] \fBDouble quotes.\fR" -If the first character of a word is double-quote +. +.IP "[1] \fBScript.\fR" +A script is composed of zero or more commands delimited by semi-colons or +newlines. +.IP "[2] \fBCommand.\fR" +A command is composed of zero or more words delimited by whitespace. The +replacement for a substitution is included verbatim in the word. For example, a +space in the replacement is included in the word rather than becoming a +delimiter, and \fI\\\\\fR becomes a single backslash in the word. Each word is +processed from left to right and each substitution is performed as soon as it +is complete. +For example, the command +.RS +.PP +.CS +set y [set x 0][incr x][incr x] +.CE +.PP +is composed of three words, and sets the value of \fIy\fR to \fI012\fR. +.PP +If hash +.PQ # +is the first character of what would otherwise be the first word of a command, +all characters up to the next newline are ignored. +.RE +. +.IP "[3] \fBBraced word.\fR" +If a word is enclosed in braces +.PQ { +and +.PQ } "" +, the braces are removed and the enclosed characters become the word. No +substitutions are performed. Nested pairs of braces may occur within the word. +A brace preceded by an odd number of backslashes is not considered part of a +pair, and neither brace nor the backslashes are removed from the word. +. +.IP "[4] \fBQuoted word.\fR" +If a word is enclosed in double quotes .PQ \N'34' -then the word is terminated by the next double-quote character. -If semi-colons, close brackets, or white space characters -(including newlines) appear between the quotes then they are treated -as ordinary characters and included in the word. -Command substitution, variable substitution, and backslash substitution -are performed on the characters between the quotes as described below. -The double-quotes are not retained as part of the word. -.IP "[5] \fBArgument expansion.\fR" -If a word starts with the string -.QW {*} -followed by a non-whitespace character, then the leading +, the double quotes are removed and the enclosed characters become the word. +Substitutions are performed. +. +.IP "[5] \fBList.\fR" +A list has the form of a single command. Newline is whitespace, and semicolon +has no special interpretation. There is no script evaluation so there is no +argument expansion, variable substitution, or command substitution: Dollar-sign +and open bracket have no special interpretation, and what would be argument +expansion in a script is invalid in a list. +. +.IP "[6] \fBArgument expansion.\fR" +If .QW {*} -is removed and the rest of the word is parsed and substituted as any other -word. After substitution, the word is parsed as a list (without command or -variable substitutions; backslash substitutions are performed as is normal for -a list and individual internal words may be surrounded by either braces or -double-quote characters), and its words are added to the command being -substituted. For instance, -.QW "cmd a {*}{b [c]} d {*}{$e f {g h}}" +prefixes a word, it is removed. After any remaining enclosing braces or quotes +are processed and applicable substitutions performed, the word, which must +be a list, is removed from the command, and in its place each word in the +list becomes an additional word in the command. For example, +.CS +cmd a {*}{b [c]} d {*}{$e f {g h}} +.CE is equivalent to -.QW "cmd a b {[c]} d {$e} f {g h}" . -.IP "[6] \fBBraces.\fR" -If the first character of a word is an open brace -.PQ { -and rule [5] does not apply, then -the word is terminated by the matching close brace -.PQ } "" . -Braces nest within the word: for each additional open -brace there must be an additional close brace (however, -if an open brace or close brace within the word is -quoted with a backslash then it is not counted in locating the -matching close brace). -No substitutions are performed on the characters between the -braces except for backslash-newline substitutions described -below, nor do semi-colons, newlines, close brackets, -or white space receive any special interpretation. -The word will consist of exactly the characters between the -outer braces, not including the braces themselves. -.IP "[7] \fBCommand substitution.\fR" -If a word contains an open bracket +.CS +cmd a b {[c]} d {$e} f {g h} . +.CE +. +.IP "[7] \fBEvaluation.\fR" +To evaluate a script, an interpreter evaluates each successive command. The +first word identifies a procedure, and the remaining words are passed to that +procedure for further evaluation. The procedure interprets each argument in +its own way, e.g. as an integer, variable name, list, mathematical expression, +script, or in some other arbitrary way. The result of the last command is the +result of the script. +. +.IP "[8] \fBCommand substitution.\fR" +Each pair of brackets .PQ [ -then Tcl performs \fIcommand substitution\fR. -To do this it invokes the Tcl interpreter recursively to process -the characters following the open bracket as a Tcl script. -The script may contain any number of commands and must be terminated -by a close bracket -.PQ ] "" . -The result of the script (i.e. the result of its last command) is -substituted into the word in place of the brackets and all of the -characters between them. -There may be any number of command substitutions in a single word. -Command substitution is not performed on words enclosed in braces. -.IP "[8] \fBVariable substitution.\fR" -If a word contains a dollar-sign +and +.PQ ] "" +encloses a script and is replaced by the result of that script. +.IP "[9] \fBVariable substitution.\fR" +Each of the following forms begins with dollar sign .PQ $ -followed by one of the forms -described below, then Tcl performs \fIvariable -substitution\fR: the dollar-sign and the following characters are -replaced in the word by the value of a variable. -Variable substitution may take any of the following forms: +and is replaced by the value of the identified variable. \fIname\fR names the +variable and is composed of ASCII letters (\fBA\fR\(en\fBZ\fR and +\fBa\fR\(en\fBz\fR), digits (\fB0\fR\(en\fB9\fR), underscores, or namespace +delimiters (two or more colons). \fIindex\fR is the name of an individual +variable within an array variable, and may be empty. .RS .TP 15 \fB$\fIname\fR . -\fIName\fR is the name of a scalar variable; the name is a sequence -of one or more characters that are a letter, digit, underscore, -or namespace separators (two or more colons). -Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR, -\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR). +\fIname\fR may not be empty. + .TP 15 \fB$\fIname\fB(\fIindex\fB)\fR . -\fIName\fR gives the name of an array variable and \fIindex\fR gives -the name of an element within that array. -\fIName\fR must contain only letters, digits, underscores, and -namespace separators, and may be an empty string. -Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR, -\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR). -Command substitutions, variable substitutions, and backslash -substitutions are performed on the characters of \fIindex\fR. +\fIname\fR may be empty. Substitutions are performed on \fIindex\fR. .TP 15 \fB${\fIname\fB}\fR +\fIname\fR may be empty. +.TP 15 +\fB${\fIname(index)\fB}\fR . -\fIName\fR is the name of a scalar variable or array element. It may contain -any characters whatsoever except for close braces. It indicates an array -element if \fIname\fR is in the form -.QW \fIarrayName\fB(\fIindex\fB)\fR -where \fIarrayName\fR does not contain any open parenthesis characters, -.QW \fB(\fR , -or close brace characters, -.QW \fB}\fR , -and \fIindex\fR can be any sequence of characters except for close brace -characters. No further -substitutions are performed during the parsing of \fIname\fR. -.PP -There may be any number of variable substitutions in a single word. -Variable substitution is not performed on words enclosed in braces. -.PP -Note that variables may contain character sequences other than those listed -above, but in that case other mechanisms must be used to access them (e.g., -via the \fBset\fR command's single-argument form). +\fIname\fR may be empty. No substitutions are performed. .RE -.IP "[9] \fBBackslash substitution.\fR" -If a backslash +Variables that are not accessible through one of the forms above may be +accessed through other mechanisms, e.g. the \fBset\fR command. +.IP "[10] \fBBackslash substitution.\fR" +Each backslash .PQ \e -appears within a word then \fIbackslash substitution\fR occurs. -In all cases but those described below the backslash is dropped and -the following character is treated as an ordinary -character and included in the word. -This allows characters such as double quotes, close brackets, -and dollar signs to be included in words without triggering -special processing. -The following table lists the backslash sequences that are -handled specially, along with the value that replaces each sequence. +that is not part of one of the forms listed below is removed, and the next +character is included in the word verbatim, which allows the inclusion of +characters that would normally be interpreted, namely whitespace, braces, +brackets, double quote, dollar sign, and backslash. The following sequences +are replaced as described: +.RS +.RS .RS .TP 7 \e\fBa\fR -Audible alert (bell) (Unicode U+000007). +Audible alert (bell) (U+7). .TP 7 \e\fBb\fR -Backspace (Unicode U+000008). +Backspace (U+8). .TP 7 \e\fBf\fR -Form feed (Unicode U+00000C). +Form feed (U+C). .TP 7 \e\fBn\fR -Newline (Unicode U+00000A). +Newline (U+A). .TP 7 \e\fBr\fR -Carriage-return (Unicode U+00000D). +Carriage-return (U+D). .TP 7 \e\fBt\fR -Tab (Unicode U+000009). +Tab (U+9). .TP 7 \e\fBv\fR -Vertical tab (Unicode U+00000B). +Vertical tab (U+B). .TP 7 \e\fB<newline>\fIwhiteSpace\fR . -A single space character replaces the backslash, newline, and all spaces -and tabs after the newline. This backslash sequence is unique in that it -is replaced in a separate pre-pass before the command is actually parsed. -This means that it will be replaced even when it occurs between braces, -and the resulting space will be treated as a word separator if it is not -in braces or quotes. +Newline preceded by an odd number of backslashes, along with the consecutive +spaces and tabs that immediately follow it, is replaced by a single space. +Because this happens before the command is split into words, it occurs even +within braced words, and if the resulting space may subsequently be treated as +a word delimiter. .TP 7 \e\e Backslash @@ -193,77 +170,30 @@ Backslash .TP 7 \e\fIooo\fR . -The digits \fIooo\fR (one, two, or three of them) give a eight-bit octal -value for the Unicode character that will be inserted, in the range -\fI000\fR\(en\fI377\fR (i.e., the range U+000000\(enU+0000FF). -The parser will stop just before this range overflows, or when -the maximum of three digits is reached. The upper bits of the Unicode -character will be 0. +Up to three octal digits form an eight-bit value for a Unicode character in the +range \fI0\fR\(en\fI377\fR, i.e. U+0\(enU+FF. Only the digits that result in a +number in this range are consumed. .TP 7 \e\fBx\fIhh\fR . -The hexadecimal digits \fIhh\fR (one or two of them) give an eight-bit -hexadecimal value for the Unicode character that will be inserted. The upper -bits of the Unicode character will be 0 (i.e., the character will be in the -range U+000000\(enU+0000FF). +Up to two hexadecimal digits form an eight-bit value for a Unicode character in +the range \fI0\fR\(en\fIFF\fR. .TP 7 \e\fBu\fIhhhh\fR . -The hexadecimal digits \fIhhhh\fR (one, two, three, or four of them) give a -sixteen-bit hexadecimal value for the Unicode character that will be -inserted. The upper bits of the Unicode character will be 0 (i.e., the -character will be in the range U+000000\(enU+00FFFF). +Up to four hexadecimal digits form a 16-bit value for a Unicode character in +the range \fI0\fR\(en\fIFFFF\fR. .TP 7 \e\fBU\fIhhhhhhhh\fR . -The hexadecimal digits \fIhhhhhhhh\fR (one up to eight of them) give a -twenty-one-bit hexadecimal value for the Unicode character that will be -inserted, in the range U+000000\(enU+10FFFF. The parser will stop just -before this range overflows, or when the maximum of eight digits -is reached. The upper bits of the Unicode character will be 0. +Up to eight hexadecimal digits form a 21-bit value for a Unicode character in +the range \fI0\fR\(en\fI10FFFF\fR. Only the digits that result in a number in +this range are consumed. .RE -.PP -Backslash substitution is not performed on words enclosed in braces, -except for backslash-newline as described above. .RE -.IP "[10] \fBComments.\fR" -If a hash character -.PQ # -appears at a point where Tcl is -expecting the first character of the first word of a command, -then the hash character and the characters that follow it, up -through the next newline, are treated as a comment and ignored. -The comment character only has significance when it appears -at the beginning of a command. -.IP "[11] \fBOrder of substitution.\fR" -Each character is processed exactly once by the Tcl interpreter -as part of creating the words of a command. -For example, if variable substitution occurs then no further -substitutions are performed on the value of the variable; the -value is inserted into the word verbatim. -If command substitution occurs then the nested command is -processed entirely by the recursive call to the Tcl interpreter; -no substitutions are performed before making the recursive -call and no additional substitutions are performed on the result -of the nested script. -.RS .PP -Substitutions take place from left to right, and each substitution is -evaluated completely before attempting to evaluate the next. Thus, a -sequence like -.PP -.CS -set y [set x 0][incr x][incr x] -.CE -.PP -will always set the variable \fIy\fR to the value, \fI012\fR. .RE -.IP "[12] \fBSubstitution and word boundaries.\fR" -Substitutions do not affect the word boundaries of a command, -except for argument expansion as specified in rule [5]. -For example, during variable substitution the entire value of -the variable becomes part of a single word, even if the variable's -value contains spaces. +. .SH KEYWORDS backslash, command, comment, script, substitution, variable '\" Local Variables: diff --git a/generic/tcl.h b/generic/tcl.h index 6040099..2d2849f 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -311,10 +311,10 @@ typedef unsigned TCL_WIDE_INT_TYPE Tcl_WideUInt; #define Tcl_WideAsDouble(val) ((double)((Tcl_WideInt)(val))) #define Tcl_DoubleAsWide(val) ((Tcl_WideInt)((double)(val))) -#if TCL_MAJOR_VERSION > 8 -typedef size_t Tcl_Size; -#else +#if TCL_MAJOR_VERSION < 9 typedef int Tcl_Size; +#else +typedef size_t Tcl_Size; #endif #ifdef _WIN32 @@ -452,17 +452,17 @@ typedef void (Tcl_ThreadCreateProc) (void *clientData); #if TCL_MAJOR_VERSION > 8 typedef struct Tcl_RegExpIndices { - size_t start; /* Character offset of first character in + Tcl_Size start; /* Character offset of first character in * match. */ - size_t end; /* Character offset of first character after + Tcl_Size end; /* Character offset of first character after * the match. */ } Tcl_RegExpIndices; typedef struct Tcl_RegExpInfo { - size_t nsubs; /* Number of subexpressions in the compiled + Tcl_Size nsubs; /* Number of subexpressions in the compiled * expression. */ Tcl_RegExpIndices *matches; /* Array of nsubs match offset pairs. */ - size_t extendStart; /* The offset at which a subsequent match + Tcl_Size extendStart; /* The offset at which a subsequent match * might begin. */ } Tcl_RegExpInfo; #else diff --git a/generic/tclDecls.h b/generic/tclDecls.h index bdc094d..d8d8ddb 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -4233,7 +4233,7 @@ extern const TclStubs *tclStubsPtr; #define Tcl_GlobalEvalObj(interp, objPtr) \ Tcl_EvalObjEx(interp, objPtr, TCL_EVAL_GLOBAL) -#if defined(TCL_8_COMPAT) && !defined(BUILD_tcl) +#if defined(TCL_8_COMPAT) && !defined(BUILD_tcl) && TCL_MAJOR_VERSION > 8 # ifdef USE_TCL_STUBS # undef Tcl_Gets # undef Tcl_GetsObj diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index a208270..b2b319d 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1209,8 +1209,8 @@ Tcl_ExternalToUtfDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int result, soFar, srcRead, dstWrote, dstChars; - Tcl_Size dstLen; + int result; + Tcl_Size dstLen, soFar; const char *srcStart = src; /* DO FIRST - Must always be initialized before returning */ @@ -1239,18 +1239,40 @@ Tcl_ExternalToUtfDStringEx( srcLen = encodingPtr->lengthProc(src); } - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; if (encodingPtr->toUtfProc == UtfToUtfProc) { flags |= ENCODING_INPUT; } while (1) { - result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, - flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); - soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + int srcChunkLen, srcChunkRead; + int dstChunkLen, dstChunkWrote, dstChunkChars; + + if (srcLen > INT_MAX) { + srcChunkLen = INT_MAX; + } else { + srcChunkLen = srcLen; + flags |= TCL_ENCODING_END; /* Last chunk */ + } + dstChunkLen = dstLen > INT_MAX ? INT_MAX : dstLen; - src += srcRead; - if (result != TCL_CONVERT_NOSPACE) { + result = encodingPtr->toUtfProc(encodingPtr->clientData, src, + srcChunkLen, flags, &state, dst, dstChunkLen, + &srcChunkRead, &dstChunkWrote, &dstChunkChars); + soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr); + + src += srcChunkRead; + + /* + * Keep looping in two case - + * - our destination buffer did not have enough room + * - we had not passed in all the data and error indicated fragment + * of a multibyte character + * In both cases we have to grow buffer, move the input source pointer + * and loop. Otherwise, return the result we got. + */ + if ((result != TCL_CONVERT_NOSPACE) && + !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { Tcl_Size nBytesProcessed = (src - srcStart); Tcl_DStringSetLength(dstPtr, soFar); @@ -1277,17 +1299,17 @@ Tcl_ExternalToUtfDStringEx( } } return result; - } + } - /* Expand space and continue */ - flags &= ~TCL_ENCODING_START; - srcLen -= srcRead; - if (Tcl_DStringLength(dstPtr) == 0) { - Tcl_DStringSetLength(dstPtr, dstLen); - } - Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1); - dst = Tcl_DStringValue(dstPtr) + soFar; - dstLen = Tcl_DStringLength(dstPtr) - soFar - 1; + flags &= ~TCL_ENCODING_START; + srcLen -= srcChunkRead; + + if (Tcl_DStringLength(dstPtr) == 0) { + Tcl_DStringSetLength(dstPtr, dstLen); + } + Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1); + dst = Tcl_DStringValue(dstPtr) + soFar; + dstLen = Tcl_DStringLength(dstPtr) - soFar - 1; } } @@ -1315,7 +1337,7 @@ Tcl_ExternalToUtf( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - Tcl_Size srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or TCL_INDEX_NONE for * encoding-specific string length. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state @@ -1360,6 +1382,13 @@ Tcl_ExternalToUtf( flags |= TCL_ENCODING_START | TCL_ENCODING_END; statePtr = &state; } + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + flags &= ~TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } if (srcReadPtr == NULL) { srcReadPtr = &srcRead; } @@ -1502,9 +1531,9 @@ Tcl_UtfToExternalDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int result, soFar, srcRead, dstWrote, dstChars; + int result; + Tcl_Size dstLen, soFar; const char *srcStart = src; - Tcl_Size dstLen; /* DO FIRST - must always be initialized on return */ Tcl_DStringInit(dstPtr); @@ -1531,19 +1560,41 @@ Tcl_UtfToExternalDStringEx( } else if (srcLen == TCL_INDEX_NONE) { srcLen = strlen(src); } - - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; while (1) { + int srcChunkLen, srcChunkRead; + int dstChunkLen, dstChunkWrote, dstChunkChars; + + if (srcLen > INT_MAX) { + srcChunkLen = INT_MAX; + } else { + srcChunkLen = srcLen; + flags |= TCL_ENCODING_END; /* Last chunk */ + } + dstChunkLen = dstLen > INT_MAX ? INT_MAX : dstLen; + result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, - srcLen, flags, &state, dst, dstLen, - &srcRead, &dstWrote, &dstChars); - soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + srcChunkLen, flags, &state, dst, dstChunkLen, + &srcChunkRead, &dstChunkWrote, &dstChunkChars); + soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr); + + /* Move past the part processed in this go around */ + src += srcChunkRead; - src += srcRead; - if (result != TCL_CONVERT_NOSPACE) { + /* + * Keep looping in two case - + * - our destination buffer did not have enough room + * - we had not passed in all the data and error indicated fragment + * of a multibyte character + * In both cases we have to grow buffer, move the input source pointer + * and loop. Otherwise, return the result we got. + */ + if ((result != TCL_CONVERT_NOSPACE) && + !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { Tcl_Size nBytesProcessed = (src - srcStart); - int i = soFar + encodingPtr->nullSize - 1; - while (i >= soFar) { + size_t i = soFar + encodingPtr->nullSize - 1; + /* Loop as DStringSetLength only stores one nul byte at a time */ + while (i+1 >= soFar+1) { Tcl_DStringSetLength(dstPtr, i--); } if (errorLocPtr) { @@ -1576,7 +1627,8 @@ Tcl_UtfToExternalDStringEx( } flags &= ~TCL_ENCODING_START; - srcLen -= srcRead; + srcLen -= srcChunkRead; + if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); } @@ -1610,7 +1662,7 @@ Tcl_UtfToExternal( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - Tcl_Size srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or TCL_INDEX_NONE for * strlen(). */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state @@ -1652,6 +1704,13 @@ Tcl_UtfToExternal( flags |= TCL_ENCODING_START | TCL_ENCODING_END; statePtr = &state; } + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + flags &= ~TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } if (srcReadPtr == NULL) { srcReadPtr = &srcRead; } diff --git a/generic/tclIO.c b/generic/tclIO.c index 6d6a935..dc0ce7d 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -102,7 +102,7 @@ typedef struct CopyState { Tcl_WideInt total; /* Total bytes transferred (written). */ Tcl_Interp *interp; /* Interp that started the copy. */ Tcl_Obj *cmdPtr; /* Command to be invoked at completion. */ - size_t bufSize; /* Size of appended buffer. */ + Tcl_Size bufSize; /* Size of appended buffer. */ char buffer[TCLFLEXARRAY]; /* Copy buffer, this must be the last * field. */ } CopyState; @@ -151,7 +151,7 @@ typedef struct CloseCallback { * Static functions in this file: */ -static ChannelBuffer * AllocChannelBuffer(size_t length); +static ChannelBuffer * AllocChannelBuffer(Tcl_Size length); static void PreserveChannelBuffer(ChannelBuffer *bufPtr); static void ReleaseChannelBuffer(ChannelBuffer *bufPtr); static int IsShared(ChannelBuffer *bufPtr); @@ -191,9 +191,9 @@ static int DetachChannel(Tcl_Interp *interp, Tcl_Channel chan); static void DiscardInputQueued(ChannelState *statePtr, int discardSavedBuffers); static void DiscardOutputQueued(ChannelState *chanPtr); -static int DoRead(Channel *chanPtr, char *dst, size_t bytesToRead, +static int DoRead(Channel *chanPtr, char *dst, Tcl_Size bytesToRead, int allowShortReads); -static int DoReadChars(Channel *chan, Tcl_Obj *objPtr, size_t toRead, +static int DoReadChars(Channel *chan, Tcl_Obj *objPtr, Tcl_Size toRead, int appendFlag); static int FilterInputBytes(Channel *chanPtr, GetsState *statePtr); @@ -237,7 +237,7 @@ static int WillRead(Channel *chanPtr); * short description of what the macro does. * * -------------------------------------------------------------------------- - * size_t BytesLeft(ChannelBuffer *bufPtr) + * Tcl_Size BytesLeft(ChannelBuffer *bufPtr) * * Returns the number of bytes of data remaining in the buffer. * @@ -2502,10 +2502,10 @@ Tcl_RemoveChannelMode( static ChannelBuffer * AllocChannelBuffer( - size_t length) /* Desired length of channel buffer. */ + Tcl_Size length) /* Desired length of channel buffer. */ { ChannelBuffer *bufPtr; - size_t n; + Tcl_Size n; n = length + CHANNELBUFFER_HEADER_SIZE + BUFFER_PADDING + BUFFER_PADDING; bufPtr = (ChannelBuffer *)Tcl_Alloc(n); @@ -4037,11 +4037,11 @@ Tcl_ClearChannelHandlers( *---------------------------------------------------------------------- */ -size_t +Tcl_Size Tcl_Write( Tcl_Channel chan, /* The channel to buffer output for. */ const char *src, /* Data to queue in output buffer. */ - size_t srcLen) /* Length of data in bytes, or -1 for + Tcl_Size srcLen) /* Length of data in bytes, or TCL_INDEX_NONE for * strlen(). */ { /* @@ -4091,18 +4091,18 @@ Tcl_Write( *---------------------------------------------------------------------- */ -size_t +Tcl_Size Tcl_WriteRaw( Tcl_Channel chan, /* The channel to buffer output for. */ const char *src, /* Data to queue in output buffer. */ - size_t srcLen) /* Length of data in bytes, or -1 for + Tcl_Size srcLen) /* Length of data in bytes, or TCL_INDEX_NONE for * strlen(). */ { Channel *chanPtr = ((Channel *) chan); ChannelState *statePtr = chanPtr->state; /* State info for channel */ int errorCode; - size_t written; + Tcl_Size written; if (CheckChannelErrors(statePtr, TCL_WRITABLE | CHANNEL_RAW_MODE) != 0) { return TCL_INDEX_NONE; @@ -4148,17 +4148,17 @@ Tcl_WriteRaw( *---------------------------------------------------------------------- */ -size_t +Tcl_Size Tcl_WriteChars( Tcl_Channel chan, /* The channel to buffer output for. */ const char *src, /* UTF-8 characters to queue in output * buffer. */ - size_t len) /* Length of string in bytes, or TCL_INDEX_NONE for + Tcl_Size len) /* Length of string in bytes, or TCL_INDEX_NONE for * strlen(). */ { Channel *chanPtr = (Channel *) chan; ChannelState *statePtr = chanPtr->state; /* State info for channel */ - size_t result; + Tcl_Size result; Tcl_Obj *objPtr; if (CheckChannelErrors(statePtr, TCL_WRITABLE) != 0) { @@ -4223,7 +4223,7 @@ Tcl_WriteChars( *---------------------------------------------------------------------- */ -size_t +Tcl_Size Tcl_WriteObj( Tcl_Channel chan, /* The channel to buffer output for. */ Tcl_Obj *objPtr) /* The object to write. */ @@ -4235,7 +4235,6 @@ Tcl_WriteObj( Channel *chanPtr; ChannelState *statePtr; /* State info for channel */ const char *src; - size_t srcLen = 0; statePtr = ((Channel *) chan)->state; chanPtr = statePtr->topChanPtr; @@ -4243,21 +4242,38 @@ Tcl_WriteObj( if (CheckChannelErrors(statePtr, TCL_WRITABLE) != 0) { return TCL_INDEX_NONE; } - if (statePtr->encoding == NULL) { - size_t result; + Tcl_Size srcLen; + if (statePtr->encoding == NULL) { src = (char *) Tcl_GetByteArrayFromObj(objPtr, &srcLen); if (src == NULL) { Tcl_SetErrno(EILSEQ); - result = TCL_INDEX_NONE; - } else { - result = WriteBytes(chanPtr, src, srcLen); + return TCL_INDEX_NONE; } - return result; } else { src = Tcl_GetStringFromObj(objPtr, &srcLen); - return WriteChars(chanPtr, src, srcLen); } + + size_t totalWritten = 0; + /* + * Note original code always called WriteChars even if srcLen 0 + * so we will too. + */ + do { + int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; + int written; + if (statePtr->encoding == NULL) { + written = WriteBytes(chanPtr, src, chunkSize); + } else { + written = WriteChars(chanPtr, src, chunkSize); + } + if (written < 0) { + return TCL_INDEX_NONE; + } + totalWritten += written; + srcLen -= chunkSize; + } while (srcLen); + return totalWritten; } static void @@ -4539,7 +4555,7 @@ Write( *--------------------------------------------------------------------------- */ -size_t +Tcl_Size Tcl_Gets( Tcl_Channel chan, /* Channel from which to read. */ Tcl_DString *lineRead) /* The line read will be appended to this @@ -4548,7 +4564,7 @@ Tcl_Gets( * for managing the storage. */ { Tcl_Obj *objPtr; - size_t charsStored; + Tcl_Size charsStored; TclNewObj(objPtr); charsStored = Tcl_GetsObj(chan, objPtr); @@ -4582,7 +4598,7 @@ Tcl_Gets( *--------------------------------------------------------------------------- */ -size_t +Tcl_Size Tcl_GetsObj( Tcl_Channel chan, /* Channel from which to read. */ Tcl_Obj *objPtr) /* The line read will be appended to this @@ -4594,7 +4610,7 @@ Tcl_GetsObj( /* State info for channel */ ChannelBuffer *bufPtr; int inEofChar, skip, copiedTotal, oldFlags, oldRemoved; - size_t oldLength; + Tcl_Size oldLength; Tcl_Encoding encoding; char *dst, *dstEnd, *eol, *eof; Tcl_EncodingState oldState; @@ -4749,7 +4765,7 @@ Tcl_GetsObj( */ if (eol >= dstEnd) { - size_t offset; + Tcl_Size offset; if (eol != eof) { offset = eol - objPtr->bytes; @@ -5019,7 +5035,7 @@ TclGetsObjBinary( /* State info for channel */ ChannelBuffer *bufPtr; int inEofChar, skip, copiedTotal, oldFlags, oldRemoved; - size_t rawLen, byteLen = 0, oldLength; + Tcl_Size rawLen, byteLen = 0, oldLength; int eolChar; unsigned char *dst, *dstEnd, *eol, *eof, *byteArray; @@ -5287,7 +5303,7 @@ FreeBinaryEncoding( } static Tcl_Encoding -GetBinaryEncoding() +GetBinaryEncoding(void) { ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); @@ -5670,11 +5686,11 @@ CommonGetsCleanup( *---------------------------------------------------------------------- */ -size_t +Tcl_Size Tcl_Read( Tcl_Channel chan, /* The channel from which to read. */ char *dst, /* Where to store input read. */ - size_t bytesToRead) /* Maximum number of bytes to read. */ + Tcl_Size bytesToRead) /* Maximum number of bytes to read. */ { Channel *chanPtr = (Channel *) chan; ChannelState *statePtr = chanPtr->state; @@ -5715,11 +5731,11 @@ Tcl_Read( *---------------------------------------------------------------------- */ -size_t +Tcl_Size Tcl_ReadRaw( Tcl_Channel chan, /* The channel from which to read. */ char *readBuf, /* Where to store input read. */ - size_t bytesToRead) /* Maximum number of bytes to read. */ + Tcl_Size bytesToRead) /* Maximum number of bytes to read. */ { Channel *chanPtr = (Channel *) chan; ChannelState *statePtr = chanPtr->state; @@ -5798,8 +5814,8 @@ Tcl_ReadRaw( } } else if (nread > 0) { /* - * Successful read (short is OK) - add to bytes copied. - */ + * Successful read (short is OK) - add to bytes copied. + */ copied += nread; } else { @@ -5833,11 +5849,11 @@ Tcl_ReadRaw( *--------------------------------------------------------------------------- */ -size_t +Tcl_Size Tcl_ReadChars( Tcl_Channel chan, /* The channel to read. */ Tcl_Obj *objPtr, /* Input data is stored in this object. */ - size_t toRead, /* Maximum number of characters to store, or + Tcl_Size toRead, /* Maximum number of characters to store, or * TCL_INDEX_NONE to read all available data (up to EOF or * when channel blocks). */ int appendFlag) /* If non-zero, data read from the channel @@ -5893,7 +5909,7 @@ static int DoReadChars( Channel *chanPtr, /* The channel to read. */ Tcl_Obj *objPtr, /* Input data is stored in this object. */ - size_t toRead, /* Maximum number of characters to store, or + Tcl_Size toRead, /* Maximum number of characters to store, or * TCL_INDEX_NONE to read all available data (up to EOF or * when channel blocks). */ int appendFlag) /* If non-zero, data read from the channel @@ -6185,7 +6201,7 @@ ReadChars( int savedIEFlags = statePtr->inputEncodingFlags; int savedFlags = statePtr->flags; char *dst, *src = RemovePoint(bufPtr); - size_t numBytes; + Tcl_Size numBytes; int srcLen = BytesLeft(bufPtr); /* @@ -6728,11 +6744,11 @@ TranslateInputEOL( *---------------------------------------------------------------------- */ -size_t +Tcl_Size Tcl_Ungets( Tcl_Channel chan, /* The channel for which to add the input. */ const char *str, /* The input itself. */ - size_t len, /* The length of the input. */ + Tcl_Size len, /* The length of the input. */ int atEnd) /* If non-zero, add at end of queue; otherwise * add at head of queue. */ { @@ -7480,7 +7496,11 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ - return (GotFlag(statePtr, CHANNEL_EOF) && !GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) ? 1 : 0; + if (GotFlag(statePtr, CHANNEL_NONBLOCKING|CHANNEL_FCOPY) + && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { + return 0; + } + return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; } /* @@ -7650,7 +7670,7 @@ Tcl_ChannelBuffered( void Tcl_SetChannelBufferSize( Tcl_Channel chan, /* The channel whose buffer size to set. */ - size_t sz) /* The size to set. */ + Tcl_Size sz) /* The size to set. */ { ChannelState *statePtr; /* State of real channel structure. */ @@ -7704,7 +7724,7 @@ Tcl_SetChannelBufferSize( *---------------------------------------------------------------------- */ -size_t +Tcl_Size Tcl_GetChannelBufferSize( Tcl_Channel chan) /* The channel for which to find the buffer * size. */ @@ -7756,7 +7776,7 @@ Tcl_BadChannelOption( const char *genericopt = "blocking buffering buffersize encoding encodingprofile eofchar translation"; const char **argv; - size_t argc, i; + Tcl_Size argc, i; Tcl_DString ds; Tcl_Obj *errObj; @@ -8035,7 +8055,7 @@ Tcl_SetChannelOption( ChannelState *statePtr = chanPtr->state; /* State info for channel */ size_t len; /* Length of optionName string. */ - size_t argc; + Tcl_Size argc; const char **argv = NULL; /* @@ -9585,7 +9605,7 @@ CopyData( Tcl_Channel inChan, outChan; ChannelState *inStatePtr, *outStatePtr; int result = TCL_OK, size; - size_t sizeb; + Tcl_Size sizeb; Tcl_WideInt total; const char *buffer; int inBinary, outBinary, sameEncoding; @@ -9607,6 +9627,7 @@ CopyData( * the bottom of the stack. */ + SetFlag(inStatePtr, CHANNEL_FCOPY); inBinary = (inStatePtr->encoding == NULL); outBinary = (outStatePtr->encoding == NULL); sameEncoding = inStatePtr->encoding == outStatePtr->encoding @@ -9722,6 +9743,7 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } + ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } } @@ -9813,6 +9835,7 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } + ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } @@ -9835,6 +9858,7 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } + ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } } /* while */ @@ -9887,6 +9911,7 @@ CopyData( } } } + ResetFlag(inStatePtr, CHANNEL_FCOPY); return result; } @@ -9923,7 +9948,7 @@ static int DoRead( Channel *chanPtr, /* The channel from which to read. */ char *dst, /* Where to store input read. */ - size_t bytesToRead, /* Maximum number of bytes to read. */ + Tcl_Size bytesToRead, /* Maximum number of bytes to read. */ int allowShortReads) /* Allow half-blocking (pipes,sockets) */ { ChannelState *statePtr = chanPtr->state; @@ -11021,7 +11046,7 @@ FixLevelCode( Tcl_Obj *msg) { int explicitResult, numOptions, lcn; - size_t lc; + Tcl_Size lc; Tcl_Obj **lv, **lvn; int res, i, j, val, lignore, cignore; int newlevel = -1, newcode = -1; diff --git a/generic/tclIO.h b/generic/tclIO.h index a050010..8f0ef8a 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -235,6 +235,8 @@ typedef struct ChannelState { * flushed after every newline. */ #define CHANNEL_UNBUFFERED (1<<5) /* Output to the channel must always * be flushed immediately. */ +#define CHANNEL_FCOPY (1<<6) /* Channel is currently doing an fcopy + * mode. */ #define BG_FLUSH_SCHEDULED (1<<7) /* A background flush of the queued * output buffers has been * scheduled. */ diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 4ce27bb..197ca32 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -106,7 +106,7 @@ Tcl_PutsObjCmd( Tcl_Obj *string; /* String to write. */ Tcl_Obj *chanObjPtr = NULL; /* channel object. */ int newline; /* Add a newline at end? */ - int result; /* Result of puts operation. */ + size_t result; /* Result of puts operation. */ int mode; /* Mode in which channel is opened. */ switch (objc) { @@ -163,12 +163,12 @@ Tcl_PutsObjCmd( TclChannelPreserve(chan); result = Tcl_WriteObj(chan, string); - if (result == -1) { + if (result == TCL_INDEX_NONE) { goto error; } if (newline != 0) { result = Tcl_WriteChars(chan, "\n", 1); - if (result == -1) { + if (result == TCL_INDEX_NONE) { goto error; } } diff --git a/generic/tclTest.c b/generic/tclTest.c index 652c5aa..b6c7f77 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -4105,7 +4105,7 @@ TestregexpObjCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int i, indices, stringLength, match, about; - size_t ii; + Tcl_Size ii; int hasxflags, cflags, eflags; Tcl_RegExp regExpr; const char *string; @@ -4217,7 +4217,7 @@ TestregexpObjCmd( if (objc > 2 && (cflags®_EXPECT) && indices) { const char *varName; const char *value; - size_t start, end; + Tcl_Size start, end; char resinfo[TCL_INTEGER_SPACE * 2]; varName = Tcl_GetString(objv[2]); @@ -4257,11 +4257,11 @@ TestregexpObjCmd( Tcl_RegExpGetInfo(regExpr, &info); for (i = 0; i < objc; i++) { - size_t start, end; + Tcl_Size start, end; Tcl_Obj *newPtr, *varPtr, *valuePtr; varPtr = objv[i]; - ii = ((cflags®_EXPECT) && i == objc-1) ? TCL_INDEX_NONE : (size_t)i; + ii = ((cflags®_EXPECT) && i == objc-1) ? TCL_INDEX_NONE : (Tcl_Size)i; if (indices) { Tcl_Obj *objs[2]; @@ -6476,10 +6476,10 @@ static int TestWrongNumArgsObjCmd( TCL_UNUSED(void *), Tcl_Interp *interp, /* Current interpreter. */ - size_t objc, /* Number of arguments. */ + Tcl_Size objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - size_t i, length; + Tcl_Size i, length; const char *msg; if (objc + 1 < 4) { @@ -7187,7 +7187,7 @@ TestUtfPrevCmd( int objc, Tcl_Obj *const objv[]) { - size_t numBytes, offset; + Tcl_Size numBytes, offset; char *bytes; const char *result; @@ -7228,7 +7228,7 @@ TestNumUtfCharsCmd( Tcl_Obj *const objv[]) { if (objc > 1) { - size_t numBytes, len, limit = TCL_INDEX_NONE; + Tcl_Size numBytes, len, limit = TCL_INDEX_NONE; const char *bytes = Tcl_GetStringFromObj(objv[1], &numBytes); if (objc > 2) { @@ -7296,7 +7296,7 @@ TestGetIntForIndexCmd( int objc, Tcl_Obj *const objv[]) { - size_t result; + Tcl_Size result; Tcl_WideInt endvalue; if (objc != 3) { @@ -7415,7 +7415,7 @@ TestHashSystemHashCmd( Tcl_SetHashValue(hPtr, INT2PTR(i+42)); } - if (hash.numEntries != (size_t)limit) { + if (hash.numEntries != (Tcl_Size)limit) { Tcl_AppendResult(interp, "unexpected maximal size", NULL); Tcl_DeleteHashTable(&hash); return TCL_ERROR; @@ -8175,7 +8175,7 @@ static int InterpCompiledVarResolver( TCL_UNUSED(Tcl_Interp *), const char *name, - TCL_UNUSED(size_t) /*length*/, + TCL_UNUSED(Tcl_Size) /*length*/, TCL_UNUSED(Tcl_Namespace *), Tcl_ResolvedVarInfo **rPtr) { diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 3abd615..0ebfb1d 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1143,13 +1143,13 @@ TclScanElement( */ requireEscape = 1; - length -= (length > 0); + length -= (length+1 > 1); p++; break; } if ((p[1] == '{') || (p[1] == '}') || (p[1] == '\\')) { extra++; /* Escape sequences all one byte longer. */ - length -= (length > 0); + length -= (length+1 > 1); p++; } forbidNone = 1; diff --git a/generic/tclZlib.c b/generic/tclZlib.c index dc7c3f3..1df84d7 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -443,10 +443,21 @@ GenerateHeader( if (GetValue(interp, dictObj, "comment", &value) != TCL_OK) { goto error; } else if (value != NULL) { - valueStr = Tcl_GetStringFromObj(value, &length); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, + Tcl_EncodingState state; + valueStr = Tcl_GetStringFromObj(value, &len); + result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, + TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_PROFILE_STRICT, &state, headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len, NULL); + if (result != TCL_OK) { + if (result == TCL_CONVERT_UNKNOWN) { + Tcl_AppendResult(interp, "Comment contains characters > 0xFF", NULL); + } else { + Tcl_AppendResult(interp, "Comment too large for zip", NULL); + } + result = TCL_ERROR; + goto error; + } headerPtr->nativeCommentBuf[len] = '\0'; headerPtr->header.comment = (Bytef *) headerPtr->nativeCommentBuf; if (extraSizePtr != NULL) { @@ -464,9 +475,21 @@ GenerateHeader( if (GetValue(interp, dictObj, "filename", &value) != TCL_OK) { goto error; } else if (value != NULL) { - valueStr = Tcl_GetStringFromObj(value, &length); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, - headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, NULL); + Tcl_EncodingState state; + valueStr = Tcl_GetStringFromObj(value, &len); + result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, + TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_PROFILE_STRICT, &state, + headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, + NULL); + if (result != TCL_OK) { + if (result == TCL_CONVERT_UNKNOWN) { + Tcl_AppendResult(interp, "Filename contains characters > 0xFF", NULL); + } else { + Tcl_AppendResult(interp, "Filename too large for zip", NULL); + } + result = TCL_ERROR; + goto error; + } headerPtr->nativeFilenameBuf[len] = '\0'; headerPtr->header.name = (Bytef *) headerPtr->nativeFilenameBuf; if (extraSizePtr != NULL) { @@ -547,8 +570,8 @@ ExtractHeader( } } - Tcl_ExternalToUtfDStringEx(NULL, latin1enc, (char *) headerPtr->comment, -1, - TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); + Tcl_ExternalToUtfDString(latin1enc, (char *) headerPtr->comment, -1, + &tmp); SetValue(dictObj, "comment", Tcl_DStringToObj(&tmp)); } SetValue(dictObj, "crc", Tcl_NewBooleanObj(headerPtr->hcrc)); @@ -564,8 +587,8 @@ ExtractHeader( } } - Tcl_ExternalToUtfDStringEx(NULL, latin1enc, (char *) headerPtr->name, -1, - TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); + Tcl_ExternalToUtfDString(latin1enc, (char *) headerPtr->name, -1, + &tmp); SetValue(dictObj, "filename", Tcl_DStringToObj(&tmp)); } if (headerPtr->os != 255) { diff --git a/tests/dstring.test b/tests/dstring.test index 314cee8..23863d0 100644 --- a/tests/dstring.test +++ b/tests/dstring.test @@ -211,6 +211,38 @@ test dstring-2.15 {appending list elements} -constraints testdstring -setup { } -cleanup { testdstring free } -result {x #} +test dstring-2.16 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\n"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result \\\\\\n +test dstring-2.17 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\{"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result [list [list \{]] +test dstring-2.18 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\}"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result [list [list \}]] +test dstring-2.19 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\\"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result [list [list \\]] test dstring-3.1 {nested sublists} -constraints testdstring -setup { testdstring free diff --git a/tests/encoding.test b/tests/encoding.test index 2deda8d..215b5c8 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -1046,6 +1046,32 @@ test encoding-29.0 {get encoding nul terminator lengths} -constraints { [testencoding nullength ksc5601] } -result {1 2 4 2 2} +test encoding-30.0 {encoding convertto large strings UINT_MAX} -constraints { + perf +} -body { + # Test to ensure not misinterpreted as -1 + list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertto ascii $s]] +} -result {4294967295 1} + +test encoding-30.1 {encoding convertto large strings > 4GB} -constraints { + perf +} -body { + list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertto ascii $s]] +} -result {4294967296 1} + +test encoding-30.2 {encoding convertfrom large strings UINT_MAX} -constraints { + perf +} -body { + # Test to ensure not misinterpreted as -1 + list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertfrom ascii $s]] +} -result {4294967295 1} + +test encoding-30.3 {encoding convertfrom large strings > 4GB} -constraints { + perf +} -body { + list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertfrom ascii $s]] +} -result {4294967296 1} + # cleanup namespace delete ::tcl::test::encoding diff --git a/tests/io.test b/tests/io.test index b0142dd..04d3720 100644 --- a/tests/io.test +++ b/tests/io.test @@ -36,6 +36,7 @@ namespace eval ::tcl::test::io { } source [file join [file dirname [info script]] tcltests.tcl] +testConstraint pointerIs64bit [expr {$::tcl_platform(pointerSize) >= 8}] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testchannel [llength [info commands testchannel]] testConstraint testfevent [llength [info commands testfevent]] @@ -194,6 +195,47 @@ test io-1.9 {Tcl_WriteChars: WriteChars} { set sizes } {19 19 19 19 19} +test io-1.10 {WriteChars: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { + pointerIs64bit perf +} -setup { + set tmpfile [file join [temporaryDirectory] io-1.10.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile w] + puts -nonewline $fd [string repeat A 0x80000000] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 2147483648 +test io-1.11 {WriteChars: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { + pointerIs64bit perf +} -setup { + set tmpfile [file join [temporaryDirectory] io-1.11.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile w] + puts -nonewline $fd [string repeat A 0x100000000] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 4294967296 +test io-1.12 {WriteChars: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints { + pointerIs64bit perf +} -setup { + set tmpfile [file join [temporaryDirectory] io-1.12.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile w] + # *Exactly* UINT_MAX - separate bug from the general large file tests + puts -nonewline $fd [string repeat A 0xffffffff] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 4294967295 + test io-2.1 {WriteBytes} { # loop until all bytes are written @@ -236,6 +278,47 @@ test io-2.4 {WriteBytes: reset sawLF after each buffer} { lappend x [contents $path(test1)] } [list "abcdefg\nhijklmno" "abcdefg\nhijklmnopqrstuvwxyz"] +test io-2.5 {WriteBytes: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { + pointerIs64bit perf +} -setup { + set tmpfile [file join [temporaryDirectory] io-2.5.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile wb] + puts -nonewline $fd [string repeat A 0x80000000] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 2147483648 +test io-2.6 {WriteBytes: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { + pointerIs64bit perf +} -setup { + set tmpfile [file join [temporaryDirectory] io-2.6.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile wb] + puts -nonewline $fd [string repeat A 0x100000000] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 4294967296 +test io-2.7 {WriteBytes: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints { + pointerIs64bit perf +} -setup { + set tmpfile [file join [temporaryDirectory] io-2.7.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile wb] + # *Exactly* UINT_MAX - separate bug from the general large file tests + puts -nonewline $fd [string repeat A 0xffffffff] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 4294967295 + test io-3.1 {WriteChars: compatibility with WriteBytes} { # loop until all bytes are written @@ -9140,48 +9223,6 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -s removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { - set fn [makeFile {} io-75.6] - set f [open $fn w+] - fconfigure $f -encoding binary - # \x81 is invalid in utf-8 - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict -} -body { - set d [read $f] - binary scan $d H* hd - lappend hd [catch {read $f} msg] - close $f - lappend hd $msg -} -cleanup { - removeFile io-75.6 -} -match glob -result {41 1 {error reading "*": illegal byte sequence}} - -test io-75.7 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { - set fn [makeFile {} io-75.7] - set f [open $fn w+] - fconfigure $f -encoding binary - # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. - puts -nonewline $f A\xA1\x1A - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict -} -body { - set d [read $f] - binary scan $d H* hd - lappend hd [eof $f] - lappend hd [catch {read $f} msg] - lappend hd $msg - fconfigure $f -encoding iso8859-1 - lappend hd [read $f];# We changed encoding, so now we can read the \xA1 - close $f - set hd -} -cleanup { - removeFile io-75.7 -} -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ยก} - test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] diff --git a/tests/zlib.test b/tests/zlib.test index 7e11634..ae7dd6d 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -486,6 +486,38 @@ test zlib-8.18 {Bug dd260aaf: fconfigure} -setup { catch {close $inSide} catch {close $outSide} } -result {{one two} {one two}} +test zlib-8.19 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list comment [string repeat A 500]]] +} -cleanup { + catch {close $f} + removeFile $file +} -returnCodes 1 -result {Comment too large for zip} +test zlib-8.20 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list filename [string repeat A 5000]]] +} -cleanup { + catch {close $f} + removeFile $file +} -returnCodes 1 -result {Filename too large for zip} +test zlib-8.21 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list comment \u100]] +} -cleanup { + catch {close $f} + removeFile $file +} -returnCodes 1 -result {Comment contains characters > 0xFF} +test zlib-8.22 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list filename \u100]] +} -cleanup { + catch {close $f} + removeFile $file +} -returnCodes 1 -result {Filename contains characters > 0xFF} test zlib-9.1 "check fcopy with push" -constraints zlib -setup { set sfile [makeFile {} testsrc.gz] diff --git a/unix/tclAppInit.c b/unix/tclAppInit.c index 05d25de..e3d95bc 100644 --- a/unix/tclAppInit.c +++ b/unix/tclAppInit.c @@ -158,15 +158,16 @@ Tcl_AppInit( * is the name of the application. If this line is deleted then no * user-specific startup file will be run under any conditions. */ - #ifdef DJGPP - Tcl_ObjSetVar2(interp, Tcl_NewStringObj("tcl_rcFileName", TCL_INDEX_NONE), NULL, - Tcl_NewStringObj("~/tclsh.rc", TCL_INDEX_NONE), TCL_GLOBAL_ONLY); +#define INITFILENAME "tclshrc.tcl" #else - Tcl_ObjSetVar2(interp, Tcl_NewStringObj("tcl_rcFileName", TCL_INDEX_NONE), NULL, - Tcl_NewStringObj("~/.tclshrc", TCL_INDEX_NONE), TCL_GLOBAL_ONLY); +#define INITFILENAME ".tclshrc" #endif + (void)Tcl_EvalEx(interp, + "set tcl_rcFileName [file tildeexpand ~/" INITFILENAME "]", + -1, + TCL_EVAL_GLOBAL); return TCL_OK; } diff --git a/win/tclAppInit.c b/win/tclAppInit.c index 30127fd..077500a 100644 --- a/win/tclAppInit.c +++ b/win/tclAppInit.c @@ -215,8 +215,11 @@ Tcl_AppInit( * user-specific startup file will be run under any conditions. */ - Tcl_ObjSetVar2(interp, Tcl_NewStringObj("tcl_rcFileName", TCL_INDEX_NONE), NULL, - Tcl_NewStringObj("~/tclshrc.tcl", TCL_INDEX_NONE), TCL_GLOBAL_ONLY); + (void)Tcl_EvalEx(interp, + "set tcl_rcFileName [file tildeexpand ~/tclshrc.tcl]", + -1, + TCL_EVAL_GLOBAL); + return TCL_OK; } |