From cbda5cb9b212067d1d831ec476057502e3c70531 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 16:41:06 +0000 Subject: Make Tcl_UtfToExternal()/Tcl_ExternalToUtf() report the error, if srcLen and dstLen are both > INT_MAX and therefore not all characters can be handled by this function. --- generic/tclEncoding.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ce5626f..67e67e9 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1229,7 +1229,7 @@ Tcl_ExternalToUtf( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - Tcl_Size srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or TCL_INDEX_NONE for * encoding-specific string length. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state @@ -1271,7 +1271,15 @@ Tcl_ExternalToUtf( srcLen = encodingPtr->lengthProc(src); } if (statePtr == NULL) { - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + } else { + flags |= TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } statePtr = &state; } if (srcReadPtr == NULL) { @@ -1467,7 +1475,7 @@ Tcl_UtfToExternal( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - Tcl_Size srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or TCL_INDEX_NONE for * strlen(). */ int flags, /* Conversion control flags. 
*/ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state @@ -1506,7 +1514,15 @@ Tcl_UtfToExternal( srcLen = strlen(src); } if (statePtr == NULL) { - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + } else { + flags |= TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } statePtr = &state; } if (srcReadPtr == NULL) { -- cgit v0.12 From baf9b5e9bb89e1e13583fb510f6cb134d39126ff Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 16:54:02 +0000 Subject: Handle statePtr != NULL as well --- generic/tclEncoding.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 67e67e9..e639d3a 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1271,17 +1271,16 @@ Tcl_ExternalToUtf( srcLen = encodingPtr->lengthProc(src); } if (statePtr == NULL) { - flags |= TCL_ENCODING_START; - if (srcLen > INT_MAX) { - srcLen = INT_MAX; - } else { - flags |= TCL_ENCODING_END; - } - if (dstLen > INT_MAX) { - dstLen = INT_MAX; - } + flags |= TCL_ENCODING_START | TCL_ENCODING_END; statePtr = &state; } + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + flags &= ~TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } if (srcReadPtr == NULL) { srcReadPtr = &srcRead; } @@ -1514,17 +1513,16 @@ Tcl_UtfToExternal( srcLen = strlen(src); } if (statePtr == NULL) { - flags |= TCL_ENCODING_START; - if (srcLen > INT_MAX) { - srcLen = INT_MAX; - } else { - flags |= TCL_ENCODING_END; - } - if (dstLen > INT_MAX) { - dstLen = INT_MAX; - } + flags |= TCL_ENCODING_START | TCL_ENCODING_END; statePtr = &state; } + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + flags &= ~TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } if (srcReadPtr == NULL) { srcReadPtr = &srcRead; } -- cgit v0.12 From 152d7203ac1b3f7f560995985c15f7527f2ecdc9 Mon Sep 17 
00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 17:19:07 +0000 Subject: Handle Tcl_UtfToExternal error in tclZlib.c --- generic/tclZlib.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/generic/tclZlib.c b/generic/tclZlib.c index 5a6dbc4..ea18c16 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -444,9 +444,13 @@ GenerateHeader( goto error; } else if (value != NULL) { valueStr = Tcl_GetStringFromObj(value, &length); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, + if (Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len, - NULL); + NULL) != TCL_OK) { + result = TCL_ERROR; + Tcl_AppendResult(interp, "Cannot encode comment", NULL); + goto error; + } headerPtr->nativeCommentBuf[len] = '\0'; headerPtr->header.comment = (Bytef *) headerPtr->nativeCommentBuf; if (extraSizePtr != NULL) { @@ -465,8 +469,13 @@ GenerateHeader( goto error; } else if (value != NULL) { valueStr = Tcl_GetStringFromObj(value, &length); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, - headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, NULL); + if (Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, + headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len, + NULL) != TCL_OK) { + result = TCL_ERROR; + Tcl_AppendResult(interp, "Cannot encode filename", NULL); + goto error; + } headerPtr->nativeFilenameBuf[len] = '\0'; headerPtr->header.name = (Bytef *) headerPtr->nativeFilenameBuf; if (extraSizePtr != NULL) { -- cgit v0.12 From e7b8b9d2dd7951ecf0e3cbbcb618244fd7c45ebb Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 20:12:43 +0000 Subject: Proposed fix for [f9eafc3886]: Error handling in zlib comment/filename. 
With testcases --- generic/tclZlib.c | 47 ++++++++++++++++++++++++++++++++++++++--------- tests/zlib.test | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 9 deletions(-) diff --git a/generic/tclZlib.c b/generic/tclZlib.c index 63a25fa..cbff7b7 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -441,10 +441,21 @@ GenerateHeader( if (GetValue(interp, dictObj, "comment", &value) != TCL_OK) { goto error; } else if (value != NULL) { + Tcl_EncodingState state; valueStr = Tcl_GetStringFromObj(value, &len); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, 0, NULL, + result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, + TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_STOPONERROR, &state, headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len, NULL); + if (result != TCL_OK) { + if (result == TCL_CONVERT_UNKNOWN) { + Tcl_AppendResult(interp, "Comment contains characters > 0xFF", NULL); + } else { + Tcl_AppendResult(interp, "Comment too large for zip", NULL); + } + result = TCL_ERROR; + goto error; + } headerPtr->nativeCommentBuf[len] = '\0'; headerPtr->header.comment = (Bytef *) headerPtr->nativeCommentBuf; if (extraSizePtr != NULL) { @@ -462,9 +473,21 @@ GenerateHeader( if (GetValue(interp, dictObj, "filename", &value) != TCL_OK) { goto error; } else if (value != NULL) { + Tcl_EncodingState state; valueStr = Tcl_GetStringFromObj(value, &len); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, 0, NULL, - headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, NULL); + result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, + TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_STOPONERROR, &state, + headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, + NULL); + if (result != TCL_OK) { + if (result == TCL_CONVERT_UNKNOWN) { + Tcl_AppendResult(interp, "Filename contains characters > 0xFF", NULL); + } else { + Tcl_AppendResult(interp, "Filename too large for zip", NULL); + } + result = TCL_ERROR; + goto error; + } 
headerPtr->nativeFilenameBuf[len] = '\0'; headerPtr->header.name = (Bytef *) headerPtr->nativeFilenameBuf; if (extraSizePtr != NULL) { @@ -1189,7 +1212,8 @@ Tcl_ZlibStreamPut( { ZlibStreamHandle *zshPtr = (ZlibStreamHandle *) zshandle; char *dataTmp = NULL; - int e, size, outSize, toStore; + int e; + int size, outSize, toStore; if (zshPtr->streamEnd) { if (zshPtr->interp) { @@ -1312,7 +1336,8 @@ Tcl_ZlibStreamGet( * may get less! */ { ZlibStreamHandle *zshPtr = (ZlibStreamHandle *) zshandle; - int e, i, listLen, itemLen, dataPos = 0; + int e; + int i, listLen, itemLen, dataPos = 0; Tcl_Obj *itemObj; unsigned char *dataPtr, *itemPtr; int existing; @@ -1561,7 +1586,8 @@ Tcl_ZlibDeflate( int level, Tcl_Obj *gzipHeaderDictObj) { - int wbits = 0, inLen = 0, e = 0, extraSize = 0; + int wbits = 0, e = 0, extraSize = 0; + int inLen = 0; Byte *inData = NULL; z_stream stream; GzipHeader header; @@ -1711,7 +1737,8 @@ Tcl_ZlibInflate( int bufferSize, Tcl_Obj *gzipHeaderDictObj) { - int wbits = 0, inLen = 0, e = 0, newBufferSize; + int wbits = 0, e = 0; + int inLen = 0, newBufferSize; Byte *inData = NULL, *outData = NULL, *newOutData = NULL; z_stream stream; gz_header header, *headerPtr = NULL; @@ -2365,7 +2392,8 @@ ZlibPushSubcmd( const char *const *pushOptions = pushDecompressOptions; enum pushOptions {poDictionary, poHeader, poLevel, poLimit}; Tcl_Obj *headerObj = NULL, *compDictObj = NULL; - int limit = DEFAULT_BUFFER_SIZE, dummy; + int limit = DEFAULT_BUFFER_SIZE; + int dummy; if (objc < 4) { Tcl_WrongNumArgs(interp, 2, objv, "mode channel ?options...?"); @@ -2897,7 +2925,8 @@ ZlibTransformClose( Tcl_Interp *interp) { ZlibChannelData *cd = (ZlibChannelData *)instanceData; - int e, written, result = TCL_OK; + int e, result = TCL_OK; + int written; /* * Delete the support timer. 
diff --git a/tests/zlib.test b/tests/zlib.test index 7ddf1d7..c3e344c 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -486,6 +486,54 @@ test zlib-8.18 {Bug dd260aaf: fconfigure} -setup { catch {close $inSide} catch {close $outSide} } -result {{one two} {one two}} +test zlib-8.19 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list comment [string repeat A 500]]] + puts $f "ok" + close $f + set f [zlib push gunzip [open $file]] + list [gets $f] [dict get [chan configure $f -header] comment] +} -cleanup { + close $f + removeFile $file +} -returnCodes 1 -result {Comment too large for zip} +test zlib-8.20 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list filename [string repeat A 5000]]] + puts $f "ok" + close $f + set f [zlib push gunzip [open $file]] + list [gets $f] [dict get [chan configure $f -header] filename] +} -cleanup { + close $f + removeFile $file +} -returnCodes 1 -result {Filename too large for zip} +test zlib-8.21 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list comment \u100]] + puts $f "ok" + close $f + set f [zlib push gunzip [open $file]] + list [gets $f] [dict get [chan configure $f -header] comment] +} -cleanup { + close $f + removeFile $file +} -returnCodes 1 -result {Comment contains characters > 0xFF} +test zlib-8.22 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list filename \u100]] + puts $f "ok" + close $f + set f [zlib push gunzip [open $file]] + list [gets $f] [dict get [chan configure $f -header] comment] +} -cleanup { + close $f + removeFile $file +} -returnCodes 1 -result {Filename contains 
characters > 0xFF} test zlib-9.1 "check fcopy with push" -constraints zlib -setup { set sfile [makeFile {} testsrc.gz] -- cgit v0.12 From 6b172a213198a8d51d9b0b7783e0df3adc71bfe6 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 21:37:15 +0000 Subject: fill in bug ticket-nr --- tests/zlib.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/zlib.test b/tests/zlib.test index c3e344c..61e14bb 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -486,7 +486,7 @@ test zlib-8.18 {Bug dd260aaf: fconfigure} -setup { catch {close $inSide} catch {close $outSide} } -result {{one two} {one two}} -test zlib-8.19 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { +test zlib-8.19 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list comment [string repeat A 500]]] @@ -498,7 +498,7 @@ test zlib-8.19 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { close $f removeFile $file } -returnCodes 1 -result {Comment too large for zip} -test zlib-8.20 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { +test zlib-8.20 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list filename [string repeat A 5000]]] @@ -510,7 +510,7 @@ test zlib-8.20 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { close $f removeFile $file } -returnCodes 1 -result {Filename too large for zip} -test zlib-8.21 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { +test zlib-8.21 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list comment \u100]] @@ -522,7 +522,7 @@ test zlib-8.21 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { close $f removeFile $file } -returnCodes 1 -result 
{Comment contains characters > 0xFF} -test zlib-8.22 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { +test zlib-8.22 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list filename \u100]] -- cgit v0.12 From a1fe72fa4a3bf6c99720ce309d0611a5d941ea93 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 27 Feb 2023 20:50:33 +0000 Subject: Fix testcases --- tests/zlib.test | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/tests/zlib.test b/tests/zlib.test index 61e14bb..5312d2b 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -490,48 +490,32 @@ test zlib-8.19 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list comment [string repeat A 500]]] - puts $f "ok" - close $f - set f [zlib push gunzip [open $file]] - list [gets $f] [dict get [chan configure $f -header] comment] } -cleanup { - close $f + catch {close $f} removeFile $file } -returnCodes 1 -result {Comment too large for zip} test zlib-8.20 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list filename [string repeat A 5000]]] - puts $f "ok" - close $f - set f [zlib push gunzip [open $file]] - list [gets $f] [dict get [chan configure $f -header] filename] } -cleanup { - close $f + catch {close $f} removeFile $file } -returnCodes 1 -result {Filename too large for zip} test zlib-8.21 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list comment \u100]] - puts $f "ok" - close $f - set f [zlib push gunzip [open $file]] - list [gets $f] [dict get [chan configure $f -header] comment] } -cleanup { - close $f + catch {close $f} removeFile $file } 
-returnCodes 1 -result {Comment contains characters > 0xFF} test zlib-8.22 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list filename \u100]] - puts $f "ok" - close $f - set f [zlib push gunzip [open $file]] - list [gets $f] [dict get [chan configure $f -header] comment] } -cleanup { - close $f + catch {close $f} removeFile $file } -returnCodes 1 -result {Filename contains characters > 0xFF} -- cgit v0.12 From e743d3e48700a8b562d4a7e3893c856532ca107c Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 09:57:22 +0000 Subject: Fix formatting issue in Tcl.n --- doc/Tcl.n | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/Tcl.n b/doc/Tcl.n index 8e0b342..0f784af 100644 --- a/doc/Tcl.n +++ b/doc/Tcl.n @@ -156,6 +156,8 @@ special processing. The following table lists the backslash sequences that are handled specially, along with the value that replaces each sequence. .RS +.RS +.RS .TP 7 \e\fBa\fR Audible alert (bell) (Unicode U+000007). @@ -222,6 +224,7 @@ inserted, in the range U+000000\(enU+10FFFF. The parser will stop just before this range overflows, or when the maximum of eight digits is reached. The upper bits of the Unicode character will be 0. .RE +.RE .PP Backslash substitution is not performed on words enclosed in braces, except for backslash-newline as described above. -- cgit v0.12 From 7d6cf09e029257c1c0656f2cd9253a4436e6a27c Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 10:04:49 +0000 Subject: Make the descriptions in doc/Tcl.n more concise and intuitive. --- doc/Tcl.n | 315 ++++++++++++++++++++++++-------------------------------------- 1 file changed, 121 insertions(+), 194 deletions(-) diff --git a/doc/Tcl.n b/doc/Tcl.n index 0f784af..d13f3ea 100644 --- a/doc/Tcl.n +++ b/doc/Tcl.n @@ -1,6 +1,7 @@ '\" '\" Copyright (c) 1993 The Regents of the University of California. 
'\" Copyright (c) 1994-1996 Sun Microsystems, Inc. +'\" Copyright (c) 2023 Nathan Coulter '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -16,178 +17,152 @@ Summary of Tcl language syntax. .SH DESCRIPTION .PP The following rules define the syntax and semantics of the Tcl language: -.IP "[1] \fBCommands.\fR" -A Tcl script is a string containing one or more commands. -Semi-colons and newlines are command separators unless quoted as -described below. -Close brackets are command terminators during command substitution -(see below) unless quoted. -.IP "[2] \fBEvaluation.\fR" -A command is evaluated in two steps. -First, the Tcl interpreter breaks the command into \fIwords\fR -and performs substitutions as described below. -These substitutions are performed in the same way for all -commands. -Secondly, the first word is used to locate a routine to -carry out the command, and the remaining words of the command are -passed to that routine. -The routine is free to interpret each of its words -in any way it likes, such as an integer, variable name, list, -or Tcl script. -Different commands interpret their words differently. -.IP "[3] \fBWords.\fR" -Words of a command are separated by white space (except for -newlines, which are command separators). -.IP "[4] \fBDouble quotes.\fR" -If the first character of a word is double-quote +. +.IP "[1] \fBScript.\fR" +A script is composed of zero or more commands delimited by semi-colons or +newlines. +.IP "[2] \fBCommand.\fR" +A command is composed of zero or more words delimited by whitespace. The +replacement for a substitution is included verbatim in the word. For example, a +space in the replacement is included in the word rather than becoming a +delimiter, and \fI\\\\\fR becomes a single backslash in the word. Each word is +processed from left to right and each substitution is performed as soon as it +is complete. 
+For example, the command +.RS +.PP +.CS +set y [set x 0][incr x][incr x] +.CE +.PP +is composed of three words, and sets the value of \fIy\fR to \fI012\fR. +.PP +If hash +.PQ # +is the first character of what would otherwise be the first word of a command, +all characters up to the next newline are ignored. +.RE +. +.IP "[3] \fBBraced word.\fR" +If a word is enclosed in braces +.PQ { +and +.PQ } "" +, the braces are removed and the enclosed characters become the word. No +substitutions are performed. Nested pairs of braces may occur within the word. +A brace preceded by an odd number of backslashes is not considered part of a +pair, and neither brace nor the backslashes are removed from the word. +. +.IP "[4] \fBQuoted word.\fR" +If a word is enclosed in double quotes .PQ \N'34' -then the word is terminated by the next double-quote character. -If semi-colons, close brackets, or white space characters -(including newlines) appear between the quotes then they are treated -as ordinary characters and included in the word. -Command substitution, variable substitution, and backslash substitution -are performed on the characters between the quotes as described below. -The double-quotes are not retained as part of the word. -.IP "[5] \fBArgument expansion.\fR" -If a word starts with the string -.QW {*} -followed by a non-whitespace character, then the leading +, the double quotes are removed and the enclosed characters become the word. +Substitutions are performed. +. +.IP "[5] \fBList.\fR" +A list has the form of a single command. Newline is whitespace, and semicolon +has no special interpretation. There is no script evaluation so there is no +argument expansion, variable substitution, or command substitution: Dollar-sign +and open bracket have no special interpretation, and what would be argument +expansion in a script is invalid in a list. +. 
+.IP "[6] \fBArgument expansion.\fR" +If .QW {*} -is removed and the rest of the word is parsed and substituted as any other -word. After substitution, the word is parsed as a list (without command or -variable substitutions; backslash substitutions are performed as is normal for -a list and individual internal words may be surrounded by either braces or -double-quote characters), and its words are added to the command being -substituted. For instance, -.QW "cmd a {*}{b [c]} d {*}{$e f {g h}}" +prefixes a word, it is removed. After any remaining enclosing braces or quotes +are processed and applicable substitutions performed, the word, which must +be a list, is removed from the command, and in its place each word in the +list becomes an additional word in the command. For example, +.CS +cmd a {*}{b [c]} d {*}{$e f {g h}} +.CE is equivalent to -.QW "cmd a b {[c]} d {$e} f {g h}" . -.IP "[6] \fBBraces.\fR" -If the first character of a word is an open brace -.PQ { -and rule [5] does not apply, then -the word is terminated by the matching close brace -.PQ } "" . -Braces nest within the word: for each additional open -brace there must be an additional close brace (however, -if an open brace or close brace within the word is -quoted with a backslash then it is not counted in locating the -matching close brace). -No substitutions are performed on the characters between the -braces except for backslash-newline substitutions described -below, nor do semi-colons, newlines, close brackets, -or white space receive any special interpretation. -The word will consist of exactly the characters between the -outer braces, not including the braces themselves. -.IP "[7] \fBCommand substitution.\fR" -If a word contains an open bracket +.CS +cmd a b {[c]} d {$e} f {g h} +.CE +. +.IP "[7] \fBEvaluation.\fR" +To evaluate a script, an interpreter evaluates each successive command.
The +first word identifies a procedure, and the remaining words are passed to that +procedure for further evaluation. The procedure interprets each argument in +its own way, e.g. as an integer, variable name, list, mathematical expression, +script, or in some other arbitrary way. The result of the last command is the +result of the script. +. +.IP "[8] \fBCommand substitution.\fR" +Each pair of brackets .PQ [ -then Tcl performs \fIcommand substitution\fR. -To do this it invokes the Tcl interpreter recursively to process -the characters following the open bracket as a Tcl script. -The script may contain any number of commands and must be terminated -by a close bracket -.PQ ] "" . -The result of the script (i.e. the result of its last command) is -substituted into the word in place of the brackets and all of the -characters between them. -There may be any number of command substitutions in a single word. -Command substitution is not performed on words enclosed in braces. -.IP "[8] \fBVariable substitution.\fR" -If a word contains a dollar-sign +and +.PQ ] "" +encloses a script and is replaced by the result of that script. +.IP "[9] \fBVariable substitution.\fR" +Each of the following forms begins with dollar sign .PQ $ -followed by one of the forms -described below, then Tcl performs \fIvariable -substitution\fR: the dollar-sign and the following characters are -replaced in the word by the value of a variable. -Variable substitution may take any of the following forms: +and is replaced by the value of the identified variable. \fIname\fR names the +variable and is composed of ASCII letters (\fBA\fR\(en\fBZ\fR and +\fBa\fR\(en\fBz\fR), digits (\fB0\fR\(en\fB9\fR), underscores, or namespace +delimiters (two or more colons). \fIindex\fR is the name of an individual +variable within an array variable, and may be empty. .RS .TP 15 \fB$\fIname\fR . 
-\fIName\fR is the name of a scalar variable; the name is a sequence -of one or more characters that are a letter, digit, underscore, -or namespace separators (two or more colons). -Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR, -\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR). +\fIname\fR may not be empty. + .TP 15 \fB$\fIname\fB(\fIindex\fB)\fR . -\fIName\fR gives the name of an array variable and \fIindex\fR gives -the name of an element within that array. -\fIName\fR must contain only letters, digits, underscores, and -namespace separators, and may be an empty string. -Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR, -\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR). -Command substitutions, variable substitutions, and backslash -substitutions are performed on the characters of \fIindex\fR. +\fIname\fR may be empty. Substitutions are performed on \fIindex\fR. .TP 15 \fB${\fIname\fB}\fR +\fIname\fR may be empty. +.TP 15 +\fB${\fIname(index)\fB}\fR . -\fIName\fR is the name of a scalar variable or array element. It may contain -any characters whatsoever except for close braces. It indicates an array -element if \fIname\fR is in the form -.QW \fIarrayName\fB(\fIindex\fB)\fR -where \fIarrayName\fR does not contain any open parenthesis characters, -.QW \fB(\fR , -or close brace characters, -.QW \fB}\fR , -and \fIindex\fR can be any sequence of characters except for close brace -characters. No further -substitutions are performed during the parsing of \fIname\fR. -.PP -There may be any number of variable substitutions in a single word. -Variable substitution is not performed on words enclosed in braces. -.PP -Note that variables may contain character sequences other than those listed -above, but in that case other mechanisms must be used to access them (e.g., -via the \fBset\fR command's single-argument form). +\fIname\fR may be empty. No substitutions are performed. 
.RE -.IP "[9] \fBBackslash substitution.\fR" -If a backslash +Variables that are not accessible through one of the forms above may be +accessed through other mechanisms, e.g. the \fBset\fR command. +.IP "[10] \fBBackslash substitution.\fR" +Each backslash .PQ \e -appears within a word then \fIbackslash substitution\fR occurs. -In all cases but those described below the backslash is dropped and -the following character is treated as an ordinary -character and included in the word. -This allows characters such as double quotes, close brackets, -and dollar signs to be included in words without triggering -special processing. -The following table lists the backslash sequences that are -handled specially, along with the value that replaces each sequence. +that is not part of one of the forms listed below is removed, and the next +character is included in the word verbatim, which allows the inclusion of +characters that would normally be interpreted, namely whitespace, braces, +brackets, double quote, dollar sign, and backslash. The following sequences +are replaced as described: .RS .RS .RS .TP 7 \e\fBa\fR -Audible alert (bell) (Unicode U+000007). +Audible alert (bell) (U+7). .TP 7 \e\fBb\fR -Backspace (Unicode U+000008). +Backspace (U+8). .TP 7 \e\fBf\fR -Form feed (Unicode U+00000C). +Form feed (U+C). .TP 7 \e\fBn\fR -Newline (Unicode U+00000A). +Newline (U+A). .TP 7 \e\fBr\fR -Carriage-return (Unicode U+00000D). +Carriage-return (U+D). .TP 7 \e\fBt\fR -Tab (Unicode U+000009). +Tab (U+9). .TP 7 \e\fBv\fR -Vertical tab (Unicode U+00000B). +Vertical tab (U+B). .TP 7 \e\fB\fIwhiteSpace\fR . -A single space character replaces the backslash, newline, and all spaces -and tabs after the newline. This backslash sequence is unique in that it -is replaced in a separate pre-pass before the command is actually parsed. 
-This means that it will be replaced even when it occurs between braces, -and the resulting space will be treated as a word separator if it is not -in braces or quotes. +Newline preceded by an odd number of backslashes, along with the consecutive +spaces and tabs that immediately follow it, is replaced by a single space. +Because this happens before the command is split into words, it occurs even +within braced words, and the resulting space may subsequently be treated as +a word delimiter. .TP 7 \e\e Backslash @@ -195,78 +170,30 @@ Backslash .TP 7 \e\fIooo\fR . -The digits \fIooo\fR (one, two, or three of them) give a eight-bit octal -value for the Unicode character that will be inserted, in the range -\fI000\fR\(en\fI377\fR (i.e., the range U+000000\(enU+0000FF). -The parser will stop just before this range overflows, or when -the maximum of three digits is reached. The upper bits of the Unicode -character will be 0. +Up to three octal digits form an eight-bit value for a Unicode character in the +range \fI0\fR\(en\fI377\fR, i.e. U+0\(enU+FF. Only the digits that result in a +number in this range are consumed. .TP 7 \e\fBx\fIhh\fR . -The hexadecimal digits \fIhh\fR (one or two of them) give an eight-bit -hexadecimal value for the Unicode character that will be inserted. The upper -bits of the Unicode character will be 0 (i.e., the character will be in the -range U+000000\(enU+0000FF). +Up to two hexadecimal digits form an eight-bit value for a Unicode character in +the range \fI0\fR\(en\fIFF\fR. .TP 7 \e\fBu\fIhhhh\fR . -The hexadecimal digits \fIhhhh\fR (one, two, three, or four of them) give a -sixteen-bit hexadecimal value for the Unicode character that will be -inserted. The upper bits of the Unicode character will be 0 (i.e., the -character will be in the range U+000000\(enU+00FFFF). +Up to four hexadecimal digits form a 16-bit value for a Unicode character in +the range \fI0\fR\(en\fIFFFF\fR. .TP 7 \e\fBU\fIhhhhhhhh\fR .
-The hexadecimal digits \fIhhhhhhhh\fR (one up to eight of them) give a -twenty-one-bit hexadecimal value for the Unicode character that will be -inserted, in the range U+000000\(enU+10FFFF. The parser will stop just -before this range overflows, or when the maximum of eight digits -is reached. The upper bits of the Unicode character will be 0. -.RE +Up to eight hexadecimal digits form a 21-bit value for a Unicode character in +the range \fI0\fR\(en\fI10FFFF\fR. Only the digits that result in a number in +this range are consumed. .RE -.PP -Backslash substitution is not performed on words enclosed in braces, -except for backslash-newline as described above. .RE -.IP "[10] \fBComments.\fR" -If a hash character -.PQ # -appears at a point where Tcl is -expecting the first character of the first word of a command, -then the hash character and the characters that follow it, up -through the next newline, are treated as a comment and ignored. -The comment character only has significance when it appears -at the beginning of a command. -.IP "[11] \fBOrder of substitution.\fR" -Each character is processed exactly once by the Tcl interpreter -as part of creating the words of a command. -For example, if variable substitution occurs then no further -substitutions are performed on the value of the variable; the -value is inserted into the word verbatim. -If command substitution occurs then the nested command is -processed entirely by the recursive call to the Tcl interpreter; -no substitutions are performed before making the recursive -call and no additional substitutions are performed on the result -of the nested script. -.RS .PP -Substitutions take place from left to right, and each substitution is -evaluated completely before attempting to evaluate the next. Thus, a -sequence like -.PP -.CS -set y [set x 0][incr x][incr x] -.CE -.PP -will always set the variable \fIy\fR to the value, \fI012\fR. 
.RE -.IP "[12] \fBSubstitution and word boundaries.\fR" -Substitutions do not affect the word boundaries of a command, -except for argument expansion as specified in rule [5]. -For example, during variable substitution the entire value of -the variable becomes part of a single word, even if the variable's -value contains spaces. +. .SH KEYWORDS backslash, command, comment, script, substitution, variable '\" Local Variables: -- cgit v0.12