summary refs log tree commit diff stats
path: root/doc/scan.n
diff options
context:
space:
mode:
Diffstat (limited to 'doc/scan.n')
-rw-r--r-- doc/scan.n 293
1 files changed, 226 insertions, 67 deletions
diff --git a/doc/scan.n b/doc/scan.n
index 96121f8..5b91449 100644
--- a/doc/scan.n
+++ b/doc/scan.n
@@ -1,134 +1,293 @@
'\"
'\" Copyright (c) 1993 The Regents of the University of California.
'\" Copyright (c) 1994-1996 Sun Microsystems, Inc.
+'\" Copyright (c) 2000 Scriptics Corporation.
'\"
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" SCCS: @(#) scan.n 1.12 96/08/26 13:00:13
-'\"
+.TH scan n 8.4 Tcl "Tcl Built-In Commands"
.so man.macros
-.TH scan n "" Tcl "Tcl Built-In Commands"
.BS
'\" Note: do not modify the .SH NAME line immediately below!
.SH NAME
scan \- Parse string using conversion specifiers in the style of sscanf
.SH SYNOPSIS
-\fBscan \fIstring format varName \fR?\fIvarName ...\fR?
+\fBscan \fIstring format \fR?\fIvarName varName ...\fR?
.BE
-
.SH INTRODUCTION
.PP
-This command parses fields from an input string in the same fashion
-as the ANSI C \fBsscanf\fR procedure and returns a count of the number
-of conversions performed, or -1 if the end of the input string is
-reached before any conversions have been performed.
-\fIString\fR gives the input to be parsed and \fIformat\fR indicates
-how to parse it, using \fB%\fR conversion specifiers as in \fBsscanf\fR.
-Each \fIvarName\fR gives the name of a variable; when a field is
-scanned from \fIstring\fR the result is converted back into a string
-and assigned to the corresponding variable.
-
+This command parses substrings from an input string in a fashion similar
+to the ANSI C \fBsscanf\fR procedure and returns a count of the number of
+conversions performed, or -1 if the end of the input string is reached
+before any conversions have been performed. \fIString\fR gives the input
+to be parsed and \fIformat\fR indicates how to parse it, using \fB%\fR
+conversion specifiers as in \fBsscanf\fR. Each \fIvarName\fR gives the
+name of a variable; when a substring is scanned from \fIstring\fR that
+matches a conversion specifier, the substring is assigned to the
+corresponding variable.
+If no \fIvarName\fR variables are specified, then \fBscan\fR works in an
+inline manner, returning the data that would otherwise be stored in the
+variables as a list. In the inline case, an empty string is returned when
+the end of the input string is reached before any conversions have been
+performed.
.SH "DETAILS ON SCANNING"
.PP
-\fBScan\fR operates by scanning \fIstring\fR and \fIformatString\fR together.
-If the next character in \fIformatString\fR is a blank or tab then it
+\fBScan\fR operates by scanning \fIstring\fR and \fIformat\fR together.
+If the next character in \fIformat\fR is a blank or tab then it
matches any number of white space characters in \fIstring\fR (including
zero).
-Otherwise, if it isn't a \fB%\fR character then it
+Otherwise, if it is not a \fB%\fR character then it
must match the next character of \fIstring\fR.
-When a \fB%\fR is encountered in \fIformatString\fR, it indicates
+When a \fB%\fR is encountered in \fIformat\fR, it indicates
the start of a conversion specifier.
-A conversion specifier contains three fields after the \fB%\fR:
-a \fB*\fR, which indicates that the converted value is to be discarded
-instead of assigned to a variable; a number indicating a maximum field
-width; and a conversion character.
+A conversion specifier contains up to four fields after the \fB%\fR:
+an XPG3 position specifier (or a \fB*\fR to indicate the converted
+value is to be discarded instead of assigned to any variable); a number
+indicating a maximum substring width; a size modifier; and a
+conversion character.
All of these fields are optional except for the conversion character.
+The fields that are present must appear in the order given above.
.PP
-When \fBscan\fR finds a conversion specifier in \fIformatString\fR, it
-first skips any white-space characters in \fIstring\fR.
+When \fBscan\fR finds a conversion specifier in \fIformat\fR, it
+first skips any white-space characters in \fIstring\fR (unless the
+conversion character is \fB[\fR or \fBc\fR).
Then it converts the next input characters according to the
conversion specifier and stores the result in the variable given
by the next argument to \fBscan\fR.
+.SS "OPTIONAL POSITIONAL SPECIFIER"
+.PP
+If the \fB%\fR is followed by a decimal number and a \fB$\fR, as in
+.QW \fB%2$d\fR ,
+then the variable to use is not taken from the next
+sequential argument. Instead, it is taken from the argument indicated
+by the number, where 1 corresponds to the first \fIvarName\fR. If
+there are any positional specifiers in \fIformat\fR then all of the
+specifiers must be positional. Every \fIvarName\fR on the argument
+list must correspond to exactly one conversion specifier or an error
+is generated. In the inline case, any position can be specified
+at most once and the empty positions will be filled in with empty strings.
+.SS "OPTIONAL SIZE MODIFIER"
+.PP
+The size modifier field is used only when scanning a substring into
+one of Tcl's integer values. The size modifier field dictates the
+integer range acceptable to be stored in a variable, or, for the inline
+case, in a position in the result list.
+The syntactically valid values for the size modifier are \fBh\fR, \fBL\fR,
+\fBl\fR, and \fBll\fR. The \fBh\fR size modifier value is equivalent
+to the absence of a size modifier in the conversion specifier.
+Either one indicates the integer range to be stored is limited to
+the same range produced by the \fBint()\fR function of the \fBexpr\fR
+command. The \fBL\fR size modifier is equivalent to the \fBl\fR size
+modifier. Either one indicates the integer range to be stored is
+limited to the same range produced by the \fBwide()\fR function of
+the \fBexpr\fR command. The \fBll\fR size modifier indicates that
+the integer range to be stored is unlimited.
+.SS "MANDATORY CONVERSION CHARACTER"
+.PP
The following conversion characters are supported:
-.TP 10
+.TP
\fBd\fR
-The input field must be a decimal integer.
-It is read in and the value is stored in the variable as a decimal string.
-.TP 10
+.
+The input substring must be a decimal integer.
+It is read in and the integer value is stored in the variable,
+truncated as required by the size modifier value.
+.TP
\fBo\fR
-The input field must be an octal integer. It is read in and the
-value is stored in the variable as a decimal string.
-.TP 10
-\fBx\fR
-The input field must be a hexadecimal integer. It is read in
-and the value is stored in the variable as a decimal string.
-.TP 10
+.
+The input substring must be an octal integer. It is read in and the
+integer value is stored in the variable,
+truncated as required by the size modifier value.
+.TP
+\fBx\fR or \fBX\fR
+.
+The input substring must be a hexadecimal integer.
+It is read in and the integer value is stored in the variable,
+truncated as required by the size modifier value.
+.TP
+\fBb\fR
+.
+The input substring must be a binary integer.
+It is read in and the integer value is stored in the variable,
+truncated as required by the size modifier value.
+.TP
+\fBu\fR
+.
+The input substring must be a decimal integer.
+The integer value is truncated as required by the size modifier
+value, and the corresponding unsigned value for that truncated
+range is computed and stored in the variable as a decimal string.
+The conversion makes no sense without reference to a truncation range,
+so the size modifier \fBll\fR is not permitted in combination
+with conversion character \fBu\fR.
+.TP
+\fBi\fR
+.
+The input substring must be an integer. The base (i.e. decimal, binary,
+octal, or hexadecimal) is determined in the same fashion as described in
+\fBexpr\fR. The integer value is stored in the variable,
+truncated as required by the size modifier value.
+.TP
\fBc\fR
-A single character is read in and its binary value is stored in
-the variable as a decimal string.
+.
+A single character is read in and its Unicode value is stored in
+the variable as an integer value.
Initial white space is not skipped in this case, so the input
-field may be a white-space character.
-This conversion is different from the ANSI standard in that the
-input field always consists of a single character and no field
-width may be specified.
-.TP 10
+substring may be a white-space character.
+.TP
\fBs\fR
-The input field consists of all the characters up to the next
+.
+The input substring consists of all the characters up to the next
white-space character; the characters are copied to the variable.
-.TP 10
-\fBe\fR or \fBf\fR or \fBg\fR
-The input field must be a floating-point number consisting
+.TP
+\fBe\fR or \fBf\fR or \fBg\fR or \fBE\fR or \fBG\fR
+.
+The input substring must be a floating-point number consisting
of an optional sign, a string of decimal digits possibly
containing a decimal point, and an optional exponent consisting
of an \fBe\fR or \fBE\fR followed by an optional sign and a string of
decimal digits.
-It is read in and stored in the variable as a floating-point string.
-.TP 10
+It is read in and stored in the variable as a floating-point value.
+.TP
\fB[\fIchars\fB]\fR
-The input field consists of any number of characters in
-\fIchars\fR.
+.
+The input substring consists of one or more characters in \fIchars\fR.
The matching string is stored in the variable.
If the first character between the brackets is a \fB]\fR then
it is treated as part of \fIchars\fR rather than the closing
bracket for the set.
-.TP 10
+If \fIchars\fR
+contains a sequence of the form \fIa\fB\-\fIb\fR then any
+character between \fIa\fR and \fIb\fR (inclusive) will match.
+If the first or last character between the brackets is a \fB\-\fR, then
+it is treated as part of \fIchars\fR rather than indicating a range.
+.TP
\fB[^\fIchars\fB]\fR
-The input field consists of any number of characters not in
-\fIchars\fR.
+.
+The input substring consists of one or more characters not in \fIchars\fR.
The matching string is stored in the variable.
If the character immediately following the \fB^\fR is a \fB]\fR then it is
treated as part of the set rather than the closing bracket for
the set.
-.LP
+If \fIchars\fR
+contains a sequence of the form \fIa\fB\-\fIb\fR then any
+character between \fIa\fR and \fIb\fR (inclusive) will be excluded
+from the set.
+If the first or last character between the brackets is a \fB\-\fR, then
+it is treated as part of \fIchars\fR rather than indicating a range.
+.TP
+\fBn\fR
+.
+No input is consumed from the input string. Instead, the total number
+of characters scanned from the input string so far is stored in the variable.
+.PP
The number of characters read from the input for a conversion is the
largest number that makes sense for that particular conversion (e.g.
as many decimal digits as possible for \fB%d\fR, as
many octal digits as possible for \fB%o\fR, and so on).
-The input field for a given conversion terminates either when a
-white-space character is encountered or when the maximum field
+The input substring for a given conversion terminates either when a
+white-space character is encountered or when the maximum substring
width has been reached, whichever comes first.
If a \fB*\fR is present in the conversion specifier
then no variable is assigned and the next scan argument is not consumed.
-
.SH "DIFFERENCES FROM ANSI SSCANF"
.PP
The behavior of the \fBscan\fR command is the same as the behavior of
the ANSI C \fBsscanf\fR procedure except for the following differences:
.IP [1]
-\fB%p\fR and \fB%n\fR conversion specifiers are not currently
-supported.
+The \fB%p\fR conversion specifier is not supported.
.IP [2]
For \fB%c\fR conversions a single character value is
converted to a decimal string, which is then assigned to the
corresponding \fIvarName\fR;
-no field width may be specified for this conversion.
+no substring width may be specified for this conversion.
.IP [3]
-The \fBl\fR, \fBh\fR, and \fBL\fR modifiers are ignored; integer
-values are always converted as if there were no modifier present
-and real values are always converted as if the \fBl\fR modifier
-were present (i.e. type \fBdouble\fR is used for the internal
-representation).
-
+The \fBh\fR modifier is always ignored and the \fBl\fR and \fBL\fR
+modifiers are ignored when converting real values (i.e. type
+\fBdouble\fR is used for the internal representation). The \fBll\fR
+modifier has no \fBsscanf\fR counterpart.
+.IP [4]
+If the end of the input string is reached before any conversions have been
+performed and no variables are given, an empty string is returned.
+.SH EXAMPLES
+.PP
+Convert a Unicode character to its numeric value:
+.PP
+.CS
+set char "x"
+set value [\fBscan\fR $char %c]
+.CE
+.PP
+Parse a simple color specification of the form \fI#RRGGBB\fR using
+hexadecimal conversions with substring sizes:
+.PP
+.CS
+set string "#08D03F"
+\fBscan\fR $string "#%2x%2x%2x" r g b
+.CE
+.PP
+Parse a \fIHH:MM\fR time string, noting that this avoids problems with
+octal numbers by forcing interpretation as decimals (if we did not
+care, we would use the \fB%i\fR conversion instead):
+.PP
+.CS
+set string "08:08" ;# *Not* octal!
+if {[\fBscan\fR $string "%d:%d" hours minutes] != 2} {
+ error "not a valid time string"
+}
+# We have to understand numeric ranges ourselves...
+if {$minutes < 0 || $minutes > 59} {
+ error "invalid number of minutes"
+}
+.CE
+.PP
+Break a string up into sequences of non-whitespace characters (note
+the use of the \fB%n\fR conversion so that skipping over
+leading whitespace is handled correctly):
+.PP
+.CS
+set string " a string {with braced words} + leading space "
+set words {}
+while {[\fBscan\fR $string %s%n word length] == 2} {
+ lappend words $word
+ set string [string range $string $length end]
+}
+.CE
+.PP
+Parse a simple coordinate string, checking that it is complete by
+looking for the terminating character explicitly:
+.PP
+.CS
+set string "(5.2,-4e-2)"
+# Note that the spaces before the literal parts of
+# the scan pattern are significant, and that ")" is
+# the Unicode character \eu0029
+if {
+ [\fBscan\fR $string " (%f ,%f %c" x y last] != 3
+ || $last != 0x0029
+} then {
+ error "invalid coordinate string"
+}
+puts "X=$x, Y=$y"
+.CE
+.PP
+An interactive session demonstrating the truncation of integer
+values determined by size modifiers:
+.PP
+.CS
+\fI%\fR set tcl_platform(wordSize)
+4
+\fI%\fR scan 20000000000000000000 %d
+2147483647
+\fI%\fR scan 20000000000000000000 %ld
+9223372036854775807
+\fI%\fR scan 20000000000000000000 %lld
+20000000000000000000
+.CE
+.SH "SEE ALSO"
+format(n), sscanf(3)
.SH KEYWORDS
conversion specifier, parse, scan
+'\" Local Variables:
+'\" mode: nroff
+'\" End: