diff options
Diffstat (limited to 'man/man7/funtext.7')
-rw-r--r-- | man/man7/funtext.7 | 713 |
1 files changed, 713 insertions, 0 deletions
diff --git a/man/man7/funtext.7 b/man/man7/funtext.7 new file mode 100644 index 0000000..b24b317 --- /dev/null +++ b/man/man7/funtext.7 @@ -0,0 +1,713 @@ +.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sh \" Subsection heading +.br +.if t .Sp +.ne 5 +.PP +\fB\\$1\fR +.PP +.. +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. | will give a +.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to +.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' +.\" expand to `' in nroff, nothing in troff, for use with C<>. +.tr \(*W-|\(bv\*(Tr +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.if \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. nr % 0 +. rr F +.\} +.\" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. 
+.hy 0 +.if n .na +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. 
ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "funtext 7" +.TH funtext 7 "April 14, 2011" "version 1.4.5" "SAORD Documentation" +.SH "NAME" +Funtext \- Support for Column\-based Text Files +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +This document contains a summary of the options for processing column-based +text files. +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +Funtools will automatically sense and process \*(L"standard\*(R" +column-based text files as if they were \s-1FITS\s0 binary tables without any +change in Funtools syntax. In particular, you can filter text files +using the same syntax as \s-1FITS\s0 binary tables: +.PP +.Vb 3 +\& fundisp foo.txt'[cir 512 512 .1]' +\& fundisp \-T foo.txt > foo.rdb +\& funtable foo.txt'[pha=1:10,cir 512 512 10]' foo.fits +.Ve +.PP +The first example displays a filtered selection of a text file. The +second example converts a text file to an \s-1RDB\s0 file. The third example +converts a filtered selection of a text file to a \s-1FITS\s0 binary table. +.PP +Text files can also be used in Funtools image programs. 
In this case, +you must provide binning parameters (as with raw event files), using +the bincols keyword specifier: +.PP +.Vb 1 +\& bincols=([xname[:tlmin[:tlmax[:binsiz]]]],[yname[:tlmin[:tlmax[:binsiz]]]]) +.Ve +.PP +For example: +.PP +.Vb 1 +\& funcnts foo'[bincols=(x:1024,y:1024)]' "ann 512 512 0 10 n=10" +.Ve +.PP +\&\fBStandard Text Files\fR +.PP +Standard text files have the following characteristics: +.IP "\(bu" 4 +Optional comment lines start with # +.IP "\(bu" 4 +Optional blank lines are considered comments +.IP "\(bu" 4 +An optional table header consists of the following (in order): +.RS 4 +.IP "\(bu" 4 +a single line of alpha-numeric column names +.IP "\(bu" 4 +an optional line of unit strings containing the same number of cols +.IP "\(bu" 4 +an optional line of dashes containing the same number of cols +.RE +.RS 4 +.RE +.IP "\(bu" 4 +Data lines follow the optional header and (for the present) consist of + the same number of columns as the header. +.IP "\(bu" 4 +Standard delimiters such as space, tab, comma, semi\-colon, and bar. +.PP +Examples: +.PP +.Vb 5 +\& # rdb file +\& foo1 foo2 foo3 foos +\& ---- ---- ---- ---- +\& 1 2.2 3 xxxx +\& 10 20.2 30 yyyy +.Ve +.PP +.Vb 5 +\& # multiple consecutive whitespace and dashes +\& foo1 foo2 foo3 foos +\& --- ---- ---- ---- +\& 1 2.2 3 xxxx +\& 10 20.2 30 yyyy +.Ve +.PP +.Vb 2 +\& # comma delims and blank lines +\& foo1,foo2,foo3,foos +.Ve +.PP +.Vb 2 +\& 1,2.2,3,xxxx +\& 10,20.2,30,yyyy +.Ve +.PP +.Vb 4 +\& # bar delims with null values +\& foo1|foo2|foo3|foos +\& 1||3|xxxx +\& 10|20.2||yyyy +.Ve +.PP +.Vb 3 +\& # header-less data +\& 1 2.2 3 xxxx +\& 10 20.2 30 yyyy +.Ve +.PP +The default set of token delimiters consists of spaces, tabs, commas, +semi\-colons, and vertical bars. Several parsers are used +simultaneously to analyze a line of text in different ways. 
One way +of analyzing a line is to allow a combination of spaces, tabs, and +commas to be squashed into a single delimiter (no null values between +consecutive delimiters). Another way is to allow tab, semi\-colon, and +vertical bar delimiters to support null values, i.e. two consecutive +delimiters imply a null value (e.g. \s-1RDB\s0 file). A successful parser +is one which returns a consistent number of columns for all rows, with +each column having a consistent data type. More than one parser can +be successful. For now, it is assumed that successful parsers all +return the same tokens for a given line. (Theoretically, there are +pathological cases, which will be taken care of as needed). Bad parsers +are discarded on the fly. +.PP +If the header does not exist, then names \*(L"col1\*(R", \*(L"col2\*(R", etc. are +assigned to the columns to allow filtering. Furthermore, data types +for each column are determined by the data types found in the columns +of the first data line, and can be one of the following: string, int, +and double. Thus, all of the above examples return the following +display: +.PP +.Vb 4 +\& fundisp foo'[foo1>5]' +\& FOO1 FOO2 FOO3 FOOS +\& ---------- --------------------- ---------- ------------ +\& 10 20.20000000 30 yyyy +.Ve +.PP +\&\fBComments Convert to Header Params\fR +.PP +Comments which precede data rows are converted into header parameters and +will be written out as such using funimage or funhead. Two styles of comments +are recognized: +.PP +1. FITS-style comments have an equal sign \*(L"=\*(R" between the keyword and +value and an optional slash \*(L"/\*(R" to signify a comment. The strict \s-1FITS\s0 +rules on column positions are not enforced. In addition, strings only +need to be quoted if they contain whitespace. 
For example, the following +are valid FITS-style comments: +.PP +.Vb 5 +\& # fits0 = 100 +\& # fits1 = /usr/local/bin +\& # fits2 = "/usr/local/bin /opt/local/bin" +\& # fits3c = /usr/local/bin /opt/local/bin /usr/bin +\& # fits4c = "/usr/local/bin /opt/local/bin" / path dir +.Ve +.PP +Note that the fits3c comment is not quoted and therefore its value is the +single token \*(L"/usr/local/bin\*(R" and the comment is \*(L"opt/local/bin /usr/bin\*(R". +This is different from the quoted comment in fits4c. +.PP +2. Free-form comments can have an optional colon separator between the +keyword and value. In the absence of quotes, all tokens after the +keyword are part of the value, i.e. no comment is allowed. If a string +is quoted, then a slash \*(L"/\*(R" after the string will signify a comment. +For example: +.PP +.Vb 4 +\& # com1 /usr/local/bin +\& # com2 "/usr/local/bin /opt/local/bin" +\& # com3 /usr/local/bin /opt/local/bin /usr/bin +\& # com4c "/usr/local/bin /opt/local/bin" / path dir +.Ve +.PP +.Vb 4 +\& # com11: /usr/local/bin +\& # com12: "/usr/local/bin /opt/local/bin" +\& # com13: /usr/local/bin /opt/local/bin /usr/bin +\& # com14c: "/usr/local/bin /opt/local/bin" / path dir +.Ve +.PP +Note that com3 and com13 are not quoted, so the whole string is part of +the value, while com4c and com14c are quoted and have comments following +the values. +.PP +Some text files have column name and data type information in the header. +You can specify the format of column information contained in the +header using the \*(L"hcolfmt=\*(R" specification. See below for a detailed +description. +.PP +\&\fBMultiple Tables in a Single File\fR +.PP +Multiple tables are supported in a single file. If an RDB-style file +is sensed, then a ^L (form feed) will signify end of +table. Otherwise, an end of table is sensed when a new header (i.e., +all alphanumeric columns) is found. 
(Note that this heuristic does not +work for single column tables where the column type is \s-1ASCII\s0 and the +table that follows also has only one column.) You also can specify +characters that signal an end of table condition using the \fBeot=\fR +keyword. See below for details. +.PP +You can access the nth table (starting from 1) in a multi-table file +by enclosing the table number in brackets, as with a \s-1FITS\s0 extension: +.PP +.Vb 1 +\& fundisp foo'[2]' +.Ve +.PP +The above example will display the second table in the file. +(Index values start at 1 in order to maintain logical compatibility +with \s-1FITS\s0 files, where extension numbers also start at 1). +.PP +\&\fB\s-1\f(BITEXT\s0()\fB Specifier\fR +.PP +As with \s-1\fIARRAY\s0()\fR and \s-1\fIEVENTS\s0()\fR specifiers for raw image arrays and raw +event lists respectively, you can use \s-1\fITEXT\s0()\fR on text files to pass +key=value options to the parsers. An empty set of keywords is +equivalent to not having \s-1\fITEXT\s0()\fR at all, that is: +.PP +.Vb 2 +\& fundisp foo +\& fundisp foo'[TEXT()]' +.Ve +.PP +are equivalent. A multi-table index number is placed before the \s-1\fITEXT\s0()\fR +specifier as the first token, when indexing into a multi\-table file: +.PP +.Vb 1 +\& fundisp foo'[2,TEXT(...)]' +.Ve +.PP +The filter specification is placed after the \s-1\fITEXT\s0()\fR specifier, separated +by a comma, or in an entirely separate bracket: +.PP +.Vb 2 +\& fundisp foo'[TEXT(...),circle 512 512 .1]' +\& fundisp foo'[2,TEXT(...)][circle 512 512 .1]' +.Ve +.PP +\&\fB\f(BIText()\fB Keyword Options\fR +.PP +The following is a list of keywords that can be used within the \s-1\fITEXT\s0()\fR +specifier (the first three are the most important): +.IP "\(bu" 4 +delims=\*(L"[delims]\*(R" +.Sp +Specify token delimiters for this file. Only a single parser having these +delimiters will be used to process the file. 
+.Sp +.Vb 2 +\& fundisp foo.fits'[TEXT(delims="!")]' +\& fundisp foo.fits'[TEXT(delims="\et%")]' +.Ve +.IP "\(bu" 4 +comchars=\*(L"[comchars]\*(R" +.Sp +Specify comment characters. You must include \*(L"\en\*(R" to allow blank lines. +These comment characters will be used for all standard parsers (unless delims +are also specified). +.Sp +.Vb 1 +\& fundisp foo.fits'[TEXT(comchars="!\en")]' +.Ve +.IP "\(bu" 4 +cols=\*(L"[name1:type1 ...]\*(R" +.Sp +Specify names and data types of columns. This overrides header +names and/or data types in the first data row or default names and +data types for header-less tables. +.Sp +.Vb 1 +\& fundisp foo.fits'[TEXT(cols="x:I,y:I,pha:I,pi:I,time:D,dx:E,dy:e")]' +.Ve +.Sp +If the column specifier is the only keyword, then the cols= is not +required (in analogy with \s-1\fIEVENTS\s0()\fR): +.Sp +.Vb 1 +\& fundisp foo.fits'[TEXT(x:I,y:I,pha:I,pi:I,time:D,dx:E,dy:e)]' +.Ve +.Sp +Of course, an index is allowed in this case: +.Sp +.Vb 1 +\& fundisp foo.fits'[2,TEXT(x:I,y:I,pha:I,pi:I,time:D,dx:E,dy:e)]' +.Ve +.IP "\(bu" 4 +eot=\*(L"[eot delim]\*(R" +.Sp +Specify end of table string specifier for multi-table files. \s-1RDB\s0 +files support ^L. The end of table specifier is a string and the whole +string must be found alone on a line to signify \s-1EOT\s0. For example: +.Sp +.Vb 1 +\& fundisp foo.fits'[TEXT(eot="END")]' +.Ve +.Sp +will end the table when a line containing \*(L"\s-1END\s0\*(R" is found. Multiple lines +are supported, so that: +.Sp +.Vb 1 +\& fundisp foo.fits'[TEXT(eot="END\enGAME")]' +.Ve +.Sp +will end the table when a line containing \*(L"\s-1END\s0\*(R" is followed by a line +containing \*(L"\s-1GAME\s0\*(R". +.Sp +In the absence of an \s-1EOT\s0 delimiter, a new table will be sensed when a new +header (all alphanumeric columns) is found. +.IP "\(bu" 4 +null1=\*(L"[datatype]\*(R" +.Sp +Specify data type of a single null value in row 1. 
+Since column data types are determined by the first row, a null value +in that row will result in an error and a request to specify names and +data types using cols=. If you only have one null in row 1, you don't +need to specify all names and data types. Instead, use null1=\*(L"type\*(R" to +specify its data type. +.IP "\(bu" 4 +alen=[n] +.Sp +Specify size in bytes for \s-1ASCII\s0 type columns. +\&\s-1FITS\s0 binary tables only support fixed length \s-1ASCII\s0 columns, so a +size value must be specified. The default is 16 bytes. +.IP "\(bu" 4 +nullvalues=[\*(L"true\*(R"|\*(L"false\*(R"] +.Sp +Specify whether to expect null values. +Give the parsers a hint as to whether null values should be allowed. The +default is to try to determine this from the data. +.IP "\(bu" 4 +whitespace=[\*(L"true\*(R"|\*(L"false\*(R"] +.Sp +Specify whether surrounding white space should be kept as part of +string tokens. By default surrounding white space is removed from +tokens. +.IP "\(bu" 4 +header=[\*(L"true\*(R"|\*(L"false\*(R"] +.Sp +Specify whether to require a header. This is needed by tables +containing all string columns (and with no row containing dashes), in +order to be able to tell whether the first row is a header or part of +the data. The default is false, meaning that the first row will be +data. If a row of dashes is present, the previous row is considered the +column name row. +.IP "\(bu" 4 +units=[\*(L"true\*(R"|\*(L"false\*(R"] +.Sp +Specify whether to require a units line. +Give the parsers a hint as to whether a row specifying units should be +allowed. The default is to try to determine this from the data. +.IP "\(bu" 4 +i2f=[\*(L"true\*(R"|\*(L"false\*(R"] +.Sp +Specify whether to allow int to float conversions. +If a column in row 1 contains an integer value, the data type for that +column will be set to int. If a subsequent row contains a float in +that same column, an error will be signaled. 
This flag specifies that, +instead of an error, the float should be silently truncated to +int. Usually, you will want an error to be signaled, so that you can +specify the data type using cols= (or by changing the value of +the column in row 1). +.IP "\(bu" 4 +comeot=[\*(L"true\*(R"|\*(L"false\*(R"|0|1|2] +.Sp +Specify whether a comment signifies end of table. +If comeot is 0 or false, then comments do not signify end of table and +can be interspersed with data rows. If the value is true or 1 (the +default for standard parsers), then non-blank comment lines (e.g. lines +beginning with '#') signify end of table but blanks are allowed +between rows. If the value is 2, then all comments, including blank +lines, signify end of table. +.IP "\(bu" 4 +lazyeot=[\*(L"true\*(R"|\*(L"false\*(R"] +.Sp +Specify whether \*(L"lazy\*(R" end of table should be permitted (default is +true for standard formats, except rdb format where explicit ^L is required +between tables). A lazy \s-1EOT\s0 can occur when a new table starts directly +after an old one, with no special \s-1EOT\s0 delimiter. A check for this \s-1EOT\s0 +condition is begun when a given row contains all string tokens. If, in +addition, there is a mismatch between the number of tokens in the +previous row and this row, or a mismatch between the number of string +tokens in the previous row and this row, a new table is assumed to have +been started. For example: +.Sp +.Vb 4 +\& ival1 sval3 +\& ----- ----- +\& 1 two +\& 3 four +.Ve +.Sp +.Vb 4 +\& jval1 jval2 tval3 +\& ----- ----- ------ +\& 10 20 thirty +\& 40 50 sixty +.Ve +.Sp +Here the line \*(L"jval1 ...\*(R" contains all string tokens. In addition, +the number of tokens in this line (3) differs from the number of +tokens in the previous line (2). Therefore a new table is assumed +to have started. 
Similarly: +.Sp +.Vb 4 +\& ival1 ival2 sval3 +\& ----- ----- ----- +\& 1 2 three +\& 4 5 six +.Ve +.Sp +.Vb 4 +\& jval1 jval2 tval3 +\& ----- ----- ------ +\& 10 20 thirty +\& 40 50 sixty +.Ve +.Sp +Again, the line \*(L"jval1 ...\*(R" contains all string tokens. The number of +string tokens in the previous row (1) differs from the number of +string tokens in the current row (3). We therefore assume a new table has been +started. This lazy \s-1EOT\s0 test is not performed if lazyeot is explicitly +set to false. +.IP "\(bu" 4 +hcolfmt=[header column format] +.Sp +Some text files have column name and data type information in the header. +For example, VizieR catalogs have headers containing both column names +and data types: +.Sp +.Vb 3 +\& #Column e_Kmag (F6.3) ?(k_msigcom) K total magnitude uncertainty (4) [ucd=ERROR] +\& #Column Rflg (A3) (rd_flg) Source of JHK default mag (6) [ucd=REFER_CODE] +\& #Column Xflg (I1) [0,2] (gal_contam) Extended source contamination (10) [ucd=CODE_MISC] +.Ve +.Sp +while Sextractor files have headers containing column names alone: +.Sp +.Vb 4 +\& # 1 X_IMAGE Object position along x [pixel] +\& # 2 Y_IMAGE Object position along y [pixel] +\& # 3 ALPHA_J2000 Right ascension of barycenter (J2000) [deg] +\& # 4 DELTA_J2000 Declination of barycenter (J2000) [deg] +.Ve +.Sp +The hcolfmt specification allows you to describe which header lines +contain column name and data type information. It consists of a string +defining the format of the column line, using \*(L"$col\*(R" (or \*(L"$name\*(R") to +specify placement of the column name, \*(L"$fmt\*(R" to specify placement of the +data format, and \*(L"$skip\*(R" to specify tokens to ignore. You also can +specify tokens explicitly (or, for those users familiar with how +sscanf works, you can specify scanf skip specifiers using \*(L"%*\*(R"). 
+For example, the VizieR hcolfmt above might be specified in several ways: +.Sp +.Vb 3 +\& Column $col ($fmt) # explicit specification of "Column" string +\& $skip $col ($fmt) # skip one token +\& %*s $col ($fmt) # skip one string (using scanf format) +.Ve +.Sp +while the Sextractor format might be specified using: +.Sp +.Vb 2 +\& $skip $col # skip one token +\& %*d $col # skip one int (using scanf format) +.Ve +.Sp +You must ensure that the hcolfmt statement only senses actual column +definitions, with no false positives or negatives. For example, the +first Sextractor specification, \*(L"$skip \f(CW$col\fR\*(R", will consider any header +line containing two tokens to be a column name specifier, while the +second one, \*(L"%*d \f(CW$col\fR\*(R", requires an integer to be the first token. In +general, it is preferable to specify formats as explicitly as +possible. +.Sp +Note that the VizieR-style header info is sensed automatically by the +funtools standard VizieR-like parser, using the hcolfmt \*(L"Column \f(CW$col\fR +($fmt)\*(R". There is no need for explicit use of hcolfmt in this case. +.IP "\(bu" 4 +debug=[\*(L"true\*(R"|\*(L"false\*(R"] +.Sp +Display debugging information during parsing. +.PP +\&\fBEnvironment Variables\fR +.PP +Environment variables are defined to allow many of these \s-1\fITEXT\s0()\fR values to be +set without having to include them in \s-1\fITEXT\s0()\fR every time a file is processed: +.PP +.Vb 10 +\& keyword environment variable +\& ------- -------------------- +\& delims TEXT_DELIMS +\& comchars TEXT_COMCHARS +\& cols TEXT_COLUMNS +\& eot TEXT_EOT +\& null1 TEXT_NULL1 +\& alen TEXT_ALEN +\& bincols TEXT_BINCOLS +\& hcolfmt TEXT_HCOLFMT +.Ve +.PP +\&\fBRestrictions and Problems\fR +.PP +As with raw event files, the '+' (copy extensions) specifier is not +supported for programs such as funtable. +.PP +String to int and int to string data conversions are allowed by the +text parsers. 
This is done more by force of circumstance than by +conviction: these transitions often happen with VizieR catalogs, +which we want to support fully. One consequence of allowing these +transitions is that the text parsers can get confused by columns which +contain a valid integer in the first row and then switch to a +string. Consider the following table: +.PP +.Vb 4 +\& xxx yyy zzz +\& ---- ---- ---- +\& 111 aaa bbb +\& ccc 222 ddd +.Ve +.PP +The xxx column has an integer value in row one and a string in row two, +while the yyy column has the reverse. The parser will erroneously +treat the first column as having data type int: +.PP +.Vb 5 +\& fundisp foo.tab +\& XXX YYY ZZZ +\& ---------- ------------ ------------ +\& 111 'aaa' 'bbb' +\& 1667457792 '222' 'ddd' +.Ve +.PP +while the second column is processed correctly. This situation can be avoided +in any number of ways, all of which force the data type of the first column +to be a string. For example, you can edit the file and explicitly quote the +first row of the column: +.PP +.Vb 4 +\& xxx yyy zzz +\& ---- ---- ---- +\& "111" aaa bbb +\& ccc 222 ddd +.Ve +.PP +.Vb 5 +\& [sh] fundisp foo.tab +\& XXX YYY ZZZ +\& ------------ ------------ ------------ +\& '111' 'aaa' 'bbb' +\& 'ccc' '222' 'ddd' +.Ve +.PP +You can edit the file and explicitly set the data type of the first column: +.PP +.Vb 4 +\& xxx:3A yyy zzz +\& ------ ---- ---- +\& 111 aaa bbb +\& ccc 222 ddd +.Ve +.PP +.Vb 5 +\& [sh] fundisp foo.tab +\& XXX YYY ZZZ +\& ------------ ------------ ------------ +\& '111' 'aaa' 'bbb' +\& 'ccc' '222' 'ddd' +.Ve +.PP +You also can explicitly set the column names and data types of all columns, +without editing the file: +.PP +.Vb 5 +\& [sh] fundisp foo.tab'[TEXT(xxx:3A,yyy:3A,zzz:3a)]' +\& XXX YYY ZZZ +\& ------------ ------------ ------------ +\& '111' 'aaa' 'bbb' +\& 'ccc' '222' 'ddd' +.Ve +.PP +The issue of data type transitions (which to allow and which to disallow) +is still under discussion. 
+.SH "SEE ALSO" +.IX Header "SEE ALSO" +See funtools(7) for a list of Funtools help pages |