1 files changed, 327 insertions, 0 deletions
diff --git a/funtools/man/man7/funidx.7 b/funtools/man/man7/funidx.7
new file mode 100644
index 0000000..bf87bb8
--- /dev/null
+++ b/funtools/man/man7/funidx.7
@@ -0,0 +1,327 @@
+.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sh \" Subsection heading
+.br
+.if t .Sp
+.ne 5
+.PP
+\fB\\$1\fR
+.PP
+..
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings.  \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote.  | will give a
+.\" real vertical bar.  \*(C+ will give a nicer C++.  Capital omega is used to
+.\" do unbreakable dashes and therefore won't be available.  \*(C` and \*(C'
+.\" expand to `' in nroff, nothing in troff, for use with C<>.
+.tr \(*W-|\(bv\*(Tr
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+.    ds -- \(*W-
+.    ds PI pi
+.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
+.    ds L" ""
+.    ds R" ""
+.    ds C` ""
+.    ds C' ""
+'br\}
+.el\{\
+.    ds -- \|\(em\|
+.    ds PI \(*p
+.    ds L" ``
+.    ds R" ''
+'br\}
+.\"
+.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
+.\" entries marked with X<> in POD.  Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.if \nF \{\
+.    de IX
+.    tm Index:\\$1\t\\n%\t"\\$2"
+..
+.    nr % 0
+.    rr F
+.\}
+.\"
+.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.hy 0
+.if n .na
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
+.    \" fudge factors for nroff and troff
+.if n \{\
+.    ds #H 0
+.    ds #V .8m
+.    ds #F .3m
+.    ds #[ \f1
+.    ds #] \fP
+.\}
+.if t \{\
+.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+.    ds #V .6m
+.    ds #F 0
+.    ds #[ \&
+.    ds #] \&
+.\}
+.    \" simple accents for nroff and troff
+.if n \{\
+.    ds ' \&
+.    ds ` \&
+.    ds ^ \&
+.    ds , \&
+.    ds ~ ~
+.    ds /
+.\}
+.if t \{\
+.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+.    \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+.    \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+.    \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+.    ds : e
+.    ds 8 ss
+.    ds o a
+.    ds d- d\h'-1'\(ga
+.    ds D- D\h'-1'\(hy
+.    ds th \o'bp'
+.    ds Th \o'LP'
+.    ds ae ae
+.    ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "funidx 7"
+.TH funidx 7 "April 14, 2011" "version 1.4.5" "SAORD Documentation"
+.SH "NAME"
+Funidx \- Using Indexes to Filter Rows in a Table
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+This document contains a summary of the user interface for 
+filtering rows in binary tables with indexes.
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+Funtools Table Filtering allows rows in a
+table to be selected based on the values of one or more columns in the
+row. Because the actual filter code is compiled on the fly, it is very
+efficient. However, for very large files (hundreds of MB or larger),
+evaluating the filter expression on each row can take a long time. Therefore,
+funtools supports index files for columns, which are used automatically during
+filtering to dramatically reduce the number of row evaluations performed.
+The speed increase for indexed filtering can be an order of magnitude or
+more, depending on the size of the file.
+.PP
+The funindex program creates an
+index on one or more columns in a binary table. For example, to create an index
+for the column pi in the file huge.fits, use:
+.PP
+.Vb 1
+\&  funindex huge.fits pi
+.Ve
+.PP
+This will create an index named huge_pi.idx.
+.PP
+When a filter expression is initialized for row evaluation, funtools
+looks for an index file for each column in the filter expression. If
+found, and if the file modification date of the index file is later
+than that of the data file, then the index will be used to reduce the
+number of rows that are evaluated in the filter. When 
+Spatial Region Filtering is part of the
+expression, the columns associated with the region are checked for index
+files.
+.PP
+If an index file is not available for a given column, then in general,
+all rows must be checked when that column is part of a filter
+expression.  This is not true, however, when a non-indexed column is
+part of an \s-1AND\s0 expression. In this case, only the rows that pass the
+other part of the \s-1AND\s0 expression need to be checked. Thus, in some cases,
+filtering speed can increase significantly even if not all columns are
+indexed.
+.PP
+Also note that certain types of filter expression syntax cannot make
+use of indexes. For example, calling functions with column names as
+arguments implies that all rows must be checked against the function
+value. Once again, however, if this function is part of an \s-1AND\s0
+expression, then a significant improvement in speed still is possible
+if the other part of the \s-1AND\s0 expression is indexed.
+.PP
+For example, note below the dramatic speedup in searching a 1 GB
+file using an \s-1AND\s0 filter, even when one of the columns (pha) has no
+index:
+.PP
+.Vb 22
+\&  time fundisp \e
+\&  huge.fits'[idx_activate=0,idx_debug=1,pha=2348&&cir 4000 4000 1]' \e
+\&  "x y pha"
+\&          x           y        pha                                   
+\& ---------- ----------- ----------                                    
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    42.36u 13.07s 6:42.89 13.7%
+.Ve
+.PP
+.Vb 26
+\&  time fundisp \e
+\&  huge.fits'[idx_activate=1,idx_debug=1,pha=2348&&cir 4000 4000 1]' \e
+\&  "x y pha"
+\&          x           y        pha                                    
+\& ---------- ----------- ----------                                    
+\& idxeq: [INDEF]                                   
+\& idxand sort: x[ROW 8037025:8070128] y[ROW 5757665:5792352]             
+\& idxand(1): INDEF [IDX_OR_SORT]                                   
+\& idxall(1): [IDX_OR_SORT]                                   
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    3999.48     4000.47       2348
+\&    1.55u 0.37s 1:19.80 2.4%
+.Ve
+.PP
+When all columns are indexed, the increase in speed can be even more dramatic:
+.PP
+.Vb 22
+\&  time fundisp \e
+\&  huge.fits'[idx_activate=0,idx_debug=1,pi=770&&cir 4000 4000 1]' \e
+\&  "x y pi"
+\&          x           y         pi                                    
+\& ---------- ----------- ----------                                    
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    42.60u 12.63s 7:28.63 12.3%
+.Ve
+.PP
+.Vb 27
+\&  time fundisp \e
+\&  huge.fits'[idx_activate=1,idx_debug=1,pi=770&&cir 4000 4000 1]' \e
+\&  "x y pi"
+\&          x           y         pi                                    
+\& ---------- ----------- ----------                                    
+\& idxeq: pi start=9473025,stop=9492240 => pi[ROW 9473025:9492240]          
+\& idxand sort: x[ROW 8037025:8070128] y[ROW 5757665:5792352]               
+\& idxor sort/merge: pi[ROW 9473025:9492240] [IDX_OR_SORT]                   
+\& idxmerge(5): [IDX_OR_SORT] pi[ROW]                                   
+\& idxall(1): [IDX_OR_SORT]                                   
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    3999.48     4000.47        770
+\&    1.67u 0.30s 0:24.76 7.9%
+.Ve
+.PP
+The miracle of indexed filtering (and indeed, of any indexing) is the
+speed of the binary search on the index, which is of order log2(n)
+instead of n. (The funtools binary search method is taken from
+http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary, to whose
+author grateful acknowledgement is made.)  This means that the larger the
+file, the better the performance. Conversely, it also means that for
+small files, using an index (and the overhead involved) can slow
+filtering down somewhat. Our tests indicate that on a file containing
+a few tens of thousands of rows, indexed filtering can be 10 to 20
+percent slower than non-indexed filtering. Of course, your mileage
+will vary with conditions (disk access speed, amount of available
+memory, process load, etc.).
+.PP
+Any problem encountered during index processing will result in
+indexing being turned off, and replaced by filtering all rows. You can turn
+indexing off manually by setting the idx_activate variable to 0 (in a filter
+expression) or the \s-1FILTER_IDX_ACTIVATE\s0 environment variable to 0 (in the global
+environment). Debugging output showing how the indexes are being processed can
+be displayed to stderr by setting the idx_debug variable to 1 (in a filter
+expression) or the \s-1FILTER_IDX_DEBUG\s0 environment variable to 1 (in the global
+environment).
+.PP
+Currently, indexed filtering only works with \s-1FITS\s0 binary tables and raw
+event files. It does not work with text files. This restriction might be
+removed in a future release.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+See funtools(7) for a list of Funtools help pages.