diff options
Diffstat (limited to 'funtools/man/man7/funidx.7')
-rw-r--r-- | funtools/man/man7/funidx.7 | 327 |
1 files changed, 327 insertions, 0 deletions
diff --git a/funtools/man/man7/funidx.7 b/funtools/man/man7/funidx.7 new file mode 100644 index 0000000..bf87bb8 --- /dev/null +++ b/funtools/man/man7/funidx.7 @@ -0,0 +1,327 @@ +.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sh \" Subsection heading +.br +.if t .Sp +.ne 5 +.PP +\fB\\$1\fR +.PP +.. +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. | will give a +.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to +.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' +.\" expand to `' in nroff, nothing in troff, for use with C<>. +.tr \(*W-|\(bv\*(Tr +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.if \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. nr % 0 +. rr F +.\} +.\" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. 
+.hy 0 +.if n .na +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. 
ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "funidx 7" +.TH funidx 7 "April 14, 2011" "version 1.4.5" "SAORD Documentation" +.SH "NAME" +Funidx \- Using Indexes to Filter Rows in a Table +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +This document contains a summary of the user interface for +filtering rows in binary tables with indexes. +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +Funtools Table Filtering allows rows in a +table to be selected based on the values of one or more columns in the +row. Because the actual filter code is compiled on the fly, it is very +efficient. However, for very large files (hundreds of Mb or larger), +evaluating the filter expression on each row can take a long time. Therefore, +funtools supports index files for columns, which are used automatically during +filtering to reduce dramatically the number of row evaluations performed. +The speed increase for indexed filtering can be an order of magnitude or +more, depending on the size of the file. +.PP +The funindex program creates an +index on one or more columns in a binary table. For example, to create an index +for the column pi in the file huge.fits, use: +.PP +.Vb 1 +\& funindex huge.fits pi +.Ve +.PP +This will create an index named huge_pi.idx. +.PP +When a filter expression is initialized for row evaluation, funtools +looks for an index file for each column in the filter expression. If +found, and if the file modification date of the index file is later +than that of the data file, then the index will be used to reduce the +number of rows that are evaluated in the filter. When +Spatial Region Filtering is part of the +expression, the columns associated with the region are checked for index +files. +.PP +If an index file is not available for a given column, then in general, +all rows must be checked when that column is part of a filter +expression. 
This is not true, however, when a non-indexed column is +part of an \s-1AND\s0 expression. In this case, only the rows that pass the +other part of the \s-1AND\s0 expression need to be checked. Thus, in some cases, +filtering speed can increase significantly even if not all columns are +indexed. +.PP +Also note that certain types of filter expression syntax cannot make +use of indexes. For example, calling functions with column names as +arguments implies that all rows must be checked against the function +value. Once again, however, if this function is part of an \s-1AND\s0 +expression, then a significant improvement in speed still is possible +if the other part of the \s-1AND\s0 expression is indexed. +.PP +For example, note below the dramatic speedup in searching a 1 Gb +file using an \s-1AND\s0 filter, even when one of the columns (pha) has no +index: +.PP +.Vb 22 +\& time fundisp \e +\& huge.fits'[idx_activate=0,idx_debug=1,pha=2348&&cir 4000 4000 1]' \e +\& "x y pha" +\& x y pha +\& ---------- ----------- ---------- +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 42.36u 13.07s 6:42.89 13.7% +.Ve +.PP +.Vb 26 +\& time fundisp \e +\& huge.fits'[idx_activate=1,idx_debug=1,pha=2348&&cir 4000 4000 1]' \e +\& "x y pha" +\& x y pha +\& ---------- ----------- ---------- +\& idxeq: [INDEF] +\& idxand sort: x[ROW 8037025:8070128] y[ROW 5757665:5792352] +\& idxand(1): INDEF [IDX_OR_SORT] +\& idxall(1): [IDX_OR_SORT] +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 
3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 3999.48 4000.47 2348 +\& 1.55u 0.37s 1:19.80 2.4% +.Ve +.PP +When all columns are indexed, the increase in speed can be even more dramatic: +.PP +.Vb 22 +\& time fundisp \e +\& huge.fits'[idx_activate=0,idx_debug=1,pi=770&&cir 4000 4000 1]' \e +\& "x y pi" +\& x y pi +\& ---------- ----------- ---------- +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 42.60u 12.63s 7:28.63 12.3% +.Ve +.PP +.Vb 27 +\& time fundisp \e +\& huge.fits'[idx_activate=1,idx_debug=1,pi=770&&cir 4000 4000 1]' \e +\& "x y pi" +\& x y pi +\& ---------- ----------- ---------- +\& idxeq: pi start=9473025,stop=9492240 => pi[ROW 9473025:9492240] +\& idxand sort: x[ROW 8037025:8070128] y[ROW 5757665:5792352] +\& idxor sort/merge: pi[ROW 9473025:9492240] [IDX_OR_SORT] +\& idxmerge(5): [IDX_OR_SORT] pi[ROW] +\& idxall(1): [IDX_OR_SORT] +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 3999.48 4000.47 770 +\& 1.67u 0.30s 0:24.76 7.9% +.Ve +.PP +The miracle of indexed filtering (and indeed, of any indexing) is the +speed of the binary search on the index, which is of order log2(n) +instead of n. 
(The funtools binary search method is taken from +http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary, whose author is +gratefully acknowledged.) This means that the larger the +file, the better the performance. Conversely, it also means that for +small files, using an index (and the overhead involved) can slow +filtering down somewhat. Our tests indicate that on a file containing +a few tens of thousands of rows, indexed filtering can be 10 to 20 +percent slower than non-indexed filtering. Of course, your mileage +will vary with conditions (disk access speed, amount of available +memory, process load, etc.). +.PP +Any problem encountered during index processing will result in +indexing being turned off, and replaced by filtering all rows. You can turn +indexing off manually by setting the idx_activate variable to 0 (in a filter +expression) or the \s-1FILTER_IDX_ACTIVATE\s0 environment variable to 0 (in the global +environment). Debugging output showing how the indexes are being processed can +be displayed to stderr by setting the idx_debug variable to 1 (in a filter +expression) or the \s-1FILTER_IDX_DEBUG\s0 environment variable to 1 (in the global +environment). +.PP +Currently, indexed filtering only works with \s-1FITS\s0 binary tables and raw +event files. It does not work with text files. This restriction might be +removed in a future release. +.SH "SEE ALSO" +.IX Header "SEE ALSO" +See funtools(7) for a list of Funtools help pages |