summaryrefslogtreecommitdiffstats
path: root/funtools/man/man1/funjoin.1
blob: 6e7dd31a131b9e4fdb2af3903c735afd2a1037e0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sh \" Subsection heading
.br
.if t .Sp
.ne 5
.PP
\fB\\$1\fR
.PP
..
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  | will give a
.\" real vertical bar.  \*(C+ will give a nicer C++.  Capital omega is used to
.\" do unbreakable dashes and therefore won't be available.  \*(C` and \*(C'
.\" expand to `' in nroff, nothing in troff, for use with C<>.
.tr \(*W-|\(bv\*(Tr
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
'br\}
.\"
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.if \nF \{\
.    de IX
.    tm Index:\\$1\t\\n%\t"\\$2"
..
.    nr % 0
.    rr F
.\}
.\"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.hy 0
.if n .na
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "funjoin 1"
.TH funjoin 1 "April 14, 2011" "version 1.4.5" "SAORD Documentation"
.SH "NAME"
funjoin \- join two or more FITS binary tables on specified columns
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
\&\fBfunjoin\fR [switches] <ifile1> <ifile2> ... <ifilen> <ofile> 
.SH "OPTIONS"
.IX Header "OPTIONS"
.Vb 11
\&  \-a  cols             # columns to activate in all files
\&  \-a1 cols ... an cols # columns to activate in each file
\&  \-b  'c1:bv1,c2:bv2'  # blank values for common columns in all files
\&  \-bn 'c1:bv1,c2:bv2'  # blank values for columns in specific files
\&  \-j  col              # column to join in all files
\&  \-j1 col ... jn col   # column to join in each file
\&  \-m min               # min matches to output a row
\&  \-M max               # max matches to output a row
\&  \-s                   # add 'jfiles' status column
\&  \-S col               # add col as status column
\&  \-t tol               # tolerance for joining numeric cols [2 files only]
.Ve
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
\&\fBfunjoin\fR joins rows from two or more (up to 32)
\&\s-1FITS\s0 Binary Table files, based on the values
of specified join columns in each file. \s-1NB:\s0 each join column must have
an index file associated with it. These files are generated using the
\&\fBfunindex\fR program.
.PP
The first argument to the program specifies the first input \s-1FITS\s0 table
or raw event file. If \*(L"stdin\*(R" is specified, data are read from the
standard input.  Subsequent arguments specify additional event files
and tables to join.  The last argument is the output \s-1FITS\s0 file.
.PP
\&\s-1NB:\s0 Do \fBnot\fR use Funtools Bracket
Notation to specify \s-1FITS\s0 extensions and row filters when running
funjoin or you will get wrong results. Rows are accessed and joined
using the index files directly, and this bypasses all filtering.
.PP
The join columns are specified using the \fB\-j col\fR switch (which
specifies a column name to use for all files) or with \fB\-j1 col1\fR,
\&\fB\-j2 col2\fR, ... \fB\-jn coln\fR switches (which specify a column
name to use for each file). A join column must be specified for each file.
If both \fB\-j col\fR and \fB\-jn coln\fR are specified for a given
file, then the latter is used. Join columns must either be of type
string or type numeric; it is illegal to mix numeric and string
columns in a given join.  For example, to join three files using the
same key column for each file, use:
.PP
.Vb 1
\&  funjoin \-j key in1.fits in2.fits in3.fits out.fits
.Ve
.PP
A different key can be specified for the third file in this way:
.PP
.Vb 1
\&  funjoin \-j key \-j3 otherkey in1.fits in2.fits in3.fits out.fits
.Ve
.PP
The \fB\-a \*(L"cols\*(R"\fR switch (and \fB\-a1 \*(L"cols1\*(R"\fR,
\&\fB\-a2 \*(L"cols2\*(R"\fR counterparts) can be used to specify columns to
activate (i.e. write to the output file) for each input file. By
default, all columns are output.
.PP
If two or more columns from separate files have the same name, the
second (and subsequent) columns are renamed to have an underscore
and a numeric value appended.
.PP
The \fB\-m min\fR and \fB\-M max\fR switches specify the minimum
and maximum number of joins required to write out a row. The default
minimum is 0 joins (i.e. all rows are written out) and the default maximum
is 63 (the maximum number of possible joins with a limit of 32 input files).
For example, to write out only those rows in which exactly two files
have columns that match (i.e. one join):
.PP
.Vb 1
\&  funjoin \-j key \-m 1 \-M 1 in1.fits in2.fits in3.fits ... out.fits
.Ve
.PP
A given row can have the requisite number of joins without all of the
files being joined (e.g. three files are being joined but only two
have a given join key value). In this case, all of the columns of the
non-joined file are written out, by default, using blanks (zeros or NULLs).
The \fB\-b c1:bv1,c2:bv2\fR and
\&\fB\-b1 'c1:bv1,c2:bv2' \-b2 'c1:bv1,c2:bv2' ...\fR
switches can be used to set the blank value for columns common to all
files and/or columns in a specified file, respectively. Each blank value
string contains a comma-separated list of column:blank_val specifiers.
For floating point values (single or double), a case-insensitive string
value of \*(L"nan\*(R" means that the \s-1IEEE\s0 NaN (not\-a\-number) should be
used. Thus, for example:
.PP
.Vb 1
\&  funjoin \-b "AKEY:???" \-b1 "A:-1" \-b3 "G:NaN,E:-1,F:-100" ...
.Ve
.PP
means that a non-joined \s-1AKEY\s0 column in any file will contain the
string \*(L"???\*(R", the non-joined A column of file 1 will contain a value
of \-1, the non-joined G column of file 3 will contain \s-1IEEE\s0 NaNs, while
the non-joined E and F columns of the same file will contain values \-1
and \-100, respectively. Of course, where common and specific blank values
are specified for the same column, the specific blank value is used.
.PP
To distinguish which files are non-blank components of a given row,
the \fB\-s\fR (status) switch can be used to add a bitmask column named
\&\*(L"\s-1JFILES\s0\*(R" to the output file. In this column, a bit is set for each
non-blank file composing the given row, with bit 0 corresponding to the
first file, bit 1 to the second file, and so on. The file names
themselves are stored in the \s-1FITS\s0 header as parameters named \s-1JFILE1\s0,
\&\s-1JFILE2\s0, etc.  The \fB\-S col\fR switch allows you to change the name
of the status column from the default \*(L"\s-1JFILES\s0\*(R".
.PP
A join between rows is the Cartesian product of all rows in one file
having a given join column value with all rows in a second file having
the same value for its join column and so on. Thus, if file1 has 2
rows with join column value 100, file2 has 3 rows with the same value,
and file3 has 4 rows, then the join results in 2*3*4=24 rows being output.
.PP
The join algorithm directly processes the index file associated with
the join column of each file. The smallest value of all the current
columns is selected as a base, and this value is used to join
equal-valued columns in the other files. In this way, the index files
are traversed exactly once.
.PP
The \fB\-t tol\fR switch specifies a tolerance value for numeric
columns.  At present, a tolerance value can join only two files at a
time.  (A completely different algorithm is required to join more than
two files using a tolerance, something we might consider implementing
in the future.)
.PP
The following example shows many of the features of funjoin. The input files
t1.fits, t2.fits, and t3.fits contain the following columns:
.PP
.Vb 11
\&  [sh] fundisp t1.fits
\&        AKEY    KEY      A      B 
\& ----------- ------ ------ ------
\&         aaa      0      0      1
\&         bbb      1      3      4
\&         ccc      2      6      7
\&         ddd      3      9     10
\&         eee      4     12     13
\&         fff      5     15     16
\&         ggg      6     18     19
\&         hhh      7     21     22
.Ve
.PP
.Vb 8
\&  [sh] fundisp t2.fits
\&        AKEY    KEY      C      D
\& ----------- ------ ------ ------
\&         iii      8     24     25
\&         ggg      6     18     19
\&         eee      4     12     13
\&         ccc      2      6      7
\&         aaa      0      0      1
.Ve
.PP
.Vb 7
\&  [sh] fundisp t3.fits
\&        AKEY    KEY        E        F           G
\&------------ ------ -------- -------- -----------
\&         ggg      6       18       19      100.10
\&         jjj      9       27       28      200.20
\&         aaa      0        0        1      300.30
\&         ddd      3        9       10      400.40
.Ve
.PP
Given these input files, the following funjoin command:
.PP
.Vb 3
\&  funjoin \-s \-a1 "\-B" \-a2 "\-D" \-a3 "\-E" \-b \e
\&  "AKEY:???" \-b1 "AKEY:XXX,A:255" \-b3 "G:NaN,E:-1,F:-100" \e
\&  \-j key t1.fits t2.fits t3.fits foo.fits
.Ve
.PP
will join the files on the \s-1KEY\s0 column, outputting all columns except B
(in t1.fits), D (in t2.fits) and E (in t3.fits), and setting blank
values for \s-1AKEY\s0 (globally, but overridden for t1.fits) and A (in file
1) and G, E, and F (in file 3).  A \s-1JFILES\s0 column will be output to
flag which files were used in each row:
.PP
.Vb 12
\&        AKEY    KEY      A       AKEY_2  KEY_2      C       AKEY_3  KEY_3        F           G   JFILES
\&  ------------ ------ ------ ------------ ------ ------ ------------ ------ -------- ----------- --------
\&         aaa      0      0          aaa      0      0          aaa      0        1      300.30        7
\&         bbb      1      3          ???      0      0          ???      0     \-100         nan        1
\&         ccc      2      6          ccc      2      6          ???      0     \-100         nan        3
\&         ddd      3      9          ???      0      0          ddd      3       10      400.40        5
\&         eee      4     12          eee      4     12          ???      0     \-100         nan        3
\&         fff      5     15          ???      0      0          ???      0     \-100         nan        1
\&         ggg      6     18          ggg      6     18          ggg      6       19      100.10        7
\&         hhh      7     21          ???      0      0          ???      0     \-100         nan        1
\&         XXX      0    255          iii      8     24          ???      0     \-100         nan        2
\&         XXX      0    255          ???      0      0          jjj      9       28      200.20        4
.Ve
.SH "SEE ALSO"
.IX Header "SEE ALSO"
See funtools(7) for a list of Funtools help pages