summaryrefslogtreecommitdiffstats
path: root/funtools/funjoin.c
diff options
context:
space:
mode:
authorWilliam Joye <wjoye@cfa.harvard.edu>2016-10-27 17:38:41 (GMT)
committerWilliam Joye <wjoye@cfa.harvard.edu>2016-10-27 17:38:41 (GMT)
commit5b44fb0d6530c4ff66a446afb69933aa8ffd014f (patch)
treee059f66d1f612e21fe9d83f9620c8715530353ec /funtools/funjoin.c
parentda2e3d212171bbe64c1af39114fd067308656990 (diff)
parent23c7930d27fe11c4655e1291a07a821dbbaba78d (diff)
downloadblt-5b44fb0d6530c4ff66a446afb69933aa8ffd014f.zip
blt-5b44fb0d6530c4ff66a446afb69933aa8ffd014f.tar.gz
blt-5b44fb0d6530c4ff66a446afb69933aa8ffd014f.tar.bz2
Merge commit '23c7930d27fe11c4655e1291a07a821dbbaba78d' as 'funtools'
Diffstat (limited to 'funtools/funjoin.c')
-rw-r--r--funtools/funjoin.c1172
1 files changed, 1172 insertions, 0 deletions
diff --git a/funtools/funjoin.c b/funtools/funjoin.c
new file mode 100644
index 0000000..9ca019e
--- /dev/null
+++ b/funtools/funjoin.c
@@ -0,0 +1,1172 @@
+/*
+ * Copyright (c) 2005 Smithsonian Astrophysical Observatory
+ */
+
+/*
+ * funjoin -j key t1.fits t2.fits t3.fits foo.fits
+ */
+
+#include <math.h>
+#include <funtoolsP.h>
+#include <word.h>
+
+/*
+ * Debug tracing.  Define JOIN_DEBUG to a non-zero value (e.g. by enabling
+ * the commented-out define below) to turn IPRINTF((stream, fmt, ...)) into
+ * an fprintf call; otherwise IPRINTF expands to nothing.  The macro takes
+ * the complete, parenthesized fprintf argument list as its single argument,
+ * hence the doubled parentheses at every call site.
+ */
+/* #define JOIN_DEBUG 1 */
+#if !JOIN_DEBUG
+#define IPRINTF(x)
+#else
+#define IPRINTF(x) fprintf x
+#endif
+
+/*
+ * Floating point comparison helpers (only defined here if no earlier header
+ * supplied them):
+ *
+ *   ftol(x,y,t)  true when |x-y| <= t + 1.0E-15
+ *   feq(x,y)     true when |x-y| <= 1.0E-15
+ *
+ * Every parameter is parenthesized so that expression arguments such as
+ * feq(a, b+c) or ftol(a, b, 2*tol) are converted and compared as a whole.
+ */
+#ifndef ftol
+#define ftol(x,y,t) (fabs((double)(x)-(double)(y))<=((double)(t)+(double)1.0E-15))
+#endif
+
+#ifndef feq
+#define feq(x,y) (fabs((double)(x)-(double)(y))<=(double)1.0E-15)
+#endif
+
+/* sizing limits */
+#define MAXIFILE 32     /* max file names accepted on the command line */
+#define MAXOFILE 1      /* number of output file records */
+#define MAXROW 8192     /* default for maxrow (see FUN_MAXROW in main) */
+
+/* join key classes; main OR's these into ktype to detect mixed key types */
+#define KEY_STRING 1
+#define KEY_NUMERIC 2
+
+/* name of the status column added by the -s switch */
+#define JFILES_COL "JFILES"
+
+/* word type used to build the per-row "which files joined" bit flags */
+typedef unsigned int JBITMASK;
+/* size of one JBITMASK word in bytes (not bits) */
+#define JBITSIZE ((int)sizeof(JBITMASK))
+
+/* row count setting; main overrides it from the FUN_MAXROW environment var */
+static int maxrow=MAXROW;
+
+/*
+ * One active column of an input file, as recorded by JoinAddCol().
+ */
+typedef struct _colrec{
+  char *name;           /* column name used in the output table */
+  char *oname;          /* column name as found in the input file */
+  int type;             /* data type character ('B', 'I', 'J', 'E', 'A', ...) */
+  int mode;             /* column mode flags from FunColumnLookup */
+  int offset;           /* offset of the column in a row read from the file */
+  int n;                /* element count (the repeat in the "nT" type string) */
+  int width;            /* number of bytes copied for this column */
+  int coffset;          /* offset of the column within this file's packed
+                           active columns (used for the blank row image) */
+} *XCol, XColRec;
+
+
+/*
+ * Per-file state.  The same record type is used for three parallel arrays in
+ * main: the input data files (files), their index files (ifiles) and the
+ * output file (ofiles); each array only uses the members relevant to it.
+ * An ifiles record is also handed to FunTableRowGet as the row buffer, so
+ * idx, dval and sval are filled in directly when an index row is read.
+ */
+typedef struct _filerec{
+  char *fname;          /* file name */
+  char *jname;          /* name of the join column */
+  char *bstr;           /* per-file blank value specification (-bn) */
+  char *actstr;         /* per-file column activation string (-an) */
+  Fun fun;              /* funtools handle */
+  int eof;              /* set once a read returns no row */
+  int rowsize;          /* input: total width of active columns;
+                           output: size of the output row */
+  int rowoffset;        /* input: where this file's columns start in the
+                           output row; output: offset of the status column */
+  char *rowbuf;         /* output row buffer */
+  char *blank;          /* blank row image built by JoinMakeBlank */
+  int counter;          /* current position in indexes while writing rows */
+  int tcol;             /* number of columns reported by FUN_NCOL */
+  int maxcol;           /* allocated size of cols */
+  int ncol;             /* number of entries used in cols */
+  XCol cols;            /* active columns (grown by JoinAddCol) */
+  int dtype;            /* 'D' or 'A': how the join key is read from the index */
+  /* join column description returned by FunColumnLookup */
+  int jtype;
+  int jmode;
+  int joffset;
+  int jn;
+  int jwidth;
+  int idx;              /* index file column "n": row number in the data file */
+  double dval;          /* current numeric key value */
+  double mval;          /* base value this file matched within the tolerance */
+  char *sval;           /* current string key value */
+  int ibase;            /* non-zero if this file supplied the base key value */
+  GIO igio;             /* gio handle of the index file */
+  int irow;             /* index row position saved for a reset, or -1 */
+  int maxindex;         /* allocated size of indexes */
+  int nindex;           /* number of entries used in indexes */
+  int *indexes;         /* data rows gathered for the current key value */
+} *XFile, XFileRec;
+
+extern char *optarg;
+extern int optind;
+
+/*
+ * usage -- print the command line synopsis and switch summary to stderr,
+ * then terminate the program with exit status 1.
+ *
+ * fname: program name shown in the synopsis line (main passes argv[0]).
+ */
+#ifdef ANSI_FUNC
+static void
+usage (char *fname)
+#else
+static void usage(fname)
+     char *fname;
+#endif
+{
+  fprintf(stderr,
+	  "usage: %s <switches> ifile1 ifile2 ... ifilen ofile\n",
+	  fname);
+  fprintf(stderr, "optional switches:\n");
+  fprintf(stderr, " -a cols # columns to activate in all files\n");
+  fprintf(stderr, " -a1 cols ... an cols # columns to activate in each file\n");
+  /* example blank values are bv1, bv2 (was mistyped as "bvl") */
+  fprintf(stderr, " -b 'c1:bv1,c2:bv2' # blank values for common columns in all files\n");
+  fprintf(stderr, " -bn 'c1:bv1,c2:bv2' # blank values for columns in specific files\n");
+  fprintf(stderr, " -j col # column to join in all files\n");
+  fprintf(stderr, " -j1 col ... jn col # column to join in each file\n");
+  fprintf(stderr, " -m min # min matches to output a row\n");
+  fprintf(stderr, " -M max # max matches to output a row\n");
+  fprintf(stderr, " -s # add 'jfiles' status column\n");
+  fprintf(stderr, " -S col # add col as status column\n");
+  fprintf(stderr, " -t tol # tolerance for joining numeric cols [2 files only]\n");
+  fprintf(stderr, "Between 2 and %d input files are allowed.\n", MAXIFILE);
+  fprintf(stderr, "\n(version: %s)\n", FUN_VERSION);
+  exit(1);
+}
+
+/*
+ * JoinMakeBlank -- build the "blank" row image for one input file.
+ *
+ * file->blank is allocated as file->rowsize zero bytes; JoinWriteRows copies
+ * it into the output row when this file has no row for the current key.
+ * Blank values are given as "col:value" items separated by ',' or ';'.
+ * The items in defblank are applied first and those in file->bstr second,
+ * so per-file values override the defaults.  Column names are compared
+ * case-insensitively with each active column's original (input) name, and
+ * the value is stored at the column's coffset in the form required by the
+ * column type.  For 'E' and 'D' columns the value "nan" (any case) stores a
+ * NaN.  An item without a ':' is reported through gerror().
+ *
+ * file:     input file record; ncol, cols and rowsize must already be set
+ * defblank: default blank specification, or NULL
+ */
+#ifdef ANSI_FUNC
+static void
+JoinMakeBlank(XFile file, char *defblank)
+#else
+static void
+JoinMakeBlank(file, defblank)
+     XFile file;
+     char *defblank;
+#endif
+{
+  int i, j;
+  int ip=0;
+  char tbuf[SZ_LINE];
+  char *b;
+  char *blanks[2];
+  char *t, *v;
+  unsigned char bval;
+  short sval;
+  unsigned short usval;
+  int ival;
+  longlong lval;
+  unsigned int uival;
+  float fval;
+  double dval;
+
+  /* start with a blank line */
+  file->blank = xcalloc(file->rowsize, sizeof(char));
+  /* blank specifications are separated by comma or semi */
+  newdtable(",;");
+
+  /* there are 2 places where we can get blank values: default and file;
+     work on private copies of both strings */
+  if( defblank )
+    blanks[0] = xstrdup(defblank);
+  else
+    blanks[0] = NULL;
+  if( file->bstr )
+    blanks[1] = xstrdup(file->bstr);
+  else
+    blanks[1] = NULL;
+
+
+  /* process defaults first, then overwrite with specifics */
+  for(j=0; j<2; j++){
+    if( !blanks[j] || !*blanks[j] ) continue;
+    ip = 0;
+    while( word(blanks[j], tbuf, &ip) ){
+      if( (t=strchr(tbuf, ':')) ){
+	/* split "name:value" in place: tbuf is the name, v the value */
+	v=t+1;
+	*t = '\0';
+	for(i=0; i<file->ncol; i++){
+	  if( !strcasecmp(tbuf, file->cols[i].oname) ){
+	    b = file->blank+file->cols[i].coffset;
+	    switch(file->cols[i].type){
+	    case 'L':
+	      bval = atoi(v);
+	      memcpy(b, &bval, sizeof(unsigned char));
+	      break;
+	    case 'X':
+	      /* bit columns: pick the store size from the bytes per element */
+	      switch(file->cols[i].width/file->cols[i].n){
+	      case 0:
+	      case 1:
+		bval = atoi(v);
+		memcpy(b, &bval, sizeof(unsigned char));
+		break;
+	      case 2:
+		sval = atoi(v);
+		memcpy(b, &sval, sizeof(short));
+		break;
+	      case 4:
+		ival = atoi(v);
+		memcpy(b, &ival, sizeof(int));
+		break;
+	      default:
+		gerror(stderr, "only set blank value on X of size 1,2,4\n");
+		break;
+	      }
+	      break;
+	    case 'B':
+	      bval = atoi(v);
+	      memcpy(b, &bval, sizeof(unsigned char));
+	      break;
+	    case 'I':
+	      sval = atoi(v);
+	      memcpy(b, &sval, sizeof(short));
+	      break;
+	    case 'J':
+	      ival = atoi(v);
+	      memcpy(b, &ival, sizeof(int));
+	      break;
+	    case 'K':
+#if HAVE_LONG_LONG == 0
+	      gerror(stderr,
+		     "64-bit data support not built (long long not available)\n");
+#endif
+	      lval = atoll(v);
+	      memcpy(b, &lval, sizeof(longlong));
+	      break;
+	    case 'U':
+	      usval = atoi(v);
+	      memcpy(b, &usval, sizeof(unsigned short));
+	      break;
+	    case 'V':
+	      uival = atoi(v);
+	      memcpy(b, &uival, sizeof(unsigned int));
+	      break;
+	    case 'E':
+	      /* test the value (v), not t: t now points at the '\0' that
+		 replaced the ':' and would never equal "nan" */
+	      if( !strcasecmp(v, "nan") ){
+		fval = getnanf();
+	      }
+	      else{
+		fval = atof(v);
+	      }
+	      memcpy(b, &fval, sizeof(float));
+	      break;
+	    case 'D':
+	      if( !strcasecmp(v, "nan") ){
+		dval = getnand();
+	      }
+	      else{
+		dval = atof(v);
+	      }
+	      memcpy(b, &dval, sizeof(double));
+	      break;
+	    case 'A':
+	      /* fixed-width text field: copy at most width bytes */
+	      strncpy(b, v, file->cols[i].width);
+	      break;
+	    }
+	  }
+	}
+      }
+      else{
+	gerror(stderr, "invalid blank specification: %s\n", tbuf);
+      }
+    }
+  }
+  /* free up delim table */
+  freedtable();
+  /* free blank strings */
+  for(i=0; i<2; i++){
+    if( blanks[i] ) xfree(blanks[i]);
+  }
+}
+
+/*
+ * JoinAddCol -- append one column description to file->cols.
+ *
+ * The array starts with a single slot and doubles whenever it is full.
+ * name and oname are duplicated with xstrdup; the remaining arguments are
+ * stored as given.  Returns the number of columns now held for the file.
+ */
+#ifdef ANSI_FUNC
+static int
+JoinAddCol(XFile file, char *name, char *oname,
+	   int type, int mode, int offset, int n, int width, int coffset)
+#else
+static int
+JoinAddCol(file, name, oname, type, mode, offset, n, width, coffset)
+     XFile file;
+     char *name;
+     char *oname;
+     int type;
+     int mode;
+     int offset;
+     int n;
+     int width;
+     int coffset;
+#endif
+{
+  XCol slot;
+
+  /* make room: first use allocates, a full array is doubled */
+  if( file->maxcol == 0 ){
+    file->maxcol = 1;
+    file->cols = xcalloc(file->maxcol, sizeof(XColRec));
+  }
+  else if( file->maxcol <= file->ncol ){
+    file->maxcol *= 2;
+    file->cols = xrealloc(file->cols, file->maxcol*sizeof(XColRec));
+  }
+
+  /* fill in the next free slot */
+  slot = &file->cols[file->ncol];
+  slot->name = xstrdup(name);
+  slot->oname = xstrdup(oname);
+  slot->type = type;
+  slot->mode = mode;
+  slot->offset = offset;
+  slot->n = n;
+  slot->width = width;
+  slot->coffset = coffset;
+
+  return ++file->ncol;
+}
+
+/*
+ * JoinFilesLeft -- return how many of the nfile records in ifiles have not
+ * yet been marked as being at end of file.
+ */
+#ifdef ANSI_FUNC
+static int
+JoinFilesLeft(XFile ifiles, int nfile)
+#else
+static int
+JoinFilesLeft(ifiles, nfile)
+     XFile ifiles;
+     int nfile;
+#endif
+{
+  int n;
+  int remaining=0;
+
+  for(n=nfile-1; n>=0; n--){
+    remaining += ifiles[n].eof ? 0 : 1;
+  }
+  return remaining;
+}
+
+/*
+ * JoinReadNext -- read the next row from one or more index files.
+ *
+ * which >= 0 reads one row from ifiles[which]; which < 0 reads one row from
+ * each of ifiles[0] .. ifiles[-which-1].  Files already at eof are skipped.
+ * The row is read straight into the file record itself, and a file whose
+ * read yields no row is marked eof.  Returns the number of rows read.
+ *
+ * NOTE(review): the range test is "hi > nfile", so hi == nfile is accepted;
+ * JoinResetRows calls this with nfile equal to the file index and depends on
+ * that, so the test must not be tightened without changing that caller.
+ */
+#ifdef ANSI_FUNC
+static int
+JoinReadNext(XFile ifiles, int nfile, int which)
+#else
+static int
+JoinReadNext(ifiles, nfile, which)
+     XFile ifiles;
+     int nfile;
+     int which;
+#endif
+{
+  int cur;
+  int got;
+  int first, last;
+  int count=0;
+
+  /* work out the inclusive range of files to read */
+  first = (which < 0) ? 0 : which;
+  last = (which < 0) ? ABS(which)-1 : which;
+  if( last > nfile ) return 0;
+
+  for(cur=first; cur<=last; cur++){
+    if( !ifiles[cur].eof ){
+      FunTableRowGet(ifiles[cur].fun, (void *)&(ifiles[cur]), 1, NULL, &got);
+      if( got ){
+	count++;
+      }
+      else{
+	ifiles[cur].eof = 1;
+      }
+    }
+  }
+  return count;
+}
+
+/*
+ * JoinGetMatches -- find the files whose current key equals the smallest
+ * current key.
+ *
+ * Among the files not at eof, the one with the smallest key (string compare
+ * for KEY_STRING, numeric compare for KEY_NUMERIC) becomes the "base" and is
+ * flagged through its ibase member.  The indexes of all files whose current
+ * key matches the base key are stored in matches[] and their count is
+ * returned.  String keys must match exactly.  Numeric keys match within tol
+ * when tol > 0 (the base value is then saved in mval and the file's current
+ * row position in irow), otherwise they must pass the feq() test.
+ * Returns 0 if every file is at eof.
+ */
+#ifdef ANSI_FUNC
+static int
+JoinGetMatches(XFile ifiles, int nfile, int ktype, double tol, char *matches)
+#else
+static int
+JoinGetMatches(ifiles, nfile, ktype, tol, matches)
+     XFile ifiles;
+     int nfile;
+     int ktype;
+     double tol;
+     char *matches;
+#endif
+{
+  int i;
+  int base;
+  int nfound=0;
+  char *skey=NULL;
+  double dkey=0.0;
+
+  /* start from a clean slate: no matches, no base, no saved positions */
+  memset(matches, 0, nfile);
+  for(i=0; i<nfile; i++){
+    ifiles[i].ibase = 0;
+    ifiles[i].irow = -1;
+  }
+
+  /* the first file that still has data is the initial base candidate */
+  for(base=0; (base<nfile) && ifiles[base].eof; base++)
+    ;
+  if( base >= nfile ) return 0;
+
+  if( ktype == KEY_STRING ){
+    /* the base is the file holding the smallest string */
+    for(i=0; i<nfile; i++){
+      if( !ifiles[i].eof && (strcmp(ifiles[i].sval, ifiles[base].sval) < 0) ){
+	base = i;
+      }
+    }
+    skey = ifiles[base].sval;
+    /* ascii keys require an exact string match */
+    for(i=0; i<nfile; i++){
+      if( !ifiles[i].eof && !strcmp(ifiles[i].sval, skey) ){
+	matches[nfound++] = i;
+      }
+    }
+  }
+  else if( ktype == KEY_NUMERIC ){
+    /* the base is the file holding the smallest number */
+    for(i=0; i<nfile; i++){
+      if( !ifiles[i].eof && (ifiles[i].dval < ifiles[base].dval) ){
+	base = i;
+      }
+    }
+    dkey = ifiles[base].dval;
+    FunInfoGet(ifiles[base].fun, FUN_ROW, &ifiles[base].irow, 0);
+    for(i=0; i<nfile; i++){
+      if( ifiles[i].eof ) continue;
+      if( (tol > 0.0) && ftol(ifiles[i].dval,dkey,tol) ){
+	/* within tolerance: remember what we matched and where we were */
+	matches[nfound++] = i;
+	ifiles[i].mval = dkey;
+	FunInfoGet(ifiles[i].fun, FUN_ROW, &ifiles[i].irow, 0);
+      }
+      else if( feq(ifiles[i].dval,dkey) ){
+	/* "exact" match: no position to go back to */
+	matches[nfound++] = i;
+	ifiles[i].irow = -1;
+      }
+    }
+  }
+
+  ifiles[base].ibase = 1;
+  return nfound;
+}
+
+/*
+ * JoinAddIndex -- append idx to the list ifile->indexes.
+ *
+ * The list starts with one slot and doubles whenever it is full.  Callers
+ * empty the list by resetting nindex to 0; the storage is kept.
+ * Returns the number of entries now in the list.
+ */
+#ifdef ANSI_FUNC
+static int
+JoinAddIndex(XFile ifile, int idx)
+#else
+static int
+JoinAddIndex(ifile, idx)
+     XFile ifile;
+     int idx;
+#endif
+{
+  if( ifile->maxindex == 0 ){
+    ifile->maxindex = 1;
+    ifile->indexes = xcalloc(ifile->maxindex, sizeof(int));
+  }
+  else if( ifile->maxindex <= ifile->nindex ){
+    ifile->maxindex *= 2;
+    ifile->indexes = xrealloc(ifile->indexes, ifile->maxindex*sizeof(int));
+  }
+  ifile->indexes[ifile->nindex] = idx;
+  ifile->nindex += 1;
+  return ifile->nindex;
+}
+
+/*
+ * JoinGatherRows -- for every matched file, collect the data row numbers of
+ * all consecutive index rows that carry the matched key.
+ *
+ * For each file listed in matches[], the index list is emptied, the current
+ * row's idx is recorded, and further index rows are read and recorded for as
+ * long as their key still matches (exact string compare, or for numeric
+ * keys: within tol of the matched base value for non-base files, otherwise
+ * the feq() test against the file's own first value).  Each file is left
+ * positioned on its first non-matching row, or at eof.
+ *
+ * *resetflag is set to -1, unless a tolerance is in use and the base file's
+ * next value lies within 2*tol of the value just processed; in that case it
+ * is set to the base file's index, telling the caller that the other files
+ * must be repositioned and re-checked against the new base value.
+ */
+#ifdef ANSI_FUNC
+static void
+JoinGatherRows(XFile ifiles, int nfile, int ktype, double tol,
+	       char *matches, int nmatch, int *resetflag)
+#else
+static void
+JoinGatherRows(ifiles, nfile, ktype, tol, matches, nmatch, resetflag)
+     XFile ifiles;
+     int nfile;
+     int ktype;
+     double tol;
+     char *matches;
+     int nmatch;
+     int *resetflag;
+#endif
+{
+  int i, j;
+  int ibase=-1;
+  char *sval=NULL;
+  double dval;
+  double mval;
+
+  /* no need to reset rows yet */
+  *resetflag = -1;
+  /* make sure we have matches to process */
+  if( !nmatch ) return;
+  /* find the file that supplied the base value */
+  for(j=0; j<nmatch; j++){
+    i = matches[j];
+    if( ifiles[i].ibase ){
+      ibase = i;
+      break;
+    }
+  }
+  /* should never happen */
+  if( ibase < 0 ) return;
+  /* for each match file, look for successive rows that also match */
+  for(j=0; j<nmatch; j++){
+    i = matches[j];
+    ifiles[i].nindex = 0;
+    JoinAddIndex(&ifiles[i], ifiles[i].idx);
+    switch(ktype){
+    case KEY_STRING:
+      /* sval is the file's own key buffer, which each read overwrites */
+      sval = ifiles[i].sval;
+      while( JoinReadNext(ifiles, nfile, i) ){
+	if( !strcmp(sval,ifiles[i].sval) ){
+	  JoinAddIndex(&ifiles[i], ifiles[i].idx);
+	}
+	else{
+	  break;
+	}
+      }
+      break;
+    case KEY_NUMERIC:
+      dval = ifiles[i].dval;
+      mval = ifiles[i].mval;
+      while( JoinReadNext(ifiles, nfile, i) ){
+	if( (tol > 0.0) && (i!=ibase) && ftol(ifiles[i].dval,mval,tol) ){
+	  JoinAddIndex(&ifiles[i], ifiles[i].idx);
+	}
+	else if( feq(ifiles[i].dval,dval) ){
+	  JoinAddIndex(&ifiles[i], ifiles[i].idx);
+	}
+	else{
+	  /* If this is the base file and we are using a tolerance, and the
+	     difference between the last base and this base value is less than
+	     twice the tolerance, we have to reset all other file positions
+	     and re-check those values against this new base value. */
+	  if( (tol > 0.0) && (i==ibase) && ftol(ifiles[i].dval,dval,(2*tol)) ){
+	    *resetflag = ibase;
+	  }
+	  break;
+	}
+      }
+      break;
+    }
+  }
+}
+
+/*
+ * JoinWriteRows -- write the output rows for the current key value.
+ *
+ * One output row is written for every combination of the rows gathered in
+ * the matched files (the product of their nindex counts).  The counter
+ * members act like an odometer over those lists, the last matched file
+ * varying fastest.  For each output row, a matched file contributes the
+ * active columns of the selected data row and an unmatched file contributes
+ * its blank row image.  When jbits is non-zero, a status bit field of jbits
+ * bits is stored at ofiles[0].rowoffset, with bit j set when file j
+ * contributed a real row.
+ *
+ * files:   input data files
+ * ifiles:  the corresponding index file records
+ * nfile:   number of input files
+ * matches: indexes of the matched files
+ * nmatch:  number of entries in matches
+ * jbits:   size of the status column in bits, or 0 if there is none
+ * ofiles:  output file record (only element 0 is used)
+ *
+ * Returns 1 on success, 0 if working storage could not be allocated or a
+ * data row could not be read.
+ */
+#ifdef ANSI_FUNC
+static int
+JoinWriteRows(XFile files, XFile ifiles, int nfile, char *matches,
+	      int nmatch, int jbits, XFile ofiles)
+#else
+static int
+JoinWriteRows(files, ifiles, nfile, matches, nmatch, jbits, ofiles)
+     XFile files;
+     XFile ifiles;
+     int nfile;
+     char *matches;
+     int nmatch;
+     int jbits;
+     XFile ofiles;
+#endif
+{
+  int i, j, k;
+  int ii;
+  int got;
+  int nrow=1;
+  int status=1;
+  /* number of bits in one JBITMASK word */
+  int maskbits=(int)sizeof(JBITMASK)*8;
+  char *buf;
+  char *rowptr;
+  char *flags=NULL;
+  JBITMASK *jfiles=NULL;
+
+  /* allocate flags to tell which files we process */
+  if( !(flags=xcalloc(nfile, sizeof(char))) ){
+    return 0;
+  }
+  /* allocate enough whole words to hold jbits status bits */
+  if( jbits ){
+    if( !(jfiles=xcalloc((jbits+maskbits-1)/maskbits, sizeof(JBITMASK))) ){
+      xfree(flags);
+      return 0;
+    }
+  }
+
+  /* set initial values for files which have joins */
+  for(i=0; i<nmatch; i++){
+    ii = (int)matches[i];
+    flags[ii] = 1;
+    nrow *= ifiles[ii].nindex;
+    ifiles[ii].counter = 0;
+  }
+
+  /* process all rows */
+  for(i=0; i<nrow; i++){
+    /* clear output rowbuf */
+    memset(ofiles[0].rowbuf, 0, ofiles[0].rowsize);
+    /* make up the row */
+    for(j=0; j<nfile; j++){
+      if( !flags[j] ){
+	/* move blanks into output record for this row */
+	IPRINTF((stderr, "blank "));
+	memcpy(ofiles[0].rowbuf+files[j].rowoffset,
+	       files[j].blank, files[j].rowsize);
+      }
+      /* retrieve data and transfer active columns to output */
+      else{
+	ii = ifiles[j].counter;
+	IPRINTF((stderr, "%d ", ifiles[j].indexes[ii]));
+	if( FunTableRowSeek(files[j].fun, ifiles[j].indexes[ii], NULL) <0 ){
+	  gerror(stderr, "can't seek to row %d: %s\n",
+		 ifiles[j].indexes[ii], ifiles[j].fname);
+	}
+	if( !(buf=FunTableRowGet(files[j].fun, NULL, 1, NULL, &got)) || !got ){
+	  gerror(stderr, "can't read row %d: %s\n",
+		 ifiles[j].indexes[ii], ifiles[j].fname);
+	  /* only reached if gerror returns: there is no valid row to copy */
+	  if( buf ) xfree(buf);
+	  status = 0;
+	  goto done;
+	}
+	/* pack the active columns one after another into the output row */
+	rowptr = ofiles[0].rowbuf+files[j].rowoffset;
+	for(k=0; k<files[j].ncol; k++){
+	  memcpy(rowptr, buf+files[j].cols[k].offset, files[j].cols[k].width);
+	  rowptr += files[j].cols[k].width;
+	}
+	/* record that file j contributed: word j/maskbits, bit j%maskbits */
+	if( jbits && (j < jbits) ){
+	  jfiles[j/maskbits] |= ((JBITMASK)1)<<(j%maskbits);
+	}
+	xfree(buf);
+      }
+    }
+    /* save jfiles flag value, if necessary; the column is jbits bits wide,
+       i.e. jbits/8 bytes */
+    if( jbits ){
+      rowptr = ofiles[0].rowbuf + ofiles[0].rowoffset;
+      memcpy(rowptr, jfiles, jbits/8);
+    }
+    /* write row */
+    IPRINTF((stderr, "\n"));
+    if( !FunTableRowPut(ofiles[0].fun, ofiles[0].rowbuf, 1, 0, NULL) ){
+      gerror(stderr, "can't write output row\n");
+    }
+    /* inc to next row: bump the last counter, carrying into earlier ones */
+    for(j=nmatch-1; j>=0; j--){
+      ii = matches[j];
+      ifiles[ii].counter++;
+      if( ifiles[ii].counter >= ifiles[ii].nindex ){
+	ifiles[ii].counter = 0;
+      }
+      else{
+	break;
+      }
+    }
+  }
+
+done:
+  /* free up space */
+  if( flags ) xfree(flags);
+  if( jfiles ) xfree(jfiles);
+  return status;
+}
+
+/*
+ * JoinResetRows -- reposition matched files after a tolerance join.
+ *
+ * Does nothing unless resetflag >= 0.  Otherwise every matched file that is
+ * not the base file and has a saved index row position (irow >= 0) has its
+ * eof flag cleared, is moved back to that position, and has one index row
+ * read again, so that its values are re-checked against the next base value.
+ */
+#ifdef ANSI_FUNC
+static void
+JoinResetRows(XFile ifiles, char *matches, int nmatch, int resetflag)
+#else
+static void
+JoinResetRows(ifiles, matches, nmatch, resetflag)
+     XFile ifiles;
+     char *matches;
+     int nmatch;
+     int resetflag;
+#endif
+{
+  int i, j;
+
+  /* reset index positions so that we re-check tolerances */
+  if( resetflag < 0 ) return;
+  for(j=0; j<nmatch; j++){
+    i = matches[j];
+    if( !ifiles[i].ibase && (ifiles[i].irow>=0) ){
+      ifiles[i].eof = 0;
+      FunTableRowSeek(ifiles[i].fun, ifiles[i].irow, NULL);
+      /* the total file count is not available here; i+1 is the smallest
+	 count for which index i is a valid file */
+      JoinReadNext(ifiles, i+1, i);
+    }
+  }
+}
+
+/*
+ * main -- join two or more indexed tables on a key column.
+ *
+ * Outline:
+ *   1. parse switches and file names; the last file name is the output
+ *   2. open each input file and the index file for its join column, and
+ *      select how index rows are read (numeric or string key)
+ *   3. build the output column list from the active columns of all inputs,
+ *      renaming duplicates as name_2, name_3, ..., plus an optional status
+ *      column, and build each file's blank row image
+ *   4. walk all index files in step: find the smallest current key, gather
+ *      the rows carrying it in each matching file, and write the joined
+ *      rows when the number of matching files is within [minmatch,maxmatch]
+ *   5. release everything
+ *
+ * Errors are reported through gerror(); unless the GERROR environment
+ * variable is set, the gerror level is set to 2 at startup.
+ */
+#ifdef ANSI_FUNC
+int
+main (int argc, char **argv)
+#else
+int
+main(argc, argv)
+     int argc;
+     char **argv;
+#endif
+{
+  int i, j, k;
+  int type, mode, offset, n, width;
+  int namei;
+  int coffset;
+  int resetflag=-1;
+  int jbits=0;
+  int nmatch=0;
+  int minmatch=1;
+  int maxmatch=MAXIFILE;
+  int nfile=0;
+  int tcol=0;
+  int osize=0;
+  int oncol=0;
+  int ktype=0;
+  int *ooffsets=NULL;
+  char *matches=NULL;
+  char *name;
+  char *basename;
+  char *defact=NULL;
+  char *defcol=NULL;
+  char *defblank=NULL;
+  char *s;
+  char *filtstr;
+  char *jfiles=NULL;
+  char **onames=NULL;
+  char **otypes=NULL;
+  char **omodes=NULL;
+  char tbuf[SZ_LINE];
+  char tbuf2[SZ_LINE];
+  char namebuf[SZ_LINE];
+  double tlmin, tlmax, binsiz, tscale, tzero;
+  /* 0.0 means "no tolerance"; must be initialized because it is tested
+     below even when -t is not given */
+  double tol=0.0;
+  XFile files=NULL, ifiles=NULL, ofiles=NULL;
+
+  /* exit on gio errors */
+  if( !getenv("GERROR") )
+    setgerror(2);
+
+  /* get maxrow,if user-specified */
+  if( (s=getenv("FUN_MAXROW")) != NULL )
+    maxrow = atoi(s);
+
+  /* we are using indexes specially and don't want to use them normally */
+  putenv("FILTER_IDX_ACTIVATE=false");
+
+  /* allocate input and output file arrays (we'll do the index array later) */
+  if( !(files=xcalloc(MAXIFILE, sizeof(XFileRec))) ){
+    gerror(stderr, "can't allocate primary record structure for join\n");
+  }
+  if( !(ofiles=xcalloc(MAXOFILE, sizeof(XFileRec))) ){
+    gerror(stderr, "can't allocate output record structure for join\n");
+  }
+
+  /* process arguments */
+  for(i=1; i<argc; i++) {
+    if ( argv[i][0] == '-' ) {
+      switch (argv[i][1]) {
+      case 'a':
+	/* -aN applies to file N (1-based), plain -a to all files */
+	if( argv[i][2] ){
+	  j = atoi(&argv[i][2])-1;
+	  if( (j >= 0) && (j < MAXIFILE) && (i < argc-1)){
+	    files[j].actstr = argv[++i];
+	  }
+	  else{
+	    gerror(stderr, "invalid index for column activate: %d\n", j+1);
+	  }
+	}
+	else{
+	  defact = argv[++i];
+	}
+	break;
+      case 'b':
+	if( argv[i][2] ){
+	  j = atoi(&argv[i][2])-1;
+	  if( (j >= 0) && (j < MAXIFILE) && (i < argc-1)){
+	    files[j].bstr = argv[++i];
+	  }
+	  else{
+	    gerror(stderr, "invalid index for join column: %d\n", j+1);
+	  }
+	}
+	else{
+	  defblank = argv[++i];
+	}
+	break;
+      case 'j':
+	if( argv[i][2] ){
+	  j = atoi(&argv[i][2])-1;
+	  if( (j >= 0) && (j < MAXIFILE) && (i < argc-1)){
+	    files[j].jname = argv[++i];
+	  }
+	  else{
+	    gerror(stderr, "invalid index for join column: %d\n", j+1);
+	  }
+	}
+	else{
+	  if (i < argc-1) {
+	    defcol = argv[++i];
+	  }
+	}
+	break;
+      case 'm':
+	/* the switch value is stored plus one, with a floor of 1 */
+	if (i < argc-1) {
+	  minmatch = atoi(argv[++i])+1;
+	}
+	if( minmatch < 1 ) minmatch = 1;
+	break;
+      case 'M':
+	if (i < argc-1) {
+	  maxmatch = atoi(argv[++i])+1;
+	}
+	if( maxmatch < 1 ) maxmatch = 1;
+	break;
+      case 's':
+	jfiles = JFILES_COL;
+	break;
+      case 'S':
+	if (i < argc-1) {
+	  jfiles = argv[++i];
+	}
+	break;
+      case 't':
+	if (i < argc-1) {
+	  tol = atof(argv[++i]);
+	}
+	if( tol <= 0 ){
+	  gerror(stderr, "tolerance value must be positive\n");
+	}
+	break;
+      }
+      continue;
+    }
+    /* no switch -- must be a file name */
+    if( nfile < MAXIFILE ){
+      files[nfile].fname = argv[i];
+      nfile++;
+    }
+    else{
+      gerror(stderr, "too many files (%d > %d)\n", nfile, MAXIFILE);
+    }
+  }
+
+  /* make sure we have at least 2 input + 1 output file args */
+  if( nfile < 3 ) usage(argv[0]);
+
+  /* for now, tolerance only works with 2 files */
+  if( (tol > 0.0) && (nfile > 3) ){
+    gerror(stderr, "for now, -t [tol] can only join 2 files\n");
+  }
+
+  /* move last input to output and decrement number of input files */
+  ofiles[0].fname = files[nfile-1].fname;
+  nfile--;
+
+  /* reallocate input files */
+  if( !(files=xrealloc(files, nfile*sizeof(XFileRec))) ){
+    gerror(stderr, "can't re-allocate primary record structure for join\n");
+  }
+
+  /* make sure we have a join column name for each file */
+  for(i=0; i<nfile; i++){
+    if( !files[i].jname ){
+      if( defcol ){
+	files[i].jname = defcol;
+      }
+      else{
+	gerror(stderr,
+	       "no join column specified for file: %s\n", files[i].fname);
+      }
+    }
+  }
+
+  /* allocate exact number of index file records */
+  if( !(ifiles=xcalloc(nfile, sizeof(XFileRec))) ){
+    gerror(stderr, "can't allocate primary record structure for join\n");
+  }
+  if( !(matches=(char *)xcalloc(nfile, sizeof(char))) ){
+    gerror(stderr, "can't allocate key result buffer\n");
+  }
+
+  /* open input and index files */
+  for(i=0; i<nfile; i++){
+    /* open the input data file */
+    if( !(files[i].fun = FunOpen(files[i].fname, "r", NULL)) ){
+      gerror(stderr,
+	     "can't FunOpen input file (or find extension): %s\n",
+	     files[i].fname);
+    }
+    /* make sure the join column is in this file */
+    if( !FunColumnLookup(files[i].fun, files[i].jname, 0, NULL,
+			 &files[i].jtype,
+			 &files[i].jmode,
+			 &files[i].joffset,
+			 &files[i].jn,
+			 &files[i].jwidth) ){
+      gerror(stderr, "can't find column %s in input file: %s\n",
+	     files[i].jname, files[i].fname);
+    }
+    /* check for filter (we read index directly, bypassing filters) */
+    filtstr=NULL;
+    FunInfoGet(files[i].fun, FUN_FILTER, &filtstr, 0);
+    if( filtstr && *filtstr ){
+      gerror(stderr, "row filters are not permitted: %s\n", files[i].fname);
+    }
+    /* activate specified columns */
+    if( files[i].actstr )
+      FunColumnActivate(files[i].fun, files[i].actstr, NULL);
+    else
+      FunColumnActivate(files[i].fun, defact, NULL);
+    /* reset rowoffset flag for this file */
+    files[i].rowoffset = -1;
+    /* get number of possible columns */
+    FunInfoGet(files[i].fun, FUN_NCOL, &(files[i].tcol), 0);
+    /* temp counter of total number of columns */
+    tcol += files[i].tcol;
+    /* open the index for the specified join column */
+    idxinitfilenames(files[i].fun->header->filename, NULL);
+    s = idxindexfilename(files[i].jname, NULL);
+    idxfreefilenames();
+    if( !s ){
+      gerror(stderr, "can't find index file for column '%s' in file: %s\n",
+	     files[i].jname, files[i].fname);
+    }
+    if( !(ifiles[i].fun = FunOpen(s, "r", NULL)) ){
+      gerror(stderr, "can't FunOpen index file (or find extension): %s\n", s);
+    }
+    /* get gio handle for seeking and saving */
+    FunInfoGet(ifiles[i].fun, FUN_GIO, &ifiles[i].igio, 0);
+    /* make sure the join column is in the index file */
+    ifiles[i].fname = xstrdup(s);
+    ifiles[i].jname = files[i].jname;
+    if( !FunColumnLookup(ifiles[i].fun, ifiles[i].jname, 0, NULL,
+			 &ifiles[i].jtype,
+			 &ifiles[i].jmode,
+			 &ifiles[i].joffset,
+			 &ifiles[i].jn,
+			 &ifiles[i].jwidth) ){
+      gerror(stderr, "can't find column %s in index file: %s\n",
+	     ifiles[i].jname, ifiles[i].fname);
+    }
+    /* define how we will read index file, based on data type of join column;
+       the index row is read directly into the XFileRec itself: column "n"
+       into idx and the key into dval (as a double) or sval (as a string) */
+    switch(ifiles[i].jtype){
+    case 'B':
+    case 'I':
+    case 'J':
+    case 'K':
+    case 'U':
+    case 'V':
+    case 'L':
+    case 'X':
+      FunColumnSelect(ifiles[i].fun, sizeof(XFileRec), NULL,
+		      "n",             "J", "r", FUN_OFFSET(XFile, idx),
+		      ifiles[i].jname, "D", "r", FUN_OFFSET(XFile, dval),
+		      NULL);
+      ifiles[i].dtype = 'D';
+      ktype |= KEY_NUMERIC;
+      break;
+    case 'D':
+    case 'E':
+      FunColumnSelect(ifiles[i].fun, sizeof(XFileRec), NULL,
+		      "n",             "J", "r", FUN_OFFSET(XFile, idx),
+		      ifiles[i].jname, "D", "r", FUN_OFFSET(XFile, dval),
+		      NULL);
+      ifiles[i].dtype = 'D';
+      ktype |= KEY_NUMERIC;
+      break;
+    case 'A':
+      snprintf(tbuf, SZ_LINE-1, "@%dA", ifiles[i].jn);
+      FunColumnSelect(ifiles[i].fun, sizeof(XFileRec), NULL,
+		      "n",             "J",  "r", FUN_OFFSET(XFile, idx),
+		      ifiles[i].jname, tbuf, "r", FUN_OFFSET(XFile, sval),
+		      NULL);
+      ifiles[i].dtype = 'A';
+      ifiles[i].sval = xcalloc(ifiles[i].jn+1, sizeof(char));
+      ktype |= KEY_STRING;
+      break;
+    default:
+      gerror(stderr, "bad datatype for join column: %c\n", ifiles[i].jtype);
+    }
+    /* free up temp space */
+    if( s ) xfree(s);
+  }
+  /* we don't allow mixing of string and numeric values */
+  if( ktype == (KEY_STRING|KEY_NUMERIC) ){
+    gerror(stderr, "can't mix string and numeric join columns\n");
+  }
+
+  /* open output file */
+  if( !(ofiles[0].fun = FunOpen(ofiles[0].fname, "w", NULL)) ){
+    gerror(stderr, "can't FunOpen output file: %s\n", ofiles[0].fname);
+  }
+
+  /* allocate space for the max number of columns we can have (incl jfiles) */
+  onames = (char **)xcalloc(tcol+1, sizeof(char *));
+  otypes = (char **)xcalloc(tcol+1, sizeof(char *));
+  omodes = (char **)xcalloc(tcol+1, sizeof(char *));
+  ooffsets = (int *)xcalloc(tcol+1, sizeof(int));
+
+  /* construct list of output columns */
+  for(i=0; i<nfile; i++){
+    coffset = 0;
+    for(j=0; j<files[i].tcol; j++){
+      if( !FunColumnLookup(files[i].fun, NULL, j,
+			   &name, &type, &mode, &offset, &n, &width) ){
+	gerror(stderr,
+	       "can't find column %d in input file: %s\n", j, files[i].fname);
+      }
+      if( mode & COL_ACTIVE ){
+	/* save original name in case of duplicate */
+	basename = name;
+	/* first numeric value we will append */
+	namei = 2;
+again:
+	/* k is set to -1 if the name is already used for an output column */
+	for(k=0; k<oncol; k++){
+	  if( !strcasecmp(name, onames[k]) ){
+	    k = -1;
+	    break;
+	  }
+	}
+	/* append a file number to duplicate names */
+	if( k < 0 ){
+	  snprintf(namebuf, SZ_LINE-1, "%s_%d", basename, namei);
+	  name = namebuf;
+	  namei++;
+	  IPRINTF((stderr, "trying new col name for file %d: %s\n", i, name));
+	  goto again;
+	}
+	/* add column */
+	JoinAddCol(&files[i], name, basename, type, mode, offset, n, width,
+		   coffset);
+	/* bump offset into current row */
+	coffset += width;
+	/* size of active columns for this file only -- save in index rec */
+	files[i].rowsize += width;
+	/* offset into output where this file's contribution starts */
+	if( files[i].rowoffset < 0 ) files[i].rowoffset = osize;
+	/* get auxiliary info */
+	FunColumnLookup2(files[i].fun, NULL, j,
+			 &tlmin, &tlmax, &binsiz, &tscale, &tzero);
+	/* generate type string; every append is limited to the space that
+	   is still free in tbuf (strncat's count bounds the appended text,
+	   not the destination) */
+	snprintf(tbuf, SZ_LINE-1, "%d%c", n, type);
+	if( !feq(tlmin, tlmax) ){
+	  snprintf(tbuf2, SZ_LINE-1, ":%f:%f", tlmin, tlmax);
+	  strncat(tbuf, tbuf2, SZ_LINE-1-strlen(tbuf));
+	}
+	if( !feq(binsiz, 0.0) && !feq(binsiz, 1.0) ){
+	  snprintf(tbuf2, SZ_LINE-1, ":%f", binsiz);
+	  strncat(tbuf, tbuf2, SZ_LINE-1-strlen(tbuf));
+	}
+	if( !feq(tscale, 0.0) ){
+	  snprintf(tbuf2, SZ_LINE-1, ";%f", tscale);
+	  strncat(tbuf, tbuf2, SZ_LINE-1-strlen(tbuf));
+	  if( !feq(tzero, 0.0) ){
+	    snprintf(tbuf2, SZ_LINE-1, ":%f", tzero);
+
+	    strncat(tbuf, tbuf2, SZ_LINE-1-strlen(tbuf));
+	  }
+	}
+	IPRINTF((stderr, "%d: name=%s type=%c mode=%o offset=%d n=%d => %s\n",
+		 i, name, type, mode, offset, n, tbuf));
+	/* save column info */
+	onames[oncol] = xstrdup(name);
+	otypes[oncol] = xstrdup(tbuf);
+	omodes[oncol] = "w";
+	ooffsets[oncol] = osize;
+	/* total size for all files */
+	osize += width;
+	oncol++;
+      }
+    }
+    /* create blank line for this file */
+    JoinMakeBlank(&files[i], defblank);
+    /* add filename to header */
+    FunParamPuts(ofiles[0].fun, "JFILE", i+1, files[i].fname, "join file", 1);
+  }
+
+  /* and one more for joinfiles, if needed */
+  if( jfiles ){
+    /* strncpy does not terminate a source that fills the whole count */
+    strncpy(tbuf, jfiles, SZ_LINE-1);
+    tbuf[SZ_LINE-1] = '\0';
+    cluc(tbuf);
+    onames[oncol] = xstrdup(tbuf);
+    /* one bit per possible file, rounded up to a whole number of bytes */
+    jbits = ((MAXIFILE+7)/8)*8;
+    snprintf(tbuf, SZ_LINE-1, "%dX", jbits);
+    otypes[oncol] = xstrdup(tbuf);
+    omodes[oncol] = "w";
+    ooffsets[oncol] = osize;
+    /* save for later use */
+    ofiles[0].rowoffset = osize;
+    /* total size for all files */
+    osize += (jbits/8);
+    oncol++;
+  }
+
+  /* reallocate output column array to correct size */
+  onames = (char **)xrealloc(onames, oncol*sizeof(char *));
+  otypes = (char **)xrealloc(otypes, oncol*sizeof(char *));
+  omodes = (char **)xrealloc(omodes, oncol*sizeof(char *));
+  ooffsets = (int *)xrealloc(ooffsets, oncol*sizeof(int));
+
+  /* set up the output columns */
+  FunColumnSelectArr(ofiles[0].fun, osize, NULL,
+		     onames, otypes, omodes, ooffsets, oncol);
+
+  /* this tells us the size of the output buffer */
+  FunInfoGet(ofiles[0].fun, FUN_ROWSIZE, &(ofiles[0].rowsize), 0);
+  ofiles[0].rowbuf = xcalloc(ofiles[0].rowsize, sizeof(char));
+
+  /* read a record from each index */
+  JoinReadNext(ifiles, nfile, -nfile);
+
+  /* check for joins, all rows in all files */
+  while( JoinFilesLeft(ifiles, nfile) > 0 ){
+    nmatch = JoinGetMatches(ifiles, nfile, ktype, tol, matches);
+    /* yikes ... when we reset rows to check against the next base,
+       we don't want to write out anything if there is no match, since this
+       was already done with the last base ... it's confusing */
+    if( (tol > 0.0) && (nmatch == 1) && (resetflag >=0) &&
+	(resetflag != matches[0]) ){
+      for(i=0; i<nmatch; i++){
+	JoinReadNext(ifiles, nfile, matches[i]);
+      }
+    }
+    /* this is the normal output of matched rows */
+    else if( (nmatch >= minmatch) && (nmatch <= maxmatch) ){
+      JoinGatherRows(ifiles, nfile, ktype, tol, matches, nmatch, &resetflag);
+      /* write all matched rows */
+      if(!JoinWriteRows(files, ifiles, nfile, matches, nmatch, jbits, ofiles)){
+	gerror(stderr, "can't write rows for join\n");
+      }
+      /* might have to reset the rows when using tolerance values */
+      JoinResetRows(ifiles, matches, nmatch, resetflag);
+    }
+    /* no output, so we advance the rows that matched */
+    else{
+      for(i=0; i<nmatch; i++){
+	JoinReadNext(ifiles, nfile, matches[i]);
+      }
+    }
+  }
+
+  /* free output column arrays (omodes entries are string literals) */
+  if( omodes ) xfree(omodes);
+  if( ooffsets ) xfree(ooffsets);
+  if( otypes ){
+    for(i=0; i<oncol; i++){
+      if( otypes[i] ) xfree(otypes[i]);
+    }
+    xfree(otypes);
+  }
+  if( onames ){
+    for(i=0; i<oncol; i++){
+      if( onames[i] ) xfree(onames[i]);
+    }
+    xfree(onames);
+  }
+
+  /* close input files and index files */
+  if( files ){
+    for(i=0; i<nfile; i++){
+      if( files[i].fun ) FunClose(files[i].fun);
+      if( files[i].blank ) xfree(files[i].blank);
+      if( files[i].cols ){
+	for(j=0; j<files[i].ncol; j++){
+	  if( files[i].cols[j].name ) xfree(files[i].cols[j].name);
+	  if( files[i].cols[j].oname ) xfree(files[i].cols[j].oname);
+	}
+	xfree(files[i].cols);
+      }
+    }
+    xfree(files);
+  }
+  if( ifiles ){
+    for(i=0; i<nfile; i++){
+      if( ifiles[i].fun ) FunClose(ifiles[i].fun);
+      if( ifiles[i].fname ) xfree(ifiles[i].fname);
+      if( ifiles[i].indexes ) xfree(ifiles[i].indexes);
+      if( ifiles[i].sval ) xfree(ifiles[i].sval);
+    }
+    xfree(ifiles);
+  }
+  if( ofiles ){
+    for(i=0; i<MAXOFILE; i++){
+      if( ofiles[i].fun ) FunClose(ofiles[i].fun);
+      if( ofiles[i].rowbuf ) xfree(ofiles[i].rowbuf);
+    }
+    xfree(ofiles);
+  }
+
+  /* free up everything else */
+  if( matches ) xfree(matches);
+
+  return(0);
+}