summaryrefslogtreecommitdiffstats
path: root/funtools/funjoin.c
diff options
context:
space:
mode:
authorWilliam Joye <wjoye@cfa.harvard.edu>2016-10-26 21:13:00 (GMT)
committerWilliam Joye <wjoye@cfa.harvard.edu>2016-10-26 21:13:00 (GMT)
commitda2e3d212171bbe64c1af39114fd067308656990 (patch)
tree9601f7ed15fa1394762124630c12a792bc073ec2 /funtools/funjoin.c
parent76b109ad6d97d19ab835596dc70149ef379f3733 (diff)
downloadblt-da2e3d212171bbe64c1af39114fd067308656990.zip
blt-da2e3d212171bbe64c1af39114fd067308656990.tar.gz
blt-da2e3d212171bbe64c1af39114fd067308656990.tar.bz2
rm funtools for update
Diffstat (limited to 'funtools/funjoin.c')
-rw-r--r--funtools/funjoin.c1172
1 files changed, 0 insertions, 1172 deletions
diff --git a/funtools/funjoin.c b/funtools/funjoin.c
deleted file mode 100644
index 9ca019e..0000000
--- a/funtools/funjoin.c
+++ /dev/null
@@ -1,1172 +0,0 @@
-/*
- * Copyright (c) 2005 Smithsonian Astrophysical Observatory
- */
-
-/*
- * funjoin -j key t1.fits t2.fits t3.fits foo.fits
- */
-
-#include <math.h>
-#include <funtoolsP.h>
-#include <word.h>
-
-/* #define JOIN_DEBUG 1 */
-#if JOIN_DEBUG
-#define IPRINTF(x) fprintf x
-#else
-#define IPRINTF(x)
-#endif
-
-#ifndef ftol
-#define ftol(x,y,t) (fabs((double)x-(double)y)<=((double)t+(double)1.0E-15))
-#endif
-
-#ifndef feq
-#define feq(x,y) (fabs((double)x-(double)y)<=(double)1.0E-15)
-#endif
-
-#define MAXIFILE 32
-#define MAXOFILE 1
-#define MAXROW 8192
-
-#define KEY_STRING 1
-#define KEY_NUMERIC 2
-
-#define JFILES_COL "JFILES"
-
-typedef unsigned int JBITMASK;
-#define JBITSIZE (int)sizeof(JBITMASK)
-
-static int maxrow=MAXROW;
-
-typedef struct _colrec{
- char *name, *oname;
- int type, mode, offset, n, width;
- int coffset;
-} *XCol, XColRec;
-
-
-typedef struct _filerec{
- char *fname;
- char *jname;
- char *bstr;
- char *actstr;
- Fun fun;
- int eof;
- int rowsize;
- int rowoffset;
- char *rowbuf;
- char *blank;
- int counter;
- int tcol;
- int maxcol;
- int ncol;
- XCol cols;
- int dtype;
- int jtype, jmode, joffset, jn, jwidth;
- int idx;
- double dval;
- double mval;
- char *sval;
- int ibase;
- GIO igio;
- int irow;
- int maxindex;
- int nindex;
- int *indexes;
-} *XFile, XFileRec;
-
-extern char *optarg;
-extern int optind;
-
-#ifdef ANSI_FUNC
-static void
-usage (char *fname)
-#else
-static void usage(fname)
- char *fname;
-#endif
-{
- fprintf(stderr,
- "usage: %s <switches> ifile1 ifile2 ... ifilen ofile\n",
- fname);
- fprintf(stderr, "optional switches:\n");
- fprintf(stderr, " -a cols # columns to activate in all files\n");
- fprintf(stderr, " -a1 cols ... an cols # columns to activate in each file\n");
- fprintf(stderr, " -b 'c1:bvl,c2:bv2' # blank values for common columns in all files\n");
- fprintf(stderr, " -bn 'c1:bv1,c2:bv2' # blank values for columns in specific files\n");
- fprintf(stderr, " -j col # column to join in all files\n");
- fprintf(stderr, " -j1 col ... jn col # column to join in each file\n");
- fprintf(stderr, " -m min # min matches to output a row\n");
- fprintf(stderr, " -M max # max matches to output a row\n");
- fprintf(stderr, " -s # add 'jfiles' status column\n");
- fprintf(stderr, " -S col # add col as status column\n");
- fprintf(stderr, " -t tol # tolerance for joining numeric cols [2 files only]\n");
- fprintf(stderr, "Between 2 and %d input files are allowed.\n", MAXIFILE);
- fprintf(stderr, "\n(version: %s)\n", FUN_VERSION);
- exit(1);
-}
-
-#ifdef ANSI_FUNC
-static void
-JoinMakeBlank(XFile file, char *defblank)
-#else
-static void
-JoinMakeBlank(file, defblank)
- XFile file;
- char *defblank;
-#endif
-{
- int i, j;
- int ip=0;
- char tbuf[SZ_LINE];
- char *b;
- char *blanks[2];
- char *t, *v;
- unsigned char bval;
- short sval;
- unsigned short usval;
- int ival;
- longlong lval;
- unsigned int uival;
- float fval;
- double dval;
-
- /* start with a blank line */
- file->blank = xcalloc(file->rowsize, sizeof(char));
- /* blank specifications are separated by comma or semi */
- newdtable(",;");
-
- /* there are 2 places where we can get blank values: default and file */
- if( defblank )
- blanks[0] = xstrdup(defblank);
- else
- blanks[0] = NULL;
- if( file->bstr )
- blanks[1] = xstrdup(file->bstr);
- else
- blanks[1] = NULL;
-
-
- /* process defaults first, then overwrite with specifics */
- for(j=0; j<2; j++){
- if( !blanks[j] || !*blanks[j] ) continue;
- ip = 0;
- while( word(blanks[j], tbuf, &ip) ){
- if( (t=strchr(tbuf, ':')) ){
- v=t+1;
- *t = '\0';
- for(i=0; i<file->ncol; i++){
- if( !strcasecmp(tbuf, file->cols[i].oname) ){
- b = file->blank+file->cols[i].coffset;
- switch(file->cols[i].type){
- case 'L':
- bval = atoi(v);
- memcpy(b, &bval, sizeof(unsigned char));
- break;
- case 'X':
- switch(file->cols[i].width/file->cols[i].n){
- case 0:
- case 1:
- bval = atoi(v);
- memcpy(b, &bval, sizeof(unsigned char));
- break;
- case 2:
- sval = atoi(v);
- memcpy(b, &sval, sizeof(short));
- break;
- case 4:
- ival = atoi(v);
- memcpy(b, &ival, sizeof(int));
- break;
- default:
- gerror(stderr, "only set blank value on X of size 1,2,4\n");
- break;
- }
- break;
- case 'B':
- bval = atoi(v);
- memcpy(b, &bval, sizeof(unsigned char));
- break;
- case 'I':
- sval = atoi(v);
- memcpy(b, &sval, sizeof(short));
- break;
- case 'J':
- ival = atoi(v);
- memcpy(b, &ival, sizeof(int));
- break;
- case 'K':
-#if HAVE_LONG_LONG == 0
- gerror(stderr,
- "64-bit data support not built (long long not available)\n");
-#endif
- lval = atoll(v);
- memcpy(b, &lval, sizeof(longlong));
- break;
- case 'U':
- usval = atoi(v);
- memcpy(b, &usval, sizeof(unsigned short));
- break;
- case 'V':
- uival = atoi(v);
- memcpy(b, &uival, sizeof(unsigned int));
- break;
- case 'E':
- if( !strcasecmp(t, "nan") ){
- fval = getnanf();
- }
- else{
- fval = atof(v);
- }
- memcpy(b, &fval, sizeof(float));
- break;
- case 'D':
- if( !strcasecmp(t, "nan") ){
- dval = getnand();
- }
- else{
- dval = atof(v);
- }
- memcpy(b, &dval, sizeof(double));
- break;
- case 'A':
- strncpy(b, v, file->cols[i].width);
- break;
- }
- }
- }
- }
- else{
- gerror(stderr, "invalid blank specification: %s\n", tbuf);
- }
- }
- }
- /* free up delim table */
- freedtable();
- /* free blank strings */
- for(i=0; i<2; i++){
- if( blanks[i] ) xfree(blanks[i]);
- }
-}
-
-#ifdef ANSI_FUNC
-static int
-JoinAddCol(XFile file, char *name, char *oname,
- int type, int mode, int offset, int n, int width, int coffset)
-#else
-static int
-JoinAddCol(file, name, oname, type, mode, offset, n, width, coffset)
- XFile file;
- char *name;
- char *oname;
- int type;
- int mode;
- int offset;
- int n;
- int width;
- int coffset;
-#endif
-{
- if( !file->maxcol ){
- file->maxcol = 1;
- file->cols = xcalloc(file->maxcol, sizeof(XColRec));
- }
- else if( file->ncol >= file->maxcol ){
- file->maxcol *= 2;
- file->cols = xrealloc(file->cols, file->maxcol*sizeof(XColRec));
- }
- file->cols[file->ncol].name = xstrdup(name);;
- file->cols[file->ncol].oname = xstrdup(oname);;
- file->cols[file->ncol].type = type;
- file->cols[file->ncol].mode = mode;
- file->cols[file->ncol].offset = offset;
- file->cols[file->ncol].n = n;
- file->cols[file->ncol].width = width;
- file->cols[file->ncol].coffset = coffset;
- file->ncol++;
- return file->ncol;
-}
-
-#ifdef ANSI_FUNC
-static int
-JoinFilesLeft(XFile ifiles, int nfile)
-#else
-static int
-JoinFilesLeft(ifiles, nfile)
- XFile ifiles;
- int nfile;
-#endif
-{
- int i;
- int left=0;
-
- for(i=0; i<nfile; i++){
- if( !ifiles[i].eof ){
- left++;
- }
- }
- return left;
-}
-
-#ifdef ANSI_FUNC
-static int
-JoinReadNext(XFile ifiles, int nfile, int which)
-#else
-static int
-JoinReadNext(ifiles, nfile, which)
- XFile ifiles;
- int nfile;
- int which;
-#endif
-{
- int i;
- int got;
- int lo, hi;
- int nrec=0;
-
- if( which < 0 ){
- lo = 0;
- hi = ABS(which)-1;
- }
- else{
- lo = which;
- hi = which;
- }
- if( hi > nfile ) return 0;
- for(i=lo; i<=hi; i++){
- if( ifiles[i].eof ) continue;
- FunTableRowGet(ifiles[i].fun, (void *)&(ifiles[i]), 1, NULL, &got);
- if( !got ){
- ifiles[i].eof = 1;
- }
- else{
- nrec++;
- }
- }
- return nrec;
-}
-
-#ifdef ANSI_FUNC
-static int
-JoinGetMatches(XFile ifiles, int nfile, int ktype, double tol, char *matches)
-#else
-static int
-JoinGetMatches(ifiles, nfile, ktype, tol, matches)
- XFile ifiles;
- int nfile;
- int ktype;
- double tol;
- char *matches;
-#endif
-{
- int i;
- int m=0;
- int ibase=-1;
- char *sval=NULL;
- double dval=0.0;
-
- /* no matches yet */
- memset(matches, 0, nfile);
- /* clear index position information */
- for(i=0; i<nfile; i++){
- ifiles[i].ibase = 0;
- ifiles[i].irow = -1;
- }
- /* get first valid file */
- for(i=0; i<nfile; i++){
- if( ifiles[i].eof ) continue;
- ibase = i;
- break;
- }
- /*make sure we have a valid file */
- if( ibase < 0 ) return 0;
- /* process all files and look for matches */
- switch(ktype){
- case KEY_STRING:
- /* get base */
- for(i=0; i<nfile; i++){
- if( ifiles[i].eof ) continue;
- /* smallest value is base */
- if( strcmp(ifiles[i].sval, ifiles[ibase].sval) < 0 ){
- ibase = i;
- }
- }
- /* this is the smallest value */
- sval = ifiles[ibase].sval;
- /* look for matches in all valid files */
- for(i=0; i<nfile; i++){
- if( ifiles[i].eof ) continue;
- /* ascii requires exact string match */
- if( !strcmp(ifiles[i].sval, sval) ){
- matches[m++] = i;
- }
- }
- break;
- case KEY_NUMERIC:
- /* get base */
- for(i=0; i<nfile; i++){
- if( ifiles[i].eof ) continue;
- /* smallest value is base */
- if( ifiles[i].dval < ifiles[ibase].dval ){
- ibase = i;
- }
- }
- /* this is the smallest value */
- dval = ifiles[ibase].dval;
- FunInfoGet(ifiles[ibase].fun, FUN_ROW, &ifiles[ibase].irow, 0);
- /* look for matches in all valid files */
- for(i=0; i<nfile; i++){
- if( ifiles[i].eof ) continue;
- /* tolerance test */
- if( (tol > 0.0) && ftol(ifiles[i].dval,dval,tol) ){
- matches[m++] = i;
- ifiles[i].mval = dval;
- FunInfoGet(ifiles[i].fun, FUN_ROW, &ifiles[i].irow, 0);
- }
- /* "exact" match */
- else if( feq(ifiles[i].dval,dval) ){
- matches[m++] = i;
- ifiles[i].irow = -1;
- }
- }
- break;
- }
- if( ibase >=0 ) ifiles[ibase].ibase = 1;
- return m;
-}
-
-#ifdef ANSI_FUNC
-static int
-JoinAddIndex(XFile ifile, int idx)
-#else
-static int
-JoinAddIndex(ifile, idx)
- XFile ifile;
- int idx;
-#endif
-{
- if( !ifile->maxindex ){
- ifile->maxindex = 1;
- ifile->indexes = xcalloc(ifile->maxindex, sizeof(int));
- }
- else if( ifile->nindex >= ifile->maxindex ){
- ifile->maxindex *= 2;
- ifile->indexes = xrealloc(ifile->indexes, ifile->maxindex*sizeof(int));
- }
- ifile->indexes[ifile->nindex++] = idx;
- return ifile->nindex;
-}
-
-#ifdef ANSI_FUNC
-static void
-JoinGatherRows(XFile ifiles, int nfile, int ktype, double tol,
- char *matches, int nmatch, int *resetflag)
-#else
-static void
-JoinGatherRows(ifiles, nfile, ktype, tol, matches, nmatch, resetflag)
- XFile ifiles;
- int nfile;
- int ktype;
- double tol;
- char *matches;
- int nmatch;
- int *resetflag
-#endif
-{
- int i, j;
- int ibase=-1;
- char *sval=NULL;
- double dval;
- double mval;
-
- /* no need to reset rows yet */
- *resetflag = -1;
- /* make sure we have matches to process */
- if( !nmatch ) return;
- /* find base value */
- for(j=0; j<nmatch; j++){
- i = matches[j];
- if( ifiles[i].ibase ){
- ibase = i;
- break;
- }
- }
- /* should never happen */
- if( ibase < 0 ) return;
- /* for each match file, look for successive rows that also match */
- for(j=0; j<nmatch; j++){
- i = matches[j];
- ifiles[i].nindex = 0;
- JoinAddIndex(&ifiles[i], ifiles[i].idx);
- switch(ktype){
- case KEY_STRING:
- sval = ifiles[i].sval;
- while( JoinReadNext(ifiles, nfile, i) ){
- if( !strcmp(sval,ifiles[i].sval) ){
- JoinAddIndex(&ifiles[i], ifiles[i].idx);
- }
- else{
- break;
- }
- }
- break;
- case KEY_NUMERIC:
- dval = ifiles[i].dval;
- mval = ifiles[i].mval;
- while( JoinReadNext(ifiles, nfile, i) ){
- if( (tol > 0.0) && (i!=ibase) && ftol(ifiles[i].dval,mval,tol) ){
- JoinAddIndex(&ifiles[i], ifiles[i].idx);
- }
- else if( feq(ifiles[i].dval,dval) ){
- JoinAddIndex(&ifiles[i], ifiles[i].idx);
- }
- else{
- /* If this is the base file and we are using a tolerance, and the
- difference between the last base and this base value is less than
- twice the tolerance, we have to reset all other file positions
- and re-check those values against this new base value. */
- if( (tol > 0.0) && (i==ibase) && ftol(ifiles[i].dval,dval,(2*tol)) ){
- *resetflag = ibase;
- }
- break;
- }
- }
- }
- }
-}
-
-#ifdef ANSI_FUNC
-static int
-JoinWriteRows(XFile files, XFile ifiles, int nfile, char *matches,
- int nmatch, int jbits, XFile ofiles)
-#else
-static int
-JoinWriteRows(files, ifiles, nfile, matches, nmatch, jbits, ofiles)
- XFile files;
- XFile ifiles;
- int nfile;
- char *matches;
- int nmatch;
- int jbits;
- XFile ofiles;
-#endif
-{
- int i, j, k;
- int ii;
- int got;
- int nrow=1;
- char *buf;
- char *rowptr;
- char *flags=NULL;
- JBITMASK *jfiles=NULL;
-
- /* allocate flags to tell which files we proces */
- if( !(flags=xcalloc(nfile, sizeof(char))) ){
- return 0;
- }
- /* allocate space for joinfiles bitflag */
- if( jbits ){
- if( !(jfiles=xcalloc(jbits/JBITSIZE, JBITSIZE)) ){
- return 0;
- }
- }
-
- /* set initial values for files which have joins */
- for(i=0; i<nmatch; i++){
- ii = (int)matches[i];
- flags[ii] = 1;
- nrow *= ifiles[ii].nindex;
- ifiles[ii].counter = 0;
- }
-
- /* process all rows */
- for(i=0; i<nrow; i++){
- /* clear output rowbuf */
- memset(ofiles[0].rowbuf, 0, ofiles[0].rowsize);
- /* make up the row */
- for(j=0; j<nfile; j++){
- if( !flags[j] ){
- /* move blanks into output record for this row */
- IPRINTF((stderr, "blank "));
- memcpy(ofiles[0].rowbuf+files[j].rowoffset,
- files[j].blank, files[j].rowsize);
- }
- /* retrieve data and transfer active columns to output */
- else{
- ii = ifiles[j].counter;
- IPRINTF((stderr, "%d ", ifiles[j].indexes[ii]));
- if( FunTableRowSeek(files[j].fun, ifiles[j].indexes[ii], NULL) <0 ){
- gerror(stderr, "can't seek to row %d: %s\n",
- ifiles[j].indexes[ii], ifiles[j].fname);
- }
- if( !(buf=FunTableRowGet(files[j].fun, NULL, 1, NULL, &got)) || !got ){
- gerror(stderr, "can't read row %d: %s\n",
- ifiles[j].indexes[ii], ifiles[j].fname);
- }
- rowptr = ofiles[0].rowbuf+files[j].rowoffset;
- for(k=0; k<files[j].ncol; k++){
- memcpy(rowptr, buf+files[j].cols[k].offset, files[j].cols[k].width);
- rowptr += files[j].cols[k].width;
- }
- if( jbits ) jfiles[j/JBITSIZE] |= 1<<(j%JBITSIZE);
- if( buf ) xfree(buf);
- }
- }
- /* save jfiles flag value, if necessary */
- if( jbits ){
- rowptr = ofiles[0].rowbuf + ofiles[0].rowoffset;
- memcpy(rowptr, jfiles, jbits);
- }
- /* write row */
- IPRINTF((stderr, "\n"));
- if( !FunTableRowPut(ofiles[0].fun, ofiles[0].rowbuf, 1, 0, NULL) ){
- gerror(stderr, "can't write output row\n");
- }
- /* inc to next row */
- for(j=nmatch-1; j>=0; j--){
- ii = matches[j];
- ifiles[ii].counter++;
- if( ifiles[ii].counter >= ifiles[ii].nindex ){
- ifiles[ii].counter = 0;
- }
- else{
- break;
- }
- }
- }
- /* free up space */
- if( flags ) xfree(flags);
- if( jfiles) xfree(jfiles);
- return 1;
-}
-
-#ifdef ANSI_FUNC
-static void
-JoinResetRows(XFile ifiles, char *matches, int nmatch, int resetflag)
-#else
-static void
-JoinResetRows(ifiles, matches, nmatch, resetflag)
- XFile ifiles;
- char *matches;
- int nmatch;
- int resetflag;
-#endif
-{
- int i, j;
- /* reset index positions so that we re-check tolerances */
- if( resetflag >= 0 ){
- for(j=0; j<nmatch; j++){
- i = matches[j];
- if( !ifiles[i].ibase && (ifiles[i].irow>=0) ){
- ifiles[i].eof = 0;
- FunTableRowSeek(ifiles[i].fun, ifiles[i].irow, NULL);
- JoinReadNext(ifiles, i, i);
- }
- }
- }
-}
-
-#ifdef ANSI_FUNC
-int
-main (int argc, char **argv)
-#else
-int
-main(argc, argv)
- int argc;
- char **argv;
-#endif
-{
- int i, j, k;
- int type, mode, offset, n, width;
- int namei;
- int coffset;
- int resetflag=-1;
- int jbits=0;
- int nmatch=0;
- int minmatch=1;
- int maxmatch=MAXIFILE;
- int nfile=0;
- int tcol=0;
- int osize=0;
- int oncol=0;
- int ktype=0;
- int *ooffsets=NULL;
- char *matches=NULL;
- char *name;
- char *basename;
- char *defact=NULL;
- char *defcol=NULL;
- char *defblank=NULL;
- char *s;
- char *filtstr;
- char *jfiles=NULL;
- char **onames=NULL;
- char **otypes=NULL;
- char **omodes=NULL;
- char tbuf[SZ_LINE];
- char tbuf2[SZ_LINE];
- char namebuf[SZ_LINE];
- double tlmin, tlmax, binsiz, tscale, tzero;
- double tol;
- XFile files=NULL, ifiles=NULL, ofiles=NULL;
-
- /* exit on gio errors */
- if( !getenv("GERROR") )
- setgerror(2);
-
- /* get maxrow,if user-specified */
- if( (s=getenv("FUN_MAXROW")) != NULL )
- maxrow = atoi(s);
-
- /* we are using indexes specially and don't want to use them normally */
- putenv("FILTER_IDX_ACTIVATE=false");
-
- /* allocate input and output file arrays (we'll do the index array later) */
- if( !(files=xcalloc(MAXIFILE, sizeof(XFileRec))) ){
- gerror(stderr, "can't allocate primary record structure for join\n");
- }
- if( !(ofiles=xcalloc(MAXOFILE, sizeof(XFileRec))) ){
- gerror(stderr, "can't allocate output record structure for join\n");
- }
-
- /* process arguments */
- for(i=1; i<argc; i++) {
- if ( argv[i][0] == '-' ) {
- switch (argv[i][1]) {
- case 'a':
- if( argv[i][2] ){
- j = atoi(&argv[i][2])-1;
- if( (j >= 0) && (j < MAXIFILE) && (i < argc-1)){
- files[j].actstr = argv[++i];
- }
- else{
- gerror(stderr, "invalid index for column activate: %d\n", j+1);
- }
- }
- else{
- defact = argv[++i];
- }
- break;
- case 'b':
- if( argv[i][2] ){
- j = atoi(&argv[i][2])-1;
- if( (j >= 0) && (j < MAXIFILE) && (i < argc-1)){
- files[j].bstr = argv[++i];
- }
- else{
- gerror(stderr, "invalid index for join column: %d\n", j+1);
- }
- }
- else{
- defblank = argv[++i];
- }
- break;
- case 'j':
- if( argv[i][2] ){
- j = atoi(&argv[i][2])-1;
- if( (j >= 0) && (j < MAXIFILE) && (i < argc-1)){
- files[j].jname = argv[++i];
- }
- else{
- gerror(stderr, "invalid index for join column: %d\n", j+1);
- }
- }
- else{
- if (i < argc-1) {
- defcol = argv[++i];
- }
- }
- break;
- case 'm':
- if (i < argc-1) {
- minmatch = atoi(argv[++i])+1;
- }
- if( minmatch < 1 ) minmatch = 1;
- break;
- case 'M':
- if (i < argc-1) {
- maxmatch = atoi(argv[++i])+1;
- }
- if( maxmatch < 1 ) maxmatch = 1;
- break;
- case 's':
- jfiles = JFILES_COL;
- break;
- case 'S':
- if (i < argc-1) {
- jfiles = argv[++i];
- }
- break;
- case 't':
- if (i < argc-1) {
- tol = atof(argv[++i]);
- }
- if( tol <= 0 ){
- gerror(stderr, "tolerance value must be positive\n");
- }
- break;
- }
- continue;
- }
- /* no switch -- must be a file name */
- if( nfile < MAXIFILE ){
- files[nfile].fname = argv[i];
- nfile++;
- }
- else{
- gerror(stderr, "too many files (%d > %d)\n", nfile, MAXIFILE);
- }
- }
-
- /* make sure we have at least 2 input + 1 output file args */
- if( nfile < 3 ) usage(argv[0]);
-
- /* for now, tolerance only works with 2 files */
- if( (tol > 0.0) && (nfile > 3) ){
- gerror(stderr, "for now, -t [tol] can only join 2 files\n");
- }
-
- /* move last input to output and decrement number of input files */
- ofiles[0].fname = files[nfile-1].fname;
- nfile--;
-
- /* reallocate input files */
- if( !(files=xrealloc(files, nfile*sizeof(XFileRec))) ){
- gerror(stderr, "can't re-allocate primary record structure for join\n");
- }
-
- /* make sure we have a join column name for each file */
- for(i=0; i<nfile; i++){
- if( !files[i].jname ){
- if( defcol ){
- files[i].jname = defcol;
- }
- else{
- gerror(stderr,
- "no join column specified for file: %s\n", files[i].fname);
- }
- }
- }
-
- /* allocate exact number of index file records */
- if( !(ifiles=xcalloc(nfile, sizeof(XFileRec))) ){
- gerror(stderr, "can't allocate primary record structure for join\n");
- }
- if( !(matches=(char *)xcalloc(nfile, sizeof(char))) ){
- gerror(stderr, "can't allocate key result buffer\n");
- }
-
- /* open input and index files */
- for(i=0; i<nfile; i++){
- /* open the input data file */
- if( !(files[i].fun = FunOpen(files[i].fname, "r", NULL)) ){
- gerror(stderr,
- "can't FunOpen input file (or find extension): %s\n",
- files[i].fname);
- }
- /* make sure the join column is in this file */
- if( !FunColumnLookup(files[i].fun, files[i].jname, 0, NULL,
- &files[i].jtype,
- &files[i].jmode,
- &files[i].joffset,
- &files[i].jn,
- &files[i].jwidth) ){
- gerror(stderr, "can't find column %s in input file: %s\n",
- files[i].jname, files[i].fname);
- }
- /* check for filter (we read index directly, bypassing filters) */
- filtstr=NULL;
- FunInfoGet(files[i].fun, FUN_FILTER, &filtstr, 0);
- if( filtstr && *filtstr ){
- gerror(stderr, "row filters are not permitted: %s\n", files[i].fname);
- }
- /* activate specified columns */
- if( files[i].actstr )
- FunColumnActivate(files[i].fun, files[i].actstr, NULL);
- else
- FunColumnActivate(files[i].fun, defact, NULL);
- /* reset rowoffset flag for this file */
- files[i].rowoffset = -1;
- /* get number of possible columns */
- FunInfoGet(files[i].fun, FUN_NCOL, &(files[i].tcol), 0);
- /* temp counter of total number of columns */
- tcol += files[i].tcol;
- /* open the index for the specified join column */
- idxinitfilenames(files[i].fun->header->filename, NULL);
- s = idxindexfilename(files[i].jname, NULL);
- idxfreefilenames();
- if( !s ){
- gerror(stderr, "can't find index file for column '%s' in file: %s\n",
- files[i].jname, files[i].fname);
- }
- if( !(ifiles[i].fun = FunOpen(s, "r", NULL)) ){
- gerror(stderr, "can't FunOpen index file (or find extension): %s\n", s);
- }
- /* get gio handle for seeking and sving */
- FunInfoGet(ifiles[i].fun, FUN_GIO, &ifiles[i].igio, 0);
- /* make sure the join column is in the index file */
- ifiles[i].fname = xstrdup(s);
- ifiles[i].jname = files[i].jname;
- if( !FunColumnLookup(ifiles[i].fun, ifiles[i].jname, 0, NULL,
- &ifiles[i].jtype,
- &ifiles[i].jmode,
- &ifiles[i].joffset,
- &ifiles[i].jn,
- &ifiles[i].jwidth) ){
- gerror(stderr, "can't find column %s in index file: %s\n",
- ifiles[i].jname, ifiles[i].fname);
- }
- /* define how we will read index file, based on data type of join column */
- switch(ifiles[i].jtype){
- case 'B':
- case 'I':
- case 'J':
- case 'K':
- case 'U':
- case 'V':
- case 'L':
- case 'X':
- FunColumnSelect(ifiles[i].fun, sizeof(XFileRec), NULL,
- "n", "J", "r", FUN_OFFSET(XFile, idx),
- ifiles[i].jname, "D", "r", FUN_OFFSET(XFile, dval),
- NULL);
- ifiles[i].dtype = 'D';
- ktype |= KEY_NUMERIC;
- break;
- case 'D':
- case 'E':
- FunColumnSelect(ifiles[i].fun, sizeof(XFileRec), NULL,
- "n", "J", "r", FUN_OFFSET(XFile, idx),
- ifiles[i].jname, "D", "r", FUN_OFFSET(XFile, dval),
- NULL);
- ifiles[i].dtype = 'D';
- ktype |= KEY_NUMERIC;
- break;
- case 'A':
- snprintf(tbuf, SZ_LINE-1, "@%dA", ifiles[i].jn);
- FunColumnSelect(ifiles[i].fun, sizeof(XFileRec), NULL,
- "n", "J", "r", FUN_OFFSET(XFile, idx),
- ifiles[i].jname, tbuf, "r", FUN_OFFSET(XFile, sval),
- NULL);
- ifiles[i].dtype = 'A';
- ifiles[i].sval = xcalloc(ifiles[i].jn+1, sizeof(char));
- ktype |= KEY_STRING;
- break;
- default:
- gerror(stderr, "bad datatype for join column: %c\n", ifiles[i].jtype);
- }
- /* free up temp space */
- if( s ) xfree(s);
- }
- /* we don't allow mixing of string and numeric values */
- if( ktype == (KEY_STRING|KEY_NUMERIC) ){
- gerror(stderr, "can't mix string and numeric join columns\n");
- }
-
- /* open output file */
- if( !(ofiles[0].fun = FunOpen(ofiles[0].fname, "w", NULL)) ){
- gerror(stderr, "can't FunOpen output file: %s\n", ofiles[0].fname);
- }
-
- /* allocate space for the max number of columns we can have (incl jfiles) */
- onames = (char **)xcalloc(tcol+1, sizeof(char *));
- otypes = (char **)xcalloc(tcol+1, sizeof(char *));
- omodes = (char **)xcalloc(tcol+1, sizeof(char *));
- ooffsets = (int *)xcalloc(tcol+1, sizeof(int));
-
- /* contruct list of output columns */
- for(i=0; i<nfile; i++){
- coffset = 0;
- for(j=0; j<files[i].tcol; j++){
- if( !FunColumnLookup(files[i].fun, NULL, j,
- &name, &type, &mode, &offset, &n, &width) ){
- gerror(stderr,
- "can't find column %d in input file: %s\n", j, files[i].fname);
- }
- if( mode & COL_ACTIVE ){
- /* save original name in case of duplicate */
- basename = name;
- /* first numeric value we will append */
- namei = 2;
-again:
- for(k=0; k<oncol; k++){
- if( !strcasecmp(name, onames[k]) ){
- k = -1;
- break;
- }
- }
- /* append a file number to duplicate names */
- if( k < 0 ){
- snprintf(namebuf, SZ_LINE-1, "%s_%d", basename, namei);
- name = namebuf;
- namei++;
- IPRINTF((stderr, "trying new col name for file %d: %s\n", i, name));
- goto again;
- }
- /* add column */
- JoinAddCol(&files[i], name, basename, type, mode, offset, n, width,
- coffset);
- /* bump offset into current row */
- coffset += width;
- /* size of active columns for this file only -- save in index rec */
- files[i].rowsize += width;
- /* offset into output where this file's contribution starts */
- if( files[i].rowoffset < 0 ) files[i].rowoffset = osize;
- /* get auxiliary info */
- FunColumnLookup2(files[i].fun, NULL, j,
- &tlmin, &tlmax, &binsiz, &tscale, &tzero);
- /* generate type string */
- snprintf(tbuf, SZ_LINE-1, "%d%c", n, type);
- if( !feq(tlmin, tlmax) ){
- snprintf(tbuf2, SZ_LINE-1, ":%f:%f", tlmin, tlmax);
- strncat(tbuf, tbuf2, SZ_LINE-1);
- }
- if( !feq(binsiz, 0.0) && !feq(binsiz, 1.0) ){
- snprintf(tbuf2, SZ_LINE-1, ":%f", binsiz);
- strncat(tbuf, tbuf2, SZ_LINE-1);
- }
- if( !feq(tscale, 0.0) ){
- snprintf(tbuf2, SZ_LINE-1, ";%f", tscale);
- strncat(tbuf, tbuf2, SZ_LINE-1);
- if( !feq(tzero, 0.0) ){
- snprintf(tbuf2, SZ_LINE-1, ":%f", tzero);
-
- strncat(tbuf, tbuf2, SZ_LINE-1);
- }
- }
- IPRINTF((stderr, "%d: name=%s type=%c mode=%o offset=%d n=%d => %s\n",
- i, name, type, mode, offset, n, tbuf));
- /* save column info */
- onames[oncol] = xstrdup(name);
- otypes[oncol] = xstrdup(tbuf);
- omodes[oncol] = "w";
- ooffsets[oncol] = osize;
- /* total size for all files */
- osize += width;
- oncol++;
- }
- }
- /* create blank line for this file */
- JoinMakeBlank(&files[i], defblank);
- /* add filename to header */
- FunParamPuts(ofiles[0].fun, "JFILE", i+1, files[i].fname, "join file", 1);
- }
-
- /* and one more for joinfiles, if needed */
- if( jfiles ){
- strncpy(tbuf, jfiles, SZ_LINE-1);
- cluc(tbuf);
- onames[oncol] = xstrdup(tbuf);
- jbits = ((MAXIFILE+7)/8)*8;
- snprintf(tbuf, SZ_LINE-1, "%dX", jbits);
- otypes[oncol] = xstrdup(tbuf);
- omodes[oncol] = "w";
- ooffsets[oncol] = osize;
- /* save for later use */
- ofiles[0].rowoffset = osize;
- /* total size for all files */
- osize += (jbits/8);
- oncol++;
- }
-
- /* reallocate output column array to correct size */
- onames = (char **)xrealloc(onames, oncol*sizeof(char *));
- otypes = (char **)xrealloc(otypes, oncol*sizeof(char *));
- omodes = (char **)xrealloc(omodes, oncol*sizeof(char *));
- ooffsets = (int *)xrealloc(ooffsets, oncol*sizeof(int));
-
- /* set up the output columns */
- FunColumnSelectArr(ofiles[0].fun, osize, NULL,
- onames, otypes, omodes, ooffsets, oncol);
-
- /* this tells us the size of the output buffer */
- FunInfoGet(ofiles[0].fun, FUN_ROWSIZE, &(ofiles[0].rowsize), 0);
- ofiles[0].rowbuf = xcalloc(ofiles[0].rowsize, sizeof(char));
-
- /* read a record from each index */
- JoinReadNext(ifiles, nfile, -nfile);
-
- /* check for joins, all rows in all files */
- while( JoinFilesLeft(ifiles, nfile) > 0 ){
- nmatch = JoinGetMatches(ifiles, nfile, ktype, tol, matches);
- /* yikes ... when we reset rows to check against the next base,
- we don't want to write out anything if there is no match, since this
- was already done with the last base ... its confusing */
- if( (tol > 0.0) && (nmatch == 1) && (resetflag >=0) &&
- (resetflag != matches[0]) ){
- for(i=0; i<nmatch; i++){
- JoinReadNext(ifiles, nfile, matches[i]);
- }
- }
- /* this is the normal output of matched rows */
- else if( (nmatch >= minmatch) && (nmatch <= maxmatch) ){
- JoinGatherRows(ifiles, nfile, ktype, tol, matches, nmatch, &resetflag);
- /* write all matched rows */
- if(!JoinWriteRows(files, ifiles, nfile, matches, nmatch, jbits, ofiles)){
- gerror(stderr, "can't write rows for join\n");
- }
- /* might have to reset the rows when using tolerance values */
- JoinResetRows(ifiles, matches, nmatch, resetflag);
- }
- /* no output, so we advance the rows that matched */
- else{
- for(i=0; i<nmatch; i++){
- JoinReadNext(ifiles, nfile, matches[i]);
- }
- }
- }
-
- /* free output column arrays */
- if( omodes ) xfree(omodes);
- if( ooffsets ) xfree(ooffsets);
- if( otypes ){
- for(i=0; i<oncol; i++){
- if( otypes[i] ) xfree(otypes[i]);
- }
- xfree(otypes);
- }
- if( onames ){
- for(i=0; i<oncol; i++){
- if( onames[i] ) xfree(onames[i]);
- }
- xfree(onames);
- }
-
- /* close input files and index files */
- if( files ){
- for(i=0; i<nfile; i++){
- if( files[i].fun ) FunClose(files[i].fun);
- if( files[i].blank ) xfree(files[i].blank);
- if( files[i].cols ){
- for(j=0; j<files[i].ncol; j++){
- if( files[i].cols[j].name ) xfree(files[i].cols[j].name);
- if( files[i].cols[j].oname ) xfree(files[i].cols[j].oname);
- }
- xfree(files[i].cols);
- }
- }
- xfree(files);
- }
- if( ifiles ){
- for(i=0; i<nfile; i++){
- if( ifiles[i].fun ) FunClose(ifiles[i].fun);
- if( ifiles[i].fname ) xfree(ifiles[i].fname);
- if( ifiles[i].indexes ) xfree(ifiles[i].indexes);
- if( ifiles[i].sval ) xfree(ifiles[i].sval);
- }
- xfree(ifiles);
- }
- if( ofiles ){
- for(i=0; i<MAXOFILE; i++){
- if( ofiles[i].fun ) FunClose(ofiles[i].fun);
- if( ofiles[i].rowbuf ) xfree(ofiles[i].rowbuf);
- }
- xfree(ofiles);
- }
-
- /* free up everything else */
- if( matches ) xfree(matches);
-
- return(0);
-}