summary refs log tree commit diff stats
path: root/funtools/util/parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'funtools/util/parse.c')
-rw-r--r-- funtools/util/parse.c 1187
1 files changed, 1187 insertions, 0 deletions
diff --git a/funtools/util/parse.c b/funtools/util/parse.c
new file mode 100644
index 0000000..a238a46
--- /dev/null
+++ b/funtools/util/parse.c
@@ -0,0 +1,1187 @@
+/*
+ * Copyright (c) 2004 Smithsonian Astrophysical Observatory
+ */
+
+#include <parse.h>
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ *
+ * Private Routines and Data
+ *
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static int __parseline; /* lines processed by ParseAnalyze; reset to 0 by ParseNew */
+static ParseRec __parserec; /* storage behind the default parser */
+static Parse _parse = &__parserec; /* default parser, used by ParseLine when it is passed NULL */
+static char _ctab[PARSE_TABLE_SIZE][PARSE_TABLE_SIZE]; /* type transitions, _ctab[prev][cur]: 1 allowed, 0 not allowed, -1 int-to-float (see ParseAnalyze) */
+
+/*
+ * _gettype -- classify a token string and optionally convert it
+ *
+ * s: token text (may be NULL)
+ * d: if non-NULL, receives the value as a double for float, integer and
+ *    hex tokens
+ * i: if non-NULL, receives the value as a longlong for integer and hex
+ *    tokens (not written for floats)
+ *
+ * returns:
+ *   PARSE_NULL     s is NULL or empty
+ *   PARSE_DASH     optional leading blanks, then nothing but '-' chars
+ *   PARSE_HEXINT   "0x"/"0X" prefix (after optional sign) and fully
+ *                  consumed by strtoll(..., 16)
+ *   PARSE_FLOAT    contains '.' or 'E' and fully consumed by SAOstrtod()
+ *   PARSE_INTEGER  fully consumed by strtoll(..., 10)
+ *   PARSE_STRING   anything else, including an all-blank string
+ *
+ * Trailing white space after a number is accepted.
+ *
+ * NOTE(review): only '.' and upper-case 'E' trigger the float test, so a
+ * token such as "1e5" goes to the integer test and ends up PARSE_STRING.
+ * Left unchanged because callers may depend on it -- confirm intent.
+ */
+#ifdef ANSI_FUNC
+static int
+_gettype(char *s, double *d, longlong *i)
+#else
+static int _gettype(s, d, i)
+     char *s;
+     double *d;
+     longlong *i;
+#endif
+{
+  char *t;
+  longlong lval;
+  double dval;
+
+  /* make sure we have something */
+  if( !s || !*s ) return PARSE_NULL;
+
+  /* the temp pointer will move along the string */
+  t = s;
+
+  /* skip leading spaces; the <ctype.h> functions require a value
+     representable as unsigned char, so never pass a plain (possibly
+     negative) char */
+  while( *t && isspace((unsigned char)*t) ) t++;
+  /* if all we had were spaces, it's a string */
+  if( !*t ) return PARSE_STRING;
+
+  /* skip optional plus sign */
+  if( *t == '+' ){
+    t++;
+  }
+  /* skip optional minus sign or detect dashes */
+  else if( *t == '-' ){
+    t++;
+    /* a lone '-' counts as a dash line element */
+    if( !*t ) return PARSE_DASH;
+    if( *t == '-' ){
+      /* two or more dashes: must be dashes all the way to the end */
+      for(t++; *t; t++){
+        if( *t != '-' ) return PARSE_STRING;
+      }
+      return PARSE_DASH;
+    }
+  }
+
+  /* look for hex indication; note that t is advanced past the '0' even
+     when no 'x' follows, which is harmless for the scan below */
+  if( (*t == '0') && ((*(++t) == 'x') || (*t == 'X')) ){
+    goto testhex;
+  }
+
+  /* quick look for indication of a float */
+  for(; *t; t++){
+    if( (*t == '.') || (*t == 'E') )
+      goto testfloat;
+  }
+  goto testint;
+
+  /* each test converts from the start of the string (so that leading
+     blanks and sign are handled by the converter) and then requires
+     that only white space remains */
+testfloat:
+  dval = SAOstrtod(s, &t);
+  while( *t && isspace((unsigned char)*t) )
+    t++;
+  if( *t != '\0' )
+    return PARSE_STRING;
+  if( d ) *d = dval;
+  return PARSE_FLOAT;
+
+testint:
+  lval = strtoll(s, &t, 10);
+  while( *t && isspace((unsigned char)*t) )
+    t++;
+  if( *t != '\0' )
+    return PARSE_STRING;
+  if( i ) *i = (longlong)lval;
+  if( d ) *d = (double)lval;
+  return PARSE_INTEGER;
+
+testhex:
+  lval = strtoll(s, &t, 16);
+  while( *t && isspace((unsigned char)*t) )
+    t++;
+  if( *t != '\0' )
+    return PARSE_STRING;
+  if( i ) *i = (longlong)lval;
+  if( d ) *d = (double)lval;
+  return PARSE_HEXINT;
+}
+
+/*
+ * _ParseInitialize -- fill in the type-transition table _ctab
+ *
+ * _ctab[prev][cur] describes whether a column whose previous row had
+ * type "prev" may have type "cur" in the current row:
+ *    1  transition is allowed
+ *    0  transition is not allowed (the state of a cleared entry)
+ *   -1  integer/hex to float, which ParseAnalyze accepts only under
+ *       extra conditions
+ * The entries are grouped below by the type being converted TO.
+ */
+#ifdef ANSI_FUNC
+static void
+_ParseInitialize(void)
+#else
+static void _ParseInitialize()
+#endif
+{
+  /* start with every transition disallowed */
+  memset(_ctab, 0, sizeof(_ctab));
+
+  /* anything may be followed by a null value */
+  _ctab[PARSE_NULL][PARSE_NULL] = 1;
+  _ctab[PARSE_DASH][PARSE_NULL] = 1;
+  _ctab[PARSE_FLOAT][PARSE_NULL] = 1;
+  _ctab[PARSE_INTEGER][PARSE_NULL] = 1;
+  _ctab[PARSE_HEXINT][PARSE_NULL] = 1;
+  _ctab[PARSE_STRING][PARSE_NULL] = 1;
+
+  /* transitions to float */
+  _ctab[PARSE_NULL][PARSE_FLOAT] = 1;
+  _ctab[PARSE_DASH][PARSE_FLOAT] = 1;
+  _ctab[PARSE_FLOAT][PARSE_FLOAT] = 1;
+
+  /* transitions to integer */
+  _ctab[PARSE_NULL][PARSE_INTEGER] = 1;
+  _ctab[PARSE_DASH][PARSE_INTEGER] = 1;
+  _ctab[PARSE_FLOAT][PARSE_INTEGER] = 1;
+  _ctab[PARSE_INTEGER][PARSE_INTEGER] = 1;
+  _ctab[PARSE_HEXINT][PARSE_INTEGER] = 1;
+  _ctab[PARSE_STRING][PARSE_INTEGER] = 1;
+
+  /* transitions to hex integer */
+  _ctab[PARSE_NULL][PARSE_HEXINT] = 1;
+  _ctab[PARSE_DASH][PARSE_HEXINT] = 1;
+  _ctab[PARSE_FLOAT][PARSE_HEXINT] = 1;
+  _ctab[PARSE_INTEGER][PARSE_HEXINT] = 1;
+  _ctab[PARSE_HEXINT][PARSE_HEXINT] = 1;
+
+  /* transitions to string */
+  _ctab[PARSE_NULL][PARSE_STRING] = 1;
+  _ctab[PARSE_DASH][PARSE_STRING] = 1;
+  _ctab[PARSE_INTEGER][PARSE_STRING] = 1;
+  _ctab[PARSE_STRING][PARSE_STRING] = 1;
+
+  /* i2f conversions are marked specially (require explicit enabling);
+     these stay last so that they are the final word on those entries */
+  _ctab[PARSE_INTEGER][PARSE_FLOAT] = -1;
+  _ctab[PARSE_HEXINT][PARSE_FLOAT] = -1;
+}
+
+/*
+ * _ParseEOT -- match an input line against the end-of-table sequence
+ *
+ * The EOT specification is a list of one or more lines that must be seen
+ * consecutively.  parse->eot->ncur is the index of the line expected next.
+ *
+ * returns:
+ *    1  this line completes the EOT sequence (ncur is reset to 0)
+ *   -1  this line matches a non-final line of the sequence
+ *    0  no match, no EOT configured, bad arguments, or the parser is in
+ *       a PARSE_STATE_BAD state
+ */
+#ifdef ANSI_FUNC
+static int
+_ParseEOT(Parse parse, char *line)
+#else
+static int _ParseEOT(parse, line)
+     Parse parse;
+     char *line;
+#endif
+{
+  ParsedEOT eot;
+
+  if( !parse || !line || !parse->eot || (parse->state & PARSE_STATE_BAD) )
+    return 0;
+  eot = parse->eot;
+
+  /* an EOT spec that produced no lines (e.g. "()") can never match;
+     its lines[0] is NULL and must not reach strcmp() */
+  if( !eot->lines || (eot->nline <= 0) )
+    return 0;
+
+  if( !strcmp(eot->lines[eot->ncur], line) ){
+    /* matched the expected line: either done or part way through */
+    eot->ncur++;
+    if( eot->ncur == eot->nline ){
+      eot->ncur = 0;
+      return 1;
+    }
+    return -1;
+  }
+
+  /* mismatch: start over, but this line might itself begin a new
+     occurrence of the sequence */
+  eot->ncur = 0;
+  if( !strcmp(eot->lines[eot->ncur], line) ){
+    eot->ncur++;
+    return -1;
+  }
+  return 0;
+}
+
+/*
+ * _ParseFixTokens -- pad line2 with null tokens up to line1's token count
+ *
+ * Horrible vizier hack: the units line often is missing trailing null
+ * values, so a short line2 is extended with PARSE_NULL tokens until it has
+ * as many tokens as line1.
+ *
+ * returns: 1 if line2 was extended, 0 if nothing needed doing, -1 on a
+ * NULL argument
+ */
+#ifdef ANSI_FUNC
+static int
+_ParseFixTokens(ParsedLine line1, ParsedLine line2)
+#else
+static int _ParseFixTokens(line1, line2)
+     ParsedLine line1;
+     ParsedLine line2;
+#endif
+{
+  int i;
+  int added;
+
+  /* sanity check */
+  if( !line1 || !line2 ) return -1;
+
+  /* nothing to do unless line2 is the shorter one */
+  if( line2->ntoken >= line1->ntoken ) return 0;
+  added = line1->ntoken - line2->ntoken;
+
+  line2->tokens = (ParsedToken)xrealloc(line2->tokens,
+                                        line1->ntoken*sizeof(ParsedTokenRec));
+  /* the types string is indexed by token number elsewhere (ParseAnalyze,
+     ParseLineDup), so it has to grow in step with the token array */
+  line2->types = (char *)xrealloc(line2->types,
+                                  (line1->ntoken+1)*sizeof(char));
+  for(i=line2->ntoken; i<line1->ntoken; i++){
+    line2->tokens[i].sval = NULL;
+    line2->tokens[i].type = PARSE_NULL;
+    line2->tokens[i].delim = '\0';
+    line2->types[i] = PARSE_NULL;
+  }
+  line2->types[line1->ntoken] = '\0';
+  /* keep the per-type counters consistent with the new token count */
+  line2->ntypes[PARSE_NULL] += added;
+  line2->maxtoken = line1->ntoken;
+  line2->ntoken = line1->ntoken;
+  return 1;
+}
+
+/*
+ * _ParseLineState -- compute the parser state that results from the
+ * current line (parse->cur), given the state before it
+ *
+ * parse:  parser whose cur/prev/prev2 lines are examined; header, units
+ *         and data1 may be filled in with copies of those lines
+ * istate: state before this line
+ * mode:   unused
+ *
+ * returns the new state.  An EOT line gives PARSE_STATE_EOT; lines with no
+ * tokens, and comments outside the comment-EOT cases, leave the state
+ * unchanged.  Otherwise the result depends on istate:
+ *   INITIAL: all dashes -> DATA; all nulls -> line is relabelled as a
+ *            comment and the state is unchanged; all strings/nulls ->
+ *            STRING; anything else -> DATA with data1 set
+ *   STRING:  decides whether the previous line(s) were header/units and
+ *            moves to DATA, or stays in STRING while waiting for a units
+ *            line
+ *   DATA:    all dashes -> BADMATCH; with lazyeot, an all-string line that
+ *            does not look like the first data line ends the table (or is
+ *            BADTYPE unless PARSE_LOOSELY); otherwise stays DATA
+ * Any other incoming state is returned as is (BADMATCH, BADMAX, UNKNOWN)
+ * or mapped to UNKNOWN.
+ */
+#ifdef ANSI_FUNC
+static int
+_ParseLineState(Parse parse, int istate, char *UNUSED(mode))
+#else
+static int _ParseLineState(parse, istate, mode)
+     Parse parse;
+     int istate;
+     char *mode;
+#endif
+{
+  int state;
+  ParsedLine line;
+
+  /* sanity check */
+  if( !parse ) return PARSE_STATE_UNKNOWN;
+
+  /* get line */
+  line = parse->cur;
+  if( !line ) return PARSE_STATE_UNKNOWN;
+
+  /* look for EOT and blanks */
+  if( line->types[0] == PARSE_EOT ) return(PARSE_STATE_EOT);
+  if( !line->ntoken ) return(istate);
+
+  /* comments might signify EOT under certain circumstances */
+  if( line->types[0] == PARSE_COMMENT ){
+    if( (istate == PARSE_STATE_DATA) && parse->data1 ){
+      /* the middle of a multi-line EOT looks like a comment: just return */
+      if( parse->eot && parse->eot->ncur ) return(istate);
+      /* else check for comment-based EOT */
+      switch(parse->comeot){
+      case 0:
+        /* comments never end a table */
+        return(istate);
+      case 1:
+        /* a comment ends the table unless the comment char was
+           immediately followed by a newline */
+        if( line->tokens[0].delim == '\n' )
+          return(istate);
+        state = PARSE_STATE_EOT|PARSE_STATE_REDOLINE;
+        return(state);
+      case 2:
+        /* any comment ends the table */
+        state = PARSE_STATE_EOT|PARSE_STATE_REDOLINE;
+        return(state);
+      default:
+        return(istate);
+      }
+    }
+    else{
+      return(istate);
+    }
+  }
+
+  switch(istate){
+  case PARSE_STATE_INITIAL:
+    if( line->ntypes[PARSE_DASH] == line->ntoken ){
+      state = PARSE_STATE_DATA;
+    }
+    /* yuck: fix 'justify' bug that puts spaces in blank header lines */
+    else if( line->ntypes[PARSE_NULL] == line->ntoken ){
+      /* make believe we just saw a comment */
+      line->types[0] = PARSE_COMMENT;
+      return istate;
+    }
+    else if( (line->ntypes[PARSE_STRING]+line->ntypes[PARSE_NULL]) ==
+             line->ntoken ){
+      state = PARSE_STATE_STRING;
+    }
+    else{
+      state = PARSE_STATE_DATA;
+      parse->data1 = ParseLineDup(parse, line);
+    }
+    break;
+  case PARSE_STATE_STRING:
+    if( line->ntypes[PARSE_DASH] == line->ntoken ){
+      /* dash line: what came before was the header (and units) */
+      state = PARSE_STATE_DATA;
+      if( parse->needunits && parse->prev2 ){
+        parse->header = ParseLineDup(parse, parse->prev2);
+        parse->units = ParseLineDup(parse, parse->prev);
+      }
+      else{
+        parse->header = ParseLineDup(parse, parse->prev);
+      }
+    }
+    else if( (line->ntypes[PARSE_STRING]+line->ntypes[PARSE_NULL]) ==
+             line->ntoken ){
+      /* another all-string line */
+      if( parse->needunits ){
+        if( parse->prev2 ){
+          state = PARSE_STATE_DATA;
+          parse->header = ParseLineDup(parse, parse->prev2);
+          parse->units = ParseLineDup(parse, parse->prev);
+        }
+        else{
+          state = PARSE_STATE_STRING;
+          /* if needunits is set, this might be a broken vizier unit line */
+          _ParseFixTokens(parse->prev, line);
+        }
+      }
+      else{
+        if( parse->needheader ){
+          state = PARSE_STATE_DATA;
+          parse->header = ParseLineDup(parse, parse->prev);
+          parse->data1 = ParseLineDup(parse, line);
+        }
+        else{
+          state = PARSE_STATE_DATA;
+          parse->data1 = ParseLineDup(parse, parse->prev);
+        }
+      }
+    }
+    else{
+      /* first line containing non-string data */
+      state = PARSE_STATE_DATA;
+      if( parse->needunits && parse->prev2 ){
+        parse->header = ParseLineDup(parse, parse->prev2);
+        parse->units = ParseLineDup(parse, parse->prev);
+      }
+      else{
+        parse->header = ParseLineDup(parse, parse->prev);
+      }
+      parse->data1 = ParseLineDup(parse, line);
+    }
+    break;
+  case PARSE_STATE_DATA:
+    /* all dashes means that we missed an EOT somewhere */
+    if( line->ntypes[PARSE_DASH] == line->ntoken ){
+      state = PARSE_STATE_BADMATCH;
+    }
+    /* all tokens are string might mean lazy EOT */
+    else if( (line->ntypes[PARSE_STRING] == line->ntoken) && parse->lazyeot ){
+      /* all tokens are strings and we have to look for a "lazy eot".
+         we look for a mismatch in the number of tokens or in the number
+         of string/null tokens between this line and the first data line */
+      if( parse->data1 &&
+          ((parse->data1->ntoken != line->ntoken) ||
+           ((parse->data1->ntypes[PARSE_STRING]+parse->data1->ntypes[PARSE_NULL]) != line->ntoken)) ){
+#if PARSE_LOOSELY
+        /* treat this line as the first line of a new table */
+        state = _ParseLineState(parse, PARSE_STATE_INITIAL, NULL);
+        state |= PARSE_STATE_EOT|PARSE_STATE_NEXTLINE;
+#else
+        state = PARSE_STATE_BADTYPE;
+#endif
+      }
+      else{
+        state = PARSE_STATE_DATA;
+        /* 8/22: I added this line to support tables containing only ascii
+           columns. But is there a reason why it was not here before??? */
+        if( !parse->data1 ) parse->data1 = ParseLineDup(parse, line);
+      }
+    }
+    else{
+      state = PARSE_STATE_DATA;
+      if( !parse->data1 ) parse->data1 = ParseLineDup(parse, line);
+    }
+    break;
+  case PARSE_STATE_BADMATCH:
+    state = PARSE_STATE_BADMATCH;
+    break;
+  case PARSE_STATE_BADMAX:
+    state = PARSE_STATE_BADMAX;
+    break;
+  case PARSE_STATE_UNKNOWN:
+    state = PARSE_STATE_UNKNOWN;
+    break;
+  default:
+    state = PARSE_STATE_UNKNOWN;
+    break;
+  }
+  return state;
+}
+
+/*
+ * _ParseLineFree -- release a parsed line and everything it owns
+ *
+ * Frees the string value of each of the line's ntoken tokens, then the
+ * types string, the token array and the line record itself.
+ *
+ * returns: 1 if a line was freed, 0 if line was NULL
+ */
+#ifdef ANSI_FUNC
+static int
+_ParseLineFree(ParsedLine line)
+#else
+static int _ParseLineFree(line)
+     ParsedLine line;
+#endif
+{
+  int n = 0;
+
+  /* nothing to free */
+  if( line == NULL )
+    return 0;
+
+  /* per-token strings first, while the token array still exists */
+  while( n < line->ntoken ){
+    if( line->tokens[n].sval != NULL ){
+      xfree(line->tokens[n].sval);
+    }
+    n++;
+  }
+
+  /* then the arrays hanging off the record */
+  if( line->types != NULL ){
+    xfree(line->types);
+  }
+  if( line->tokens != NULL ){
+    xfree(line->tokens);
+  }
+
+  /* and finally the record */
+  xfree(line);
+  return 1;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ *
+ * Public Routines
+ *
+ *
+ *----------------------------------------------------------------------------
+ */
+
+/*
+ * ParseWord -- extract the next token from a line buffer
+ *
+ * delims:     table indexed by character; non-zero marks a delimiter
+ * comtab:     table indexed by character; non-zero marks a comment char
+ *             (NULL means no comment characters)
+ * nullvalues: if zero, leading delimiters are skipped; if non-zero, an
+ *             empty field followed by a delimiter is reported as -1
+ * whitespace: if zero, the token is passed through nowhite() when it is
+ *             longer than one character
+ * lbuf:       NUL-terminated input line
+ * token:      if tmax is non-zero, a char buffer of tmax bytes to fill;
+ *             if tmax is zero, the address of a char * that receives a
+ *             newly allocated string (caller frees)
+ * tmax:       size of the token buffer, or 0 to allocate
+ * lptr:       in/out offset into lbuf; advanced past the token and its
+ *             delimiter (not updated on the 0 and -2 early returns taken
+ *             before any scanning)
+ * lastd:      receives the character that ended the token, or the quote
+ *             character if the token ended with a quoted string
+ *
+ * returns:
+ *   >0  number of characters stored (counted before white space removal)
+ *    0  end of input, or an empty token
+ *   -1  empty token followed by a delimiter (nullvalues only)
+ *   -2  comment character at the very start of the line
+ *
+ * Quoted strings ("..." or '...') are copied without the quotes and may
+ * contain delimiters; a quote preceded by a backslash does not end the
+ * string.
+ *
+ * NOTE(review): table lookups index by (unsigned char), which assumes the
+ * tables hold at least UCHAR_MAX+1 entries -- confirm PARSE_TABLE_SIZE in
+ * parse.h.
+ */
+#ifdef ANSI_FUNC
+int
+ParseWord(int *delims, int *comtab, int nullvalues, int whitespace,
+          char *lbuf, void *token, int tmax, int *lptr, int *lastd)
+#else
+int ParseWord(delims, comtab, nullvalues, whitespace,
+              lbuf, token, tmax, lptr, lastd)
+     int *delims;
+     int *comtab;
+     int nullvalues;
+     int whitespace;
+     char *lbuf;
+     void *token;
+     int tmax;
+     int *lptr;
+     int *lastd;
+#endif
+{
+  int ip;
+  int i;
+  int tlen;
+  int tcomtab[PARSE_TABLE_SIZE];
+  char quotes;
+  char *tbuf=NULL;
+
+  /* null out the output string, if passed in */
+  if( tmax )
+    *(char *)token = '\0';
+
+  /* reset last delimiter */
+  *lastd ='\0';
+
+  /* look for comtab */
+  if( !comtab ){
+    memset(tcomtab, 0, PARSE_TABLE_SIZE*sizeof(int));
+    comtab = tcomtab;
+  }
+
+  /* a more convenient pointer */
+  ip = *lptr;
+
+  /* if no buf, or we are at the end, just return */
+  if( !lbuf || !lbuf[ip] ){
+    *lastd = '\0';
+    return(0);
+  }
+
+  /* comment at beginning of line, just return.
+     Characters are converted to unsigned char before being used as a
+     table index: a plain char may be negative for bytes >= 0x80 */
+  if( (ip == 0) && comtab[(unsigned char)lbuf[ip]] ){
+    *lastd = lbuf[ip];
+    return(-2);
+  }
+
+  /* skip over starting consecutive delims, if not processing null values */
+  if( !nullvalues ){
+    while( delims[(unsigned char)lbuf[ip]] ){
+      if( lbuf[ip] == '\0' ){
+        *lptr = ip;
+        return(0);
+      }
+      ip++;
+    }
+  }
+
+  /* allocate token space if necessary; this is done only after the early
+     returns above so that they cannot leak the buffer */
+  if( !tmax ){
+    tlen = SZ_LINE;
+    tbuf = xcalloc(tlen+1, sizeof(char));
+  }
+  else{
+    tbuf = (char *)token;
+    tlen = tmax;
+  }
+
+  /* grab up to next delim or comment */
+  for(i=0;
+      lbuf[ip] &&
+      !delims[(unsigned char)lbuf[ip]] && !comtab[(unsigned char)lbuf[ip]];
+      ip++){
+    /* first check for an explicit quote */
+    if( lbuf[ip] == '"' ){
+      quotes = '"';
+      *lastd = '"';
+    }
+    else if( lbuf[ip] == '\'' ){
+      quotes = '\'';
+      *lastd = '\'';
+    }
+    else{
+      quotes = '\0';
+    }
+    /* process quoted string as a single token */
+    if( quotes != '\0' ){
+      /* bump past quote */
+      ip++;
+      /* grab up to next quote -- but skip escaped quotes */
+      for(; lbuf[ip] != '\0'; ip++){
+        if( (lbuf[ip] == quotes) && ((ip==0) || lbuf[ip-1] != '\\') ){
+          break;
+        }
+        if( (tlen >= 0) && (i >= tlen) ){
+          /* fixed buffer is full: stop copying */
+          if( tmax ){
+            break;
+          }
+          tlen += SZ_LINE;
+          tbuf = xrealloc(tbuf, tlen);
+        }
+        tbuf[i++] = lbuf[ip];
+      }
+      /* unterminated quote: we are sitting on the NUL, so leave now
+         rather than let the loop increment step past the end of lbuf */
+      if( lbuf[ip] == '\0' )
+        break;
+    }
+    /* single non-quoted character */
+    else{
+      if( (tlen >= 0) && (i >= tlen) ){
+        if( tmax ){
+          break;
+        }
+        tlen += SZ_LINE;
+        tbuf = xrealloc(tbuf, tlen);
+      }
+      tbuf[i++] = lbuf[ip];
+      /* reset lastd to erase any trace of a quoted delim */
+      *lastd = '\0';
+    }
+  }
+  /* save this delimiter (unless we ended with a quoted string) */
+  if( !*lastd ) *lastd = lbuf[ip];
+  /* bump past delimiter (but not null terminator) */
+  if( lbuf[ip] ) ip++;
+
+  if( !tmax ){
+    /* shrink (or grow by one) to the exact size, terminator included */
+    tbuf = xrealloc(tbuf, i+1);
+    *(char **)token = tbuf;
+  }
+  else if( i >= tlen ){
+    /* fixed buffer filled completely: give up the last character to make
+       room for the terminator */
+    i = tlen-1;
+  }
+  /* null terminate */
+  tbuf[i] = '\0';
+
+  /* remove surrounding white space, if necessary */
+  if( !whitespace && (strlen(tbuf)>1) )
+    nowhite(tbuf, tbuf);
+
+  /* got something */
+  *lptr = ip;
+  /* make allowance for nullvalues, if necessary */
+  if( nullvalues && !i && *lastd ) i = -1;
+  return(i);
+}
+
+/*
+ * _ParseUnescape -- map the character following a backslash
+ *
+ * returns the control character for n, t, r and f, and dflt for any
+ * other character
+ */
+#ifdef ANSI_FUNC
+static int
+_ParseUnescape(int c, int dflt)
+#else
+static int _ParseUnescape(c, dflt)
+     int c;
+     int dflt;
+#endif
+{
+  switch(c){
+  case 'n':
+    return '\n';
+  case 't':
+    return '\t';
+  case 'r':
+    return '\r';
+  case 'f':
+    return '\014';
+  default:
+    return dflt;
+  }
+}
+
+/*
+ * ParseNew -- allocate and configure a parser
+ *
+ * delims:   delimiter characters (NULL: PARSE_DEFAULT_DELIMS)
+ * comchars: comment characters (NULL: PARSE_DEFAULT_COMCHARS)
+ * eot:      end-of-table specification, or NULL/empty for none; it is
+ *           split at newlines into the lines that must be seen in
+ *           sequence, and a missing final newline is supplied
+ * mode:     keyword string; the keywords looked up are nullvalues,
+ *           whitespace, header, units, i2f, debug, convert, comeot and
+ *           lazyeot
+ *
+ * In delims, comchars and eot, surrounding parentheses are removed and
+ * the escapes \n \t \r \f are decoded.  A backslash followed by any other
+ * character yields the backslash itself and consumes that character.
+ *
+ * returns the new parser in state PARSE_STATE_INITIAL (free it with
+ * ParseFree), or NULL if the parser record cannot be allocated.  Also
+ * resets the file-wide line counter and, on first use, initializes the
+ * type-transition table.
+ */
+#ifdef ANSI_FUNC
+Parse
+ParseNew(char *delims, char *comchars, char *eot, char *mode)
+#else
+Parse ParseNew(delims, comchars, eot, mode)
+     char *delims;
+     char *comchars;
+     char *eot;
+     char *mode;
+#endif
+{
+  int i;
+  int ip;
+  int lastd;
+  int tlen=0;
+  int dtable[PARSE_TABLE_SIZE];
+  char c;
+  char *s;
+  char *t;
+  char tbuf[SZ_LINE];
+  char tbuf2[SZ_LINE];
+  Parse parse=NULL;
+  static int init=0;
+
+  /* allocate parse struct */
+  if( (parse = (Parse)xcalloc(1, sizeof(ParseRec))) == NULL )
+    return(NULL);
+
+  /* initialize globals first time through */
+  if( !init ){
+    _ParseInitialize();
+    init++;
+  }
+
+  /* reset line counter */
+  __parseline = 0;
+
+  /* default type conversion scheme */
+  parse->convert = PARSE_DEFAULT_CONVERT;
+
+  /* set up delim table for removing enclosing chars from keyword strings */
+  memset(dtable, 0, PARSE_TABLE_SIZE*sizeof(int));
+  dtable[(int)'('] = 1;
+  dtable[(int)')'] = 1;
+
+  /* set the delim table.
+     NOTE(review): indexing by (unsigned char) assumes the tables hold at
+     least UCHAR_MAX+1 entries -- confirm PARSE_TABLE_SIZE in parse.h */
+  if( !delims ) delims = PARSE_DEFAULT_DELIMS;
+  ip = 0; t = NULL;
+  ParseWord(dtable, (int *)NULL, 0, 1, delims, &t, 0, &ip, &lastd);
+  for(s=t; s && *s; s++){
+    i = (unsigned char)*s;
+    /* decode an escape, but only if a character actually follows the
+       backslash: otherwise we would step over the terminator */
+    if( (*s == '\\') && s[1] ){
+      s++;
+      i = _ParseUnescape(*s, i);
+    }
+    parse->delimtab[i] = 1;
+  }
+  if( t ) xfree(t);
+
+  /* set the comment table */
+  if( !comchars ) comchars = PARSE_DEFAULT_COMCHARS;
+  ip = 0; t = NULL;
+  ParseWord(dtable, (int *)NULL, 0, 1, comchars, &t, 0, &ip, &lastd);
+  for(s=t; s && *s; s++){
+    i = (unsigned char)*s;
+    if( (*s == '\\') && s[1] ){
+      s++;
+      i = _ParseUnescape(*s, i);
+    }
+    parse->comtab[i] = 1;
+  }
+  if( t ) xfree(t);
+
+  /* set the eot records */
+  if( eot && *eot ){
+    ip = 0; t = NULL;
+    ParseWord(dtable, (int *)NULL, 0, 1, eot, &t, 0, &ip, &lastd);
+    parse->eot = (ParsedEOT)xcalloc(1, sizeof(ParsedEOTRec));
+    parse->eot->nline = 0;
+    parse->eot->maxline = 1;
+    parse->eot->lines =
+      (char **)xcalloc(parse->eot->maxline, sizeof(char *));
+    *tbuf = '\0';
+    tlen = 0;
+    /* split up eot string into separate lines */
+    for(s=t; s && *s; s++){
+      c = *s;
+      if( (c == '\\') && s[1] ){
+        s++;
+        c = (char)_ParseUnescape(*s, c);
+      }
+      /* besides this character we must leave room for a newline that may
+         be appended below and for the terminator */
+      if( tlen >= SZ_LINE-2 ){
+        gerror(stderr, "EOT specification is too long (%d)\n", tlen);
+        /* in case gerror() returns, do not write past the buffer */
+        break;
+      }
+      tbuf[tlen++] = c;
+      /* handle end of one line */
+      if( c == '\n' ){
+        tbuf[tlen] = '\0';
+        parse->eot->lines[parse->eot->nline] = xstrdup(tbuf);
+        parse->eot->nline++;
+        /* always keep one spare (NULL) slot beyond the last line */
+        while( parse->eot->nline >= parse->eot->maxline ){
+          parse->eot->maxline++;
+          parse->eot->lines =
+            (char **)xrealloc(parse->eot->lines,
+                              parse->eot->maxline * sizeof(char *));
+          parse->eot->lines[parse->eot->maxline-1] = NULL;
+        }
+        *tbuf = '\0';
+        tlen = 0;
+      }
+    }
+    /* process final line, if \n was not the last char */
+    if( *tbuf ){
+      tbuf[tlen++] = '\n';
+      tbuf[tlen] = '\0';
+      parse->eot->lines[parse->eot->nline] = xstrdup(tbuf);
+      parse->eot->nline++;
+    }
+    if( t ) xfree(t);
+  }
+
+  /* process mode string (on a private copy) */
+  if( mode && *mode ){
+    strncpy(tbuf, mode, SZ_LINE-1);
+    tbuf[SZ_LINE-1] = '\0';
+    if( keyword(tbuf, "nullvalues", tbuf2, SZ_LINE) )
+      parse->nullvalues = istrue(tbuf2);
+    if( keyword(tbuf, "whitespace", tbuf2, SZ_LINE) )
+      parse->whitespace = istrue(tbuf2);
+    if( keyword(tbuf, "header", tbuf2, SZ_LINE) )
+      parse->needheader = istrue(tbuf2);
+    if( keyword(tbuf, "units", tbuf2, SZ_LINE) )
+      parse->needunits = istrue(tbuf2);
+    if( keyword(tbuf, "i2f", tbuf2, SZ_LINE) )
+      parse->i2f = istrue(tbuf2);
+    /* debug and comeot accept a boolean or a number */
+    if( keyword(tbuf, "debug", tbuf2, SZ_LINE) ){
+      if( istrue(tbuf2) )
+        parse->debug = 1;
+      else if( isfalse(tbuf2) )
+        parse->debug = 0;
+      else
+        parse->debug = atoi(tbuf2);
+    }
+    /* convert and lazyeot keep their current value unless the keyword
+       value is recognizably true or false */
+    if( keyword(tbuf, "convert", tbuf2, SZ_LINE) ){
+      if( istrue(tbuf2) )
+        parse->convert = 1;
+      else if( isfalse(tbuf2) )
+        parse->convert = 0;
+    }
+    if( keyword(tbuf, "comeot", tbuf2, SZ_LINE) ){
+      if( istrue(tbuf2) )
+        parse->comeot = 1;
+      else if( isfalse(tbuf2) )
+        parse->comeot = 0;
+      else
+        parse->comeot = atoi(tbuf2);
+    }
+    if( keyword(tbuf, "lazyeot", tbuf2, SZ_LINE) ){
+      if( istrue(tbuf2) )
+        parse->lazyeot = 1;
+      else if( isfalse(tbuf2) )
+        parse->lazyeot = 0;
+    }
+  }
+
+  /* save inputs */
+  parse->delims = xstrdup(delims);
+  parse->comchars = xstrdup(comchars);
+  parse->mode = xstrdup(mode);
+
+  /* start out in initial state */
+  parse->state = PARSE_STATE_INITIAL;
+
+  /* return the news */
+  return parse;
+}
+
+/*
+ * ParseLine -- tokenize one input line and update the parser state
+ *
+ * parse: parser to use (NULL selects the file's default parser)
+ * lbuf:  NUL-terminated input line
+ * mode:  unused
+ *
+ * The previous current line becomes parse->prev (and the old prev becomes
+ * prev2) unless it was a comment, in which case it is discarded.  The new
+ * line is stored in parse->cur, and parse->state, ntoken, types and tokens
+ * are set from it.
+ *
+ * returns the number of tokens in the line; 0 if lbuf is NULL or if the
+ * parser is in a PARSE_STATE_BAD state (in both cases nothing is changed)
+ */
+#ifdef ANSI_FUNC
+int
+ParseLine(Parse parse, char *lbuf, char *UNUSED(mode))
+#else
+int ParseLine(parse, lbuf, mode)
+     Parse parse;
+     char *lbuf;
+     char *mode;
+#endif
+{
+  int i;
+  int got;
+  int ip;
+  ParsedLine line=NULL;
+
+  /* use default if necessary */
+  if( !parse ) parse = _parse;
+
+  /* nothing to parse */
+  if( !lbuf ) return 0;
+
+  /* if we have turned this parser off, just return */
+  if( parse->state & PARSE_STATE_BAD ) return 0;
+
+  /* shuffle lines as needed */
+  if( parse->cur ){
+    /* if cur is a comment, just clear it */
+    if( PARSE_ISCOMMENT(parse->cur) ){
+      _ParseLineFree(parse->cur);
+    }
+    /* valid line gets moved into prev */
+    else{
+      if( parse->prev ){
+        if( parse->prev2 ) _ParseLineFree(parse->prev2);
+        parse->prev2 = parse->prev;
+      }
+      parse->prev = parse->cur;
+    }
+  }
+  /* allocate space for the line */
+  parse->cur = (ParsedLine)xcalloc(1, sizeof(ParsedLineRec));
+
+  /* convenience pointer to current */
+  line = parse->cur;
+
+  /* increment line number */
+  parse->nline++;
+
+  /* initialize line if necessary */
+  if( !line->tokens ){
+    line->maxtoken = PARSE_TOKEN_INCR;
+    line->tokens = (ParsedToken)xcalloc(line->maxtoken,sizeof(ParsedTokenRec));
+    line->types = (char *)xcalloc((line->maxtoken+1), sizeof(char));
+  }
+
+  /* look for EOT */
+  if( (got=_ParseEOT(parse, lbuf)) != 0 ){
+    i = 0;
+    /* end of table */
+    if( got == 1 )
+      line->tokens[i].type = PARSE_EOT;
+    else
+      /* first part of multi-line EOT -- treat as comment */
+      line->tokens[i].type = PARSE_COMMENT;
+    /* add type to line types */
+    line->types[i] = line->tokens[i].type;
+    /* increment the number of times we have seen this type */
+    line->ntypes[line->tokens[i].type] += 1;
+    /* bump index so we can null terminate properly */
+    i++;
+    goto done;
+  }
+
+  /* process each token in the line */
+  for(i=0, ip=0; lbuf[ip]; i++){
+    /* make sure we have enough room */
+    if( i >= line->maxtoken ){
+      line->maxtoken += PARSE_TOKEN_INCR;
+      line->tokens = (ParsedToken)xrealloc(line->tokens,
+                                  line->maxtoken*sizeof(ParsedTokenRec));
+      /* the initial slots came zeroed from xcalloc; give the new ones
+         the same starting point (in particular a NULL sval) */
+      memset(&line->tokens[i], 0,
+             (line->maxtoken-i)*sizeof(ParsedTokenRec));
+      line->types = (char *)xrealloc(line->types,
+                                     (line->maxtoken+1)*sizeof(char));
+    }
+
+    /* process next word, and break if we don't get something */
+    got=ParseWord(parse->delimtab, parse->comtab,
+                  parse->nullvalues, parse->whitespace,
+                  lbuf, &(line->tokens[i].sval), 0, &ip,
+                  &(line->tokens[i].delim));
+    /* analyze result */
+    if( (got == 0)
+        && (line->tokens[i].delim != '\'') && (line->tokens[i].delim != '"') ){
+      /* end of line (probably some extra spaces), so free up sval */
+      if( line->tokens[i].sval ){
+        xfree(line->tokens[i].sval);
+        line->tokens[i].sval = NULL;
+      }
+      break;
+    }
+    else if( got < 0 ){
+      if( got == -3 )
+        /* end of table */
+        line->tokens[i].type = PARSE_EOT;
+      else if( got == -2)
+        /* comment */
+        line->tokens[i].type = PARSE_COMMENT;
+      else if( got == -1 )
+        /* null value */
+        line->tokens[i].type = PARSE_NULL;
+      /* add type to line types */
+      line->types[i] = line->tokens[i].type;
+      /* increment the number of times we have seen this type */
+      line->ntypes[line->tokens[i].type] += 1;
+      /* for all but NULL, we are done with this line */
+      if( line->types[i] != PARSE_NULL ){
+        /* bump index so we can null terminate properly */
+        i++;
+        break;
+      }
+    }
+    else{
+      /* valid token (an empty quoted string also lands here), set type */
+      if( parse->convert ){
+        /* quoted tokens are always strings, whatever they contain */
+        if( (line->tokens[i].delim == '\'') || (line->tokens[i].delim == '"') )
+          line->tokens[i].type = PARSE_STRING;
+        else
+          line->tokens[i].type =
+            _gettype(line->tokens[i].sval,
+                     &line->tokens[i].dval,
+                     &line->tokens[i].lval);
+      }
+      else{
+        line->tokens[i].type = PARSE_STRING;
+      }
+      /* add type to line types */
+      line->types[i] = line->tokens[i].type;
+      /* increment the number of times we have seen this type */
+      line->ntypes[line->tokens[i].type] += 1;
+    }
+  }
+
+done:
+  /* null terminate and realloc to actual size; keep room for at least
+     one token because the result of a zero-size realloc is not portable */
+  line->types[i] = '\0';
+  line->maxtoken = i;
+  line->tokens = (ParsedToken)xrealloc(line->tokens,
+                                       (i ? i : 1)*sizeof(ParsedTokenRec));
+  line->types = (char *)xrealloc(line->types, (i+1)*sizeof(char));
+  /* finalize total number of tokens processed */
+  line->ntoken = i;
+  /* get parse state for this line */
+  if( line->types[0] == PARSE_EOT ){
+    line->state = PARSE_STATE_EOT;
+  }
+  else{
+    line->state = _ParseLineState(parse, parse->state, NULL);
+  }
+
+  /* set line info in main record structure for access convenience */
+  parse->state = line->state;
+  parse->ntoken = line->ntoken;
+  parse->types = line->types;
+  parse->tokens = line->tokens;
+
+  /* return the news */
+  return parse->ntoken;
+}
+
+/*
+ * ParseAnalyze -- run one input line through a set of candidate parsers
+ * and disqualify the parsers that no longer fit
+ *
+ * parsers: array of nparser parsers
+ * nparser: number of parsers
+ * lbuf:    NUL-terminated input line
+ *
+ * Each parser that is not already bad or at EOT parses the line.  Then:
+ *  - if any parser saw end-of-table, every usable parser that did not is
+ *    put in PARSE_STATE_UNKNOWN and -1 is returned;
+ *  - a parser whose token count differs from its previous non-comment
+ *    line, or whose column types make a transition that _ctab does not
+ *    allow, is put in PARSE_STATE_BADMATCH;
+ *  - a parser that found fewer tokens than min(2, the largest token count
+ *    among the surviving parsers) is put in PARSE_STATE_BADMAX.
+ *
+ * returns: -1 on end-of-table, otherwise the number of parsers that are
+ * still usable (0 means no parser fits the input)
+ */
+#ifdef ANSI_FUNC
+int
+ParseAnalyze(Parse *parsers, int nparser, char *lbuf)
+#else
+int ParseAnalyze(parsers, nparser, lbuf)
+     Parse *parsers;
+     int nparser;
+     char *lbuf;
+#endif
+{
+  int i, p;
+  int eot=0;
+  int np=0;
+  int tmax=0;
+  ParsedLine line;
+
+  /* parse the line using all parsers */
+  for(p=0; p<nparser; p++){
+    if( parsers[p]->state & PARSE_STATE_BAD ) continue;
+    if( parsers[p]->state & PARSE_STATE_EOT ) continue;
+    ParseLine(parsers[p], lbuf, NULL);
+    if( parsers[p]->state & PARSE_STATE_EOT ) eot++;
+    if( parsers[p]->debug > 1 ){
+      fprintf(stderr, "PARSE %d: state %x %s", p, parsers[p]->state, lbuf);
+    }
+  }
+  /* if some parsers found eot, but others did not, we can no longer use
+     the latter */
+  if( eot ){
+    for(p=0; p<nparser; p++){
+      if( parsers[p]->state & PARSE_STATE_BAD ) continue;
+      if( parsers[p]->state & PARSE_STATE_EOT ) continue;
+      /* this parser is in an unknown state */
+      parsers[p]->state = PARSE_STATE_UNKNOWN;
+      if( parsers[p]->debug > 1 ){
+        fprintf(stderr, "PARSE %d: did not find EOT (state unknown)\n", p);
+      }
+    }
+    /* exit on EOT */
+    return -1;
+  }
+
+  /* analyze each parser */
+  /* remove any parsers where the number of args changes */
+  for(p=0; p<nparser; p++){
+    /* skip bad parsers */
+    if( parsers[p]->state & PARSE_STATE_BAD ) continue;
+    if( parsers[p]->state & PARSE_STATE_EOT ) continue;
+    line = parsers[p]->cur;
+    /* skip comments */
+    if( line->types[0] == PARSE_COMMENT ) continue;
+    /* make sure current ntokens == previous (non-comment) ntokens */
+    if( parsers[p]->prev ){
+      if( parsers[p]->prev->ntoken != parsers[p]->cur->ntoken ){
+        parsers[p]->state = PARSE_STATE_BADMATCH;
+        if( parsers[p]->debug ){
+          fprintf(stderr, "PARSE: badmatch %d/%d: %d %d\n",
+                  p, __parseline,
+                  parsers[p]->prev->ntoken, parsers[p]->cur->ntoken);
+        }
+      }
+      /* check data type transitions */
+      if( parsers[p]->prev->state & PARSE_STATE_DATA ){
+        for(i=0; i<parsers[p]->ntoken; i++){
+          /* skip check if prev line did not have this many tokens
+             (types[ntoken] is the string terminator, not a type) */
+          if( i >= parsers[p]->prev->ntoken ) break;
+          switch(_ctab[(int)parsers[p]->prev->types[i]][(int)parsers[p]->cur->types[i]]){
+          case -1:
+            /* i2f conversion explicitly permitted is OK */
+            if( parsers[p]->i2f ){
+              break;
+            }
+            /* current data type same as initial data type is OK */
+            if( parsers[p]->data1 && (i < parsers[p]->data1->ntoken) &&
+                (parsers[p]->data1->types[i] == parsers[p]->cur->types[i]) ){
+              break;
+            }
+            /* bad i2f conversion: drop through to error */
+            /* fallthrough */
+          case 0:
+            parsers[p]->state = PARSE_STATE_BADMATCH;
+            if( parsers[p]->debug ){
+              fprintf(stderr, "PARSE: badconv %d/%d/%d: %c->%c\n",
+                      p, __parseline, i,
+                      parsers[p]->prev->types[i], parsers[p]->cur->types[i]);
+            }
+            break;
+          case 1:
+            break;
+          }
+        }
+      }
+    }
+    /* track the largest token count among parsers that are still good */
+    if( !(parsers[p]->state & PARSE_STATE_BAD) ){
+      tmax=MAX(tmax, parsers[p]->ntoken);
+    }
+  }
+
+  /* remove parsers with < tmax tokens; the threshold is capped at 2, so
+     this only rejects a parser that found a single token where another
+     parser found several */
+  if( tmax > 2) tmax = 2;
+  for(p=0; p<nparser; p++){
+    /* skip bad parsers */
+    if( parsers[p]->state & PARSE_STATE_BAD ) continue;
+    if( parsers[p]->state & PARSE_STATE_EOT ) continue;
+    /* skip comments */
+    if( parsers[p]->types[0] == PARSE_COMMENT ) continue;
+    /* check current number of tokens */
+    if( parsers[p]->ntoken > 0 ){
+      if( parsers[p]->ntoken < tmax ){
+        parsers[p]->state = PARSE_STATE_BADMAX;
+        if( parsers[p]->debug ){
+          fprintf(stderr, "PARSE: badmax %d/%d: %d < %d\n",
+                  p, __parseline, parsers[p]->ntoken, tmax);
+        }
+      }
+    }
+  }
+
+  /* make sure we still have a parser left */
+  for(np=0, p=0; p<nparser; p++){
+    if( parsers[p]->state & PARSE_STATE_BAD ) continue;
+    if( parsers[p]->state & PARSE_STATE_EOT ) continue;
+    np++;
+  }
+
+  /* parsed another line */
+  __parseline++;
+  /* we either have parsers (np>0) or an error condition (np==0) */
+  return np;
+}
+
+/*
+ * ParseLineDup -- make a deep copy of a parsed line
+ *
+ * parse: parser the line belongs to (only checked for NULL)
+ * line:  line to copy
+ *
+ * The copy gets its own token array, its own copy of each token string
+ * and its own types string; all other fields are copied verbatim.
+ *
+ * returns the new line (release it with _ParseLineFree), or NULL if
+ * parse or line is NULL or the line record cannot be allocated
+ */
+#ifdef ANSI_FUNC
+ParsedLine
+ParseLineDup(Parse parse, ParsedLine line)
+#else
+ParsedLine ParseLineDup(parse, line)
+     Parse parse;
+     ParsedLine line;
+#endif
+{
+  int i;
+  ParsedLine nline;
+
+  /* sanity check: callers pass parse->prev and friends, which can be
+     unset */
+  if( !parse || !line ) return NULL;
+
+  /* reallocate everything */
+  if( !(nline = (ParsedLine)xcalloc(1, sizeof(ParsedLineRec))) ) return NULL;
+  /* start from a shallow copy, then replace every owned pointer */
+  memcpy(nline, line, sizeof(ParsedLineRec));
+  nline->tokens = (ParsedToken)xcalloc(line->maxtoken, sizeof(ParsedTokenRec));
+  if( nline->tokens && line->tokens && (line->maxtoken > 0) ){
+    memcpy(nline->tokens, line->tokens, line->maxtoken*sizeof(ParsedTokenRec));
+    for(i=0; i<nline->ntoken; i++){
+      nline->tokens[i].sval = xstrdup(line->tokens[i].sval);
+    }
+  }
+  nline->types = (char *)xcalloc((line->maxtoken+1), sizeof(char));
+  if( nline->types && line->types ){
+    memcpy(nline->types, line->types, line->maxtoken+1);
+  }
+  return nline;
+}
+
+/*
+ * ParseReset -- discard all per-table information held by a parser
+ *
+ * parse: parser to reset
+ * line:  if non-NULL, installed as the parser's current line
+ * state: new parser state; 0 selects PARSE_STATE_INITIAL
+ *
+ * Frees the saved lines (prev2, prev, cur, header, units, data1), rewinds
+ * the EOT matcher and clears the token shortcuts in the parser record.
+ *
+ * returns: 1 on success, 0 if parse is NULL
+ */
+#ifdef ANSI_FUNC
+int
+ParseReset(Parse parse, ParsedLine line, int state)
+#else
+int ParseReset(parse, line, state)
+     Parse parse;
+     ParsedLine line;
+     int state;
+#endif
+{
+  int k;
+  ParsedLine *slot[6];
+
+  if( parse == NULL )
+    return 0;
+
+  /* the saved lines, in the order in which they are released */
+  slot[0] = &parse->prev2;
+  slot[1] = &parse->prev;
+  slot[2] = &parse->cur;
+  slot[3] = &parse->header;
+  slot[4] = &parse->units;
+  slot[5] = &parse->data1;
+  for(k = 0; k < 6; k++){
+    if( *slot[k] != NULL ){
+      _ParseLineFree(*slot[k]);
+      *slot[k] = NULL;
+    }
+  }
+
+  /* forget any partially matched EOT sequence */
+  if( parse->eot != NULL )
+    parse->eot->ncur = 0;
+
+  /* these pointed into the current line, which is gone now */
+  parse->ntoken = 0;
+  parse->types = NULL;
+  parse->tokens = NULL;
+
+  /* initialize as specified */
+  if( line != NULL )
+    parse->cur = line;
+  parse->state = state ? state : PARSE_STATE_INITIAL;
+
+  return 1;
+}
+
+/*
+ * ParseFree -- destroy a parser
+ *
+ * Releases the saved lines (via ParseReset), the copies of the delimiter,
+ * comment and mode strings, the EOT record with its lines, and finally
+ * the parser record itself.
+ *
+ * returns: 1 on success, 0 if parse is NULL
+ */
+#ifdef ANSI_FUNC
+int
+ParseFree(Parse parse)
+#else
+int ParseFree(parse)
+     Parse parse;
+#endif
+{
+  int k;
+  ParsedEOT eot;
+
+  if( parse == NULL )
+    return 0;
+
+  /* let reset dispose of the per-table lines */
+  ParseReset(parse, NULL, 0);
+
+  /* strings saved at creation time */
+  if( parse->delims != NULL )
+    xfree(parse->delims);
+  if( parse->comchars != NULL )
+    xfree(parse->comchars);
+  if( parse->mode != NULL )
+    xfree(parse->mode);
+
+  /* end-of-table record: every slot up to maxline, then the array, then
+     the record */
+  eot = parse->eot;
+  if( eot != NULL ){
+    if( eot->lines != NULL ){
+      for(k = 0; k < eot->maxline; k++){
+        if( eot->lines[k] != NULL )
+          xfree(eot->lines[k]);
+      }
+      xfree(eot->lines);
+    }
+    xfree(eot);
+  }
+
+  /* the parser record goes last */
+  xfree(parse);
+  return 1;
+}
+
+/*
+ * ParseDataType -- public wrapper around _gettype()
+ *
+ * s:    token text to classify
+ * dval: if non-NULL, receives the value as a double for numeric tokens
+ * ival: if non-NULL, receives the value as a longlong for integer and hex
+ *       tokens
+ *
+ * returns the PARSE_* type code determined by _gettype()
+ */
+#ifdef ANSI_FUNC
+int
+ParseDataType(char *s, double *dval, longlong *ival)
+#else
+int ParseDataType(s, dval, ival)
+     char *s;
+     double *dval;
+     longlong *ival;
+#endif
+{
+  return _gettype(s, dval, ival);
+}