diff options
Diffstat (limited to 'funtools/util/parse.c')
-rw-r--r-- | funtools/util/parse.c | 1187 |
1 files changed, 1187 insertions, 0 deletions
diff --git a/funtools/util/parse.c b/funtools/util/parse.c new file mode 100644 index 0000000..a238a46 --- /dev/null +++ b/funtools/util/parse.c @@ -0,0 +1,1187 @@ +/* + * Copyright (c) 2004 Smithsonian Astrophysical Observatory + */ + +#include <parse.h> + +/* + *---------------------------------------------------------------------------- + * + * + * Private Routines and Data + * + * + *---------------------------------------------------------------------------- + */ + +static int __parseline; +static ParseRec __parserec; +static Parse _parse = &__parserec; +static char _ctab[PARSE_TABLE_SIZE][PARSE_TABLE_SIZE]; + +#ifdef ANSI_FUNC +static int +_gettype(char *s, double *d, longlong *i) +#else +_gettype(s, d, i) + char *s; + double *d; + longlong *i; +#endif +{ + char *t; + longlong lval; + double dval; + + /* make sure we have something */ + if( !s || !*s ) return PARSE_NULL; + + /* the temp pointer will move along the string */ + t = s; + + /* skip leading spaces */ + while( *t && isspace((int)*t) ) t++; + /* if all we had were spaces, its a string */ + if( !*t ) return PARSE_STRING; + + /* skip optional sign */ + if( *t && (*t == '+') ){ + t++; + } + /* skip optional minus sign or detect dashes */ + else if( *t && (*t == '-') ){ + t++; + if( !*t ) return PARSE_DASH; + if( *t && (*t == '-') ){ + for(t++; *t; t++){ + if( *t != '-' ) return PARSE_STRING; + } + return PARSE_DASH; + } + } + + /* look for hex indication */ + if( (*t == '0') && ((*(++t) == 'x') || (*t == 'X')) ){ + goto testhex; + } + + /* quick look for indication of a float */ + for(; *t; t++){ + if( (*t == '.') || (*t == 'E') ) + goto testfloat; + } + goto testint; + +testfloat: + dval = SAOstrtod(s, &t); + while( *t && isspace((int)*t) ) + t++; + if( *t != '\0' ) + return PARSE_STRING; + if( d ) *d = dval; + return PARSE_FLOAT; + +testint: + lval = strtoll(s, &t, 10); + while( *t && isspace((int)*t) ) + t++; + if( *t != '\0' ) + return PARSE_STRING; + if( i ) *i = (longlong)lval; + if( d ) *d = (double)lval; + return PARSE_INTEGER; + +testhex: + lval = strtoll(s, &t, 16); + while( *t && isspace((int)*t) ) + t++; + if( *t != '\0' ) + return PARSE_STRING; + if( i ) *i = (longlong)lval; + if( d ) *d = (double)lval; + return PARSE_HEXINT; +} + +#ifdef ANSI_FUNC +static void +_ParseInitialize(void) +#else +static void _ParseInitialize() +#endif +{ + /* clear ctable */ + memset(_ctab, 0, PARSE_TABLE_SIZE * PARSE_TABLE_SIZE); + + /* set allowable type conversions */ + _ctab[PARSE_NULL][PARSE_NULL] = 1; + _ctab[PARSE_NULL][PARSE_FLOAT] = 1; + _ctab[PARSE_NULL][PARSE_INTEGER] = 1; + _ctab[PARSE_NULL][PARSE_HEXINT] = 1; + _ctab[PARSE_NULL][PARSE_STRING] = 1; + + _ctab[PARSE_DASH][PARSE_NULL] = 1; + _ctab[PARSE_DASH][PARSE_FLOAT] = 1; + _ctab[PARSE_DASH][PARSE_INTEGER] = 1; + _ctab[PARSE_DASH][PARSE_HEXINT] = 1; + _ctab[PARSE_DASH][PARSE_STRING] = 1; + + _ctab[PARSE_FLOAT][PARSE_FLOAT] = 1; + _ctab[PARSE_FLOAT][PARSE_INTEGER] = 1; + _ctab[PARSE_FLOAT][PARSE_HEXINT] = 1; + _ctab[PARSE_FLOAT][PARSE_NULL] = 1; + + _ctab[PARSE_INTEGER][PARSE_INTEGER] = 1; + _ctab[PARSE_INTEGER][PARSE_HEXINT] = 1; + _ctab[PARSE_INTEGER][PARSE_NULL] = 1; + _ctab[PARSE_INTEGER][PARSE_STRING] = 1; + + _ctab[PARSE_HEXINT][PARSE_HEXINT] = 1; + _ctab[PARSE_HEXINT][PARSE_INTEGER] = 1; + _ctab[PARSE_HEXINT][PARSE_NULL] = 1; + + _ctab[PARSE_STRING][PARSE_STRING] = 1; + _ctab[PARSE_STRING][PARSE_NULL] = 1; + _ctab[PARSE_STRING][PARSE_INTEGER] = 1; + + /* set i2f conversions (requires explicit enabling) */ + _ctab[PARSE_INTEGER][PARSE_FLOAT] = -1; + _ctab[PARSE_HEXINT][PARSE_FLOAT] = -1; +} + +#ifdef ANSI_FUNC +static int +_ParseEOT(Parse parse, char *line) +#else +static int _ParseEOT(parse, line) + Parse parse; + char *line; +#endif +{ + if( !parse || !line || !parse->eot || (parse->state & PARSE_STATE_BAD) ) + return 0; + if( !strcmp(parse->eot->lines[parse->eot->ncur], line) ){ + parse->eot->ncur++; + if( parse->eot->ncur == parse->eot->nline ){ + parse->eot->ncur = 0; + return 1; + } + else{ + return -1; + } + } + else{ + parse->eot->ncur = 0; + if( !strcmp(parse->eot->lines[parse->eot->ncur], line) ){ + parse->eot->ncur++; + return -1; + } + } + return 0; +} + +#ifdef ANSI_FUNC +static int +_ParseFixTokens(ParsedLine line1, ParsedLine line2) +#else +static int _ParseFixTokens(line1, line2) + ParsedLine line1; + ParsedLine line2; +#endif +{ + int i; + + /* sanity check */ + if( !line1 || !line2 ) return -1; + + /* horrible vizier hack: the units line often is missing trailing + null values. we try to fix that here. this really stinks */ + if( line2->ntoken < line1->ntoken ){ + line2->tokens = (ParsedToken)xrealloc(line2->tokens, + line1->ntoken*sizeof(ParsedTokenRec)); + for(i=line2->ntoken; i<line1->ntoken; i++){ + line2->tokens[i].sval = NULL; + line2->tokens[i].type = PARSE_NULL; + line2->tokens[i].delim = '\0'; + } + line2->maxtoken = line1->ntoken; + line2->ntoken = line1->ntoken; + return 1; + } + else{ + return 0; + } +} + +#ifdef ANSI_FUNC +static int +_ParseLineState(Parse parse, int istate, char *UNUSED(mode)) +#else +_ParseLineState(parse, istate, mode) + Parse parse; + int state; + char *mode; +#endif +{ + int state; + ParsedLine line; + + /* sanity check */ + if( !parse ) return PARSE_STATE_UNKNOWN; + + /* get line */ + line = parse->cur; + + /* look for EOT and blanks */ + if( line->types[0] == PARSE_EOT ) return(PARSE_STATE_EOT); + if( !line->ntoken ) return(istate); + + /* comments might signify EOT under certain circumstances */ + if( line->types[0] == PARSE_COMMENT ){ + if( (istate == PARSE_STATE_DATA) && parse->data1 ){ + /* the middle of a multi-line EOT looks like a comment: just return */ + if( parse->eot && parse->eot->ncur ) return(istate); + /* else check for comment-based EOT */ + switch(parse->comeot){ + case 0: + return(istate); + case 1: + if( line->tokens[0].delim == '\n' ) + return(istate); + else{ + state = PARSE_STATE_EOT|PARSE_STATE_REDOLINE; + return(state); + } + break; + case 2: + state = PARSE_STATE_EOT|PARSE_STATE_REDOLINE; + return(state); + break; + default: + return(istate); + } + } + else{ + return(istate); + } + } + + switch(istate){ + case PARSE_STATE_INITIAL: + if( line->ntypes[PARSE_DASH] == line->ntoken ){ + state = PARSE_STATE_DATA; + } + /* yuck: fix 'justify' bug that puts spaces in blank header lines */ + else if( line->ntypes[PARSE_NULL] == line->ntoken ){ + /* make believe we just saw a comment */ + line->types[0] = PARSE_COMMENT; + return istate; + } + else if( (line->ntypes[PARSE_STRING]+line->ntypes[PARSE_NULL]) == + line->ntoken ){ + state = PARSE_STATE_STRING; + } + else{ + state = PARSE_STATE_DATA; + parse->data1 = ParseLineDup(parse, line); + } + break; + case PARSE_STATE_STRING: + if( line->ntypes[PARSE_DASH] == line->ntoken ){ + state = PARSE_STATE_DATA; + if( parse->needunits && parse->prev2 ){ + parse->header = ParseLineDup(parse, parse->prev2); + parse->units = ParseLineDup(parse, parse->prev); + } + else{ + parse->header = ParseLineDup(parse, parse->prev); + } + } + else if( (line->ntypes[PARSE_STRING]+line->ntypes[PARSE_NULL]) == + line->ntoken ){ + if( parse->needunits ){ + if( parse->prev2 ){ + state = PARSE_STATE_DATA; + parse->header = ParseLineDup(parse, parse->prev2); + parse->units = ParseLineDup(parse, parse->prev); + } + else{ + state = PARSE_STATE_STRING; + /* if needunits is set, this might be a broken vizier unit line */ + _ParseFixTokens(parse->prev, line); + } + } + else{ + if( parse->needheader ){ + state = PARSE_STATE_DATA; + parse->header = ParseLineDup(parse, parse->prev); + parse->data1 = ParseLineDup(parse, line); + } + else{ + state = PARSE_STATE_DATA; + parse->data1 = ParseLineDup(parse, parse->prev); + } + } + } + else{ + state = PARSE_STATE_DATA; + if( parse->needunits && parse->prev2 ){ + parse->header = ParseLineDup(parse, parse->prev2); + parse->units = ParseLineDup(parse, parse->prev); + } + else{ + parse->header = ParseLineDup(parse, parse->prev); + } + parse->data1 = ParseLineDup(parse, line); + } + break; + case PARSE_STATE_DATA: + /* all dashes means that we missed an EOT somewhere */ + if( line->ntypes[PARSE_DASH] == line->ntoken ){ + state = PARSE_STATE_BADMATCH; + } + /* all tokens are string might mean lazy EOT */ + else if( (line->ntypes[PARSE_STRING] == line->ntoken) && parse->lazyeot ){ + /* all tokens are strings and we have to look for a "lazy eot". + we look for a mismatch in the number of tokens or in the number + of string/null tokens between this line and the first data line */ + if( parse->data1 && + ((parse->data1->ntoken != line->ntoken) || + ((parse->data1->ntypes[PARSE_STRING]+parse->data1->ntypes[PARSE_NULL]) != line->ntoken)) ){ +#if PARSE_LOOSELY + state = _ParseLineState(parse, PARSE_STATE_INITIAL, NULL); + state |= PARSE_STATE_EOT|PARSE_STATE_NEXTLINE; +#else + state = PARSE_STATE_BADTYPE; +#endif + } + else{ + state = PARSE_STATE_DATA; + /* 8/22: I added this line to support tables containing only ascii + columns. But is there a reason why it was not here before??? */ + if( !parse->data1 ) parse->data1 = ParseLineDup(parse, line); + } + } + else{ + state = PARSE_STATE_DATA; + if( !parse->data1 ) parse->data1 = ParseLineDup(parse, line); + } + break; + case PARSE_STATE_BADMATCH: + state = PARSE_STATE_BADMATCH; + break; + case PARSE_STATE_BADMAX: + state = PARSE_STATE_BADMAX; + break; + case PARSE_STATE_UNKNOWN: + state = PARSE_STATE_UNKNOWN; + break; + default: + state = PARSE_STATE_UNKNOWN; + break; + } + return state; +} + +#ifdef ANSI_FUNC +static int +_ParseLineFree(ParsedLine line) +#else +static int _ParseLineFree(line) + ParsedLine line; +#endif +{ + int i; + + /* sanity check */ + if( !line ) return 0; + + /* free token strings */ + for(i=0; i< line->ntoken; i++){ + if( line->tokens[i].sval ) xfree(line->tokens[i].sval); + } + /* free line strings */ + if( line->types ) xfree(line->types); + if( line->tokens ) xfree(line->tokens); + xfree(line); + + return 1; +} + +/* + *---------------------------------------------------------------------------- + * + * + * Public Routines + * + * + *---------------------------------------------------------------------------- + */ + +#ifdef ANSI_FUNC +int +ParseWord(int *delims, int *comtab, int nullvalues, int whitespace, + char *lbuf, void *token, int tmax, int *lptr, int *lastd) +#else +int ParseWord(delims, comtab, nullvalues, whitespace, + lbuf, token, tmax, lptr, lastd) + int *delims; + int *comtab; + int nullvalues; + int whitespace; + char *lbuf; + void *token; + int tmax; + int *lptr; + int *lastd; +#endif +{ + int ip; + int i; + int tlen; + int tcomtab[PARSE_TABLE_SIZE]; + char quotes; + char *tbuf=NULL; + + /* null out the output string, if passed in */ + if( tmax ) + *(char *)token = '\0'; + + /* reset last delimiter */ + *lastd ='\0'; + + /* look for comtab */ + if( !comtab ){ + memset(tcomtab, 0, PARSE_TABLE_SIZE*sizeof(int)); + comtab = tcomtab; + } + + /* a more convenient pointer */ + ip = *lptr; + + /* if no buf, or we are at the end, just return */ + if( !lbuf || !lbuf[ip] ){ + *lastd = '\0'; + return(0); + } + + /* comment at beginning of line, just return */ + if( (ip == 0) && comtab[(int)lbuf[ip]] ){ + *lastd = lbuf[ip]; + ip++; + return(-2); + } + + /* allocate token space if necessary */ + if( !tmax ){ + tlen = SZ_LINE; + tbuf = xcalloc(tlen+1, sizeof(char)); + } + else{ + tbuf = (char *)token; + tlen = tmax; + } + + /* skip over starting consecutive delims, if not processing null values */ + if( !nullvalues ){ + while( delims[(int)lbuf[ip]] ){ + if( lbuf[ip] == '\0' ){ + *lptr = ip; + return(0); + } + else + ip++; + } + } + + /* grab up to next delim or comment */ + for(i=0; lbuf[ip] && !delims[(int)lbuf[ip]] && !comtab[(int)lbuf[ip]]; ip++){ + /*first check for an explicit quote */ + if( lbuf[ip] == '"' ){ + quotes = '"'; + *lastd = '"'; + } + else if( lbuf[ip] == '\'' ){ + quotes = '\''; + *lastd = '\''; + } + else{ + quotes = '\0'; + } + /* process quoted string as a single token */ + if( quotes != '\0' ){ + /* bump past quote */ + ip++; + /* grab up to next quote -- but skip escaped quotes */ + for(; lbuf[ip] != '\0'; ip++){ + if( (lbuf[ip] == quotes) && ((ip==0) || lbuf[ip-1] != '\\') ){ + break; + } + else{ + if( (tlen >= 0) && (i >= tlen) ){ + if( tmax ){ + break; + } + else{ + tlen += SZ_LINE; + tbuf = xrealloc(tbuf, tlen); + } + } + tbuf[i++] = lbuf[ip]; + } + } + } + /* single non-quoted token */ + else{ + if( (tlen >= 0) && (i >= tlen) ){ + if( tmax ){ + break; + } + else{ + tlen += SZ_LINE; + tbuf = xrealloc(tbuf, tlen); + } + } + tbuf[i++] = lbuf[ip]; + /* reset lastd to erase any trace of a quoted delim */ + *lastd = '\0'; + } + } + /* save this delimiter (unless we ended with a quoted string) */ + if( !*lastd ) *lastd = lbuf[ip]; + /* bump past delimiter (but not null terminator) */ + if( lbuf[ip] ) ip++; + + /* realloc if necessary */ + if( !tmax ){ + tbuf = xrealloc(tbuf, i+1); + *(char **)token = tbuf; + } + /* check size one more time */ + if( i >= tlen ) + i = tlen-1; + /* null terminate */ + tbuf[i] = '\0'; + + /* remove surrounding white space, if necessary */ + if( !whitespace && (strlen(tbuf)>1) ) + nowhite(tbuf, tbuf); + + /* got something */ + *lptr = ip; + /* make allowance for nullvalues, if necessary */ + if( nullvalues && !i && *lastd ) i = -1; + return(i); +} + +#ifdef ANSI_FUNC +Parse +ParseNew(char *delims, char *comchars, char *eot, char *mode) +#else +Parse ParseNew(delims, comchars, eot, mode) + char *delims; + char *comchars; + char *eot; + char *mode; +#endif +{ + int i; + int ip; + int lastd; + int tlen=0; + int dtable[PARSE_TABLE_SIZE]; + char c; + char *s; + char *t; + char tbuf[SZ_LINE]; + char tbuf2[SZ_LINE]; + Parse parse=NULL; + static int init=0; + + /* allocate parse struct */ + if( (parse = (Parse)xcalloc(1, sizeof(ParseRec))) == NULL ) + return(NULL); + + /* initialize globals first time through */ + if( !init ){ + _ParseInitialize(); + init++; + } + + /* reset line counter */ + __parseline = 0; + + /* default type conversion scheme */ + parse->convert = PARSE_DEFAULT_CONVERT; + + /* set up delim table for removing enclosing chars from keyword strings */ + memset(dtable, 0, PARSE_TABLE_SIZE*sizeof(int)); + dtable[(int)'('] = 1; + dtable[(int)')'] = 1; + + /* set the delim table */ + if( !delims ) delims = PARSE_DEFAULT_DELIMS; + ip = 0; t = NULL; + ParseWord(dtable, (int *)NULL, 0, 1, delims, &t, 0, &ip, &lastd); + for(s=t; s && *s; s++){ + if( (i=(int)*s) == '\\' ){ + s++; + if( *s == 'n' ) i = '\n'; + else if( *s == 't' ) i = '\t'; + else if( *s == 'r' ) i = '\r'; + else if( *s == 'f' ) i = '\014'; + } + parse->delimtab[i] = 1; + } + if( t ) xfree(t); + + /* set the comment table */ + if( !comchars ) comchars = PARSE_DEFAULT_COMCHARS; + ip = 0; t = NULL; + ParseWord(dtable, (int *)NULL, 0, 1, comchars, &t, 0, &ip, &lastd); + for(s=t; s && *s; s++){ + if( (i=(int)*s) == '\\' ){ + s++; + if( *s == 'n' ) i = '\n'; + else if( *s == 't' ) i = '\t'; + else if( *s == 'r' ) i = '\r'; + else if( *s == 'f' ) i = '\014'; + } + parse->comtab[i] = 1; + } + if( t ) xfree(t); + + /* set the eot records */ + if( eot && *eot ){ + ip = 0; t = NULL; + ParseWord(dtable, (int *)NULL, 0, 1, eot, &t, 0, &ip, &lastd); + parse->eot = (ParsedEOT)xcalloc(1, sizeof(ParsedEOTRec)); + parse->eot->nline = 0; + parse->eot->maxline = 1; + parse->eot->lines = + (char **)xcalloc(parse->eot->maxline, sizeof(char *)); + *tbuf = '\0'; + tlen = 0; + /* split up eot string into separate lines */ + for(s=t; s && *s; s++){ + if( (c=*s) == '\\' ){ + s++; + if( *s == 'n' ) c = '\n'; + else if( *s == 't' ) c = '\t'; + else if( *s == 'r' ) c = '\r'; + else if( *s == 'f' ) c = '\014'; + } + if( tlen >= SZ_LINE ) + gerror(stderr, "EOT specification is too long (%d)\n", tlen); + tbuf[tlen++] = c; + /* handle end of one line */ + if( c == '\n' ){ + tbuf[tlen] = '\0'; + parse->eot->lines[parse->eot->nline] = xstrdup(tbuf); + parse->eot->nline++; + while( parse->eot->nline >= parse->eot->maxline ){ + parse->eot->maxline++; + parse->eot->lines = + (char **)xrealloc(parse->eot->lines, + parse->eot->maxline * sizeof(char *)); + parse->eot->lines[parse->eot->maxline-1] = NULL; + } + *tbuf = '\0'; + tlen = 0; + } + } + /* process final line, if \n was not the last char */ + if( *tbuf ){ + tbuf[tlen++] = '\n'; + tbuf[tlen] = '\0'; + parse->eot->lines[parse->eot->nline] = xstrdup(tbuf); + parse->eot->nline++; + } + if( t ) xfree(t); + } + + /* process mode string */ + if( mode && *mode ){ + strncpy(tbuf, mode, SZ_LINE-1); + tbuf[SZ_LINE-1] = '\0'; + if( keyword(tbuf, "nullvalues", tbuf2, SZ_LINE) ) + parse->nullvalues = istrue(tbuf2); + if( keyword(tbuf, "whitespace", tbuf2, SZ_LINE) ) + parse->whitespace = istrue(tbuf2); + if( keyword(tbuf, "header", tbuf2, SZ_LINE) ) + parse->needheader = istrue(tbuf2); + if( keyword(tbuf, "units", tbuf2, SZ_LINE) ) + parse->needunits = istrue(tbuf2); + if( keyword(tbuf, "i2f", tbuf2, SZ_LINE) ) + parse->i2f = istrue(tbuf2); + if( keyword(tbuf, "debug", tbuf2, SZ_LINE) ){ + if( istrue(tbuf2) ) + parse->debug = 1; + else if( isfalse(tbuf2) ) + parse->debug = 0; + else + parse->debug = atoi(tbuf2); + } + if( keyword(tbuf, "convert", tbuf2, SZ_LINE) ){ + if( istrue(tbuf2) ) + parse->convert = 1; + else if( isfalse(tbuf2) ) + parse->convert = 0; + } + if( keyword(tbuf, "comeot", tbuf2, SZ_LINE) ){ + if( istrue(tbuf2) ) + parse->comeot = 1; + else if( isfalse(tbuf2) ) + parse->comeot = 0; + else + parse->comeot = atoi(tbuf2); + } + if( keyword(tbuf, "lazyeot", tbuf2, SZ_LINE) ){ + if( istrue(tbuf2) ) + parse->lazyeot = 1; + else if( isfalse(tbuf2) ) + parse->lazyeot = 0; + } + } + + /* save inputs */ + parse->delims = xstrdup(delims); + parse->comchars = xstrdup(comchars); + parse->mode = xstrdup(mode); + + /* start out in initial state */ + parse->state = PARSE_STATE_INITIAL; + + /* return the news */ + return parse; +} + +#ifdef ANSI_FUNC +int +ParseLine(Parse parse, char *lbuf, char *UNUSED(mode)) +#else +int ParseLine(parse, lbuf, mode) + Parse parse; + char *lbuf; + char *mode; +#endif +{ + int i; + int got; + int ip; + ParsedLine line=NULL; + + /* use default if necessary */ + if( !parse ) parse = _parse; + + /* if we have turned this parser off, just return */ + if( parse->state & PARSE_STATE_BAD ) return 0; + + /* shuffle lines as needed */ + if( parse->cur ){ + /* if cur is a comment, just clear it */ + if( PARSE_ISCOMMENT(parse->cur) ){ + _ParseLineFree(parse->cur); + } + /* valid line gets moved into prev */ + else{ + if( parse->prev ){ + if( parse->prev2 ) _ParseLineFree(parse->prev2); + parse->prev2 = parse->prev; + } + parse->prev = parse->cur; + } + /* allocate space for the line */ + parse->cur = (ParsedLine)xcalloc(1, sizeof(ParsedLineRec)); + } + else{ + /* allocate space for the line */ + parse->cur = (ParsedLine)xcalloc(1, sizeof(ParsedLineRec)); + } + + /* convenience pointer to current */ + line = parse->cur; + + /* increment line number */ + parse->nline++; + + /* initialize line if necessary */ + if( !line->tokens ){ + line->maxtoken = PARSE_TOKEN_INCR; + line->tokens = (ParsedToken)xcalloc(line->maxtoken,sizeof(ParsedTokenRec)); + line->types = (char *)xcalloc((line->maxtoken+1), sizeof(char)); + } + + /* look for EOT */ + if( (got=_ParseEOT(parse, lbuf)) != 0 ){ + i = 0; + /* end of table */ + if( got == 1 ) + line->tokens[i].type = PARSE_EOT; + else + /* first part of multi-line EOT -- treat as comment */ + line->tokens[i].type = PARSE_COMMENT; + /* add type to line types */ + line->types[i] = line->tokens[i].type; + /* increment the number of times we have seen this type */ + line->ntypes[line->tokens[i].type] += 1; + /* bump index so we can null terminate properly */ + i++; + goto done; + } + + /* process each token in the line */ + for(i=0, ip=0; lbuf[ip]; i++){ + /* make sure we have enough room */ + if( i >= line->maxtoken ){ + line->maxtoken += PARSE_TOKEN_INCR; + line->tokens = (ParsedToken)xrealloc(line->tokens, + line->maxtoken*sizeof(ParsedTokenRec)); + line->types = (char *)xrealloc(line->types, + (line->maxtoken+1)*sizeof(char)); + } + + /* process next word, and break if we don't get something */ + got=ParseWord(parse->delimtab, parse->comtab, + parse->nullvalues, parse->whitespace, + lbuf, &(line->tokens[i].sval), 0, &ip, + &(line->tokens[i].delim)); + /* analyze result */ + if( (got == 0) + && (line->tokens[i].delim != '\'') && (line->tokens[i].delim != '"') ){ + /* end of line (probably some extra spaces), so free up sval */ + if( line->tokens[i].sval ){ + xfree(line->tokens[i].sval); + line->tokens[i].sval = NULL; + } + break; + } + else if( got < 0 ){ + if( got == -3 ) + /* end of table */ + line->tokens[i].type = PARSE_EOT; + else if( got == -2) + /* comment */ + line->tokens[i].type = PARSE_COMMENT; + else if( got == -1 ) + /* null value */ + line->tokens[i].type = PARSE_NULL; + /* add type to line types */ + line->types[i] = line->tokens[i].type; + /* increment the number of times we have seen this type */ + line->ntypes[line->tokens[i].type] += 1; + /* for all but NULL, we are done with this line */ + if( line->types[i] != PARSE_NULL ){ + /* bump index so we can null terminate properly */ + i++; + break; + } + } + else{ + /* valid token, set token type */ + if( parse->convert ){ + if( (line->tokens[i].delim == '\'') || (line->tokens[i].delim == '"') ) + line->tokens[i].type = PARSE_STRING; + else + line->tokens[i].type = + _gettype(line->tokens[i].sval, + &line->tokens[i].dval, + &line->tokens[i].lval); + } + else{ + line->tokens[i].type = PARSE_STRING; + } + /* add type to line types */ + line->types[i] = line->tokens[i].type; + /* increment the number of times we have seen this type */ + line->ntypes[line->tokens[i].type] += 1; + } + } + +done: + /* null terminate and realloc to actual size */ + line->types[i] = '\0'; + line->maxtoken = i; + line->tokens = (ParsedToken)xrealloc(line->tokens, i*sizeof(ParsedTokenRec)); + line->types = (char *)xrealloc(line->types, (i+1)*sizeof(char)); + /* finalize total number of tokens processed */ + line->ntoken = i; + /* get parse state for this line */ + if( line->types[0] == PARSE_EOT ){ + line->state = PARSE_STATE_EOT; + } + else{ + line->state = _ParseLineState(parse, parse->state, NULL); + } + + /* set line info in main record structure for access convenience */ + parse->state = line->state; + parse->ntoken = line->ntoken; + parse->types = line->types; + parse->tokens = line->tokens; + + /* return the news */ + return parse->ntoken; +} + +#ifdef ANSI_FUNC +int +ParseAnalyze(Parse *parsers, int nparser, char *lbuf) +#else +int ParseAnalyze(parsers, nparser, lbuf) + Parse *parsers; + int nparser; + char *lbuf; +#endif +{ + int i, p; + int eot=0; + int np=0; + int tmax=0; + ParsedLine line; + + /* parse the line using all parsers */ + for(p=0; p<nparser; p++){ + if( parsers[p]->state & PARSE_STATE_BAD ) continue; + if( parsers[p]->state & PARSE_STATE_EOT ) continue; + ParseLine(parsers[p], lbuf, NULL); + if( parsers[p]->state & PARSE_STATE_EOT ) eot++; + if( parsers[p]->debug > 1 ){ + fprintf(stderr, "PARSE %d: state %x %s", p, parsers[p]->state, lbuf); + } + } + /* if some parsers found eot, but others did not, we can no longer use + the latter */ + if( eot ){ + for(p=0; p<nparser; p++){ + if( parsers[p]->state & PARSE_STATE_BAD ) continue; + if( parsers[p]->state & PARSE_STATE_EOT ) continue; + /* this parser is in an unknown state */ + parsers[p]->state = PARSE_STATE_UNKNOWN; + if( parsers[p]->debug > 1 ){ + fprintf(stderr, "PARSE %d: did not find EOT (state unknown)\n", p); + } + } + /* exit on EOT */ + return -1; + } + + /* analyze each parser */ + /* remove any parsers where the number of args changes */ + for(p=0; p<nparser; p++){ + /* skip bad parsers */ + if( parsers[p]->state & PARSE_STATE_BAD ) continue; + if( parsers[p]->state & PARSE_STATE_EOT ) continue; + line = parsers[p]->cur; + /* skip comments */ + if( line->types[0] == PARSE_COMMENT ) continue; + /* make sure current ntokens == previous (non-comment) ntokens */ + if( parsers[p]->prev ){ + if( parsers[p]->prev->ntoken != parsers[p]->cur->ntoken ){ + parsers[p]->state = PARSE_STATE_BADMATCH; + if( parsers[p]->debug ){ + fprintf(stderr, "PARSE: badmatch %d/%d: %d %d\n", + p, __parseline, + parsers[p]->prev->ntoken, parsers[p]->cur->ntoken); + } + } + /* check data type transitions */ + if( parsers[p]->prev->state & PARSE_STATE_DATA ){ + for(i=0; i<parsers[p]->ntoken; i++){ + /* skip check if prev line did not had this many tokens */ + if( i > parsers[p]->prev->ntoken ) break; + switch(_ctab[(int)parsers[p]->prev->types[i]][(int)parsers[p]->cur->types[i]]){ + case -1: + /* i2f conversion explicity permitted is OK */ + if( parsers[p]->i2f ){ + break; + } + /* current data type same as initial data type is OK */ + if( parsers[p]->data1 && (i <= parsers[p]->data1->ntoken) && + (parsers[p]->data1->types[i] == parsers[p]->cur->types[i]) ){ + break; + } + /* bad i2f conversion: drop through to error */ + case 0: + parsers[p]->state = PARSE_STATE_BADMATCH; + if( parsers[p]->debug ){ + fprintf(stderr, "PARSE: badconv %d/%d/%d: %c->%c\n", + p, __parseline, i, + parsers[p]->prev->types[i], parsers[p]->cur->types[i]); + } + break; + case 1: + break; + } + } + } + } + if( !(parsers[p]->state & PARSE_STATE_BAD) ){ + tmax=MAX(tmax, parsers[p]->ntoken); + } + } + + /* remove parsers with < tmax tokens */ + if( tmax > 2) tmax = 2; + for(p=0; p<nparser; p++){ + /* skip bad parsers */ + if( parsers[p]->state & PARSE_STATE_BAD ) continue; + if( parsers[p]->state & PARSE_STATE_EOT ) continue; + /* skip comments */ + if( parsers[p]->types[0] == PARSE_COMMENT ) continue; + /* check current number of tokens */ + if( parsers[p]->ntoken > 0 ){ + if( parsers[p]->ntoken < tmax ){ + parsers[p]->state = PARSE_STATE_BADMAX; + if( parsers[p]->debug ){ + fprintf(stderr, "PARSE: badmax %d/%d: %d < %d\n", + p, __parseline, parsers[p]->ntoken, tmax); + } + } + } + } + + /* make sure we still have a parser left */ + for(np=0, p=0; p<nparser; p++){ + if( parsers[p]->state & PARSE_STATE_BAD ) continue; + if( parsers[p]->state & PARSE_STATE_EOT ) continue; + np++; + } + + /* parsed another line */ + __parseline++; + /* we either have parsers (np>0) or an error condition (np==0) */ + return np; +} + +#ifdef ANSI_FUNC +ParsedLine +ParseLineDup(Parse parse, ParsedLine line) +#else +ParsedLine ParseLineDup(parse, line) + Parse parse; + ParsedLine line; +#endif +{ + int i; + ParsedLine nline; + + /* sanity check */ + if( !parse ) return NULL; + + /* rellocate everything */ + if( !(nline = (ParsedLine)xcalloc(1, sizeof(ParsedLineRec))) ) return NULL; + memcpy(nline, line, sizeof(ParsedLineRec)); + nline->tokens = (ParsedToken)xcalloc(line->maxtoken, sizeof(ParsedTokenRec)); + memcpy(nline->tokens, line->tokens, line->maxtoken*sizeof(ParsedTokenRec)); + for(i=0; i<nline->ntoken; i++){ + nline->tokens[i].sval = xstrdup(line->tokens[i].sval); + } + nline->types = (char *)xcalloc((line->maxtoken+1), sizeof(char)); + memcpy(nline->types, line->types, line->maxtoken+1); + return nline; +} + +#ifdef ANSI_FUNC +int +ParseReset(Parse parse, ParsedLine line, int state) +#else + int ParseReset(parse, line, state) + Parse parse; + ParsedLine line; + int state; +#endif +{ + if( !parse ) return 0; + + if( parse->prev2 ){ + _ParseLineFree(parse->prev2); + parse->prev2 = NULL; + } + if( parse->prev ){ + _ParseLineFree(parse->prev); + parse->prev = NULL; + } + if( parse->cur ){ + _ParseLineFree(parse->cur); + parse->cur = NULL; + } + if( parse->header ){ + _ParseLineFree(parse->header); + parse->header = NULL; + } + if( parse->units ){ + _ParseLineFree(parse->units); + parse->units = NULL; + } + if( parse->data1 ){ + _ParseLineFree(parse->data1); + parse->data1 = NULL; + } + if( parse->eot ) parse->eot->ncur = 0; + parse->ntoken = 0; + parse->types = NULL; + parse->tokens = NULL; + + /* initialize as specified */ + if( line ){ + parse->cur = line; + } + if( state ){ + parse->state = state; + } + else{ + parse->state = PARSE_STATE_INITIAL; + } + + return 1; +} + +#ifdef ANSI_FUNC +int +ParseFree(Parse parse) +#else +int ParseFree(parse) + Parse parse; +#endif +{ + int i; + + /* sanity check */ + if( !parse ) return 0; + + /* reset frees up some space */ + ParseReset(parse, NULL, 0); + + /* free up remainder of allocated space */ + if( parse->delims ) xfree(parse->delims); + if( parse->comchars ) xfree(parse->comchars); + if( parse->mode ) xfree(parse->mode); + if( parse->eot ){ + if( parse->eot->lines ){ + for(i=0; i<parse->eot->maxline; i++){ + if( parse->eot->lines[i] ) xfree(parse->eot->lines[i]); + } + xfree(parse->eot->lines); + } + xfree(parse->eot); + } + + /* free struct */ + xfree(parse); + + /* return the news */ + return 1; +} + +#ifdef ANSI_FUNC +int +ParseDataType(char *s, double *dval, longlong *ival) +#else + int ParseFree(s, dval, ival) + char *s; + double *dval; + longlong *ival; +#endif +{ + return _gettype(s, dval, ival); +} |