diff options
author | David Young <dyoung@hdfgroup.org> | 2021-03-15 13:16:22 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-15 13:16:22 (GMT) |
commit | f6d919a2ab0034a1ee76ca2d5f59f4d8114c4952 (patch) | |
tree | 1850a385e92e3e434c9723fba3de1e870d5703b7 /hl/src/H5LTanalyze.l | |
parent | 24c83cf73ebb80a58ce92463dac02713d3a4dbf8 (diff) | |
download | hdf5-f6d919a2ab0034a1ee76ca2d5f59f4d8114c4952.zip hdf5-f6d919a2ab0034a1ee76ca2d5f59f4d8114c4952.tar.gz hdf5-f6d919a2ab0034a1ee76ca2d5f59f4d8114c4952.tar.bz2 |
Simplified hl parsing (#399)
* Stop using global variables to share parse context with the lexer.
The lexer uses an unconventional strategy for parsing lexical categories
NUMBER (decimal numbers) and STRING (double-quoted strings) that involves
sharing the parse context with the lexer using global variables. There
are a couple of problems with that. First, the lexer is too complicated
for the simple tokenization it performs—it's hard to tell if it is
correct. Second, as @seanm points out, the shared global variables
spill into the namespace shared by other libraries and application
programs—e.g., VTK.
* Regenerate source files from *.[yl].
* Replace strndup, which isn't available on Windows, with a custom
routine, `trim_quotes`, that produces a copy of its `const char *`
argument with leading and trailing double quotes ('"') removed.
While I am here, remove the unnecessary statement `BEGIN INITIAL;`,
which I should have deleted in a previous commit.
* Regenerate .c from .l.
* You haven't programmed in C until you have programmed in High-Definition
(HD) C.
* `#include "H5private.h"` for the HD* definitions.
* Regenerate *.[ch] from *.[yl].
Diffstat (limited to 'hl/src/H5LTanalyze.l')
-rw-r--r-- | hl/src/H5LTanalyze.l | 90 |
1 file changed, 29 insertions, 61 deletions
diff --git a/hl/src/H5LTanalyze.l b/hl/src/H5LTanalyze.l index 49fc039..76d5800 100644 --- a/hl/src/H5LTanalyze.l +++ b/hl/src/H5LTanalyze.l @@ -18,11 +18,15 @@ */ %{ +#include <assert.h> #include <stdlib.h> #include <string.h> #include <hdf5.h> + +#include "H5private.h" #include "H5LTparse.h" +static char *trim_quotes(const char *); int my_yyinput(char *, int); #undef YY_INPUT #define YY_INPUT(b, r, ms) (r=my_yyinput(b, ms)) @@ -37,43 +41,8 @@ int my_yyinput(char *, int); extern char *myinput; extern size_t input_len; -#define STACK_SIZE 16 - -/*variables for compound type*/ -struct cmpd_info { - hid_t id; - hbool_t is_field; - hbool_t first_memb; -}; -extern struct cmpd_info cmpd_stack[STACK_SIZE]; -extern int csindex; - -/*variables for array type*/ -struct arr_info { - hsize_t dims[H5S_MAX_RANK]; - int ndim; - hbool_t is_dim; -}; -extern struct arr_info arr_stack[STACK_SIZE]; -extern int asindex; - -/*variables for enumerate type*/ -extern hbool_t is_enum; -extern hbool_t is_enum_memb; - -/*variables for string type*/ -extern hbool_t is_str_size; - -/*variables for opaque type*/ -extern hbool_t is_opq_size; -extern hbool_t is_opq_tag; - -hbool_t first_quote = 1; - %} -%s TAG_STRING - %% H5T_STD_I8BE {return hid(H5T_STD_I8BE_TOKEN);} @@ -138,32 +107,12 @@ OPQ_SIZE {return token(OPQ_SIZE_TOKEN);} OPQ_TAG {return token(OPQ_TAG_TOKEN);} [0-9]+ { - if( is_str_size || (is_enum && is_enum_memb) || - is_opq_size || (asindex>-1 && arr_stack[asindex].is_dim) || - (csindex>-1 && cmpd_stack[csindex].is_field) ) { - H5LTyylval.ival = atoi(yytext); - return NUMBER; - } else - REJECT; + H5LTyylval.ival = HDatoi(yytext); + return NUMBER; } -"\"" { - /*if it's first quote, and is a compound field name or an enum symbol*/ - if((is_opq_tag || is_enum || (csindex>-1 && cmpd_stack[csindex].is_field)) - && first_quote) { - first_quote = 0; - BEGIN TAG_STRING; - } else /*if it's second quote*/ - first_quote = 1; - return token('"'); - } -<TAG_STRING>[^\"]+ { -#ifdef H5_HAVE_WIN32_API 
- H5LTyylval.sval = _strdup(yytext); -#else /* H5_HAVE_WIN32_API */ - H5LTyylval.sval = strdup(yytext); -#endif /* H5_HAVE_WIN32_API */ - BEGIN INITIAL; +["][^\"]+["] { + H5LTyylval.sval = trim_quotes(yytext); return STRING; } @@ -177,18 +126,37 @@ OPQ_TAG {return token(OPQ_TAG_TOKEN);} "\n" { return 0; } %% + +/* Allocate a copy of `quoted` with the double quote character at + * the beginning and the one at the end both removed. The caller is + * responsible for free()ing the copy. + */ +static char * +trim_quotes(const char *quoted) +{ + size_t len = HDstrlen(quoted); + char *trimmed; + + HDassert(quoted[0] == '"' && quoted[len - 1] == '"'); + + trimmed = HDstrdup(quoted + 1); + trimmed[len - 2] = '\0'; + + return trimmed; +} + int my_yyinput(char *buf, int max_size) { int ret; - memcpy(buf, myinput, input_len); + HDmemcpy(buf, myinput, input_len); ret = (int)input_len; return ret; } int H5LTyyerror(const char *msg) { - printf("ERROR: %s before \"%s\".\n", msg, yytext); + HDprintf("ERROR: %s before \"%s\".\n", msg, yytext); return 0; } |