diff options
author | David Young <dyoung@hdfgroup.org> | 2021-03-15 13:16:22 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-15 13:16:22 (GMT) |
commit | f6d919a2ab0034a1ee76ca2d5f59f4d8114c4952 (patch) | |
tree | 1850a385e92e3e434c9723fba3de1e870d5703b7 /hl/src/H5LTparse.y | |
parent | 24c83cf73ebb80a58ce92463dac02713d3a4dbf8 (diff) | |
download | hdf5-f6d919a2ab0034a1ee76ca2d5f59f4d8114c4952.zip hdf5-f6d919a2ab0034a1ee76ca2d5f59f4d8114c4952.tar.gz hdf5-f6d919a2ab0034a1ee76ca2d5f59f4d8114c4952.tar.bz2 |
Simplified hl parsing (#399)
* Stop using global variables to share parse context with the lexer.
The lexer uses an unconventional strategy for parsing lexical categories
NUMBER (decimal numbers) and STRING (double-quoted strings) that involves
sharing the parse context with the lexer using global variables. There
are a couple of problems with that. First, the lexer is too complicated
for the simple tokenization it performs—it's hard to tell if it is
correct. Second, as @seanm points out, the shared global variables
spill into the namespace shared by other libraries and application
programs—e.g., VTK.
* Regenerate source files from *.[yl].
* Replace strndup, which isn't available on Windows, with a custom
routine, `trim_quotes`, that produces a copy of its `const char *`
argument with leading and trailing double quotes ('"') removed.
While I am here, remove the unnecessary statement `BEGIN INITIAL;`,
which I should have deleted in a previous commit.
* Regenerate .c from .l.
* You haven't programmed in C until you have programmed in High-Definition
(HD) C.
* #include "H5private.h" for HD* definitions.
* Regenerate *.[ch] from *.[yl].
Diffstat (limited to 'hl/src/H5LTparse.y')
-rw-r--r-- | hl/src/H5LTparse.y | 102 |
1 files changed, 46 insertions, 56 deletions
diff --git a/hl/src/H5LTparse.y b/hl/src/H5LTparse.y index 306d8c9..7c32772 100644 --- a/hl/src/H5LTparse.y +++ b/hl/src/H5LTparse.y @@ -22,7 +22,9 @@ #include <string.h> #include <hdf5.h> -extern int yylex(); +#include "H5private.h" + +extern int yylex(void); extern int yyerror(const char *); #define STACK_SIZE 16 @@ -35,13 +37,13 @@ struct cmpd_info { }; /*stack for nested compound type*/ -struct cmpd_info cmpd_stack[STACK_SIZE] = { +static struct cmpd_info cmpd_stack[STACK_SIZE] = { {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1} }; -int csindex = -1; /*pointer to the top of compound stack*/ +static int csindex = -1; /*pointer to the top of compound stack*/ /*structure for array type information*/ struct arr_info { @@ -50,23 +52,18 @@ struct arr_info { hbool_t is_dim; /*flag to lexer for dimension*/ }; /*stack for nested array type*/ -struct arr_info arr_stack[STACK_SIZE]; -int asindex = -1; /*pointer to the top of array stack*/ +static struct arr_info arr_stack[STACK_SIZE]; +static int asindex = -1; /*pointer to the top of array stack*/ -hbool_t is_str_size = 0; /*flag to lexer for string size*/ -hbool_t is_str_pad = 0; /*flag to lexer for string padding*/ -H5T_str_t str_pad; /*variable for string padding*/ -H5T_cset_t str_cset; /*variable for string character set*/ -hbool_t is_variable = 0; /*variable for variable-length string*/ -size_t str_size; /*variable for string size*/ +static H5T_str_t str_pad; /*variable for string padding*/ +static H5T_cset_t str_cset; /*variable for string character set*/ +static hbool_t is_variable = 0; /*variable for variable-length string*/ +static size_t str_size; /*variable for string size*/ -hid_t enum_id; /*type ID*/ -hbool_t is_enum = 0; /*flag to lexer for enum type*/ -hbool_t is_enum_memb = 0; /*flag to lexer for enum member*/ -char* enum_memb_symbol; /*enum member symbol string*/ - -hbool_t 
is_opq_size = 0; /*flag to lexer for opaque type size*/ -hbool_t is_opq_tag = 0; /*flag to lexer for opaque type tag*/ +static hid_t enum_id; /*type ID*/ +static hbool_t is_enum = 0; /*flag to lexer for enum type*/ +static hbool_t is_enum_memb = 0; /*flag to lexer for enum member*/ +static char* enum_memb_symbol; /*enum member symbol string*/ %} %union { @@ -99,7 +96,7 @@ hbool_t is_opq_tag = 0; /*flag to lexer for opaque type tag*/ %token <sval> STRING %token <ival> NUMBER -%token <ival> '{' '}' '[' ']' '"' ':' ';' +%token <ival> '{' '}' '[' ']' ':' ';' %% start : { memset(arr_stack, 0, STACK_SIZE*sizeof(struct arr_info)); /*initialize here?*/ } @@ -168,35 +165,35 @@ memb_list : | memb_list memb_def ; memb_def : ddl_type { cmpd_stack[csindex].is_field = 1; /*notify lexer a compound member is parsed*/ } - '"' field_name '"' field_offset ';' + field_name field_offset ';' { size_t origin_size, new_size; hid_t dtype_id = cmpd_stack[csindex].id; /*Adjust size and insert member, consider both member size and offset.*/ if(cmpd_stack[csindex].first_memb) { /*reclaim the size 1 temporarily set*/ - new_size = H5Tget_size($<hid>1) + $<ival>6; + new_size = H5Tget_size($<hid>1) + $<ival>4; H5Tset_size(dtype_id, new_size); /*member name is saved in yylval.sval by lexer*/ - H5Tinsert(dtype_id, $<sval>4, $<ival>6, $<hid>1); + H5Tinsert(dtype_id, $<sval>3, $<ival>4, $<hid>1); cmpd_stack[csindex].first_memb = 0; } else { origin_size = H5Tget_size(dtype_id); - if($<ival>6 == 0) { + if($<ival>4 == 0) { new_size = origin_size + H5Tget_size($<hid>1); H5Tset_size(dtype_id, new_size); - H5Tinsert(dtype_id, $<sval>4, origin_size, $<hid>1); + H5Tinsert(dtype_id, $<sval>3, origin_size, $<hid>1); } else { - new_size = $<ival>6 + H5Tget_size($<hid>1); + new_size = $<ival>4 + H5Tget_size($<hid>1); H5Tset_size(dtype_id, new_size); - H5Tinsert(dtype_id, $<sval>4, $<ival>6, $<hid>1); + H5Tinsert(dtype_id, $<sval>3, $<ival>4, $<hid>1); } } - if($<sval>4) { - free($<sval>4); - $<sval>4 = NULL; + 
if($<sval>3) { + HDfree($<sval>3); + $<sval>3 = NULL; } cmpd_stack[csindex].is_field = 0; H5Tclose($<hid>1); @@ -206,8 +203,8 @@ memb_def : ddl_type { cmpd_stack[csindex].is_field = 1; /*notify le ; field_name : STRING { - $<sval>$ = strdup(yylval.sval); - free(yylval.sval); + $<sval>$ = HDstrdup(yylval.sval); + HDfree(yylval.sval); yylval.sval = NULL; } ; @@ -247,20 +244,18 @@ vlen_type : H5T_VLEN_TOKEN '{' ddl_type '}' opaque_type : H5T_OPAQUE_TOKEN '{' - OPQ_SIZE_TOKEN { is_opq_size = 1; } opaque_size ';' + OPQ_SIZE_TOKEN opaque_size ';' { size_t size = (size_t)yylval.ival; $<hid>$ = H5Tcreate(H5T_OPAQUE, size); - is_opq_size = 0; } - OPQ_TAG_TOKEN { is_opq_tag = 1; } '"' opaque_tag '"' ';' + OPQ_TAG_TOKEN opaque_tag ';' { - H5Tset_tag($<hid>7, yylval.sval); - free(yylval.sval); + H5Tset_tag($<hid>6, yylval.sval); + HDfree(yylval.sval); yylval.sval = NULL; - is_opq_tag = 0; } - '}' { $<hid>$ = $<hid>7; } + '}' { $<hid>$ = $<hid>6; } ; opaque_size : NUMBER ; @@ -268,40 +263,39 @@ opaque_tag : STRING ; string_type : H5T_STRING_TOKEN '{' - STRSIZE_TOKEN { is_str_size = 1; } strsize ';' + STRSIZE_TOKEN strsize ';' { - if($<ival>5 == H5T_VARIABLE_TOKEN) + if($<ival>4 == H5T_VARIABLE_TOKEN) is_variable = 1; else str_size = yylval.ival; - is_str_size = 0; } STRPAD_TOKEN strpad ';' { - if($<ival>9 == H5T_STR_NULLTERM_TOKEN) + if($<ival>8 == H5T_STR_NULLTERM_TOKEN) str_pad = H5T_STR_NULLTERM; - else if($<ival>9 == H5T_STR_NULLPAD_TOKEN) + else if($<ival>8 == H5T_STR_NULLPAD_TOKEN) str_pad = H5T_STR_NULLPAD; - else if($<ival>9 == H5T_STR_SPACEPAD_TOKEN) + else if($<ival>8 == H5T_STR_SPACEPAD_TOKEN) str_pad = H5T_STR_SPACEPAD; } CSET_TOKEN cset ';' { - if($<ival>13 == H5T_CSET_ASCII_TOKEN) + if($<ival>12 == H5T_CSET_ASCII_TOKEN) str_cset = H5T_CSET_ASCII; - else if($<ival>13 == H5T_CSET_UTF8_TOKEN) + else if($<ival>12 == H5T_CSET_UTF8_TOKEN) str_cset = H5T_CSET_UTF8; } CTYPE_TOKEN ctype ';' { - if($<hid>17 == H5T_C_S1_TOKEN) + if($<hid>16 == H5T_C_S1_TOKEN) $<hid>$ = 
H5Tcopy(H5T_C_S1); - else if($<hid>17 == H5T_FORTRAN_S1_TOKEN) + else if($<hid>16 == H5T_FORTRAN_S1_TOKEN) $<hid>$ = H5Tcopy(H5T_FORTRAN_S1); } '}' { - hid_t str_id = $<hid>19; + hid_t str_id = $<hid>18; /*set string size*/ if(is_variable) { @@ -339,14 +333,10 @@ enum_type : H5T_ENUM_TOKEN '{' integer_type ';' enum_list : | enum_list enum_def ; -enum_def : '"' enum_symbol '"' { +enum_def : enum_symbol { is_enum_memb = 1; /*indicate member of enum*/ -#ifdef H5_HAVE_WIN32_API - enum_memb_symbol = _strdup(yylval.sval); -#else /* H5_HAVE_WIN32_API */ enum_memb_symbol = strdup(yylval.sval); -#endif /* H5_HAVE_WIN32_API */ - free(yylval.sval); + HDfree(yylval.sval); yylval.sval = NULL; } enum_val ';' @@ -386,7 +376,7 @@ enum_def : '"' enum_symbol '"' { } is_enum_memb = 0; - if(enum_memb_symbol) free(enum_memb_symbol); + if(enum_memb_symbol) HDfree(enum_memb_symbol); } H5Tclose(super); |