The following describes the data description language for HDF5 files. The description is in Backus-Naur Form. A quick explanation of some of the symbols used follows.
::=              defined as
<tname>          a token with the name tname
<a> | <b>        one of <a> or <b>
<a>opt           zero or one occurrence of <a>
<a>*             zero or more occurrences of <a>
<a>+             one or more occurrences of <a>
TBD              To Be Decided

<file> ::= hdf5 "<file_name>" { <file_boot_block>opt <root_group> }
<file_name> ::= <identifier>
<file_boot_block> ::= boot_block { <boot_block_content> }
<boot_block_content> ::= TBD
<root_group> ::= group "/" { <group_attribute>* <group_member>* }
                 // Attributes and members can be in any order.
<group_attribute> ::= <attribute>
<attribute> ::= attribute "<attr_name>" { <datatype> <dataspace> <data>opt }
                // <datatype>, <dataspace> and <data> can be in any order
                // as long as <data> comes last.
<attr_name> ::= <identifier>
<datatype> ::= datatype "<datatype_name>" ; |      // shared data type
               datatype { <scalar_datatype> } ; |
               datatype { <dt_definition>+ } ;
<datatype_name> ::= <identifier>
<scalar_datatype> ::= int8 | uint8 | ...           // data types supported by HDF5
<dt_definition> ::= "<datatype_name>" <field_name> ; |
                    <scalar_datatype> <field_name> ;
<field_name> ::= one or more field names separated by ","
<dataspace> ::= dataspace "<dataspace_name>" ; |
                dataspace {array <current_dims> <max_dims>} ; |
                dataspace {other <ds_definition>+ } ;
<dataspace_name> ::= <identifier>
<current_dims> ::= a list of integers represented in the form ( , , .. , )
<max_dims> ::= a list of integers or H5S_UNLIMITED represented in the form ( , , .. , )
<ds_definition> ::= TBD
<data> ::= data {TBD} ;
<group_member> ::= <named_datatype> | <named_dataspace> | <group> | <dataset> | <link>
<named_datatype> ::= datatype "<datatype_name>" { <scalar_datatype> } |
                     datatype "<datatype_name>" { <dt_definition>+ }
<named_dataspace> ::= dataspace "<dataspace_name>" {array <current_dims> <max_dims> } |
                      dataspace "<dataspace_name>" { other <ds_definition>+ }
<group> ::= group "<group_name>" { <group_attribute>* <group_member>* }
            // Attributes and members can be in any order.
<group_name> ::= <identifier>
<dataset> ::= dataset "<dataset_name>" { <datatype> <dataspace> <storagelayout>opt <compression>opt <dataset_attribute>* <data>opt }
              // Any order is ok as long as <data> and <dataset_attribute> are
              // after <datatype> and <dataspace>.
<dataset_name> ::= <identifier>
<storagelayout> ::= storagelayout <contiguous_layout> ; |
                    storagelayout <chunked_layout> ; |
                    storagelayout <compact_layout> ; |
                    storagelayout <external_layout> ;
<contiguous_layout> ::= {contiguous}               // default
<chunked_layout> ::= {chunked <dims> }
<dims> ::= a list of integers represented in the form ( , , .. , )
<compact_layout> ::= TBD                           // not implemented yet
<external_layout> ::= {external <external_file>+ }
<external_file> ::= (<file_name> <offset> <size>)
<offset> ::= an integer
<size> ::= an integer
<compression> ::= compression { TBD } ;            // algorithm name and properties?
<dataset_attribute> ::= <attribute> ;
<link> ::= <hardlink> | <softlink>
<hardlink> ::= hardlink "<hardlink_name>" { linktarget "<target>"; }
<hardlink_name> ::= <identifier>
<target> ::= <identifier>
<softlink> ::= softlink "<softlink_name>" { linktarget "<target>"; }
<softlink_name> ::= <identifier>
<identifier> ::= string
                 // character "/" should be used with care.
                 // <dataset_name> cannot contain "/", for example.
Consider an HDF5 file, example.h5, with the following structure:

                           /
                    /      |     \
              group1     group3   dataset3
            /      \       |         ^
       group2   dataset2  hardlink --|
      /     \       ^
dataset1  softlink  |
             |------|

hdf5 "example" {
  group "/" {
    datatype "shared_datatype1" {                  // named data type
      char name[20], address[40];
      int32 id;
    }
    datatype "shared_datatype2" {
      "shared_datatype1" rec[100];
      float32 total;
    }
    dataspace "shared_dataspace1" { array (1000) (1000)}   // named data space
    attribute "group_attribute1" {                 // group attribute
      datatype {int32};
      dataspace "shared_dataspace1";
    }
    attribute "group_attribute2" {
      datatype {int32};
      dataspace {array (3,5) (3,5)} ;
      data {(0,0,0,0,0) (1,1,1,1,1) (2,2,2,2,2)};
    }
    group "group1" {
      attribute "group1_attribute" {
        datatype {char};
        dataspace {array (100) (200)};
        data {("attribute_of_group_1")};
      }
      group "group2" {
        datatype "shared_datatype3" {
          int8 z;
          int32 x;
        }
        dataset "dataset1" {
          dataspace "shared_dataspace1";
          datatype "shared_datatype3";
          attribute "dataset1_attr" {
            datatype {char};
            dataspace {array (10) (10)};
            data {("data1")};
          };
        }
        softlink "sl" {
          linktarget "/group1/dataset2";
        }
      }
      dataset "dataset2" {
        datatype {int8};
        dataspace {array (2,5) (H5S_UNLIMITED, 5)};
        data {(0,0,0,0,0) (0,0,0,0,0)};
      }
    }
    group "group3" {
      hardlink "hl" {
        linktarget "/dataset3" ;
      }
    }
    dataset "dataset3" {
      datatype { float32 };
      dataspace {array (1000,1000) (1000,1000) };
      storagelayout {chunked (250,1000)};
    }
  } // root group
}