DDL in BNF

The following describes the data description language (DDL) for HDF5 files. The description is in Backus-Naur Form (BNF). A quick explanation of some of the symbols used:

	::=               defined as
	<tname>           a token with the name tname
	<a> | <b>         one of <a> or <b>
	<a>opt             zero or one occurrence of <a>
	<a>*              zero or more occurrences of <a>
	<a>+              one or more occurrences of <a>
        TBD               To Be Decided


<file> ::= hdf5 "<file_name>" { <file_boot_block>opt <root_group> }

<file_name> ::= <identifier>

<file_boot_block> ::= boot_block { <boot_block_content> }

<boot_block_content> ::= TBD

<root_group> ::= group "/" { <nlink>opt <group_attribute>* <group_member>* }  
// nlink must appear before attributes and members.

<nlink> ::= { nlink <no_of_hardlinks> }

<no_of_hardlinks> ::= an integer larger than 1

<group_attribute> ::= <attribute> 

<attribute> ::= attribute "<attr_name>" { <datatype>
                                          <dataspace>
                                          <data>opt  }
// <datatype>, <dataspace> and <data> may appear in any order,
// as long as <data> comes last.

<attr_name> ::=  <identifier>

<datatype> ::= datatype { "<datatype_name>" } |          // shared data type
               datatype { <scalar_datatype> }  |         
               datatype { <dt_definition>+ }        

<datatype_name> ::= <identifier>

<scalar_datatype> ::= H5T_NATIVE_CHAR | H5T_NATIVE_INT | ...     // data types supported by HDF5

<dt_definition> ::= <scalar_datatype> <field_name> ;

<field_name> ::= <identifier>

<dataspace> ::= dataspace { "<dataspace_name>" } |
                dataspace { array <current_dims> <max_dims> } | 
                dataspace { other <ds_definition>+ } 

<dataspace_name> ::= <identifier>

<current_dims> ::= a list of integers represented as (i1, i2, ... ) where ik is an integer, k = 1,2,...

<max_dims> ::= (i1, i2, ... ) where ik is an integer or H5S_UNLIMITED

<ds_definition> ::= TBD

<data> ::= data {TBD}

<group_member> ::= <named_datatype> | <named_dataspace> | <group> | <dataset> | <softlink>

<named_datatype> ::= datatype "<datatype_name>" { <dt_definition>+ }

<named_dataspace> ::= dataspace "<dataspace_name>" { array <current_dims> <max_dims> } |
                      dataspace "<dataspace_name>" { other <ds_definition>+ }

<group> ::= group "<group_name>" { <nlink>opt <group_attribute>* <group_member>* }  

<group_name> ::= <identifier>

<dataset> ::= dataset "<dataset_name>" { <nlink>opt
                                         <datatype>  
                                         <dataspace> 
                                         <storagelayout>opt
                                         <compression>opt
                                         <dataset_attribute>*
                                         <data>opt  } 
// Any order is ok as long as <data> and <dataset_attribute> are 
// after <datatype> and <dataspace>.
// nlink must appear before others.

<dataset_name> ::= <identifier>

<storagelayout> ::= storagelayout <contiguous_layout>  |
                    storagelayout <chunked_layout>  |
                    storagelayout <compact_layout>  |
                    storagelayout <external_layout>

<contiguous_layout> ::= {contiguous}    // default

<chunked_layout> ::=  {chunked <dims> }

<dims> ::= (i1, i2, ... ) where ik is an integer, k = 1,2,...

<compact_layout> ::= TBD           

<external_layout> ::= {external <external_file>+ }

<external_file> ::= (<file_name> <offset> <size>) 

<offset> ::= an integer

<size> ::= an integer

<compression> ::= compression { TBD }

<dataset_attribute> ::= <attribute> 

<softlink> ::= softlink "<softlink_name>" { linktarget "<target>" }

<softlink_name> ::= <identifier>

<target> ::= <identifier>

<identifier> ::= string   
// The character "/" should be used with care.
// For example, <dataset_name> cannot contain "/".


An example of HDF5 File in DDL

Consider an HDF5 file, example.h5, with the following structure:

                             /

                     /       |       \
                 group1     group3  dataset3
                 /   \       |       
           group2  dataset2 softlink
             /
      dataset1 
              


hdf5 "example" { 

group "/" {

   datatype "shared_datatype" {                    // named data type
            H5T_NATIVE_CHAR c[20];
            H5T_NATIVE_INT i; 
   }
 
   attribute "group_attribute" {                   //attribute
             datatype {H5T_NATIVE_CHAR}
             dataspace {array (20) (20)}
             data { "group attribute" }
   } 

   group "group1" {

         group "group2" {

               dataset "dataset1" {
                        dataspace {array (4) (4)}
                        datatype {"shared_datatype"}
                        attribute "dataset1_attr" {
                                   datatype {H5T_NATIVE_CHAR}
                                   dataspace {array (10) (10)}
                        }
               }
         }

         dataset "dataset2" {
               datatype {H5T_NATIVE_INT}
               dataspace {array (2,5) (H5S_UNLIMITED, 5)}
               data {0,0,0,0,0,0,0,0,0,0}
         }
   }
   
   group "group3" {
         softlink "slink" {
                  linktarget "/sometarget" 
         }
   }
      
   
   dataset "dataset3" {
           datatype {H5T_NATIVE_FLOAT}
           dataspace {array (100,100) (100,100)} 
   }
 
} 

}