Introduction to HDF5 
HDF5 Reference Manual 
Other HDF5 documents and links 
And in this document, the HDF5 User's Guide:    
Files   Datasets   Datatypes   Dataspaces   Groups  
References   Attributes   Property Lists   Error Handling  
Filters   Caching   Chunking   Mounting Files  
Performance   Debugging   Environment   DDL  
Ragged Arrays  

DDL in BNF for HDF5

1. Introduction

This document contains the data description language (DDL) for an HDF5 file. The description is in Backus-Naur Form.

2. Explanation of Symbols

This section contains a brief explanation of the symbols used in the DDL.
    ::=                      defined as
    <tname>                  a token with the name tname
    <a> | <b>                one of <a> or <b>
    <a>opt                   zero or one occurrence of <a>
    <a>*                     zero or more occurrences of <a>
    <a>+                     one or more occurrences of <a>
    [0-9]                    an element in the range between 0 and 9
    `['                      the token within the quotes (used for special characters)
    TBD                      To Be Decided

3. The DDL

<file> ::= HDF5 <file_name> { <file_super_block>opt <root_group> }

<file_name> ::= <identifier>

<file_super_block> ::= BOOT_BLOCK { <super_block_content> }

<super_block_content> ::= TBD

<root_group> ::= GROUP "/" { <unamed_datatype>* <object_id>opt
                 <group_attribute>* <group_member>* }  

<datatype> ::= <atomic_type> | <compound_type> | <variable_length_type> | <array_type>

<unamed_datatype> ::= DATATYPE <unamed_type_name> { <datatype> }

<unamed_type_name> ::= the name assigned to an unnamed type has the form
                       #oid1:oid2, where oid1 and oid2 are the object ids
                       of the type

<atomic_type> ::= <integer>  | <float>  | <time>      | <string> |
                  <bitfield> | <opaque> | <reference> | <enum>

<integer> ::=  H5T_STD_I8BE     | H5T_STD_I8LE      |
               H5T_STD_I16BE    | H5T_STD_I16LE     |
               H5T_STD_I32BE    | H5T_STD_I32LE     |
               H5T_STD_I64BE    | H5T_STD_I64LE     |
               H5T_STD_U8BE     | H5T_STD_U8LE      |
               H5T_STD_U16BE    | H5T_STD_U16LE     |
               H5T_STD_U32BE    | H5T_STD_U32LE     |
               H5T_STD_U64BE    | H5T_STD_U64LE     |
               H5T_NATIVE_CHAR  | H5T_NATIVE_UCHAR  |
               H5T_NATIVE_SHORT | H5T_NATIVE_USHORT |
               H5T_NATIVE_INT   | H5T_NATIVE_UINT   |
               H5T_NATIVE_LONG  | H5T_NATIVE_ULONG  |
               H5T_NATIVE_LLONG | H5T_NATIVE_ULLONG

<float> ::= H5T_IEEE_F32BE   | H5T_IEEE_F32LE     |
            H5T_IEEE_F64BE   | H5T_IEEE_F64LE     |
            H5T_NATIVE_FLOAT |  H5T_NATIVE_DOUBLE |
            H5T_NATIVE_LDOUBLE

<time> ::= TBD

<string> ::= H5T_STRING { STRSIZE <strsize> ;
               STRPAD <strpad> ;
               CSET <cset> ;
               CTYPE <ctype> ; }  

<strsize> ::= <int_value>

<strpad> ::= H5T_STR_NULLTERM | H5T_STR_NULLPAD | H5T_STR_SPACEPAD

<cset> ::= H5T_CSET_ASCII

<ctype> ::= H5T_C_S1 | H5T_FORTRAN_S1

<bitfield> ::= TBD

<opaque> ::= H5T_OPAQUE { <identifier> }

<reference> ::= H5T_REFERENCE { <ref_type> }

<ref_type> ::= H5T_STD_REF_OBJECT | H5T_STD_REF_DSETREG

<compound_type> ::= H5T_COMPOUND { <member_type_def>+ }

<member_type_def> ::= <datatype> <field_name> ;

<field_name> ::= <identifier>

<variable_length_type> ::= H5T_VLEN { <datatype> }

<array_type> ::= H5T_ARRAY { <dim_sizes> <datatype> }

<dim_sizes> ::= `['<dimsize>`]' | `['<dimsize>`]'<dim_sizes>

<dimsize> ::= <int_value>

<attribute> ::= ATTRIBUTE <attr_name> { <dataset_type>    
                                        <dataset_space>
                                        <data>opt  } 

<attr_name> ::= <identifier>

<dataset_type> ::= DATATYPE <path_name> | <datatype>

<enum> ::= H5T_ENUM { <enum_base_type> <enum_def>+  }

<enum_base_type> ::= <integer>
// Currently enums can only hold integer type data, but they may be expanded
// in the future to hold any datatype

<enum_def> ::= <enum_symbol> <enum_val>;

<enum_symbol> ::= <identifier>

<enum_val> ::= <int_value>

<path_name> ::= <path_part>+

<path_part> ::= /<identifier>

<dataspace> ::= <scalar_space> | <simple_space> | <complex_space>

<scalar_space> ::= SCALAR

<simple_space> ::= SIMPLE { <current_dims> / <max_dims> }

<complex_space> ::= COMPLEX { <complex_space_definition> }

<dataset_space> ::= DATASPACE <path_name> | <dataspace>

<current_dims> ::= <dims>

<max_dims> ::= (i1, i2, ... ) where ik is an integer or H5S_UNLIMITED

<complex_space_definition> ::= TBD

<data> ::= DATA { <scalar_space_data> | <simple_space_data> | <complex_space_data> }
                  
<scalar_space_data> ::= <any_element>

<any_element> ::= <atomic_element> | <compound_element> | 
                  <variable_length_element> | <array_element>

<any_data_seq> ::= <any_element> | <any_element>, <any_data_seq>

<atomic_element> ::= <integer_data> | <float_data>    | <time_data>   |
                     <string_data>  | <bitfield_data> | <opaque_data> |
                     <enum_data>    | <reference_data>

<integer_data> ::= <int_value>

<float_data> ::= a floating point number

<time_data> ::= TBD

<string_data> ::= a string
// A string is enclosed in double quotes.
// If a string is displayed on more than one line, the string concatenation
// operator '//' is used.

<bitfield_data> ::= TBD

<opaque_data> ::= TBD

<enum_data> ::= <enum_symbol>

<reference_data> ::= <object_ref_data> | <data_region_data> | NULL

<object_ref_data> ::= <object_type> <object_num>

<object_type> ::= DATASET | GROUP | DATATYPE

<object_id> ::= OBJECTID { <object_num> }

<object_num> ::= <int_value>:<int_value> | <int_value>

<data_region_data> ::= H5T_STD_REF_DSETREG <object_num>
        { <data_region_data_info>, <data_region_data_info>, ...}

<data_region_data_info> ::= <region_info> | <point_info>

<region_info> ::= (<region_vals>)

<region_vals> ::= <lower_bound>:<upper_bound> | <lower_bound>:<upper_bound>, <region_vals>

<lower_bound> ::= <int_value>

<upper_bound> ::= <int_value>

<point_info> ::= (<point_vals>)

<point_vals> ::= <int_value> | <int_value>, <point_vals>

<compound_element> ::= { <any_data_seq> }

<atomic_simple_data> ::= <atomic_element>, <atomic_simple_data> | <atomic_element>

<simple_space_data> ::= <any_data_seq>

<variable_length_element> ::= ( <any_data_seq> )

<array_element> ::= `[' <any_data_seq> `]'

<complex_space_data> ::= TBD

<named_datatype> ::= DATATYPE <type_name> { <datatype> }

<type_name> ::= <identifier>

<named_dataspace> ::= TBD

<hardlink> ::= HARDLINK <path_name> 

<group> ::= GROUP <group_name> { <hardlink> | <group_info> }
            
<group_name> ::= <identifier>

<group_info> ::= <group_attribute>* <group_member>* 
            
<group_attribute> ::= <attribute> 

<group_member> ::= <named_datatype> | <named_dataspace> | <group> |
                   <dataset> | <softlink>

<dataset> ::= DATASET <dataset_name> { <hardlink> | <dataset_info> }

<dataset_info> ::= <dataset_type>  <dataset_space> <storagelayout>opt
                   <compression>opt <dataset_attribute>* <object_id>opt
                   <data>opt
// Tokens above can be in any order as long as <data> is 
// after <dataset_type> and <dataset_space>.

<dataset_name> ::= <identifier>

<storagelayout> ::= STORAGELAYOUT <contiguous_layout>  |
                    STORAGELAYOUT <chunked_layout>     |
                    STORAGELAYOUT <compact_layout>     |
                    STORAGELAYOUT <external_layout>

<contiguous_layout> ::= {CONTIGUOUS}    // default

<chunked_layout> ::=  {CHUNKED <dims> }

<dims> ::= (<dims_values>)

<dims_values> ::= <int_value> | <int_value>, <dims_values>

<compact_layout> ::= TBD           

<external_layout> ::= {EXTERNAL <external_file>+ }

<external_file> ::= (<file_name> <offset> <size>) 

<offset> ::= <int_value>

<size> ::= <int_value>

<compression> ::= COMPRESSION { TBD }

<dataset_attribute> ::= <attribute> 

<softlink> ::= SOFTLINK <softlink_name> { LINKTARGET <target> }

<softlink_name> ::= <identifier>

<target> ::= <identifier>

<identifier> ::= a string
// The character '/' should be used with care in an identifier, because it
// also separates the parts of a <path_name>.

<int_value> ::= 0 | [1-9][0-9]*

4. An Example of an HDF5 File in DDL

HDF5 "example.h5" {
GROUP "/" {
   ATTRIBUTE "attr1" {
      DATATYPE H5T_STRING { 
           STRSIZE 17;
           STRPAD H5T_STR_NULLTERM;
           CSET H5T_CSET_ASCII;
           CTYPE H5T_C_S1;
         }
      DATASPACE SCALAR 
      DATA {
         "string attribute"
      }
   }
   DATASET "dset1" {
      DATATYPE H5T_STD_I32BE
      DATASPACE SIMPLE { ( 10, 10 ) / ( 10, 10 ) }
      DATA {
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9
      }
   }
   DATASET "dset2" {
      DATATYPE H5T_COMPOUND {
         H5T_STD_I32BE "a";
         H5T_IEEE_F32BE "b";
         H5T_IEEE_F64BE "c";
      }
      DATASPACE SIMPLE { ( 5 ) / ( 5 ) }
      DATA {
         {
            1,
            0.1,
            0.01
         },
         {
            2,
            0.2,
            0.02
         },
         {
            3,
            0.3,
            0.03
         },
         {
            4,
            0.4,
            0.04
         },
         {
            5,
            0.5,
            0.05
         }
      }
   }
   GROUP "group1" {
      DATASET "dset3" {
         DATATYPE "/type1"
         DATASPACE SIMPLE { ( 5 ) / ( 5 ) }
         DATA {
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            }
         }
      }
   }
   DATASET "dset3" {
      DATATYPE H5T_VLEN { H5T_STD_I32LE } 
      DATASPACE SIMPLE { ( 4 ) / ( 4 ) } 
      DATA {
         (0), (10, 11), (20, 21, 22), (30, 31, 32, 33)
      } 
   }
   GROUP "group2" {
      HARDLINK "/group1"
   }
   SOFTLINK "slink1" {
      LINKTARGET "somevalue"
   }
   DATATYPE "type1" H5T_COMPOUND {
      H5T_ARRAY { [4] H5T_STD_I32BE } "a";
      H5T_ARRAY { [5][6] H5T_IEEE_F32BE } "b";
   }
}
}

Introduction to HDF5 
HDF5 Reference Manual 
Other HDF5 documents and links 
And in this document, the HDF5 User's Guide:    
Files   Datasets   Datatypes   Dataspaces   Groups  
References   Attributes   Property Lists   Error Handling  
Filters   Caching   Chunking   Mounting Files  
Performance   Debugging   Environment   DDL  
Ragged Arrays  

HDF Help Desk
Last modified: 17 November 2000
Describes HDF5 Release 1.4 Beta, December 2000