Introduction to HDF5 
HDF5 Reference Manual 
Other HDF5 documents and links 
And in this document, the HDF5 User's Guide:    
Files   Datasets   Datatypes   Dataspaces   Groups  
References   Attributes   Property Lists   Error Handling  
Filters   Palettes   Caching   Chunking   Mounting Files  
Performance   Debugging   Environment   DDL  
Ragged Arrays  

DDL in BNF for HDF5

1. Introduction

This document contains the data description language (DDL) for an HDF5 file. The description is in Backus-Naur Form.

2. Explanation of Symbols

This section contains a brief explanation of the symbols used in the DDL.
    ::=                      defined as
    <tname>                  a token with the name tname
    <a> | <b>                one of <a> or <b>
    <a>opt                    zero or one occurrence of <a>
    <a>*                     zero or more occurrence of <a>
    <a>+                     one or more occurrence of <a>
    TBD                      To Be Decided

3. The DDL

<file> ::= HDF5 <file_name> { <file_super_block>opt <root_group> }

<file_name> ::= <identifier>

<file_super_block> ::= BOOT_BLOCK { <super_block_content> }

<super_block_content> ::= TBD

<root_group> ::= GROUP "/" { <unamed_datatype>* <object_id>opt
            <group_attribute>* <group_member>* }  

<datatype> ::= <atomic_type> | <compound_type> | <variable_length_type> |
            <array_type>

<unamed_datatype> ::= DATATYPE <unamed_type_name> { <datatype> }

<unamed_type_name> ::= the assigned name for unamed type is in the form of 
           #oid1:oid2, where oid1 and oid2 are the object ids of the type

<atomic_type> ::= <integer> | <float> | <time> | <string> | <bitfield> |
               <opaque> | <reference> | <enum>

<integer> ::=  H5T_STD_I8BE | H5T_STD_I8LE | H5T_STD_I16BE |
           H5T_STD_I16LE | H5T_STD_I32BE | H5T_STD_I32LE | H5T_STD_I64BE |
           H5T_STD_I64LE | H5T_STD_U8BE | H5T_STD_U8LE | H5T_STD_U16BE |
           H5T_STD_U16LE | H5T_STD_U32BE | H5T_STD_U32LE | H5T_STD_U64BE |
           H5T_STD_U64LE | H5T_NATIVE_CHAR | H5T_NATIVE_UCHAR |
           H5T_NATIVE_SHORT | H5T_NATIVE_USHORT | H5T_NATIVE_INT |
           H5T_NATIVE_UINT | H5T_NATIVE_LONG | H5T_NATIVE_ULONG |
           H5T_NATIVE_LLONG | H5T_NATIVE_ULLONG

<float> ::= H5T_IEEE_F32BE | H5T_IEEE_F32LE | H5T_IEEE_F64BE |
            H5T_IEEE_F64LE | H5T_NATIVE_FLOAT |  H5T_NATIVE_DOUBLE |
            H5T_NATIVE_LDOUBLE

<time> ::= TBD

<string> ::= H5T_STRING { STRSIZE <strsize> ;
               STRPAD <strpad> ;
               CSET <cset> ;
               CTYPE <ctype> ; }  

<strsize> ::= an integer

<strpad> ::= H5T_STR_NULLTERM | H5T_STR_NULLPAD | H5T_STR_SPACEPAD

<cset> ::= H5T_CSET_ASCII

<ctype> ::= H5T_C_S1 | H5T_FORTRAN_S1

<bitfield> ::= TBD

<opaque> ::= H5T_OPAQUE { <identifier> }

<reference> ::= H5T_REFERENCE { <ref_type> }

<ref_type> ::= H5T_STD_REF_OBJECT | H5T_STD_REF_DSETREG

<compound_type> ::= H5T_COMPOUND { <member_type_def>+ }

<member_type_def> ::= <datatype> <field_name> ;

<field_name> ::= <identifier>

<variable_length_type> ::= H5T_VLEN { <datatype> }

<array_type> ::= H5T_ARRAY { <dim_sizes> <datatype> }

<dim_sizes> ::= [dimsize1][dimsize2]...
// where dimsize1, dimsize2 are integers

<attribute> ::= ATTRIBUTE <attr_name> { <dataset_type>    
                                        <dataset_space>
                                        <data>opt  } 

<attr_name> ::= <identifier>

<dataset_type> ::= DATATYPE <path_name> | <datatype>

<enum> ::= H5T_ENUM { <enum_base_type> <enum_def>+  }

<enum_base_type> ::= <integer>
// Currently enums can only hold integer type data, but they may be expanded
// in the future to hold any datatype

<enum_def> ::= <enum_symbol> <enum_val>;

<enum_symbol> ::= <identifier>

<enum_val> ::= an integer;

<path_name> ::= <path_part>+

<path_part> ::= /<identifier>

<dataspace> ::= <scalar_space> | <simple_space> | <complex_space>

<scalar_space> ::= SCALAR

<simple_space> ::= SIMPLE { <current_dims> / <max_dims> }

<complex_space> ::= COMPLEX { <complex_space_definition> }

<dataset_space> ::= DATASPACE <path_name> | <dataspace>

<current_dims> ::= (i1, i2, ... ), where ik is an integer, k = 1,2,...

<max_dims> ::= (i1, i2, ... ) where ik is an integer or H5S_UNLIMITED

<complex_space_definition> ::= TBD

<data> ::= DATA { <scalar_space_data> | <simple_space_data> |
              <complex_space_data> }
                  
<scalar_space_data> ::= <any_element>

<any_element> ::= <atomic_element> | <compound_element> | 
        <variable_length_element> | <array_element>

<any_data_seq> ::= <any_element> | <any_element>, <any_data_seq>

<atomic_element> :: = <integer_data> | <float_data> | <time_data> |
                <string_data> | <bitfield_data> | <opaque_data> |
                <enum_data> | <reference_data>

<integer_data> ::= an integer

<float_data> ::= a floating point number

<time_data> ::= TBD

<string_data> ::= a string
// A string is enclosed in double quotes. 
// If a string is displayed on more than one line, string concatenate
// operator '//'is used.

<bitfield_data> ::= TBD

<opaque_data> ::= TBD

<enum_data> ::= <enum_symbol>

<reference_data> ::= <object_ref_data> | <data_region_data> | NULL

<object_ref_data> ::= <object_type> <object_num>

<object_type> ::= DATASET | GROUP | DATATYPE

<object_id> ::= OBJECTID { <object_num> }

<object_num> ::= an integer:an integer | an integer

<data_region_data> ::= H5T_STD_REF_DSETREG <object_num>
        { <data_region_data_info>, <data_region_data_info>, ...}

<data_region_data_info> ::= <region_info> | <point_info>

<region_info> ::= (<lower_bound>:<upper_bound>,
        <lower_bound>:<upper_bound>, ...)

<lower_bound> ::= an integer

<upper_bound> ::= an integer

<point_info> ::= (an integer, an integer, ...)

<compound_element> ::= { <any_data_seq> }

<atomic_simple_data> :: = <atomic_element>, <atomic_simple_data> |
        <atomic_element>

<simple_space_data> :: = <any_data_seq>

<variable_length_element> ::= ( <any_data_seq> )

<array_element> ::= [ <any_data_seq> ]

<complex_space_data> ::= TBD

<named_datatype> ::= DATATYPE <type_name> { <datatype> }

<type_name> ::= <identifier>

<named_dataspace> ::= TBD

<hardlink> ::= HARDLINK <path_name> 

<group> ::= GROUP <group_name> { <hardlink> | <group_info> }
            
<group_name> ::= <identifier>

<group_info> ::= <group_attribute>* <group_member>* 
            
<group_attribute> ::= <attribute> 

<group_member> ::= <named_datatype> | <named_dataspace> | <group> |
        <dataset> | <softlink>

<dataset> ::= DATASET <dataset_name> { <hardlink> | <dataset_info> }

<dataset_info> ::= <dataset_type>  <dataset_space> <storagelayout>opt
               <compression>opt <dataset_attribute>* <object_id>opt
               <data>opt
// Tokens above can be in any order as long as <data> is 
// after <dataset_type> and <dataset_space>.

<dataset_name> ::= <identifier>

<storagelayout> :: = STORAGELAYOUT <contiguous_layout>  |  
                     STORAGELAYOUT <chunked_layout>  | 
                     STORAGELAYOUT <compact_layout>  | 
                     STORAGELAYOUT <external_layout> 

<contiguous_layout> ::= {CONTIGUOUS}    // default

<chunked_layout> ::=  {CHUNKED <dims> }

<dims> ::= (i1, i2, ... ), ik is an integer, k = 1,2,... 

<compact_layout> ::= TBD           

<external_layout> ::= {EXTERNAL <external_file>+ }

<external_file> ::= (<file_name> <offset> <size>) 

<offset> ::= an integer

<size> ::= an integer

<compression> :: = COMPRESSION { TBD }  

<dataset_attribute> ::= <attribute> 

<softlink> ::= SOFTLINK <softlink_name> { LINKTARGET <target> }

<softlink_name> ::= <identifier>

<target> ::= <identifier>

<identifier> ::= string   
// character '/' should be used with care. 

4. An Example of an HDF5 File in DDL

HDF5 "example.h5" {
GROUP "/" {
   ATTRIBUTE "attr1" {
      DATATYPE H5T_STRING { 
           STRSIZE 17;
           STRPAD H5T_STR_NULLTERM;
           CSET H5T_CSET_ASCII;
           CTYPE H5T_C_S1;
         }
      DATASPACE SCALAR 
      DATA {
         "string attribute"
      }
   }
   DATASET "dset1" {
      DATATYPE H5T_STD_I32BE
      DATASPACE SIMPLE { ( 10, 10 ) / ( 10, 10 ) }
      DATA {
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9
      }
   }
   DATASET "dset2" {
      DATATYPE H5T_COMPOUND {
         H5T_STD_I32BE "a";
         H5T_IEEE_F32BE "b";
         H5T_IEEE_F64BE "c";
      }
      DATASPACE SIMPLE { ( 5 ) / ( 5 ) }
      DATA {
         {
            1,
            0.1,
            0.01
         },
         {
            2,
            0.2,
            0.02
         },
         {
            3,
            0.3,
            0.03
         },
         {
            4,
            0.4,
            0.04
         },
         {
            5,
            0.5,
            0.05
         }
      }
   }
   GROUP "group1" {
      DATASET "dset3" {
         DATATYPE "/type1"
         DATASPACE SIMPLE { ( 5 ) / ( 5 ) }
         DATA {
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            }
         }
      }
   }
   DATASET "dset3" {
      DATATYPE H5T_VLEN { H5T_STD_I32LE } 
      DATASPACE SIMPLE { ( 4 ) / ( 4 ) } 
      DATA {
         (0), (10, 11), (20, 21, 22), (30, 31, 32, 33)
      } 
   }
   GROUP "group2" {
      HARDLINK "/group1"
   }
   SOFTLINK "slink1" {
      LINKTARGET "somevalue"
   }
   DATATYPE "type1" H5T_COMPOUND {
      H5T_ARRAY { [4] H5T_STD_I32BE } "a";
      H5T_ARRAY { [5][6] H5T_IEEE_F32BE } "b";
   }
}
}

Introduction to HDF5 
HDF5 Reference Manual 
Other HDF5 documents and links 
And in this document, the HDF5 User's Guide:    
Files   Datasets   Datatypes   Dataspaces   Groups  
References   Attributes   Property Lists   Error Handling  
Filters   Palettes   Caching   Chunking   Mounting Files  
Performance   Debugging   Environment   DDL  
Ragged Arrays  

HDF Help Desk
Last modified: 17 November 2000