diff options
Diffstat (limited to 'doxygen')
67 files changed, 53961 insertions, 56 deletions
diff --git a/doxygen/Doxyfile.in b/doxygen/Doxyfile.in index b1b783c..0e41a7b 100644 --- a/doxygen/Doxyfile.in +++ b/doxygen/Doxyfile.in @@ -738,7 +738,7 @@ FILE_VERSION_FILTER = # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. -LAYOUT_FILE = +LAYOUT_FILE = @DOXYGEN_LAYOUT_FILE@ # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib @@ -855,9 +855,16 @@ INPUT_ENCODING = UTF-8 FILE_PATTERNS = H5*public.h \ H5*module.h \ - H5VLconnector.h \ - H5VLconnector_passthru.h \ - H5VLnative.h \ + H5FDcore.h \ + H5FDdirect.h \ + H5FDfamily.h \ + H5FDlog.h \ + H5FDmpi.h \ + H5FDmpio.h \ + H5FDmulti.h \ + H5FDsec2.h \ + H5FDstdio.h \ + H5FDwindows.h \ H5version.h \ *.dox @@ -907,7 +914,7 @@ EXCLUDE_SYMBOLS = # that contain example code fragments that are included (see the \include # command). -EXAMPLE_PATH = ../src ../examples ../test examples +EXAMPLE_PATH = @DOXYGEN_EXAMPLES_DIRECTORY@ # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and @@ -1168,7 +1175,7 @@ HTML_FILE_EXTENSION = .html # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_HEADER = +HTML_HEADER = @DOXYGEN_HTML_HEADER@ # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard @@ -1178,7 +1185,7 @@ HTML_HEADER = # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_FOOTER = +HTML_FOOTER = @DOXYGEN_HTML_FOOTER@ # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of @@ -1203,7 +1210,7 @@ HTML_STYLESHEET = # list). 
For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_EXTRA_STYLESHEET = +HTML_EXTRA_STYLESHEET = @DOXYGEN_HTML_EXTRA_STYLESHEET@ # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note @@ -1213,7 +1220,7 @@ HTML_EXTRA_STYLESHEET = # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_EXTRA_FILES = +HTML_EXTRA_FILES = @DOXYGEN_HTML_EXTRA_FILES@ # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to @@ -1271,7 +1278,7 @@ HTML_DYNAMIC_MENUS = NO # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_DYNAMIC_SECTIONS = NO +HTML_DYNAMIC_SECTIONS = YES # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand @@ -1483,7 +1490,7 @@ ECLIPSE_DOC_ID = org.doxygen.Project # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. -DISABLE_INDEX = NO +DISABLE_INDEX = YES # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag @@ -1631,7 +1638,7 @@ MATHJAX_CODEFILE = # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. -SEARCHENGINE = NO +SEARCHENGINE = YES # When the SERVER_BASED_SEARCH tag is enabled the search engine will be # implemented using a web server instead of a web client using JavaScript. There @@ -1643,7 +1650,7 @@ SEARCHENGINE = NO # The default value is: NO. # This tag requires that the tag SEARCHENGINE is set to YES. 
-SERVER_BASED_SEARCH = YES +SERVER_BASED_SEARCH = @DOXYGEN_SERVER_BASED_SEARCH@ # When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP # script for searching. Instead the search results are written to an XML file @@ -1659,7 +1666,7 @@ SERVER_BASED_SEARCH = YES # The default value is: NO. # This tag requires that the tag SEARCHENGINE is set to YES. -EXTERNAL_SEARCH = NO +EXTERNAL_SEARCH = @DOXYGEN_EXTERNAL_SEARCH@ # The SEARCHENGINE_URL should point to a search engine hosted by a web server # which will return the search results when EXTERNAL_SEARCH is enabled. @@ -1670,7 +1677,7 @@ EXTERNAL_SEARCH = NO # Searching" for details. # This tag requires that the tag SEARCHENGINE is set to YES. -SEARCHENGINE_URL = +SEARCHENGINE_URL = @DOXYGEN_SEARCHENGINE_URL@ # When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed # search data is written to a file for indexing by an external tool. With the @@ -2167,7 +2174,7 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = +PREDEFINED = H5_HAVE_PARALLEL # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. 
The diff --git a/doxygen/aliases b/doxygen/aliases index aa22bd4..30fbd9b 100644 --- a/doxygen/aliases +++ b/doxygen/aliases @@ -1,3 +1,5 @@ +ALIASES += THG="The HDF Group" + ################################################################################ # Styling ################################################################################ @@ -35,6 +37,9 @@ ALIASES += op{1}="\param[in] \1 Callback function" ALIASES += op_data="\param[in,out] op_data User-defined callback function context" ALIASES += op_data{1}="\param[in,out] \1 User-defined callback function context" +ALIASES += op_data_in="\param[in] op_data User-defined callback function context" +ALIASES += op_data_in{1}="\param[in] \1 User-defined callback function context" + ################################################################################ # Attributes ################################################################################ @@ -60,13 +65,20 @@ ALIASES += space_id{1}="\param[in] \1 Dataspace identifier" # Dataypes ################################################################################ -ALIASES += dtype_id="\param[in] dtype_id Datatype identifier" -ALIASES += dtype_id{1}="\param[in] \1 Datatype identifier" +ALIASES += type_id="\param[in] type_id Datatype identifier" +ALIASES += type_id{1}="\param[in] \1 Datatype identifier" ALIASES += file_type_id{1}="\param[in] \1 Datatype (in-file) identifier" ALIASES += mem_type_id{1}="\param[in] \1 Datatype (in-memory) identifier" ################################################################################ +# Errors +################################################################################ + +ALIASES += estack_id="\param[in] estack_id Error stack identifier" +ALIASES += estack_id{1}="\param[in] \1 Error stack identifier" + +################################################################################ # Files ################################################################################ @@ -160,11 +172,32 @@ ALIASES += 
fgdta_loc_obj_id{1}="\loc_obj_id{\1}. The identifier may be that of a ALIASES += estack_id="\param[in] estack_id Error stack identifier" ALIASES += estack_id{1}="\param[in] \1 Error stack identifier" +ALIASES += cpp_c_api_note="\attention \Bold{C++ Developers using HDF5 C-API functions beware:}\n Several functions in this C-API take function pointers or callbacks as arguments. Examples include H5Pset_elink_cb(), H5Pset_type_conv_cb(), H5Tconvert(), and H5Ewalk2(). Application code must ensure that those callback functions return normally so as to allow the HDF5 library to manage its resources and maintain a consistent state. For instance, those functions must not use the C \c setjmp / \c longjmp mechanism to leave those callback functions. Within the context of C++, any exceptions thrown within the callback function must be caught, such as with a \Code{catch(...)} statement. Any exception state can be placed within the provided user data function call arguments, and may be thrown again once the calling function has returned. Exceptions raised and not handled inside the callback are not supported as they might leave the HDF5 library in an inconsistent state. Similarly, C++20 coroutines cannot be used as callbacks, since they do not support plain return statements. If a callback function yields execution to another C++20 coroutine calling HDF5 functions as well, this may lead to undefined behavior."
+ALIASES += sa_metadata_ops="\sa \li H5Pget_all_coll_metadata_ops() \li H5Pget_coll_metadata_write() \li H5Pset_all_coll_metadata_ops() \li H5Pset_coll_metadata_write() \li \ref maybe_metadata_reads" + +################################################################################ +# References +################################################################################ + +ALIASES += ref_cons_semantics="<a href=\"https://portal.hdfgroup.org/display/HDF5/Enabling+a+Strict+Consistency+Semantics+Model+in+Parallel+HDF5\">Enabling a Strict Consistency Semantics Model in Parallel HDF5</a>" +ALIASES += ref_dld_filters="<a href=\"https://portal.hdfgroup.org/display/HDF5/HDF5+Dynamically+Loaded+Filters\">HDF5 Dynamically Loaded Filters</a>" +ALIASES += ref_file_image_ops="<a href=\"https://portal.hdfgroup.org/display/HDF5/HDF5+File+Image+Operations\">HDF5 File Image Operations</a>" +ALIASES += ref_filter_pipe="<a href=\"https://portal.hdfgroup.org/display/HDF5/HDF5+Data+Flow+Pipeline+for+H5Dread\">Data Flow Pipeline for H5Dread()</a>" +ALIASES += ref_group_impls="<a href=\"https://portal.hdfgroup.org/display/HDF5/Groups\">Group implementations in HDF5</a>" +ALIASES += ref_h5lib_relver="<a href=\"https://portal.hdfgroup.org/display/HDF5/HDF5+Library+Release+Version+Numbers\">HDF5 Library Release Version Numbers</a>" +ALIASES += ref_mdc_in_hdf5="<a href=\"https://portal.hdfgroup.org/display/HDF5/Metadata+Caching+in+HDF5\">Metadata Caching in HDF5</a>" +ALIASES += ref_mdc_logging="<a href=\"https://portal.hdfgroup.org/display/HDF5/H5F_START_MDC_LOGGING\">Metadata Cache Logging</a>" +ALIASES += ref_news_110="<a href=\"https://portal.hdfgroup.org/display/HDF5/New+Features+in+HDF5+Release+1.10\">New Features in HDF5 Release 1.10</a>" +ALIASES += ref_h5ocopy="<a href=\"https://portal.hdfgroup.org/display/HDF5/Copying+Committed+Datatypes+with+H5Ocopy\">Copying Committed Datatypes with H5Ocopy()</a>" +ALIASES += ref_sencode_fmt_change="<a 
href=\"https://portal.hdfgroup.org/pages/viewpage.action?pageId=58100093&preview=/58100093/58100094/encode_format_RFC.pdf\">RFC H5Sencode() / H5Sdecode() Format Change</a>" +ALIASES += ref_vlen_strings="\Emph{Creating variable-length string datatypes}" +ALIASES += ref_vol_doc="VOL documentation" ################################################################################ # The Usual Suspects ################################################################################ +ALIASES += click4more="(Click on an enumerator, field, or type for more information.)" ALIASES += csets="<table><tr><td>#H5T_CSET_ASCII</td><td>US ASCII</td></tr><tr><td>#H5T_CSET_UTF8</td><td>UTF-8 Unicode encoding</td></tr></table>" ALIASES += datatype_class=" \li #H5T_INTEGER \li #H5T_FLOAT \li #H5T_STRING \li #H5T_BITFIELD \li #H5T_OPAQUE \li #H5T_COMPOUND \li #H5T_REFERENCE \li #H5T_ENUM \li #H5T_VLEN \li #H5T_ARRAY" ALIASES += file_access="<table><tr><td>#H5F_ACC_RDWR</td><td>File was opened with read/write access.</td></tr><tr><td>#H5F_ACC_RDONLY</td><td>File was opened with read-only access.</td></tr><tr><td>#H5F_ACC_SWMR_WRITE</td><td>File was opened with read/write access for a single-writer/multiple-reader (SWMR) scenario. Note that the writer process must also open the file with the #H5F_ACC_RDWR flag.</td></tr><tr><td>#H5F_ACC_SWMR_READ</td><td>File was opened with read-only access for a single-writer/multiple-reader (SWMR) scenario.
Note that the reader process must also open the file with the #H5F_ACC_RDONLY flag.</td></tr></table>" @@ -180,5 +213,5 @@ ALIASES += scopes="<table><tr><td>#H5F_SCOPE_GLOBAL</td><td>Flushes the entire v ALIASES += sign_prop="<table><tr><td>#H5T_SGN_NONE</td><td>0</td><td>Unsigned integer type</td></tr><tr><td>#H5T_SGN_2</td><td>1</td><td>Two's complement signed integer type</td></tr></table>" ALIASES += storage_type="<table><tr><td>#H5G_STORAGE_TYPE_COMPACT</td><td>Compact storage</td></tr><tr><td>#H5G_STORAGE_TYPE_DENSE</td><td>Indexed storage</td></tr><tr><td>#H5G_STORAGE_TYPE_SYMBOL_TABLE</td><td>Symbol tables, the original HDF5 structure</td></tr></table>" ALIASES += str_pad_type="<table><tr><td>#H5T_STR_NULLTERM</td><td>0</td><td>Null terminate (as C does)</td></tr><tr><td>#H5T_STR_NULLPAD</td><td>1</td><td>Pad with zeros</td></tr><tr><td>#H5T_STR_SPACEPAD</td><td>2</td><td>Pad with spaces (as FORTRAN does)</td></tr></table>" -ALIASES += virtual=" \see Supporting Functions: \li H5Pget_layout() \li H5Pset_layout() \li H5Sget_regular_hyperslab() \li H5Sis_regular_hyperslab() \li H5Sselect_hyperslab() \see VDS Functions: \li H5Pget_virtual_count() \li H5Pget_virtual_dsetname() \li H5Pget_virtual_filename() \li H5Pget_virtual_prefix() \li H5Pget_virtual_printf_gap() \li H5Pget_virtual_srcspace() \li H5Pget_virtual_view() \li H5Pget_virtual_vspace() \li H5Pset_virtual \li H5Pset_virtual_prefix() \li H5Pset_virtual_printf_gap() \li H5Pset_virtual_view()" +ALIASES += see_virtual=" \see Supporting Functions: H5Pget_layout(), H5Pset_layout(), H5Sget_regular_hyperslab(), H5Sis_regular_hyperslab(), H5Sselect_hyperslab() \see VDS Functions: H5Pget_virtual_count(), H5Pget_virtual_dsetname(), H5Pget_virtual_filename(), H5Pget_virtual_prefix(), H5Pget_virtual_printf_gap(), H5Pget_virtual_srcspace(), H5Pget_virtual_view(), H5Pget_virtual_vspace(), H5Pset_virtual(), H5Pset_virtual_prefix(), H5Pset_virtual_printf_gap(), H5Pset_virtual_view()" ALIASES += 
obj_info_fields="<table><tr><th>Flag</th><th>Purpose</th></tr><tr><td>#H5O_INFO_BASIC</td><td>Fill in the fileno, addr, type, and rc fields</td></tr><tr> <td>#H5O_INFO_TIME</td><td>Fill in the atime, mtime, ctime, and btime fields</td></tr><tr> <td>#H5O_INFO_NUM_ATTRS</td> <td>Fill in the num_attrs field</td></tr><tr><td>#H5O_INFO_HDR</td><td>Fill in the num_attrs field</td></tr><tr><td>#H5O_INFO_META_SIZE</td><td>Fill in the meta_size field</td></tr><tr><td>#H5O_INFO_ALL</td><td>#H5O_INFO_BASIC | #H5O_INFO_TIME | #H5O_INFO_NUM_ATTRS | #H5O_INFO_HDR | #H5O_INFO_META_SIZE</td></tr></table>" diff --git a/doxygen/dox/About.dox b/doxygen/dox/About.dox new file mode 100644 index 0000000..3be9202 --- /dev/null +++ b/doxygen/dox/About.dox @@ -0,0 +1,11 @@ +/** \page About About + +The implementation of this documentation set is based on the fantastic work of the +<a href="https://eigen.tuxfamily.org/index.php?title=Main_Page">Eigen project</a>. +Please refer to their <a href="https://gitlab.com/libeigen/eigen">GitLab repository</a> +and the online version of their +<a href="http://eigen.tuxfamily.org/dox/">Doxygen-based documentation</a>. +Not only does Eigen set a standard as a piece of software, but also as an example +of <em>documentation done right</em>. + +*/
\ No newline at end of file diff --git a/doxygen/dox/Cookbook.dox b/doxygen/dox/Cookbook.dox new file mode 100644 index 0000000..4abc896 --- /dev/null +++ b/doxygen/dox/Cookbook.dox @@ -0,0 +1,5 @@ +/** \page Cookbook Cookbook + + Healthy, everyday recipes for every taste and budget... + + */
\ No newline at end of file diff --git a/doxygen/dox/DDLBNF110.dox b/doxygen/dox/DDLBNF110.dox new file mode 100644 index 0000000..f7e4267 --- /dev/null +++ b/doxygen/dox/DDLBNF110.dox @@ -0,0 +1,650 @@ +/** \page DDLBNF110 DDL in BNF through HDF5 1.10 + +\todo Revise this & break it up! + +\section intro110 Introduction + +This document contains the data description language (DDL) for an HDF5 file. The +description is in Backus-Naur Form (BNF). + +\section expo110 Explanation of Symbols + +This section contains a brief explanation of the symbols used in the DDL. + +\code{.unparsed} +::= defined as + <tname> a token with the name tname + <a> | <b> one of <a> or <b> + <a>opt zero or one occurrence of <a> + <a>* zero or more occurrence of <a> + <a>+ one or more occurrence of <a> + [0-9] an element in the range between 0 and 9 + '[' the token within the quotes (used for special characters) + TBD To Be Decided +\endcode + +\section ddl110 The DDL + +\code{.unparsed} +<file> ::= HDF5 <file_name> { <file_super_block>opt <root_group> } + +<file_name> ::= <identifier> + +<file_super_block> ::= SUPER_BLOCK { + SUPERBLOCK_VERSION <int_value> + FREELIST_VERSION <int_value> + SYMBOLTABLE_VERSION <int_value> + OBJECTHEADER_VERSION <int_value> + OFFSET_SIZE <int_value> + LENGTH_SIZE <int_value> + BTREE_RANK <int_value> + BTREE_LEAF <int_value> + ISTORE_K <int_value> + <super_block_filespace> + USER_BLOCK { + USERBLOCK_SIZE <int_value> + } + } + +<super_block_filespace> ::= FILE_SPACE_STRATEGY <super_block_strategy> + FREE_SPACE_PERSIST <boolean_value> + FREE_SPACE_SECTION_THRESHOLD <int_value> + FILE_SPACE_PAGE_SIZE <int_value> + +<super_block_strategy> ::= H5F_FSPACE_STRATEGY_FSM_AGGR | H5F_FSPACE_STRATEGY_PAGE | + H5F_FSPACE_STRATEGY_AGGR | H5F_FSPACE_STRATEGY_NONE | + Unknown strategy + +<root_group> ::= GROUP "/" { + <anon_named_datatype>* + <object_id>opt + <group_comment>opt + <group_attribute>* + <group_member>* + } + +<datatype> ::= <atomic_type> | <compound_type> | 
<variable_length_type> | <array_type> + +<anon_named_datatype> ::= DATATYPE <anon_named_type_name> { + <datatype> + } + +<anon_named_type_name> ::= the assigned name for anonymous named type is + in the form of #oid, where oid is the object id + of the type + +<atomic_type> ::= <integer> | <float> | <time> | <string> | + <bitfield> | <opaque> | <reference> | <enum> + +<boolean_value> ::= FALSE | TRUE + +<integer> ::= H5T_STD_I8BE | H5T_STD_I8LE | + H5T_STD_I16BE | H5T_STD_I16LE | + H5T_STD_I32BE | H5T_STD_I32LE | + H5T_STD_I64BE | H5T_STD_I64LE | + H5T_STD_U8BE | H5T_STD_U8LE | + H5T_STD_U16BE | H5T_STD_U16LE | + H5T_STD_U32BE | H5T_STD_U32LE | + H5T_STD_U64BE | H5T_STD_U64LE | + H5T_NATIVE_CHAR | H5T_NATIVE_UCHAR | + H5T_NATIVE_SHORT | H5T_NATIVE_USHORT | + H5T_NATIVE_INT | H5T_NATIVE_UINT | + H5T_NATIVE_LONG | H5T_NATIVE_ULONG | + H5T_NATIVE_LLONG | H5T_NATIVE_ULLONG + +<float> ::= H5T_IEEE_F32BE | H5T_IEEE_F32LE | + H5T_IEEE_F64BE | H5T_IEEE_F64LE | + H5T_NATIVE_FLOAT | H5T_NATIVE_DOUBLE | + H5T_NATIVE_LDOUBLE + +<time> ::= H5T_TIME: not yet implemented + +<string> ::= H5T_STRING { + STRSIZE <strsize>; + STRPAD <strpad>; + CSET <cset>; + CTYPE <ctype>; + } + +<strsize> ::= <int_value> + +<strpad> ::= H5T_STR_NULLTERM | H5T_STR_NULLPAD | H5T_STR_SPACEPAD + +<cset> ::= H5T_CSET_ASCII | H5T_CSET_UTF8 + +<ctype> ::= H5T_C_S1 | H5T_FORTRAN_S1 + +<bitfield> ::= H5T_STD_B8BE | H5T_STD_B8LE | + H5T_STD_B16BE | H5T_STD_B16LE | + H5T_STD_B32BE | H5T_STD_B32LE | + H5T_STD_B64BE | H5T_STD_B64LE + +<opaque> ::= H5T_OPAQUE { + OPAQUE_TAG <identifier>; + OPAQUE_SIZE <int_value>;opt + } + +<reference> ::= H5T_REFERENCE { <ref_type> } + +<ref_type> ::= H5T_STD_REF_OBJECT | H5T_STD_REF_DSETREG | H5T_STD_REF | UNDEFINED + +<compound_type> ::= H5T_COMPOUND { + <member_type_def>+ + } + +<member_type_def> ::= <datatype> <field_name>; + +<field_name> ::= <identifier> + +<variable_length_type> ::= H5T_VLEN { <datatype> } + +<array_type> ::= H5T_ARRAY { <dim_sizes> <datatype> } + 
+<dim_sizes> ::= '['<dimsize>']' | '['<dimsize>']'<dim_sizes> + +<dimsize> ::= <int_value> + +<attribute> ::= ATTRIBUTE <attr_name> { + <dataset_type> + <dataset_space> + <data>opt + } + +<attr_name> ::= <identifier> + +<dataset_type> ::= DATATYPE <path_name> | <datatype> + +<enum> ::= H5T_ENUM { + <enum_base_type> <enum_def>+ + } + +<enum_base_type> ::= <integer> +// Currently enums can only hold integer type data, but they may be expanded +// in the future to hold any datatype + +<enum_def> ::= <enum_symbol> <enum_val>; + +<enum_symbol> ::= <identifier> + +<enum_val> ::= <int_value> + +<path_name> ::= <path_part>+ + +<path_part> ::= /<identifier> + +<dataspace> ::= <scalar_space> | <simple_space> | <complex_space> | <null_space> + +<null_space> ::= NULL + +<scalar_space> ::= SCALAR + +<simple_space> ::= SIMPLE { <current_dims> / <max_dims> } + +<complex_space> ::= COMPLEX { <complex_space_definition> } + +<dataset_space> ::= DATASPACE <path_name> | <dataspace> + +<current_dims> ::= <dims> + +<max_dims> ::= '(' <max_dim_list> ')' + +<max_dim_list> ::= <max_dim> | <max_dim>, <max_dim_list> + +<max_dim> ::= <int_value> | H5S_UNLIMITED + +<data> ::= <subset> | <data_values> + +<data_values> ::= DATA { + <scalar_space_data> | <simple_space_data> + } + +<scalar_space_data> ::= <any_element> + +<any_element> ::= <atomic_element> | <compound_element> | + <variable_length_element> | <array_element> + +<any_data_seq> ::= <any_element> | <any_element>, <any_data_seq> + +<atomic_element> :: = <integer_data> | <float_data> | <time_data> | + <string_data> | <bitfield_data> | <opaque_data> | + <enum_data> | <reference_data> + +<subset> ::= SUBSET { + <start>; + <stride>; + <count>; + <block>; + DATA { + <simple_space_data> + } + } + +<start> ::= START (<coor_list>) + +<stride> ::= STRIDE (<pos_list>) + +<count> ::= COUNT (<max_dim_list>) + +<block> ::= BLOCK (<max_dim_list>) + +<coor_list> ::= <coor_data>, <coor_list> | <coor_data> + +<coor_data> ::= <integer_data> | 
H5S_UNLIMITED + +<integer_data> ::= <int_value> + +<float_data> ::= a floating point number + +<time_data> ::= DATA{ not yet implemented.} + +<string_data> ::= a string +// A string is enclosed in double quotes. +// If a string is displayed on more than one line, string concatenate +// operator '//'is used. + +<bitfield_data> ::= <hex_value> + +<opaque_data> ::= <hex_value>:<hex_value> | <hex_value> + +<enum_data> ::= <enum_symbol> + +<reference_data> ::= <object_ref_data> | <data_region_data> | <attribute_data> | NULL + +<object_ref_data> ::= <object_type> <object_num> + +<object_type> ::= DATASET | GROUP | DATATYPE + +<object_id> ::= OBJECTID { <object_num> } + +<object_num> ::= <int_value>:<int_value> | <int_value> + +<attribute_data> ::= ATTRIBUTE <attr_name> + +<data_region_data> ::= DATASET <dataset_name> { + <data_region_type>opt <data_region_data_list> + <dataset_type>opt <dataset_space>opt + <data>opt + } + +<data_region_type> ::= REGION_TYPE <data_region_data_type> + +<data_region_data_type> ::= POINT | BLOCK + +<data_region_data_list> ::= <data_region_data_info>, <data_region_data_list> | + <data_region_data_info> + +<data_region_data_info> ::= <region_info> | <point_info> + +<region_info> ::= (<lower_region_vals>)-(<upper_region_vals>) + +<lower_region_vals> ::= <lower_bound>, <lower_region_vals> | <lower_bound> + +<upper_region_vals> ::= <upper_bound>, <upper_region_vals> | <upper_bound> + +<lower_bound> ::= <int_value> + +<upper_bound> ::= <int_value> + +<point_info> ::= (<point_vals>) + +<point_vals> ::= <int_value> | <int_value>, <point_vals> + +<compound_element> ::= { <any_data_seq> } + +<atomic_simple_data> :: = <atomic_element>, <atomic_simple_data> | + <atomic_element> + +<simple_space_data> :: = <any_data_seq> + +<variable_length_element> ::= ( <any_data_seq> ) + +<array_element> ::= '[' <any_data_seq> ']' + +<named_datatype> ::= DATATYPE <type_name> { <datatype> } + +<type_name> ::= <identifier> + +<hardlink> ::= HARDLINK <path_name> + 
+<group> ::= GROUP <group_name> { <hardlink> | <group_info> } + +<group_comment> ::= COMMENT <string_data> + +<group_name> ::= <identifier> + +<group_info> ::= <object_id>opt <group_comment>opt <group_attribute>* + <group_member>* + +<group_attribute> ::= <attribute> + +<group_member> ::= <named_datatype> | <group> | <dataset> | + <softlink> | <external_link> + +<dataset> ::= DATASET <dataset_name> { <hardlink> | <dataset_info> } + +<dataset_info> ::= <dataset_type> + <dataset_space> + <dcpl_info>opt + <dataset_attribute>* <object_id>opt + <data>opt +// Tokens above can be in any order as long as <data> is +// after <dataset_type> and <dataset_space>. + +<dcpl_info> ::= <storagelayout> + <compression_filters> + <fillvalue> + <allocationtime> + +<dataset_name> ::= <identifier> + +<storagelayout> :: = STORAGE_LAYOUT { + <contiguous_layout> | <chunked_layout> | + <compact_layout> | <virtual_layout> + } + +<contiguous_layout> ::= CONTIGUOUS + <internal_layout> | <external_layout> + +<chunked_layout> ::= CHUNKED <dims> + <filter_ratio>opt + +<compact_layout> ::= COMPACT + <size> + +<internal_layout> ::= <size> + <offset> + +<external_layout> ::= EXTERNAL { + <external_file>+ + } + +<virtual_layout> ::= <vmaps>*opt + +<vmaps> ::= MAPPING <int_value> { + <virtual_map> + <source_map> + } + +<virtual_map> ::= VIRTUAL { + <vmaps_selection> + } + +<source_map> ::= SOURCE { + FILE <file_name> + DATASET <dataset_name> + <vmaps_selection> + } + +<vmaps_selection> ::= <regular_hyperslab> | <irregular_hyperslab> | + <select_points> | <select_none> | <select_all> + +<regular_hyperslab> ::= SELECTION REGULAR_HYPERSLAB { + <start> + <stride> + <count> + <block> + } + +<irregular_hyperslab> ::= SELECTION IRREGULAR_HYPERSLAB { + <region_info>+ + } + +<select_points> ::= SELECTION POINT { + (<coor_list>)+ + } + +<select_none> ::= SELECTION NONE + +<select_all> ::= SELECTION ALL + +<dims> ::= (<dims_values>) + +<dims_values> ::= <int_value> | <int_value>, <dims_values> + +<external_file> 
::= FILENAME <file_name> <size> <offset> + +<offset> ::= OFFSET <int_value> + +<size> ::= SIZE <int_value> + +<filter_ratio> ::= <size> | <compressionratio> + +<compressionratio> :: = <size> (<float_data>:1 COMPRESSION) + +<compression_filters> :: = FILTERS { + <filter_type>+ | NONE + } + +<filter_type> :: = <filter_deflate> | <filter_shuffle> | + <filter_flecther> | <filter_szip> | + <filter_nbit> | <filter_scaleoffset> | + <filter_default> + +<filter_default> :: = <filter_user> { + FILTER_ID <int_value> + <filter_comment>opt + <filter_params>opt + } + +<filter_user> :: = USER_DEFINED_FILTER + +<filter_deflate> :: = COMPRESSION DEFLATE { LEVEL <int_value> } + +<filter_shuffle> :: = PREPROCESSING SHUFFLE + +<filter_flecther> :: = CHECKSUM FLETCHER32 + +<filter_szip> :: = COMPRESSION SZIP { + PIXELS_PER_BLOCK <int_value> + <filter_szip_mode>opt + <filter_szip_coding>opt + <filter_szip_order>opt + <filter_szip_header>opt + } + +<filter_szip_mode> :: = MODE HARDWARE | K13 + +<filter_szip_coding> :: = CODING ENTROPY | NEAREST NEIGHBOUR + +<filter_szip_order> :: = BYTE_ORDER LSB | MSB + +<filter_szip_header> :: = HEADER RAW + +<filter_nbit> :: = CHECKSUM NBIT + +<filter_scaleoffset> :: = COMPRESSION SCALEOFFSET { MIN BITS <int_value> } + +<filter_comment> :: = COMMENT <identifier> + +<filter_params> :: = PARAMS { <int_value>* } + +<fillvalue> ::= FILLVALUE { + FILL_TIME H5D_FILL_TIME_ALLOC | H5D_FILL_TIME_NEVER | H5D_FILL_TIME_IFSET + VALUE H5D_FILL_VALUE_UNDEFINED | H5D_FILL_VALUE_DEFAULT | <any_element> + } + +<allocationtime> ::= ALLOCATION_TIME { + H5D_ALLOC_TIME_EARLY | H5D_ALLOC_TIME_INCR | + H5D_ALLOC_TIME_LATE + } + +<dataset_attribute> ::= <attribute> + +<softlink> ::= SOFTLINK <softlink_name> { + LINKTARGET <target> + } + +<softlink_name> ::= <identifier> + +<target> ::= <identifier> + +<external_link> ::= EXTERNAL_LINK <external_link_name> { + TARGETFILE <targetfile> + TARGETPATH <targetpath> <targetobj>opt + } + +<external_link_name> ::= <identifier> + 
+<user_defined_link> ::= USERDEFINED_LINK <external_link_name> { + LINKCLASS <user_link_type> + } + +<user_link_type> ::= <int_value> + +<targetfile> ::= <file_name> + +<targetpath> ::= <identifier> + +<targetobj> ::= <named_datatype> | <group> | <dataset> + +<identifier> ::= "a string" +// character '/' should be used with care. + +<pos_list> ::= <pos_int>, <pos_list> | <pos_int> + +<int_value> ::= 0 | <pos_int> + +<pos_int> ::= [1-9][0-9]* + +<hex_value> ::= 0x[0-F][0-F]+ | [0-F][0-F]+ +\endcode + +\section example110 An Example of an HDF5 File in DDL + +\code{.unparsed} +HDF5 "example.h5" { +GROUP "/" { + ATTRIBUTE "attr1" { + DATATYPE H5T_STRING { + STRSIZE 17; + STRPAD H5T_STR_NULLTERM; + CSET H5T_CSET_ASCII; + CTYPE H5T_C_S1; + } + DATASPACE SCALAR + DATA { + "string attribute" + } + } + DATASET "dset1" { + DATATYPE H5T_STD_I32BE + DATASPACE SIMPLE { ( 10, 10 ) / ( 10, 10 ) } + DATA { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + } + } + DATASET "dset2" { + DATATYPE H5T_COMPOUND { + H5T_STD_I32BE "a"; + H5T_IEEE_F32BE "b"; + H5T_IEEE_F64BE "c"; + } + DATASPACE SIMPLE { ( 5 ) / ( 5 ) } + DATA { + { + 1, + 0.1, + 0.01 + }, + { + 2, + 0.2, + 0.02 + }, + { + 3, + 0.3, + 0.03 + }, + { + 4, + 0.4, + 0.04 + }, + { + 5, + 0.5, + 0.05 + } + } + } + GROUP "group1" { + COMMENT "This is a comment for group1"; + DATASET "dset3" { + DATATYPE "/type1" + DATASPACE SIMPLE { ( 5 ) / ( 5 ) } + DATA { + { + [ 0, 1, 2, 3 ], + [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, + 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, + 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, + 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ] + }, + { + [ 0, 1, 2, 3 ], + [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, + 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, + 
0.4, 0.4, 0.4, 0.4, 0.4, 0.4, + 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ] + }, + { + [ 0, 1, 2, 3 ], + [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, + 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, + 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, + 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ] + }, + { + [ 0, 1, 2, 3 ], + [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, + 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, + 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, + 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ] + }, + { + [ 0, 1, 2, 3 ], + [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, + 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, + 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, + 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ] + } + } + } + } + DATASET "dset3" { + DATATYPE H5T_VLEN { H5T_STD_I32LE } + DATASPACE SIMPLE { ( 4 ) / ( 4 ) } + DATA { + (0), (10, 11), (20, 21, 22), (30, 31, 32, 33) + } + } + GROUP "group2" { + HARDLINK "/group1" + } + SOFTLINK "slink1" { + LINKTARGET "somevalue" + } + DATATYPE "type1" H5T_COMPOUND { + H5T_ARRAY { [4] H5T_STD_I32BE } "a"; + H5T_ARRAY { [5][6] H5T_IEEE_F32BE } "b"; + } +} +} +\endcode + + */
\ No newline at end of file diff --git a/doxygen/dox/FileFormatSpec.dox b/doxygen/dox/FileFormatSpec.dox new file mode 100644 index 0000000..fc10574 --- /dev/null +++ b/doxygen/dox/FileFormatSpec.dox @@ -0,0 +1,23 @@ +/** \page FMT3 HDF5 File Format Specification Version 3.0 + +\htmlinclude H5.format.html + +*/ + +/** \page FMT2 HDF5 File Format Specification Version 2.0 + +\htmlinclude H5.format.2.0.html + +*/ + +/** \page FMT11 HDF5 File Format Specification Version 1.1 + +\htmlinclude H5.format.1.1.html + +*/ + +/** \page FMT1 HDF5 File Format Specification Version 1.0 + +\htmlinclude H5.format.1.0.html + +*/
\ No newline at end of file diff --git a/doxygen/dox/GettingStarted.dox b/doxygen/dox/GettingStarted.dox new file mode 100644 index 0000000..880491d --- /dev/null +++ b/doxygen/dox/GettingStarted.dox @@ -0,0 +1,3 @@ +/** \page GettingStarted \Code{Hello, HDF5!} + + */
\ No newline at end of file diff --git a/doxygen/dox/H5AC_cache_config_t.dox b/doxygen/dox/H5AC_cache_config_t.dox new file mode 100644 index 0000000..9b9862b --- /dev/null +++ b/doxygen/dox/H5AC_cache_config_t.dox @@ -0,0 +1,415 @@ +/** + * \page H5AC-cache-config-t Metadata Cache Configuration + * \tableofcontents + * + * \section gcf General configuration fields + * + * \par version + * Integer field containing the version number of this version + * of the H5AC_cache_config_t structure. Any instance of + * H5AC_cache_config_t passed to the cache must have a known + * version number, or an error will be flagged. + * + * \par rpt_fcn_enabled + * \parblock + * Boolean field used to enable and disable the default + * reporting function. This function is invoked every time the + * automatic cache resize code is run, and reports on its activities. + * + * This is a debugging function, and should normally be turned off. + * \endparblock + * + * \par open_trace_file + * \parblock + * Boolean field indicating whether the trace_file_name + * field should be used to open a trace file for the cache. + * + * \Emph{*** DEPRECATED ***} Use \Code{H5Fstart/stop} logging functions instead + * + * The trace file is a debugging feature that allows the capture of + * top level metadata cache requests for purposes of debugging and/or + * optimization. This field should normally be set to \c FALSE, as + * trace file collection imposes considerable overhead. + * + * This field should only be set to \c TRUE when the trace_file_name + * contains the full path of the desired trace file, and either + * there is no open trace file on the cache, or the \c close_trace_file + * field is also \c TRUE. + * \endparblock + * + * \par close_trace_file + * \parblock + * Boolean field indicating whether the current trace + * file (if any) should be closed. + * + * \Emph{*** DEPRECATED ***} Use \Code{H5Fstart/stop} logging functions instead + * + * See the above comments on the open_trace_file field.
This field + * should be set to \c FALSE unless there is an open trace file on the + * cache that you wish to close. + * \endparblock + * + * \par trace_file_name + * \parblock + * Full path of the trace file to be opened if the + * open_trace_file field is \c TRUE. + * + * \Emph{*** DEPRECATED ***} Use \Code{H5Fstart/stop} logging functions instead + * + * In the parallel case, an ASCII representation of the MPI rank of + * the process will be appended to the file name to yield a unique + * trace file name for each process. + * + * The length of the path must not exceed #H5AC__MAX_TRACE_FILE_NAME_LEN + * characters. + * \endparblock + * + * \par evictions_enabled + * \parblock + * Boolean field used to either report the current + * evictions enabled status of the cache, or to set the cache's + * evictions enabled status. + * + * In general, the metadata cache should always be allowed to + * evict entries. However, in some cases it is advantageous to + * disable evictions briefly, and thereby postpone metadata + * writes. However, this must be done with care, as the cache + * can grow quickly. If you do this, re-enable evictions as + * soon as possible and monitor cache size. + * + * At present, evictions can only be disabled if automatic + * cache resizing is also disabled (that is, \Code{(incr_mode == + * H5C_incr__off ) && ( decr_mode == H5C_decr__off )}). There + * is no logical reason why this should be so, but it simplifies + * implementation and testing, and I can't think of any reason + * why it would be desirable. If you can think of one, I'll + * revisit the issue. (JM) + * \endparblock + * + * \par set_initial_size + * Boolean flag indicating whether the + * initial size of the cache is to be set to the value given in + * the initial_size field. If set_initial_size is \c FALSE, the + * initial_size field is ignored.
+ * + * \par initial_size + * If enabled, this field contains the size the cache is + * to be set to upon receipt of this structure. Needless to say, + * initial_size must lie in the closed interval \Code{[min_size, max_size]}. + * + * \par min_clean_fraction + * \c double in the range 0 to 1 indicating the fraction + * of the cache that is to be kept clean. This field is only used + * in parallel mode. Typical values are 0.1 to 0.5. + * + * \par max_size + * Maximum size to which the cache can be adjusted. The + * supplied value must fall in the closed interval + * \Code{[H5C__MIN_MAX_CACHE_SIZE, H5C__MAX_MAX_CACHE_SIZE]}. Also, \c max_size must + * be greater than or equal to \c min_size. + * + * \par min_size + * Minimum size to which the cache can be adjusted. The + * supplied value must fall in the closed interval + * \Code{[H5C__MIN_MAX_CACHE_SIZE, H5C__MAX_MAX_CACHE_SIZE]}. Also, \c min_size + * must be less than or equal to \c max_size. + * + * \par epoch_length + * \parblock + * Number of accesses on the cache over which to collect + * hit rate stats before running the automatic cache resize code, + * if it is enabled. + * + * At the end of an epoch, we discard prior hit rate data and start + * collecting afresh. The epoch_length must lie in the closed + * interval \Code{[H5C__MIN_AR_EPOCH_LENGTH, H5C__MAX_AR_EPOCH_LENGTH]}. + * \endparblock + * + * + * \section csicf Cache size increase control fields + * + * \par incr_mode + * Instance of the \c H5C_cache_incr_mode enumerated type whose + * value indicates how we determine whether the cache size should be + * increased. At present there are two possible values: + * \li \c H5C_incr__off: Don't attempt to increase the size of the cache + * automatically.\n + * When this increment mode is selected, the remaining fields + * in the cache size increase section are ignored.
+ * \li \c H5C_incr__threshold: Attempt to increase the size of the cache + * whenever the average hit rate over the last epoch drops + * below the value supplied in the \c lower_hr_threshold + * field.\n + * Note that this attempt will fail if the cache is already + * at its maximum size, or if the cache is not already using + * all available space. + * + * Note that you must set \c incr_mode to \c H5C_incr__off if you + * disable metadata cache entry evictions. + * + * \par lower_hr_threshold + * \parblock + * Lower hit rate threshold. If the increment mode + * (\c incr_mode) is \c H5C_incr__threshold and the hit rate drops below the + * value supplied in this field in an epoch, increment the cache size by + * \c size_increment. Note that cache size may not be incremented above + * \c max_size, and that the increment may be further restricted by the + * \c max_increment field if it is enabled. + * + * When enabled, this field must contain a value in the range [0.0, 1.0]. + * Depending on the \c incr_mode selected, it may also have to be less than + * \c upper_hr_threshold. + * \endparblock + * + * \par increment + * \parblock + * Double containing the multiplier used to derive the new + * cache size from the old if a cache size increment is triggered. + * The increment must be greater than 1.0, and should not exceed 2.0. + * + * The new cache size is obtained by multiplying the current max cache + * size by the increment, and then clamping to \c max_size and to stay + * within the \c max_increment as necessary. + * \endparblock + * + * \par apply_max_increment + * Boolean flag indicating whether the \c max_increment + * field should be used to limit the maximum cache size increment. + * + * \par max_increment + * If enabled by the \c apply_max_increment field described + * above, this field contains the maximum number of bytes by which the + * cache size can be increased in a single re-size.
+ * + * \par flash_incr_mode + * \parblock + * Instance of the \c H5C_cache_flash_incr_mode enumerated + * type whose value indicates whether and by which algorithm we should + * make flash increases in the size of the cache to accommodate insertion + * of large entries and large increases in the size of a single entry. + * + * The addition of the flash increment mode was occasioned by performance + * problems that appear when a local heap is increased to a size in excess + * of the current cache size. While the existing re-size code dealt with + * this eventually, performance was very bad for the remainder of the + * epoch. + * + * At present, there are two possible values for the \c flash_incr_mode: + * + * \li \c H5C_flash_incr__off: Don't perform flash increases in the size of the cache. + * + * \li \c H5C_flash_incr__add_space: Let \c x be either the size of a + * newly inserted entry, or the number of bytes by which the + * size of an existing entry has been increased.\n + * If \Code{x > flash_threshold * current max cache size}, + * increase the current maximum cache size by \Code{x * flash_multiple} + * less any free space in the cache, and start a new epoch. For + * now at least, pay no attention to the maximum increment. + * + * In both of the above cases, the flash increment pays no attention to + * the maximum increment (at least in this first incarnation), but DOES + * stay within max_size. + * + * With a little thought, it should be obvious that the above flash + * cache size increase algorithm is not sufficient for all circumstances + * -- for example, suppose the user round robins through + * \Code{(1/flash_threshold) +1} groups, adding one data set to each on each + * pass. Then all will increase in size at about the same time, requiring + * the max cache size to at least double to maintain acceptable + * performance, however the above flash increment algorithm will not be + * triggered.
+ * + * Hopefully, the add space algorithms detailed above will be sufficient + * for the performance problems encountered to date. However, we should + * expect to revisit the issue. + * \endparblock + * + * \par flash_multiple + * Double containing the multiple described above in the + * \c H5C_flash_incr__add_space section of the discussion of the + * \c flash_incr_mode field. This field is ignored unless \c flash_incr_mode + * is \c H5C_flash_incr__add_space. + * + * \par flash_threshold + * Double containing the factor by which current max cache + * size is multiplied to obtain the size threshold for the add_space flash + * increment algorithm. The field is ignored unless \c flash_incr_mode is + * \c H5C_flash_incr__add_space. + * + * + * \section csdcf Cache size decrease control fields + * + * \par decr_mode + * \parblock + * Instance of the \c H5C_cache_decr_mode enumerated type whose + * value indicates how we determine whether the cache size should be + * decreased. At present there are four possibilities. + * + * \li \c H5C_decr__off: Don't attempt to decrease the size of the cache + * automatically.\n + * When this decrement mode is selected, the remaining fields + * in the cache size decrease section are ignored. + * \li \c H5C_decr__threshold: Attempt to decrease the size of the cache + * whenever the average hit rate over the last epoch rises + * above the value supplied in the \c upper_hr_threshold + * field. + * \li \c H5C_decr__age_out: At the end of each epoch, search the cache for + * entries that have not been accessed for at least the number + * of epochs specified in the epochs_before_eviction field, and + * evict these entries. Conceptually, the maximum cache size + * is then decreased to match the new actual cache size. However, + * this reduction may be modified by the \c min_size, the + * \c max_decrement, and/or the \c empty_reserve.
+ * \li \c H5C_decr__age_out_with_threshold: Same as age_out, but we only + * attempt to reduce the cache size when the hit rate observed + * over the last epoch exceeds the value provided in the + * \c upper_hr_threshold field. + * + * Note that you must set \c decr_mode to \c H5C_decr__off if you + * disable metadata cache entry evictions. + * \endparblock + * + * \par upper_hr_threshold + * \parblock + * Upper hit rate threshold. The use of this field + * varies according to the current \c decr_mode : + * + * \li \c H5C_decr__off or \c H5C_decr__age_out: The value of this field is + * ignored. + * + * \li \c H5C_decr__threshold: If the hit rate exceeds this threshold in any + * epoch, attempt to decrement the cache size by size_decrement.\n + * Note that cache size may not be decremented below \c min_size.\n + * Note also that if the \c upper_hr_threshold is 1.0, the cache size + * will never be reduced. + * + * \li \c H5C_decr__age_out_with_threshold: If the hit rate exceeds this + * threshold in any epoch, attempt to reduce the cache size + * by evicting entries that have not been accessed for more + * than the specified number of epochs. + * \endparblock + * + * \par decrement + * \parblock + * This field is only used when the decr_mode is + * \c H5C_decr__threshold. + * + * The field is a double containing the multiplier used to derive the + * new cache size from the old if a cache size decrement is triggered. + * The decrement must be in the range 0.0 (in which case the cache will + * try to contract to its minimum size) to 1.0 (in which case the + * cache will never shrink). + * \endparblock + * + * \par apply_max_decrement + * Boolean flag used to determine whether decrements + * in cache size are to be limited by the \c max_decrement field. + * + * \par max_decrement + * Maximum number of bytes by which the cache size can be + * decreased in a single re-size.
Note that decrements may also be + * restricted by the \c min_size of the cache, and (in age out modes) by + * the \c empty_reserve field. + * + * \par epochs_before_eviction + * \parblock + * Integer field used in \c H5C_decr__age_out and + * \c H5C_decr__age_out_with_threshold decrement modes. + * + * This field contains the number of epochs an entry must remain + * unaccessed before it is evicted in an attempt to reduce the + * cache size. If applicable, this field must lie in the range + * \Code{[1, H5C__MAX_EPOCH_MARKERS]}. + * \endparblock + * + * \par apply_empty_reserve + * Boolean field controlling whether the empty_reserve + * field is to be used in computing the new cache size when the + * decr_mode is H5C_decr__age_out or H5C_decr__age_out_with_threshold. + * + * \par empty_reserve + * \parblock + * To avoid a constant ratcheting down of cache size by small + * amounts in the \c H5C_decr__age_out and \c H5C_decr__age_out_with_threshold + * modes, this field allows one to require that any cache size + * reductions leave the specified fraction of unused space in the cache. + * + * The value of this field must be in the range [0.0, 1.0]. I would + * expect typical values to be in the range of 0.01 to 0.1. + * \endparblock + * + * + * \section pcf Parallel Configuration Fields + * + * In PHDF5, all operations that modify metadata must be executed collectively. + * + * We used to think that this was enough to ensure consistency across the + * metadata caches, but since we allow processes to read metadata individually, + * the order of dirty entries in the LRU list can vary across processes, + * which can result in inconsistencies between the caches. + * + * PHDF5 uses several strategies to prevent such inconsistencies in metadata, + * all of which use the fact that the same stream of dirty metadata is seen + * by all processes for purposes of synchronization.
This is done by + * having each process count the number of bytes of dirty metadata generated, + * and then running a "sync point" whenever this count exceeds a user + * specified threshold (see \c dirty_bytes_threshold below). + * + * The current metadata write strategy is indicated by the + * \c metadata_write_strategy field. The possible values of this field, along + * with the associated metadata write strategies are discussed below. + * + * \par dirty_bytes_threshold + * \parblock + * Threshold of dirty byte creation used to + * synchronize updates between caches. (See above for outline and + * motivation.) + * + * This value MUST be consistent across all processes accessing the + * file. This field is ignored unless HDF5 has been compiled for + * parallel. + * \endparblock + * + * \par metadata_write_strategy + * Integer field containing a code indicating the + * desired metadata write strategy. The valid values of this field + * are enumerated and discussed below: + * + * \li #H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY\n + * When metadata_write_strategy is set to this value, only process + * zero is allowed to write dirty metadata to disk. All other + * processes must retain dirty metadata until they are informed at + * a sync point that the dirty metadata in question has been written + * to disk.\n + * When the sync point is reached (or when there is a user generated + * flush), process zero flushes sufficient entries to bring it into + * compliance with its min clean size (or flushes all dirty entries in + * the case of a user generated flush), broadcasts the list of + * entries just cleaned to all the other processes, and then exits + * the sync point.\n + * Upon receipt of the broadcast, the other processes mark the indicated + * entries as clean, and leave the sync point as well.
+ * + * \li #H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED\n + * In the distributed metadata write strategy, process zero still makes + * the decisions as to what entries should be flushed, but the actual + * flushes are distributed across the processes in the computation to + * the extent possible.\n + * In this strategy, when a sync point is triggered (either by dirty + * metadata creation or manual flush), all processes enter a barrier.\n + * On the other side of the barrier, process 0 constructs an ordered + * list of the entries to be flushed, and then broadcasts this list + * to the caches in all the processes.\n + * All processes then scan the list of entries to be flushed, flushing + * some, and marking the rest as clean. The algorithm for this purpose + * ensures that each entry in the list is flushed exactly once, and + * all are marked clean in each cache.\n + * Note that in the case of a flush of the cache, no message passing + * is necessary, as all processes have the same list of dirty entries, + * and all of these entries must be flushed. Thus in this case it is + * sufficient for each process to sort its list of dirty entries after + * leaving the initial barrier, and use this list as if it had been + * received from process zero.\n + * To avoid possible messages from the past/future, all caches must + * wait until all caches are done before leaving the sync point. + */
\ No newline at end of file diff --git a/doxygen/dox/H5Acreate.dox b/doxygen/dox/H5Acreate.dox new file mode 100644 index 0000000..18d648f --- /dev/null +++ b/doxygen/dox/H5Acreate.dox @@ -0,0 +1,9 @@ +/** + * \ingroup H5A + * \def H5Acreate() + * H5Acreate() is a macro that is mapped to either H5Acreate1() or + * H5Acreate2(). + * + * + * \todo Standardize the way we describe these macros! + */ diff --git a/doxygen/dox/H5Aiterate.dox b/doxygen/dox/H5Aiterate.dox new file mode 100644 index 0000000..46b9bb4 --- /dev/null +++ b/doxygen/dox/H5Aiterate.dox @@ -0,0 +1,9 @@ +/** + * \ingroup H5A + * \def H5Aiterate() + * H5Aiterate() is a macro that is mapped to either H5Aiterate1() or + * H5Aiterate2(). + * + * + * \todo Standardize the way we describe these macros! + */ diff --git a/doxygen/dox/H5Fget_info.dox b/doxygen/dox/H5Fget_info.dox new file mode 100644 index 0000000..9b02752 --- /dev/null +++ b/doxygen/dox/H5Fget_info.dox @@ -0,0 +1,44 @@ +/** + * \ingroup H5F + * \def H5Fget_info() + * H5Fget_info() is a macro that is mapped to either H5Fget_info1() + * or H5Fget_info2(), depending on the needs of the application. + * Similarly, the macro for the \ref H5F_info_t struct is mapped to either + * H5F_info1_t or H5F_info2_t. + * + * Such macros are provided to facilitate application compatibility. + * Their use and mappings are fully described in \ref api-compat-macros. + * + * When both the HDF5 library and the application are built and installed with + * no specific compatibility flags, H5Fget_info() is mapped to the most recent + * version of the function, currently H5Fget_info2(). If the library and/or + * application is compiled for Release 1.8 emulation, H5Fget_info() will be + * mapped to H5Fget_info1(). Since there was no H5Fget_info() function in + * Release 1.6, if the library and/or application is compiled for Release 1.6 + * emulation, H5Fget_info() will be mapped to the most recent version of the + * function, currently H5Fget_info2(). 
Function-specific flags are available to + * override these settings on a function-by-function basis when the application + * is compiled. + * + * Specific compile-time compatibility flags and the resulting + * mappings are as follows: + * + * \Bold{Global settings}\n + * \li No compatibility flag: H5Fget_info2() and H5F_info2_t + * \li Enable deprecated symbols: H5Fget_info2() and H5F_info2_t + * \li Disable deprecated symbols: H5Fget_info2() and H5F_info2_t + * \li Emulate Release 1.6 interface: H5Fget_info2() and H5F_info2_t + * \li Emulate Release 1.8 interface: H5Fget_info1() and H5F_info1_t + * + * \Bold{Function- and struct-level macros}\n + * \li \Code{H5Fget_info_vers=2}: H5Fget_info2() + * \li \Code{H5Fget_info_vers=1}: H5Fget_info1() + * \li \Code{H5F_info_t_vers=2}: H5F_info2_t + * \li \Code{H5F_info_t_vers=1}: H5F_info1_t + * + * \version 1.10.0 The C function H5Fget_info() and H5F_info_t were renamed to + * H5Fget_info1() and H5F_info1_t, respectively, and deprecated + * in this release. The C macro #H5Fget_info, the C function + * H5Fget_info2(), and the struct H5F_info2_t were introduced in this + * release. + */ diff --git a/doxygen/dox/H5Lget_info.dox b/doxygen/dox/H5Lget_info.dox new file mode 100644 index 0000000..2c0971e --- /dev/null +++ b/doxygen/dox/H5Lget_info.dox @@ -0,0 +1,17 @@ + /** + * \ingroup LMGT + * \def H5Lget_info() + * H5Lget_info() is a macro that is mapped to either H5Lget_info1() + * or H5Lget_info2(). Such macros are provided to facilitate application + * compatibility. Their use and mappings are fully described in \ref api-compat-macros. + * If the library and/or application is compiled for Release + * 1.12 emulation, H5Lget_info() will be mapped to H5Lget_info2() and + * H5Lget_info1() is deprecated. With earlier versions, H5Lget_info() is mapped to + * H5Lget_info1().
Specific compile-time compatibility flags and the resulting + * mappings are as follows: + * \li No compatibility flag: H5Lget_info2() (using 1.12 source) H5Lget_info1() + * (using 1.10 or 1.8 source) + * \li Emulate Release 1.12: H5Lget_info2() + * \li Emulate Release 1.8 or 1.10 interface: H5Lget_info1() + * + */ diff --git a/doxygen/dox/H5Lget_info_by_idx.dox b/doxygen/dox/H5Lget_info_by_idx.dox new file mode 100644 index 0000000..bf76822 --- /dev/null +++ b/doxygen/dox/H5Lget_info_by_idx.dox @@ -0,0 +1,17 @@ + /** + * \ingroup LMGT + * \def H5Lget_info_by_idx() + * H5Lget_info_by_idx() is a macro that is mapped to either H5Lget_info_by_idx1() + * or H5Lget_info_by_idx2(). Such macros are provided to facilitate application + * compatibility. Their use and mappings are fully described in \ref api-compat-macros. + * If the library and/or application is compiled for Release + * 1.12 emulation, H5Lget_info_by_idx() will be mapped to H5Lget_info_by_idx2() and + * H5Lget_info_by_idx1() is deprecated. With earlier versions, H5Lget_info_by_idx() is mapped to + * H5Lget_info_by_idx1(). Specific compile-time compatibility flags and the resulting + * mappings are as follows: + * \li No compatibility flag: H5Lget_info_by_idx2() (using 1.12 source) H5Lget_info_by_idx1() + * (using 1.10 or 1.8 source) + * \li Emulate Release 1.12: H5Lget_info_by_idx2() + * \li Emulate Release 1.8 or 1.10 interface: H5Lget_info_by_idx1() + * + */ diff --git a/doxygen/dox/H5Literate.dox b/doxygen/dox/H5Literate.dox new file mode 100644 index 0000000..eaaf2fe --- /dev/null +++ b/doxygen/dox/H5Literate.dox @@ -0,0 +1,20 @@ +/** + * \ingroup TRAV + * \def H5Literate() + * H5Literate() is a macro that is mapped to either H5Literate1() or + * H5Literate2(). Such macros are provided to facilitate application + * compatibility. Their use and mappings are fully described in + * \ref api-compat-macros.
If the library and/or application is + * compiled for Release 1.12 emulation, H5Literate() will be mapped to + * H5Literate2() and H5Literate1() is deprecated. With earlier versions, + * H5Literate() is mapped to H5Literate1(). Specific compile-time compatibility + * flags and the resulting mappings are as follows: + * \li No compatibility flag: H5Literate2() (using 1.12 source) H5Literate1() + * (using 1.10 or 1.8 source) + * \li Emulate Release 1.12: H5Literate2() + * \li Emulate Release 1.8 or 1.10 interface: H5Literate1() + * + * \version 1.12.0 The function H5Literate() was renamed to H5Literate1() and + * deprecated in this release. The macro H5Literate() and the + * function H5Literate2() were introduced in this release. + */ diff --git a/doxygen/dox/H5Literate_by_name.dox b/doxygen/dox/H5Literate_by_name.dox new file mode 100644 index 0000000..5ffd7c6 --- /dev/null +++ b/doxygen/dox/H5Literate_by_name.dox @@ -0,0 +1,21 @@ +/** + * \ingroup TRAV + * \def H5Literate_by_name() + * H5Literate_by_name() is a macro that is mapped to either + * H5Literate_by_name1() or H5Literate_by_name2(). Such macros are provided to + * facilitate application compatibility. Their use and mappings are fully + * described in \ref api-compat-macros. If the library and/or application is + * compiled for Release 1.12 emulation, H5Literate_by_name() will be mapped to + * H5Literate_by_name2() and H5Literate_by_name1() is deprecated. With earlier + * versions, H5Literate_by_name() is mapped to H5Literate_by_name1(). + * Specific compile-time compatibility flags and the resulting mappings are as + * follows: + * \li No compatibility flag: H5Literate_by_name2() (using 1.12 source) + * H5Literate_by_name1() (using 1.10 or 1.8 source) + * \li Emulate Release 1.12: H5Literate_by_name2() + * \li Emulate Release 1.8 or 1.10 interface: H5Literate_by_name1() + * + * \version 1.12.0 The function H5Literate_by_name() was renamed to H5Literate_by_name1() and + * deprecated in this release.
The macro H5Literate_by_name() and the + * function H5Literate_by_name2() were introduced in this release. + */ diff --git a/doxygen/dox/H5Lvisit.dox b/doxygen/dox/H5Lvisit.dox new file mode 100644 index 0000000..2dc547f --- /dev/null +++ b/doxygen/dox/H5Lvisit.dox @@ -0,0 +1,20 @@ +/** + * \ingroup TRAV + * \def H5Lvisit() + * H5Lvisit() is a macro that is mapped to either H5Lvisit1() or + * H5Lvisit2(). Such macros are provided to facilitate application + * compatibility. Their use and mappings are fully described in + * \ref api-compat-macros. If the library and/or application is + * compiled for Release 1.12 emulation, H5Lvisit() will be mapped to + * H5Lvisit2() and H5Lvisit1() is deprecated. With earlier versions, + * H5Lvisit() is mapped to H5Lvisit1(). Specific compile-time compatibility + * flags and the resulting mappings are as follows: + * \li No compatibility flag: H5Lvisit2() (using 1.12 source) H5Lvisit1() + * (using 1.10 or 1.8 source) + * \li Emulate Release 1.12: H5Lvisit2() + * \li Emulate Release 1.8 or 1.10 interface: H5Lvisit1() + * + * \version 1.12.0 The function H5Lvisit() was renamed to H5Lvisit1() and + * deprecated in this release. The macro H5Lvisit() and the + * function H5Lvisit2() were introduced in this release. + */ diff --git a/doxygen/dox/H5Lvisit_by_name.dox b/doxygen/dox/H5Lvisit_by_name.dox new file mode 100644 index 0000000..691787f --- /dev/null +++ b/doxygen/dox/H5Lvisit_by_name.dox @@ -0,0 +1,20 @@ +/** + * \ingroup TRAV + * \def H5Lvisit_by_name() + * H5Lvisit_by_name() is a macro that is mapped to either H5Lvisit_by_name1() or + * H5Lvisit_by_name2(). Such macros are provided to facilitate application + * compatibility. Their use and mappings are fully described in + * \ref api-compat-macros. If the library and/or application is + * compiled for Release 1.12 emulation, H5Lvisit_by_name() will be mapped to + * H5Lvisit_by_name2() and H5Lvisit_by_name1() is deprecated.
With earlier versions, + * H5Lvisit_by_name() is mapped to H5Lvisit_by_name1(). Specific compile-time + * compatibility flags and the resulting mappings are as follows: + * \li No compatibility flag: H5Lvisit_by_name2() (using 1.12 source) H5Lvisit_by_name1() + * (using 1.10 or 1.8 source) + * \li Emulate Release 1.12: H5Lvisit_by_name2() + * \li Emulate Release 1.8 or 1.10 interface: H5Lvisit_by_name1() + * + * \version 1.12.0 The function H5Lvisit_by_name() was renamed to H5Lvisit_by_name1() and + * deprecated in this release. The macro H5Lvisit_by_name() and the + * function H5Lvisit_by_name2() were introduced in this release. + */ diff --git a/doxygen/dox/H5Oget_info.dox b/doxygen/dox/H5Oget_info.dox new file mode 100644 index 0000000..ee4cd1c --- /dev/null +++ b/doxygen/dox/H5Oget_info.dox @@ -0,0 +1,72 @@ +/** + * \ingroup H5O + * \def H5Oget_info + * + * #H5Oget_info is a macro that is mapped to: + * \li #H5Oget_info3 + * \li #H5Oget_info1 + * + * \details Such macros are provided to facilitate application + * compatibility. Their use and mappings are fully described in + * API Compatibility Macros in HDF5; we urge you to read that + * document closely. + * + * In HDF5 versions 1.12 and after, #H5Oget_info is mapped to + * #H5Oget_info3 and #H5Oget_info1 is deprecated. + * In version 1.10 #H5Oget_info is identical to #H5Oget_info1. 
+ * + * Specific compile-time compatibility flags and the resulting + * mappings are as follows: + * \par + * <table> + * <tr> + * <th>Compatibility setting</th> + * <th>H5Oget_info</th> + * </tr> + * <tr> + * <td>No compatibility flag \n </td> + * <td>#H5Oget_info3 (in release 1.12) \n + * #H5Oget_info1 (in 1.8 or 1.10)</td> + * </tr> + * <tr> + * <td>Emulate Release 1.12</td> + * <td>#H5Oget_info3</td> + * </tr> + * <tr> + * <td>Emulate Release 1.10/1.8 interface</td> + * <td>#H5Oget_info1</td> + * </tr> + * </table> + * + * \note If you are iterating through a lot of different objects to + * retrieve information via the #H5Oget_info family of routines, + * you may see memory building up. This can be due to memory + * allocation for metadata such as object headers and messages + * when the iterated objects are put into the metadata cache. + * \note + * If the memory buildup is not desirable, you can configure a + * smaller cache via #H5Fset_mdc_config or set the file access + * property list via #H5Pset_mdc_config. A smaller sized cache + * will force metadata entries to be evicted from the cache, + * thus freeing the memory associated with the entries. + * + * \version 1.12.0 The macro #H5Oget_info and the function #H5Oget_info3 + * were added, and #H5Oget_info1 was deprecated. + * \version 1.10.5 The macro #H5Oget_info was removed. The functions + * #H5Oget_info1 and #H5Oget_info are identical + * in this release. This change was added to restore the + * broken API compatibility introduced in HDF5-1.10.3. + * \version 1.10.3 The function #H5Oget_info was renamed + * #H5Oget_info1. The macro #H5Oget_info and the function + * #H5Oget_info2 were introduced in this release. + * \version 1.8.15 Added a note about the valid values for the \c version field + * in the H5O_hdr_info_t structure. + * \version 1.8.11 Fortran subroutine introduced in this release. + * \version 1.8.10 Added #H5O_type_t structure to the Description section. 
+ * Separated H5O_hdr_info_t structure from + * #H5O_info_t in the Description section. Clarified the + * definition and implementation of the time fields. + * + * \since 1.8.0 + * + */ diff --git a/doxygen/dox/H5Oget_info_by_idx.dox b/doxygen/dox/H5Oget_info_by_idx.dox new file mode 100644 index 0000000..49b8031 --- /dev/null +++ b/doxygen/dox/H5Oget_info_by_idx.dox @@ -0,0 +1,55 @@ +/** + * \ingroup H5O + * \def H5Oget_info_by_idx + * + * #H5Oget_info_by_idx is a macro that is mapped to: + * \li #H5Oget_info_by_idx3 + * \li #H5Oget_info_by_idx1 + * + * \details Such macros are provided to facilitate application + * compatibility. Their use and mappings are fully described in + * API Compatibility Macros in HDF5; we urge you to read that + * document closely. + * + * In HDF5 versions 1.12 and after, #H5Oget_info_by_idx is mapped to + * #H5Oget_info_by_idx3 and #H5Oget_info_by_idx1 is deprecated. + * In version 1.10 #H5Oget_info_by_idx is identical to #H5Oget_info_by_idx1. + * + * Specific compile-time compatibility flags and the resulting + * mappings are as follows: + * + * \par + * <table> + * <tr> + * <th>Compatibility setting</th> + * <th>H5Oget_info_by_idx</th> + * </tr> + * <tr> + * <td>No compatibility flag \n </td> + * <td>#H5Oget_info_by_idx3 for 1.12 \n + * #H5Oget_info_by_idx1 for 1.8/1.10</td> + * </tr> + * <tr> + * <td>Emulate Release 1.12</td> + * <td>#H5Oget_info_by_idx3</td> + * </tr> + * <tr> + * <td>Emulate Release 1.10/1.8 interface</td> + * <td>#H5Oget_info_by_idx1</td> + * </tr> + * </table> + * + * \version 1.12.0 The macro #H5Oget_info_by_idx and function #H5Oget_info_by_idx3 were added, + * and #H5Oget_info_by_idx1 was deprecated. + * \version 1.10.5 The macro #H5Oget_info_by_idx was removed. The functions + * #H5Oget_info_by_idx and #H5Oget_info_by_idx1 are + * identical in this release. This change was added to restore the + * broken API compatibility introduced in HDF5-1.10.3. 
+ * \version 1.10.3 The function #H5Oget_info_by_idx was renamed #H5Oget_info_by_idx1. + * The macro #H5Oget_info_by_idx and the function #H5Oget_info_by_idx2 + * were introduced in this release. + * \version 1.8.11 Fortran subroutine introduced in this release. + * + * \since 1.8.0 + * + */ diff --git a/doxygen/dox/H5Oget_info_by_name.dox b/doxygen/dox/H5Oget_info_by_name.dox new file mode 100644 index 0000000..18f7d28 --- /dev/null +++ b/doxygen/dox/H5Oget_info_by_name.dox @@ -0,0 +1,58 @@ +/** + * \ingroup H5O + * \def H5Oget_info_by_name + * + * #H5Oget_info_by_name is a macro that is mapped to: + * \li #H5Oget_info_by_name3 + * \li #H5Oget_info_by_name1 + * + * \details Such macros are provided to facilitate application + * compatibility. Their use and mappings are fully described in + * API Compatibility Macros in HDF5; we urge you to read that + * document closely. + * + * In HDF5 versions 1.12 and after, #H5Oget_info_by_name is mapped to + * #H5Oget_info_by_name3. In version 1.10 #H5Oget_info_by_name is + * identical to #H5Oget_info_by_name1. + * + * Specific compile-time compatibility flags and the resulting + * mappings are as follows: + * + * \par + * <table> + * <tr> + * <th>Compatibility setting</th> + * <th>H5Oget_info_by_name</th> + * </tr> + * <tr> + * <td>No compatibility flag \n </td> + * <td>#H5Oget_info_by_name3 for 1.12 and above \n + * #H5Oget_info_by_name1 for 1.8 or 1.10</td> + * </tr> + * <tr> + * <td>Emulate Release 1.12</td> + * <td>#H5Oget_info_by_name3</td> + * </tr> + * <tr> + * <td>Emulate Release 1.10 or 1.8 interface</td> + * <td>#H5Oget_info_by_name1</td> + * </tr> + * </table> + * + * \version 1.12.0 The macro #H5Oget_info_by_name and function + * #H5Oget_info_by_name3 were added and + * #H5Oget_info_by_name1 was deprecated. + * \version 1.10.5 The macro #H5Oget_info_by_name was removed. The functions + * #H5Oget_info_by_name and #H5Oget_info_by_name1 are + * identical in this release. 
This change was added to restore + * the broken API compatibility introduced in HDF5-1.10.3. + * \version 1.10.3 The function #H5Oget_info_by_name was renamed + * to #H5Oget_info_by_name1. The macro #H5Oget_info_by_name + * and the function #H5Oget_info_by_name2 were introduced + * in this release. + * \version 1.8.8 Fortran 2003 subroutine and \c h5o_info_t derived + * type introduced in this release. + * + * \since 1.8.0 + * + */ diff --git a/doxygen/dox/H5Ovisit.dox b/doxygen/dox/H5Ovisit.dox new file mode 100644 index 0000000..1e2a3ea --- /dev/null +++ b/doxygen/dox/H5Ovisit.dox @@ -0,0 +1,55 @@ +/** + * \ingroup H5O + * \def H5Ovisit + * + * #H5Ovisit is a macro that is mapped to one of the following: + * \li #H5Ovisit3 + * \li #H5Ovisit1 + * + * \details Such macros are provided to facilitate application + * compatibility. Their use and mappings are fully described in + * API Compatibility Macros in HDF5; we urge you to read that + * document closely. + * + * In HDF5 versions 1.12 and after, #H5Ovisit is mapped to + * #H5Ovisit3. In version 1.10, #H5Ovisit is identical + * to #H5Ovisit1. + * + * Specific compile-time compatibility flags and the resulting + * mappings are as follows: + * + * \par + * <table> + * <tr> + * <th>Compatibility settings</th> + * <th>H5Ovisit</th> + * </tr> + * <tr> + * <td>No compatibility flag \n </td> + * <td>#H5Ovisit3 in 1.12 or after \n + * #H5Ovisit1 for 1.8 and 1.10</td> + * </tr> + * <tr> + * <td>Emulate Release 1.12</td> + * <td>#H5Ovisit3</td> + * </tr> + * <tr> + * <td>Emulate Release 1.10 or 1.8 interface</td> + * <td>#H5Ovisit1</td> + * </tr> + * </table> + * + * \version 1.12.0 The macro #H5Ovisit and function #H5Ovisit3 were added, + * and #H5Ovisit1 was deprecated. + * \version 1.10.5 The macro #H5Ovisit was removed. The functions + * #H5Ovisit and #H5Ovisit1 are identical in this release. + * This change was added to restore the broken API compatibility + * introduced in HDF5-1.10.3.
+ * \version 1.10.3 The function #H5Ovisit was renamed to #H5Ovisit1. + * The macro #H5Ovisit and the function #H5Ovisit2 were + * introduced in this release. + * \version 1.8.8 Fortran subroutine and data structure added. + * + * \since 1.8.0 + * + */ diff --git a/doxygen/dox/H5Ovisit_by_name.dox b/doxygen/dox/H5Ovisit_by_name.dox new file mode 100644 index 0000000..2ba4846 --- /dev/null +++ b/doxygen/dox/H5Ovisit_by_name.dox @@ -0,0 +1,54 @@ +/** + * \ingroup H5O + * \def H5Ovisit_by_name + * + * #H5Ovisit_by_name is a macro that is mapped to one of the following: + * \li #H5Ovisit_by_name3 + * \li #H5Ovisit_by_name1 + * + * \details Such macros are provided to facilitate application + * compatibility. Their use and mappings are fully described in + * API Compatibility Macros in HDF5; we urge you to read that + * document closely. + * + * In HDF5 versions 1.12 and after, #H5Ovisit_by_name is mapped to + * #H5Ovisit_by_name3. In version 1.10, #H5Ovisit_by_name + * is identical to #H5Ovisit_by_name1. + * + * Specific compile-time compatibility flags and the resulting + * mappings are as follows: + * + * \par + * <table> + * <tr> + * <th>Compatibility settings</th> + * <th>H5Ovisit_by_name</th> + * </tr> + * <tr> + * <td>No compatibility flag \n </td> + * <td>#H5Ovisit_by_name3 for 1.12 and above \n + * #H5Ovisit_by_name1 for 1.10 or 1.8</td> + * </tr> + * <tr> + * <td>Emulate Release 1.12 interface</td> + * <td>#H5Ovisit_by_name3</td> + * </tr> + * <tr> + * <td>Emulate Release 1.10 or 1.8 interface</td> + * <td>#H5Ovisit_by_name1</td> + * </tr> + * </table> + * + * \version 1.12.0 The macro #H5Ovisit_by_name and function #H5Ovisit_by_name3 were added. + * \version 1.10.5 The macro #H5Ovisit_by_name was removed. The functions + * #H5Ovisit_by_name and #H5Ovisit_by_name1 are identical + * in this release. This change was added to restore the + * broken API compatibility introduced in HDF5-1.10.3. 
+ * \version 1.10.3 The function #H5Ovisit_by_name was renamed to #H5Ovisit_by_name1. + * The macro #H5Ovisit_by_name and the function #H5Ovisit_by_name2 + * were introduced in this release. + * \version 1.8.8 Fortran subroutine introduced in this release. + * + * \since 1.8.0 + * + */ diff --git a/doxygen/dox/H5Sencode.dox b/doxygen/dox/H5Sencode.dox new file mode 100644 index 0000000..fe0995c --- /dev/null +++ b/doxygen/dox/H5Sencode.dox @@ -0,0 +1,5 @@ +/** + * \ingroup H5S + * \def H5Sencode() + * H5Sencode() is a macro that is mapped to either H5Sencode1() or H5Sencode2(). +*/ diff --git a/doxygen/dox/MetadataCachingInHDF5.dox b/doxygen/dox/MetadataCachingInHDF5.dox new file mode 100644 index 0000000..9ba0fab --- /dev/null +++ b/doxygen/dox/MetadataCachingInHDF5.dox @@ -0,0 +1,1020 @@ +/** \page TNMDC Metadata Caching in HDF5 + +\todo Revise this! + +\section intro Introduction + +In the 1.6.4 release, we introduced a re-implementation of the metadata +cache. That release contained an incomplete version of the cache which could not +be controlled via the API. The version in the 1.8 release is more mature and +includes new API calls that allow the user program to configure the metadata +cache both on file open and at run time. + +From the user perspective, the most striking effect of the new cache should be a +large reduction in the cache memory requirements when working with complex HDF5 +files. + +Those working with such files may also notice a reduction in file close time. + +Those working with HDF5 files with a simple structure shouldn't notice any +particular changes in most cases. In rare cases, there may be a significant +improvement in performance. + +The remainder of this document contains an architectural overview of the old and +new metadata caches, a discussion of algorithms used to automatically adjust +cache size to circumstances, and a high-level discussion of the cache +configuration controls. 
It can be safely skipped by anyone who works only with +HDF5 files with relatively simple structure (i.e. no huge groups, no datasets +with large numbers of chunks, and no objects with large numbers of attributes.) + +On the other hand, it is mandatory reading if you want to use something other +than the default metadata cache configuration. The documentation on the metadata +cache-related API calls will not make much sense without this background. + +\section oldnew Old and New Metadata Cache + +\subsection old The Old Metadata Cache + +The old metadata cache indexed the cache with a hash table with no provision for +collisions. Instead, collisions were handled by evicting the existing entry to +make room for the new entry. Aside from flushes, there was no other mechanism +for evicting entries, so the replacement policy could best be described as +"Evict on Collision". + +As a result, if two frequently used entries hashed to the same location, they +would evict each other regularly. To decrease the likelihood of this situation, +the default hash table size was set fairly large -- slightly more than +10,000. This worked well, but since the size of metadata entries is not bounded, +and since entries were only evicted on collision, the large hash table size +allowed the cache size to explode when working with HDF5 files with complex +structure. + +The "Evict on Collision" replacement policy also caused problems with the +parallel version of the HDF5 library, as a collision with a dirty entry could +force a write in response to a metadata read. Since all metadata writes must be +collective in the parallel case while reads need not be, this could cause the +library to hang if only some of the processes participated in a metadata read +that forced a write. Prior to the implementation of the new metadata cache, we +dealt with this issue by maintaining a shadow cache for dirty entries evicted by +a read. 
+ +\subsection new The New Metadata Cache + +The new metadata cache was designed to address the above issues. After +implementation, it became evident that the working set size for HDF5 files +varies widely depending on both structure and access patterns. Thus it was +necessary to add support for cache size adjustment under either automatic or +user program control (see section 2.3 for details). + +When the cache is operating under direct user program control, it is also +possible to temporarily disable evictions from the metadata cache so as to +maximize raw data throughput at the expense of allowing the cache to grow +without bound until evictions are enabled again. + +Structurally, the new metadata cache can be thought of as a heavily modified +version of the UNIX buffer cache as described in chapter three of M. J. Bach's +"The Design of the UNIX Operating System." In essence, the UNIX buffer cache uses +a hash table with chaining to index a pool of fixed-size buffers. It uses the +LRU replacement policy to select candidates for eviction. + +Since HDF5 metadata entries are not of fixed size and may grow arbitrarily +large, the size of the new metadata cache cannot be controlled by setting a +maximum number of entries. Instead, the new cache keeps a running sum of the +sizes of all entries and attempts to evict entries as necessary to stay within a +user-specified maximum size. (Note the use of the word "attempts" here -- as +will be seen, it is possible for the cache to exceed its currently specified +maximum size.) At present, the LRU replacement policy is the only option for +selecting candidates for eviction. + +Per the standard Unix buffer cache, dirty entries are given two passes through +the LRU list before being evicted. The first time they reach the end of the LRU +list, they are flushed, marked as clean, and moved to the head of the LRU +list. When a clean entry reaches the end of the LRU list, it is simply evicted +if space is needed.
+ +The cache cannot evict entries that are locked, and thus it will temporarily +grow beyond its maximum size if there are insufficient unlocked entries +available for eviction. + +In the parallel version of the library, only the cache running under process 0 +of the file communicator is allowed to write metadata to file. All the other +caches must retain dirty metadata until the process 0 cache tells them that the +metadata is clean. + +Since all operations modifying metadata must be collective, all caches see the +same stream of dirty metadata. This fact is used to allow them to synchronize +every n bytes of dirty metadata, where n is a user-configurable value that +defaults to 256 KB. + +To avoid sending the other caches messages from the future, process 0 must not +write any dirty entries until it reaches a synchronization point. When it +reaches a synchronization point, it writes entries as needed, and then +broadcasts the list of flushed entries to the other caches. The caches on the +other processes use this list to mark entries clean before they leave the +synchronization point, allowing them to evict those entries as needed. + +The caches will also synchronize on a user-initiated flush. + +To minimize overhead when running in parallel, the cache maintains a "clean" LRU +list in addition to the regular LRU list. This list contains only clean entries +and is used as a source of candidates for eviction when flushing dirty entries +is not allowed. + +Since flushing entries is forbidden most of the time when running in parallel, +the caches can be forced to exceed their maximum sizes if they run out of clean +entries to evict. + +To decrease the likelihood of this event, the new cache allows the user to +specify a minimum clean size -- which is a minimum total size of all the entries +on the clean LRU plus all unused space in the cache. 
+ +While the clean LRU list is only maintained in the parallel version of the HDF5 +library, the notion of a minimum clean size still applies in the serial +case. Here it is used to force a mix of clean and dirty entries in the cache +even in the write-only case. + +This, in turn, reduces the number of redundant flushes by avoiding the case in +which the cache fills with dirty metadata and all entries must be flushed before +a clean entry can be evicted to make room for a new entry. + +Observe that in both the serial and parallel cases, the maintenance of a minimum +clean size modifies the replacement policy, as dirty entries may be flushed +earlier than would otherwise be the case so as to maintain the desired amount of +clean and/or empty space in the cache. + +While the new metadata cache only supports the LRU replacement policy at +present, that may change. Support for multiple replacement policies was very +much in mind when the cache was designed, as was the ability to switch +replacement policies at run time. The situation has been complicated by the +later addition of the adaptive cache resizing requirement, as two of the +resizing algorithms piggyback on the LRU list. However, if there is a need for +additional replacement policies, it shouldn't be too hard to implement them. + +\section adapt Adaptive Cache Resizing in the New Metadata Cache + +As mentioned earlier, the metadata working set size for an HDF5 file varies +wildly depending on the structure of the file and the access pattern. For +example, a 2MB limit on metadata cache size is excessive for an H5repack of +almost all HDF5 files we have tested. However, I have a file submitted by one of +our users that will run a 13% hit rate with this cache size and will lock up one +of our Linux boxes using the old metadata cache. Increase the new metadata cache +size to 4 MB, and the hit rate exceeds 99%. + +In this case, the main culprit is a root group with more than 20,000 entries in +it. 
As a result, the root group heap exceeds 1 MB, which tends to crowd out the +rest of the metadata in a 2 MB cache. + +This case and a number of synthetic tests convinced us that we needed to modify +the new metadata cache to expand and contract according to need within +user-specified bounds. + +I was unable to find any previous work on this problem, so I invented solutions +as I went along. If you are aware of prior work, please send me references. The +closest I was able to come was a group of embedded CPU designers who were +turning off sections of their cache to conserve power. + +\subsection increasing Increasing the Cache Size + +In the context of the HDF5 library, the problem of increasing the cache size as +necessary to contain the current working set turns out to involve two rather +different issues. + +The first of these, which was recognized immediately, is the problem of +recognizing long term changes in working set size, and increasing the cache size +accordingly, while not reacting to transients. + +The second, which I recognized the hard way, is to adjust the cache size for +sudden, dramatic increases in working set size caused by requests for large +pieces of metadata which may be larger than the current metadata cache size. + +The algorithms for handling these situations are discussed below. These problems +are largely orthogonal to each other, so both algorithms may be used +simultaneously. + +\subsubsection hrtcsi Hit Rate Threshold Cache Size Increment + +Perhaps the most obvious heuristic for identifying cases in which the cache is +too small involves monitoring the hit rate. If the hit rate is low for a while, +and the cache is at its current maximum size, the current maximum cache size is +probably too small. + +The hit rate threshold algorithm for increasing cache size applies this +intuition directly. + +Hit rate statistics are collected over a user-specified number of cache +accesses. This period is known as an epoch.
+ +At the end of each epoch, the hit rate is computed, and the counters are +reset. If the hit rate is below a user-specified threshold and the cache is at +its current maximum size, the maximum size of the cache is increased by a +user-specified multiple. If required, the new cache maximum size is clipped to +stay within the user-specified upper bound on the maximum cache size, and +optionally, within a user-specified maximum increment. + +My tests indicate that this algorithm works well in most cases. However, in a +synthetic test in which hit rate increased slowly with cache size, and load +remained steady for many epochs, I observed a case in which cache size increased +until the hit rate just exceeded the specified minimum and then stalled. This is +a problem, as to avoid volatility, it is necessary to set the minimum hit rate +threshold well below the desired hit rate. Thus we may find ourselves with a +cache running with a 91% hit rate when we really want it to increase its size +until the hit rate is about 99%. + +If this case occurs frequently in actual use, I will have to come up with an +improved algorithm. Please let me know if you see this behavior. However, I had +to work rather hard to create it in my synthetic tests, so I would expect it to +be uncommon. + +\subsubsection fcsi Flash Cache Size Increment + +A fundamental problem with the above algorithm is that it contains the hidden +assumption that cache entries are relatively small in comparison to the cache +itself. While I knew this assumption was not generally true when I developed the +algorithm, I thought that cases where it failed would be so rare as to not be +worth considering, as even if they did occur, the above algorithm would rectify +the situation within an epoch or two.
+ +While it is true that such occurrences are rare, and it is true that the hit +rate threshold cache size increment algorithm will rectify the situation +eventually, the performance degradation experienced by users while waiting for +the epoch to end was so extreme that some way of accelerating response to such +situations was essential. + +To understand the problem, consider the following use case: + +Suppose we create a group, and then repeatedly create a new data set in the +group, write some data to it and then close it. + +In some versions of the HDF5 file format, the names of the datasets will be +stored in a local heap associated with the group, and the space for that heap +will be allocated in a single, contiguous chunk. When this local heap is full, +we allocate a new chunk twice the size of the old, copy the data from the old +local heap into the new, and discard the old local heap. + +By default, the minimum metadata cache size is set to 2 MB. Thus in this use +case, our hit rate will be fine as long as the local heap is no larger than a +little less than 2 MB, as the group related metadata is accessed frequently and +never evicted, and the data set related metadata is never accessed once the data +set is closed, and thus is evicted smoothly to make room for new data sets. + +All this changes abruptly when the local heap finally doubles in size to a value +above the slightly less than 2 MB limit. All of a sudden, the local heap is the +size of the metadata cache, and the cache must constantly swap it in to access +it, and then swap it out to make room for other metadata. + +The hit rate threshold-based algorithm for increasing the cache size will fix +this problem eventually, but performance will be very bad until it does, as the +metadata cache will be largely ineffective until its size is increased.
+ +An obvious heuristic for addressing this "big rock in a small pond" issue is to +watch for large "incoming rocks", and increase the size of the "pond" if the +rock is so big that it will force most of the "water" out of the "pond". + +The add space flash cache size increment algorithm applies this intuition +directly: + +Let x be either the size of a newly inserted entry, a newly loaded entry, or the +number of bytes by which the size of an existing entry has been increased +(i.e. the size of the "rock"). + +If x is greater than some user-specified fraction of the current maximum cache +size, increase the current maximum cache size by x times some user-specified +multiple, less any free space that was in the cache to begin with. Further, to +avoid confusing the other cache size increment/decrement code, start a new +epoch. + +At present, this algorithm pays no attention to any user-specified limit on the +maximum size of any single cache size increase, but it DOES stay within the +user-specified upper bound on the maximum cache size. + +While it should be easy to see how this algorithm could be fooled into +inactivity by a large number of entries that were not quite large enough to +cross the threshold, in practice it seems to work reasonably well. + +Needless to say, I will revisit the issue should this cease to be the case. + +\subsection decreasing Decreasing the Cache Size + +Identifying cases in which the maximum cache size is larger than necessary +turned out to be more difficult. + +\subsubsection hrtcsr Hit Rate Threshold Cache Size Reduction + +One obvious heuristic is to monitor the hit rate and guess that we can safely +decrease cache size if the hit rate exceeds some user-supplied threshold (say +.99995).
The hit rate threshold size decrement algorithm implemented in the new +metadata cache implements this intuition as follows: + +At the end of each epoch (this is the same epoch that is used in the cache size +increment algorithm), the hit rate is compared with the user-specified +threshold. If the hit rate exceeds that threshold, the current maximum cache +size is decreased by a user-specified factor. If required, the size of the +reduction is clipped to stay within a user-specified lower bound on the maximum +cache size, and optionally, within a user-specified maximum decrement. + +In my synthetic tests, this algorithm works poorly. Even with a very high +threshold and a small maximum reduction, it results in cache size +oscillations. The size increment code typically increments the maximum cache +size above the working set size. This results in a high hit rate, which causes +the threshold size decrement code to reduce the maximum cache size below the +working set size, which causes the hit rate to crash causing the cycle to +repeat. The resulting average hit rate is poor. + +It remains to be seen if this behavior will be seen in the field. The algorithm +is available for use, but it wouldn't be my first choice. If you use it, please +report back. + +\subsubsection acsr Ageout Cache Size Reduction + +Another heuristic for dealing with oversized cache conditions is to look for +entries that haven't been accessed for a long time, evict them, and reduce the +cache size accordingly. + +The age out cache size reduction applies this intuition as follows: At the end +of each epoch (again the same epoch as used in the cache size increment +algorithm), all entries that haven't been accessed for a user-configurable +number of epochs (1 - 10 at present) are evicted. The maximum cache size is then +reduced to equal the sum of the sizes of the remaining entries. 
The size of the +reduction is clipped to stay within a user-specified lower bound on maximum +cache size, and optionally, within a user-specified maximum decrement. + +In addition, the user may specify a minimum fraction of the cache which must be +empty before the cache size is reduced. Thus if an empty reserve of 0.1 was +specified on a 10 MB cache, there would be no cache size reduction unless the +eviction of aged out entries resulted in more than 1 MB of empty space. Further, +even after the reduction, the cache would be one-tenth empty. + +In my synthetic tests, the age out algorithm works rather well, although it is +somewhat sensitive to the epoch length and age out period selection. + +\subsubsection awhrtcsr Ageout With Hit Rate Threshold Cache Size Reduction + +To address these issues, I combined the hit rate threshold and age out +heuristics. + +Age out with threshold works just like age out, except that the algorithm is not +run unless the hit rate exceeded a user-specified threshold in the previous +epoch. + +In my synthetic tests, age out with threshold seems to work nicely, with no +observed oscillation. Thus I have selected it as the default cache size +reduction algorithm. + +For those interested in such things, the age out algorithm is implemented by +inserting a marker entry at the head of the LRU list at the beginning of each +epoch. Entries that haven't been accessed for at least n epochs are simply +entries that appear in the LRU list after the n-th marker at the end of an +epoch. + +\section configuring Configuring the New Metadata Cache + +Due to a lack of resources, the design work on the automatic cache size +adjustment algorithms was done hastily, using primarily synthetic tests. I don't +think I spent more than a couple weeks writing and running performance tests -- +most time went into coding and functional testing. 
+ +As a result, while I think the algorithms provided for adaptive cache resizing +will work well in actual use, I don't really know (although preliminary results +from the field are promising). Fortunately, the issue shouldn't arise for the +vast majority of HDF5 users, and those for whom it may arise should be savvy +enough to recognize problems and deal with them. + +For this latter class of users, I have implemented a number of new API calls +allowing the user to select and configure the cache resize algorithms, or to +turn them off and control cache size directly from the user program. There are +also API calls that allow the user program to monitor hit rate and cache size. + +From the user perspective, all the cache configuration data for a given file is +contained in an instance of the \ref H5AC_cache_config_t structure -- the definition +of which is given below: + +\snippet H5ACpublic.h H5AC_cache_config_t_snip + +This structure is defined in \c H5ACpublic.h. Each field is discussed below and in +the associated header comment. + +The C API allows you to get and set this structure directly. Unfortunately, the +Fortran API has to do this with individual parameters for each of the fields +(with the exception of version). + +While the API calls are discussed individually in the reference manual, the +following high-level discussion of what fields to change for different purposes +should be useful. + +\subsection gconfig General Configuration + +The \c version field is intended to allow \THG to change the \c +H5AC_cache_config_t structure without breaking old code. For now, this field +should always be set to \c H5AC__CURR_CACHE_CONFIG_VERSION, even when you are +getting the current configuration data from the cache. The library needs the +version number to know where fields are located with reference to the supplied +base address. 
+ +The \ref H5AC_cache_config_t.rpt_fcn_enabled "rpt_fcn_enabled" field is a +boolean flag that allows you to turn on and off the resize reporting function +that reports the activities of the adaptive cache resize code at the end of each +epoch -- assuming that it is enabled. + +The report function is unsupported, so you are on your own if you use it. Since +it dumps status data to stdout, you should not attempt to use it with Windows +unless you modify the source. You may find it useful if you want to experiment +with different adaptive resize configurations. It is also a convenient way of +diagnosing poor cache configuration. Finally, if you do lots of runs with +identical behavior, you can use it to determine the metadata cache size needed +in each phase of your program so you can set the required cache sizes manually. + +The trace file fields are also unsupported. They allow one to open and close a +trace file in which all calls to the metadata cache are logged in a +user-specified file for later analysis. The feature is intended primarily for +THG use in debugging or optimizing the metadata cache in cases where users in +the field observe obscure failures or poor performance that we cannot re-create +in the lab. The trace file will allow us to re-create the exact sequence of +cache operations that are triggering the problem. + +At present we do not have a playback utility for trace files, although I imagine +that we will write one quickly when and if we need it. + +To enable the trace file, you load the full path of the desired trace file into +\ref H5AC_cache_config_t.trace_file_name "trace_file_name", and set \ref +H5AC_cache_config_t.open_trace_file "open_trace_file" to \c TRUE. In the +parallel case, an ASCII representation of the rank of each process is appended +to the supplied trace file name to create a unique trace file name for that +process. + +To close an open trace file, set \ref H5AC_cache_config_t.close_trace_file +"close_trace_file" to \c TRUE. 
+ +It must be emphasized that you are on your own if you play with the trace file +feature absent a request from \THG. Needless to say, the trace file feature is +disabled by default. If you enable it, you will take a large performance hit and +generate huge trace files. + +The \ref H5AC_cache_config_t.evictions_enabled "evictions_enabled" field is a +boolean flag allowing the user to disable the eviction of entries from the +metadata cache. Under normal operation conditions, this field will always be set +to \c TRUE. + +In rare circumstances, the raw data throughput requirements may be so high that +the user wishes to postpone metadata writes so as to reserve I/O throughput for +raw data. The \ref H5AC_cache_config_t.evictions_enabled "evictions_enabled" +field exists to allow this -- although the user is to be warned that the +metadata cache will grow without bound while evictions are disabled. Thus +evictions should be re-enabled as soon as possible, and it may be wise to +monitor cache size and statistics (to see how to enable statistics, see the +debugging facilities section below). + +Evictions may only be disabled when the automatic cache resize code is disabled +as well. Thus to disable evictions, not only must the user set the \ref +H5AC_cache_config_t.evictions_enabled "evictions_enabled" field to \c FALSE, but +he must also set \ref H5AC_cache_config_t.incr_mode "incr_mode" to +#H5C_incr__off, set \ref H5AC_cache_config_t.flash_incr_mode "flash_incr_mode" +to #H5C_flash_incr__off, and set \ref H5AC_cache_config_t.decr_mode "decr_mode" +to #H5C_decr__off. + +To re-enable evictions, just set \ref H5AC_cache_config_t.evictions_enabled +"evictions_enabled" back to \c TRUE. + +Before passing on to other subjects, it is worth re-iterating that disabling +evictions is an extreme step. Before attempting it, you might consider setting a +large cache size manually, and flushing the cache just before high raw data +throughput is required. 
This may yield the desired results without the risks +inherent in disabling evictions. + +The \ref H5AC_cache_config_t.set_initial_size "set_initial_size" and \ref +H5AC_cache_config_t.initial_size "initial_size" fields allow you to specify an +initial maximum cache size. If \ref H5AC_cache_config_t.set_initial_size +"set_initial_size" is \c TRUE, \ref H5AC_cache_config_t.initial_size +"initial_size" must lie in the interval [\ref H5AC_cache_config_t.min_size +"min_size", \ref H5AC_cache_config_t.max_size "max_size"] (see below for a +discussion of the \ref H5AC_cache_config_t.min_size "min_size" and \ref +H5AC_cache_config_t.max_size "max_size" fields). + +If you disable the adaptive cache resizing code (done by setting \ref +H5AC_cache_config_t.incr_mode "incr_mode" to #H5C_incr__off, \ref +H5AC_cache_config_t.flash_incr_mode "flash_incr_mode" to #H5C_flash_incr__off, +and \ref H5AC_cache_config_t.decr_mode "decr_mode" to #H5C_decr__off), you can +use these fields to control maximum cache size manually, as the maximum cache +size will remain at the initial size. + +Note, that the maximum cache size is only modified when \ref +H5AC_cache_config_t.set_initial_size "set_initial_size" is \c TRUE. This allows +the use of configurations specified at compile time to change resize +configuration without altering the current maximum size of the cache. Without +this feature, an additional call would be required to get the current maximum +cache size so as to set the \ref H5AC_cache_config_t.initial_size "initial_size" +to the current maximum cache size, and thereby avoid changing it. + +The \ref H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" sets the +current minimum clean size as a fraction of the current max cache size. While +this field was originally used only in the parallel version of the library, it +now applies to the serial version as well. Its value must lie in the range +\Code{[0.0, 1.0]}. 
0.01 is reasonable in the serial case, and 0.3 in the +parallel. + +A potential interaction, discovered at release 1.8.3, between the enforcement of +the \ref H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" and the +adaptive cache resize code can severely degrade performance. While this +interaction is easily dealt with in the serial case by setting \ref +H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" to 0.01, the problem +is more difficult in the parallel case. Please see the Interactions section +below for further details. + +The \ref H5AC_cache_config_t.max_size "max_size" and \ref +H5AC_cache_config_t.min_size "min_size" fields specify the range of maximum +sizes that may be set for the cache by the automatic resize code. \ref +H5AC_cache_config_t.min_size "min_size" must be less than or equal to +\ref H5AC_cache_config_t.max_size "max_size", and both must lie in the range +\Code{[H5C__MIN_MAX_CACHE_SIZE, H5C__MAX_MAX_CACHE_SIZE]} -- currently [1 KB, +128 MB]. If you routinely run a cache size in the top half of this range, you +should increase the hash table size. To do this, modify the \c +H5C__HASH_TABLE_LEN \Code{\#define} in \c H5Cpkg.h and re-compile. At present, +\c H5C__HASH_TABLE_LEN must be a power of two. + +The \c epoch_length is the number of cache accesses between runs of the adaptive +cache size control algorithms. It is ignored if these algorithms are turned +off. It must lie in the range \Code{[H5C__MIN_AR_EPOCH_LENGTH, +H5C__MAX_AR_EPOCH_LENGTH]} -- currently [100, 1000000]. The above constants are +defined in \c H5Cprivate.h. 50000 is a reasonable value. + +\subsection increment Increment Configuration + +The \ref H5AC_cache_config_t.incr_mode "incr_mode" field specifies the cache +size increment algorithm used. Its value must be a member of the \ref +H5C_cache_incr_mode enum type -- currently either #H5C_incr__off or +#H5C_incr__threshold (note the double underscores after \c "incr"). 
This type is +defined in H5Cpublic.h. + +If \ref H5AC_cache_config_t.incr_mode "incr_mode" is set to #H5C_incr__off, +regular automatic cache size increases are disabled, and the \ref +H5AC_cache_config_t.lower_hr_threshold "lower_hr_threshold", \ref +H5AC_cache_config_t.increment "increment", \ref +H5AC_cache_config_t.apply_max_increment "apply_max_increment", and \ref +H5AC_cache_config_t.max_increment "max_increment" fields are ignored. + +The \ref H5AC_cache_config_t.flash_incr_mode "flash_incr_mode" field specifies +the flash cache size increment algorithm used. Its value must be a member of the +\ref H5C_cache_flash_incr_mode enum type -- currently either +#H5C_flash_incr__off or #H5C_flash_incr__add_space (note the double underscores +after \c "incr"). This type is defined in H5Cpublic.h. + +If \ref H5AC_cache_config_t.flash_incr_mode "flash_incr_mode" is set to +#H5C_flash_incr__off, flash cache size increases are disabled, and the \ref +H5AC_cache_config_t.flash_multiple "flash_multiple" and \ref +H5AC_cache_config_t.flash_threshold "flash_threshold" fields are ignored. + +\subsubsection hrtcsic Hit Rate Threshold Cache Size Increase Configuration + +If \ref H5AC_cache_config_t.incr_mode "incr_mode" is #H5C_incr__threshold, the +cache size is increased via the hit rate threshold algorithm. The remaining +fields in the section are then used as follows: + +\ref H5AC_cache_config_t.lower_hr_threshold "lower_hr_threshold" is the +threshold below which the hit rate must fall to trigger an increase. The value +must lie in the range \Code{[0.0, 1.0]}. In my tests, a relatively high value +seems to work best -- 0.9 for example. + +\ref H5AC_cache_config_t.increment "increment" is the factor by which the old +maximum cache size is multiplied to obtain an initial new maximum cache size +when an increment is needed. 
The actual change in size may be smaller as +required by \ref H5AC_cache_config_t.max_size "max_size" (above) and \c +max_increment (discussed below). increment must be greater than or equal to +1.0. If you set it to 1.0, you will effectively turn off the increment code. 2.0 +is a reasonable value. + +\ref H5AC_cache_config_t.apply_max_increment "apply_max_increment" and \ref +H5AC_cache_config_t.max_increment "max_increment" allow the user to specify a +maximum increment. If \ref H5AC_cache_config_t.apply_max_increment +"apply_max_increment" is \c TRUE, the cache size will never be increased by more +than the number of bytes specified in \ref H5AC_cache_config_t.max_increment +"max_increment" in any single increase. + +\subsubsection fcsic Flash Cache Size Increase Configuration + +If \ref H5AC_cache_config_t.flash_incr_mode "flash_incr_mode" is set to +#H5C_flash_incr__add_space, flash cache size increases are enabled. The size of +the cache will be increased under the following circumstances: + +Let \c t be the current maximum cache size times the value of the \ref +H5AC_cache_config_t.flash_threshold "flash_threshold" field. + +Let \c x be either the size of the newly inserted entry, the size of the newly +loaded entry, or the number of bytes added to the size of the entry under +consideration for triggering a flash cache size increase. + +If \Code{t < x}, the basic condition for a flash cache size increase is met, and +we proceed as follows: + +Let \c space_needed equal \c x less the amount of free space in the cache. + +Further, let \ref H5AC_cache_config_t.increment "increment" equal \c +space_needed times the value of the \ref H5AC_cache_config_t.flash_multiple +"flash_multiple" field. 
If \ref H5AC_cache_config_t.increment "increment" plus +the current cache size is greater than \ref H5AC_cache_config_t.max_size +"max_size" (discussed above), reduce \ref H5AC_cache_config_t.increment +"increment" so that \ref H5AC_cache_config_t.increment "increment" plus the +current cache size is equal to \ref H5AC_cache_config_t.max_size "max_size". + +If the increment is greater than zero, increase the current cache size by \ref +H5AC_cache_config_t.increment "increment". To avoid confusing the other cache +size increment or decrement algorithms, start a new epoch. Note, however, that +we do not cycle the epoch markers if some variant of the age out algorithm is in +use. + +The use of the \ref H5AC_cache_config_t.flash_threshold "flash_threshold" field +is discussed above. It must be a floating-point value in the range of +\Code{[0.1, 1.0]}. 0.25 is a reasonable value. + +The use of the \ref H5AC_cache_config_t.flash_multiple "flash_multiple" field is +also discussed above. It must be a floating-point value in the range of +\Code{[0.1, 10.0]}. 1.4 is a reasonable value. + +\subsection decrement Decrement Configuration + +The \ref H5AC_cache_config_t.decr_mode "decr_mode" field specifies the cache +size decrement algorithm used. Its value must be a member of the \ref +H5C_cache_decr_mode enum type -- currently either #H5C_decr__off, +#H5C_decr__threshold, #H5C_decr__age_out, or #H5C_decr__age_out_with_threshold +(note the double underscores after \c "decr"). This type is defined in +H5Cpublic.h. + +If \ref H5AC_cache_config_t.decr_mode "decr_mode" is set to #H5C_decr__off, +automatic cache size decreases are disabled, and the remaining fields in the +cache size decrease control section are ignored. 
+ +\subsubsection hrtcsdc Hit Rate Threshold Cache Size Decrease Configuration + +If \ref H5AC_cache_config_t.decr_mode "decr_mode" is #H5C_decr__threshold, the +cache size is decreased by the threshold algorithm, and the remaining fields of +the decrement section are used as follows: + +\ref H5AC_cache_config_t.upper_hr_threshold "upper_hr_threshold" is the +threshold above which the hit rate must rise to trigger cache size reduction. It +must be in the range \Code{[0.0, 1.0]}. In my synthetic tests, very high values +like .9995 or .99995 seemed to work best. + +\ref H5AC_cache_config_t.decrement "decrement" is the factor by which the +current maximum cache size is multiplied to obtain a tentative new maximum cache +size. It must lie in the range \Code{[0.0, 1.0]}. Relatively large values like +.9 seem to work best in my synthetic tests. Note that the actual size reduction +may be smaller as required by \ref H5AC_cache_config_t.min_size "min_size" and +\ref H5AC_cache_config_t.max_decrement "max_decrement" (discussed below). \ref +H5AC_cache_config_t.apply_max_decrement "apply_max_decrement" and \ref +H5AC_cache_config_t.max_decrement "max_decrement" allow the user to specify a +maximum decrement. If \ref H5AC_cache_config_t.apply_max_decrement +"apply_max_decrement" is \c TRUE, the cache size will never be reduced by more +than \ref H5AC_cache_config_t.max_decrement "max_decrement" bytes in any single +reduction. + +With the hit rate threshold cache size decrement algorithm, the remaining fields +in the section are ignored. + +\subsubsection acsr Ageout Cache Size Reduction + +If \ref H5AC_cache_config_t.decr_mode "decr_mode" is #H5C_decr__age_out the +cache size is decreased by the ageout algorithm, and the remaining fields of the +decrement section are used as follows: + +\ref H5AC_cache_config_t.epochs_before_eviction "epochs_before_eviction" is the +number of epochs an entry must reside unaccessed in the cache before it is +evicted. 
This value must lie in the range \Code{[1, H5C__MAX_EPOCH_MARKERS]}. \c +H5C__MAX_EPOCH_MARKERS is defined in H5Cprivate.h, and is currently set to 10. + +\ref H5AC_cache_config_t.apply_max_decrement "apply_max_decrement" and \ref +H5AC_cache_config_t.max_decrement "max_decrement" are used as in section +2.4.3.1. + +\ref H5AC_cache_config_t.apply_empty_reserve "apply_empty_reserve" and \ref +H5AC_cache_config_t.empty_reserve "empty_reserve" allow the user to specify a +minimum empty reserve as discussed in section 2.3.2.2. An empty reserve of 0.05 +or 0.1 seems to work well. + +The \ref H5AC_cache_config_t.decrement "decrement" and \ref +H5AC_cache_config_t.upper_hr_threshold "upper_hr_threshold" fields are ignored +in this case. + +\subsubsection awhrtcsr Ageout With Hit Rate Threshold Cache Size Reduction + +If \ref H5AC_cache_config_t.decr_mode "decr_mode" is +#H5C_decr__age_out_with_threshold, the cache size is decreased by the ageout +with hit rate threshold algorithm, and the fields of the decrement section are used +as per the Ageout algorithm (see 5.3.2) with the exception of \ref +H5AC_cache_config_t.upper_hr_threshold "upper_hr_threshold". + +Here, \ref H5AC_cache_config_t.upper_hr_threshold "upper_hr_threshold" is the +threshold above which the hit rate must rise to trigger cache size reduction. It +must be in the range \Code{[0.0, 1.0]}. In my synthetic tests, high values like +.999 seemed to work well. + +\subsection parallel Parallel Configuration + +This section is a catch-all for parallel specific configuration data. At +present, it has only one field -- +\ref H5AC_cache_config_t.dirty_bytes_threshold "dirty_bytes_threshold". + +In PHDF5, all operations that modify metadata must be executed collectively. We +used to think that this was enough to ensure consistency across the metadata +caches, but since we allow processes to read metadata individually, the order of +dirty entries in the LRU list can vary across processes. 
This, in turn, can +change the order in which dirty metadata cache entries reach the bottom of the +LRU and are flushed to disk -- opening the door to messages from the past and +messages from the future bugs. + +To prevent this, only the metadata cache on process 0 of the file communicator +is allowed to write to file, and then only after entering a sync point with the +other caches. After it writes entries to file, it sends the base addresses of +the now clean entries to the other caches, so they can mark these entries clean +as well, and then leaves the sync point. The other caches mark the specified +entries as clean before they leave the sync point as well. (Observe, that since +all caches see the same stream of dirty metadata, they will all have the same +set of dirty entries upon sync point entry and exit.) + +The different caches know when to synchronize by counting the number of bytes of +dirty metadata created by the collective operations modifying metadata. Whenever +this count exceeds the value specified in the \ref +H5AC_cache_config_t.dirty_bytes_threshold "dirty_bytes_threshold", they all +enter the sync point, and process 0 flushes down to its minimum clean size and +sends the list of newly cleaned entries to the other caches. + +Needless to say, the value of the \ref H5AC_cache_config_t.dirty_bytes_threshold +"dirty_bytes_threshold" field must be consistent across all the caches operating +on a given file. + +All dirty metadata can also be flushed under programmatic control via the +H5Fflush() call. This call must be collective and will reset the dirty data +counts on each metadata cache. + +Absent calls to H5Fflush(), dirty metadata will only be flushed when the \ref +H5AC_cache_config_t.dirty_bytes_threshold "dirty_bytes_threshold" is exceeded, +and then only down to the \ref H5AC_cache_config_t.min_clean_fraction +"min_clean_fraction". 
Thus, if a program does all its metadata modifications in +one phase, and then doesn't modify metadata thereafter, a residue of dirty +metadata will be frozen in the metadata caches for the remainder of the +computation -- effectively reducing the sizes of the caches. + +In the default configuration, the caches will eventually resize themselves to +maintain an acceptable hit rate. However, this will take time, and it will +increase the application's footprint in memory. + +If your application behaves in this manner, you can avoid this by a collective +call to H5Fflush() immediately after the metadata modification phase. + +\subsection interactions Interactions + +Evictions may not be disabled unless the automatic cache resize code is disabled +as well (by setting \ref H5AC_cache_config_t.decr_mode "decr_mode" to +#H5C_decr__off, \ref H5AC_cache_config_t.flash_incr_mode "flash_incr_mode" to #H5C_flash_incr__off, and \ref +H5AC_cache_config_t.incr_mode "incr_mode" to #H5C_incr__off) -- thus placing the +cache size under the direct control of the user program. + +There is no logical necessity for this restriction. It is imposed because it +simplifies testing greatly and because I can't see any reason why one would want +to disable evictions while the automatic cache size adjustment code was +enabled. This restriction can be relaxed if anyone can come up with a good +reason to do so. + +At present, there are two interactions between the increment and decrement +sections of the configuration. + +If \ref H5AC_cache_config_t.incr_mode "incr_mode" is #H5C_incr__threshold, and +\ref H5AC_cache_config_t.decr_mode "decr_mode" is either #H5C_decr__threshold or +#H5C_decr__age_out_with_threshold, then \ref +H5AC_cache_config_t.lower_hr_threshold "lower_hr_threshold" must be strictly +less than \ref H5AC_cache_config_t.upper_hr_threshold "upper_hr_threshold". 
+ +Also, if the flash cache size increment code is enabled and is triggered, it +will restart the current epoch without calling any other cache size increment or +decrement code. + +In both the serial and parallel cases, there is the potential for an interaction +between the \ref H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" and +the cache size increment code that can severely degrade +performance. Specifically, if the \ref H5AC_cache_config_t.min_clean_fraction +"min_clean_fraction" is large enough, it is possible that keeping the specified +fraction of the cache clean may generate enough flushes to seriously degrade +performance even though the hit rate is excellent. + +In the serial case, this is easily dealt with by selecting a very small \ref +H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" -- 0.01 for example +-- as this still avoids the "metadata blizzard" phenomenon that appears when the +cache fills with dirty metadata and must then flush all of it before evicting an +entry to make space for a new entry. + +The problem is more difficult in the parallel case, as the \ref +H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" is used to ensure +that the cache contains clean entries that can be evicted to make space for new +entries when metadata writes are forbidden -- i.e. between sync points. + +This issue was discovered shortly before release 1.8.3 and an automated solution +has not been implemented. Should it become an issue for an application, try +manually setting the cache size to ~1.5 times the maximum working set size for +the application, and leave \ref H5AC_cache_config_t.min_clean_fraction +"min_clean_fraction" set to 0.3. + +You can approximate the working set size of your application via repeated calls +to H5Fget_mdc_size() and H5Fget_mdc_hit_rate() while running your program with +the cache resize code enabled. 
The maximum value returned by H5Fget_mdc_size() +should be a reasonable approximation -- particularly if the associated hit rate +is good. In the parallel case, there is also an interaction between \c +min_clean_fraction and \ref H5AC_cache_config_t.dirty_bytes_threshold +"dirty_bytes_threshold". Absent calls to H5Fflush() (discussed above), the upper +bound on the amount of dirty data in the metadata caches will oscillate between +(1 - \ref H5AC_cache_config_t.min_clean_fraction "min_clean_fraction") times +current maximum cache size, and that value plus the \ref +H5AC_cache_config_t.dirty_bytes_threshold "dirty_bytes_threshold". Needless to +say, it will be best if the \ref H5AC_cache_config_t.min_size "min_size", \ref +H5AC_cache_config_t.min_clean_fraction "min_clean_fraction", and the \ref +H5AC_cache_config_t.dirty_bytes_threshold "dirty_bytes_threshold" +are chosen so that the cache can't fill with dirty data. + +\subsection defaults Default Metadata Cache Configuration + +Starting with release 1.8.3, HDF5 provides different default metadata cache +configurations depending on whether the library is compiled for serial or +parallel. 
+ +The default configuration for the serial case is as follows: + +\code{.c} +{ + /* int version = */ H5C__CURR_AUTO_SIZE_CTL_VER, + /* hbool_t rpt_fcn_enabled = */ FALSE, + /* hbool_t open_trace_file = */ FALSE, + /* hbool_t close_trace_file = */ FALSE, + /* char trace_file_name[] = */ "", + /* hbool_t evictions_enabled = */ TRUE, + /* hbool_t set_initial_size = */ TRUE, + /* size_t initial_size = */ ( 2 * 1024 * 1024), + /* double min_clean_fraction = */ 0.01, + /* size_t max_size = */ (32 * 1024 * 1024), + /* size_t min_size = */ ( 1 * 1024 * 1024), + /* long int epoch_length = */ 50000, + /* enum H5C_cache_incr_mode incr_mode = */ H5C_incr__threshold, + /* double lower_hr_threshold = */ 0.9, + /* double increment = */ 2.0, + /* hbool_t apply_max_increment = */ TRUE, + /* size_t max_increment = */ (4 * 1024 * 1024), + /* enum H5C_cache_flash_incr_mode */ + /* flash_incr_mode = */ H5C_flash_incr__add_space, + /* double flash_multiple = */ 1.4, + /* double flash_threshold = */ 0.25, + /* enum H5C_cache_decr_mode decr_mode = */ H5C_decr__age_out_with_threshold, + /* double upper_hr_threshold = */ 0.999, + /* double decrement = */ 0.9, + /* hbool_t apply_max_decrement = */ TRUE, + /* size_t max_decrement = */ (1 * 1024 * 1024), + /* int epochs_before_eviction = */ 3, + /* hbool_t apply_empty_reserve = */ TRUE, + /* double empty_reserve = */ 0.1, + /* int dirty_bytes_threshold = */ (256 * 1024) +} +\endcode + +The default configuration for the parallel case is as follows: + +\code{.c} +{ + /* int version = */ H5C__CURR_AUTO_SIZE_CTL_VER, + /* hbool_t rpt_fcn_enabled = */ FALSE, + /* hbool_t open_trace_file = */ FALSE, + /* hbool_t close_trace_file = */ FALSE, + /* char trace_file_name[] = */ "", + /* hbool_t evictions_enabled = */ TRUE, + /* hbool_t set_initial_size = */ TRUE, + /* size_t initial_size = */ ( 2 * 1024 * 1024), + /* double min_clean_fraction = */ 0.3, + /* size_t max_size = */ (32 * 1024 * 1024), + /* size_t min_size = */ ( 1 * 1024 * 1024), + /* long 
int epoch_length = */ 50000, + /* enum H5C_cache_incr_mode incr_mode = */ H5C_incr__threshold, + /* double lower_hr_threshold = */ 0.9, + /* double increment = */ 2.0, + /* hbool_t apply_max_increment = */ TRUE, + /* size_t max_increment = */ (4 * 1024 * 1024), + /* enum H5C_cache_flash_incr_mode */ + /* flash_incr_mode = */ H5C_flash_incr__add_space, + /* double flash_multiple = */ 1.0, + /* double flash_threshold = */ 0.25, + /* enum H5C_cache_decr_mode decr_mode = */ H5C_decr__age_out_with_threshold, + /* double upper_hr_threshold = */ 0.999, + /* double decrement = */ 0.9, + /* hbool_t apply_max_decrement = */ TRUE, + /* size_t max_decrement = */ (1 * 1024 * 1024), + /* int epochs_before_eviction = */ 3, + /* hbool_t apply_empty_reserve = */ TRUE, + /* double empty_reserve = */ 0.1, + /* int dirty_bytes_threshold = */ (256 * 1024) +} +\endcode + +The default serial configuration should be adequate for most serial HDF5 users. + +The same may not be true for the default parallel configuration due to the +interaction between the \ref H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" and the cache size increase code. See +the Interactions section for further details. + +Should you need to change the default configuration, it can be found in +H5ACprivate.h. Look for the definition of H5AC__DEFAULT_RESIZE_CONFIG. + +\section controlling Controlling the New Metadata Cache Size From Your Program + +You have already seen how \ref H5AC_cache_config_t has facilities that allow you +to control the metadata cache size directly. Use H5Fget_mdc_config() and +H5Fset_mdc_config() to get and set the metadata cache configuration on an open +file. Use H5Pget_mdc_config() and H5Pset_mdc_config() to get and set the initial +metadata cache configuration in a file access property list. Recall that this +list contains configuration data used when opening a file. + +Use H5Fget_mdc_hit_rate() to get the average hit rate since the last time the +hit rate stats were reset. 
This happens automatically at the beginning of each +epoch if the adaptive cache resize code is enabled. You can also do it manually +with H5Freset_mdc_hit_rate_stats(). Be careful about doing this if the adaptive +cache resize code is enabled, as you may confuse it. + +Use H5Fget_mdc_size() to get metadata cache size data on an open file. + +Finally, note that cache size and cache footprint are two different things -- in +my tests, the cache footprint (as inferred from the UNIX \c top command) is +typically about three times the maximum cache size. I haven't tracked it down +yet, but I would guess that most of this is due to the very small typical cache +entry size combined with the rather large size of the cache entry header +structure. This should be investigated further, but there are other matters of +higher priority. + +\section news New Metadata Cache Debugging Facilities + +The new metadata cache has a variety of debugging facilities that may be of +use. I doubt that any other than the report function and the trace file will +ever be accessible via the API, but they are relatively easy to turn on in the +source code. + +Note that none of this should be viewed as supported -- it is described here on +the off chance that you want to use it, but you are on your own if you do. Also, +there are no promises as to consistency between versions. + +As mentioned above, you can use the \ref H5AC_cache_config_t.rpt_fcn_enabled "rpt_fcn_enabled" field of the +configuration structure to enable the default reporting function +(H5C_def_auto_resize_rpt_fcn() in H5C.c). If this function doesn't work for you, +you will have to write your own. In particular, remember that it uses \c stdout, +so it will probably be unhappy under Windows. + +Again, remember that this facility is not supported. Further, it is likely to +change every time I do any serious work on the cache. + +There is also an extensive statistics collection code. 
Use +H5C_COLLECT_CACHE_STATS and H5C_COLLECT_CACHE_ENTRY_STATS in H5Cprivate.h to +turn this on. If you also turn on H5AC_DUMP_STATS_ON_CLOSE in H5ACprivate.h, +stats will be dumped when you close a file. Alternatively you can call +H5C_stats() and H5C_stats__reset() within the library to dump and reset +stats. Both of these functions are defined in H5C.c. + +Finally, the cache also contains extensive sanity checking code. Much of this +is turned on when you compile in debug mode, but to enable the full suite, turn +on H5C_DO_SANITY_CHECKS in H5Cprivate.h. + +\section trouble Trouble Shooting + +Absent major bugs in the cache, the only troubleshooting you should have to do +is diagnosing and fixing problems with your cache configuration. + +Assuming it runs on your platform (I've only used it under Linux), the reporting +function is probably the most convenient diagnosis tool. However, since it is +unsupported code, I will not discuss it further beyond directing you to the +source (H5C_def_auto_resize_rpt_fcn() in H5C.c). + +Absent the reporting function, regular calls to H5Fget_mdc_hit_rate() should +give you a good idea of the hit rate over time. Remember that the hit rate stats +are reset at the end of each epoch (when adaptive cache resizing is enabled), so +you should expect some jitter. + +Similar calls to H5Fget_mdc_size() should allow you to monitor cache size and +the fraction of the current maximum cache size that is actually in use. + +If the hit rate is consistently low, and the cache is at its current maximum +size, increasing the maximum size is an obvious fix. + +If you see hit rate and cache size oscillations, try disabling adaptive cache +resizing and setting a fixed cache size a bit greater than the high end of the +cache size oscillations you observed. + +If the hit rate oscillations don't go away, you are probably looking at a +feature of your application that can't be helped without major changes to the +cache. 
Please send along a description of the situation. + +If the oscillations do go away, you may be able to come up with a configuration +that deals with the situation. If that fails, control the cache size manually, +and write to me, so I can try to develop an adaptive resize algorithm that works +in your case. + +Needless to say, you should give the cache a few epochs to adapt to +circumstances. If that is too slow for you, try manual cache size control. + +If you find it necessary to disable evictions, you may find it useful to enable +the internal statistics collection code mentioned above in the section on +debugging facilities. + +Amongst many other things, the stats code will report the maximum cache size, +and the average successful and unsuccessful search depths in the hash table. If +these latter figures are significantly above 1, you should increase the size of +the hash table. + + */
\ No newline at end of file diff --git a/doxygen/dox/OtherSpecs.dox b/doxygen/dox/OtherSpecs.dox new file mode 100644 index 0000000..e53f26e --- /dev/null +++ b/doxygen/dox/OtherSpecs.dox @@ -0,0 +1,11 @@ +/** \page IMG HDF5 Image and Palette Specification Version 1.2 + +\htmlinclude ImageSpec.html + +*/ + +/** \page TBL HDF5 Table Specification Version 1.0 + +\htmlinclude TableSpec.html + +*/ diff --git a/doxygen/dox/Overview.dox b/doxygen/dox/Overview.dox new file mode 100644 index 0000000..754722e --- /dev/null +++ b/doxygen/dox/Overview.dox @@ -0,0 +1,32 @@ + +/** \mainpage notitle + +This is the documentation set for HDF5. You can +<a href="hdf5-doc.tgz">download</a> it as a tgz archive for offline reading. + +This is the documentation set for HDF5 in terms of specifications and software +developed and maintained by <a href="https://www.hdfgroup.org/">The HDF +Group</a>. It is impractical to document the entire HDF5 ecosystem in one place, +and you should also consult the documentation sets of the many outstanding +community projects. + +For a first contact with HDF5, the best place is to have a look at the \link +GettingStarted getting started \endlink page that shows you how to write and +compile your first program with HDF5. + +The \b main \b documentation is organized by documentation flavor. Most +technical documentation consists to varying degrees of information related to +<em>tasks</em>, <em>concepts</em>, or <em>reference</em> material. As its title +suggests, the \link RM Reference Manual \endlink is 100% reference material, +while the \link Cookbook \endlink is focused on tasks. The different guide-type +documents cover a mix of tasks, concepts, and reference, to help a certain +<em>audience</em> succeed. + +Finally, do not miss the search engine (top right-hand corner)! If you are +looking for a specific function, it'll take you there directly. If unsure, it'll +give you an idea of what's on offer and a few promising leads. 
+ +\par ToDo List + There is plenty of <a href="./todo.html">unfinished business</a>. + +*/ diff --git a/doxygen/dox/ReferenceManual.dox b/doxygen/dox/ReferenceManual.dox new file mode 100644 index 0000000..0e0a494 --- /dev/null +++ b/doxygen/dox/ReferenceManual.dox @@ -0,0 +1,40 @@ +/** \page RM Reference Manual + +The functions provided by the HDF5 C-API are grouped into the following +\Emph{modules}: + +\li \ref H5A "Attributes" — Management of HDF5 attributes (\ref H5A) +\li \ref H5D "Datasets" — Management of HDF5 datasets (\ref H5D) +\li \ref H5S "Dataspaces" — Management of HDF5 dataspaces which describe the shape of datasets and attributes (\ref H5S) +\li \ref H5T "Datatypes" — Management of datatypes which describe elements of datasets and attributes (\ref H5T) +\li \ref H5E "Error Handling" — Functions for handling HDF5 errors (\ref H5E) +\li \ref H5F "Files" — Management of HDF5 files (\ref H5F) +\li \ref H5Z "Filters" — Configuration of filters that process data during I/O operation (\ref H5Z) +\li \ref H5G "Groups" — Management of groups in HDF5 files (\ref H5G) +\li \ref H5I "Identifiers" — Management of object identifiers and object names (\ref H5I) +\li \ref H5 "Library" — General purpose library functions (\ref H5) +\li \ref H5L "Links" — Management of links in HDF5 groups (\ref H5L) +\li \ref H5O "Objects" — Management of objects in HDF5 files (\ref H5O) +\li \ref H5PL "Plugins" — Programmatic control over dynamically loaded plugins (\ref H5PL) +\li \ref H5P "Property Lists" — Management of property lists to control HDF5 library behavior (\ref H5P) +\li \ref H5R "References" — Management of references to specific objects and data regions in an HDF5 file (\ref H5R) + +\par Asynchronous Functions + A subset of functions has \ref ASYNC "asynchronous variants". + +\par API Versioning + See \ref api-compat-macros + +\par Deprecated Functions and Types + A list of deprecated functions and types can be found + <a href="./deprecated.html">here</a>. 
+ +\par Etiquette + Here are a few simple rules to follow: + \li \Bold{Handle discipline:} If you acquire a handle (by creation or copy), \Emph{you own it!} (..., i.e., you have to close it.) + \li \Bold{Dynamic memory allocation:} ... + \li \Bold{Use of locations:} Identifier + name combo + +\cpp_c_api_note + +*/
\ No newline at end of file diff --git a/doxygen/dox/Specifications.dox b/doxygen/dox/Specifications.dox new file mode 100644 index 0000000..4ae48d0 --- /dev/null +++ b/doxygen/dox/Specifications.dox @@ -0,0 +1,22 @@ +/** \page SPEC Specifications + +\section DDL + +\li \ref DDLBNF110 "DDL in BNF through HDF5 1.10" +\li \ref DDLBNF112 "DDL in BNF for HDF5 1.12 and above" + +\section File Format + +\li \ref FMT1 "HDF5 File Format Specification Version 1.0" +\li \ref FMT11 "HDF5 File Format Specification Version 1.1" +\li \ref FMT2 "HDF5 File Format Specification Version 2.0" +\li \ref FMT3 "HDF5 File Format Specification Version 3.0" + +\section Other + +\li \ref IMG "HDF5 Image and Palette Specification Version 1.2" +\li \ref TBL "HDF5 Table Specification Version 1.0" +\li <a href="https://support.hdfgroup.org/HDF5/doc/HL/H5DS_Spec.pdf"> + HDF5 Dimension Scale Specification</a> + +*/
\ No newline at end of file diff --git a/doxygen/dox/TechnicalNotes.dox b/doxygen/dox/TechnicalNotes.dox new file mode 100644 index 0000000..2bda175 --- /dev/null +++ b/doxygen/dox/TechnicalNotes.dox @@ -0,0 +1,20 @@ +/** \page TN Technical Notes + +\li \link api-compat-macros API Compatibility Macros \endlink +\li \ref TNMDC "Metadata Caching in HDF5" +\li \ref MT "Thread Safe library" +\li \ref VFL "Virtual File Layer" + + */ + +/** \page MT HDF5 Thread Safe library + +\htmlinclude ThreadSafeLibrary.html + +*/ + +/** \page VFL HDF5 Virtual File Layer + +\htmlinclude VFL.html + +*/ diff --git a/doxygen/dox/api-compat-macros.dox b/doxygen/dox/api-compat-macros.dox index 6b85ccb..4a1578d 100644 --- a/doxygen/dox/api-compat-macros.dox +++ b/doxygen/dox/api-compat-macros.dox @@ -1,5 +1,4 @@ /** \page api-compat-macros API Compatibility Macros - \tableofcontents \section audience Audience The target audience for this document has existing applications that use the diff --git a/doxygen/dox/mainpage.dox b/doxygen/dox/mainpage.dox deleted file mode 100644 index 83fc323..0000000 --- a/doxygen/dox/mainpage.dox +++ /dev/null @@ -1,36 +0,0 @@ -/*! \mainpage API Documentation for HDF5 Version 1.13 (Draft) - * - * \todo Fix the search form for server deployments. - * \todo Make it mobile-friendly - * - * \section intro_sec Introduction - * - * \todo Write an introduction. - * - * \section quick_links Quick Links - * - * <ul> - * <li>\ref PDT "Predefined Datatypes"</li> - * <li>\ref api-compat-macros "API Compatibility Macros"</li> - * <li><a href="https://hdf5.wiki/">HDF5 Wiki</a></li> - * </ul> - * - * \section using_locations The Use of Locations (Identifier + Name) in the HDF5 API - * - * \todo Make this crystal clear! - * - * \section cpp_note Programming Note for C++ Developers Using C Functions - * - * If a C routine that takes a function pointer as an argument is called from - * within C++ code, the C routine should be returned from normally. 
- * - * Examples of this kind of routine include callbacks such as H5Pset_elink_cb() - * and H5Pset_type_conv_cb() and functions such as H5Tconvert() and H5Ewalk2(). - * - * Exiting the routine in its normal fashion allows the HDF5 C library to clean - * up its work properly. In other words, if the C++ application jumps out of - * the routine back to the C++ \c catch statement, the library is not given the - * opportunity to close any temporary data structures that were set up when the - * routine was called. The C++ application should save some state as the - * routine is started so that any problem that occurs might be diagnosed. - */
\ No newline at end of file diff --git a/doxygen/dox/maybe_metadata_reads.dox b/doxygen/dox/maybe_metadata_reads.dox new file mode 100644 index 0000000..25c905f --- /dev/null +++ b/doxygen/dox/maybe_metadata_reads.dox @@ -0,0 +1,82 @@ +/** + * \page maybe_metadata_reads Functions with No Access Property List Parameter that May Generate Metadata Reads + * + * \ingroup GACPL + * + * Currently there are several operations in HDF5 that can issue metadata reads + * from the metadata cache, but that take no property list. It is therefore not + * possible to set a collective requirement individually for those operations. The + * only solution with the HDF5 1.10.0 release is to set the collective requirement + * globally on H5Fopen() or H5Fcreate() for all metadata operations to be + * collective. + * + * The following is a list of those functions in the HDF5 library. This list is + * integral to the discussion in the H5Pset_all_coll_metadata_ops() entry: + * + * <pre> + * + * H5Awrite() + * H5Aread() + * H5Arename() + * H5Aiterate2() + * H5Adelete() + * H5Aexists() + * + * H5Dget_space_status() + * H5Dget_storage_size() + * H5Dset_extent() + * H5Ddebug() + * H5Dclose() + * H5Dget_create_plist() + * H5Dget_space() (when dataset is a virtual dataset) + * + * H5Gget_create_plist() + * H5Gget_info() + * H5Gclose() + * + * H5Literate() + * H5Lvisit() + * + * H5Rcreate() + * H5Rdereference2() (when reference is an object reference) + * H5Rget_region() + * H5Rget_obj_type2() + * H5Rget_name() + * + * H5Ocopy() + * H5Oopen_by_addr() + * H5Oincr_refcount() + * H5Odecr_refcount() + * H5Oget_info() + * H5Oset_comment() + * H5Ovisit() + * + * H5Fis_hdf5() + * H5Fflush() + * H5Fclose() + * H5Fget_file_image() + * H5Freopen() + * H5Fget_freespace() + * H5Fget_info2() + * H5Fget_free_sections() + * H5Fmount() + * H5Funmount() + * + * H5Iget_name() + * + * H5Tget_create_plist() + * H5Tclose() + * + * H5Zunregister() + * </pre> + * + * In addition, \b most deprecated functions fall into 
this category. + * + * The HDF Group may address the above limitation in a future major release, but + * no decision has been made at this time. Such a change might, for example, + * include adding new versions of some or all the above functions with an extra + * property list parameter to allow an individual setting for the collective + * calling requirement. + * + * \sa_metadata_ops + */ diff --git a/doxygen/examples/FF-IH_FileGroup.gif b/doxygen/examples/FF-IH_FileGroup.gif Binary files differnew file mode 100644 index 0000000..b0d76f5 --- /dev/null +++ b/doxygen/examples/FF-IH_FileGroup.gif diff --git a/doxygen/examples/FF-IH_FileObject.gif b/doxygen/examples/FF-IH_FileObject.gif Binary files differnew file mode 100644 index 0000000..8eba623 --- /dev/null +++ b/doxygen/examples/FF-IH_FileObject.gif diff --git a/doxygen/examples/FileFormatSpecChunkDiagram.jpg b/doxygen/examples/FileFormatSpecChunkDiagram.jpg Binary files differnew file mode 100644 index 0000000..03fd90a --- /dev/null +++ b/doxygen/examples/FileFormatSpecChunkDiagram.jpg diff --git a/doxygen/examples/H5.format.1.0.html b/doxygen/examples/H5.format.1.0.html new file mode 100644 index 0000000..2d3ffbe --- /dev/null +++ b/doxygen/examples/H5.format.1.0.html @@ -0,0 +1,4050 @@ +<html> + <head> + <title> + HDF5 File Format Specification + </title> + </head> + <body bgcolor="#FFFFFF"> + + <center> + <table border=0 width=90%> + <tr> + <td valign=top> + <ol type=I> + <li><a href="#Intro">Introduction</a> + <li><a href="#BootBlock">Disk Format Level 0 - File Signature and Super Block</a> + <li><a href="#Group">Disk Format Level 1 - File Infrastructure</a> + <font size=-2> + <ol type=A> + <li><a href="#Btrees">Disk Format Level 1A - B-link Trees and B-tree Nodes</a> + <li><a href="#SymbolTable">Disk Format Level 1B - Group</a> + <li><a href="#SymbolTableEntry">Disk Format Level 1C - Group Entry</a> + <li><a href="#LocalHeap">Disk Format Level 1D - Local Heaps</a> + <li><a href="#GlobalHeap">Disk Format 
Level 1E - Global Heap</a> + <li><a href="#FreeSpaceIndex">Disk Format Level 1F - Free-space Index</a> + </ol> + </font> + <li><a href="#DataObject">Disk Format Level 2 - Data Objects</a> + <font size=-2> + <ol type=A> + <li><a href="#ObjectHeader">Disk Format Level 2a - Data Object Headers</a> + <ol type=1> + <li><a href="#NILMessage">Name: NIL</a> <!-- 0x0000 --> + <li><a href="#SimpleDataSpace">Name: Simple Dataspace</a> <!-- 0x0001 --> +<!-- + <li><a href="#DataSpaceMessage">Name: Complex Dataspace</a> --> <!-- 0x0002 --> + <li><a href="#DataTypeMessage">Name: Datatype</a> <!-- 0x0003 --> + <li><a href="#FillValueMessage">Name: Data Storage - Fill Value</a> <!-- 0x0004 --> + <li><a href="#ReservedMessage_0005">Name: Reserved - not assigned yet</a> <!-- 0x0005 --> + </ol> + </ol> + </font> + </ol> + </td><td> </td><td valign=top> + <ol type=I> + + <li><a href="#DataObject">Disk Format Level 2 - Data Objects</a> + <font size=-2><i>(Continued)</i> + <ol type=A> + <li><a href="#ObjectHeader">Disk Format Level 2a - Data Object Headers</a><i>(Continued)</i> + <ol type=1> + <li><a href="#CompactDataStorageMessage">Name: Data Storage - Compact</a> <!-- 0x0006 --> + <li><a href="#ExternalFileListMessage">Name: Data Storage - External Data Files</a> <!-- 0x0007 --> + <li><a href="#LayoutMessage">Name: Data Storage - Layout</a> <!-- 0x0008 --> + <li><a href="#ReservedMessage_0009">Name: Reserved - not assigned yet</a> <!-- 0x0009 --> + <li><a href="#ReservedMessage_000A">Name: Reserved - not assigned yet</a> <!-- 0x000a --> + <li><a href="#FilterMessage">Name: Data Storage - Filter Pipeline</a> <!-- 0x000b --> + <li><a href="#AttributeMessage">Name: Attribute</a> <!-- 0x000c --> + <li><a href="#NameMessage">Name: Object Name</a> <!-- 0x000d --> + <li><a href="#ModifiedMessage">Name: Object Modification Date and Time</a> <!-- 0x000e --> + <li><a href="#SharedMessage">Name: Shared Object Message</a> <!-- 0x000f --> + <li><a href="#ContinuationMessage">Name: Object Header 
Continuation</a> <!-- 0x0010 --> + <li><a href="#SymbolTableMessage">Name: Group Message</a> <!-- 0x0011 --> + </ol> + <li><a href="#SharedObjectHeader">Disk Format: Level 2b - Shared Data Object Headers</a> + <li><a href="#DataStorage">Disk Format: Level 2c - Data Object Data Storage</a> + </ol> + </font> + </ol> +</td></tr> +</table> +</center> + +<br><br> + + + <h2>Introduction</h2> + + <table align=right width=100> + <tr><td> </td><td align=center> + <hr> + <img src="FF-IH_FileGroup.gif" alt="HDF5 Groups" hspace=15 vspace=15> + </td><td> </td></tr> + <tr><td> </td><td align=center> + <strong>Figure 1:</strong> Relationships among the HDF5 root group, other groups, and objects + <hr> + </td><td> </td></tr> + + <tr><td> </td><td align=center> + <img src="FF-IH_FileObject.gif" alt="HDF5 Objects" hspace=15 vspace=15> + </td><td> </td></tr> + <tr><td> </td><td align=center> + <strong>Figure 2:</strong> HDF5 objects -- datasets, datatypes, or dataspaces + <hr> + </td><td> </td></tr> + </table> + + + <P>The format of an HDF5 file on disk encompasses several + key ideas of the HDF4 and AIO file formats as well as + addressing some shortcomings therein. The new format is + more self-describing than the HDF4 format and is more + uniformly applied to data objects in the file. + + <P>An HDF5 file appears to the user as a directed graph. 
+ The nodes of this graph are the higher-level HDF5 objects + that are exposed by the HDF5 APIs: + + <ul> + <li>Groups + <li>Datasets + <li>Datatypes + <li>Dataspaces + </ul> + + <P>At the lowest level, as information is actually written to the disk, + an HDF5 file is made up of the following objects: + <ul> + <li>A super block + <li>B-tree nodes (containing either symbol nodes or raw data chunks) + <li>Object headers + + <li>Collections + <li>Local heaps + <li>Free space + </ul> + + The HDF5 library uses these lower-level objects to represent the + higher-level objects that are then presented to the user or + to applications through the APIs. + For instance, a group is an object header that contains a message that + points to a local heap and to a B-tree which points to symbol nodes. + A dataset is an object header that contains messages that describe + datatype, space, layout, filters, external files, fill value, etc + with the layout message pointing to either a raw data chunk or to a + B-tree that points to raw data chunks. + + + <h3>This Document</h3> + + <p>This document describes the lower-level data objects; + the higher-level objects and their properties are described + in the <a href="H5.user.html"><cite>HDF5 User's Guide</cite></a>. + + +<!-- +<blockquote> +<pre> + +Elena> NOTE: give reference to the detailed discussion of the B-trees +Elena> when needed. Right now we do not have specification (only general one) +Elena> for the Symbol Table B-trees and B-trees used to manage chunked datasets. +Elena> B-trees +Elena> General Discussion +Elena> Object related discussions +Elena> Symbol Tables +Elena> Global heap +Elena> "Free-space object" + + +</pre> +</blockquote> +--> + + + + <P>Three levels of information comprise the file format. + Level 0 contains basic information for identifying and + defining information about the file. 
Level 1 information contains + the group information (stored as a B-tree) and is used as the + index for all the objects in the file. Level 2 is the rest + of the file and contains all of the data objects, with each object + partitioned into header information, also known as + <em>meta information</em>, and data. + + <p>The sizes of various fields in the following layout tables are + determined by looking at the number of columns the field spans + in the table. There are three exceptions: (1) The size may be + overridden by specifying a size in parentheses, (2) the size of + addresses is determined by the <em>Size of Offsets</em> field + in the super block, and (3) the size of size fields is determined + by the <em>Size of Lengths</em> field in the super block. + + + +<br><br> +<br><br> + + + <h2><a name="BootBlock"> + Disk Format: Level 0 - File Signature and Super Block</a></h2> + + <P>The super block may begin at certain predefined offsets within + the HDF5 file, allowing a block of unspecified content for + users to place additional information at the beginning (and + end) of the HDF5 file without limiting the HDF5 library's + ability to manage the objects within the file itself. This + feature was designed to accommodate wrapping an HDF5 file in + another file format or adding descriptive information to the + file without requiring the modification of the actual file's + information. The super block is located by searching for the + HDF5 file signature at byte offset 0, byte offset 512 and at + successive locations in the file, each a multiple of two of + the previous location, i.e. 0, 512, 1024, 2048, etc. + + <P>The super block is composed of a file signature, followed by + super block and group version numbers, information + about the sizes of offset and length values used to describe + items within the file, the size of each group page, + and a group entry for the root object in the file. 
+ + <p> + <center> + <table border align=center cellpadding=4 width="80%"> + <caption align=top> + <B>HDF5 Super Block Layout</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td colspan=4><br>HDF5 File Signature (8 bytes)<br><br></td> + </tr> + + <tr align=center> + <td>Version # of Super Block</td> + <td>Version # of Global Free-space Storage</td> + <td>Version # of Group</td> + <td>Reserved</td> + </tr> + + <tr align=center> + <td>Version # of Shared Header Message Format</td> + <td>Size of Offsets</td> + <td>Size of Lengths</td> + <td>Reserved (zero)</td> + </tr> + + <tr align=center> + <td colspan=2>Group Leaf Node K</td> + <td colspan=2>Group Internal Node K</td> + </tr> + + <tr align=center> + <td colspan=4>File Consistency Flags</td> + </tr> + + <tr align=center> + <td colspan=4>Base Address*</td> + </tr> + + <tr align=center> + <td colspan=4>Address of Global Free-space Heap*</td> + </tr> + + <tr align=center> + <td colspan=4>End of File Address*</td> + </tr> + + <tr align=center> + <td colspan=4>Driver Information Block Address*</td> + </tr> + + <tr align=center> + <td colspan=4>Root Group Address*</td> + </tr> + </table> + + <table width="80%" border=0> + <tr><td> + <div align=right> + (Items marked with an asterisk (*) in the above table + <br> + are of the size specified in "Size of Offsets.") + </div> + </td></tr> + </table> + </center> + + <p> + <center> + <table width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>File Signature</td> + <td>This field contains a constant value and can be used to + quickly identify a file as being an HDF5 file. The + constant value is designed to allow easy identification of + an HDF5 file and to allow certain types of data corruption + to be detected. 
The file signature of an HDF5 file always + contains the following values: + + <br><br><center> + <table border align=center cellpadding=4 width="100%"> + <tr align=center> + <td>decimal</td> + <td width="8%">137</td> + <td width="8%">72</td> + <td width="8%">68</td> + <td width="8%">70</td> + <td width="8%">13</td> + <td width="8%">10</td> + <td width="8%">26</td> + <td width="8%">10</td> + </tr> + + <tr align=center> + <td>hexadecimal</td> + <td width="8%">89</td> + <td width="8%">48</td> + <td width="8%">44</td> + <td width="8%">46</td> + <td width="8%">0d</td> + <td width="8%">0a</td> + <td width="8%">1a</td> + <td width="8%">0a</td> + </tr> + + <tr align=center> + <td>ASCII C Notation</td> + <td width="8%">\211</td> + <td width="8%">H</td> + <td width="8%">D</td> + <td width="8%">F</td> + <td width="8%">\r</td> + <td width="8%">\n</td> + <td width="8%">\032</td> + <td width="8%">\n</td> + </tr> + </table> + </center> + <br> + + This signature both identifies the file as an HDF5 file + and provides for immediate detection of common + file-transfer problems. The first two bytes distinguish + HDF5 files on systems that expect the first two bytes to + identify the file type uniquely. The first byte is + chosen as a non-ASCII value to reduce the probability + that a text file may be misrecognized as an HDF5 file; + also, it catches bad file transfers that clear bit + 7. Bytes two through four name the format. The CR-LF + sequence catches bad file transfers that alter newline + sequences. The control-Z character stops file display + under MS-DOS. The final line feed checks for the inverse + of the CR-LF translation problem. (This is a direct + descendent of the PNG file signature.)</td> + </tr> + + <tr valign=top> + <td>Version Number of the Super Block</td> + <td>This value is used to determine the format of the + information in the super block. 
When the format of the + information in the super block is changed, the version number + is incremented to the next integer and can be used to + determine how the information in the super block is + formatted.</td> + </tr> + + <tr valign=top> + <td>Version Number of the Global Free-space Heap</td> + <td>This value is used to determine the format of the + information in the Global Free-space Heap.</td> + </tr> + + <tr valign=top> + <td>Version Number of the Group</td> + <td>This value is used to determine the format of the + information in the Group. When the format of + the information in the Group is changed, the + version number is incremented to the next integer and can be + used to determine how the information in the Group + is formatted.</td> + </tr> + + <tr valign=top> + <td>Version Number of the Shared Header Message Format</td> + <td>This value is used to determine the format of the + information in a shared object header message, which is + stored in the global small-data heap. Since the format + of the shared header messages differs from the private + header messages, a version number is used to identify changes + in the format.</td> + </tr> + + <tr valign=top> + <td>Size of Offsets</td> + <td>This value contains the number of bytes used to store + addresses in the file. The values for the addresses of + objects in the file are offsets relative to a base address, + usually the address of the super block signature. This + allows a wrapper to be added after the file is created + without invalidating the internal offset locations.</td> + </tr> + + <tr valign=top> + <td>Size of Lengths</td> + <td>This value contains the number of bytes used to store + the size of an object.</td> + </tr> + + <tr valign=top> + <td>Group Leaf Node K</td> + <td>Each leaf node of a group B-tree will have at + least this many entries but not more than twice this + many. 
If a group has a single leaf node then it + may have fewer entries.</td> + </tr> + + <tr valign=top> + <td>Group Internal Node K</td> + <td>Each internal node of a group B-tree will have + at least K pointers to other nodes but not more than 2K + pointers. If the group has only one internal + node then it might have fewer than K pointers.</td> + </tr> + + <tr valign=top> + <td>Bytes per B-tree Page</td> + <td>This value contains the number of bytes used for symbol + pairs per page of the B-trees used in the file. All + B-tree pages will have the same size per page. + <br> + For 32-bit file offsets, 340 objects is the maximum + per 4KB page; for 64-bit file offset, 254 objects will fit + per 4KB page. In general, the equation is: + <br> + <code> <<i>number of objects</i>> = + <br> + FLOOR((<<i>page size</i>> - <<i>offset size</i>>) / + <br> + (<<i>Symbol size</i>> + <<i>offset size</i>>)) + - 1 </code></td> + </tr> + + <tr valign=top> + <td>File Consistency Flags</td> + <td>This value contains flags to indicate information + about the consistency of the information contained + within the file. Currently, the following bit flags are + defined: + <ul> + <li>Bit 0 set indicates that the file is opened for + write-access. + <li>Bit 1 set indicates that the file has + been verified for consistency and is guaranteed to be + consistent with the format defined in this document. + <li>Bits 2-31 are reserved for future use. + </ul> + Bit 0 should be + set as the first action when a file is opened for write + access and should be cleared only as the final action + when closing a file. Bit 1 should be cleared during + normal access to a file and only set after the file's + consistency is guaranteed by the library or a + consistency utility.</td> + </tr> + + <tr valign=top> + <td>Base Address</td> + <td>This is the absolute file address of the first byte of + the HDF5 data within the file. 
The library currently + constrains this value to be the absolute file address + of the super block itself when creating new files; + future versions of the library may provide greater + flexibility. Unless otherwise noted, + all other file addresses are relative to this base + address.</td> + </tr> + + <tr valign=top> + <td>Address of Global Free-space Heap</td> + <td>Free-space management is not yet defined in the HDF5 + file format and is not handled by the library. + Currently this field always contains the + undefined address <code>0xfff...ff</code>.</td> +<!-- + <td>This value contains the relative address of the B-tree + used to manage the blocks of data which are unused in the + file currently. The free-space heap is used to manage the + blocks of bytes at the file-level which become unused when + objects are moved within the file.</td> +--> + </tr> + + <tr valign=top> + <td>End of File Address</td> + <td>This is the relative file address of the first byte past + the end of all HDF5 data. It is used to determine whether a + file has been accidentally truncated and as an address where + file data allocation can occur if the free list is not + used.</td> + </tr> + + <tr valign=top> + <td>Driver Information Block Address</td> + <td>This is the relative file address of the file driver + information block which contains driver-specific + information needed to reopen the file. If there is no + driver information block then this entry should be the + undefined address (all bits set).</td> + </tr> + + <tr valign=top> + <td>Root Group Address</td> + <td>This is the address of the root group (described later + in this document), which serves as the entry point into + the group graph.</td> + </tr> + </table> + </center> + + + <p>The <em>file driver information block</em> is an optional region of the + file which contains information needed by the file driver in + order to reopen a file. 
The format of the file driver information + block is: + + <p> + <center> + <table border align=center cellpadding=4 width="80%"> + <caption align=top> + <B>Driver Information Block</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td>Version</td> + <td colspan=3>Reserved (zero)</td> + </tr> + + <tr align=center> + <td colspan=4>Driver Information Size (4 bytes)</td> + </tr> + + <tr align=center> + <td colspan=4><br>Driver Identification (8 bytes)<br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br><br>Driver Information<br><br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Version</td> + <td>The version number of the driver information block. The + file format documented here is version zero.</td> + </tr> + + <tr valign=top> + <td>Driver Information Size</td> + <td>The size in bytes of the Driver Information part of this + structure.</td> + </tr> + + <tr valign=top> + <td>Driver Identification</td> + <td>This is an eight-byte ASCII string without null + termination which identifies the driver and version number + of the Driver Information block. The predefined drivers + supplied with the HDF5 library are identified by the + letters <code>NCSA</code> followed by the first four characters of + the driver name. If the Driver Information block is not + the original version then the last letter(s) of the + identification will be replaced by a version number in + ASCII. + For example, the various versions of the <em>family driver</em> + will be identified by <code>NCSAfami</code>, <code>NCSAfam0</code>, + <code>NCSAfam1</code>, etc. + (<code>NCSAfami</code> is simply <code>NCSAfamily</code> truncated + to eight characters. 
Subsequent identifiers will be created by + substituting sequential numerical values for the final character, + starting with zero.) + <p> + Identification for user-defined drivers + is arbitrary but should be unique.</td> + </tr> + + <tr valign=top> + <td>Driver Information</td> + <td>Driver information is stored in a format defined by the + file driver and encoded/decoded by the driver callbacks + invoked from the <code>H5FD_sb_encode</code> and + <code>H5FD_sb_decode</code> functions.</td> + </tr> + </table> + </center> + + + <br><br> + <br><br> + + + <h2><a name="Group"> + Disk Format: Level 1 - File Infrastructure</a></h2> + <h3><a name="Btrees">Disk Format: Level 1A - B-link Trees and B-tree Nodes</a></h3> + + <p>B-link trees allow flexible storage for objects which tend to grow + in ways that cause the object to be stored discontiguously. B-trees + are described in various algorithms books including "Introduction to + Algorithms" by Thomas H. Cormen, Charles E. Leiserson, and Ronald + L. Rivest. The B-link tree, in which the sibling nodes at a + particular level in the tree are stored in a doubly-linked list, + is described in the "Efficient Locking for Concurrent Operations + on B-trees" paper by Phillip Lehman and S. Bing Yao as published + in the <em>ACM Transactions on Database Systems</em>, Vol. 6, + No. 4, December 1981. + + <p>The B-link trees implemented by the file format contain one more + key than the number of children. In other words, each child + pointer out of a B-tree node has a left key and a right key. + The pointers out of internal nodes point to sub-trees while + the pointers out of leaf nodes point to symbol nodes and + raw data chunks. + Aside from that difference, internal nodes and leaf nodes + are identical. 
+ + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <B>B-tree Nodes</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4>Node Signature</td> + + <tr align=center> + <td>Node Type</td> + <td>Node Level</td> + <td colspan=2>Entries Used</td> + + <tr align=center> + <td colspan=4>Address of Left Sibling</td> + + <tr align=center> + <td colspan=4>Address of Right Sibling</td> + + <tr align=center> + <td colspan=4>Key 0 (variable size)</td> + + <tr align=center> + <td colspan=4>Address of Child 0</td> + + <tr align=center> + <td colspan=4>Key 1 (variable size)</td> + + <tr align=center> + <td colspan=4>Address of Child 1</td> + + <tr align=center> + <td colspan=4>...</td> + + <tr align=center> + <td colspan=4>Key 2<em>K</em> (variable size)</td> + + <tr align=center> + <td colspan=4>Address of Child 2<em>K</em></td> + + <tr align=center> + <td colspan=4>Key 2<em>K</em>+1 (variable size)</td> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Node Signature</td> + <td>The ASCII character string <code>TREE</code> is + used to indicate the + beginning of a B-link tree node. This gives file + consistency checking utilities a better chance of + reconstructing a damaged file.</td> + </tr> + + <tr valign=top> + <td>Node Type</td> + <td>Each B-link tree points to a particular type of data. + This field indicates the type of data as well as + implying the maximum degree <em>K</em> of the tree and + the size of each Key field. + <br> + <dl compact> + <dt>0 + <dd>This tree points to group nodes. + <dt>1 + <dd>This tree points to a new data chunk. 
+ </dl> + </td> + </tr> + + <tr valign=top> + <td>Node Level</td> + <td>The node level indicates the level at which this node + appears in the tree (leaf nodes are at level zero). Not + only does the level indicate whether child pointers + point to sub-trees or to data, but it can also be used + to help file consistency checking utilities reconstruct + damaged trees.</td> + </tr> + + <tr valign=top> + <td>Entries Used</td> + <td>This determines the number of children to which this + node points. All nodes of a particular type of tree + have the same maximum degree, but most nodes will point + to fewer than that number of children. The valid child + pointers and keys appear at the beginning of the node + and the unused pointers and keys appear at the end of + the node. The unused pointers and keys have undefined + values.</td> + </tr> + + <tr valign=top> + <td>Address of Left Sibling</td> + <td>This is the file address of the left sibling of the + current node relative to the super block. If the current + node is the left-most node at this level then this field + is the undefined address (all bits set).</td> + </tr> + + <tr valign=top> + <td>Address of Right Sibling</td> + <td>This is the file address of the right sibling of the + current node relative to the super block. If the current + node is the right-most node at this level then this + field is the undefined address (all bits set).</td> + </tr> + + <tr valign=top> + <td>Keys and Child Pointers</td> + <td>Each tree has 2<em>K</em>+1 keys with 2<em>K</em> + child pointers interleaved between the keys. The number + of keys and child pointers actually containing valid + values is determined by the <em>Entries Used</em> field. If + that field is <em>N</em> then the B-link tree contains + <em>N</em> child pointers and <em>N</em>+1 keys.</td> + </tr> + + <tr valign=top> + <td>Key</td> + <td>The format and size of the key values are determined by + the type of data to which this tree points. 
The keys are + ordered and are boundaries for the contents of the child + pointer; that is, the key values represented by child + <em>N</em> fall between Key <em>N</em> and Key + <em>N</em>+1. Whether the interval is open or closed on + each end is determined by the type of data to which the + tree points. + <p> + The format of the key depends on the node type. + For nodes of node type 1, the key is formatted as follows: + <center> + <table> + <tr valign=top align=left> + <td width=40%>Bytes 1-4</td> + <td>Size of chunk in bytes.</td> + </tr><tr valign=top align=left> + <td>Bytes 5-8</td> + <td>Filter mask, a 32-bit bitfield indicating which + filters have been applied to that chunk.</td> + </tr><tr valign=top align=left> + <td><i>N</i> fields of 8 bytes each</td> + <td>A 64-bit index indicating the offset of the + chunk within the dataset where <i>N</i> is the number + of dimensions of the dataset. For example, if + a chunk in a 3-dimensional dataset begins at the + position <code>[5,5,5]</code>, there will be three + such 8-byte indices, each with the value of + <code>5</code>.</td> + </tr> + </table> + </center> + <p> + For nodes of node type 0, the key is formatted as follows: + <center> + <table> + <tr valign=top align=left> + <td width=40%>A single field of <i>Size of Lengths</i> + bytes</td> + <td>Indicates the byte offset into the local heap + for the first object name in the subtree which + that key describes.</td> + </tr> + </table> + </center> + </td> + </tr> + + <tr valign=top> + <td>Child Pointers</td> + <td>The tree node contains file addresses of subtrees or + data depending on the node level. Nodes at Level 0 point + to data addresses, either data chunks or group nodes. 
+ Nodes at non-zero levels point to other nodes of the + same B-tree.</td> + </tr> + </table> + </center> + +<p> + Each B-tree node looks like this: + + <center> + <table> + <tr valign=top align=center> + <td>key[0]</td><td> </td> + <td>child[0]</td><td> </td> + <td>key[1]</td><td> </td> + <td>child[1]</td><td> </td> + <td>key[2]</td><td> </td> + <td>...</td><td> </td> + <td>...</td><td> </td> + <td>key[<i>N</i>-1]</td><td> </td> + <td>child[<i>N</i>-1]</td><td> </td> + <td>key[<i>N</i>]</td> + </tr> + </table> + </center> + + where child[<i>i</i>] is a pointer to a sub-tree (at a level + above Level 0) or to data (at Level 0). + Each key[<i>i</i>] describes an <i>item</i> stored by the B-tree + (a chunk or an object of a group node). The range of values + represented by child[<i>i</i>] are indicated by key[<i>i</i>] + and key[<i>i</i>+1]. + + + <p>The following question must next be answered: + "Is the value described by key[<i>i</i>] contained in + child[<i>i</i>-1] or in child[<i>i</i>]?" + The answer depends on the type of tree. + In trees for groups (node type 0) the object described by + key[<i>i</i>] is the greatest object contained in + child[<i>i</i>-1] while in chunk trees (node type 1) the + chunk described by key[<i>i</i>] is the least chunk in + child[<i>i</i>]. + + <p>That means that key[0] for group trees is sometimes unused; + it points to offset zero in the heap, which is always the + empty string and compares as "less-than" any valid object name. + + <p>And key[<i>N</i>] for chunk trees is sometimes unused; + it contains a chunk offset which compares as "greater-than" + any other chunk offset and has a chunk byte size of zero + to indicate that it is not actually allocated. + + + <h3><a name="SymbolTable">Disk Format: Level 1B - Group and Symbol Nodes</a></h3> + + <p>A group is an object internal to the file that allows + arbitrary nesting of objects (including other groups). 
+ A group maps a set of names to a set of file + addresses relative to the base address. Certain meta data + for an object to which the group points can be duplicated + in the group symbol table in addition to the object header. + + <p>An HDF5 object name space can be stored hierarchically by + partitioning the name into components and storing each + component in a group. The group entry for a + non-ultimate component points to the group containing + the next component. The group entry for the last + component points to the object being named. + + <p>A group is a collection of group nodes pointed + to by a B-link tree. Each group node contains entries + for one or more symbols. If an attempt is made to add a + symbol to an already full group node containing + 2<em>K</em> entries, then the node is split and one node + contains <em>K</em> symbols and the other contains + <em>K</em>+1 symbols. + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <B>Group Node (A Leaf of a B-tree)</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4>Node Signature</td> + + <tr align=center> + <td>Version Number</td> + <td>Reserved for Future Use</td> + <td colspan=2>Number of Symbols</td> + + <tr align=center> + <td colspan=4><br><br>Group Entries<br><br><br></td> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Node Signature</td> + <td>The ASCII character string <code>SNOD</code> is + used to indicate the + beginning of a group node. This gives file + consistency checking utilities a better chance of + reconstructing a damaged file.</td> + </tr> + + <tr valign=top> + <td>Version Number</td> + <td>The version number for the group node.
This + document describes version 1.</td> + </tr> + + <tr valign=top> + <td>Number of Symbols</td> + <td>Although all group nodes have the same length, + most contain fewer than the maximum possible number of + symbol entries. This field indicates how many entries + contain valid data. The valid entries are packed at the + beginning of the group node while the remaining + entries contain undefined values.</td> + </tr> + + <tr valign=top> + <td>Group Entries</td> + <td>Each symbol has an entry in the group node. + The format of the entry is described below.</td> + </tr> + </table> + </center> + + <h3><a name="SymbolTableEntry"> + Disk Format: Level 1C - Group Entry </a></h3> + + <p>Each group entry in a group node is designed + to allow for very fast browsing of stored objects. + Toward that design goal, the group entries + include space for caching certain constant meta data from the + object header. + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <B>Group Entry</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td colspan=4>Name Offset (<size> bytes)</td> + </tr> + + <tr align=center> + <td colspan=4>Object Header Address</td> + </tr> + + <tr align=center> + <td colspan=4>Cache Type</td> + </tr> + + <tr align=center> + <td colspan=4>Reserved</td> + </tr> + + <tr align=center> + <td colspan=4><br><br>Scratch-pad Space (16 bytes)<br><br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Name Offset</td> + <td>This is the byte offset into the group local + heap for the name of the object. 
The name is null + terminated.</td> + </tr> + + <tr valign=top> + <td>Object Header Address</td> + <td>Every object has an object header which serves as a + permanent location for the object's meta data. In addition + to appearing in the object header, some meta data can be + cached in the scratch-pad space.</td> + </tr> + + <tr valign=top> + <td>Cache Type</td> + <td>The cache type is determined from the object header. + It also determines the format for the scratch-pad space. + <br> + <dl compact> + <dt>0 + <dd>No data is cached by the group entry. This + is guaranteed to be the case when an object header + has a link count greater than one. + + <dt>1 + <dd>Object header meta data is cached in the group + entry. This implies that the group + entry refers to another group. + + <dt>2 + <dd>The entry is a symbolic link. The first four bytes + of the scratch-pad space are the offset into the local + heap for the link value. The object header address + will be undefined. + + <dt><em>N</em> + <dd>Other cache values can be defined later and + libraries that do not understand the new values will + still work properly. + </dl> + </td> + </tr> + + <tr valign=top> + <td>Reserved</td> + <td>These four bytes are present so that the scratch-pad + space is aligned on an eight-byte boundary. They are + always set to zero.</td> + </tr> + + <tr valign=top> + <td>Scratch-pad Space</td> + <td>This space is used for different purposes, depending + on the value of the Cache Type field. Any meta-data + about a dataset object represented in the scratch-pad + space is duplicated in the object header for that + dataset. This meta data can include the datatype + and the size of the dataspace for a dataset whose datatype + is atomic and whose dataspace is fixed and less than + four dimensions. 
+ Furthermore, no data is cached in the group + entry scratch-pad space if the object header for + the group entry has a link count greater than + one.</td> + </tr> + </table> + </center> + + <h4>Format of the Scratch-pad Space</h4> + + <p>The group entry scratch-pad space is formatted + according to the value in the Cache Type field. + + <p>If the Cache Type field contains the value zero + (<code>0</code>) then no information is + stored in the scratch-pad space. + + <p>If the Cache Type field contains the value one + (<code>1</code>), then the scratch-pad space + contains cached meta data for another object header + in the following format: + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <B>Object Header Scratch-pad Format</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4>Address of B-tree</td> + + <tr align=center> + <td colspan=4>Address of Name Heap</td> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Address of B-tree</td> + <td>This is the file address for the root of the + group's B-tree.</td> + </tr> + + <tr valign=top> + <td>Address of Name Heap</td> + <td>This is the file address for the group's local + heap, in which are stored the symbol names.</td> + </tr> + </table> + </center> + + + <p>If the Cache Type field contains the value two + (<code>2</code>), then the scratch-pad space + contains cached meta data for another symbolic link + in the following format: + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <B>Symbolic Link Scratch-pad Format</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + 
<tr align=center> + <td colspan=4>Offset to Link Value</td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Offset to Link Value</td> + <td>The value of a symbolic link (that is, the name of the + thing to which it points) is stored in the local heap. + This field is the 4-byte offset into the local heap for + the start of the link value, which is null terminated.</td> + </tr> + </table> + </center> + + <h3><a name="LocalHeap">Disk Format: Level 1D - Local Heaps</a></h3> + + <p>A heap is a collection of small heap objects. Objects can be + inserted and removed from the heap at any time. + The address of a heap does not change once the heap is created. + References to objects are stored in the group table; + the names of those objects are stored in the local heap. + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Local Heaps</b> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td colspan=4>Heap Signature</td> + </tr> + + <tr align=center> + <td colspan=4>Reserved (zero)</td> + </tr> + + <tr align=center> + <td colspan=4>Data Segment Size</td> + </tr> + + <tr align=center> + <td colspan=4>Offset to Head of Free-list (<size> bytes)</td> + </tr> + + <tr align=center> + <td colspan=4>Address of Data Segment</td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Heap Signature</td> + <td>The ASCII character string <code>HEAP</code> + is used to indicate the + beginning of a heap. 
This gives file consistency + checking utilities a better chance of reconstructing a + damaged file.</td> + </tr> + + <tr valign=top> + <td>Data Segment Size</td> + <td>The total amount of disk memory allocated for the heap + data. This may be larger than the amount of space + required by the object stored in the heap. The extra + unused space holds a linked list of free blocks.</td> + </tr> + + <tr valign=top> + <td>Offset to Head of Free-list</td> + <td>This is the offset within the heap data segment of the + first free block (or all 0xff bytes if there is no free + block). The free block contains <size> bytes that + are the offset of the next free chunk (or all 0xff bytes + if this is the last free chunk) followed by <size> + bytes that store the size of this free chunk.</td> + </tr> + + <tr valign=top> + <td>Address of Data Segment</td> + <td>The data segment originally starts immediately after + the heap header, but if the data segment must grow as a + result of adding more objects, then the data segment may + be relocated, in its entirety, to another part of the + file.</td> + </tr> + </table> + </center> + + <p>Objects within the heap should be aligned on an 8-byte boundary. + + <h3><a name="GlobalHeap">Disk Format: Level 1E - Global Heap</a></h3> + + <p>Each HDF5 file has a global heap which stores various types of + information which is typically shared between datasets. The + global heap was designed to satisfy these goals: + + <ol type="A"> + <li>Repeated access to a heap object must be efficient without + resulting in repeated file I/O requests. Since global heap + objects will typically be shared among several datasets, it is + probable that the object will be accessed repeatedly. + + <br><br> + <li>Collections of related global heap objects should result in + fewer and larger I/O requests. For instance, a dataset of + void pointers will have a global heap object for each + pointer. 
Reading the entire set of void pointer objects + should result in a few large I/O requests instead of one small + I/O request for each object. + + <br><br> + <li>It should be possible to remove objects from the global heap + and the resulting file hole should be eligible to be reclaimed + for other uses. + <br><br> + </ol> + + <p>The implementation of the heap makes use of the memory + management already available at the file level and combines that + with a new top-level object called a <em>collection</em> to + achieve Goal B. The global heap is the set of all collections. + Each global heap object belongs to exactly one collection and + each collection contains one or more global heap objects. For + the purposes of disk I/O and caching, a collection is treated as + an atomic object. + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <B>A Global Heap Collection</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td colspan=4>Magic Number</td> + </tr> + + <tr align=center> + <td>Version</td> + <td colspan=3>Reserved</td> + </tr> + + <tr align=center> + <td colspan=4>Collection Size</td> + </tr> + + <tr align=center> + <td colspan=4><br>Global Heap Object 1 + <i>(described below)</i><br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>Global Heap Object 2<br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>...<br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>Global Heap Object <em>N</em><br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>Global Heap Object 0 (free space)<br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Magic Number</td> + <td>The magic number for global heap collections is the +
four bytes <code>G</code>, <code>C</code>, <code>O</code>, + and <code>L</code>.</td> + </tr> + + <tr valign=top> + <td>Version</td> + <td>Each collection has its own version number so that new + collections can be added to old files. This document + describes version zero of the collections.</td> + </tr> + + <tr valign=top> + <td>Collection Size</td> + <td>This is the size in bytes of the entire collection + including this field. The default (and minimum) + collection size is 4096 bytes which is a typical file + system block size and which allows for 170 16-byte heap + objects plus their overhead.</td> + </tr> + + <tr valign=top> + <td>Object 1 through <em>N</em></td> + <td>The objects are stored in any order with no + intervening unused space.</td> + </tr> + + <tr valign=top> + <td>Object 0</td> + <td>Object 0 (zero), when present, represents the free space in + the collection. Free space always appears at the end of + the collection. If the free space is too small to store + the header for Object 0 (described below) then the + header is implied and the collection contains no free space.</td> + </tr> + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <B>Global Heap Object</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td colspan=2>Object ID</td> + <td colspan=2>Reference Count</td> + </tr> + + <tr align=center> + <td colspan=4>Reserved</td> + </tr> + + <tr align=center> + <td colspan=4>Object Data Size</td> + </tr> + + <tr align=center> + <td colspan=4><br>Object Data<br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Object ID</td> + <td>Each object has a unique identification number within a + collection.
The identification numbers are chosen so that + new objects have the smallest value possible with the + exception that the identifier <code>0</code> always refers to the + object which represents all free space within the + collection.</td> + </tr> + + <tr valign=top> + <td>Reference Count</td> + <td>All heap objects have a reference count field. An + object which is referenced from some other part of the + file will have a positive reference count. The reference + count for Object 0 is always zero.</td> + </tr> + + <tr valign=top> + <td>Reserved</td> + <td>Zero padding to align next field on an 8-byte + boundary.</td> + </tr> + + <tr valign=top> + <td>Object Data Size</td> <td>This is the size of the fields + above plus the object data stored for the object. The + actual storage size is rounded up to a multiple of + eight.</td> + </tr> + + <tr valign=top> + <td>Object Data</td> + <td>The object data is treated as a one-dimensional array + of bytes to be interpreted by the caller.</td> + </tr> + </table> + </center> + + <h3><a name="FreeSpaceIndex">Disk Format: Level 1F - Free-space Heap</a></h3> + + <p>The Free-space Index is a collection of blocks of data, + dispersed throughout the file, which are currently not used by + any file objects. + + <p>The super block contains a pointer to the root of the free-space description; + that pointer is currently (i.e., in HDF5 Release 1.2) required + to be the undefined address <code>0xfff...ff</code>. + + <p>The free-space index is not otherwise publicly defined at this time. + + + <!-- + <p>The Free-space Index is a collection of blocks of data, + dispersed throughout the file, which are currently not used by + any file objects. The blocks of data are indexed by a B-tree of + their length within the file.
+ + + <p>Each B-tree page is composed of the following entries and + B-tree management information, organized as follows: + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Free-space Heap Page</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4>Free-space Heap Signature</td> + <tr align=center> + <td colspan=4>B-tree Left-link Offset</td> + <tr align=center> + <td colspan=4><br>Length of Free-block #1<br> <br></td> + <tr align=center> + <td colspan=4><br>Offset of Free-block #1<br> <br></td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4><br>Length of Free-block #n<br> <br></td> + <tr align=center> + <td colspan=4><br>Offset of Free-block #n<br> <br></td> + <tr align=center> + <td colspan=4>"High" Offset</td> + <tr align=center> + <td colspan=4>Right-link Offset</td> + </table> + </center> + + <p> + <dl> + <dt> The elements of the free-space heap page are described below: + <dd> + <dl> + <dt>Free-space Heap Signature: (4 bytes) + <dd>The ASCII character string <code>FREE</code> + is used to indicate the + beginning of a free-space heap B-tree page. This gives + file consistency checking utilities a better chance of + reconstructing a damaged file. + + <dt>B-tree Left-link Offset: (<offset> bytes) + <dd>This value is used to indicate the offset of all offsets + in the B-link-tree which are smaller than the value of the + offset in entry #1. This value is also used to indicate a + leaf node in the B-link-tree by being set to all ones. + + <dt>Length of Free-block #n: (<length> bytes) + <dd>This value indicates the length of an unused block in + the file. + + <dt>Offset of Free-block #n: (<offset> bytes) + <dd>This value indicates the offset in the file of an + unused block in the file. 
+ + <dt>"High" Offset: (4-bytes) + <dd>This offset is used as the upper bound on offsets + contained within a page when the page has been split. + + <dt>Right-link Offset: (<offset> bytes) + <dd>This value is used to indicate the offset of the next + child to the right of the parent of this group + page. When there is no node to the right, this value is + all zeros. + </dl> + </dl> + + <p>The algorithms for searching and inserting objects in the + B-tree pages are described fully in the Lehman and Yao paper, + which should be read to provide a full description of the + B-tree's usage. +--> + + +<br><br> +<br><br> + + + <h2><a name="DataObject">Disk Format: Level 2 - Data Objects </a></h2> + + <p>Data objects contain the real information in the file. These + objects compose the scientific data and other information which + are generally thought of as "data" by the end-user. All the + other information in the file is provided as a framework for + these data objects. + + <p>A data object is composed of header information and data + information. The header information contains the information + needed to interpret the data information for the data object as + well as additional "meta-data" or pointers to additional + "meta-data" used to describe or annotate each data object. + + <h3><a name="ObjectHeader"> + Disk Format: Level 2a - Data Object Headers</a></h3> + + <p>The header information of an object is designed to encompass + all the information about an object which would be desired to be + known, except for the data itself. This information includes + the dimensionality, number-type, information about how the data + is stored on disk (in external files, compressed, broken up in + blocks, etc.), as well as other information used by the library + to speed up access to the data objects or maintain a file's + integrity. 
The header of each object is not necessarily located + immediately prior to the object's data in the file and in fact + may be located in any position in the file. + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <B>Object Headers</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td colspan=1 width="25%">Version # of Object Header</td> + <td colspan=1 width="25%">Reserved</td> + <td colspan=2 width="50%">Number of Header Messages</td> + </tr> + <tr align=center> + <td colspan=4>Object Reference Count</td> + </tr> + <tr align=center> + <td colspan=4><br>Total Object Header Size<br><br></td> + </tr> + <tr align=center> + <td colspan=2>Header Message Type #1</td> + <td colspan=2>Size of Header Message Data #1</td> + </tr> + <tr align=center> + <td>Flags</td> + <td colspan=3>Reserved</td> + </tr> + <tr align=center> + <td colspan=4><br>Header Message Data #1<br><br></td> + </tr> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + </tr> + <tr align=center> + <td colspan=2>Header Message Type #n</td> + <td colspan=2>Size of Header Message Data #n</td> + </tr> + <tr align=center> + <td>Flags</td> + <td colspan=3>Reserved</td> + </tr> + <tr align=center> + <td colspan=4><br>Header Message Data #n<br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Version number of the object header</td> + <td>This value is used to determine the format of the + information in the object header. 
When the format of the + information in the object header is changed, the version number + is incremented and can be used to determine how the + information in the object header is formatted.</td> + </tr> + + <tr valign=top> + <td>Reserved</td> + <td>Always set to zero.</td> + </tr> + + <tr valign=top> + <td>Number of header messages</td> + <td>This value determines the number of messages listed in + this object header. This provides a fast way for software + to prepare storage for the messages in the header.</td> + </tr> + + <tr valign=top> + <td>Object Reference Count</td> + <td>This value specifies the number of references to this + object within the current file. References to the + data object from external files are not tracked.</td> + </tr> + + <tr valign=top> + <td>Total Object Header Size</td> + <td>This value specifies the total number of bytes of header + message data following this length field for the current + message as well as any continuation data located elsewhere + in the file.</td> + </tr> + + <tr valign=top> + <td>Header Message Type</td> + <td>The header message type specifies the type of + information included in the header message data following + the type along with a small amount of other information. + Bit 15 of the message type is set if the message is + constant (constant messages cannot be changed since they + may be cached in group entries throughout the + file). The header message types for the pre-defined + header messages will be included in further discussion + below.</td> + </tr> + + <tr valign=top> + <td>Size of Header Message Data</td> + <td>This value specifies the number of bytes of header + message data following the header message type and length + information for the current message. 
The size includes + padding bytes to make the message a multiple of eight + bytes.</td> + </tr> + + <tr valign=top> + <td>Flags</td> + <td>This is a bit field with the following definition: + <dl> + <dt><code>0</code> + <dd>If set, the message data is constant. This is used + for messages like the datatype message of a dataset. + <dt><code>1</code> + <dd>If set, the message is stored in the global heap and + the Header Message Data field contains a Shared Object + message and the Size of Header Message Data field + contains the size of that Shared Object message. + <dt><code>2-7</code> + <dd>Reserved + </dl> + </td> + </tr> + + <tr valign=top> + <td>Header Message Data</td> + <td>The format and length of this field is determined by the + header message type and size respectively. Some header + message types do not require any data and this information + can be eliminated by setting the length of the message to + zero. The data is padded with enough zeros to make the + size a multiple of eight.</td> + </tr> + </table> + </center> + + <p>The header message types and the message data associated with + them compose the critical "meta-data" about each object. Some + header messages are required for each object while others are + optional. Some optional header messages may also be repeated + several times in the header itself; the requirements and number + of times allowed in the header will be noted in each header + message description below. + + <P>The following is a list of currently defined header messages: + + <hr> + <h4><a name="NILMessage">Name: NIL</a></h4> + <b>Type: </b>0x0000<br> + <b>Length:</b> varies<br> + <b>Status:</b> Optional, may be repeated.<br> + <b>Purpose and Description:</b> The NIL message is used to + indicate a message + which is to be ignored when reading the header messages for a data object.
+ [Probably one which has been deleted for some reason.]<br> + <b>Format of Data:</b> Unspecified.<br> + +<!-- Delete examples throughout doc + <b>Examples:</b> None. +--> + + + <hr> + <h4><a name="SimpleDataSpace">Name: Simple Dataspace</a></h4> + + <b>Type: </b>0x0001<br> + <b>Length:</b> Varies according to the number of dimensions, + as described in the following table<br> + <b>Status:</b> The <em>Simple Dataspace</em> message is required + and may not be repeated. This message is currently used with + datasets and named dataspaces.<br> + + <p>The <em>Simple Dataspace</em> message describes the number + of dimensions and size of each dimension that the data object + has. This message is only used for datasets which have a + simple, rectilinear grid layout; datasets requiring a more + complex layout (irregularly structured or unstructured grids, etc.) + must use the <em>Complex Dataspace</em> message for expressing + the space the dataset inhabits. + <i>(Note: The <em>Complex Dataspace</em> functionality is + not yet implemented (as of HDF5 Release 1.2). 
It is not described + in this document.)</i> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Simple Dataspace Message</b> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td>Version</td> + <td>Dimensionality</td> + <td>Flags</td> + <td>Reserved</td> + </tr> + + <tr align=center> + <td colspan=4>Reserved</td> + </tr> + + <tr align=center> + <td colspan=4>Dimension Size #1 (<size> bytes)</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4>Dimension Size #n (<size> bytes)</td> + <tr align=center> + <td colspan=4>Dimension Maximum #1 (<size> bytes)</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4>Dimension Maximum #n (<size> bytes)</td> + <tr align=center> + <td colspan=4>Permutation Index #1</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4>Permutation Index #n</td> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Version </td> + <td>This value is used to determine the format of the + Simple Dataspace Message. When the format of the + information in the message is changed, the version number + is incremented and can be used to determine how the + information in the object header is formatted.</td> + </tr> + + <tr valign=top> + <td>Dimensionality</td> + <td>This value is the number of dimensions that the data + object has.</td> + </tr> + + <tr valign=top> + <td>Flags</td> + <td>This field is used to store flags to indicate the + presence of parts of this message. Bit 0 (the least + significant bit) is used to indicate that maximum + dimensions are present. 
Bit 1 is used to indicate that + permutation indices are present for each dimension.</td> + </tr> + + <tr valign=top> + <td>Dimension Size #n (<size> bytes)</td> + <td>This value is the current size of the dimension of the + data as stored in the file. The first dimension stored in + the list of dimensions is the slowest changing dimension + and the last dimension stored is the fastest changing + dimension.</td> + </tr> + + <tr valign=top> + <td>Dimension Maximum #n (<size> bytes)</td> + <td>This value is the maximum size of the dimension of the + data as stored in the file. This value may be the special + value <UNLIMITED> (all bits set) which indicates + that the data may expand along this dimension + indefinitely. If these values are not stored, the maximum + value of each dimension is assumed to be the same as the + current size value.</td> + </tr> + + <tr valign=top> + <td>Permutation Index #n (4 bytes)</td> + <td>This value is the index permutation used to map + each dimension from the canonical representation to an + alternate axis for each dimension. If these values are + not stored, the first dimension stored in the list of + dimensions is the slowest changing dimension and the last + dimension stored is the fastest changing dimension.</td> + </tr> + </table> + </center> + +<!-- Delete examples throughout doc + <h4>Examples</h4> + <dl> + <dt> Example #1 + <dd>A sample 640 horizontally by 480 vertically raster image + dimension header. The number of dimensions would be set to 2 + and the first dimension's size and maximum would both be set + to 480. The second dimension's size and maximum would both be + set to 640 +. + <dt>Example #2 + <dd>A sample 4 dimensional scientific dataset which is composed + of 30x24x3 slabs of data being written out in an unlimited + series every several minutes as timestep data (currently there + are five slabs). The number of dimensions is 4. The first + dimension size is 5 and its maximum is <UNLIMITED>. 
The + second through fourth dimension's size and maximum value are + set to 3, 24, and 30 respectively. + + <dt>Example #3 + <dd>A sample unlimited length text string, currently of length + 83. The number of dimensions is 1, the size of the first + dimension is 83 and the maximum of the first dimension is set + to <UNLIMITED>, allowing further text data to be + appended to the string or possibly the string to be replaced + with another string of a different size. (This could also be + stored as a scalar dataset with number-type set to "string") + </dl> +--> + +<!-- DELETE ENTIRE DATASPACE SECTION --> +<!-- + <hr> + <h4><a name="DataSpaceMessage">Name: Complex Dataspace (Fiber Bundle?)</a></h4> + <b>Type: </b>0x0002<br> + <b>Length:</b> varies<br> + + <b>Status:</b> One of the <em>Simple Dataspace</em> or + <em>Complex Dataspace</em> messages is required (but not both) and may + not be repeated.<br> <b>Purpose and Description:</b> The + <em>Dataspace</em> message describes space that the dataset is + mapped onto in a more comprehensive way than the <em>Simple + Dimensionality</em> message is capable of handling. The + dataspace of a dataset encompasses the type of coordinate system + used to locate the dataset's elements as well as the structure and + regularity of the coordinate system. The dataspace also + describes the number of dimensions which the dataset inhabits as + well as a possible higher dimensional space in which the dataset + is located within. 
+ + <br> + <b>Format of Data:</b> + + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Dataspace Message Layout</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4>Mesh Type</td> + <tr align=center> + <td colspan=4>Logical Dimensionality</td> + </table> + </center> + + <p> + <dl> + <dt>The elements of the dimensionality message are described below: + <dd> + <dl> + <dt>Mesh Type: (unsigned 32-bit integer) + <dd>This value indicates whether the grid is + polar/spherical/cartesion, + structured/unstructured and regular/irregular. <br> + The mesh type value is broken up as follows: <br> + + <P> + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Mesh-type Layout</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=1>Mesh Embedding</td> + <td colspan=1>Coordinate System</td> + <td colspan=1>Structure</td> + <td colspan=1>Regularity</td> + </table> + </center> + The following are the definitions of mesh-type bytes: + <dl> + <dt>Mesh Embedding + <dd>This value indicates whether the dataset dataspace + is located within + another dataspace or not: + <dl> <dl> + <dt><STANDALONE> + <dd>The dataset mesh is self-contained and is not + embedded in another mesh. + <dt><EMBEDDED> + <dd>The dataset's dataspace is located within + another dataspace, as + described in information below. + </dl> </dl> + <dt>Coordinate System + <dd>This value defines the type of coordinate system + used for the mesh: + <dl> <dl> + <dt><POLAR> + <dd>The last two dimensions are in polar + coordinates, higher dimensions are + cartesian. + <dt><SPHERICAL> + <dd>The last three dimensions are in spherical + coordinates, higher dimensions + are cartesian. 
+ <dt><CARTESIAN> + <dd>All dimensions are in cartesian coordinates. + </dl> </dl> + <dt>Structure + <dd>This value defines the locations of the grid-points + on the axes: + <dl> <dl> + <dt><STRUCTURED> + <dd>All grid-points are on integral, sequential + locations, starting from 0. + <dt><UNSTRUCTURED> + <dd>Grid-points locations in each dimension are + explicitly defined and + may be of any numeric datatype. + </dl> </dl> + <dt>Regularity + <dd>This value defines the locations of the dataset + points on the grid: + <dl> <dl> + <dt><REGULAR> + <dd>All dataset elements are located at the + grid-points defined. + <dt><IRREGULAR> + <dd>Each dataset element has a particular + grid-location defined. + </dl> </dl> + </dl> + <p>The following grid combinations are currently allowed: + <dl> <dl> + <dt><POLAR-STRUCTURED-REGULAR> + <dt><SPHERICAL-STRUCTURED-REGULAR> + <dt><CARTESIAN-STRUCTURED-REGULAR> + <dt><POLAR-UNSTRUCTURED-REGULAR> + <dt><SPHERICAL-UNSTRUCTURED-REGULAR> + <dt><CARTESIAN-UNSTRUCTURED-REGULAR> + <dt><CARTESIAN-UNSTRUCTURED-IRREGULAR> + </dl> </dl> + All of the above grid types can be embedded within another + dataspace. + <br> <br> + <dt>Logical Dimensionality: (unsigned 32-bit integer) + <dd>This value is the number of dimensions that the dataset occupies. 
+ + <P> + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Dataspace Embedded Dimensionality Information</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4>Embedded Dimensionality</td> + <tr align=center> + <td colspan=4>Embedded Dimension Size #1</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4>Embedded Dimension Size #n</td> + <tr align=center> + <td colspan=4>Embedded Origin Location #1</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4>Embedded Origin Location #n</td> + </table> + </center> + + <dt>Embedded Dimensionality: (unsigned 32-bit integer) + <dd>This value is the number of dimensions of the space the + dataset is located + within. i.e. a planar dataset located within a 3-D space, + or a 3-D dataset + which is a subset of another 3-D space, etc. + <dt>Embedded Dimension Size: (unsigned 32-bit integer) + <dd>These values are the sizes of the dimensions of the + embedded dataspace + that the dataset is located within. + <dt>Embedded Origin Location: (unsigned 32-bit integer) + <dd>These values comprise the location of the dataset's + origin within the embedded dataspace. 
+ </dl> + </dl> + [Comment: need some way to handle different orientations of the + dataset dataspace + within the embedded dataspace]<br> + + <P> + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Dataspace Structured/Regular Grid Information</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4>Logical Dimension Size #1</td> + <tr align=center> + <td colspan=4>Logical Dimension Maximum #1</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4>Logical Dimension Size #n</td> + <tr align=center> + <td colspan=4>Logical Dimension Maximum #n</td> + </table> + </center> + + <p> + <dl> + <dt>The elements of the dimensionality message are described below: + <dd> + <dl> + <dt>Logical Dimension Size #n: (unsigned 32-bit integer) + <dd>This value is the current size of the dimension of the + data as stored in + the file. The first dimension stored in the list of + dimensions is the slowest + changing dimension and the last dimension stored is the + fastest changing + dimension. + <dt>Logical Dimension Maximum #n: (unsigned 32-bit integer) + <dd>This value is the maximum size of the dimension of the + data as stored in + the file. This value may be the special value + <UNLIMITED> which + indicates that the data may expand along this dimension + indefinitely. 
+ </dl> + </dl> + <P> + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Dataspace Structured/Irregular Grid Information</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4># of Grid Points in Dimension #1</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4># of Grid Points in Dimension #n</td> + <tr align=center> + <td colspan=4>Datatype of Grid Point Locations</td> + <tr align=center> + <td colspan=4>Location of Grid Points in Dimension #1</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4>Location of Grid Points in Dimension #n</td> + </table> + </center> + + <P> + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Dataspace Unstructured Grid Information</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4># of Grid Points</td> + <tr align=center> + <td colspan=4>Datatype of Grid Point Locations</td> + <tr align=center> + <td colspan=4>Grid Point Locations<br>.<br>.<br></td> + </table> + </center> + + <h4><a name="DataSpaceExample">Examples:</a></h4> + Need some good examples, this is complex! +--> + + + <hr> + <h4><a name="DataTypeMessage">Name: Datatype</a></h4> + + <b>Type:</b> 0x0003<br> + <b>Length:</b> variable<br> + <b>Status:</b> One required per dataset or named datatype<br> + + <p>The datatype message defines the datatype for each data point + of a dataset. A datatype can describe an atomic type like a + fixed- or floating-point type or a compound type like a C + struct. A datatype does not, however, describe how data points + are combined to produce a dataset. 
Datatypes are stored on disk + as a datatype message, which is a list of datatype classes and + their associated properties. + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Datatype Message</b> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td>Type Class and Version</td> + <td colspan=3>Class Bit Field</td> + </tr> + + <tr align=center> + <td colspan=4>Size in Bytes (4 bytes)</td> + </tr> + + <tr align=center> + <td colspan=4><br><br>Properties<br><br><br></td> + </tr> + </table> + </center> + + <p>The Class Bit Field and Properties fields vary depending + on the Type Class, which is the low-order four bits of the Type + Class and Version field (the high-order four bits are the + version, which should be set to the value one). The type class + is one of 0 (fixed-point number), 1 (floating-point number), + 2 (date and time), 3 (text string), 4 (bit field), 5 (opaque), + 6 (compound), 7 (reference), 8 (enumeration), or 9 (variable-length). + The Class Bit Field is zero and the size of the + Properties field is zero except for the cases noted here. + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Bit Field for Fixed-point Numbers (Class 0)</b> + </caption> + + <tr align=center> + <th width="10%">Bits</th> + <th width="90%">Meaning</th> + </tr> + + <tr valign=top> + <td>0</td> + <td><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big endian.</td> + </tr> + + <tr valign=top> + <td>1, 2</td> + <td><b>Padding type.</b> Bit 1 is the lo_pad type and bit 2 + is the hi_pad type. 
If a datum has unused bits at either + end, then the lo_pad or hi_pad bit is copied to those + locations.</td> + </tr> + + <tr valign=top> + <td>3</td> + <td><b>Signed.</b> If this bit is set then the fixed-point + number is in 2's complement form.</td> + </tr> + + <tr valign=top> + <td>4-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Properties for Fixed-point Numbers (Class 0)</b> + </caption> + + <tr align=center> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr align=center> + <td colspan=2>Bit Offset</td> + <td colspan=2>Bit Precision</td> + </tr> + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Bit Field for Floating-point Numbers (Class 1)</b> + </caption> + + <tr align=center> + <th width="10%">Bits</th> + <th width="90%">Meaning</th> + </tr> + + <tr valign=top> + <td>0</td> + <td><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big endian.</td> + </tr> + + <tr valign=top> + <td>1, 2, 3</td> + <td><b>Padding type.</b> Bit 1 is the low bits pad type, bit 2 + is the high bits pad type, and bit 3 is the internal bits + pad type. If a datum has unused bits at either or between + the sign bit, exponent, or mantissa, then the value of bit + 1, 2, or 3 is copied to those locations.</td> + </tr> + + <tr valign=top> + <td>4-5</td> + <td><b>Normalization.</b> The value can be 0 if there is no + normalization, 1 if the most significant bit of the + mantissa is always set (except for 0.0), and 2 if the most + significant bit of the mantissa is not stored but is + implied to be set.
The value 3 is reserved and will not + appear in this field.</td> + </tr> + + <tr valign=top> + <td>6-7</td> + <td>Reserved (zero).</td> + </tr> + + <tr valign=top> + <td>8-15</td> + <td><b>Sign.</b> This is the bit position of the sign + bit.</td> + </tr> + + <tr valign=top> + <td>16-23</td> + <td>Reserved (zero).</td> + </tr> + + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Properties for Floating-point Numbers (Class 1)</b> + </caption> + + <tr align=center> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr align=center> + <td colspan=2>Bit Offset</td> + <td colspan=2>Bit Precision</td> + </tr> + + <tr align=center> + <td>Exponent Location</td> + <td>Exponent Size in Bits</td> + <td>Mantissa Location</td> + <td>Mantissa Size in Bits</td> + </tr> + + <tr align=center> + <td colspan=4>Exponent Bias</td> + </tr> + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Bit Field for Strings (Class 3)</b> + </caption> + + <tr align=center> + <th width="10%">Bits</th> + <th width="90%">Meaning</th> + </tr> + + <tr valign=top> + <td>0-3</td> + <td><b>Padding type.</b> This four-bit value determines the + type of padding to use for the string. The values are: + + <dl> + <dt><code>0</code> Null terminate. + <dd>A zero byte marks the end of the string and is + guaranteed to be present after converting a long + string to a short string. When converting a short + string to a long string the value is padded with + additional null characters as necessary. + + <br><br> + <dt><code>1</code> Null pad. + <dd>Null characters are added to the end of the value + during conversions from short values to long values + but conversion in the opposite direction simply + truncates the value. + + <br><br> + <dt><code>2</code> Space pad. 
+ <dd>Space characters are added to the end of the value + during conversions from short values to long values + but conversion in the opposite direction simply + truncates the value. This is the Fortran + representation of the string. + + <br><br> + <dt><code>3-15</code> Reserved. + <dd>These values are reserved for future use. + </dl> + </tr> + + <tr valign=top> + <td>4-7</td> + <td><b>Character Set.</b> The character set to use for + encoding the string. The only character set supported is + the 8-bit ASCII (zero) so no translations have been defined + yet.</td> + </tr> + + <tr valign=top> + <td>8-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Bit Field for Bitfield Types (Class 4)</b> + </caption> + + <tr align=center> + <th width="10%">Bits</th> + <th width="90%">Meaning</th> + </tr> + + <tr valign=top> + <td>0</td> + <td><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big endian.</td> + </tr> + + <tr valign=top> + <td>1, 2</td> + <td><b>Padding type.</b> Bit 1 is the lo_pad type and bit 2 + is the hi_pad type. 
If a datum has unused bits at either + end, then the lo_pad or hi_pad bit is copied to those + locations.</td> + </tr> + + <tr valign=top> + <td>3-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Properties for Bitfield Types (Class 4)</b> + </caption> + + <tr align=center> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr align=center> + <td colspan=2>Bit Offset</td> + <td colspan=2>Bit Precision</td> + </tr> + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Bit Field for Opaque Types (Class 5)</b> + </caption> + + <tr align=center> + <th width="10%">Bits</th> + <th width="90%">Meaning</th> + </tr> + + <tr valign=top> + <td>0-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Properties for Opaque Types (Class 5)</b> + </caption> + + <tr align=center> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr align=center> + <td colspan=4><br>Null-terminated ASCII Tag<br> + (multiple of 8 bytes)<br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Bit Field for Compound Types (Class 6)</b> + </caption> + + <tr align=center> + <th width="10%">Bits</th> + <th width="90%">Meaning</th> + </tr> + + <tr valign=top> + <td>0-15</td> + <td><b>Number of Members.</b> This field contains the number + of members defined for the compound datatype. The member + definitions are listed in the Properties field of the data + type message. 
+ </tr> + + <tr valign=top> + <td>16-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </center> + + <p>The Properties field of a compound datatype is a list of the + member definitions of the compound datatype. The member + definitions appear one after another with no intervening bytes. + The member types are described with a recursive datatype + message. + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Properties for Compound Types (Class 6)</b> + </caption> + + <tr align=center> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr align=center> + <td colspan=4><br><br>Name (null terminated, multiple of + eight bytes)<br><br><br></td> + </tr> + + <tr align=center> + <td colspan=4>Byte Offset of Member in Compound Instance</td> + </tr> + + <tr align=center> + <td>Dimensionality</td> + <td colspan=3>reserved</td> + </tr> + + <tr align=center> + <td colspan=4>Dimension Permutation</td> + </tr> + + <tr align=center> + <td colspan=4>Reserved</td> + </tr> + + <tr align=center> + <td colspan=4>Size of Dimension 0 (required)</td> + </tr> + + <tr align=center> + <td colspan=4>Size of Dimension 1 (required)</td> + </tr> + + <tr align=center> + <td colspan=4>Size of Dimension 2 (required)</td> + </tr> + + <tr align=center> + <td colspan=4>Size of Dimension 3 (required)</td> + </tr> + + <tr align=center> + <td colspan=4><br><br>Member Type Message<br><br><br></td> + </tr> + + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Bit Field for Enumeration Types (Class 8)</b> + </caption> + + <tr align=center> + <th width="10%">Bits</th> + <th width="90%">Meaning</th> + </tr> + + <tr valign=top> + <td>0-15</td> + <td><b>Number of Members.</b> The number of name/value + pairs defined for the enumeration type.</td> + </tr> + + <tr valign=top> + <td>16-23</td> + <td>Reserved (zero).</td> + </tr> +
</table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Properties for Enumeration Types (Class 8)</b> + </caption> + + <tr align=center> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr align=center> + <td colspan=4><br>Parent Type<br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>Names<br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>Values<br><br></td> + </tr> + + </table> + </center> + + <center> + <table border=0 cellpadding=4 width="80%"> + <tr align=left valign=top> + <td valign=top width=20%>Parent Type:</td> + <td valign=top>Each enumeration type is based on some parent type, + usually an integer. The information for that parent type is + described recursively by this field.</td> + </tr><tr align=left valign=top> + <td valign=top>Names:</td> + <td valign=top>The name for each name/value pair. Each name is + stored as a null terminated ASCII string in a multiple of + eight bytes. The names are in no particular order.</td> + </tr><tr align=left valign=top> + <td valign=top>Values:</td> + <td valign=top>The list of values in the same order as the names. + The values are packed (no inter-value padding) and the + size of each value is determined by the parent type.</td> + </tr> + </table> + </center> + + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Bit Field for Variable-length Types (Class 9)</b> + </caption> + + <tr align=center> + <th width="10%">Bits</th> + <th width="90%">Meaning</th> + </tr> + + <tr valign=top> + <td>0-3</td> + <td><dl><dt><b>Type</b></dt> + <dt>0 Variable-length sequence</dt> + <dd>This variable-length datatype can be of any sequence + of data. Variable-length sequences do not have padding + or character set information.</dd> + <dt>1 Variable-length string</dt> + <dd>This variable-length datatype is composed of a series of + characters. 
Variable-length strings have padding and + character set information.</dd></dl> + </td> + </tr> + + <tr valign=top> + <td>4-7</td> + <td><dl><dt><b>Padding type</b> (variable-length string only)</dt> + <dd>This four-bit value determines the type of padding + used for variable-length strings. The values are the same + as for the string padding type, as follows:</dd> + <dt>0 Null terminate</dt> + <dd>A zero byte marks the end of a string and is guaranteed + to be present after converting a long string to a short + string. When converting a short string to a long string, + the value is padded with additional null characters + as necessary. + <dt>1 Null pad</dt> + <dd>Null characters are added to the end of the value + during conversion from a short string to a longer string. + Conversion from a long string to a shorter string + simply truncates the value.</dd> + <dt>2 Space pad</dt> + <dd>Space characters are added to the end of the value + during conversion from a short string to a longer string. + Conversion from a long string to a shorter string simply + truncates the value. + This is the Fortran representation of the string. + </dd> + <dt>3-15 Reserved</dt> + <dd>These values are reserved for future use.</dd></dl> + </td> + </tr> + + <tr valign=top> + <td>8-11</td> + <td><dl><dt><b>Character set</b> (variable-length string only)</dt> + <dd>This four-bit value specifies the character set + to be used for encoding the string.</dd> + <dt>0 8-bit ASCII</dt> + <dd>As of this writing (July 2002, Release 1.4.4), + 8-bit ASCII is the only character set supported. 
+ Therefore, no translations have been defined.</dd></dl> + </td> + </tr> + + <tr valign=top> + <td>12-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Properties for Variable-length Types (Class 9)</b> + </caption> + + <tr align=center> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr align=center> + <td colspan=4><br>Parent Type<br><br></td> + </tr> + + </table> + </center> + + <center> + <table border=0 cellpadding=4 width="80%"> + <tr align=left valign=top> + <td valign=top width=20%>Parent Type:</td> + <td valign=top>Each variable-length type is based on + some parent type. The information for that parent type is + described recursively by this field.</td> + </tr> + </table> + </center> + + + + <p> + +<!-- + <p>Datatype examples are <a href="Datatypes.html">here</a>. +--> + + + <hr> + <h4><a name="FillValueMessage">Name: Data Storage - Fill Value</a></h4> + <b>Type:</b> 0x0004<br> + <b>Length:</b> varies<br> + <b>Status:</b> Optional, may not be repeated.<br> + + <p>The fill value message stores a single data point value which + is returned to the application when an uninitialized data point + is read from the dataset. The fill value is interpreted with + the same datatype as the dataset. If no fill value message is + present then a fill value of all zero is assumed.
+ + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Fill Value Message</b> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td colspan=4>Size (4 bytes)</td> + </tr> + + <tr align=center> + <td colspan=4><br>Fill Value<br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Size (4 bytes)</td> + <td>This is the size of the Fill Value field in bytes.</td> + </tr> + + <tr valign=top> + <td>Fill Value</td> + <td>The fill value. The bytes of the fill value are + interpreted using the same datatype as for the dataset.</td> + </tr> + </table> + </center> + + <hr> + <h4><a name="ReservedMessage_0005">Name: Reserved - Not Assigned Yet</a></h4> + <b>Type:</b> 0x0005<br> + <b>Length:</b> N/A<br> + <b>Status:</b> N/A<br> + + + + <hr> + <h4><a name="CompactDataStorageMessage">Name: Data Storage - Compact</a></h4> + + <b>Type:</b> 0x0006<br> + <b>Length:</b> varies<br> + <b>Status:</b> Optional, may not be repeated.<br> + + <p>This message indicates that the data for the data object is + stored within the current HDF file by including the actual + data as the header data for this message. The data is + stored internally in + the <em>normal format</em>, i.e. in one chunk, uncompressed, etc. + + <P>Note that one and only one of the <em>Data Storage</em> headers can be + stored for each data object. + + <P><b>Format of Data:</b> The message data is actually composed + of dataset data, so the format will be determined by the dataset + format. 
+ +<!-- Delete examples throughout doc + <h4><a name="CompactDataStorageExample">Examples:</a></h4> + [very straightforward] +--> + + <hr> + <h4><a name="ExternalFileListMessage">Name: Data Storage - + External Data Files</a></h4> + <b>Type:</b> 0x0007<BR> + <b>Length:</b> varies<BR> + <b>Status:</b> Optional, may not be repeated.<BR> + + <p><b>Purpose and Description:</b> The external object message + indicates that the data for an object is stored outside the HDF5 + file. The filename of the object is stored as a Uniform + Resource Locator (URL) of the actual filename containing the + data. An external file list record also contains the byte offset + of the start of the data within the file and the amount of space + reserved in the file for that data. + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>External File List Message</b> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td>Version</td> + <td colspan=3>Reserved</td> + </tr> + + <tr align=center> + <td colspan=2>Allocated Slots</td> + <td colspan=2>Used Slots</td> + </tr> + + <tr align=center> + <td colspan=4><br>Heap Address<br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>Slot Definitions...<br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Version </td> + <td>This value is used to determine the format of the + External File List Message.
When the format of the + information in the message is changed, the version number + is incremented and can be used to determine how the + information in the object header is formatted.</td> + </tr> + + <tr valign=top> + <td>Reserved</td> + <td>This field is reserved for future use.</td> + </tr> + + <tr valign=top> + <td>Allocated Slots</td> + <td>The total number of slots allocated in the message. Its + value must be at least as large as the value contained in + the Used Slots field.</td> + </tr> + + <tr valign=top> + <td>Used Slots</td> + <td>The number of initial slots which contain valid + information. The remaining slots are zero filled.</td> + </tr> + + <tr valign=top> + <td>Heap Address</td> + <td>This is the address of a local name heap which contains + the names for the external files. The name at offset zero + in the heap is always the empty string.</td> + </tr> + + <tr valign=top> + <td>Slot Definitions</td> + <td>The slot definitions are stored in order according to + the array addresses they represent. 
If more slots have + been allocated than what has been used then the defined + slots are all at the beginning of the list.</td> + </tr> + </table> + </center> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>External File List Slot</b> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td colspan=4><br>Name Offset (<size> bytes)<br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>File Offset (<size> bytes)<br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>Size<br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Name Offset (<size> bytes)</td> + <td>The byte offset within the local name heap for the name + of the file. File names are stored as a URL which has a + protocol name, a host name, a port number, and a file + name: + <code><em>protocol</em>:<em>port</em>//<em>host</em>/<em>file</em></code>. + If the protocol is omitted then "file:" is assumed. If + the port number is omitted then a default port for that + protocol is used. If both the protocol and the port + number are omitted then the colon can also be omitted. If + the double slash and host name are omitted then + "localhost" is assumed. The file name is the only + mandatory part, and if the leading slash is missing then + it is relative to the application's current working + directory (the use of relative names is not + recommended).</td> + </tr> + + <tr valign=top> + <td>File Offset (<size> bytes)</td> + <td>This is the byte offset to the start of the data in the + specified file. 
For files that contain data for a single + dataset this will usually be zero.</td> + </tr> + + <tr valign=top> + <td>Size</td> + <td>This is the total number of bytes reserved in the + specified file for raw data storage. For a file that + contains exactly one complete dataset which is not + extendable, the size will usually be the exact size of the + dataset. However, by making the size larger one allows + HDF5 to extend the dataset. The size can be set to a value + larger than the entire file since HDF5 will read zeros + past the end of the file without failing.</td> + </tr> + </table> + </center> + + + <hr> + <h4><a name="LayoutMessage">Name: Data Storage - Layout</a></h4> + + <b>Type:</b> 0x0008<BR> + <b>Length:</b> varies<BR> + <b>Status:</b> Required for datasets, may not be repeated. + + <p><b>Purpose and Description:</b> Data layout describes how the + elements of a multi-dimensional array are arranged in the linear + address space of the file. Two types of data layout are + supported: + + <ol> + <li>The array can be stored in one contiguous area of the file. + The layout requires that the size of the array be constant and + does not permit chunking, compression, checksums, encryption, + etc. The message stores the total size of the array and the + offset of an element from the beginning of the storage area is + computed as in C. + + <li>The array domain can be regularly decomposed into chunks and + each chunk is allocated separately. This layout supports + arbitrary element traversals, compression, encryption, and + checksums, and the chunks can be distributed across external + raw data files (these features are described in other + messages). The message stores the size of a chunk instead of + the size of the entire array; the size of the entire array can + be calculated by traversing the B-tree that stores the chunk + addresses. 
+ </ol> + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <B>Data Layout Message</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td>Version</td> + <td>Dimensionality</td> + <td>Layout Class</td> + <td>Reserved</td> + </tr> + + <tr align=center> + <td colspan=4>Reserved</td> + </tr> + + <tr align=center> + <td colspan=4><br>Address<br><br></td> + </tr> + + <tr align=center> + <td colspan=4>Dimension 0 (4-bytes)</td> + </tr> + + <tr align=center> + <td colspan=4>Dimension 1 (4-bytes)</td> + </tr> + + <tr align=center> + <td colspan=4>...</td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Version</td> + <td>A version number for the layout message. This + documentation describes version one.</td> + </tr> + + <tr valign=top> + <td>Dimensionality</td> + <td>An array has a fixed dimensionality. This field + specifies the number of dimension size fields later in the + message.</td> + </tr> + + <tr valign=top> + <td>Layout Class</td> + <td>The layout class specifies how the other fields of the + layout message are to be interpreted. A value of one + indicates contiguous storage while a value of two + indicates chunked storage. Other values will be defined + in the future.</td> + </tr> + + <tr valign=top> + <td>Address</td> + <td>For contiguous storage, this is the address of the first + byte of storage. 
For chunked storage this is the address + of the B-tree that is used to look up the addresses of the + chunks.</td> + </tr> + + <tr valign=top> + <td>Dimensions</td> + <td>For contiguous storage the dimensions define the entire + size of the array while for chunked storage they define + the size of a single chunk.</td> + </tr> + </table> + </center> + + + <hr> + <h4><a name="ReservedMessage_0009">Name: Reserved - Not Assigned Yet</a></h4> + <b>Type:</b> 0x0009<BR> + <b>Length:</b> N/A<BR> + <b>Status:</b> N/A<BR> + <b>Purpose and Description:</b> N/A<BR> + <b>Format of Data:</b> N/A + + <hr> + <h4><a name="ReservedMessage_000A">Name: Reserved - Not Assigned Yet</a></h4> + <b>Type:</b> 0x000A<BR> + <b>Length:</b> N/A<BR> + <b>Status:</b> N/A<BR> + <b>Purpose and Description:</b> N/A<BR> + <b>Format of Data:</b> N/A + + <hr> + <h4><a name="FilterMessage">Name: Data Storage - Filter Pipeline</a></h4> + <b>Type:</b> 0x000B<BR> + <b>Length:</b> varies<BR> + <b>Status:</b> Optional, may not be repeated. + + <p><b>Purpose and Description:</b> This message describes the + filter pipeline which should be applied to the data stream by + providing filter identification numbers, flags, a name, and + client data. + + <p> + <center> + <table border align=center cellpadding=4 width="80%"> + <caption align=top> + <b>Filter Pipeline Message</b> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td>Version</td> + <td>Number of Filters</td> + <td colspan=2>Reserved</td> + </tr> + + <tr align=center> + <td colspan=4>Reserved</td> + </tr> + + <tr align=center> + <td colspan=4><br>Filter List<br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Version</td> + <td>The version number for this message. 
This document + describes version one.</td> + </tr> + + <tr valign=top> + <td>Number of Filters</td> + <td>The total number of filters described by this + message. The maximum possible number of filters in a + message is 32.</td> + </tr> + + <tr valign=top> + <td>Filter List</td> + <td>A description of each filter. A filter description + appears in the next table.</td> + </tr> + </table> + </center> + + <p> + <center> + <table border align=center cellpadding=4 width="80%"> + <caption align=top> + <b>Filter Pipeline Message</b> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td colspan=2>Filter Identification</td> + <td colspan=2>Name Length</td> + </tr> + + <tr align=center> + <td colspan=2>Flags</td> + <td colspan=2>Client Data Number of Values</td> + </tr> + + <tr align=center> + <td colspan=4><br>Name<br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>Client Data<br><br></td> + </tr> + + <tr align=center> + <td colspan=4>Padding</td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Filter Identification</td> + <td>This is a unique (except in the case of testing) + identifier for the filter. Values from zero through 255 + are reserved for filters defined by the NCSA HDF5 + library. Values 256 through 511 have been set aside for + use when developing/testing new filters. The remaining + values are allocated to specific filters by contacting the + <a href="mailto:hdf5dev@ncsa.uiuc.edu">HDF5 Development + Team</a>.</td> + </tr> + + <tr valign=top> + <td>Name Length</td> + <td>Each filter has an optional null-terminated ASCII name + and this field holds the length of the name including the + null termination padded with nulls to be a multiple of + eight. 
If the filter has no name then a value of zero is + stored in this field.</td> + </tr> + + <tr valign=top> + <td>Flags</td> + <td>The flags indicate certain properties for a filter. The + bit values defined so far are: + + <dl> + <dt><code>bit 1</code> + <dd>If set then the filter is an optional filter. + During output, if an optional filter fails it will be + silently removed from the pipeline. + </dl> + </tr> + + <tr valign=top> + <td>Client Data Number of Values</td> + <td>Each filter can store a few integer values to control + how the filter operates. The number of entries in the + Client Data array is stored in this field.</td> + </tr> + + <tr valign=top> + <td>Name</td> + <td>If the Name Length field is non-zero then it will + contain the size of this field, a multiple of eight. This + field contains a null-terminated, ASCII character + string to serve as a comment/name for the filter.</td> + </tr> + + <tr valign=top> + <td>Client Data</td> + <td>This is an array of four-byte integers which will be + passed to the filter function. The Client Data Number of + Values determines the number of elements in the + array.</td> + </tr> + + <tr valign=top> + <td>Padding</td> + <td>Four bytes of zeros are added to the message at this + point if the Client Data Number of Values field contains + an odd number.</td> + </tr> + </table> + </center> + + <hr> + <h4><a name="AttributeMessage">Name: Attribute</a></h4> + <b>Type:</b> 0x000C<BR> + <b>Length:</b> varies<BR> + <b>Status:</b> Optional, may be repeated.<BR> + + <p><b>Purpose and Description:</b> The <em>Attribute</em> + message is used to list objects in the HDF file which are used + as attributes, or "meta-data" about the current object. An + attribute is a small dataset; it has a name, a datatype, a data + space, and raw data. 
Since attributes are stored in the object + header they must be relatively small (<64kb) and can be + associated with any type of object which has an object header + (groups, datasets, named types and spaces, etc.). + + <p> + <center> + <table border align=center cellpadding=4 width="80%"> + <caption align=top> + <b>Attribute Message</b> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td>Version</td> + <td>Reserved</td> + <td colspan=2>Name Size</td> + </tr> + + <tr align=center> + <td colspan=2>Type Size</td> + <td colspan=2>Space Size</td> + </tr> + + <tr align=center> + <td colspan=4><br>Name<br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>Type<br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>Space<br><br></td> + </tr> + + <tr align=center> + <td colspan=4><br>Data<br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Version</td> + <td>Version number for the message. This document describes + version 1 of attribute messages.</td> + </tr> + + <tr valign=top> + <td>Reserved</td> + <td>This field is reserved for later use and is set to + zero.</td> + </tr> + + <tr valign=top> + <td>Name Size</td> + <td>The length of the attribute name in bytes including the + null terminator. Note that the Name field below may + contain additional padding not represented by this + field.</td> + </tr> + + <tr valign=top> + <td>Type Size</td> + <td>The length of the datatype description in the Type + field below. Note that the Type field may contain + additional padding not represented by this field.</td> + </tr> + + <tr valign=top> + <td>Space Size</td> + <td>The length of the dataspace description in the Space + field below. 
Note that the Space field may contain + additional padding not represented by this field.</td> + </tr> + + <tr valign=top> + <td>Name</td> + <td>The null-terminated attribute name. This field is + padded with additional null characters to make it a + multiple of eight bytes.</td> + </tr> + + <tr valign=top> + <td>Type</td> + <td>The datatype description follows the same format as + described for the datatype object header message. This + field is padded with additional zero bytes to make it a + multiple of eight bytes.</td> + </tr> + + <tr valign=top> + <td>Space</td> + <td>The dataspace description follows the same format as + described for the dataspace object header message. This + field is padded with additional zero bytes to make it a + multiple of eight bytes.</td> + </tr> + + <tr valign=top> + <td>Data</td> + <td>The raw data for the attribute. The size is determined + from the datatype and dataspace descriptions. This + field is <em>not</em> padded with additional zero + bytes.</td> + </tr> + </table> + </center> + + <hr> + <h4><a name="NameMessage">Name: Object Name</a></h4> + + <p><b>Type:</b> 0x000D<br> + <b>Length:</b> varies<br> + <b>Status:</b> Optional, may not be repeated. + + <p><b>Purpose and Description:</b> The object name or comment is + designed to be a short description of an object. An object name + is a sequence of non-zero (<code>\0</code>) ASCII characters with no other + formatting included by the library. 
+ + <p> + <center> + <table border align=center cellpadding=4 width="80%"> + <caption align=top> + <b>Name Message</b> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td colspan=4><br>Name<br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Name</td> + <td>A null terminated ASCII character string.</td> + </tr> + </table> + </center> + + <hr> + <h4><a name="ModifiedMessage">Name: Object Modification Date & Time</a></h4> + + <p><b>Type:</b> 0x000E<br> + <b>Length:</b> fixed<br> + <b>Status:</b> Optional, may not be repeated. + + <p><b>Purpose and Description:</b> The object modification date + and time is a timestamp which indicates (using ISO-8601 date and + time format) the last modification of an object. The time is + updated when any object header message changes according to the + system clock where the change was posted. + + <p> + <center> + <table border align=center cellpadding=4 width="80%"> + <caption align=top> + <b>Modification Time Message</b> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td colspan=4>Year</td> + </tr> + + <tr align=center> + <td colspan=2>Month</td> + <td colspan=2>Day of Month</td> + </tr> + + <tr align=center> + <td colspan=2>Hour</td> + <td colspan=2>Minute</td> + </tr> + + <tr align=center> + <td colspan=2>Second</td> + <td colspan=2>Reserved</td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Year</td> + <td>The four-digit year as an ASCII string. 
For example, + <code>1998</code>. All fields of this message should be interpreted + as coordinated universal time (UTC)</td> + </tr> + + <tr valign=top> + <td>Month</td> + <td>The month number as a two digit ASCII string where + January is <code>01</code> and December is <code>12</code>.</td> + </tr> + + <tr valign=top> + <td>Day of Month</td> + <td>The day number within the month as a two digit ASCII + string. The first day of the month is <code>01</code>.</td> + </tr> + + <tr valign=top> + <td>Hour</td> + <td>The hour of the day as a two digit ASCII string where + midnight is <code>00</code> and 11:00pm is <code>23</code>.</td> + </tr> + + <tr valign=top> + <td>Minute</td> + <td>The minute of the hour as a two digit ASCII string where + the first minute of the hour is <code>00</code> and + the last is <code>59</code>.</td> + </tr> + + <tr valign=top> + <td>Second</td> + <td>The second of the minute as a two digit ASCII string + where the first second of the minute is <code>00</code> + and the last is <code>59</code>.</td> + </tr> + + <tr valign=top> + <td>Reserved</td> + <td>This field is reserved and should always be zero.</td> + </tr> + </table> + </center> + + <hr> + <h4><a name="SharedMessage">Name: Shared Object Message</a></h4> + <b>Type:</b> 0x000F<br> + <b>Length:</b> 4 Bytes<br> + <b>Status:</b> Optional, may be repeated. + + <p>A constant message can be shared among several object headers + by writing that message in the global heap and having the object + headers all point to it. The pointing is accomplished with a + Shared Object message which is understood directly by the object + header layer of the library. It is also possible to have a + message of one object header point to a message in some other + object header, but care must be exercised to prevent cycles. + + <p>If a message is shared, then the message appears in the global + heap and its message ID appears in the Header Message Type + field of the object header. 
Also, the Flags field in the object + header for that message will have bit two set (the + <code>H5O_FLAG_SHARED</code> bit). The message body in the + object header will be that of a Shared Object message defined + here and not that of the pointed-to message. + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=top> + <b>Shared Message Message</b> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align=center> + <td>Version</td> + <td>Flags</td> + <td colspan=2>Reserved</td> + </tr> + + <tr align=center> + <td colspan=4>Reserved</td> + </tr> + + <tr align=center> + <td colspan=4><br>Pointer<br><br></td> + </tr> + </table> + </center> + + <p> + <center> + <table align=center width="80%"> + <tr> + <th width="30%">Field Name</th> + <th width="70%">Description</th> + </tr> + + <tr valign=top> + <td>Version</td> + <td>The version number for the message. This document + describes version one of shared messages.</td> + </tr> + + <tr valign=top> + <td>Flags</td> + <td>The Shared Message message points to a message which is + shared among multiple object headers. The Flags field + describes the type of sharing: + + <dl> + <dt><code>Bit 0</code> + <dd>If this bit is clear then the actual message is the + first message in some other object header; otherwise + the actual message is stored in the global heap. + + <dt><code>Bits 2-7</code> + <dd>Reserved (always zero) + </dl> + </tr> + + <tr valign=top> + <td>Pointer</td> + <td>This field points to the actual message. The format of + the pointer depends on the value of the Flags field. If + the actual message is in the global heap then the pointer + is the file address of the global heap collection that + holds the message, and a four-byte index into that + collection. 
Otherwise the pointer is a group entry + that points to some other object header.</td> + </tr> + </table> + </center> + + +<hr> +<h4><a name="ContinuationMessage">Name: Object Header Continuation</a></h4> +<b>Type:</b> 0x0010<BR> +<b>Length:</b> fixed<BR> +<b>Status:</b> Optional, may be repeated.<BR> +<b>Purpose and Description:</b> The object header continuation is the location +in the file of more header messages for the current data object. This can be +used when header blocks are large, or likely to change over time.<BR> +<b>Format of Data:</b><p> + The object header continuation is formatted as follows (assuming a 4-byte +length & offset are being used in the current file): + +<P> +<center> +<table border cellpadding=4 width=60%> +<caption align=bottom> +<B>HDF5 Object Header Continuation Message Layout</B> +</caption> + +<tr align=center> +<th width=25%>byte</th> +<th width=25%>byte</th> +<th width=25%>byte</th> +<th width=25%>byte</th> + +<tr align=center> +<td colspan=4>Header Continuation Offset</td> +<tr align=center> +<td colspan=4>Header Continuation Length</td> +</table> +</center> + +<P> +<dl> +<dt>The elements of the Header Continuation Message are described below: +<dd> +<dl> +<dt>Header Continuation Offset: (<offset> bytes) +<dd>This value is the offset in bytes from the beginning of the file where the +header continuation information is located. +<dt>Header Continuation Length: (<length> bytes) +<dd>This value is the length in bytes of the header continuation information in +the file. 
+</dl> +</dl> + +<!-- Delete examples throughout doc +<h4><a name="ContinuationExample">Examples:</a></h4> + [straightforward] +--> + +<hr> +<h4><a name="SymbolTableMessage">Name: Group Message</a></h4> +<b>Type:</b> 0x0011<BR> +<b>Length:</b> fixed<BR> +<b>Status:</b> Required for groups, may not be repeated.<BR> +<b>Purpose and Description:</b> Each group has a B-tree and a +name heap which are pointed to by this message.<BR> +<b>Format of data:</b> +<p>The group message is formatted as follows: + +<p> +<center> +<table border cellpadding=4 width="80%"> +<caption align=bottom> +<b>HDF5 Object Header Group Message Layout</b> +</caption> + +<tr align=center> +<th width="25%">byte</th> +<th width="25%">byte</th> +<th width="25%">byte</th> +<th width="25%">byte</th> + +<tr align=center> +<td colspan=4>B-tree Address</td> + +<tr align=center> +<td colspan=4>Heap Address</td> +</table> +</center> + +<P> +<dl> +<dt>The elements of the Group Message are described below: +<dd> +<dl> +<dt>B-tree Address (<offset> bytes) +<dd>This value is the offset in bytes from the beginning of the file +where the B-tree is located. +<dt>Heap Address (<offset> bytes) +<dd>This value is the offset in bytes from the beginning of the file +where the group name heap is located. +</dl> +</dl> + +<h3><a name="SharedObjectHeader">Disk Format: Level 2b - Shared Data Object Headers</a></h3> +<P>In order to share header messages between several dataset objects, object +header messages may be placed into the global heap. Since these +messages require additional information beyond the basic object header message +information, the format of the shared message is detailed below. 
+ +<BR> <BR> +<center> +<table border cellpadding=4 width=60%> +<caption align=bottom> +<B>HDF5 Shared Object Header Message</B> +</caption> + +<tr align=center> +<th width=25%>byte</th> +<th width=25%>byte</th> +<th width=25%>byte</th> +<th width=25%>byte</th> + +<tr align=center> +<td colspan=4>Reference Count of Shared Header Message</td> +<tr align=center> +<td colspan=4><br> Shared Object Header Message<br> <br></td> +</table> +</center> + +<p> +<dl> +<dt> The elements of the shared object header message are described below: +<dd> +<dl> +<dt>Reference Count of Shared Header Message: (32-bit unsigned integer) +<dd>This value is used to keep a count of the number of dataset objects which +refer to this message from their dataset headers. When this count reaches zero, +the shared message header may be removed from the global heap. +<dt>Shared Object Header Message: (various lengths) +<dd>The data stored for the shared object header message is formatted in the +same way as the private object header messages described in the object header +description earlier in this document and begins with the header message Type. +</dl> +</dl> + + +<h3><a name="DataStorage">Disk Format: Level 2c - Data Object Data Storage</a></h3> +<P>The data for an object is stored separately from the header +information in the file and may not actually be located in the HDF5 file +itself if the header indicates that the data is stored externally. The +information for each record in the object is stored according to the +dimensionality of the object (indicated in the dimensionality header message). +Multi-dimensional data is stored in C order [same as current scheme], i.e. the +"last" dimension changes fastest. +<P>Data whose elements are composed of simple number-types are stored in +native-endian IEEE format, unless they are specifically defined as being stored +in a different machine format with the architecture-type information from the +number-type header message. 
This means that each architecture will need to +[potentially] byte-swap data values into the internal representation for that +particular machine. +<P> Data with a "variable" sized number-type is stored in a data heap +internal to the HDF5 file. Global heap identifiers are stored in the +data object storage. +<P>Data whose elements are composed of pointer number-types are stored in several +different ways depending on the particular pointer type involved. Simple +pointers are just stored as the dataset offset of the object being pointed to with the +size of the pointer being the same number of bytes as offsets in the file. +Partial-object pointers are stored as a heap-ID which points to the following +information within the file-heap: an offset of the object pointed to, number-type +information (same format as header message), dimensionality information (same +format as header message), sub-set start and end information (i.e. a coordinate +location for each), and field start and end names (i.e. a [pointer to the] +string indicating the first field included and a [pointer to the] string name +for the last field). + +<P>Data of a compound datatype is stored as a contiguous stream of the items +in the structure, with each item formatted according to its datatype. 
+ +</body> +</html> diff --git a/doxygen/examples/H5.format.1.1.html b/doxygen/examples/H5.format.1.1.html new file mode 100644 index 0000000..ebbbe8e --- /dev/null +++ b/doxygen/examples/H5.format.1.1.html @@ -0,0 +1,6439 @@ +<html> + <head> + <title> + HDF5 File Format Specification Version 1.1 + </title> + +<STYLE TYPE="text/css"> + +P { text-indent: 2em} +P.item { margin-left: 2em; text-indent: -2em} +P.item2 { margin-left: 2em; text-indent: 2em} + +TABLE.format { border:solid; border-collapse:collapse; caption-side:top; text-align:center; width:80%;} +TABLE.format TH { border:ridge; padding:4px; width:25%;} +TABLE.format TD { border:ridge; padding:4px; } +TABLE.format CAPTION { font-weight:bold; font-size:larger;} + +TABLE.note {border:none; text-align:right; width:80%;} + +TABLE.desc { border:solid; border-collapse:collapse; caption-side:top; text-align:left; width:80%;} +TABLE.desc TR { vertical-align:top;} +TABLE.desc TH { border-style:ridge; font-size:larger; padding:4px; text-decoration:underline;} +TABLE.desc TD { border-style:ridge; padding:4px; } +TABLE.desc CAPTION { font-weight:bold; font-size:larger;} + +TABLE.list { border:none; } +TABLE.list TR { vertical-align:top;} +TABLE.list TH { border:none; text-decoration:underline;} +TABLE.list TD { border:none; } + +</STYLE> +</head> + <body> + + <center> + <table border=0 width=90%> + <tr> + <td valign=top> + <ol type=I> + <li><a href="#Intro">Introduction</a> + <li><a href="#FileMetaData">Disk Format Level 0 - File Metadata</a> + <font size=-2> + <ol type=A> + <li><a href="#SuperBlock">Disk Format Level 0A - File Signature and Super Block</a> + <li><a href="#DriverInfo">Disk Format Level 0B - File Driver Info</a> + </ol> + </font> + <li><a href="#FileInfra">Disk Format Level 1 - File Infrastructure</a> + <font size=-2> + <ol type=A> + <li><a href="#Btrees">Disk Format Level 1A - B-link Trees and B-tree Nodes</a> + <li><a href="#SymbolTable">Disk Format Level 1B - Group</a> + <li><a 
href="#SymbolTableEntry">Disk Format Level 1C - Group Entry</a> + <li><a href="#LocalHeap">Disk Format Level 1D - Local Heaps</a> + <li><a href="#GlobalHeap">Disk Format Level 1E - Global Heap</a> + <li><a href="#FreeSpaceIndex">Disk Format Level 1F - Free-space Index</a> + </ol> + </font> + <li><a href="#DataObject">Disk Format Level 2 - Data Objects</a> + <font size=-2> + <ol type=A> + <li><a href="#ObjectHeader">Disk Format Level 2a - Data Object Headers</a> + <ol type=1> + <li><a href="#NILMessage">Name: NIL</a> <!-- 0x0000 --> + <li><a href="#SimpleDataSpace">Name: Simple Dataspace</a> <!-- 0x0001 --> +<!-- <li><a href="#DataSpaceMessage">Name: Complex Dataspace</a> --> <!-- 0x0002 --> + <li><a href="#ReservedMessage_0002">Name: Reserved - not assigned yet</a> <!-- 0x0002 --> + <li><a href="#DataTypeMessage">Name: Datatype</a> <!-- 0x0003 --> + <li><a href="#OldFillValueMessage">Name: Data Storage - Fill Value (Old)</a> <!-- 0x0004 --> + <li><a href="#FillValueMessage">Name: Data Storage - Fill Value</a> <!-- 0x0005 --> + </ol> + </ol> + </font> + </ol> + </td><td> </td><td valign=top> + <ol type=I start=4> + + <li><a href="#DataObject">Disk Format Level 2 - Data Objects</a> + <font size=-2><i>(Continued)</i> + <ol type=A> + <li><a href="#ObjectHeader">Disk Format Level 2a - Data Object Headers</a><i>(Continued)</i> + <ol type=1 start=6> +<!-- <li><a href="#CompactDataStorageMessage">Name: Data Storage - Compact</a> --> <!-- 0x0006 --> + <li><a href="#ReservedMessage_0006">Name: Reserved - not assigned yet</a> <!-- 0x0006 --> + <li><a href="#ExternalFileListMessage">Name: Data Storage - External Data Files</a> <!-- 0x0007 --> + <li><a href="#LayoutMessage">Name: Data Storage - Layout</a> <!-- 0x0008 --> + <li><a href="#ReservedMessage_0009">Name: Reserved - not assigned yet</a> <!-- 0x0009 --> + <li><a href="#ReservedMessage_000A">Name: Reserved - not assigned yet</a> <!-- 0x000a --> + <li><a href="#FilterMessage">Name: Data Storage - Filter Pipeline</a> <!-- 
0x000b --> + <li><a href="#AttributeMessage">Name: Attribute</a> <!-- 0x000c --> + <li><a href="#CommentMessage">Name: Object Comment</a> <!-- 0x000d --> + <li><a href="#OldModifiedMessage">Name: Object Modification Date and Time (Old)</a> <!-- 0x000e --> + <li><a href="#SharedMessage">Name: Shared Object Message</a> <!-- 0x000f --> + <li><a href="#ContinuationMessage">Name: Object Header Continuation</a> <!-- 0x0010 --> + <li><a href="#SymbolTableMessage">Name: Group Message</a> <!-- 0x0011 --> + <li><a href="#ModifiedMessage">Name: Object Modification Date and Time</a> <!-- 0x0012 --> + </ol> + <li><a href="#DataStorage">Disk Format: Level 2b - Data Object Data Storage</a> + </ol> + </font> + <LI><A href="#Appendix">Appendix</A> + </ol> +</td></tr> +</table> +</center> + + <BR> + <HR> + + + <h2>Introduction</h2> + + <table align=right width=100> + <tr><td> </td><td align=center> + <hr> + <img src="FF-IH_FileGroup.gif" alt="HDF5 Groups" hspace=15 vspace=15> + </td><td> </td></tr> + <tr><td> </td><td align=center> + <strong>Figure 1:</strong> Relationships among the HDF5 root group, other groups, and objects + <hr> + </td><td> </td></tr> + + <tr><td> </td><td align=center> + <img src="FF-IH_FileObject.gif" alt="HDF5 Objects" hspace=15 vspace=15> + </td><td> </td></tr> + <tr><td> </td><td align=center> + <strong>Figure 2:</strong> HDF5 objects -- datasets, datatypes, or dataspaces + <hr> + </td><td> </td></tr> + </table> + + + <P>The format of an HDF5 file on disk encompasses several + key ideas of the HDF4 and AIO file formats as well as + addressing some shortcomings therein. The new format is + more self-describing than the HDF4 format and is more + uniformly applied to data objects in the file. + + <P>An HDF5 file appears to the user as a directed graph. 
+ The nodes of this graph are the higher-level HDF5 objects + that are exposed by the HDF5 APIs: + + <ul> + <li>Groups + <li>Datasets + <li>Named datatypes + </ul> + + <P>At the lowest level, as information is actually written to the disk, + an HDF5 file is made up of the following objects: + <ul> + <li>A super block + <li>B-tree nodes (containing either symbol nodes or raw data chunks) + <li>Object headers + <li>A global heap + <li>Local heaps + <li>Free space + </ul> + + <P>The HDF5 library uses these low-level objects to represent the + higher-level objects that are then presented to the user or + to applications through the APIs. + For instance, a group is an object header that contains a message that + points to a local heap and to a B-tree which points to symbol nodes. + A dataset is an object header that contains messages that describe + datatype, space, layout, filters, external files, fill value, etc., + with the layout message pointing to either a raw data chunk or to a + B-tree that points to raw data chunks. + + + <h3>This Document</h3> + + <p>This document describes the lower-level data objects; + the higher-level objects and their properties are described + in the <a href="H5.user.html"><cite>HDF5 User's Guide</cite></a>. + + <P>Three levels of information comprise the file format. + Level 0 contains basic information for identifying and + defining information about the file. Level 1 information contains + the information about the pieces of a file shared by many objects + in the file (such as B-trees and heaps). Level 2 is the rest + of the file and contains all of the data objects, with each object + partitioned into header information, also known as + <em>metadata</em>, and data. + + <p>The sizes of various fields in the following layout tables are + determined by looking at the number of columns the field spans + in the table. 
There are three exceptions: (1) The size may be + overridden by specifying a size in parentheses, (2) the size of + addresses is determined by the <em>Size of Offsets</em> field + in the super block and is indicated in this document with a + superscripted 'O', and (3) the size of length fields is determined + by the <em>Size of Lengths</em> field in the super block and is + indicated in this document with a superscripted 'L'. + + <P>Values for all fields in this document should be treated as unsigned + integers, unless otherwise noted in the description of a field. + Additionally, all metadata fields are stored in little-endian byte + order. + </P> + + <BR> + <HR> + + <h2><a name="FileMetaData"> + Disk Format: Level 0 - File Metadata</a></h2> + + <H3><A name="SuperBlock"> + Disk Format: Level 0A - File Signature and Super Block</A></H3> + + <P>The super block may begin at certain predefined offsets within + the HDF5 file, allowing a block of unspecified content for + users to place additional information at the beginning (and + end) of the HDF5 file without limiting the HDF5 library's + ability to manage the objects within the file itself. This + feature was designed to accommodate wrapping an HDF5 file in + another file format or adding descriptive information to the + file without requiring the modification of the actual file's + information. The super block is located by searching for the + HDF5 file signature at byte offset 0, byte offset 512 and at + successive locations in the file, each a multiple of two of + the previous location, i.e. 0, 512, 1024, 2048, etc. + + <P>The super block is composed of a file signature, followed by + super block and group version numbers, information + about the sizes of offset and length values used to describe + items within the file, the size of each group page, + and a group entry for the root object in the file. 
+ + <br> + <div align=center> + <table class=format> + <caption> + HDF5 Super Block Layout + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan=4><br>HDF5 File Signature (8 bytes)<br><br></td> + </tr> + + <tr> + <td>Version # of Super Block</td> + <td>Version # of Global Free-space Storage</td> + <td>Version # of Root Group Symbol Table Entry</td> + <td>Reserved (zero)</td> + </tr> + + <tr> + <td>Version # of Shared Header Message Format</td> + <td>Size of Offsets</td> + <td>Size of Lengths</td> + <td>Reserved (zero)</td> + </tr> + + <tr> + <td colspan=2>Group Leaf Node K</td> + <td colspan=2>Group Internal Node K</td> + </tr> + + <tr> + <td colspan=4>File Consistency Flags</td> + </tr> + + <tr> + <td colspan=2 style="border:dotted;">Indexed Storage Internal Node K<sup>1</sup></td> + <td colspan=2 style="border:dotted;">Reserved (zero)<sup>1</sup></td> + </tr> + + <tr> + <td colspan=4>Base Address<sup>O</sup></td> + </tr> + + <tr> + <td colspan=4>Address of Global Free-space Heap<sup>O</sup></td> + </tr> + + <tr> + <td colspan=4>End of File Address<sup>O</sup></td> + </tr> + + <tr> + <td colspan=4>Driver Information Block Address<sup>O</sup></td> + </tr> + + <tr> + <td colspan=4>Root Group Symbol Table Entry</td> + </tr> + </table> + + <table class=note> + <tr><td> + (Items marked with an 'O' the above table are + <br> + of the size specified in "Size of Offsets.") + </td></tr> + <tr><td> + (Items marked with an '1' the above table are + <br> + new in version 1 of the superblock) + </td></tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>HDF5 File Signature</td> + <td> + <P>This field contains a constant value and can be used to + quickly identify a file as being an HDF5 file. 
The + constant value is designed to allow easy identification of + an HDF5 file and to allow certain types of data corruption + to be detected. The file signature of an HDF5 file always + contains the following values: + </P> + + <center> + <table border align=center cellpadding=4> + <tr align=center> + <td align=right>Decimal:</td> + <td width="8%">137</td> + <td width="8%">72</td> + <td width="8%">68</td> + <td width="8%">70</td> + <td width="8%">13</td> + <td width="8%">10</td> + <td width="8%">26</td> + <td width="8%">10</td> + </tr> + + <tr align=center> + <td align=right>Hexadecimal:</td> + <td>89</td> + <td>48</td> + <td>44</td> + <td>46</td> + <td>0d</td> + <td>0a</td> + <td>1a</td> + <td>0a</td> + </tr> + + <tr align=center> + <td align=right>ASCII C Notation:</td> + <td>\211</td> + <td>H</td> + <td>D</td> + <td>F</td> + <td>\r</td> + <td>\n</td> + <td>\032</td> + <td>\n</td> + </tr> + </table> + </center> + <br> + + <P>This signature both identifies the file as an HDF5 file + and provides for immediate detection of common + file-transfer problems. The first two bytes distinguish + HDF5 files on systems that expect the first two bytes to + identify the file type uniquely. The first byte is + chosen as a non-ASCII value to reduce the probability + that a text file may be misrecognized as an HDF5 file; + also, it catches bad file transfers that clear bit + 7. Bytes two through four name the format. The CR-LF + sequence catches bad file transfers that alter newline + sequences. The control-Z character stops file display + under MS-DOS. The final line feed checks for the inverse + of the CR-LF translation problem. (This is a direct + descendent of the <A href="http://www.libpng.org/pub/png/spec/PNG-Rationale.html#R.PNG-file-signature">PNG</A> file + signature.) 
+ </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Version Number of the Super Block</td> + <td> + <P>This value is used to determine the format of the + information in the super block. When the format of the + information in the super block is changed, the version number + is incremented to the next integer and can be used to + determine how the information in the super block is + formatted. + </P> + + <P>Values of 0 and 1 are defined for this field. + </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Version Number of the File Free-space Information</td> + <td> + <P>This value is used to determine the format of the + information in the File Free-space Information. + </P> + <P>The only value currently valid in this field is '0', which + indicates that the free space index is formatted as described + <A href="#FreeSpaceIndex">below</A>. + </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Version Number of the Root Group Symbol Table Entry</td> + <td> + <P>This value is used to determine the format of the + information in the Root Group Symbol Table Entry. When the + format of the information in that field is changed, the + version number is incremented to the next integer and can be + used to determine how the information in the field + is formatted. + </P> + <P>The only value currently valid in this field is '0', which + indicates that the root group symbol table entry is formatted as + described <A href="#SymbolTableEntry">below</A>. + </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Version Number of the Shared Header Message Format</td> + <td> + <P>This value is used to determine the format of the + information in a shared object header message. 
Since the format + of the shared header messages differs from the other private + header messages, a version number is used to identify changes + in the format. + </P> + <P>The only value currently valid in this field is '0', which + indicates that shared header messages are formatted as + described <A href="#SharedMessage">below</A>. + </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Size of Offsets</td> + <td> + <P>This value contains the number of bytes used to store + addresses in the file. The values for the addresses of + objects in the file are offsets relative to a base address, + usually the address of the super block signature. This + allows a wrapper to be added after the file is created + without invalidating the internal offset locations. + </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Size of Lengths</td> + <td> + <P>This value contains the number of bytes used to store + the size of an object. + </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Group Leaf Node K</td> + <td> + <P>Each leaf node of a group B-tree will have at + least this many entries but not more than twice this + many. If a group has a single leaf node then it + may have fewer entries. + </P> + <P>This value must be greater than zero. + </P> + <P>See the <A href="#Btrees">description</A> of B-trees below. + </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Group Internal Node K</td> + <td> + <P>Each internal node of a group B-tree will have at + least this many entries but not more than twice this + many. If the group has only one internal + node then it might have fewer entries. + </P> + <P>This value must be greater than zero. + </P> + <P>See the <A href="#Btrees">description</A> of B-trees below. 
+ </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>File Consistency Flags</td> + <td> + <P>This value contains flags to indicate information + about the consistency of the information contained + within the file. Currently, the following bit flags are + defined: + <ul> + <li>Bit 0 set indicates that the file is opened for + write-access. + <li>Bit 1 set indicates that the file has + been verified for consistency and is guaranteed to be + consistent with the format defined in this document. + <li>Bits 2-31 are reserved for future use. + </ul> + Bit 0 should be + set as the first action when a file is opened for write + access and should be cleared only as the final action + when closing a file. Bit 1 should be cleared during + normal access to a file and only set after the file's + consistency is guaranteed by the library or a + consistency utility. + </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Indexed Storage Internal Node K</td> + <td> + <P>Each internal node of a indexed storage B-tree will have at + least this many entries but not more than twice this + many. If the group has only one internal + node then it might have fewer entries. + </P> + <P>This value must be greater than zero. + </P> + <P>See the <A href="#Btrees">description</A> of B-trees below. + </P> + + <P><EM>This field is present in version 1+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Base Address</td> + <td> + <P>This is the absolute file address of the first byte of + the HDF5 data within the file. The library currently + constrains this value to be the absolute file address + of the super block itself when creating new files; + future versions of the library may provide greater + flexibility. 
When opening an existing file and this address does + not match the offset of the superblock, the library assumes + that the entire contents of the HDF5 file have been adjusted in + the file and adjusts the base address and end of file address to + reflect their new positions in the file. Unless otherwise noted, + all other file addresses are relative to this base + address. + </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Address of Global Free-space Index</td> + <td> + <P>Free-space management is not yet defined in the HDF5 + file format and is not handled by the library. + Currently this field always contains the + <A href="#UndefinedAddress">undefined address</A>. + </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>End of File Address</td> + <td> + <P>This is the absolute file address of the first byte past + the end of all HDF5 data. It is used to determine whether a + file has been accidentally truncated and as an address where + file data allocation can occur if space from the free list is + not used. + </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Driver Information Block Address</td> + <td> + <P>This is the relative file address of the file driver + information block which contains driver-specific + information needed to reopen the file. If there is no + driver information block then this entry should be the + <A href="#UndefinedAddress">undefined address</A>. + </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + + <tr> + <td>Root Group Symbol Table Entry</td> + <td> + <P>This is the <A href="#SymbolTableEntry">symbol table entry</A> + of the root group, which serves as the entry point into + the group graph for the file.
+ </P> + + <P><EM>This field is present in version 0+ of the superblock.</EM> + </P> + </td> + </tr> + </table> + </div> + + <H3><A name="DriverInfo"> + Disk Format: Level 0B - File Driver Info</A></H3> + + <p>The <em>file driver information block</em> is an optional region of the + file which contains information needed by the file driver in + order to reopen a file. The format of the file driver information + block is: + + <br> + <div align=center> + <table class=format> + <caption> + Driver Information Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan=3>Reserved (zero)</td> + </tr> + + <tr> + <td colspan=4>Driver Information Size (4 bytes)</td> + </tr> + + <tr> + <td colspan=4><br>Driver Identification (8 bytes)<br><br></td> + </tr> + + <tr> + <td colspan=4><br><br>Driver Information (<em>n</em> bytes)<br><br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td> + <P>The version number of the driver information block. The + file format documented here is version zero. + </P> + </td> + </tr> + + <tr> + <td>Driver Information Size</td> + <td> + <P>The size in bytes of the Driver Information part of this + structure. + </P> + </td> + </tr> + + <tr> + <td>Driver Identification</td> + <td> + <P>This is an eight-byte ASCII string without null + termination which identifies the driver and version number + of the Driver Information block. The predefined drivers + supplied with the HDF5 library are identified by the + letters <code>NCSA</code> followed by the first four characters of + the driver name. If the Driver Information block is not + the original version then the last letter(s) of the + identification will be replaced by a version number in + ASCII. 
+ </P> + <P> + For example, the various versions of the <em>multi driver</em> + will be identified by <code>NCSAmult</code>. + (<code>NCSAmult</code> is simply <code>NCSAmulti</code> truncated + to eight characters. Subsequent identifiers will be created by + substituting sequential numerical values for the final character, + starting with zero.) <em>multi driver</em> is the only default driver that + is encoded in this field. + </P> + <P> + Identification for user-defined drivers + is eight-byte long and arbitrary but should be unique and avoid + the four character prefix "NCSA". + </P> + </td> + </tr> + + <tr valign=top> + <td>Driver Information</td> + <td>Driver information is encoded/decoded in a format defined by the + file driver. <em>multi driver</em> is the only default driver that has driver + information stored in this field. Its format is explained in the + following block.</td> + </tr> + </table> + </div> + + <BR> + <P><em>Multi driver</em> has the following format:</P> + + <div align=center> + <table class=format> + <caption> + Multi Driver Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Member Mapping</td> + <td>Member Mapping</td> + <td>Member Mapping</td> + <td>Member Mapping</td> + </tr> + + <tr> + <td>Member Mapping</td> + <td>Member Mapping</td> + <td>Reserved</td> + <td>Reserved</td> + </tr> + + <tr> + <td colspan=4><br>Address of Member File 1<br><br></td> + </tr> + + <tr> + <td colspan=4><br>End of Address for Member File 1<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Address of Member File 2<br><br></td> + </tr> + + <tr> + <td colspan=4><br>End of Address for Member File 2<br><br></td> + </tr> + + <tr> + <td colspan=4><br>... 
...<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Name of Member File 1<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Name of Member File 2<br><br></td> + </tr> + + <tr> + <td colspan=4><br>... ...<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Member Mapping</td> + <td><P><em>Multi driver</em> enables different types of HDF5 data and + metadata to be written to separate files. These files are viewed by the + library as a single virtual HDF5 file with a single file address. + It allows maximal 6 files to be created. + In sequence, these <em>Member Mapping</em> fields are for super block, + B-tree, raw data, global heap, local heap, + and object header. More than one type of data can be written to the + same file.</P> + <P>These <em>Member Mapping</em> fields are integer values from 1 to 6 + indicating how the data can be mapped to or merged with another type of + data. + <table class=list> + <tr> + <th width="30%">Member Mapping</th> + <th align=left>Description</th> + </tr> + <tr> + <td align=center>1</td> + <td>The super block data.</td> + </tr> + <tr> + <td align=center>2</td> + <td>The B-tree data.</td> + </tr> + <tr> + <td align=center>3</td> + <td>The raw data.</td> + </tr> + <tr> + <td align=center>4</td> + <td>The global heap data.</td> + </tr> + <tr> + <td align=center>5</td> + <td>The local heap data.</td> + </tr> + <tr> + <td align=center>6</td> + <td>The object header data.</td> + </tr> + </table></P> + For example, if the third field has the value 3 and all the rest have the + value 1, it means there are two files, one for raw data, one for super block, + B-tree, global heap, local heap, and object header. + </td> + </tr> + + <tr> + <td>Reserved</td> + <td><P>These fields are reserved and should always be zero.</P></td> + </tr> + + <tr> + <td>Address of Member File</td> + <td><P>Specifies the virtual address. 
A normally eight-byte integer with + the value from <em>0</em> (zero) to the maximum value, + at which the member file starts.</P></td> + </tr> + + <tr> + <td>End of Address for Member File</td> + <td><P>The end of allocated address for the member file. A normally eight-byte + integer value.</P></td> + </tr> + + <tr> + <td>Name of Member File</td> + <td><P>The null-terminated name of the member file. Its length should be a multiple of + 8 bytes. Additional bytes will be padded with <em>NULL</em>s. The default naming + convention is <em>%%s-X.h5</em>, where <em>X</em> is one of the letters + <em>s</em> (for super block), <em>b</em> (for B-tree), <em>r</em> (for raw data), + <em>g</em> (for global heap), <em>l</em> (for local heap), and <em>o</em> (for + object header). The name of the whole HDF5 file is substituted for the <em>%s</em> + in the string. + </P> + </td> + </tr> + </table> + </div> + + <BR> + <HR> + + <h2><a name="FileInfra"> + Disk Format: Level 1 - File Infrastructure</a></h2> + <h3><a name="Btrees">Disk Format: Level 1A - B-link Trees and B-tree Nodes</a></h3> + + <p>B-link trees allow flexible storage for objects which tend to grow + in ways that cause the object to be stored discontiguously. B-trees + are described in various algorithms books including "Introduction to + Algorithms" by Thomas H. Cormen, Charles E. Leiserson, and Ronald + L. Rivest. The B-link tree, in which the sibling nodes at a + particular level in the tree are stored in a doubly-linked list, + is described in the "Efficient Locking for Concurrent Operations + on B-trees" paper by Phillip Lehman and S. Bing Yao as published + in the <cite>ACM Transactions on Database Systems</cite>, Vol. 6, + No. 4, December 1981. + + <p>The B-link trees implemented by the file format contain one more + key than the number of children. In other words, each child + pointer out of a B-tree node has a left key and a right key.
+ The pointers out of internal nodes point to sub-trees while + the pointers out of leaf nodes point to symbol nodes and + raw data chunks. + Aside from that difference, internal nodes and leaf nodes + are identical. + + <br> + <div align=center> + <table class=format> + <caption> + B-tree Nodes + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + + <tr> + <td colspan=4>Signature</td> + + <tr> + <td>Node Type</td> + <td>Node Level</td> + <td colspan=2>Entries Used</td> + + <tr> + <td colspan=4>Address of Left Sibling<sup>O</sup></td> + + <tr> + <td colspan=4>Address of Right Sibling<sup>O</sup></td> + + <tr> + <td colspan=4>Key 0 (variable size)</td> + + <tr> + <td colspan=4>Address of Child 0<sup>O</sup></td> + + <tr> + <td colspan=4>Key 1 (variable size)</td> + + <tr> + <td colspan=4>Address of Child 1<sup>O</sup></td> + + <tr> + <td colspan=4>...</td> + + <tr> + <td colspan=4>Key 2<em>K</em> (variable size)</td> + + <tr> + <td colspan=4>Address of Child 2<em>K</em><sup>O</sup></td> + + <tr> + <td colspan=4>Key 2<em>K</em>+1 (variable size)</td> + </table> + + <table class=note> + <tr><td> + (Items marked with an 'O' in the above table are + <br> + of the size specified in "Size of Offsets.") + </td></tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Signature</td> + <td> + <P>The ASCII character string "<code>TREE</code>" is + used to indicate the + beginning of a B-link tree node. This gives file + consistency checking utilities a better chance of + reconstructing a damaged file. + </P> + </td> + </tr> + + <tr> + <td>Node Type</td> + <td> + <P>Each B-link tree points to a particular type of data. + This field indicates the type of data as well as + implying the maximum degree <em>K</em> of the tree and + the size of each Key field.
+ </P> + + <table class=list> + <tr> + <th width="30%">Node Type</th> + <th align=left>Description</th> + </tr> + <tr> + <td align=center>0</td> + <td>This tree points to group nodes.</td> + </tr> + <tr> + <td align=center>1</td> + <td>This tree points to raw data chunk nodes.</td> + </tr> + </table> + </td> + </tr> + + <tr> + <td>Node Level</td> + <td> + <P>The node level indicates the level at which this node + appears in the tree (leaf nodes are at level zero). Not + only does the level indicate whether child pointers + point to sub-trees or to data, but it can also be used + to help file consistency checking utilities reconstruct + damaged trees. + </P> + </td> + </tr> + + <tr valign=top> + <td>Entries Used</td> + <td> + <P>This determines the number of children to which this + node points. All nodes of a particular type of tree + have the same maximum degree, but most nodes will point + to fewer than that number of children. The valid child + pointers and keys appear at the beginning of the node + and the unused pointers and keys appear at the end of + the node. The unused pointers and keys have undefined + values. + </P> + </td> + </tr> + + <tr valign=top> + <td>Address of Left Sibling</td> + <td> + <P>This is the relative file address of the left sibling of + the current node. If the current + node is the left-most node at this level then this field + is the <A href="#UndefinedAddress">undefined address</A>. + </P> + </td> + </tr> + + <tr valign=top> + <td>Address of Right Sibling</td> + <td> + <P>This is the relative file address of the right sibling of + the current node. If the current + node is the right-most node at this level then this + field is the <A href="#UndefinedAddress">undefined address</A>. + </P> + </td> + </tr> + + <tr valign=top> + <td>Keys and Child Pointers</td> + <td> + <P>Each tree has 2<em>K</em>+1 keys with 2<em>K</em> + child pointers interleaved between the keys.
The number + of keys and child pointers actually containing valid + values is determined by the node's <em>Entries Used</em> field. + If that field is <em>N</em> then the B-link tree contains + <em>N</em> child pointers and <em>N</em>+1 keys. + </P> + </td> + </tr> + + <tr valign=top> + <td>Key</td> + <td> + <P>The format and size of the key values are determined by + the type of data to which this tree points. The keys are + ordered and are boundaries for the contents of the child + pointer; that is, the key values represented by child + <em>N</em> fall between Key <em>N</em> and Key + <em>N</em>+1. Whether the interval is open or closed on + each end is determined by the type of data to which the + tree points. + </P> + + <P> + The format of the key depends on the node type. + For nodes of node type 0 (group nodes), the key is formatted as + follows: + <center> + <table class=list> + <tr> + <td width=30%>A single field of <i>Size of Lengths</i> + bytes:</td> + <td>Indicates the byte offset into the local heap + for the first object name in the subtree which + that key describes. + </td> + </tr> + </table> + </center> + </P> + + <P> + For nodes of node type 1 (chunked raw data nodes), the key is + formatted as follows: + <center> + <table class=list> + <tr> + <td width=30%>Bytes 1-4:</td> + <td>Size of chunk in bytes.</td> + </tr> + <tr> + <td>Bytes 5-8:</td> + <td>Filter mask, a 32-bit bitfield indicating which + filters have been skipped for this chunk. Each filter + has an index number in the pipeline (starting at 0, with + the first filter to apply) and if that filter is skipped, + the bit corresponding to its index is set.</td> + </tr> + <tr> + <td><em>N</em> 64-bit fields:</td> + <td>A 64-bit index indicating the offset of the + chunk within the dataset where <i>N</i> is the number + of dimensions of the dataset.
For example, if + a chunk in a 3-dimensional dataset begins at the + position <code>[5,5,5]</code>, there will be three + such 64-bit indices, each with the value of + <code>5</code>.</td> + </tr> + </table> + </center> + </P> + </td> + </tr> + + <tr valign=top> + <td>Child Pointer</td> + <td> + <P>The tree node contains file addresses of subtrees or + data depending on the node level. Nodes at Level 0 point + to data addresses, either raw data chunk or group nodes. + Nodes at non-zero levels point to other nodes of the + same B-tree. + </P> + <P>For raw data chunk nodes, the child pointer is the address + of a single raw data chunk. For group nodes, the child pointer + points to a <A href="#SymbolTable">symbol table</A>, which contains + information for multiple symbol table entries. + </P> + </td> + </tr> + </table> + </div> + + <p> + Conceptually, each B-tree node looks like this: + <center> + <table> + <tr valign=top align=center> + <td>key[0]</td><td> </td> + <td>child[0]</td><td> </td> + <td>key[1]</td><td> </td> + <td>child[1]</td><td> </td> + <td>key[2]</td><td> </td> + <td>...</td><td> </td> + <td>...</td><td> </td> + <td>key[<i>N</i>-1]</td><td> </td> + <td>child[<i>N</i>-1]</td><td> </td> + <td>key[<i>N</i>]</td> + </tr> + </table> + </center> + <br> + + where child[<i>i</i>] is a pointer to a sub-tree (at a level + above Level 0) or to data (at Level 0). + Each key[<i>i</i>] describes an <i>item</i> stored by the B-tree + (a chunk or an object of a group node). The range of values + represented by child[<i>i</i>] is indicated by key[<i>i</i>] + and key[<i>i</i>+1]. + + + <p>The following question must next be answered: + "Is the value described by key[<i>i</i>] contained in + child[<i>i</i>-1] or in child[<i>i</i>]?" + The answer depends on the type of tree. 
+ In trees for groups (node type 0) the object described by + key[<i>i</i>] is the greatest object contained in + child[<i>i</i>-1] while in chunk trees (node type 1) the + chunk described by key[<i>i</i>] is the least chunk in + child[<i>i</i>]. + + <p>That means that key[0] for group trees is sometimes unused; + it points to offset zero in the heap, which is always the + empty string and compares as "less-than" any valid object name. + + <p>And key[<i>N</i>] for chunk trees is sometimes unused; + it contains a chunk offset which compares as "greater-than" + any other chunk offset and has a chunk byte size of zero + to indicate that it is not actually allocated. + + + <h3><a name="SymbolTable">Disk Format: Level 1B - Group and Symbol Nodes</a></h3> + + <p>A group is an object internal to the file that allows + arbitrary nesting of objects within the file (including other groups). + A group maps a set of names in the group to a set of relative + file addresses where objects with those names are located in + the file. Certain metadata for an object to which the group points + can be cached in the group's symbol table in addition to the + object's header. + + <p>An HDF5 object name space can be stored hierarchically by + partitioning the name into components and storing each + component in a group. The group entry for a + non-ultimate component points to the group containing + the next component. The group entry for the last + component points to the object being named. + + <p>A group is a collection of group nodes pointed + to by a B-link tree. Each group node contains entries + for one or more symbols. If an attempt is made to add a + symbol to an already full group node containing + 2<em>K</em> entries, then the node is split and one node + contains <em>K</em> symbols and the other contains + <em>K</em>+1 symbols. 
+ + <br> + <div align=center> + <table class=format> + <caption> + Group Node (A Leaf of a B-tree) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + + <tr> + <td colspan=4>Signature</td> + + <tr> + <td>Version Number</td> + <td>Reserved (0)</td> + <td colspan=2>Number of Symbols</td> + + <tr> + <td colspan=4><br><br>Group Entries<br><br><br></td> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Signature</td> + <td> + <P>The ASCII character string "<code>SNOD</code>" is + used to indicate the + beginning of a group node. This gives file + consistency checking utilities a better chance of + reconstructing a damaged file. + </P> + </td> + </tr> + + <tr> + <td>Version Number</td> + <td> + <P>The version number for the group node. This + document describes version 1. (There is no version '0' + of the group node) + </P> + </td> + </tr> + + <tr> + <td>Number of Symbols</td> + <td> + <P>Although all group nodes have the same length, + most contain fewer than the maximum possible number of + symbol entries. This field indicates how many entries + contain valid data. The valid entries are packed at the + beginning of the group node while the remaining + entries contain undefined values. + </P> + </td> + </tr> + + <tr> + <td>Group Entries</td> + <td> + <P>Each symbol has an entry in the group node. + The format of the entry is described below. + There are 2<EM>K</EM> entries in each group node, where + <EM>K</EM> is the "Group Leaf Node K" value from the + <A href="#SuperBlock">super block</A>. + </P> + </td> + </tr> + </table> + </div> + + <h3><a name="SymbolTableEntry"> + Disk Format: Level 1C - Group Entry </a></h3> + + <p>Each group entry in a group node is designed + to allow for very fast browsing of stored objects. 
+ Toward that design goal, the group entries + include space for caching certain constant metadata from the + object header. + + <br> + <div align=center> + <table class=format> + <caption> + Group Entry + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan=4>Name Offset<sup>O</sup></td> + </tr> + + <tr> + <td colspan=4>Object Header Address<sup>O</sup></td> + </tr> + + <tr> + <td colspan=4>Cache Type</td> + </tr> + + <tr> + <td colspan=4>Reserved</td> + </tr> + + <tr> + <td colspan=4><br><br>Scratch-pad Space (16 bytes)<br><br><br></td> + </tr> + </table> + + <table class=note> + <tr><td> + (Items marked with an 'O' in the above table are + <br> + of the size specified in "Size of Offsets.") + </td></tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Name Offset</td> + <td> + <P>This is the byte offset into the group local + heap for the name of the object. The name is null + terminated. + </P> + </td> + </tr> + + <tr> + <td>Object Header Address</td> + <td> + <P>Every object has an object header which serves as a + permanent location for the object's metadata. In addition + to appearing in the object header, some metadata can be + cached in the scratch-pad space. + </P> + </td> + </tr> + + <tr> + <td>Cache Type</td> + <td> + <P>The cache type is determined from the object header. + It also determines the format for the scratch-pad space: + <br> + <table class=list> + <tr align=left> + <th>Type:</th> + <th>Description:</th> + </tr> + <tr> + <td width="10%" align=center>0</td> + <td>No data is cached by the group entry. This + is guaranteed to be the case when an object header + has a link count greater than one. + </td> + </tr> + <tr> + <td align=center>1</td> + <td>Object header metadata is cached in the group + entry. This implies that the group + entry refers to another group.
+ </td> + </tr> + <tr> + <td align=center>2</td> + <td>The entry is a symbolic link. The first four bytes + of the scratch-pad space are the offset into the local + heap for the link value. The object header address + will be undefined. + </td> + </tr> + <tr> + <td align=center><em>N</em></td> + <td>Other cache values can be defined later and + libraries that do not understand the new values will + still work properly. + </td> + </tr> + </table> + </P> + </td> + </tr> + + <tr> + <td>Reserved</td> + <td> + <P>These four bytes are present so that the scratch-pad + space is aligned on an eight-byte boundary. They are + always set to zero. + </P> + </td> + </tr> + + <tr> + <td>Scratch-pad Space</td> + <td> + <P>This space is used for different purposes, depending + on the value of the Cache Type field. Any metadata + about a dataset object represented in the scratch-pad + space is duplicated in the object header for that + dataset. This metadata can include the datatype + and the size of the dataspace for a dataset whose datatype + is atomic and whose dataspace is fixed and less than + four dimensions. + </P> + <P> + Furthermore, no data is cached in the group + entry scratch-pad space if the object header for + the group entry has a link count greater than + one. + </P> + </td> + </tr> + </table> + </div> + + <h4>Format of the Scratch-pad Space</h4> + + <p>The group entry scratch-pad space is formatted + according to the value in the Cache Type field. + + <p>If the Cache Type field contains the value zero + <code>(0)</code> then no information is + stored in the scratch-pad space. 
+ + <p>If the Cache Type field contains the value one + <code>(1)</code>, then the scratch-pad space + contains cached metadata for another object header + in the following format: + + <br> + <div align=center> + <table class=format> + <caption> + Object Header Scratch-pad Format + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + + <tr> + <td colspan=4>Address of B-tree<sup>O</sup></td> + + <tr> + <td colspan=4>Address of Name Heap<sup>O</sup></td> + </table> + + <table class=note> + <tr><td> + (Items marked with an 'O' the above table are + <br> + of the size specified in "Size of Offsets.") + </td></tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Address of B-tree</td> + <td> + <P>This is the file address for the root of the + group's B-tree. + </P> + </td> + </tr> + + <tr> + <td>Address of Name Heap</td> + <td> + <P>This is the file address for the group's local + heap, in which are stored the group's symbol names. + </P> + </td> + </tr> + </table> + </div> + + + <P>If the Cache Type field contains the value two + <code>(2)</code>, then the scratch-pad space + contains cached metadata for another symbolic link + in the following format: + + <br> + <div align=center> + <table class=format> + <caption> + Symbolic Link Scratch-pad Format + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan=4>Offset to Link Value</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Offset to Link Value</td> + <td> + <P>The value of a symbolic link (that is, the name of the + thing to which it points) is stored in the local heap. + This field is the 4-byte offset into the local heap for + the start of the link value, which is null terminated. 
+ </P> + </td> + </tr> + </table> + </div> + + <h3><a name="LocalHeap">Disk Format: Level 1D - Local Heaps</a></h3> + + <P>A heap is a collection of small heap objects. Objects can be + inserted and removed from the heap at any time. + The address of a heap does not change once the heap is created. + References to objects are stored in the group table; + the names of those objects are stored in the local heap. + </P> + + <br> + <div align=center> + <table class=format> + <caption> + Local Heap + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan=4>Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan=3>Reserved (zero)</td> + </tr> + + <tr> + <td colspan=4>Data Segment Size<sup>L</sup></td> + </tr> + + <tr> + <td colspan=4>Offset to Head of Free-list<sup>L</sup></td> + </tr> + + <tr> + <td colspan=4>Address of Data Segment<sup>O</sup></td> + </tr> + </table> + + <table class=note> + <tr><td> + (Items marked with an 'L' the above table are + <br> + of the size specified in "Size of Lengths.") + </td></tr> + <tr><td> + (Items marked with an 'O' the above table are + <br> + of the size specified in "Size of Offsets.") + </td></tr> + </table> + </div> + + <p> + <center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Signature</td> + <td> + <P>The ASCII character string "<code>HEAP</code>" + is used to indicate the + beginning of a heap. This gives file consistency + checking utilities a better chance of reconstructing a + damaged file. + </P> + </td> + </tr> + + <tr> + <td>Version</td> + <td> + <P>Each local heap has its own version number so that new + heaps can be added to old files. This document + describes version zero (0) of the local heap. + </P> + </td> + </tr> + + <tr> + <td>Data Segment Size</td> + <td> + <P>The total amount of disk memory allocated for the heap + data. 
This may be larger than the amount of space + required by the objects stored in the heap. The extra + unused space in the heap holds a linked list of free blocks. + </P> + </td> + </tr> + + <tr> + <td>Offset to Head of Free-list</td> + <td> + <P>This is the offset within the heap data segment of the + first free block (or the + <A href="#UndefinedAddress">undefined address</A> if there is no + free block). The free block contains "Size of Lengths" bytes that + are the offset of the next free block (or the + value '1' if this is the + last free block) followed by "Size of Lengths" bytes that store + the size of this free block. The size of the free block includes + the space used to store the offset of the next free block and + the size of the current block, making the minimum size of a free block + 2 * "Size of Lengths". + </P> + </td> + </tr> + + <tr> + <td>Address of Data Segment</td> + <td> + <P>The data segment originally starts immediately after + the heap header, but if the data segment must grow as a + result of adding more objects, then the data segment may + be relocated, in its entirety, to another part of the + file. + </P> + </td> + </tr> + </table> + </center> + + <p>Objects within the heap should be aligned on an 8-byte boundary. + + <h3><a name="GlobalHeap">Disk Format: Level 1E - Global Heap</a></h3> + + <P>Each HDF5 file has a global heap which stores various types of + information which is typically shared between datasets. The + global heap was designed to satisfy these goals: + + <ol type="A"> + <li>Repeated access to a heap object must be efficient without + resulting in repeated file I/O requests. Since global heap + objects will typically be shared among several datasets, it is + probable that the object will be accessed repeatedly. + <li>Collections of related global heap objects should result in + fewer and larger I/O requests. For instance, a dataset of + object references will have a global heap object for each + reference. 
Reading the entire set of object references + should result in a few large I/O requests instead of one small + I/O request for each reference. + <li>It should be possible to remove objects from the global heap + and the resulting file hole should be eligible to be reclaimed + for other uses. + </ol> + </P> + + <P>The implementation of the heap makes use of the memory + management already available at the file level and combines that + with a new top-level object called a <em>collection</em> to + achieve Goal B. The global heap is the set of all collections. + Each global heap object belongs to exactly one collection and + each collection contains one or more global heap objects. For + the purposes of disk I/O and caching, a collection is treated as + an atomic object. + </P> + + <P>The HDF5 library creates global heap collections as needed, so there may + be multiple collections throughout the file. The set of all of them is + abstractly called the "global heap", although they don't actually link + to each other, and there is no global place in the file where you can + discover all of the collections. The collections are found simply by + finding a reference to one through another object in the file (e.g., + variable-length datatype elements, etc.). 
+ </P> + + <br> + <div align=center> + <table class=format> + <caption> + A Global Heap Collection + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan=4>Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan=3>Reserved (zero)</td> + </tr> + + <tr> + <td colspan=4>Collection Size<sup>L</sup></td> + </tr> + + <tr> + <td colspan=4><br>Global Heap Object 1<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Global Heap Object 2<br><br></td> + </tr> + + <tr> + <td colspan=4><br>...<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Global Heap Object <em>N</em><br><br></td> + </tr> + + <tr> + <td colspan=4><br>Global Heap Object 0 (free space)<br><br></td> + </tr> + </table> + + <table class=note> + <tr><td> + (Items marked with an 'L' the above table are + <br> + of the size specified in "Size of Lengths.") + </td></tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Signature</td> + <td> + <P>The ASCII character string "<code>GCOL</code>" + is used to indicate the + beginning of a collection. This gives file consistency + checking utilities a better chance of reconstructing a + damaged file. + </P> + </td> + </tr> + + <tr> + <td>Version</td> + <td> + <P>Each collection has its own version number so that new + collections can be added to old files. This document + describes version one (1) of the collections (there is no + version zero (0)). + </P> + </td> + </tr> + + <tr> + <td>Collection Size</td> + <td> + <P>This is the size in bytes of the entire collection + including this field. The default (and minimum) + collection size is 4096 bytes which is a typical file + system block size. This allows for 127 16-byte heap + objects plus their overhead (the collection header of 16 bytes + and the 16 bytes of information about each heap object). 
+ </P> + </td> + </tr> + + <tr> + <td>Global Heap Object 1 through <em>N</em></td> + <td> + <P>The objects are stored in any order with no + intervening unused space. + </P> + </td> + </tr> + + <tr> + <td>Global Heap Object 0</td> + <td> + <P>Global Heap Object 0 (zero), when present, represents the free + space in the collection. Free space always appears at the end of + the collection. If the free space is too small to store the header + for Object 0 (described below) then the header is implied and the + collection contains no free space. + </P> + </td> + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + Global Heap Object + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan=2>Heap Object ID</td> + <td colspan=2>Reference Count</td> + </tr> + + <tr> + <td colspan=4>Reserved</td> + </tr> + + <tr> + <td colspan=4>Object Size<sup>L</sup></td> + </tr> + + <tr> + <td colspan=4><br>Object Data<br><br></td> + </tr> + </table> + + <table class=note> + <tr><td> + (Items marked with an 'L' in the above table are + <br> + of the size specified in "Size of Lengths.") + </td></tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Heap Object ID</td> + <td> + <P>Each object has a unique identification number within a + collection. The identification numbers are chosen so that + new objects have the smallest value possible with the + exception that the identifier <code>0</code> always refers to the + object which represents all free space within the + collection. + </P> + </td> + </tr> + + <tr> + <td>Reference Count</td> + <td> + <P>All heap objects have a reference count field. An + object which is referenced from some other part of the + file will have a positive reference count. The reference + count for Object 0 is always zero. 
+ </P> + </td> + </tr> + + <tr> + <td>Reserved</td> + <td> + <P>Zero padding to align the next field on an 8-byte boundary. + </P> + </td> + </tr> + + <tr> + <td>Object Size</td> + <td> + <P>This is the size of the object data stored for the object. + The actual storage space allocated for the object data is rounded + up to a multiple of eight. + </P> + </td> + </tr> + + <tr> + <td>Object Data</td> + <td> + <P>The object data is treated as a one-dimensional array + of bytes to be interpreted by the caller. + </P> + </td> + </tr> + </table> + </div> + + <h3><a name="FreeSpaceIndex">Disk Format: Level 1F - Free-space Index</a></h3> + + <p>The free-space index is a collection of blocks of data, + dispersed throughout the file, which are currently not used by + any file objects. + + <p>The super block contains a pointer to the root of the free-space description; + that pointer is currently required to be the + <A href="#UndefinedAddress">undefined address</A>. + + <p>The format of the free-space index is not defined at this time. + +<!-- + <p>The Free-space Index is a collection of blocks of data, + dispersed throughout the file, which are currently not used by + any file objects. The blocks of data are indexed by a B-tree of + their length within the file. 
+ + + <p>Each B-tree page is composed of the following entries and + B-tree management information, organized as follows: + + <p> + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Free-space Heap Page</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4>Signature</td> + <tr align=center> + <td colspan=4>B-tree Left-link Offset</td> + <tr align=center> + <td colspan=4><br>Length of Free-block #1<br> <br></td> + <tr align=center> + <td colspan=4><br>Offset of Free-block #1<br> <br></td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4><br>Length of Free-block #n<br> <br></td> + <tr align=center> + <td colspan=4><br>Offset of Free-block #n<br> <br></td> + <tr align=center> + <td colspan=4>"High" Offset</td> + <tr align=center> + <td colspan=4>Right-link Offset</td> + </table> + </center> + + <p> + <dl> + <dt> The elements of the free-space heap page are described below: + <dd> + <dl> + <dt>Signature: (4 bytes) + <dd>The ASCII character string <code>FREE</code> + is used to indicate the + beginning of a free-space heap B-tree page. This gives + file consistency checking utilities a better chance of + reconstructing a damaged file. + + <dt>B-tree Left-link Offset: (<offset> bytes) + <dd>This value is used to indicate the offset of all offsets + in the B-link-tree which are smaller than the value of the + offset in entry #1. This value is also used to indicate a + leaf node in the B-link-tree by being set to all ones. + + <dt>Length of Free-block #n: (<length> bytes) + <dd>This value indicates the length of an unused block in + the file. + + <dt>Offset of Free-block #n: (<offset> bytes) + <dd>This value indicates the offset in the file of an + unused block in the file. 
+ + <dt>"High" Offset: (4-bytes) + <dd>This offset is used as the upper bound on offsets + contained within a page when the page has been split. + + <dt>Right-link Offset: (<offset> bytes) + <dd>This value is used to indicate the offset of the next + child to the right of the parent of this group + page. When there is no node to the right, this value is + all zeros. + </dl> + </dl> + + <p>The algorithms for searching and inserting objects in the + B-tree pages are described fully in the Lehman and Yao paper, + which should be read to provide a full description of the + B-tree's usage. +--> + + <BR> + <HR> + + <h2><a name="DataObject">Disk Format: Level 2 - Data Objects </a></h2> + + <P>Data objects contain the real information in the file. These + objects compose the scientific data and other information which + are generally thought of as "data" by the end-user. All the + other information in the file is provided as a framework for + these data objects. + </P> + + <P>A data object is composed of header information and data + information. The header information contains the information + needed to interpret the data information for the data object as + well as additional "metadata" or pointers to additional + "metadata" used to describe or annotate each data object. + </P> + + <h3><a name="ObjectHeader"> + Disk Format: Level 2A - Data Object Headers</a></h3> + + <P>The header information of an object is designed to encompass + all the information about an object, except for the data itself. + This information includes + the dataspace, datatype, information about how the data + is stored on disk (in external files, compressed, broken up in + blocks, etc.), as well as other information used by the library + to speed up access to the data objects or maintain a file's + integrity. Information stored by user applications as attributes + is also stored in the object's header. 
The header of each object is + not necessarily located immediately prior to the object's data in the + file and in fact may be located in any position in the file. The order + of the messages in an object header is not significant. + </P> + + <P>Header messages are aligned on 8-byte boundaries. + </P> + + <br> + <div align=center> + <table class=format> + <caption> + Object Headers + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Reserved (zero)</td> + <td colspan=2>Number of Header Messages</td> + </tr> + + <tr> + <td colspan=4>Object Reference Count</td> + </tr> + + <tr> + <td colspan=4>Object Header Size</td> + </tr> + + <tr> + <td colspan=2>Header Message Type #1</td> + <td colspan=2>Size of Header Message Data #1</td> + </tr> + + <tr> + <td>Header Message #1 Flags</td> + <td colspan=3>Reserved (zero)</td> + </tr> + + <tr> + <td colspan=4><br>Header Message Data #1<br><br></td> + </tr> + + <tr> + <td colspan=4>.<br>.<br>.<br></td> + </tr> + + <tr> + <td colspan=2>Header Message Type #n</td> + <td colspan=2>Size of Header Message Data #n</td> + </tr> + + <tr> + <td>Header Message #n Flags</td> + <td colspan=3>Reserved (zero)</td> + </tr> + + <tr> + <td colspan=4><br>Header Message Data #n<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td> + <P>This value is used to determine the format of the + information in the object header. When the format of the + information in the object header is changed, the version number + is incremented and can be used to determine how the + information in the object header is formatted. This + document describes version one (1) (there was no version + zero (0)). 
+ </P> + </td> + </tr> + + <tr> + <td>Number of Header Messages</td> + <td> + <P>This value determines the number of messages listed in + object headers for this object. This value includes the messages + in continuation messages for this object. + </P> + </td> + </tr> + + <tr> + <td>Object Reference Count</td> + <td> + <P>This value specifies the number of "hard links" to this object + within the current file. References to the object from external + files, "soft links" in this file and object references in this + file are not tracked. + </P> + </td> + </tr> + + <tr> + <td>Object Header Size</td> + <td> + <P>This value specifies the number of bytes of header message data + following this length field that contain object header messages + for this object header. This value does not include the size of + object header continuation blocks for this object elsewhere in the + file. + </P> + </td> + </tr> + + <tr> + <td>Header Message Type</td> + <td> + <P>This value specifies the type of information included in the + following header message data. The header message types for the + pre-defined header messages are included in sections below. + </P> + </td> + </tr> + + <tr> + <td>Size of Header Message Data</td> + <td> + <P>This value specifies the number of bytes of header + message data following the header message type and length + information for the current message. The size includes + padding bytes to make the message a multiple of eight + bytes. + </P> + </td> + </tr> + + <tr> + <td>Header Message Flags</td> + <td> + <P>This is a bit field with the following definition: + <table class=list> + <tr> + <th width="30%">Bit</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>If set, the message data is constant. This is used + for messages like the datatype message of a dataset. + </td> + </tr> + <tr> + <td align=center><code>1</code></td> + <td>If set, the message is stored in the global heap. 
+ The Header Message Data field contains a Shared Object + message and the Size of Header Message Data field + contains the size of that Shared Object message. + </td> + </tr> + <tr> + <td align=center><code>2-7</code></td> + <td>Reserved</td> + </tr> + </table> + </P> + </td> + </tr> + + <tr> + <td>Header Message Data</td> + <td> + <P>The format and length of this field are determined by the + header message type and size, respectively. Some header + message types do not require any data and this information + can be eliminated by setting the length of the message to + zero. The data is padded with enough zeros to make the + size a multiple of eight. + </P> + </td> + </tr> + </table> + </div> + + <P>The header message types and the message data associated with + them compose the critical "metadata" about each object. Some + header messages are required for each object while others are + optional. Some optional header messages may also be repeated + several times in the header itself; the requirements and number + of times allowed in the header will be noted in each header + message description below. + </P> + + <P>The following is a list of currently defined header messages: + </P> + + <hr> + <h4><a name="NILMessage">Name: NIL</a></h4> + + <P class=item><B>Header Message Type: </B>0x0000 + </P> + <P class=item><B>Length:</B> varies + </P> + <P class=item><B>Status:</B> Optional, may be repeated. + </P> + <P class=item><B>Purpose and Description:</B> The NIL message is used to indicate a + message which is to be ignored when reading the header messages for a + data object. [Possibly one which has been deleted for some reason.] + </P> + <P class=item><B>Format of Data:</B> Unspecified. + </P> + + <hr> + <h4><a name="SimpleDataSpace">Name: Simple Dataspace</a></h4> + + <P class=item><B>Header Message Type: </B>0x0001 + </P> + <P class=item><B>Length:</B> Varies according to the number of dimensions, + as described in the following table. 
+ </P> + <P class=item><B>Status:</B> Required for dataset objects, may not be + repeated. + </P> + <P class=item><B>Description:</B> The simple dataspace message describes the + number of dimensions (i.e. "rank") and size of each dimension that the + data object has. This message is only used for datasets which have a + simple, rectilinear grid layout; datasets requiring a more complex + layout (irregularly structured or unstructured grids, etc.) must use + the <em>Complex Dataspace</em> message for expressing the space the + dataset inhabits. <i>(Note: The <em>Complex Dataspace</em> + functionality is not yet implemented and it is not described in this + document.)</i> + </P> + + <P class=item><B>Format of Data:</B> + <br> + <div align=center> + <table class=format> + <caption> + Simple Dataspace Message + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Dimensionality</td> + <td>Flags</td> + <td>Reserved</td> + </tr> + + <tr> + <td colspan=4>Reserved</td> + </tr> + + <tr> + <td colspan=4>Dimension #1 Size<sup>L</sup></td> + <tr> + <td colspan=4>.<br>.<br>.<br></td> + <tr> + <td colspan=4>Dimension #n Size<sup>L</sup></td> + <tr> + <td colspan=4>Dimension #1 Maximum Size<sup>L</sup></td> + <tr> + <td colspan=4>.<br>.<br>.<br></td> + <tr> + <td colspan=4>Dimension #n Maximum Size<sup>L</sup></td> + <tr> + <td colspan=4>Permutation Index #1<sup>L</sup></td> + <tr> + <td colspan=4>.<br>.<br>.<br></td> + <tr> + <td colspan=4>Permutation Index #n<sup>L</sup></td> + </table> + + <table class=note> + <tr><td> + (Items marked with an 'L' the above table are + <br> + of the size specified in "Size of Lengths.") + </td></tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td> + <P>This value is used to determine the format of the + Simple Dataspace Message. 
When the format of the + information in the message is changed, the version number + is incremented and can be used to determine how the + information in the object header is formatted. This + document describes version one (1) (there was no version + zero (0)). + </P> + </td> + </tr> + + <tr> + <td>Dimensionality</td> + <td> + <P>This value is the number of dimensions that the data + object has. + </P> + </td> + </tr> + + <tr> + <td>Flags</td> + <td> + <P>This field is used to store flags to indicate the + presence of parts of this message. Bit 0 (the least + significant bit) is used to indicate that maximum + dimensions are present. Bit 1 is used to indicate that + permutation indices are present. + </P> + </td> + </tr> + + <tr> + <td>Dimension #n Size</td> + <td> + <P>This value is the current size of the dimension of the + data as stored in the file. The first dimension stored in + the list of dimensions is the slowest changing dimension + and the last dimension stored is the fastest changing + dimension. + </P> + </td> + </tr> + + <tr> + <td>Dimension #n Maximum Size</td> + <td> + <P>This value is the maximum size of the dimension of the + data as stored in the file. This value may be the special + "<A href="#UnlimitedDim">unlimited</A>" size which indicates + that the data may expand along this dimension indefinitely. + If these values are not stored, the maximum size of each + dimension is assumed to be the dimension's current size. + </P> + </td> + </tr> + + <tr> + <td>Permutation Index #n</td> + <td> + <P>This value is the index permutation used to map + each dimension from the canonical representation to an + alternate axis for each dimension. If these values are + not stored, the first dimension stored in the list of + dimensions is the slowest changing dimension and the last + dimension stored is the fastest changing dimension. 
+ </P> + </td> + </tr> + </table> + </div> + + </P> + +<!-- + <hr> + <h4><a name="DataSpaceMessage">Name: Complex Dataspace (Fiber Bundle?)</a></h4> + <b>Header Message Type: </b>0x0002<br> + <b>Length:</b> varies<br> + + <b>Status:</b> One of the <em>Simple Dataspace</em> or + <em>Complex Dataspace</em> messages is required (but not both) and may + not be repeated.<br> <b>Purpose and Description:</b> The + <em>Dataspace</em> message describes space that the dataset is + mapped onto in a more comprehensive way than the <em>Simple + Dimensionality</em> message is capable of handling. The + dataspace of a dataset encompasses the type of coordinate system + used to locate the dataset's elements as well as the structure and + regularity of the coordinate system. The dataspace also + describes the number of dimensions which the dataset inhabits as + well as a possible higher dimensional space in which the dataset + is located within. + + <br> + <b>Format of Data:</b> + + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Dataspace Message Layout</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4>Mesh Type</td> + <tr align=center> + <td colspan=4>Logical Dimensionality</td> + </table> + </center> + + <p> + <dl> + <dt>The elements of the dimensionality message are described below: + <dd> + <dl> + <dt>Mesh Type: (unsigned 32-bit integer) + <dd>This value indicates whether the grid is + polar/spherical/cartesion, + structured/unstructured and regular/irregular. 
<br> + The mesh type value is broken up as follows: <br> + + <P> + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Mesh-type Layout</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=1>Mesh Embedding</td> + <td colspan=1>Coordinate System</td> + <td colspan=1>Structure</td> + <td colspan=1>Regularity</td> + </table> + </center> + The following are the definitions of mesh-type bytes: + <dl> + <dt>Mesh Embedding + <dd>This value indicates whether the dataset dataspace + is located within + another dataspace or not: + <dl> <dl> + <dt><STANDALONE> + <dd>The dataset mesh is self-contained and is not + embedded in another mesh. + <dt><EMBEDDED> + <dd>The dataset's dataspace is located within + another dataspace, as + described in information below. + </dl> </dl> + <dt>Coordinate System + <dd>This value defines the type of coordinate system + used for the mesh: + <dl> <dl> + <dt><POLAR> + <dd>The last two dimensions are in polar + coordinates, higher dimensions are + cartesian. + <dt><SPHERICAL> + <dd>The last three dimensions are in spherical + coordinates, higher dimensions + are cartesian. + <dt><CARTESIAN> + <dd>All dimensions are in cartesian coordinates. + </dl> </dl> + <dt>Structure + <dd>This value defines the locations of the grid-points + on the axes: + <dl> <dl> + <dt><STRUCTURED> + <dd>All grid-points are on integral, sequential + locations, starting from 0. + <dt><UNSTRUCTURED> + <dd>Grid-points locations in each dimension are + explicitly defined and + may be of any numeric datatype. + </dl> </dl> + <dt>Regularity + <dd>This value defines the locations of the dataset + points on the grid: + <dl> <dl> + <dt><REGULAR> + <dd>All dataset elements are located at the + grid-points defined. + <dt><IRREGULAR> + <dd>Each dataset element has a particular + grid-location defined. 
+ </dl> </dl> + </dl> + <p>The following grid combinations are currently allowed: + <dl> <dl> + <dt><POLAR-STRUCTURED-REGULAR> + <dt><SPHERICAL-STRUCTURED-REGULAR> + <dt><CARTESIAN-STRUCTURED-REGULAR> + <dt><POLAR-UNSTRUCTURED-REGULAR> + <dt><SPHERICAL-UNSTRUCTURED-REGULAR> + <dt><CARTESIAN-UNSTRUCTURED-REGULAR> + <dt><CARTESIAN-UNSTRUCTURED-IRREGULAR> + </dl> </dl> + All of the above grid types can be embedded within another + dataspace. + <br> <br> + <dt>Logical Dimensionality: (unsigned 32-bit integer) + <dd>This value is the number of dimensions that the dataset occupies. + + <P> + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Dataspace Embedded Dimensionality Information</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4>Embedded Dimensionality</td> + <tr align=center> + <td colspan=4>Embedded Dimension Size #1</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4>Embedded Dimension Size #n</td> + <tr align=center> + <td colspan=4>Embedded Origin Location #1</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4>Embedded Origin Location #n</td> + </table> + </center> + + <dt>Embedded Dimensionality: (unsigned 32-bit integer) + <dd>This value is the number of dimensions of the space the + dataset is located + within. i.e. a planar dataset located within a 3-D space, + or a 3-D dataset + which is a subset of another 3-D space, etc. + <dt>Embedded Dimension Size: (unsigned 32-bit integer) + <dd>These values are the sizes of the dimensions of the + embedded dataspace + that the dataset is located within. + <dt>Embedded Origin Location: (unsigned 32-bit integer) + <dd>These values comprise the location of the dataset's + origin within the embedded dataspace. 
+ </dl> + </dl> + [Comment: need some way to handle different orientations of the + dataset dataspace + within the embedded dataspace]<br> + + <P> + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Dataspace Structured/Regular Grid Information</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4>Logical Dimension Size #1</td> + <tr align=center> + <td colspan=4>Logical Dimension Maximum #1</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4>Logical Dimension Size #n</td> + <tr align=center> + <td colspan=4>Logical Dimension Maximum #n</td> + </table> + </center> + + <p> + <dl> + <dt>The elements of the dimensionality message are described below: + <dd> + <dl> + <dt>Logical Dimension Size #n: (unsigned 32-bit integer) + <dd>This value is the current size of the dimension of the + data as stored in + the file. The first dimension stored in the list of + dimensions is the slowest + changing dimension and the last dimension stored is the + fastest changing + dimension. + <dt>Logical Dimension Maximum #n: (unsigned 32-bit integer) + <dd>This value is the maximum size of the dimension of the + data as stored in + the file. This value may be the special value + <UNLIMITED> which + indicates that the data may expand along this dimension + indefinitely. 
+ </dl> + </dl> + <P> + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Dataspace Structured/Irregular Grid Information</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4># of Grid Points in Dimension #1</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4># of Grid Points in Dimension #n</td> + <tr align=center> + <td colspan=4>Datatype of Grid Point Locations</td> + <tr align=center> + <td colspan=4>Location of Grid Points in Dimension #1</td> + <tr align=center> + <td colspan=4>.<br>.<br>.<br></td> + <tr align=center> + <td colspan=4>Location of Grid Points in Dimension #n</td> + </table> + </center> + + <P> + <center> + <table border cellpadding=4 width="80%"> + <caption align=bottom> + <B>HDF5 Dataspace Unstructured Grid Information</B> + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + + <tr align=center> + <td colspan=4># of Grid Points</td> + <tr align=center> + <td colspan=4>Datatype of Grid Point Locations</td> + <tr align=center> + <td colspan=4>Grid Point Locations<br>.<br>.<br></td> + </table> + </center> +--> + + <hr> + <h4><a name="ReservedMessage_0002">Name: Reserved - Not Assigned Yet</a></h4> + <b>Header Message Type:</b> 0x0002<BR> + <b>Length:</b> N/A<BR> + <b>Status:</b> N/A<BR> + <b>Format of Data:</b> N/A<BR> + + <p><b>Purpose and Description:</b> This message type was skipped during + the initial specification of the file format and may be used in a + future expansion to the format. 
+ + + <hr> + <h4><a name="DataTypeMessage">Name: Datatype</a></h4> + + <P class=item><B>Header Message Type:</B> 0x0003 + </P> + <P class=item><B>Length:</B> variable + </P> + <P class=item><B>Status:</B> Required for dataset or named datatype objects, + may not be repeated. + </P> + + <P class=item><B>Description:</B> The datatype message defines the datatype + for each element of a dataset. A datatype can describe an atomic type + like a fixed- or floating-point type or a compound type like a C + struct. + Datatype messages are stored + as a list of datatype classes and + their associated properties. + </P> + + <P class=item2>Datatype messages that are part of a dataset object + do not describe how elements are related to one another; the dataspace + message is used for that purpose. Datatype messages that are part of + a named datatype object describe an "abstract" datatype that can be + used by other objects in the file. + </P> + + <P class=item><B>Format of Data:</B> + <br> + <div align=center> + <table class=format> + <caption> + Datatype Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Class and Version</td> + <td>Class Bit Field, Bits 0-7</td> + <td>Class Bit Field, Bits 8-15</td> + <td>Class Bit Field, Bits 16-23</td> + </tr> + + <tr> + <td colspan=4>Size</td> + </tr> + + <tr> + <td colspan=4><br><br>Properties<br><br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Class and Version</td> + <td> + <P>The version of the datatype message and the datatype's class + information are packed together in this field. The version + number is packed in the top 4 bits of the field and the class + is contained in the bottom 4 bits. 
+ </P> + <P>The version number information is used for changes in the + format of the datatype message and is described here: + <table class=list> + <tr> + <th width="30%">Version</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>Never used + </td> + </tr> + <tr> + <td align=center><code>1</code></td> + <td>Used by early versions of the library to encode + compound datatypes with explicit array fields. + See the compound datatype description below for + further details. + </td> + </tr> + <tr> + <td align=center><code>2</code></td> + <td>The current version used by the library. + </td> + </tr> + </table> + </P> + <P>The class of the datatype determines the format for the class + bit field and properties portion of the datatype message, which + are described below. The + following classes are currently defined: + <table width=100% class=list> + <tr> + <th width="30%">Value</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>Fixed-Point</td> + </tr> + + <tr> + <td align=center><code>1</code></td> + <td>Floating-Point</td> + </tr> + + <tr> + <td align=center><code>2</code></td> + <td>Time</td> + </tr> + + <tr> + <td align=center><code>3</code></td> + <td>String</td> + </tr> + + <tr> + <td align=center><code>4</code></td> + <td>Bitfield</td> + </tr> + + <tr> + <td align=center><code>5</code></td> + <td>Opaque</td> + </tr> + + <tr> + <td align=center><code>6</code></td> + <td>Compound</td> + </tr> + + <tr> + <td align=center><code>7</code></td> + <td>Reference</td> + </tr> + + <tr> + <td align=center><code>8</code></td> + <td>Enumerated</td> + </tr> + + <tr> + <td align=center><code>9</code></td> + <td>Variable-Length</td> + </tr> + + <tr> + <td align=center><code>10</code></td> + <td>Array</td> + </tr> + </table> + </P> + </td> + </tr> + + <tr> + <td>Class Bit Fields</td> + <td> + <P>The information in these bit fields is specific to each datatype + class and is described 
below. All bits not defined for a + datatype class are set to zero. + </P> + </td> + </tr> + + <tr> + <td>Size</td> + <td> + <P>The size of the datatype in bytes. + </P> + </td> + </tr> + + <tr> + <td>Properties</td> + <td> + <P>This variable-sized field encodes information specific to each + datatype class and is described below. If there is no + property information specified for a datatype class, the size + of this field is zero. + </P> + </td> + </tr> + + </table> + </div> + </P> + + <P>Class specific information for Fixed-Point Numbers (Class 0): + + <br> + <div align=center> + <table class=desc> + <caption> + Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td>0</td> + <td><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big endian.</td> + </tr> + + <tr> + <td>1, 2</td> + <td><b>Padding type.</b> Bit 1 is the lo_pad type and bit 2 + is the hi_pad type. If a datum has unused bits at either + end, then the lo_pad or hi_pad bit is copied to those + locations.</td> + </tr> + + <tr> + <td>3</td> + <td><b>Signed.</b> If this bit is set then the fixed-point + number is in 2's complement form.</td> + </tr> + + <tr> + <td>4-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + Property Descriptions + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan=2>Bit Offset</td> + <td colspan=2>Bit Precision</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Bit Offset</td> + <td> + <P>The bit offset of the first significant bit of the fixed-point + value within the datatype. The bit offset specifies the number + of bits "to the right of" the value. 
+ </P> + </td> + </tr> + + <tr> + <td>Bit Precision</td> + <td> + <P>The number of bits of precision of the fixed-point value + within the datatype. + </P> + </td> + </tr> + + </table> + </div> + </P> + + <P>Class specific information for Floating-Point Numbers (Class 1): + + <br> + <div align=center> + <table class=desc> + <caption> + Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td>0</td> + <td><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big-endian.</td> + </tr> + + <tr> + <td>1, 2, 3</td> + <td><b>Padding type.</b> Bit 1 is the low bits pad type, bit 2 + is the high bits pad type, and bit 3 is the internal bits + pad type. If a datum has unused bits at either end or between + the sign bit, exponent, or mantissa, then the value of bit + 1, 2, or 3 is copied to those locations.</td> + </tr> + + <tr> + <td>4-5</td> + <td><b>Normalization.</b> The value can be 0 if there is no + normalization, 1 if the most significant bit of the + mantissa is always set (except for 0.0), and 2 if the most + significant bit of the mantissa is not stored but is + implied to be set. The value 3 is reserved and will not + appear in this field.</td> + </tr> + + <tr> + <td>6-7</td> + <td>Reserved (zero).</td> + </tr> + + <tr> + <td>8-15</td> + <td><b>Sign Location.</b> This is the bit position of the sign + bit. 
Bits are numbered with the least significant bit zero.</td> + </tr> + + <tr> + <td>16-23</td> + <td>Reserved (zero).</td> + </tr> + + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + Property Descriptions + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan=2>Bit Offset</td> + <td colspan=2>Bit Precision</td> + </tr> + + <tr> + <td>Exponent Location</td> + <td>Exponent Size</td> + <td>Mantissa Location</td> + <td>Mantissa Size</td> + </tr> + + <tr> + <td colspan=4>Exponent Bias</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Bit Offset</td> + <td> + <P>The bit offset of the first significant bit of the floating-point + value within the datatype. The bit offset specifies the number + of bits "to the right of" the value. + </P> + </td> + </tr> + + <tr> + <td>Bit Precision</td> + <td> + <P>The number of bits of precision of the floating-point value + within the datatype. + </P> + </td> + </tr> + + <tr> + <td>Exponent Location</td> + <td> + <P>The bit position of the exponent field. Bits are numbered with + the least significant bit number zero. + </P> + </td> + </tr> + + <tr> + <td>Exponent Size</td> + <td> + <P>The size of the exponent field in bits. + </P> + </td> + </tr> + + <tr> + <td>Mantissa Location</td> + <td> + <P>The bit position of the mantissa field. Bits are numbered with + the least significant bit number zero. + </P> + </td> + </tr> + + <tr> + <td>Mantissa Size</td> + <td> + <P>The size of the mantissa field in bits. + </P> + </td> + </tr> + + <tr> + <td>Exponent Bias</td> + <td> + <P>The bias of the exponent field. 
+ </P> + </td> + </tr> + + </table> + </div> + </P> + + <P>Class specific information for Time (Class 2): + + <br> + <div align=center> + <table class=desc> + <caption> + Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td>0</td> + <td><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big endian.</td> + </tr> + + <tr> + <td>1-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + Property Descriptions + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan=2>Bit Precision</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Bit Precision</td> + <td> + <P>The number of bits of precision of the time value. + </P> + </td> + </tr> + + </table> + </div> + </P> + + <P>Class specific information for Strings (Class 3): + + <br> + <div align=center> + <table class=desc> + <caption> + Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td>0-3</td> + <td><b>Padding type.</b> This four-bit value determines the + type of padding to use for the string. The values are: + + <table width=100% class=list> + <tr> + <th width="30%">Value</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>Null Terminate: A zero byte marks the end of the + string and is guaranteed to be present after + converting a long string to a short string. When + converting a short string to a long string the value is + padded with additional null characters as necessary. 
+ </td> + </tr> + + <tr> + <td align=center><code>1</code></td> + <td>Null Pad: Null characters are added to the end of + the value during conversions from short values to long + values but conversion in the opposite direction simply + truncates the value. + </td> + </tr> + + <tr> + <td align=center><code>2</code></td> + <td>Space Pad: Space characters are added to the end of + the value during conversions from short values to long + values but conversion in the opposite direction simply + truncates the value. This is the Fortran + representation of the string. + </td> + </tr> + + <tr> + <td align=center><code>3-15</code></td> + <td>Reserved + </td> + </tr> + </table> + </td> + </tr> + + <tr> + <td>4-7</td> + <td><b>Character Set.</b> The character set to use for + encoding the string. The only character set supported is + 8-bit ASCII (value zero), so no translations have been defined + yet.</td> + </tr> + + <tr> + <td>8-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </div> + + <P>There are no properties defined for the string class. + </P> + </P> + + <P>Class specific information for Bitfields (Class 4): + + <br> + <div align=center> + <table class=desc> + <caption> + Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td>0</td> + <td><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big-endian.</td> + </tr> + + <tr> + <td>1, 2</td> + <td><b>Padding type.</b> Bit 1 is the lo_pad type and bit 2 + is the hi_pad type. 
If a datum has unused bits at either + end, then the lo_pad or hi_pad bit is copied to those + locations.</td> + </tr> + + <tr> + <td>3-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan=2>Bit Offset</td> + <td colspan=2>Bit Precision</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Bit Offset</td> + <td> + <P>The bit offset of the first significant bit of the bitfield + within the datatype. The bit offset specifies the number + of bits "to the right of" the value. + </P> + </td> + </tr> + + <tr> + <td>Bit Precision</td> + <td> + <P>The number of bits of precision of the bitfield + within the datatype. + </P> + </td> + </tr> + </table> + </div> + </P> + + <P>Class specific information for Opaque (Class 5): + + <br> + <div align=center> + <table class=desc> + <caption> + Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td>0-7</td> + <td>Length of ASCII tag in bytes.</td> + </tr> + + <tr> + <td>8-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan=4><br>ASCII Tag<br> + <br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>ASCII Tag</td> + <td> + <P>This NUL-terminated string provides a description for the + opaque type. 
It is NUL-padded to a multiple of 8 bytes. + </P> + </td> + </tr> + </table> + </div> + </P> + + <P>Class specific information for Compound (Class 6): + + <br> + <div align=center> + <table class=desc> + <caption> + Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td>0-15</td> + <td><b>Number of Members.</b> This field contains the number + of members defined for the compound datatype. The member + definitions are listed in the Properties field of the + datatype message.</td> + </tr> + + <tr> + <td>16-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </div> + </P> + + <P>The Properties field of a compound datatype is a list of the + member definitions of the compound datatype. The member + definitions appear one after another with no intervening bytes. + The member types are described with a recursive datatype + message. + + <P>Note that the property descriptions are different for different + versions of the datatype message. Additionally note that the version + 1 properties are deprecated and have been replaced with the version + 2 properties in versions of the HDF5 library from the 1.4 release + onward. 
+ + <br> + <div align=center> + <table class=format> + <caption> + Properties Description for Datatype Version 1 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan=4><br>Name<br><br></td> + </tr> + + <tr> + <td colspan=4>Byte Offset of Member</td> + </tr> + + <tr> + <td>Dimensionality</td> + <td colspan=3>Reserved (zero)</td> + </tr> + + <tr> + <td colspan=4>Dimension Permutation</td> + </tr> + + <tr> + <td colspan=4>Reserved (zero)</td> + </tr> + + <tr> + <td colspan=4>Dimension #1 Size (required)</td> + </tr> + + <tr> + <td colspan=4>Dimension #2 Size (required)</td> + </tr> + + <tr> + <td colspan=4>Dimension #3 Size (required)</td> + </tr> + + <tr> + <td colspan=4>Dimension #4 Size (required)</td> + </tr> + + <tr> + <td colspan=4><br>Member Type Message<br><br></td> + </tr> + + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Name</td> + <td> + <P>This NUL-terminated string provides the name of the + member. It is NUL-padded to a multiple of 8 bytes. + </P> + </td> + </tr> + + <tr> + <td>Byte Offset of Member</td> + <td> + <P>This is the byte offset of the member within the datatype. + </P> + </td> + </tr> + + <tr> + <td>Dimensionality</td> + <td> + <P>If set to zero, this field indicates a scalar member. If set + to a value greater than zero, this field indicates that the + member is an array of values. For array members, the size of + the array is indicated by the 'Dimension #n Size' fields in + this message. + </P> + </td> + </tr> + + <tr> + <td>Dimension Permutation</td> + <td> + <P>This field was intended to allow an array field to have + its dimensions permuted, but this was never implemented. + This field should always be set to zero. 
+ </P> + </td> + </tr> + + <tr> + <td>Dimension #n Size</td> + <td> + <P>This field is the size of a dimension of the array field as + stored in the file. The first dimension stored in the list of + dimensions is the slowest changing dimension and the last + dimension stored is the fastest changing dimension. + </P> + </td> + </tr> + + <tr> + <td>Member Type Message</td> + <td> + <P>This field is a datatype message describing the datatype of + the member. + </P> + </td> + </tr> + + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + Properties Description for Datatype Version 2 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan=4><br>Name<br><br></td> + </tr> + + <tr> + <td colspan=4>Byte Offset of Member</td> + </tr> + + <tr> + <td colspan=4><br>Member Type Message<br><br></td> + </tr> + + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Name</td> + <td> + <P>This NUL-terminated string provides the name of the + member. It is NUL-padded to a multiple of 8 bytes. + </P> + </td> + </tr> + + <tr> + <td>Byte Offset of Member</td> + <td> + <P>This is the byte offset of the member within the datatype. + </P> + </td> + </tr> + + <tr> + <td>Member Type Message</td> + <td> + <P>This field is a datatype message describing the datatype of + the member. + </P> + </td> + </tr> + + </table> + </div> + </P> + + <P>Class specific information for Reference (Class 7): + + <br> + <div align=center> + <table class=desc> + <caption> + Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td>0-3</td> + <td><b>Type.</b> This four-bit value contains the type of reference + described. 
The values defined are: + + <table width=100% class=list> + <tr> + <th width="30%">Value</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>Object Reference: A reference to another object in this + HDF5 file. + </td> + </tr> + + <tr> + <td align=center><code>1</code></td> + <td>Dataset Region Reference: A reference to a region within + a dataset in this HDF5 file. + </td> + </tr> + + <tr> + <td align=center><code>2</code></td> + <td>Internal Reference: A reference to a region within the + current dataset. (Not currently implemented) + </td> + </tr> + + <tr> + <td align=center><code>3-15</code></td> + <td>Reserved + </td> + </tr> + </table> + + </td> + </tr> + + <tr> + <td>4-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </div> + + <P>There are no properties defined for the reference class. + </P> + </P> + + <P>Class specific information for Enumerated (Class 8): + + <br> + <div align=center> + <table class=desc> + <caption> + Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td>0-15</td> + <td><b>Number of Members.</b> The number of name/value + pairs defined for the enumeration type.</td> + </tr> + + <tr> + <td>16-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan=4><br>Base Type<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Names<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Values<br><br></td> + </tr> + + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Base Type</td> + <td> + <P>Each enumeration type is based on some parent type, usually an + integer. 
The information for that parent type is described + recursively by this field. + </P> + </td> + </tr> + + <tr> + <td>Names</td> + <td> + <P>The name for each name/value pair. Each name is stored as a null + terminated ASCII string in a multiple of eight bytes. The names + are in no particular order. + </P> + </td> + </tr> + + <tr> + <td>Values</td> + <td> + <P>The list of values in the same order as the names. The values + are packed (no inter-value padding) and the size of each value + is determined by the parent type. + </P> + </td> + </tr> + + </table> + </div> + </P> + + + <P>Class specific information for Variable-Length (Class 9): + + <br> + <div align=center> + <table class=desc> + <caption> + Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td>0-3</td> + <td><b>Type.</b> This four-bit value contains the type of + variable-length datatype described. The values defined are: + + <table width=100% class=list> + <tr> + <th width="30%">Value</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>Sequence: A variable-length sequence of any sequence of + data. Variable-length sequences do not have padding or + character set information. + </td> + </tr> + + <tr> + <td align=center><code>1</code></td> + <td>String: A variable-length sequence of characters. + Variable-length strings have padding and character set + information. + </td> + </tr> + + <tr> + <td align=center><code>2-15</code></td> + <td>Reserved + </td> + </tr> + </table> + + </td> + </tr> + + <tr> + <td>4-7</td> + <td><b>Padding type.</b> (variable-length string only) + This four-bit value determines the type of padding + used for variable-length strings. 
The values are the same + as for the string padding type, as follows: + <table width=100% class=list> + <tr> + <th width="30%">Value</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>Null terminate: A zero byte marks the end of a string + and is guaranteed to be present after converting a long + string to a short string. When converting a short string + to a long string, the value is padded with additional null + characters as necessary. + </td> + </tr> + + <tr> + <td align=center><code>1</code></td> + <td>Null pad: Null characters are added to the end of the + value during conversion from a short string to a longer + string. Conversion from a long string to a shorter string + simply truncates the value. + </td> + </tr> + + <tr> + <td align=center><code>2</code></td> + <td>Space pad: Space characters are added to the end of the + value during conversion from a short string to a longer + string. Conversion from a long string to a shorter string + simply truncates the value. This is the Fortran + representation of the string. + </td> + </tr> + + <tr> + <td align=center><code>3-15</code></td> + <td>Reserved + </td> + </tr> + </table> + + This value is set to zero for variable-length sequences. + + </td> + </tr> + + <tr> + <td>8-11</td> + <td><b>Character Set.</b> (variable-length string only) + This four-bit value specifies the character set + to be used for encoding the string: + <table width=100% class=list> + <tr> + <th width="30%">Value</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>ASCII: As of this writing (July 2003, Release 1.6.0), + 8-bit ASCII is the only character set supported. Therefore, + no translations have been defined. + </td> + </tr> + + <tr> + <td align=center><code>1-15</code></td> + <td>Reserved + </td> + </tr> + </table> + + This value is set to zero for variable-length sequences. 
+ + </td> + </tr> + + <tr> + <td>12-23</td> + <td>Reserved (zero).</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan=4><br>Base Type<br><br></td> + </tr> + + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Base Type</td> + <td> + <P>Each variable-length type is based on some parent type. The + information for that parent type is described recursively by + this field. + </P> + </td> + </tr> + + </table> + </div> + </P> + + <P>Class specific information for Array (Class 10): + + <P>There are no bit fields defined for the array class. + </P> + + <P>Note that the dimension information defined in the property for this + datatype class is independent of dataspace information for a dataset. + The dimension information here describes the dimensionality of the + information within a data element (or a component of an element, if the + array datatype is nested within another datatype) and the dataspace for a + dataset describes the location of the elements in a dataset. 
+ </P> + + <br> + <div align=center> + <table class=format> + <caption> + Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td>Dimensionality</td> + <td colspan=3>Reserved (zero)</td> + </tr> + + <tr> + <td colspan=4>Dimension #1 Size</td> + </tr> + <tr> + <td colspan=4>.<br>.<br>.<br></td> + </tr> + <tr> + <td colspan=4>Dimension #n Size</td> + </tr> + + <tr> + <td colspan=4>Permutation Index #1</td> + </tr> + <tr> + <td colspan=4>.<br>.<br>.<br></td> + </tr> + <tr> + <td colspan=4>Permutation Index #n</td> + </tr> + + <tr> + <td colspan=4><br>Base Type<br><br></td> + </tr> + + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Dimensionality</td> + <td> + <P>This value is the number of dimensions that the array has. + </P> + </td> + </tr> + + <tr> + <td>Dimension #n Size</td> + <td> + <P>This value is the size of the dimension of the array + as stored in the file. The first dimension stored in + the list of dimensions is the slowest changing dimension + and the last dimension stored is the fastest changing + dimension. + </P> + </td> + </tr> + + <tr> + <td>Permutation Index #n</td> + <td> + <P>This value is the index permutation used to map + each dimension from the canonical representation to an + alternate axis for each dimension. Currently, dimension + permutations are not supported and these indices should be set + to the index position minus one (i.e. the first dimension should + be set to 0, the second dimension should be set to 1, etc.) + </P> + </td> + </tr> + + <tr> + <td>Base Type</td> + <td> + <P>Each array type is based on some parent type. The + information for that parent type is described recursively by + this field. 
+ </P> + </td> + </tr> + + </table> + </div> + + </P> + + <hr> + <h4><a name="OldFillValueMessage">Name: Data Storage - Fill Value (Old)</a></h4> + + <P class=item><B>Header Message Type:</B> 0x0004 + </P> + <P class=item><B>Length:</B> varies + </P> + <P class=item><B>Status:</B> Optional, may not be repeated. + </P> + + <P class=item><B>Description:</B> The fill value message stores a single + data value which is returned to the application when an uninitialized + data element is read from a dataset. The fill value is interpreted + with the same datatype as the dataset. If no fill value message is + present then a fill value of all zero bytes is assumed. + </P> + + <P class=item2>This fill value message is deprecated in favor of the "new" + fill value message (Message Type 0x0005) and is only written to the + file for forward compatibility with versions of the HDF5 library before + the 1.6.0 version. Additionally, it only appears for datasets with a + user defined fill value (as opposed to the library default fill value + or an explicitly set "undefined" fill value). + </P> + + <P class=item><B>Format of Data:</B> + <br> + <div align=center> + <table class=format> + <caption> + Fill Value Message (Old) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan=4>Size</td> + </tr> + + <tr> + <td colspan=4><br>Fill Value<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Size</td> + <td> + <P>This is the size of the Fill Value field in bytes. + </P> + </td> + </tr> + + <tr> + <td>Fill Value</td> + <td> + <P>The fill value. The bytes of the fill value are interpreted + using the same datatype as for the dataset. 
+ </P> + </td> + </tr> + </table> + </div> + </P> + + <hr> + <h4><a name="FillValueMessage">Name: Data Storage - Fill Value </a></h4> + + <P class=item><B>Header Message Type:</B> 0x0005 + </P> + <P class=item><B>Length:</B> varies + </P> + <P class=item><B>Status:</B> Required for dataset objects, may not be repeated. + </P> + + <P class=item><B>Description:</B> The fill value message stores a single + data value which is returned to the application when an uninitialized + data element is read from a dataset. The fill value is interpreted + with the same datatype as the dataset. + </P> + + <P class=item><B>Format of Data:</B> + <br> + <div align=center> + <table class=format> + <caption> + Fill Value Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Space Allocation Time</td> + <td>Fill Value Write Time</td> + <td>Fill Value Defined</td> + </tr> + + <tr> + <td colspan=4>Size</td> + </tr> + + <tr> + <td colspan=4><br>Fill Value<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td> + <P>The version number information is used for changes in the + format of the fill value message and is described here: + <table class=list> + <tr> + <th width="30%">Version</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>Never used + </td> + </tr> + <tr> + <td align=center><code>1</code></td> + <td>Used by version 1.6.x of the library to encode + fill values. In this version, the Size field is + always present. + </td> + </tr> + <tr> + <td align=center><code>2</code></td> + <td>The current version used by the library (version + 1.7.3 or later). In this version, the Size and + Fill Value fields are + only present if the Fill Value Defined field is set + to 1. 
+ </td> + </tr> + </table> + </P> + </td> + </tr> + + <tr> + <td>Space Allocation Time</td> + <td> + <P>When the storage space for the dataset's raw data will be + allocated. The allowed values are: + <table class=list> + <tr> + <th width="30%">Value</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>1</code></td> + <td>Early allocation. Storage space for the entire dataset + should be allocated in the file when the dataset is + created. + </td> + </tr> + <tr> + <td align=center><code>2</code></td> + <td>Late allocation. Storage space for the entire dataset + should not be allocated until the dataset is written + to. + </td> + </tr> + <tr> + <td align=center><code>3</code></td> + <td>Incremental allocation. Storage space for the + dataset should not be allocated until the portion + of the dataset is written to. This is currently + used in conjunction with chunked data storage for + datasets. + </td> + </tr> + </table> + </P> + </td> + </tr> + + <tr> + <td>Fill Value Write Time</td> + <td> + <P>At the time that storage space for the dataset's raw data is + allocated, this value indicates whether the fill value should + be written to the raw data storage elements. The allowed values + are: + <table class=list> + <tr> + <th width="30%">Value</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>On allocation. The fill value is always written to + the raw data storage when the storage space is allocated. + </td> + </tr> + <tr> + <td align=center><code>1</code></td> + <td>Never. The fill value should never be written to + the raw data storage. + </td> + </tr> + <tr> + <td align=center><code>2</code></td> + <td>Fill value written if set by user. The fill value + will be written to the raw data storage when the storage + space is allocated only if the user explicitly set + the fill value. If the fill value is the library + default or is undefined, it will not be written to + the raw data storage. 
+ </td> + </tr> + </table> + </P> + </td> + </tr> + + <tr> + <td>Fill Value Defined</td> + <td> + <P>This value indicates if a fill value is defined for this + dataset. If this value is 0, the fill value is undefined. + If this value is 1, a fill value is defined for this dataset. + For version 2 or later of the fill value message, this value + controls the presence of the Size field. + </P> + </td> + </tr> + + <tr> + <td>Size</td> + <td> + <P>This is the size of the Fill Value field in bytes. This field + is not present if the Version field is >1 and the Fill Value + Defined field is set to 0. + </P> + </td> + </tr> + + <tr> + <td>Fill Value</td> + <td> + <P>The fill value. The bytes of the fill value are interpreted + using the same datatype as for the dataset. This field is + not present if the Version field is >1 and the Fill Value + Defined field is set to 0. + </P> + </td> + </tr> + </table> + </div> + </P> + +<!-- + <hr> + <h4><a name="CompactDataStorageMessage">Name: Data Storage - Compact</a></h4> + + <b>Header Message Type:</b> 0x0006<br> + <b>Length:</b> varies<br> + <b>Status:</b> Optional, may not be repeated.<br> + + <p>This message indicates that the data for the data object is + stored within the current HDF file by including the actual + data as the header data for this message. The data is + stored internally in + the <em>normal format</em>, i.e. in one chunk, uncompressed, etc. + + <P>Note that one and only one of the <em>Data Storage</em> headers can be + stored for each data object. + + <P><b>Format of Data:</b> The message data is actually composed + of dataset data, so the format will be determined by the dataset + format. 
+--> + + <hr> + <h4><a name="ReservedMessage_0006">Name: Reserved - Not Assigned Yet</a></h4> + <P class=item><B>Header Message Type:</B> 0x0006</P> + <P class=item><B>Length:</B> N/A</P> + <P class=item><B>Status:</B> N/A</P> + <P class=item><B>Format of Data:</B> N/A</P> + + <P class=item><B>Purpose and Description:</B> This message type was skipped during + the initial specification of the file format and may be used in a + future expansion to the format.</P> + + <hr> + <h4><a name="ExternalFileListMessage">Name: Data Storage - + External Data Files</a></h4> + <P class=item><B>Header Message Type:</B> 0x0007 </P> + <P class=item><B>Length:</B> varies</P> + <P class=item><B>Status:</B> Optional, may not be repeated.</P> + + <P class=item><B>Purpose and Description:</B> The external object message + indicates that the data for an object is stored outside the HDF5 + file. The filename of the object is stored as a Uniform + Resource Locator (URL) of the actual filename containing the + data.
An external file list record also contains the byte offset + of the start of the data within the file and the amount of space + reserved in the file for that data.</P> + + <P class=item><B>Format of Data:</B> + <br> + <div align=center> + <table class=format> + <caption> + External File List Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan=3>Reserved</td> + </tr> + + <tr> + <td colspan=2>Allocated Slots</td> + <td colspan=2>Used Slots</td> + </tr> + + <tr> + <td colspan=4><br>Heap Address<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Slot Definitions...<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td> + <P>The version number information is used for changes in the format of the External File + List Message and is described here: + <table class=list> + <tr> + <th width="30%">Version</th> + <th align=left>Description</th> + </tr> + <tr> + <td align=center><code>0</code></td> + <td>Never used.</td> + </tr> + <tr> + <td align=center><code>1</code></td> + <td>The current version used by the library.</td> + </tr> + </table> + </P> + </td> + </tr> + + <tr> + <td>Reserved</td> + <td> + <P>This field is reserved for future use.</P> + </td> + </tr> + + <tr> + <td>Allocated Slots</td> + <td> + <P>The total number of slots allocated in the message. Its value must be at least as + large as the value contained in the Used Slots field.
(The current library simply + uses the number of Used Slots for this message)</P> + </td> + </tr> + + <tr> + <td>Used Slots</td> + <td> + <P>The number of initial slots that contain valid information.</P> + </td> + </tr> + + <tr> + <td>Heap Address</td> + <td> + <P>This is the address of a local heap which contains the names for the external + files (The local heap information can be found in Disk Format Level 1D in this + document). The name at offset zero in the heap is always the empty string.</P> + </td> + </tr> + + <tr> + <td>Slot Definitions</td> + <td> + <P>The slot definitions are stored in order according to the array addresses they + represent.</P> + </td> + </tr> + + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + External File List Slot + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan=4><br>Name Offset(&lt;size&gt; bytes)<br><br></td> + </tr> + + <tr> + <td colspan=4><br>File Offset(&lt;size&gt; bytes)<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Size<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Name Offset(&lt;size&gt; bytes)</td> + <td> + <P>The byte offset within the local name heap for the name + of the file. File names are stored as a URL which has a + protocol name, a host name, a port number, and a file + name: + <code><em>protocol</em>:<em>port</em>//<em>host</em>/<em>file</em></code>. + If the protocol is omitted then "file:" is assumed. If + the port number is omitted then a default port for that + protocol is used. If both the protocol and the port + number are omitted then the colon can also be omitted. If + the double slash and host name are omitted then + "localhost" is assumed.
The file name is the only + mandatory part, and if the leading slash is missing then + it is relative to the application's current working + directory (the use of relative names is not + recommended).</P> + </td> + </tr> + + <tr> + <td>File Offset(<size> bytes)</td> + <td> + <P>This is the byte offset to the start of the data in the + specified file. For files that contain data for a single + dataset this will usually be zero.</P> + </td> + </tr> + + <tr> + <td>Size</td> + <td> + <P>This is the total number of bytes reserved in the + specified file for raw data storage. For a file that + contains exactly one complete dataset which is not + extendable, the size will usually be the exact size of the + dataset. However, by making the size larger one allows + HDF5 to extend the dataset. The size can be set to a value + larger than the entire file since HDF5 will read zeros + past the end of the file without failing.</P> + </td> + </tr> + </table> + </div> + + + <hr> + <h4><a name="LayoutMessage">Name: Data Storage - Layout</a></h4> + + <P class=item><B>Header Message Type:</B> 0x0008</P> + <P class=item><B>Length:</B> varies</P> + <P class=item><B>Status:</B> Required for datasets, may not be repeated.</P> + + <P class=item><B>Purpose and Description:</B> Data layout describes how the + elements of a multi-dimensional array are arranged in the linear + address space of the file. Three types of data layout are + supported: + + <ol> + <li>Contiguous: The array can be stored in one contiguous area of the file. + The layout requires that the size of the array be constant and + does not permit chunking, compression, checksums, encryption, + etc. The message stores the total size of the array and the + offset of an element from the beginning of the storage area is + computed as in C. + + <li>Chunked: The array domain can be regularly decomposed into chunks and + each chunk is allocated separately. 
This layout supports + arbitrary element traversals, compression, encryption, and + checksums, and the chunks can be distributed across external + raw data files (these features are described in other + messages). The message stores the size of a chunk instead of + the size of the entire array; the size of the entire array can + be calculated by traversing the B-tree that stores the chunk + addresses. + + <li>Compact: The array can be stored in one contiguous block, as part of + this object header message (this is called "compact" storage below). + </ol> + + <P class=item><B>Format of Data:</B> + <br> + <div align=center> + <table class=format> + <caption> + Data Layout Message (Versions 1 and 2) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Dimensionality</td> + <td>Layout Class</td> + <td>Reserved</td> + </tr> + + <tr> + <td colspan=4>Reserved</td> + </tr> + + <tr> + <td colspan=4><br>Address<br><br></td> + </tr> + + <tr> + <td colspan=4>Dimension 0 (4-bytes)</td> + </tr> + + <tr> + <td colspan=4>Dimension 1 (4-bytes)</td> + </tr> + + <tr> + <td colspan=4>...</td> + </tr> + + <tr> + <td colspan=4>Dataset Element Size <em>(optional)</em></td> + </tr> + + <tr> + <td colspan=4>Compact Data Size (4-bytes)</td> + </tr> + + <tr> + <td colspan=4><br>Compact Data...<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td> + <P>The version number information is used for changes in the format of the data + layout message and is described here:</P> + <table class=list> + <tr> + <th width="30%">Version</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>Never used.</td> + </tr> + + <tr> + <td align=center><code>1</code></td> + <td>Used by version 1.4 and 
before of the library to encode layout information. + Data space is always allocated when the data set is created.</td> + </tr> + + <tr> + <td align=center><code>2</code></td> + <td>Used by version 1.6.x of the library to encode layout information. + Data space is allocated only when it is necessary.</td> + </tr> + </table> + </td> + </tr> + + <tr> + <td>Dimensionality</td> + <td><P>An array has a fixed dimensionality. This field + specifies the number of dimension size fields later in the + message.</P></td> + </tr> + + <tr> + <td>Layout Class</td> + <td><P>The layout class specifies how the other fields of the + layout message are to be interpreted. A value of one + indicates contiguous storage, a value of two indicates chunked storage, + while a value of zero indicates compact storage. Other values will be defined + in the future.</P></td> + </tr> + + <tr> + <td>Address</td> + <td><P>For contiguous storage, this is the address of the first + byte of storage. For chunked storage this is the address + of the B-tree that is used to look up the addresses of the + chunks. This field is not present for compact storage. + If the version for this message is set to 2, the address + may have the "undefined address" value, to indicate that + storage has not yet been allocated for this array.</P></td> + </tr> + + <tr> + <td>Dimensions</td> + <td><P>For contiguous and compact storage the dimensions define + the entire size of the array while for chunked storage they define + the size of a single chunk. In all cases, they are in units of + array elements (not bytes). The first dimension stored in the list + of dimensions is the slowest changing dimension and the last + dimension stored is the fastest changing dimension. + </P> + </td> + </tr> + + <tr> + <td>Dataset Element Size</td> + <td><P>The size of a dataset element, in bytes. This field is only + present for chunked storage. 
+ </P> + </td> + </tr> + + <tr> + <td>Compact Data Size</td> + <td><P>This field is only present for compact data storage. + It contains the size of the raw data for the dataset array.</P></td> + + <tr> + <td>Compact Data</td> + <td><P>This field is only present for compact data storage. + It contains the raw data for the dataset array.</P></td> + </tr> + </table> + </div> + + <br> + <P>Version 3 of this message re-structured the format into specific + properties that are required for each layout class. + + <br> + <div align=center> + <table class=format> + <caption> + <B>Data Layout Message (Version 3)</B> + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Layout Class</td> + <td colspan=2 bgcolor=#DDDDDD> </td> + </tr> + + <tr> + <td colspan=4><br>Properties<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td> + <P>The version number information is used for changes in the format of layout message + and is described here:</P> + <table class=list> + <tr> + <th width="30%">Version</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>3</code></td> + <td>Used by the version 1.6.3 and later of the library to store properties + for each layout class.</td> + </tr> + </table> + </td> + </tr> + + <tr> + <td>Layout Class</td> + <td><P>The layout class specifies how the other fields of the layout message are to be + interpreted. A value of one indicates contiguous storage, a value of two + indicates chunked storage, while a value of zero indicates compact storage.</P></td> + </tr> + + <tr> + <td>Properties</td> + <td><P>This variable-sized field encodes information specific to each + layout class and is described below. 
If there is no property + information specified for a layout class, the size of this field + is zero bytes.</P></td> + </tr> + </table> + </div> + + <br> + <P>Class-specific information for compact layout (Class 0): (Note: The dimensionality information + is in the Dataspace message) + + <br> + <div align=center> + <table class=format> + <caption> + Property Descriptions + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan=2>Size</td> + <td colspan=2 bgcolor=#DDDDDD> </td> + </tr> + + <tr> + <td colspan=4><br>Raw Data...<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Size</td> + <td><P>This field contains the size of the raw data for the dataset array.</P></td> + </tr> + + <tr> + <td>Raw Data</td> + <td><P>This field contains the raw data for the dataset array.</P></td> + </tr> + </table> + </div> + + <br> + <P>Class-specific information for contiguous layout (Class 1): (Note: The dimensionality information + is in the Dataspace message) + + <br> + <div align=center> + <table class=format> + <caption> + Property Descriptions + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan=4><br>Address<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Size<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Address</td> + <td><P>This is the address of the first byte of raw data storage. 
+ The address may have the "undefined address" value, to indicate + that storage has not yet been allocated for this array.</P></td> + </tr> + + <tr> + <td>Size</td> + <td><P>This field contains the size allocated to store the raw data.</P></td> + </tr> + </table> + </div> + + <br> + <P>Class-specific information for chunked layout (Class 2): + + <br> + <div align=center> + <table class=format> + <caption> + Property Descriptions + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Dimensionality</td> + <td colspan=3 bgcolor=#DDDDDD> </td> + </tr> + + <tr> + <td colspan=4><br>Address<br><br></td> + </tr> + + <tr> + <td colspan=4>Dimension 0 (4-bytes)</td> + </tr> + + <tr> + <td colspan=4>Dimension 1 (4-bytes)</td> + </tr> + + <tr> + <td colspan=4>...</td> + </tr> + + <tr> + <td colspan=4>Dataset Element Size</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Dimensionality</td> + <td><P>A chunk has a fixed dimensionality. This field specifies + the number of dimension size fields later in the message.</P></td> + </tr> + + <tr> + <td>Address</td> + <td><P>This is the address of the B-tree that is used to look up the addresses of the + chunks. The address may have the "undefined address" value, to indicate that + storage has not yet been allocated for this array.</P></td> + </tr> + + <tr> + <td>Dimensions</td> + <td><P>These values define the dimension size of a single chunk, in + units of array elements (not bytes). The first dimension stored in + the list of dimensions is the slowest changing dimension and the + last dimension stored is the fastest changing dimension. + </P> + </td> + </tr> + + <tr> + <td>Dataset Element Size</td> + <td><P>The size of a dataset element, in bytes. 
+ </P> + </td> + </tr> + </table> + </div> + + <hr> + <h4><a name="ReservedMessage_0009">Name: Reserved - Not Assigned Yet</a></h4> + <P class=item><B>Header Message Type:</B> 0x0009</P> + <P class=item><B>Length:</B> N/A</P> + <P class=item><B>Status:</B> N/A</P> + <P class=item><B>Format of Data:</B> N/A</P> + + <P class=item><B>Purpose and Description:</B> This message type was skipped during the initial + specification of the file format and may be used in a future expansion to the format.</P> + + <hr> + <h4><a name="ReservedMessage_000A">Name: Reserved - Not Assigned Yet</a></h4> + <P class=item><B>Header Message Type:</B> 0x000A</P> + <P class=item><B>Length:</B> N/A</P> + <P class=item><B>Status:</B> N/A</P> + <P class=item><B>Format of Data:</B> N/A</P> + + <P class=item><B>Purpose and Description:</B> This message type was skipped during the initial + specification of the file format and may be used in a future expansion to the format.</P> + + <hr> + <h4><a name="FilterMessage">Name: Data Storage - Filter Pipeline</a></h4> + <P class=item><B>Header Message Type:</B> 0x000B</P> + <P class=item><B>Length:</B> varies</P> + <P class=item><B>Status:</B> Optional, may not be repeated.</P> + + <P class=item><B>Description:</B> This message describes the + filter pipeline which should be applied to the data stream by + providing filter identification numbers, flags, a name, and + client data.</P> + + <P class=item><B>Format of Data:</B> + <br> + <div align=center> + <table class=format> + <caption> + Filter Pipeline Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Number of Filters</td> + <td colspan=2>Reserved</td> + </tr> + + <tr> + <td colspan=4>Reserved</td> + </tr> + + <tr> + <td colspan=4><br>Filter List<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field
Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td><P>The version number for this message. This document + describes version 1.</P></td> + </tr> + + <tr> + <td>Number of Filters</td> + <td><P>The total number of filters described by this + message. The maximum possible number of filters in a + message is 32.</P></td> + </tr> + + <tr> + <td>Filter List</td> + <td><P>A description of each filter. A filter description + appears in the next table.</P></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + Filter Description + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan=2>Filter Identification</td> + <td colspan=2>Name Length</td> + </tr> + + <tr> + <td colspan=2>Flags</td> + <td colspan=2>Number of Values for Client Data</td> + </tr> + + <tr> + <td colspan=4><br>Name<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Client Data<br><br></td> + </tr> + + <tr> + <td colspan=4>Padding</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Filter Identification</td> + <td> + <p> + This value, often referred to as a filter identifier, + is designed to be a unique identifier for the filter. + Values from zero through 32,767 are reserved for filters + supported by The HDF Group in the HDF5 library and for + filters requested and supported by third parties. + Filters supported by The HDF Group are documented immediately + below. Information on 3rd-party filters can be found at + <a href="/services/contributions.html#filters"> + <code>https://support.hdfgroup.org/services/contributions.html#filters</code></a>. 
+ <a href="#Footnote1Change"><sup><small>1</small></sup></a> + <p> + To request a filter identifier, please contact + The HDF Group’s Help Desk at + <img src="Graphics/help.png" valign="center" height=14>. + You will be asked to provide the following information: + <ol> + <li>Contact information for the developer requesting the + new identifier + <li>A short description of the new filter + <li>Links to any relevant information, including licensing + information + </ol> + <p> + Values from 32768 to 65535 are reserved for non-distributed uses + (for example, internal company usage) or for application usage + when testing a feature. The HDF Group does not track or document + the use of the filters with identifiers from this range. + + <p> + The filters currently in library version 1.6.5 are + listed below: + <table class=list> + <tr> + <th width="30%">Identification</th> + <th align=left>Name</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>1</code></td> + <td>deflate</td> + <td>GZIP deflate compression</td> + </tr> + + <tr> + <td align=center><code>2</code></td> + <td>shuffle</td> + <td>Data element shuffling</td> + </tr> + + <tr> + <td align=center><code>3</code></td> + <td>fletcher32</td> + <td>Fletcher32 checksum</td> + </tr> + + <tr> + <td align=center><code>4</code></td> + <td>szip</td> + <td>SZIP compression</td> + </tr> + </table> + </P></td> + </tr> + + <tr> + <td>Name Length</td> + <td><P>Each filter has an optional null-terminated ASCII name + and this field holds the length of the name including the + null termination padded with nulls to be a multiple of + eight. If the filter has no name then a value of zero is + stored in this field.</P></td> + </tr> + + <tr> + <td>Flags</td> + <td><P>The flags indicate certain properties for a filter. 
The + bit values defined so far are:</P> + <table class=list> + <tr> + <th width="30%">Value</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>bit 1</code></td> + <td>If set then the filter is an optional filter. + During output, if an optional filter fails it will be + silently removed from the pipeline.</td> + </tr> + </table> + </td> + </tr> + + <tr> + <td>Client Data Number of Values</td> + <td><P>Each filter can store a few integer values to control + how the filter operates. The number of entries in the + Client Data array is stored in this field.</P></td> + </tr> + + <tr> + <td>Name</td> + <td><P>If the Name Length field is non-zero then it will + contain the size of this field, a multiple of eight. This + field contains a null-terminated, ASCII character + string to serve as a comment/name for the filter.</P></td> + </tr> + + <tr> + <td>Client Data</td> + <td><P>This is an array of four-byte integers which will be + passed to the filter function. The Client Data Number of + Values determines the number of elements in the array.</P></td> + </tr> + + <tr> + <td>Padding</td> + <td><P>Four bytes of zeros are added to the message at this + point if the Client Data Number of Values field contains + an odd number.</P></td> + </tr> + </table> + </div> + <p> + <hr align="left" width="50"> + <a name="Footnote1Change"><sup>1</sup></a>If you are reading + an earlier version of this document, this link may have changed. + If the link does not work, use the latest version of this document + on <a href="https://support.hdfgroup.org">The HDF Group</a>’s website, + <a href="/HDF5/doc/H5.format.html"> + <code>https://support.hdfgroup.org/HDF5/doc/H5.format.html</code></a>; + the link there will always be correct. 
+ <small><a href="#FilterMessage">(Return)</a></small> + </P> + + <hr> + <h4><a name="AttributeMessage">Name: Attribute</a></h4> + <P class=item><B>Header Message Type:</B> 0x000C + <P class=item><B>Length:</B> varies + <P class=item><B>Status:</B> Optional, may be repeated. + + <P class=item><B>Description:</B> The <em>Attribute</em> + message is used to list objects in the HDF5 file which are used + as attributes, or "metadata" about the current object. An + attribute is a small dataset; it has a name, a datatype, a data + space, and raw data. Since attributes are stored in the object + header they must be relatively small (&lt;64KB) and can be + associated with any type of object which has an object header + (groups, datasets, named types and spaces, etc.). + + <P class=item2>Note: Attributes on an object must have unique names. (The HDF5 library + currently enforces this by causing the creation of an attribute with + a duplicate name to fail). Attributes on different objects may have the + same name, however.
+ + <P class=item><B>Format of Data:</B> + <br> + <div align=center> + <table class=format> + <caption> + Attribute Message (Version 1) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Reserved</td> + <td colspan=2>Name Size</td> + </tr> + + <tr> + <td colspan=2>Datatype Size</td> + <td colspan=2>Dataspace Size</td> + </tr> + + <tr> + <td colspan=4><br>Name<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Datatype<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Dataspace<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Data<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td><P>The version number information is used for changes in the format of the + attribute message and is described here:</P> + <table class=list> + <tr> + <th width="30%">Version</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>Never used.</td> + </tr> + + <tr> + <td align=center><code>1</code></td> + <td>Used by the library before version 1.6 to encode attribute message. + This version does not support shared data type.</td> + </tr> + </table> + </td> + </tr> + + <tr> + <td>Reserved</td> + <td><P>This field is reserved for later use and is set to + zero.</P></td> + </tr> + + <tr> + <td>Name Size</td> + <td><P>The length of the attribute name in bytes including the + null terminator. Note that the Name field below may + contain additional padding not represented by this + field.</P></td> + </tr> + + <tr> + <td>Datatype Size</td> + <td><P>The length of the datatype description in the Datatype + field below. 
Note that the Datatype field may contain + additional padding not represented by this field.</P></td> + </tr> + + <tr> + <td>Dataspace Size</td> + <td><P>The length of the dataspace description in the Dataspace + field below. Note that the Dataspace field may contain + additional padding not represented by this field.</P></td> + </tr> + + <tr> + <td>Name</td> + <td><P>The null-terminated attribute name. This field is + padded with additional null characters to make it a + multiple of eight bytes.</P></td> + </tr> + + <tr> + <td>Datatype</td> + <td><P>The datatype description follows the same format as + described for the datatype object header message. This + field is padded with additional zero bytes to make it a + multiple of eight bytes.</P></td> + </tr> + + <tr> + <td>Dataspace</td> + <td><P>The dataspace description follows the same format as + described for the dataspace object header message. This + field is padded with additional zero bytes to make it a + multiple of eight bytes.</P></td> + </tr> + + <tr> + <td>Data</td> + <td><P>The raw data for the attribute. The size is determined + from the datatype and dataspace descriptions. 
This + field is <em>not</em> padded with additional bytes.</P></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + Attribute Message (Version 2) + </caption> + + <tr align=center> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flag</td> + <td colspan=2>Name Size</td> + </tr> + + <tr> + <td colspan=2>Type Size</td> + <td colspan=2>Space Size</td> + </tr> + + <tr> + <td colspan=4><br>Name<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Type<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Space<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Data<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td><P>The version number information is used for changes in the format of the + attribute message and is described here:</P> + <table class=list width="90%"> + <tr> + <th width="30%">Version</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>2</code></td> + <td>Used by the library of version 1.6.x and after to encode attribute message. + This version supports shared data type. 
The fields of name, type, and space + are not padded with additional bytes of zero.</td> + </tr> + </table> + </td> + </tr> + + <tr> + <td>Flag</td> + <td><P>This field indicates whether the data type of this attribute is shared:</P> + <table class=list width="90%"> + <tr> + <th width="30%">Value</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>Datatype is <em>not</em> shared.</td> + </tr> + + <tr> + <td align=center><code>1</code></td> + <td>Datatype is shared.</td> + </tr> + </table> + </td> + </tr> + + <tr> + <td>Name Size</td> + <td><P>The length of the attribute name in bytes including the + null terminator.</P></td> + </tr> + + <tr> + <td>Datatype Size</td> + <td><P>The length of the datatype description in the Datatype + field below.</P></td> + </tr> + + <tr> + <td>Dataspace Size</td> + <td><P>The length of the dataspace description in the Dataspace + field below.</P></td> + </tr> + + <tr> + <td>Name</td> + <td><P>The null-terminated attribute name. This field is <em>not</em> + padded with additional bytes.</P></td> + </tr> + + <tr> + <td>Datatype</td> + <td><P>The datatype description follows the same format as + described for the datatype object header message. This + field is <em>not</em> padded with additional bytes.</P></td> + </tr> + + <tr> + <td>Dataspace</td> + <td><P>The dataspace description follows the same format as + described for the dataspace object header message. This + field is <em>not</em> padded with additional bytes.</P></td> + </tr> + + <tr> + <td>Data</td> + <td><P>The raw data for the attribute. The size is determined + from the datatype and dataspace descriptions. 
This + field is <em>not</em> padded with additional zero + bytes.</P></td> + </tr> + </table> + </div> + + <hr> + <h4><a name="CommentMessage">Name: Object Comment</a></h4> + + <P class=item><B>Header Message Type:</B> 0x000D</P> + <P class=item><B>Length:</B> varies</P> + <P class=item><B>Status:</B> Optional, may not be repeated.</P> + + <P class=item><B>Description:</B> The object comment is + designed to be a short description of an object. An object comment + is a sequence of non-null (not <code>\0</code>) ASCII characters with no other + formatting included by the library.</P> + + <P class=item><B>Format of Data:</B> + <br> + <div align=center> + <table class=format> + <caption> + Object Comment Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan=4><br>Comment<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Comment</td> + <td>A null-terminated ASCII character string.</td> + </tr> + </table> + </div> + + <hr> + <h4><a name="OldModifiedMessage">Name: Object Modification Date & Time (Old)</a></h4> + + <P class=item><B>Header Message Type:</B> 0x000E</P> + <P class=item><B>Length:</B> fixed</P> + <P class=item><B>Status:</B> Optional, may not be repeated.</P> + + <P class=item><B>Description:</B> The object modification date + and time is a timestamp which indicates (using ISO-8601 date and + time format) the last modification of an object. The time is + updated when any object header message changes according to the + system clock where the change was posted. + + <br><br>This modification time message is deprecated in favor of the "new" + modification time message (Message Type 0x0012) and is no longer written + to the file in versions of the HDF5 library after the 1.6.0 version. 
+ </P> + + <P class=item><B>Format of Data:</B> + <br> + <div align=center> + <table class=format> + <caption> + Modification Time Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan=4>Year</td> + </tr> + + <tr> + <td colspan=2>Month</td> + <td colspan=2>Day of Month</td> + </tr> + + <tr> + <td colspan=2>Hour</td> + <td colspan=2>Minute</td> + </tr> + + <tr> + <td colspan=2>Second</td> + <td colspan=2>Reserved</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Year</td> + <td><P>The four-digit year as an ASCII string. For example, + <code>1998</code>. All fields of this message should be interpreted + as coordinated universal time (UTC)</P></td> + </tr> + + <tr> + <td>Month</td> + <td><P>The month number as a two digit ASCII string where + January is <code>01</code> and December is <code>12</code>.</P></td> + </tr> + + <tr> + <td>Day of Month</td> + <td><P>The day number within the month as a two digit ASCII + string. 
The first day of the month is <code>01</code>.</P></td> + </tr> + + <tr> + <td>Hour</td> + <td><P>The hour of the day as a two digit ASCII string where + midnight is <code>00</code> and 11:00pm is <code>23</code>.</P></td> + </tr> + + <tr> + <td>Minute</td> + <td><P>The minute of the hour as a two digit ASCII string where + the first minute of the hour is <code>00</code> and + the last is <code>59</code>.</P></td> + </tr> + + <tr> + <td>Second</td> + <td><P>The second of the minute as a two digit ASCII string + where the first second of the minute is <code>00</code> + and the last is <code>59</code>.</P></td> + </tr> + + <tr> + <td>Reserved</td> + <td><P>This field is reserved and should always be zero.</P></td> + </tr> + </table> + </div> + + <hr> + <h4><a name="SharedMessage">Name: Shared Object Message</a></h4> + <P class=item><B>Header Message Type:</B> 0x000F</P> + <P class=item><B>Length:</B> Fixed</P> + <P class=item><B>Status:</B> Optional, may be repeated.</P> + + <P class=item><B>Description:</B> A constant message can be shared among + several object headers. A <em>Shared Object</em> Message contains the address of + the object message to be shared. Care must be exercised to prevent cycles when a + message of one object header points to a message in some other object header. + Starting from Version 2 of the Shared Object Message, the <em>Flags</em> + field becomes unused. 
+ </P> + + <P class=item><B>Format of Data:</B> + <br> + <div align=center> + <table class=format> + <caption> + Shared Object Message (Version 1) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan=2>Reserved</td> + </tr> + + <tr> + <td colspan=4>Reserved</td> + </tr> + + <tr> + <td colspan=4><br>Pointer<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td><P>The version number is used when there are changes in the format + of a shared object message and is described here:</P> + <table class=list> + <tr> + <th width="30%">Version</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>Never used.</td> + </tr> + + <tr> + <td align=center><code>1</code></td> + <td>Used by the library before version 1.6.1. In this version, + the Flags field is used to indicate whether the actual message is + stored in the global heap (never implemented). The Pointer field + either contains the header message address in the global heap + (never implemented) or the address of the shared object header.</td> + </tr> + </table></td> + </tr> + + <tr> + <td>Flags</td> + <td><P>The Shared Object Message points to a message which is + shared among multiple object headers. 
The Flags field + describes the type of sharing:</P> + <table class=list> + <tr> + <th width="30%">Bit</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>If this bit is clear then the actual message is the + first message in some other object header; otherwise + the actual message is stored in the global heap (never + implemented).</td> + </tr> + + <tr> + <td align=center><code>2-7</code></td> + <td>Reserved (always zero)</td> + </tr> + </table> + </td> + </tr> + + <tr> + <td>Pointer</td> + <td><P>The address of the object header + containing the message to be shared.</P></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=format> + <caption> + Shared Object Message (Version 2) + </caption> + + <tr> + <th width="25%">byte</td> + <th width="25%">byte</td> + <th width="25%">byte</td> + <th width="25%">byte</td> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan=2 bgcolor=#DDDDDD> </td> + </tr> + + <tr> + <td colspan=4><br>Pointer<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td><P>The version number is used when there are changes in the format + of a shared object message and is described here:</P> + <table class=list> + <tr> + <th width="30%">Version</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>2</code></td> + <td>Used by the library of version 1.6.1 and after. In this version, + The Flags field is not used and the Pointer field contains the address + of the object header containing the message to be shared. 
</td> + </tr> + </table> + </tr> + + <tr> + <td>Flags</td> + <td><P>Unused.</P></td> + </tr> + + <tr> + <td>Pointer</td> + <td><P>The address of the object header + containing the message to be shared.</P></td> + </tr> + </table> + </div> + + + <hr> + <h4><a name="ContinuationMessage">Name: Object Header Continuation</a></h4> + <P class=item><B>Header Message Type:</B> 0x0010</P> + <P class=item><B>Length:</B> fixed</P> + <P class=item><B>Status:</B> Optional, may be repeated.</P> + <P class=item><B>Description:</B> The object header continuation is the location + in the file of more header messages for the current data object. This can be + used when header blocks become too large or are likely to change over time.</P> + + <P class=item><B>Format of Data:</B> + <br> + <div align=center> + <table class=format> + <caption> + Object Header Continuation Message + </caption> + + <tr> + <th width=25%>byte</th> + <th width=25%>byte</th> + <th width=25%>byte</th> + <th width=25%>byte</th> + </tr> + + <tr> + <td colspan=4><br>Offset<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Length<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width=30%>Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Offset</td> + <td><P>This value is the offset in bytes from the beginning of the file where the + header continuation information is located.</P></td> + </tr> + + <tr> + <td>Length</td> + <td><P>This value is the length in bytes of the header continuation information in + the file.</P></td> + </tr> + </table> + </div> + + <hr> + <h4><a name="SymbolTableMessage">Name: Group Message</a></h4> + <P class=item><B>Header Message Type:</B> 0x0011</P> + <P class=item><B>Length:</B> fixed</P> + <P class=item><B>Status:</B> Required for groups, may not be repeated.</P> + <P class=item><B>Description:</B> Each group has a B-tree and a + name heap which are pointed to by this message.</P> + <P class=item><B>Format of data:</B> 
+ + <br> + <div align=center> + <table class=format> + <caption> + <B>Group Message</B> + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan=4><br>B-tree Address<br><br></td> + </tr> + + <tr> + <td colspan=4><br>Heap Address<br><br></td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width=30%>Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>B-tree Address</td> + <td><P>This value is the offset in bytes from the beginning of the file + where the B-tree is located.</P></td> + </tr> + + <tr> + <td>Heap Address</td> + <td><P>This value is the offset in bytes from the beginning of the file + where the group name heap is located.</P></td> + </tr> + </table> + </div> + + <hr> + <h4><a name="ModifiedMessage">Name: Object Modification Date & Time</a></h4> + + <P class=item><B>Header Message Type:</B> 0x0012 </P> + <P class=item><B>Length:</B> Fixed </P> + <P class=item><B>Status:</B> Optional, may not be repeated. </P> + + <P class=item><B>Description:</B> The object modification date + and time is a timestamp which indicates the last modification of an object. + The time is updated when any object header message changes according to the + system clock where the change was posted. 
+ </P> + + <P class=item><B>Format of Data:</B> + <div align=center> + <table class=format> + <caption> + Modification Time Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan=3>Reserved</td> + </tr> + + <tr> + <td colspan=4>Seconds After Epoch</td> + </tr> + </table> + </div> + + <br> + <div align=center> + <table class=desc> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td>Version</td> + <td><P>The version number is used for changes in the format of Object Modification Time + and is described here:</P> + <table class=list> + <tr> + <th width="30%">Version</th> + <th align=left>Description</th> + </tr> + + <tr> + <td align=center><code>0</code></td> + <td>Never used.</td> + </tr> + + <tr> + <td align=center><code>1</code></td> + <td>Used by Version 1.6.1 and after of the library to encode time. In + this version, the time is the seconds after Epoch.</td> + </tr> + </table> + </td> + </tr> + + <tr> + <td>Reserved</td> + <td><P>This field is reserved and should always be zero.</P></td> + </tr> + + <tr> + <td>Seconds After Epoch</td> + <td><P>The number of seconds since 0 hours, 0 minutes, 0 seconds, + January 1, 1970, Coordinated Universal Time.</P></td> + </tr> + </table> + </div> + +<hr> +<h3><a name="DataStorage">Disk Format: Level 2b - Data Object Data Storage</a></h3> +<P>The data for an object is stored separately from the header +information in the file and may not actually be located in the HDF5 file +itself if the header indicates that the data is stored externally. The +information for each record in the object is stored according to the +dimensionality of the object (indicated in the dimensionality header message). +Multi-dimensional data is stored in C order [same as current scheme], i.e. the +"last" dimension changes fastest. 
+<P>Data whose elements are composed of simple number-types are stored in +native-endian IEEE format, unless they are specifically defined as being stored +in a different machine format with the architecture-type information from the +number-type header message. This means that each architecture will need to +[potentially] byte-swap data values into the internal representation for that +particular machine. +<P> Data with a variable-length datatype is stored in the global heap +of the HDF5 file. Global heap identifiers are stored in the +data object storage. +<P>Data whose elements are composed of pointer number-types are stored in several +different ways depending on the particular pointer type involved. Simple +pointers are just stored as the dataset offset of the object being pointed to with the +size of the pointer being the same number of bytes as offsets in the file. +Dataset region references are stored as a heap-ID which points to the following +information within the file-heap: an offset of the object pointed to, number-type +information (same format as header message), dimensionality information (same +format as header message), sub-set start and end information (i.e. a coordinate +location for each), and field start and end names (i.e. a [pointer to the] +string indicating the first field included and a [pointer to the] string name +for the last field). + +<P>Data of a compound datatype is stored as a contiguous stream of the items +in the structure, with each item formatted according to its datatype.</p> + +<hr> +<h3><a name="Appendix">Appendix</a></h3> +<P>Definitions of various terms used in this document. +</P> +<P>The <A name="UndefinedAddress">"undefined address"</A> for a file is a +file address with all bits set, i.e. <code>0xffff...ff</code>. +<P>The <A name="UnlimitedDim">"unlimited size"</A> for a size is a +value with all bits set, i.e. <code>0xffff...ff</code>. 
+ +</body> +</html> diff --git a/doxygen/examples/H5.format.2.0.html b/doxygen/examples/H5.format.2.0.html new file mode 100644 index 0000000..3653489 --- /dev/null +++ b/doxygen/examples/H5.format.2.0.html @@ -0,0 +1,14902 @@ +<!DOCTYPE HTML> +<html> + <head> + <title> + HDF5 File Format Specification Version 2.0 + </title> + +<style> +h1 { display: block; + margin-top: 24px; + margin-bottom: 24px; + margin-left: 0px; + margin-right: 0px; + text-indent: 0px; + } + +h2 { display: block; + margin-top: 8px; + margin-bottom: 8px; + margin-left: 0px; + margin-right: 0px; + text-indent: 0px; + } +/* A horizontal rule (<hr />) should be placed on the line above +each h2 tag. The h2 tags are used on the main sections along with +the hr tags. */ + +h3 { display: block; + margin-top: 8px; + margin-bottom: 8px; + margin-left: 0px; + margin-right: 0px; + text-indent: 0px; + } + +h4 { display: block; + margin-top: 8px; + margin-bottom: 8px; + margin-left: 0px; + margin-right: 0px; + text-indent: 0px; + } + +p { display: block; + margin-top: 8px; + margin-bottom: 8px; + margin-left: 0px; + margin-right: 0px; + text-indent: 0px; + } +/* +p.item { margin-left: 2em; + text-indent: -2em + } */ +/* p.item2 { margin-left: 2em; text-indent: 2em} */ + +table.format { border:solid; + border-collapse:collapse; + caption-side:top; + text-align:center; + width:80%; + } +table.format th { border:ridge; + padding:4px; + width:25%; + } +table.format td { border:ridge; + padding:4px; + } +table.format caption { font-weight:bold; + font-size:larger; + } + +table.note {border:none; + text-align:right; + width:80%; + } + +table.desc { border:solid; + border-collapse:collapse; + caption-side:top; + text-align:left; + width:80%; + } +table.desc tr { vertical-align:top; + } +table.desc th { border-style:ridge; + font-size:larger; + padding:4px; + /* text-decoration:underline; */ + } +table.desc td { border-style:ridge; + /* padding: 4px; */ + vertical-align:text-top; + } +table.desc 
caption { font-weight:bold; + font-size:larger; + } + +table.list { border:none; + width:100% + } +table.list tr { vertical-align:text-top; + } +table.list th { border:none; + text-decoration:underline; + vertical-align:text-top; + } +table.list td { border:none; + vertical-align:text-top; + } + +table.msgdesc { border:none; + text-align:left; + width: 80% + } +table.msgdesc tr { vertical-align:text-top; + border-spacing:0; + padding:0; } +table.msgdesc th { border:none; + text-decoration:underline; + vertical-align:text-top; } +table.msgdesc td { border:none; + vertical-align:text-top; + } + +table.list80 { border:none; + width:80% + } +table.list80 tr { vertical-align:text-top; + } +table.list80 th { border:none; + text-decoration:underline; + vertical-align:text-top; + } +table.list80 td { border:none; + vertical-align:text-top; + } + +table.glossary { border:none; + text-align:left; + width: 80% + } +table.glossary tr { vertical-align:text-top; + border-spacing:0; + padding:0; } +table.glossary th { border:none; + text-align:left; + text-decoration:underline; + vertical-align:text-top; } +table.glossary td { border:none; + text-align:left; + vertical-align:text-top; + } + +div { page-break-inside:avoid; + page-break-after:auto + } + +</style> + + <center> + <table border="0" width="90%"> + <tr> + <td valign="top"> + <ol type="I"> + <li><a href="#Intro">Introduction</a></li> + <font size="-1"> + <ol type="A"> + <li><a href="#ThisDocument">This Document</a></li> + <li><a href="#ChangesForHdf5_1_10">Changes for HDF5 1.10</a></li> + </ol> + </font> + + <li><a href="#FileMetaData">Disk Format: Level 0 - File Metadata</a></li> + <font size="-1"> + <ol type="A"> + <li><a href="#Superblock">Disk Format: Level 0A - Format Signature and Superblock</a></li> + <li><a href="#DriverInfo">Disk Format: Level 0B - File Driver Info</a></li> + <li><a href="#SuperblockExt">Disk Format: Level 0C - Superblock Extension</a></li> + </ol> + </font> + <li><a href="#FileInfra">Disk 
Format: Level 1 - File Infrastructure</a></li> + <font size="-1"> + <ol type="A"> + <li><a href="#Btrees">Disk Format: Level 1A - B-trees and B-tree + Nodes</a></li> + <ol type="1"> + <li><a href="#V1Btrees">Disk Format: Level 1A1 - Version 1 + B-trees (B-link Trees)</a></li> + <li><a href="#V2Btrees">Disk Format: Level 1A2 - Version 2 + B-trees</a></li> + </ol> + <li><a href="#SymbolTable">Disk Format: Level 1B - Group Symbol Table Nodes</a></li> + <li><a href="#SymbolTableEntry">Disk Format: Level 1C - Symbol Table Entry</a></li> + <li><a href="#LocalHeap">Disk Format: Level 1D - Local Heaps</a></li> + <li><a href="#GlobalHeap">Disk Format: Level 1E - Global Heap</a></li> + <li><a href="#FractalHeap">Disk Format: Level 1F - Fractal Heap</a></li> + <li><a href="#FreeSpaceManager">Disk Format: Level 1G - Free-space Manager</a></li> + <li><a href="#SOHMTable">Disk Format: Level 1H - Shared Object Header Message Table</a></li> + </ol> + </font> + <li><a href="#DataObject">Disk Format: Level 2 - Data Objects</a></li> + <font size="-1"> + <ol type="A"> + <li><a href="#ObjectHeader">Disk Format: Level 2A - Data Object Headers</a></li> + <ol type="1"> + <li><a href="#ObjectHeaderPrefix">Disk Format: Level 2A1 - Data Object Header Prefix</a></li> + <ol type="a"> + <li><a href="#V1ObjectHeaderPrefix">Version 1 Data Object Header Prefix</a></li> + <li><a href="#V2ObjectHeaderPrefix">Version 2 Data Object Header Prefix</a></li> + </ol> + <li><a href="#ObjectHeaderMessages">Disk Format: Level 2A2 - Data Object Header Messages</a></li> + <ol type="a"> + <li><a href="#NILMessage">The NIL Message</a></li> <!-- 0x0000 --> + <li><a href="#DataspaceMessage">The Dataspace Message</a></li> <!-- 0x0001 --> + <li><a href="#LinkInfoMessage">The Link Info Message</a></li> <!-- 0x0002 --> + </ol> + </ol> + </ol> + </font> + </ol> + </td> + + <td> </td> + + <td valign="top"> + <ol type="I" start="4"> + <li><a href="#DataObject">Disk Format: Level 2 - Data + Objects</a><font size="-1"><i> 
(Continued)</i></li> + <ol type="A"> + <li><a href="#ObjectHeader">Disk Format: Level 2A - Data Object + Headers</a><i> (Continued)</i></li> + <ol type="1" start="2"> + <li><a href="#ObjectHeaderMessages">Disk Format: Level 2A2 - + Data Object Header Messages</a><i> (Continued)</i></li> + <ol type="a" start="4"> + <li><a href="#DatatypeMessage">The Datatype Message</a></li> <!-- 0x0003 --> + <li><a href="#OldFillValueMessage">The Data Storage - + Fill Value (Old) Message</a></li> <!-- 0x0004 --> + <li><a href="#FillValueMessage">The Data Storage - + Fill Value Message</a></li> <!-- 0x0005 --> + <li><a href="#LinkMessage">The Link Message</a></li> <!-- 0x0006 --> + <li><a href="#ExternalFileListMessage">The Data Storage - + External Data Files Message</a></li> <!-- 0x0007 --> + <li><a href="#LayoutMessage">The Data Storage - + Layout Message</a></li> <!-- 0x0008 --> + <li><a href="#BogusMessage">The Bogus Message</a></li> <!-- 0x0009 --> + <li><a href="#GroupInfoMessage">The Group Info + Message</a></li> <!-- 0x000a --> + <li><a href="#FilterMessage">The Data Storage - + Filter Pipeline Message</a></li> <!-- 0x000b --> + <li><a href="#AttributeMessage">The Attribute + Message</a></li> <!-- 0x000c --> + <li><a href="#CommentMessage">The Object Comment + Message</a></li> <!-- 0x000d --> + <li><a href="#OldModificationTimeMessage">The Object + Modification Time (Old) Message</a></li> <!-- 0x000e --> + <li><a href="#SOHMTableMessage">The Shared Message + Table Message</a></li> <!-- 0x000f --> + <li><a href="#ContinuationMessage">The Object Header + Continuation Message</a></li> <!-- 0x0010 --> + <li><a href="#SymbolTableMessage">The Symbol + Table Message</a></li> <!-- 0x0011 --> + <li><a href="#ModificationTimeMessage">The Object + Modification Time Message</a></li> <!-- 0x0012 --> + <li><a href="#BtreeKValuesMessage">The B-tree + ‘K’ Values Message</a></li> <!-- 0x0013 --> + <li><a href="#DrvInfoMessage">The Driver Info + Message</a></li> <!-- 0x0014 --> + <li><a 
href="#AinfoMessage">The Attribute Info + Message</a></li> <!-- 0x0015 --> + <li><a href="#RefCountMessage">The Object Reference + Count Message</a></li> <!-- 0x0016 --> + <li><a href="#FsinfoMessage">The File Space Info + Message</a></li> <!-- 0x0018 --> + </ol> + </ol> + <li><a href="#DataStorage">Disk Format: Level 2B - Data Object Data Storage</a></li> + </ol> + </font> + <li><a href="#AppendixA">Appendix A: Definitions</a></li> + <li><a href="#AppendixB">Appendix B: File Memory Allocation Types</a></li> + </ol> +</td></tr> +</table> +</center> + + + +<br /> +<br /> +<hr /> +<a name="Intro"><h2>I. Introduction</h2></a> + + <table align="right" width="100"> + <tr><td> </td><td align="center"> + <hr /> + <img src="FF-IH_FileGroup.gif" alt="HDF5 Groups" hspace="15" vspace="15"> + </td><td> </td></tr> + <tr><td> </td><td align="center"> + <strong>Figure 1:</strong> Relationships among the HDF5 root group, other groups, and objects + <hr /> + </td><td> </td></tr> + + <tr><td> </td><td align="center"> + <img src="FF-IH_FileObject.gif" alt="HDF5 Objects" hspace="15" vspace="15"> + </td><td> </td></tr> + <tr><td> </td><td align="center"> + <strong>Figure 2:</strong> HDF5 objects -- datasets, datatypes, or dataspaces + <hr /> + </td><td> </td></tr> + </table> + + + <p>The format of an HDF5 file on disk encompasses several + key ideas of the HDF4 and AIO file formats as well as + addressing some shortcomings therein. The new format is + more self-describing than the HDF4 format and is more + uniformly applied to data objects in the file.</p> + + <p>An HDF5 file appears to the user as a directed graph. 
+ The nodes of this graph are the higher-level HDF5 objects + that are exposed by the HDF5 APIs:</p> + + <ul> + <li>Groups</li> + <li>Datasets</li> + <li>Committed (formerly Named) datatypes</li> + </ul> + + <p>At the lowest level, as information is actually written to the disk, + an HDF5 file is made up of the following objects:</p> + <ul> + <li>A superblock</li> + <li>B-tree nodes</li> + <li>Heap blocks</li> + <li>Object headers</li> + <li>Object data</li> + <li>Free space</li> + </ul> + + <p>The HDF5 Library uses these low-level objects to represent the + higher-level objects that are then presented to the user or + to applications through the APIs. For instance, a group is an + object header that contains a message that points to a local + heap (for storing the links to objects in the group) and to a + B-tree (which indexes the links). A dataset is an object header + that contains messages that describe datatype, dataspace, layout, + filters, external files, fill value, and other elements with the + layout message pointing to either a raw data chunk or to a + B-tree that points to raw data chunks.</p> + + +<br /> +<a name="ThisDocument"><h3>I.A. This Document</h3></a> + + <p>This document describes the lower-level data objects; + the higher-level objects and their properties are described + in the <a href="UG/HDF5_Users_Guide-Responsive HTML5/index.html"><cite>HDF5 User’s Guide</cite></a>.</p> + + <p>Three levels of information comprise the file format. + Level 0 contains basic information for identifying and + defining information about the file. Level 1 information contains + the information about the pieces of a file shared by many objects + in the file (such as B-trees and heaps). 
Level 2 is the rest + of the file and contains all of the data objects, with each object + partitioned into header information, also known as + <em>metadata</em>, and data.</p> + + <p>The sizes of various fields in the following layout tables are + determined by looking at the number of columns the field spans + in the table. There are three exceptions: (1) the size may be + overridden by specifying a size in parentheses, (2) the size of + addresses is determined by the <em>Size of Offsets</em> field + in the superblock and is indicated in this document with a + superscripted ‘O’, and (3) the size of length fields is determined + by the <em>Size of Lengths</em> field in the superblock and is + indicated in this document with a superscripted ‘L’.</p> + + <p>Values for all fields in this document should be treated as unsigned + integers, unless otherwise noted in the description of a field. + Additionally, all metadata fields are stored in little-endian byte + order. + </p> + + <p>All checksums used in the format are computed with the + <a href="http://www.burtleburtle.net/bob/hash/doobs.html">Jenkins’ + lookup3</a> algorithm. + </p> + + <p>Whenever a bit flag or field is mentioned for an entry, bits are + numbered from the lowest bit position in the entry. + </p> + + <p>Various tables in this document are aligned with “This space inserted + only to align table nicely”. These entries in the table are just + to make the table presentation nicer and do not represent any values + or padding in the file. + </p> + + +<br /> +<a name="ChangesForHdf5_1_10"><h3>I.B. Changes for HDF5 1.10</h3></a> + + <p>As of October 2015, changes in the file format for HDF5 1.10 + have not yet been finalized.</p> + + + +<br /> +<br /> +<hr /> +<h2><a name="FileMetaData"> +II. Disk Format: Level 0 - File Metadata</a></h2> + +<br /> +<h3><a name="Superblock"> +II.A. 
Disk Format: Level 0A - Format Signature and Superblock</a></h3> + + <p>The superblock may begin at certain predefined offsets within + the HDF5 file, allowing a block of unspecified content for + users to place additional information at the beginning (and + end) of the HDF5 file without limiting the HDF5 Library’s + ability to manage the objects within the file itself. This + feature was designed to accommodate wrapping an HDF5 file in + another file format or adding descriptive information to an HDF5 + file without requiring the modification of the actual file’s + information. The superblock is located by searching for the + HDF5 format signature at byte offset 0, byte offset 512, and at + successive locations in the file, each a multiple of two of + the previous location; in other words, at these byte offsets: + 0, 512, 1024, 2048, and so on.</p> + + <p>The superblock is composed of the format signature, followed by a + superblock version number and information that is specific to each + version of the superblock. + Currently, there are three versions of the superblock format. + Version 0 is the default format, while version 1 is basically the same + as version 0 with additional information when a non-default B-tree ‘K’ + value is stored. 
Version 2 is the latest format, with some fields + eliminated or compressed and with superblock extension and checksum + support.</p> + + <p>Version 0 and 1 of the superblock are described below:</p> + + + <div align="center"> + <table class="format"> + <caption> + Superblock (Versions 0 and 1) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Format Signature (8 bytes)<br /><br /></td> + </tr> + + <tr> + <td>Version # of Superblock</td> + <td>Version # of File’s Free Space Storage</td> + <td>Version # of Root Group Symbol Table Entry</td> + <td>Reserved (zero)</td> + </tr> + + <tr> + <td>Version # of Shared Header Message Format</td> + <td>Size of Offsets</td> + <td>Size of Lengths</td> + <td>Reserved (zero)</td> + </tr> + + <tr> + <td colspan="2">Group Leaf Node K</td> + <td colspan="2">Group Internal Node K</td> + </tr> + + <tr> + <td colspan="4">File Consistency Flags</td> + </tr> + + <tr> + <td colspan="2" style="border:dotted;">Indexed Storage Internal Node K<sup>1</sup></td> + <td colspan="2" style="border:dotted;">Reserved (zero)<sup>1</sup></td> + </tr> + + <tr> + <td colspan="4"><br />Base Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of File Free space Info<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />End of File Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Driver Information Block Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Root Group Symbol Table Entry</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in “Size of Offsets.”) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with a ‘1’ in the above table are + new in version 1 of the superblock) + </td></tr> + </table> + </div> + + <br /> + <div align="center"> + <table 
class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Format Signature</p></td> + <td><p>This field contains a constant value and can be used to + quickly identify a file as being an HDF5 file. The + constant value is designed to allow easy identification of + an HDF5 file and to allow certain types of data corruption + to be detected. The file signature of an HDF5 file always + contains the following values:</p> + <center> + <table border align="center" cellpadding="4"> + <tr align="center"> + <td align="right">Decimal:</td> + <td width="8%">137</td> + <td width="8%">72</td> + <td width="8%">68</td> + <td width="8%">70</td> + <td width="8%">13</td> + <td width="8%">10</td> + <td width="8%">26</td> + <td width="8%">10</td> + </tr> + + <tr align="center"> + <td align="right">Hexadecimal:</td> + <td>89</td> + <td>48</td> + <td>44</td> + <td>46</td> + <td>0d</td> + <td>0a</td> + <td>1a</td> + <td>0a</td> + </tr> + + <tr align="center"> + <td align="right">ASCII C Notation:</td> + <td>\211</td> + <td>H</td> + <td>D</td> + <td>F</td> + <td>\r</td> + <td>\n</td> + <td>\032</td> + <td>\n</td> + </tr> + </table> + </center> + <p>This signature both identifies the file as an HDF5 file + and provides for immediate detection of common + file-transfer problems. The first two bytes distinguish + HDF5 files on systems that expect the first two bytes to + identify the file type uniquely. The first byte is + chosen as a non-ASCII value to reduce the probability + that a text file may be misrecognized as an HDF5 file; + also, it catches bad file transfers that clear bit + 7. Bytes two through four name the format. The CR-LF + sequence catches bad file transfers that alter newline + sequences. The control-Z character stops file display + under MS-DOS. The final line feed checks for the inverse + of the CR-LF translation problem. 
(This is a direct + descendent of the + <a href="http://www.libpng.org/pub/png/spec/iso/index-object.html#5PNG-file-signature">PNG</a> file + signature.)</p> + <p><em>This field is present in version 0+ of the superblock.</em> + </p></td> + </tr> + + <tr> + <td><p>Version Number of the Superblock</p></td> + <td><p>This value is used to determine the format of the + information in the superblock. When the format of the + information in the superblock is changed, the version number + is incremented to the next integer and can be used to + determine how the information in the superblock is + formatted.</p> + + <p>Values of 0, 1 and 2 are defined for this field. (The format + of version 2 is described below, not here) + </p> + + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Version Number of the File’s Free Space + Information</p></td> + <td> + <p>This value is used to determine the format of the + file’s free space information. + </p> + <p>The only value currently valid in this field is ‘0’, which + indicates that the file’s free space is as described + <a href="#FreeSpaceManager">below</a>. + </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Version Number of the Root Group Symbol Table + Entry</p></td> + <td><p>This value is used to determine the format of the + information in the Root Group Symbol Table Entry. 
When the + format of the information in that field is changed, the + version number is incremented to the next integer and can be + used to determine how the information in the field + is formatted.</p> + <p>The only value currently valid in this field is ‘0’, + which indicates that the root group symbol table entry is + formatted as described <a href="#SymbolTableEntry">below</a>.</p> + <p><em>This field is present in version 0 and 1 of the + superblock.</em></p> + </td> + </tr> + + <tr> + <td><p>Version Number of the Shared Header Message Format</p></td> + <td><p>This value is used to determine the format of the + information in a shared object header message. Since the format + of the shared header messages differs from the other private + header messages, a version number is used to identify changes + in the format. + </p> + <p>The only value currently valid in this field is ‘0’, which + indicates that shared header messages are formatted as + described <a href="#ObjectHeaderMessages">below</a>. + </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Size of Offsets</p></td> + <td><p>This value contains the number of bytes used to store + addresses in the file. The values for the addresses of + objects in the file are offsets relative to a base address, + usually the address of the superblock signature. This + allows a wrapper to be added after the file is created + without invalidating the internal offset locations. + </p> + + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Size of Lengths</p></td> + <td><p>This value contains the number of bytes used to store + the size of an object. 
+ </p> + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Group Leaf Node K</p></td> + <td> + <p>Each leaf node of a group B-tree will have at + least this many entries but not more than twice this + many. If a group has a single leaf node then it + may have fewer entries. + </p> + <p>This value must be greater than zero. + </p> + <p>See the <a href="#Btrees">description</a> of B-trees below. + </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Group Internal Node K</p></td> + <td> + <p>Each internal node of a group B-tree will have at + least this many entries but not more than twice this + many. If the group has only one internal + node then it might have fewer entries. + </p> + <p>This value must be greater than zero. + </p> + <p>See the <a href="#Btrees">description</a> of B-trees below. + </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>File Consistency Flags</p></td> + <td> + <p>This value contains flags to indicate information + about the consistency of the information contained + within the file. Currently, the following bit flags are + defined: + <ul> + <li>Bit 0 set indicates that the file is opened for + write-access.</li> + <li>Bit 1 set indicates that the file has + been verified for consistency and is guaranteed to be + consistent with the format defined in this document.</li> + <li>Bits 2-31 are reserved for future use.</li> + </ul> + Bit 0 should be + set as the first action when a file is opened for write + access and should be cleared only as the final action + when closing a file. Bit 1 should be cleared during + normal access to a file and only set after the file’s + consistency is guaranteed by the library or a + consistency utility. 
+ </p> + + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Indexed Storage Internal Node K</p></td> + <td> + <p>Each internal node of an indexed storage B-tree will have at + least this many entries but not more than twice this + many. If the index storage B-tree has only one internal + node then it might have fewer entries. + </p> + <p>This value must be greater than zero. + </p> + <p>See the <a href="#Btrees">description</a> of B-trees below. + </p> + + <p><em>This field is present in version 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Base Address</p></td> + <td> + <p>This is the absolute file address of the first byte of + the HDF5 data within the file. The library currently + constrains this value to be the absolute file address + of the superblock itself when creating new files; + future versions of the library may provide greater + flexibility. When opening an existing file and this address does + not match the offset of the superblock, the library assumes + that the entire contents of the HDF5 file have been adjusted in + the file and adjusts the base address and end of file address to + reflect their new positions in the file. Unless otherwise noted, + all other file addresses are relative to this base + address. + </p> + + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Address of Global Free-space Index</p></td> + <td> + <p>The file’s free space is not persistent for version 0 and 1 of + the superblock. + Currently this field always contains the + <a href="#UndefinedAddress">undefined address</a>. + </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>End of File Address</p></td> + <td> + <p>This is the absolute file address of the first byte past + the end of all HDF5 data. 
It is used to determine whether a + file has been accidentally truncated and as an address where + file data allocation can occur if space from the free list is + not used. + </p> + + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Driver Information Block Address</p></td> + <td> + <p>This is the relative file address of the file driver + information block which contains driver-specific + information needed to reopen the file. If there is no + driver information block then this entry should be the + <a href="#UndefinedAddress">undefined address</a>. + </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Root Group Symbol Table Entry</p></td> + <td> + <p>This is the <a href="#SymbolTableEntry">symbol table entry</a> + of the root group, which serves as the entry point into + the group graph for the file. + </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + </table> + </div> + + <br /> + <p>Version 2 of the superblock is described below:</p> + + <div align="center"> + <table class="format"> + <caption> + Superblock (Version 2) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Format Signature (8 bytes)<br /><br /></td> + </tr> + + <tr> + <td>Version # of Superblock</td> + <td>Size of Offsets</td> + <td>Size of Lengths</td> + <td>File Consistency Flags</td> + </tr> + + <tr> + <td colspan="4"><br />Base Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Superblock Extension Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />End of File Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Root Group Object Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Superblock Checksum</td> + </tr> + </table> + + <table 
class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in “Size of Offsets.”) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Format Signature</p></td> + <td> + <p>This field is the same as described for versions 0 and 1 of the + superblock. + </p></td> + </tr> + + <tr> + <td><p>Version Number of the Superblock</p></td> + <td> + <p>This field has a value of 2 and has the same meaning as for + versions 0 and 1. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Offsets</p></td> + <td> + <p>This field is the same as described for versions 0 and 1 of the + superblock. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Lengths</p></td> + <td> + <p>This field is the same as described for versions 0 and 1 of the + superblock. + </p> + </td> + </tr> + + <tr> + <td><p>File Consistency Flags</p></td> + <td> + <p>This field is the same as described for versions 0 and 1 except + that it is smaller (the number of reserved bits has been reduced + from 30 to 6). + </p> + </td> + </tr> + + <tr> + <td><p>Base Address</p></td> + <td> + <p>This field is the same as described for versions 0 and 1 of the + superblock. + </p> + </td> + </tr> + + <tr> + <td><p>Superblock Extension Address</p></td> + <td> + <p>The field is the address of the object header for the + <a href="#SuperblockExt">superblock extension</a>. + If there is no extension then this entry should be the + <a href="#UndefinedAddress">undefined address</a>. + </p> + </td> + </tr> + + <tr> + <td><p>End of File Address</p></td> + <td> + <p>This field is the same as described for versions 0 and 1 of the + superblock. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Root Group Object Header Address</p></td> + <td> + <p>This is the address of + the <a href="#DataObject">root group object header</a>, + which serves as the entry point into the group graph for the file. + </p> + </td> + </tr> + + <tr> + <td><p>Superblock Checksum</p></td> + <td> + <p>The checksum for the superblock. + </p> + </td> + </tr> + + </table> + </div> + +<br /> +<h3><a name="DriverInfo"> +II.B. Disk Format: Level 0B - File Driver Info</a></h3> + + <p>The <b>driver information block</b> is an optional region of the + file which contains information needed by the file driver + to reopen a file. The format is described below:</p> + + + <div align="center"> + <table class="format"> + <caption> + Driver Information Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="3">Reserved</td> + </tr> + + <tr> + <td colspan="4">Driver Information Size</td> + </tr> + + <tr> + <td colspan="4"><br />Driver Identification (8 bytes)<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br /><br />Driver Information (<em>variable size</em>)<br /><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number of the Driver Information Block. + This document describes version 0. + </p> + </td> + </tr> + + <tr> + <td><p>Driver Information Size</p></td> + <td> + <p>The size in bytes of the <em>Driver Information</em> field. + </p> + </td> + </tr> + + <tr> + <td><p>Driver Identification</p></td> + <td> + <p>This is an eight-byte ASCII string without null + termination which identifies the driver and/or version number + of the Driver Information Block. 
The predefined driver encoded + in this field by the HDF5 Library is identified by the + letters <code>NCSA</code> followed by the first four characters of + the driver name. If the Driver Information block is not + the original version then the last letter(s) of the + identification will be replaced by a version number in + ASCII, starting with 0. + </p> + <p> + Identification for user-defined drivers is also eight bytes long. + It can be arbitrary but should be unique and should avoid + the four-character prefix “NCSA”. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Driver Information</p></td> + <td>Driver information is stored in a format defined by the + file driver (see description below).</td> + </tr> + </table> + </div> + + <br /> + The two drivers encoded in the <em>Driver Identification</em> field are as follows: + <ul> + <li> + Multi driver: + <p> + The identifier for this driver is “NCSAmult”. + This driver provides a mechanism for segregating raw data and different types of metadata + into multiple files. + These files are viewed by the library as a single virtual HDF5 file with a single file address. + A maximum of 6 files will be created for the following data: + superblock, B-tree, raw data, global heap, local heap, and object header. + More than one type of data can be written to the same file. + </p></li> + <li> + Family driver: + <p> + The identifier for this driver is “NCSAfami” and is encoded in this field for library version 1.8 and after. + This driver is designed for systems that do not support files larger than 2 gigabytes + by splitting the HDF5 file address space across several smaller files. + It does nothing to segregate metadata and raw data; + they are mixed in the address space just as they would be in a single contiguous file. 
+ </p></li> + </ul> + <p>The format of the <em>Driver Information</em> field for the + above two drivers is described below:</p> + + <div align="center"> + <table class="format"> + <caption> + Multi Driver Information + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Member Mapping</td> + <td>Member Mapping</td> + <td>Member Mapping</td> + <td>Member Mapping</td> + </tr> + + <tr> + <td>Member Mapping</td> + <td>Member Mapping</td> + <td>Reserved</td> + <td>Reserved</td> + </tr> + + <tr> + <td colspan="4"><br />Address of Member File 1<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />End of Address for Member File 1<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Member File 2<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />End of Address for Member File 2<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />... ...<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Member File N<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />End of Address for Member File N<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Name of Member File 1 <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Name of Member File 2 <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />... ...<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Name of Member File N <em>(variable size)</em><br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Member Mapping</p></td> + <td><p>These fields are integer values from 1 to 6 + indicating how the data can be mapped to or merged with another type of + data. 
+ <table class="list"> + <tr> + <th width="20%" align="center">Member Mapping</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center">1</td> + <td>The superblock data.</td> + </tr> + <tr> + <td align="center">2</td> + <td>The B-tree data.</td> + </tr> + <tr> + <td align="center">3</td> + <td>The raw data.</td> + </tr> + <tr> + <td align="center">4</td> + <td>The global heap data.</td> + </tr> + <tr> + <td align="center">5</td> + <td>The local heap data.</td> + </tr> + <tr> + <td align="center">6</td> + <td>The object header data.</td> + </tr> + </table></p> + <p>For example, if the third field has the value 3 and all the rest have the + value 1, it means there are two files: one for raw data, and one for superblock, + B-tree, global heap, local heap, and object header.</p> + </td> + </tr> + + <tr> + <td><p>Reserved</p></td> + <td><p>These fields are reserved and should always be zero.</p></td> + </tr> + + <tr> + <td><p>Address of Member File N</p></td> + <td><p>This field specifies the virtual address at which the member file starts.</p> + <p>N is the number of member files.</p> + </td> + </tr> + + <tr> + <td><p>End of Address for Member File N</p></td> + <td><p>This field is the end of the allocated address for the member file. + </p></td> + </tr> + + <tr> + <td><p>Name of Member File N</p></td> + <td><p>This field is the null-terminated name of the member file and + its length should be a multiple of 8 bytes. + Additional bytes will be padded with <em>NULL</em>s. The default naming + convention is <em>%s-X.h5</em>, where <em>X</em> is one of the letters + <em>s</em> (for superblock), <em>b</em> (for B-tree), <em>r</em> (for raw data), + <em>g</em> (for global heap), <em>l</em> (for local heap), and <em>o</em> (for + object header). The name of the whole HDF5 file will be substituted for the <em>%s</em> + in the string. 
+ </p> + </td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Family Driver Information + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="8"><br />Size of Member File<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Size of Member File</p></td> + <td><p>This field is the size of the member file in the family of files.</p></td> + </tr> + </table> + </div> + +<br /> +<h3><a name="SuperblockExt"> +II.C. Disk Format: Level 0C - Superblock Extension</a></h3> + + <p>The <em>superblock extension</em> is used to store superblock metadata + which is either optional, or added after the version of the superblock + was defined. Superblock extensions may only exist when version 2+ of + superblock is used. A superblock extension is an object header which may + hold the following messages:</p> + <ul> + <li> + <a href="#SOHMTableMessage">Shared Message Table message</a> containing + information to locate the master table of shared object header message + indices.</li> + <li> + <a href="#BtreeKValuesMessage">B-tree ‘K’ Values message</a> containing + non-default B-tree ‘K’ values.</li> + <li> + <a href="#DrvInfoMessage">Driver Info message</a> containing information + needed by the file driver in order to reopen a file. + See also the + <a href="#DriverInfo">“Disk Format: Level 0B - File Driver + Info”</a> section above.</li> + <li> + <a href="#FsinfoMessage">File Space Info message</a> containing + information about file space handling in the file.</li> + </ul> + + + +<br /> +<br /> +<hr /> +<h2><a name="FileInfra"> +III. Disk Format: Level 1 - File Infrastructure</a></h2> + +<br /> +<h3><a name="Btrees"> +III.A. 
Disk Format: Level 1A - B-trees and B-tree Nodes</a></h3> + + <p>B-trees allow flexible storage for objects which tend to grow + in ways that cause the object to be stored discontiguously. B-trees + are described in various algorithms books including “Introduction to + Algorithms” by Thomas H. Cormen, Charles E. Leiserson, and Ronald + L. Rivest. B-trees are used in several places in the HDF5 file format, + when an index is needed for another data structure.</p> + + <p>The version 1 B-tree structure described below is the original index + structure, but it is limited by some bugs in our implementation (mainly in + how it handles deleting records). The version 1 B-trees are being phased + out in favor of the version 2 B-trees described below, although both + types of structures may be found in the same file, depending on + application settings when creating the file.</p> + +<br /> +<h4><a name="V1Btrees"> +III.A.1. Disk Format: Level 1A1 - Version 1 B-trees (B-link Trees)</a></h4> + + <p>Version 1 B-trees in HDF5 files are an implementation of the B-link tree, + in which the sibling nodes at a particular level in the tree are stored + in a doubly-linked list. The B-link tree is described in the “Efficient Locking for + Concurrent Operations on B-trees” paper by Philip L. Lehman and S. Bing Yao + as published in the <cite>ACM Transactions on Database Systems</cite>, + Vol. 6, No. 4, December 1981.</p> + + <p>The B-link trees implemented by the file format contain one more + key than the number of children. In other words, each child + pointer out of a B-tree node has a left key and a right key. + The pointers out of internal nodes point to sub-trees while + the pointers out of leaf nodes point to symbol nodes and + raw data chunks. 
+ Aside from that difference, internal nodes and leaf nodes + are identical.</p> + + <div align="center"> + <table class="format"> + <caption> + B-link Tree Nodes + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Node Type</td> + <td>Node Level</td> + <td colspan="2">Entries Used</td> + </tr> + + <tr> + <td colspan="4"><br />Address of Left Sibling<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Right Sibling<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Key 0 (variable size)</td> + </tr> + + <tr> + <td colspan="4"><br />Address of Child 0<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Key 1 (variable size)</td> + </tr> + + <tr> + <td colspan="4"><br />Address of Child 1<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Key 2<em>K</em> (variable size)</td> + </tr> + + <tr> + <td colspan="4"><br />Address of Child 2<em>K</em><sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Key 2<em>K</em>+1 (variable size)</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>TREE</code>” is + used to indicate the + beginning of a B-link tree node. This gives file + consistency checking utilities a better chance of + reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Node Type</p></td> + <td> + <p>Each B-link tree points to a particular type of data. 
+ This field indicates the type of data as well as + implying the maximum degree <em>K</em> of the tree and + the size of each Key field. + + + <table class="list"> + <tr> + <th width="20%" align="center">Node Type</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center">0</td> + <td>This tree points to group nodes.</td> + </tr> + <tr> + <td align="center">1</td> + <td>This tree points to raw data chunk nodes.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Node Level</p></td> + <td> + <p>The node level indicates the level at which this node + appears in the tree (leaf nodes are at level zero). Not + only does the level indicate whether child pointers + point to sub-trees or to data, but it can also be used + to help file consistency checking utilities reconstruct + damaged trees. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Entries Used</p></td> + <td> + <p>This determines the number of children to which this + node points. All nodes of a particular type of tree + have the same maximum degree, but most nodes will point + to less than that number of children. The valid child + pointers and keys appear at the beginning of the node + and the unused pointers and keys appear at the end of + the node. The unused pointers and keys have undefined + values. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Address of Left Sibling</p></td> + <td> + <p>This is the relative file address of the left sibling of + the current node. If the current + node is the left-most node at this level then this field + is the <a href="#UndefinedAddress">undefined address</a>. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Address of Right Sibling</p></td> + <td> + <p>This is the relative file address of the right sibling of + the current node. If the current + node is the right-most node at this level then this + field is the <a href="#UndefinedAddress">undefined address</a>. 
+ </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Keys and Child Pointers</p></td> + <td> + <p>Each node has 2<em>K</em>+1 keys with 2<em>K</em> + child pointers interleaved between the keys. The number + of keys and child pointers actually containing valid + values is determined by the node’s <em>Entries Used</em> field. + If that field is <em>N</em> then the node contains + <em>N</em> child pointers and <em>N</em>+1 keys. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Key</p></td> + <td> + <p>The format and size of the key values are determined by + the type of data to which this tree points. The keys are + ordered and are boundaries for the contents of the child + pointer; that is, the key values represented by child + <em>N</em> fall between Key <em>N</em> and Key + <em>N</em>+1. Whether the interval is open or closed on + each end is determined by the type of data to which the + tree points. + </p> + + <p> + The format of the key depends on the node type. + For nodes of node type 0 (group nodes), the key is formatted as + follows: + + <table class="list"> + <tr> + <td width="20%">A single field of <i>Size of Lengths</i> + bytes:</td> + <td width="80%">Indicates the byte offset into the local heap + for the first object name in the subtree which + that key describes. + </td> + </tr> + </table> + </p> + + + <p> + For nodes of node type 1 (chunked raw data nodes), the key is + formatted as follows: + + <table class="list"> + <tr> + <td width="20%">Bytes 1-4:</td> + <td width="80%">Size of chunk in bytes.</td> + </tr> + <tr> + <td>Bytes 5-8:</td> + <td>Filter mask, a 32-bit bit field indicating which + filters have been skipped for this chunk. 
Each filter + has an index number in the pipeline (starting at 0, with + the first filter to apply) and if that filter is skipped, + the bit corresponding to its index is set.</td> + </tr> + <tr> + <td>(<em>D + 1</em>) 64-bit fields:</td> + <td>The offset of the + chunk within the dataset where <i>D</i> is the number + of dimensions of the dataset, and the last value is the + offset within the dataset’s datatype and should always be + zero. For example, if + a chunk in a 3-dimensional dataset begins at the + position <code>[5,5,5]</code>, there will be three + such 64-bit values, each with the value of + <code>5</code>, followed by a <code>0</code> value.</td> + </tr> + </table> + </p> + + </td> + </tr> + + <tr valign="top"> + <td><p>Child Pointer</p></td> + <td> + <p>The tree node contains file addresses of subtrees or + data depending on the node level. Nodes at Level 0 point + to data addresses, either raw data chunks or group nodes. + Nodes at non-zero levels point to other nodes of the + same B-tree. + </p> + <p>For raw data chunk nodes, the child pointer is the address + of a single raw data chunk. For group nodes, the child pointer + points to a <a href="#SymbolTable">symbol table</a>, which contains + information for multiple symbol table entries. + </p> + </td> + </tr> + </table> + </div> + + <p> + Conceptually, each B-tree node looks like this:</p> + <center> + <table> + <tr valign="top" align="center"> + <td>key[0]</td><td> </td> + <td>child[0]</td><td> </td> + <td>key[1]</td><td> </td> + <td>child[1]</td><td> </td> + <td>key[2]</td><td> </td> + <td>...</td><td> </td> + <td>...</td><td> </td> + <td>key[<i>N</i>-1]</td><td> </td> + <td>child[<i>N</i>-1]</td><td> </td> + <td>key[<i>N</i>]</td> + </tr> + </table> + </center> + <br /> + + where child[<i>i</i>] is a pointer to a sub-tree (at a level + above Level 0) or to data (at Level 0). + Each key[<i>i</i>] describes an <i>item</i> stored by the B-tree + (a chunk or an object of a group node). 
The range of values + represented by child[<i>i</i>] is indicated by key[<i>i</i>] + and key[<i>i</i>+1]. + + + <p>The following question must next be answered: + “Is the value described by key[<i>i</i>] contained in + child[<i>i</i>-1] or in child[<i>i</i>]?” + The answer depends on the type of tree. + In trees for groups (node type 0) the object described by + key[<i>i</i>] is the greatest object contained in + child[<i>i</i>-1] while in chunk trees (node type 1) the + chunk described by key[<i>i</i>] is the least chunk in + child[<i>i</i>].</p> + + <p>That means that key[0] for group trees is sometimes unused; + it points to offset zero in the heap, which is always the + empty string and compares as “less-than” any valid object name.</p> + + <p>And key[<i>N</i>] for chunk trees is sometimes unused; + it contains a chunk offset which compares as “greater-than” + any other chunk offset and has a chunk byte size of zero + to indicate that it is not actually allocated.</p> + +<br /> +<h4><a name="V2Btrees"> +III.A.2. Disk Format: Level 1A2 - Version 2 B-trees</a></h4> + + <p>Version 2 B-trees are “traditional” B-trees, with one major difference. + Instead of just using a simple pointer (or address in the file) to a + child of an internal node, the pointer to the child node contains two + additional pieces of information: the number of records in the child + node itself, and the total number of records in the child node and + all its descendants. Storing this additional information allows fast + array-like indexing to locate the n<sup>th</sup> record in the B-tree.</p> + + <p>The entry into a version 2 B-tree is a header which contains global + information about the structure of the B-tree. The <em>root node + address</em> + field in the header points to the B-tree root node, which is either an + internal or leaf node, depending on the value in the header’s + <em>depth</em> field. 
An internal node consists of records plus + pointers to further leaf or internal nodes in the tree. A leaf node + consists solely of records. The format of the records depends on + the B-tree type (stored in the header).</p> + + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + <tr> + <td>Version</td> + <td>Type</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Node Size</td> + </tr> + <tr> + <td colspan="2">Record Size</td> + <td colspan="2">Depth</td> + </tr> + <tr> + <td>Split Percent</td> + <td>Merge Percent</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4"><br />Root Node Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="2">Number of Records in Root Node</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4"><br />Total Number of Records in B-tree<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>BTHD</code>” is + used to indicate the header of a version 2 B-link tree node.
+ </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number for this B-tree header. This document + describes version 0. + </p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td> + <p>This field indicates the type of B-tree: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center">0</td> + <td>A “testing” B-tree, this value should <em>not</em> be + used for storing records in actual HDF5 files. + </td> + </tr> + <tr> + <td align="center">1</td> + <td>This B-tree is used for indexing indirectly accessed, + non-filtered ‘huge’ fractal heap objects. + </td> + </tr> + <tr> + <td align="center">2</td> + <td>This B-tree is used for indexing indirectly accessed, + filtered ‘huge’ fractal heap objects. + </td> + </tr> + <tr> + <td align="center">3</td> + <td>This B-tree is used for indexing directly accessed, + non-filtered ‘huge’ fractal heap objects. + </td> + </tr> + <tr> + <td align="center">4</td> + <td>This B-tree is used for indexing directly accessed, + filtered ‘huge’ fractal heap objects. + </td> + </tr> + <tr> + <td align="center">5</td> + <td>This B-tree is used for indexing the ‘name’ field for + links in indexed groups. + </td> + </tr> + <tr> + <td align="center">6</td> + <td>This B-tree is used for indexing the ‘creation order’ + field for links in indexed groups. + </td> + </tr> + <tr> + <td align="center">7</td> + <td>This B-tree is used for indexing shared object header + messages. + </td> + </tr> + <tr> + <td align="center">8</td> + <td>This B-tree is used for indexing the ‘name’ field for + indexed attributes. + </td> + </tr> + <tr> + <td align="center">9</td> + <td>This B-tree is used for indexing the ‘creation order’ + field for indexed attributes. 
+ </td> + </tr> + </table></p> + <p>The format of records for each type is described below.</p> + </td> + </tr> + + <tr valign="top"> + <td><p>Node Size</p></td> + <td> + <p>This is the size in bytes of all B-tree nodes. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Record Size</p></td> + <td> + <p>This field is the size in bytes of the B-tree record. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Depth</p></td> + <td> + <p>This is the depth of the B-tree. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Split Percent</p></td> + <td> + <p>The percent full that a node needs to increase above before it + is split. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Merge Percent</p></td> + <td> + <p>The percent full that a node needs to decrease below before it + is merged. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Root Node Address</p></td> + <td> + <p>This is the address of the root B-tree node. A B-tree with + no records will have the <a href="#UndefinedAddress">undefined + address</a> in this field. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Number of Records in Root Node</p></td> + <td> + <p>This is the number of records in the root node. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Total Number of Records in B-tree</p></td> + <td> + <p>This is the total number of records in the entire B-tree. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the B-tree header.
+ </p> + </td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree Internal Node + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + <tr> + <td>Version</td> + <td>Type</td> + <td colspan="2">Records 0, 1, 2...N-1 <em>(variable size)</em></td> + </tr> + <tr> + <td colspan="4"><br />Child Node Pointer 0<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Number of Records N<sub>0</sub> for Child Node 0 <em>(variable size)</em></td> + </tr> + <tr> + <td colspan="4"><br />Total Number of Records for Child Node 0 <em>(optional, variable size)</em></td> + </tr> + <tr> + <td colspan="4"><br />Child Node Pointer 1<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Number of Records N<sub>1</sub> for Child Node 1 <em>(variable size)</em></td> + </tr> + <tr> + <td colspan="4"><br />Total Number of Records for Child Node 1 <em>(optional, variable size)</em></td> + </tr> + <tr> + <td colspan="4">...</td> + </tr> + <tr> + <td colspan="4"><br />Child Node Pointer N<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Number of Records N<sub>n</sub> for Child Node N <em>(variable size)</em></td> + </tr> + <tr> + <td colspan="4"><br />Total Number of Records for Child Node N <em>(optional, variable size)</em></td> + </tr> + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.)
+ </td></tr> + </table> + </div> + + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>BTIN</code>” is + used to indicate the internal node of a B-link tree. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number for this B-tree internal node. + This document describes version 0. + </p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td> + <p>This field is the type of the B-tree node. It should always + be the same as the B-tree type in the header. + </p> + </td> + </tr> + + <tr> + <td><p>Records</p></td> + <td> + <p>The size of this field is determined by the number of records + for this node and the record size (from the header). The format + of records depends on the type of B-tree. + </p> + </td> + </tr> + + <tr> + <td><p>Child Node Pointer</p></td> + <td> + <p>This field is the address of the child node pointed to by the + internal node. + </p> + </td> + </tr> + + <tr> + <td><p>Number of Records in Child Node</p></td> + <td> + <p>This is the number of records in the child node pointed to by + the corresponding <em>Node Pointer</em>. + </p> + <p>The number of bytes used to store this field is determined by + the maximum possible number of records able to be stored in the + child node. + </p> + <p> + The maximum number of records in a child node is computed + in the following way: + + <ul> + <li>Subtract the fixed size overhead for + the child node (for example, its signature, version, + checksum, and so on and <em>one</em> pointer triplet + of information for the child node (because there is one + more pointer triplet than records in each internal node)) + from the size of nodes for the B-tree. </li> + <li>Divide that result by the size of a record plus the + pointer triplet of information stored to reach each + child node from this node. 
+ </ul> + + </p> + <p> + Note that leaf nodes do not encode any + child pointer triplets, so the maximum number of records in a + leaf node is just the node size minus the leaf node overhead, + divided by the record size. + </p> + <p> + Also note that the first level of internal nodes above the + leaf nodes do not encode the <em>Total Number of Records in Child + Node</em> value in the child pointer triplets (since it is the + same as the <em>Number of Records in Child Node</em>), so the + maximum number of records in these nodes is computed with the + equation above, but using (<em>Child Pointer</em>, <em>Number of + Records in Child Node</em>) pairs instead of triplets. + </p> + <p> + The number of + bytes used to encode this field is the least number of bytes + required to encode the maximum number of records in a child + node value for the child nodes below this level + in the B-tree. + </p> + <p> + For example, if the maximum number of child records is + 123, one byte will be used to encode these values in this + node; if the maximum number of child records is + 20000, two bytes will be used to encode these values in this + node; and so on. The maximum number of bytes used to + encode these values is 8 (in other words, an unsigned + 64-bit integer). + </p> + </td> + </tr> + + <tr> + <td><p>Total Number of Records in Child Node</p></td> + <td> + <p>This is the total number of records for the node pointed to by + the corresponding <em>Node Pointer</em> and all its children. + This field exists only in nodes whose depth in the B-tree node + is greater than 1 (in other words, the “twig” + internal nodes, just above leaf nodes, do not store this + field in their child node pointers). + </p> + <p>The number of bytes used to store this field is determined by + the maximum possible number of records able to be stored in the + child node and its descendants. 
+ </p> + <p> + The maximum possible number of records able to be stored in a + child node and its descendants is computed iteratively, in the + following way: The maximum number of records in a leaf node + is computed, then that value is used to compute the maximum + possible number of records in the first level of internal nodes + above the leaf nodes. Multiplying these two values together + determines the maximum possible number of records in child node + pointers for the level of nodes two levels above leaf nodes. + This process is continued up to any level in the B-tree. + </p> + <p> + The number of bytes used to encode this value is computed in + the same way as for the <em>Number of Records in Child Node</em> + field. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for this node. + </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree Leaf Node + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + <tr> + <td>Version</td> + <td>Type</td> + <td colspan="2">Records 0, 1, 2...N-1 <em>(variable size)</em></td> + </tr> + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>BTLF</code>” is + used to indicate the leaf node of a version 2 B-link tree. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number for this B-tree leaf node. + This document describes version 0. + </p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td> + <p>This field is the type of the B-tree node. It should always + be the same as the B-tree type in the header.
+ </p> + </td> + </tr> + + <tr> + <td><p>Records</p></td> + <td> + <p>The size of this field is determined by the number of records + for this node and the record size (from the header). The format + of records depends on the type of B-tree. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for this node. + </p> + </td> + </tr> + + </table> + </div> + + <br /> + <p>The record layout for each stored (in other words, non-testing) + B-tree type is as follows:</p> + + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree, Type 1 Record Layout - Indirectly Accessed, Non-Filtered, + ‘Huge’ Fractal Heap Objects + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Huge Object Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Huge Object Length<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Huge Object ID<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Huge Object Address</p></td> + <td> + <p>The address of the huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Huge Object Length</p></td> + <td> + <p>The length of the huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Huge Object ID</p></td> + <td> + <p>The heap ID for the huge object. 
+ </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree, Type 2 Record Layout - Indirectly Accessed, Filtered, + ‘Huge’ Fractal Heap Objects + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Filtered Huge Object Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Filtered Huge Object Length<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">Filter Mask</td> + </tr> + <tr> + <td colspan="4"><br />Filtered Huge Object Memory Size<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Huge Object ID<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Filtered Huge Object Address</p></td> + <td> + <p>The address of the filtered huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Filtered Huge Object Length</p></td> + <td> + <p>The length of the filtered huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Filter Mask</p></td> + <td> + <p>A 32-bit bit field indicating which filters have been skipped for + this chunk. Each filter has an index number in the pipeline + (starting at 0, with the first filter to apply) and if that + filter is skipped, the bit corresponding to its index is set. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Filtered Huge Object Memory Size</p></td> + <td> + <p>The size of the de-filtered huge object in memory. + </p> + </td> + </tr> + + <tr> + <td><p>Huge Object ID</p></td> + <td> + <p>The heap ID for the huge object. + </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree, Type 3 Record Layout - Directly Accessed, Non-Filtered, + ‘Huge’ Fractal Heap Objects + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Huge Object Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Huge Object Length<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Huge Object Address</p></td> + <td> + <p>The address of the huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Huge Object Length</p></td> + <td> + <p>The length of the huge object in the file. 
+ </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree, Type 4 Record Layout - Directly Accessed, Filtered, + ‘Huge’ Fractal Heap Objects + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Filtered Huge Object Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Filtered Huge Object Length<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">Filter Mask</td> + </tr> + <tr> + <td colspan="4"><br />Filtered Huge Object Memory Size<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Filtered Huge Object Address</p></td> + <td> + <p>The address of the filtered huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Filtered Huge Object Length</p></td> + <td> + <p>The length of the filtered huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Filter Mask</p></td> + <td> + <p>A 32-bit bit field indicating which filters have been skipped for + this chunk. Each filter has an index number in the pipeline + (starting at 0, with the first filter to apply) and if that + filter is skipped, the bit corresponding to its index is set. + </p> + </td> + </tr> + + <tr> + <td><p>Filtered Huge Object Memory Size</p></td> + <td> + <p>The size of the de-filtered huge object in memory. 
+ </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree, Type 5 Record Layout - Link Name for Indexed Group + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Hash of Name</td> + </tr> + <tr> + <td colspan="4">ID <em>(bytes 1-4)</em></td> + </tr> + + <tr> + <td colspan="3">ID <em>(bytes 5-7)</em></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Hash</p></td> + <td> + <p>This field is hash value of the name for the link. The hash + value is the Jenkins’ lookup3 checksum algorithm applied to + the link’s name. + </p> + </td> + </tr> + + <tr> + <td><p>ID</p></td> + <td> + <p>This is a 7-byte sequence of bytes and is the heap ID for the + link record in the group’s fractal heap.</p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree, Type 6 Record Layout - Creation Order for Indexed Group + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Creation Order <em>(8 bytes)</em><br /><br /></td> + </tr> + <tr> + <td colspan="4">ID <em>(bytes 1-4)</em></td> + </tr> + <tr> + <td colspan="3">ID <em>(bytes 5-7)</em></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Creation Order</p></td> + <td> + <p>This field is the creation order value for the link. 
+ </p> + </td> + </tr> + + <tr> + <td><p>ID</p></td> + <td> + <p>This is a 7-byte sequence of bytes and is the heap ID for the + link record in the group’s fractal heap.</p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree, Type 7 Record Layout - Shared Object Header Messages (Sub-Type 0 - Message in Heap) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan>Message Location</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Hash</td> + </tr> + <tr> + <td colspan="4">Reference Count</td> + </tr> + <tr> + <td colspan="4"><br />Heap ID <em>(8 bytes)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Message Location</p></td> + <td> + <p>This field Indicates the location where the message is stored: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center">0</td> + <td>Shared message is stored in shared message index heap. + </td> + </tr> + <tr> + <td align="center">1</td> + <td>Shared message is stored in object header. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Hash</p></td> + <td> + <p>This field is hash value of the shared message. 
The hash + value is the Jenkins’ lookup3 checksum algorithm applied to + the shared message.</p> + </td> + </tr> + + <tr> + <td><p>Reference Count</p></td> + <td> + <p>The number of objects which reference this message.</p> + </td> + </tr> + + <tr> + <td><p>Heap ID</p></td> + <td> + <p>This is an 8-byte sequence of bytes and is the heap ID for the + shared message in the shared message index’s fractal heap.</p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree, Type 7 Record Layout - Shared Object Header Messages (Sub-Type 1 - Message in Object Header) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan>Message Location</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Hash</td> + </tr> + <tr> + <td>Reserved (zero)</td> + <td>Message Type</td> + <td colspan="2">Object Header Index</td> + </tr> + <tr> + <td colspan="4"><br />Object Header Address<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Message Location</p></td> + <td> + <p>This field Indicates the location where the message is stored: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center">0</td> + <td>Shared message is stored in shared message index heap. + </td> + </tr> + <tr> + <td align="center">1</td> + <td>Shared message is stored in object header. 
+ </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Hash</p></td> + <td> + <p>This field is hash value of the shared message. The hash + value is the Jenkins’ lookup3 checksum algorithm applied to + the shared message.</p> + </td> + </tr> + + <tr> + <td><p>Message Type</p></td> + <td> + <p>The object header message type of the shared message.</p> + </td> + </tr> + + <tr> + <td><p>Object Header Index</p></td> + <td> + <p>This field indicates that the shared message is the n<sup>th</sup> message + of its type in the specified object header.</p> + </td> + </tr> + + <tr> + <td><p>Object Header Address</p></td> + <td> + <p>The address of the object header containing the shared message.</p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree, Type 8 Record Layout - Attribute Name for Indexed Attributes + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Heap ID <em>(8 bytes)</em><br /><br /></td> + </tr> + <tr> + <td colspan>Message Flags</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Creation Order</td> + </tr> + <tr> + <td colspan="4">Hash of Name</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Heap ID</p></td> + <td> + <p>This is an 8-byte sequence of bytes and is the heap ID for the + attribute in the object’s attribute fractal heap.</p> + </td> + </tr> + + <tr> + <td><p>Message Flags</p></td> + <td><p>The object header message flags for the attribute message.</p> + </td> + </tr> + + <tr> + <td><p>Creation Order</p></td> + <td> + <p>This field is the creation order value for the attribute. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Hash</p></td> + <td> + <p>This field is the hash value of the name for the attribute. The hash + value is the Jenkins’ lookup3 checksum algorithm applied to + the attribute’s name. + </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Version 2 B-tree, Type 9 Record Layout - Creation Order for Indexed Attributes + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Heap ID <em>(8 bytes)</em><br /><br /></td> + </tr> + <tr> + <td>Message Flags</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Creation Order</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Heap ID</p></td> + <td> + <p>This is an 8-byte sequence of bytes and is the heap ID for the + attribute in the object’s attribute fractal heap.</p> + </td> + </tr> + + <tr> + <td><p>Message Flags</p></td> + <td> + <p>The object header message flags for the attribute message.</p> + </td> + </tr> + + <tr> + <td><p>Creation Order</p></td> + <td> + <p>This field is the creation order value for the attribute. + </p> + </td> + </tr> + + </table> + </div> + + +<br /> +<h3><a name="SymbolTable"> +III.B. Disk Format: Level 1B - Group Symbol Table Nodes</a></h3> + + <p>A group is an object internal to the file that allows + arbitrary nesting of objects within the file (including other groups). + A group maps a set of link names in the group to a set of relative + file addresses of objects in the file.
Certain metadata for an object to + which the group points can be cached in the group’s symbol table entry in + addition to being in the object’s header.</p> + + <p>An HDF5 object name space can be stored hierarchically by + partitioning the name into components and storing each + component as a link in a group. The link for a + non-ultimate component points to the group containing + the next component. The link for the last + component points to the object being named.</p> + + <p>One implementation of a group is a collection of symbol table nodes + indexed by a B-link tree. Each symbol table node contains entries + for one or more links. If an attempt is made to add a link to an already + full symbol table node containing 2<em>K</em> entries, then the node is + split and one node contains <em>K</em> symbols and the other contains + <em>K</em>+1 symbols.</p> + + <div align="center"> + <table class="format"> + <caption> + Symbol Table Node (A Leaf of a B-link tree) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version Number</td> + <td>Reserved (zero)</td> + <td colspan="2">Number of Symbols</td> + </tr> + + <tr> + <td colspan="4"><br /><br />Group Entries<br /><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>SNOD</code>” is + used to indicate the + beginning of a symbol table node. This gives file + consistency checking utilities a better chance of + reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version Number</p></td> + <td> + <p>The version number for the symbol table node. This + document describes version 1. 
(There is no version ‘0’ + of the symbol table node) + </p> + </td> + </tr> + + <tr> + <td><p>Number of Entries</p></td> + <td> + <p>Although all symbol table nodes have the same length, + most contain fewer than the maximum possible number of + link entries. This field indicates how many entries + contain valid data. The valid entries are packed at the + beginning of the symbol table node while the remaining + entries contain undefined values. + </p> + </td> + </tr> + + <tr> + <td><p>Symbol Table Entries</p></td> + <td> + <p>Each link has an entry in the symbol table node. + The format of the entry is described below. + There are 2<em>K</em> entries in each group node, where + <em>K</em> is the “Group Leaf Node K” value from the + <a href="#Superblock">superblock</a>. + </p> + </td> + </tr> + </table> + </div> + +<br /> +<h3><a name="SymbolTableEntry"> +III.C. Disk Format: Level 1C - Symbol Table Entry </a></h3> + + <p>Each symbol table entry in a symbol table node is designed + to allow for very fast browsing of stored objects. + Toward that design goal, the symbol table entries + include space for caching certain constant metadata from the + object header.</p> + + <div align="center"> + <table class="format"> + <caption> + Symbol Table Entry + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Link Name Offset<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Object Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Cache Type</td> + </tr> + + <tr> + <td colspan="4">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br /><br />Scratch-pad Space (16 bytes)<br /><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) 
+ </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Link Name Offset</p></td> + <td> + <p>This is the byte offset into the group’s local + heap for the name of the link. The name is null + terminated. + </p> + </td> + </tr> + + <tr> + <td><p>Object Header Address</p></td> + <td> + <p>Every object has an object header which serves as a + permanent location for the object’s metadata. In addition + to appearing in the object header, some of the object’s metadata + can be cached in the scratch-pad space. + </p> + </td> + </tr> + + <tr> + <td><p>Cache Type</p></td> + <td> + <p>The cache type is determined from the object header. + It also determines the format for the scratch-pad space: + + <table class="list"> + <tr> + <th width="20%" align="center">Type</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center">0</td> + <td>No data is cached by the group entry. This + is guaranteed to be the case when an object header + has a link count greater than one. + </td> + </tr> + <tr> + <td align="center">1</td> + <td>Group object header metadata is cached in the + scratch-pad space. This implies that the symbol table + entry refers to another group. + </td> + </tr> + <tr> + <td align="center">2</td> + <td>The entry is a symbolic link. The first four bytes + of the scratch-pad space are the offset into the local + heap for the link value. The object header address + will be undefined. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Reserved</p></td> + <td> + <p>These four bytes are present so that the scratch-pad + space is aligned on an eight-byte boundary. They are + always set to zero. + </p> + </td> + </tr> + + <tr> + <td><p>Scratch-pad Space</p></td> + <td> + <p>This space is used for different purposes, depending + on the value of the Cache Type field. 
Any metadata + about an object represented in the scratch-pad + space is duplicated in the object header for that + object. + </p> + <p> + Furthermore, no data is cached in the group + entry scratch-pad space if the object header for + the object has a link count greater than one. + </p> + </td> + </tr> + </table> + </div> + +<br /> +<h4>Format of the Scratch-pad Space</h4> + + <p>The symbol table entry scratch-pad space is formatted + according to the value in the Cache Type field.</p> + + <p>If the Cache Type field contains the value zero + <code>(0)</code> then no information is + stored in the scratch-pad space.</p> + + <p>If the Cache Type field contains the value one + <code>(1)</code>, then the scratch-pad space + contains cached metadata for another object header + in the following format:</p> + + <div align="center"> + <table class="format"> + <caption> + Object Header Scratch-pad Format + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Address of B-tree<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Name Heap<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Address of B-tree</p></td> + <td> + <p>This is the file address for the root of the + group’s B-tree. + </p> + </td> + </tr> + + <tr> + <td><p>Address of Name Heap</p></td> + <td> + <p>This is the file address for the group’s local + heap, in which are stored the group’s symbol names. 
+ </p> + </td> + </tr> + </table> + </div> + + + <br /> + <p>If the Cache Type field contains the value two + <code>(2)</code>, then the scratch-pad space + contains cached metadata for a symbolic link + in the following format:</p> + + <div align="center"> + <table class="format"> + <caption> + Symbolic Link Scratch-pad Format + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Offset to Link Value</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Offset to Link Value</p></td> + <td> + <p>The value of a symbolic link (that is, the name of the + thing to which it points) is stored in the local heap. + This field is the 4-byte offset into the local heap for + the start of the link value, which is null terminated. + </p> + </td> + </tr> + </table> + </div> + +<br /> +<h3><a name="LocalHeap"> +III.D. Disk Format: Level 1D - Local Heaps</a></h3> + + <p>A local heap is a collection of small pieces of data that are particular + to a single object in the HDF5 file. Objects can be + inserted and removed from the heap at any time. + The address of a heap does not change once the heap is created. + For example, a group stores addresses of objects in symbol table nodes + with the names of links stored in the group’s local heap. 
+ </p> + + <div align="center"> + <table class="format"> + <caption> + Local Heap + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Data Segment Size<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Offset to Head of Free-list<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Data Segment<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>HEAP</code>” + is used to indicate the + beginning of a heap. This gives file consistency + checking utilities a better chance of reconstructing a + damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>Each local heap has its own version number so that new + heaps can be added to old files. This document + describes version zero (0) of the local heap. + </p> + </td> + </tr> + + <tr> + <td><p>Data Segment Size</p></td> + <td> + <p>The total amount of disk memory allocated for the heap + data. This may be larger than the amount of space + required by the objects stored in the heap. The extra + unused space in the heap holds a linked list of free blocks. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Offset to Head of Free-list</p></td> + <td> + <p>This is the offset within the heap data segment of the + first free block (or the + <a href="#UndefinedAddress">undefined address</a> if there is no + free block). The free block contains “Size of Lengths” bytes that + are the offset of the next free block (or the + value ‘1’ if this is the + last free block) followed by “Size of Lengths” bytes that store + the size of this free block. The size of the free block includes + the space used to store the offset of the next free block and + the size of the current block, making the minimum size of a free + block 2 * “Size of Lengths”. + </p> + </td> + </tr> + + <tr> + <td><p>Address of Data Segment</p></td> + <td> + <p>The data segment originally starts immediately after + the heap header, but if the data segment must grow as a + result of adding more objects, then the data segment may + be relocated, in its entirety, to another part of the + file. + </p> + </td> + </tr> + </table> + </div> + + <p>Objects within a local heap should be aligned on an 8-byte boundary.</p> + +<br /> +<h3><a name="GlobalHeap"> +III.E. Disk Format: Level 1E - Global Heap</a></h3> + + <p>Each HDF5 file has a global heap which stores various types of + information which is typically shared between datasets. The + global heap was designed to satisfy these goals:</p> + + <ol type="A"> + <li>Repeated access to a heap object must be efficient without + resulting in repeated file I/O requests. Since global heap + objects will typically be shared among several datasets, it is + probable that the object will be accessed repeatedly.</li> + <li>Collections of related global heap objects should result in + fewer and larger I/O requests. For instance, a dataset of + object references will have a global heap object for each + reference. 
Reading the entire set of object references + should result in a few large I/O requests instead of one small + I/O request for each reference.</li> + <li>It should be possible to remove objects from the global heap + and the resulting file hole should be eligible to be reclaimed + for other uses.</li> + </ol> + + + <p>The implementation of the heap makes use of the memory management + already available at the file level and combines that with a new + object called a <em>collection</em> to achieve goal B. The global heap + is the set of all collections. Each global heap object belongs to + exactly one collection and each collection contains one or more global + heap objects. For the purposes of disk I/O and caching, a collection is + treated as an atomic object, addressing goal A. + </p> + + <p>When a global heap object is deleted from a collection (which occurs + when its reference count falls to zero), objects located after the + deleted object in the collection are packed down toward the beginning + of the collection and the collection’s global heap object 0 is created + (if possible) or its size is increased to account for the recently + freed space. There are no gaps between objects in each collection, + with the possible exception of the final space in the collection, if + it is not large enough to hold the header for the collection’s global + heap object 0. These features address goal C. + </p> + + <p>The HDF5 Library creates global heap collections as needed, so there may + be multiple collections throughout the file. The set of all of them is + abstractly called the “global heap”, although they do not actually link + to each other, and there is no global place in the file where you can + discover all of the collections. The collections are found simply by + finding a reference to one through another object in the file. For + example, data of variable-length datatype elements is stored in the + global heap and is accessed via a global heap ID. 
The format for + global heap IDs is described at the end of this section. + </p> + + <div align="center"> + <table class="format"> + <caption> + A Global Heap Collection + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Collection Size<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Global Heap Object 1<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Global Heap Object 2<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />...<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Global Heap Object <em>N</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Global Heap Object 0 (free space)<br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>GCOL</code>” + is used to indicate the + beginning of a collection. This gives file consistency + checking utilities a better chance of reconstructing a + damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>Each collection has its own version number so that new + collections can be added to old files. This document + describes version one (1) of the collections (there is no + version zero (0)). + </p> + </td> + </tr> + + <tr> + <td><p>Collection Size</p></td> + <td> + <p>This is the size in bytes of the entire collection + including this field. 
The default (and minimum) + collection size is 4096 bytes which is a typical file + system block size. This allows for 127 16-byte heap + objects plus their overhead (the collection header of 16 bytes + and the 16 bytes of information about each heap object). + </p> + </td> + </tr> + + <tr> + <td><p>Global Heap Object 1 through <em>N</em></p></td> + <td> + <p>The objects are stored in any order with no + intervening unused space. + </p> + </td> + </tr> + + <tr> + <td><p>Global Heap Object 0</p></td> + <td> + <p>Global Heap Object 0 (zero), when present, represents the free + space in the collection. Free space always appears at the end of + the collection. If the free space is too small to store the header + for Object 0 (described below) then the header is implied and the + collection contains no free space. + </p> + </td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Global Heap Object + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="2">Heap Object Index</td> + <td colspan="2">Reference Count</td> + </tr> + + <tr> + <td colspan="4">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Object Size<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Object Data<br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Heap Object Index</p></td> + <td> + <p>Each object has a unique identification number within a + collection. 
The identification numbers are chosen so that + new objects have the smallest value possible with the + exception that the identifier <code>0</code> always refers to the + object which represents all free space within the + collection. + </p> + </td> + </tr> + + <tr> + <td><p>Reference Count</p></td> + <td> + <p>All heap objects have a reference count field. An + object which is referenced from some other part of the + file will have a positive reference count. The reference + count for Object 0 is always zero. + </p> + </td> + </tr> + + <tr> + <td><p>Reserved</p></td> + <td> + <p>Zero padding to align next field on an 8-byte boundary. + </p> + </td> + </tr> + + <tr> + <td><p>Object Size</p></td> + <td> + <p>This is the size of the object data stored for the object. + The actual storage space allocated for the object data is rounded + up to a multiple of eight. + </p> + </td> + </tr> + + <tr> + <td><p>Object Data</p></td> + <td> + <p>The object data is treated as a one-dimensional array + of bytes to be interpreted by the caller. + </p> + </td> + </tr> + </table> + + </div> + + <br /> + <p> + The format for the ID used to locate an object in the global heap is + described here:</p> + + <div align="center"> + <table class="format"> + <caption> + Global Heap ID + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Collection Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Object Index</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) 
+ </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Collection Address</p></td> + <td> + <p>This field is the address of the global heap collection + where the data object is stored. + </p> + </td> + </tr> + + <tr> + <td><p>Object Index</p></td> + <td> + <p>This field is the index of the data object within the + global heap collection. + </p> + </td> + </tr> + + </table> + </div> + + +<br /> +<h3><a name="FractalHeap"> +III.F. Disk Format: Level 1F - Fractal Heap</a></h3> + + <p> + Each fractal heap consists of a header and zero or more direct and + indirect blocks (described below). The header contains general + information as well as + initialization parameters for the doubling table. The <em>Address + of Root Block</em> in the header points to the first direct or + indirect block in the heap. + </p> + + <p> + Fractal heaps are based on a data structure called a <em>doubling + table</em>. A doubling table provides a mechanism for quickly + extending an array-like data structure that minimizes the number of + empty blocks in the heap, while retaining very fast lookup of any + element within the array. More information on fractal heaps and + doubling tables can be found in the RFC + “<a href="Supplements/FractalHeap/PrivateHeap.pdf">Private + Heaps in HDF5</a>.” + </p> + + <p> + The fractal heap implements the doubling table structure with + indirect and direct blocks. + Indirect blocks in the heap do not actually contain data for + objects in the heap; their “size” is abstract - + they represent the indexing structure for locating the + direct blocks in the doubling table. + Direct blocks + contain the actual data for objects stored in the heap. + </p> + + <p> + All indirect blocks have a constant number of block entries in each + row, called the <em>width</em> of the doubling table (stored in + the heap header). 
+ + The number + of rows for each indirect block in the heap is determined by the + size of the block that the indirect block represents in the + doubling table (calculation of this is shown below) and is + constant, except for the “root” + indirect block, which expands and shrinks its number of rows as + needed. + </p> + + <p> + Blocks in the first <em>two</em> rows of an indirect block + are <em>Starting Block Size</em> number of bytes in size, + and the blocks in each subsequent row are twice the size of + the blocks in the previous row. In other words, blocks in + the third row are twice the <em>Starting Block Size</em>, + blocks in the fourth row are four times the + <em>Starting Block Size</em>, and so on. Entries for + blocks up to the <em>Maximum Direct Block Size</em> point to + direct blocks, and entries for blocks greater than that size + point to further indirect blocks (which have their own + entries for direct and indirect blocks). + </p> + + <p> + The number of rows of blocks, <em>nrows</em>, in an + indirect block of size <em>iblock_size</em> is given by the + following expression: + <br /> <br /> + <em>nrows</em> = (log<sub>2</sub>(<em>iblock_size</em>) - + log<sub>2</sub>(<em>&lt;Starting Block Size&gt;</em> * + <em>&lt;Width&gt;</em>)) + 1 + </p> + + <p> + The maximum number of rows of direct blocks, <em>max_dblock_rows</em>, + in any indirect block of a fractal heap is given by the + following expression: + <br /> <br /> + <em>max_dblock_rows</em> = + (log<sub>2</sub>(<em>&lt;Max. 
Direct Block Size&gt;</em>) - + log<sub>2</sub>(<em>&lt;Starting Block Size&gt;</em>)) + 2 + </p> + + <p> + Using the computed values for <em>nrows</em> and + <em>max_dblock_rows</em>, along with the <em>Width</em> of the + doubling table, the number of direct and indirect block entries + (<em>K</em> and <em>N</em> in the indirect block description, below) + in an indirect block can be computed: + <br /> <br /> + <em>K</em> = MIN(<em>nrows</em>, <em>max_dblock_rows</em>) * + <em>Width</em> + + <br /> <br /> + If <em>nrows</em> is less than or equal to <em>max_dblock_rows</em>, + <em>N</em> is 0. Otherwise, <em>N</em> is simply computed: + <br /> <br /> + <em>N</em> = (<em>nrows</em> - <em>max_dblock_rows</em>) * + <em>Width</em> + </p> + + <p> + The size of indirect blocks on disk is determined by the number + of rows in the indirect block (computed above). The size of direct + blocks on disk is exactly the size of the block in the doubling + table. + </p> + + <div align="center"> + <table class="format"> + <caption> + Fractal Heap Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="2">Heap ID Length</td> + <td colspan="2">I/O Filters’ Encoded Length</td> + </tr> + + <tr> + <td>Flags</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Maximum Size of Managed Objects</td> + </tr> + + <tr> + <td colspan="4"><br />Next Huge Object ID<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />v2 B-tree Address of Huge Objects<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Amount of Free Space in Managed Blocks<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Managed Block Free Space 
Manager<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Amount of Managed Space in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Amount of Allocated Managed Space in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Offset of Direct Block Allocation Iterator in Managed Space<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Number of Managed Objects in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Size of Huge Objects in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Number of Huge Objects in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Size of Tiny Objects in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Number of Tiny Objects in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="2">Table Width</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Starting Block Size<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Maximum Direct Block Size<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="2">Maximum Heap Size</td> + <td colspan="2">Starting # of Rows in Root Indirect Block</td> + </tr> + + <tr> + <td colspan="4"><br />Address of Root Block<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="2">Current # of Rows in Root Indirect Block</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Size of Filtered Root Direct Block <em>(optional)</em><sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">I/O Filter Mask<em> (optional)</em></td> + </tr> + + <tr> + <td colspan="4">I/O Filter Information<em> (optional, variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + + </table> + + <table 
class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="40%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>FRHP</code>” + is used to indicate the + beginning of a fractal heap header. This gives file consistency + checking utilities a better chance of reconstructing a + damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Heap ID Length</p></td> + <td> + <p>This is the length in bytes of heap object IDs for this heap.</p> + </td> + </tr> + + <tr> + <td><p>I/O Filters’ Encoded Length</p></td> + <td> + <p>This is the size in bytes of the encoded <em>I/O Filter Information</em>. + </p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td> + <p>This field is the heap status flag and is a bit field + indicating additional information about the fractal heap. + <table class="list"> + <tr> + <th width="20%" align="center">Bit(s)</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, the ID value to use for huge object has wrapped + around. If the value for the <em>Next Huge Object ID</em> + has wrapped around, each new huge object inserted into the + heap will require a search for an ID value. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>If set, the direct blocks in the heap are checksummed. 
+ </td> + </tr> + <tr> + <td align="center"><code>2-7</code></td> + <td>Reserved</td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Maximum Size of Managed Objects</p></td> + <td> + <p>This is the maximum size of managed objects allowed in the heap. + Objects larger than this size are ‘huge’ objects and will be + stored in the file directly, rather than in a direct block for + the heap. + </p> + </td> + </tr> + + <tr> + <td><p>Next Huge Object ID</p></td> + <td> + <p>This is the next ID value to use for a huge object in the heap. + </p> + </td> + </tr> + + <tr> + <td><p>v2 B-tree Address of Huge Objects</p></td> + <td> + <p>This is the address of the <a href="#V2Btrees">v2 B-tree</a> + used to track huge objects in the heap. The type of records + stored in the <em>v2 B-tree</em> will + be determined by whether the address and length of a huge object + can fit into a heap ID (if yes, it is a “directly” accessed + huge object) and whether there is a filter used on objects + in the heap. + </p> + </td> + </tr> + + <tr> + <td><p>Amount of Free Space in Managed Blocks</p></td> + <td> + <p>This is the total amount of free space in managed direct blocks + (in bytes). + </p> + </td> + </tr> + + <tr> + <td><p>Address of Managed Block Free Space Manager</p></td> + <td> + <p>This is the address of the + <em><a href="#FreeSpaceManager">Free-space Manager</a></em> for + managed blocks. + </p> + </td> + </tr> + + <tr> + <td><p>Amount of Managed Space in Heap</p></td> + <td> + <p>This is the total amount of managed space in the heap (in bytes), + essentially the upper bound of the heap’s linear address space. + </p> + </td> + </tr> + + <tr> + <td><p>Amount of Allocated Managed Space in Heap</p></td> + <td> + <p>This is the total amount of managed space (in bytes) actually + allocated in + the heap. This can be less than the <em>Amount of Managed Space + in Heap</em> field, if some direct blocks in the heap’s linear + address space are not allocated. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Offset of Direct Block Allocation Iterator in Managed Space</p></td> + <td> + <p>This is the linear heap offset where the next direct + block should be allocated at (in bytes). This may be less than + the <em>Amount of Managed Space in Heap</em> value because the + heap’s address space is increased by a “row” of direct blocks + at a time, rather than by single direct block increments. + </p> + </td> + </tr> + + <tr> + <td><p>Number of Managed Objects in Heap</p></td> + <td> + <p>This is the number of managed objects in the heap. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Huge Objects in Heap</p></td> + <td> + <p>This is the total size of huge objects in the heap (in bytes). + </p> + </td> + </tr> + + <tr> + <td><p>Number of Huge Objects in Heap</p></td> + <td> + <p>This is the number of huge objects in the heap. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Tiny Objects in Heap</p></td> + <td> + <p>This is the total size of tiny objects that are packed in heap + IDs (in bytes). + </p> + </td> + </tr> + + <tr> + <td><p>Number of Tiny Objects in Heap</p></td> + <td> + <p>This is the number of tiny objects that are packed in heap IDs. + </p> + </td> + </tr> + + <tr> + <td><p>Table Width</p></td> + <td> + <p>This is the number of columns in the doubling table for managed + blocks. This value must be a power of two. + </p> + </td> + </tr> + + <tr> + <td><p>Starting Block Size</p></td> + <td> + <p>This is the starting block size to use in the doubling table for + managed blocks (in bytes). This value must be a power of two. + </p> + </td> + </tr> + + <tr> + <td><p>Maximum Direct Block Size</p></td> + <td> + <p>This is the maximum size allowed for a managed direct block. + Objects inserted into the heap that are larger than this value + (less the # of bytes of direct block prefix/suffix) + are stored as ‘huge’ objects. This value must be a power of + two. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Maximum Heap Size</p></td> + <td> + <p>This is the maximum size of the heap’s linear address space for + managed objects (in bytes). The value stored is the log2 of + the actual value, that is: the # of bits of the address space. + ‘Huge’ and ‘tiny’ objects are not counted in this value, since + they are not stored in the linear address space of the + heap. + </p> + </td> + </tr> + + <tr> + <td><p>Starting # of Rows in Root Indirect Block</p></td> + <td> + <p>This is the starting number of rows for the root indirect block. + A value of 0 indicates that the root indirect block will have + the maximum number of rows needed to address the heap’s <em>Maximum + Heap Size</em>. + </p> + </td> + </tr> + + <tr> + <td><p>Address of Root Block</p></td> + <td> + <p>This is the address of the root block for the heap. It can + be the <a href="#UndefinedAddress">undefined address</a> if + there is no data in the heap. It points either to a direct + block (if the <em>Current # of Rows in Root Indirect Block</em> + value is 0) or to an indirect block. + </p> + </td> + </tr> + + <tr> + <td><p>Current # of Rows in Root Indirect Block</p></td> + <td> + <p>This is the current number of rows in the root indirect block. + A value of 0 indicates that <em>Address of Root Block</em> + points to a direct block instead of an indirect block. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Filtered Root Direct Block</p></td> + <td> + <p>This is the size of the root direct block, if filters are + applied to heap objects (in bytes). This field is only + stored in the header if the <em>I/O Filters’ Encoded Length</em> + is greater than 0. + </p> + </td> + </tr> + + <tr> + <td><p>I/O Filter Mask</p></td> + <td> + <p>This is the filter mask for the root direct block, if filters + are applied to heap objects. This mask has the same format as + that used for the filter mask in chunked raw data records in a + <a href="#V1Btrees">v1 B-tree</a>. 
+ This field is only + stored in the header if the <em>I/O Filters’ Encoded Length</em> + is greater than 0. + </p> + </td> + </tr> + + <tr> + <td><p>I/O Filter Information</p></td> + <td> + <p>This is the I/O filter information encoding direct blocks and + huge objects, if filters are applied to heap objects. This + field is encoded as a <a href="#FilterMessage">Filter Pipeline</a> + message. + The size of this field is determined by <em>I/O Filters’ + Encoded Length</em>. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the header.</p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Fractal Heap Direct Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Heap Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Block Offset <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Checksum <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Object Data <em>(variable size)</em><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>FHDB</code>” + is used to indicate the + beginning of a fractal heap direct block. This gives file consistency + checking utilities a better chance of reconstructing a + damaged file. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Heap Header Address</p></td> + <td> + <p>This is the address for the fractal heap header that this + block belongs to. This field is principally used for file + integrity checking. + </p> + </td> + </tr> + + <tr> + <td><p>Block Offset</p></td> + <td> + <p>This is the offset of the block within the fractal heap’s + address space (in bytes). The number of bytes used to encode + this field is the <em>Maximum Heap Size</em> (in the heap’s + header) divided by 8 and rounded up to the next highest integer, + for values that are not a multiple of 8. This value is + principally used for file integrity checking. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the direct block.</p> + <p>This field is only present if bit 1 of <em>Flags</em> in the + heap’s header is set.</p> + </td> + </tr> + + <tr> + <td><p>Object Data</p></td> + <td> + <p>This section of the direct block stores the actual data for + objects in the heap. The size of this section is determined by + the direct block’s size minus the size of the other fields + stored in the direct block (for example, the <em>Signature</em>, + <em>Version</em>, and others including the <em>Checksum</em> if it is + present). 
+ </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Fractal Heap Indirect Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Heap Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Block Offset <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Child Direct Block #0 Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Size of Filtered Direct Block #0 <em>(optional)</em> <sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">Filter Mask for Direct Block #0 <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Child Direct Block #1 Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Size of Filtered Direct Block #1 <em>(optional)</em><sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">Filter Mask for Direct Block #1 <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4"><br />Child Direct Block #K-1 Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Size of Filtered Direct Block #K-1 <em>(optional)</em><sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">Filter Mask for Direct Block #K-1 <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Child Indirect Block #0 Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Child Indirect Block #1 Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4"><br />Child Indirect Block #N-1 Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + + 
<table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>FHIB</code>” is used to + indicate the beginning of a fractal heap indirect block. This + gives file consistency checking utilities a better chance of + reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Heap Header Address</p></td> + <td> + <p>This is the address for the fractal heap header that this + block belongs to. This field is principally used for file + integrity checking. + </p> + </td> + </tr> + + <tr> + <td><p>Block Offset</p></td> + <td> + <p>This is the offset of the block within the fractal heap’s + address space (in bytes). The number of bytes used to encode + this field is the <em>Maximum Heap Size</em> (in the heap’s + header) divided by 8 and rounded up to the next highest integer, + for values that are not a multiple of 8. This value is + principally used for file integrity checking. + </p> + </td> + </tr> + + <tr> + <td><p>Child Direct Block #K Address</p></td> + <td> + <p>This field is the address of the child direct block. + The size of the [uncompressed] direct block can be computed by + its offset in the heap’s linear address space. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Size of Filtered Direct Block #K</p></td> + <td> + <p>This is the size of the child direct block after passing through + the I/O filters defined for this heap (in bytes). If no I/O + filters are present for this heap, this field is not present. + </p> + </td> + </tr> + <tr> + <td><p>Filter Mask for Direct Block #K</p></td> + <td> + <p>This is the I/O filter mask for the filtered direct block. + This mask has the same format as that used for the filter mask + in chunked raw data records in a <a href="#V1Btrees">v1 B-tree</a>. + If no I/O filters are present for this heap, this field is not + present. + </p> + </td> + </tr> + + <tr> + <td><p>Child Indirect Block #N Address</p></td> + <td> + <p>This field is the address of the child indirect block. + The size of the indirect block can be computed by + its offset in the heap’s linear address space. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the indirect block.</p> + </td> + </tr> + + </table> + + </div> + + <br /> + <p>An object in the fractal heap is identified by means of a fractal heap ID, + which encodes information to locate the object in the heap. + Currently, the fractal heap stores an object in one of three ways, + depending on the object’s size:</p> + + <div align="center"> + <table class="list80"> + <tr> + <th width="20%">Type</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center">Tiny</td> + <td> + <p>When an object is small enough to be encoded in the heap ID, the + object’s data is embedded in the fractal heap ID itself. There are + 2 sub-types for this type of object: normal and extended. The + sub-type for tiny heap IDs depends on whether the heap ID is large + enough to store objects greater than 16 bytes or not. If the + heap ID length is 18 bytes or smaller, the ‘normal’ tiny heap ID + form is used. 
If the heap ID length is greater than 18 bytes in + length, the “extended” form is used. See format description below + for both sub-types. + </p> + </td> + </tr> + + <tr> + <td align="center">Huge</td> + <td> + <p>When the size of an object is larger than <em>Maximum Size of + Managed Objects</em> in the <em>Fractal Heap Header</em>, the + object’s data is stored on its own in the file and the object + is tracked/indexed via a version 2 B-tree. All huge objects + for a particular fractal heap use the same v2 B-tree. All huge + objects for a particular fractal heap use the same format for + their huge object IDs. + </p> + + <p>Depending on whether the IDs for a heap are large enough to hold + the object’s retrieval information and whether I/O pipeline filters + are applied to the heap’s objects, 4 sub-types are derived for + huge object IDs for this heap:</p> + + <div align="center"> + <table class="list"> + <tr> + <th align="left" width="35%">Sub-type</th> + <th align="left">Description</th> + </tr> + + <tr> + <td align="left">Directly accessed, non-filtered</td> + <td> + <p>The object’s address and length are embedded in the + fractal heap ID itself and the object is directly accessed + from them. This allows the object to be accessed without + resorting to the B-tree. + </p> + </td> + </tr> + + <tr> + <td align="left">Directly accessed, filtered</td> + <td> + <p>The filtered object’s address, length, filter mask and + de-filtered size are embedded in the fractal heap ID itself + and the object is accessed directly with them. This allows + the object to be accessed without resorting to the B-tree. + </p> + </td> + </tr> + + <tr> + <td align="left">Indirectly accessed, non-filtered</td> + <td> + <p>The object is located by using a B-tree key embedded in + the fractal heap ID to retrieve the address and length from + the version 2 B-tree for huge objects. Then, the address + and length are used to access the object. 
+ </p> + </td> + </tr> + + <tr> + <td align="left">Indirectly accessed, filtered</td> + <td> + <p>The object is located by using a B-tree key embedded in + the fractal heap ID to retrieve the filtered object’s + address, length, filter mask and de-filtered size from the + version 2 B-tree for huge objects. Then, this information + is used to access the object. + </p> + </td> + </tr> + </table> + </div> + + </td> + </tr> + + <tr> + <td align="center">Managed</td> + <td> + <p>When the size of an object does not meet the above two + conditions, the object is stored and managed via the direct and + indirect blocks based on the doubling table. + </p> + </td> + </tr> + </table> + </div> + + + <p>The specific format for each type of heap ID is described below: + </p> + + <div align="center"> + <table class="format"> + <caption>Fractal Heap ID for Tiny Objects (sub-type 1 - ‘Normal’) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version, Type & Length</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Data <em>(variable size)</em></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version, Type & Length</p></td> + <td> + <p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>6-7</code></td> + <td>The current version of ID format. This document + describes version 0. + </td> + </tr> + <tr> + <td align="center"><code>4-5</code></td> + <td>The ID type. Tiny objects have a value of <code>2</code>. + </td> + </tr> + <tr> + <td align="center"><code>0-3</code></td> + <td>The length of the tiny object. 
The value stored + is one less than the actual length (since zero-length + objects are not allowed to be stored in the heap). + For example, an object of actual length 1 has an + encoded length of 0, an object of actual length 2 + has an encoded length of 1, and so on. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Data</p></td> + <td> + <p>This is the data for the object. + </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption>Fractal Heap ID for Tiny Objects (sub-type 2 - ‘Extended’) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version, Type & Length</td> + <td>Extended Length</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Data <em>(variable size)</em></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version, Type & Length</p></td> + <td> + <p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>6-7</code></td> + <td>The current version of ID format. This document + describes version 0. + </td> + </tr> + <tr> + <td align="center"><code>4-5</code></td> + <td>The ID type. Tiny objects have a value of <code>2</code>. + </td> + </tr> + <tr> + <td align="center"><code>0-3</code></td> + <td>These 4 bits, together with the next byte, form an + unsigned 12-bit integer for holding the length of the + object. These 4 bits are bits 8-11 of the 12-bit integer. + See description for the <em>Extended Length</em> field below. 
+ </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Extended Length</p></td> + <td> + <p>This byte, together with the 4 bits in the previous byte, + forms an unsigned 12-bit integer for holding the length of + the tiny object. These 8 bits are bits 0-7 of the 12-bit + integer formed. The value stored is one less than the actual + length (since zero-length objects are not allowed to be + stored in the heap). For example, an object of actual length + 1 has an encoded length of 0, an object of actual length + 2 has an encoded length of 1, and so on. + </p> + </td> + </tr> + + <tr> + <td><p>Data</p></td> + <td> + <p>This is the data for the object. + </p> + </td> + </tr> + + </table> + </div> + + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption>Fractal Heap ID for Huge Objects (sub-type 1 & 2): indirectly accessed, non-filtered/filtered + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version & Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />v2 B-tree Key<sup>L</sup><em> (variable size)</em><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version & Type</p></td> + <td> + <p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>6-7</code></td> + <td>The current version of ID format. This document + describes version 0. 
+ </td> + </tr> + <tr> + <td align="center"><code>4-5</code></td> + <td>The ID type. Huge objects have a value of <code>1</code>. + </td> + </tr> + <tr> + <td align="center"><code>0-3</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>v2 B-tree Key</p></td> + <td><p>This field is the B-tree key for retrieving the information + from the version 2 B-tree for huge objects needed to access the + object. See the description of <a href="#V2Btrees">v2 B-tree</a> + records sub-type 1 & 2 for a description of the fields. New key + values are derived from <em>Next Huge Object ID</em> in the + <em>Fractal Heap Header</em>.</p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption>Fractal Heap ID for Huge Objects (sub-type 3): directly accessed, non-filtered + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version & Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Address <sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Length <sup>L</sup><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) 
+ </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version & Type</p></td> + <td> + <p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>6-7</code></td> + <td>The current version of ID format. This document + describes version 0. + </td> + </tr> + <tr> + <td align="center"><code>4-5</code></td> + <td>The ID type. Huge objects have a value of <code>1</code>. + </td> + </tr> + <tr> + <td align="center"><code>0-3</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>This field is the address of the object in the file.</p> + </td> + </tr> + + <tr> + <td><p>Length</p></td> + <td><p>This field is the length of the object in the file.</p> + </td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption>Fractal Heap ID for Huge Objects (sub-type 4): directly accessed, filtered + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version & Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Address <sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Length <sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Filter Mask</td> + </tr> + + <tr> + <td colspan="4"><br />De-filtered Size <sup>L</sup><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) 
+ </td> + </tr> + <tr> + <td> </td> + <td>(Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version & Type</p></td> + <td> + <p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>6-7</code></td> + <td>The current version of ID format. This document + describes version 0. + </td> + </tr> + <tr> + <td align="center"><code>4-5</code></td> + <td>The ID type. Huge objects have a value of <code>1</code>. + </td> + </tr> + <tr> + <td align="center"><code>0-3</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>This field is the address of the filtered object in the file.</p> + </td> + </tr> + + <tr> + <td><p>Length</p></td> + <td><p>This field is the length of the filtered object in the file.</p> + </td> + </tr> + + <tr> + <td><p>Filter Mask</p></td> + <td><p>This field is the I/O pipeline filter mask for the + filtered object in the file.</p> + </td> + </tr> + + <tr> + <td><p>De-filtered Size</p></td> + <td><p>This field is the size of the de-filtered object in the file.</p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption>Fractal Heap ID for Managed Objects + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version & Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Offset <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Length <em>(variable size)</em></td> + </tr> + 
</table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version & Type</p></td> + <td><p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>6-7</code></td> + <td>The current version of ID format. This document + describes version 0. + </td> + </tr> + <tr> + <td align="center"><code>4-5</code></td> + <td>The ID type. Managed objects have a value of <code>0</code>. + </td> + </tr> + <tr> + <td align="center"><code>0-3</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Offset</p></td> + <td><p>This field is the offset of the object in the heap. + This field’s size is the minimum number of bytes + necessary to encode the <em>Maximum Heap Size</em> value + (from the <em>Fractal Heap Header</em>). For example, if the + value of the <em>Maximum Heap Size</em> is less than 256 bytes, + this field is 1 byte in length, a <em>Maximum Heap Size</em> + of 256-65535 bytes uses a 2 byte length, and so on.</p></td> + </tr> + + <tr> + <td><p>Length</p></td> + <td><p>This field is the length of the object in the heap. This + field’s size is determined by taking the minimum value of <em>Maximum + Direct Block Size</em> and <em>Maximum Size of Managed + Objects</em> in the <em>Fractal Heap Header</em>. Again, + the minimum number of bytes needed to encode that value is + used for the size of this field.</p></td> + </tr> + </table> + </div> + +<br /> +<h3><a name="FreeSpaceManager"> +III.G. Disk Format: Level 1G - Free-space Manager</a></h3> + + <p> + Free-space managers are used to describe space within a heap or + the entire HDF5 file that is not currently used for that heap or + file. 
+ </p> + + <p> + The <em>free-space manager header</em> contains metadata information + about the space being tracked, along with the address of the list + of <em>free space sections</em> which actually describes the free + space. The header records information about free-space sections being + tracked, creation parameters for handling free-space sections of a + client, and section information used to locate the collection of + free-space sections. + </p> + + <p> + The <em>free-space section list</em> stores a collection of + free-space sections that is specific to each <em>client</em> of the + free-space manager. + + For example, the fractal heap is a client of the free space manager + and uses it to track unused space within the heap. There are 4 + types of section records for the fractal heap, each of which has + its own format, listed below. + </p> + + <div align="center"> + <table class="format"> + <caption> + Free-space Manager Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td>Client ID</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Total Space Tracked<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Total Number of Sections<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Number of Serialized Sections<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Number of Un-Serialized Sections<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="2">Number of Section Classes</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="2">Shrink Percent</td> + <td colspan="2">Expand Percent</td> + </tr> + + <tr> + <td colspan="2">Size of Address Space</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This 
space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Maximum Section Size <sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Serialized Section List<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Size of Serialized Section List Used<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Allocated Size of Serialized Section List<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="35%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>FSHD</code>” is used to + indicate the beginning of the Free-space Manager Header. + This gives file consistency checking utilities a better chance of + reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This is the version number for the Free-space Manager Header + and this document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Client ID</p></td> + <td> + <p>This is the client ID for identifying the user of this + free-space manager: + + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Fractal heap + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>File + </td> + </tr> + <tr> + <td align="center"><code>2+</code></td> + <td>Reserved. 
+ </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Total Space Tracked</p></td> + <td> + <p>This is the total amount of free space being tracked, in bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Total Number of Sections</p></td> + <td> + <p>This is the total number of free-space sections being tracked. + </p> + </td> + </tr> + + <tr> + <td><p>Number of Serialized Sections</p></td> + <td> + <p>This is the number of serialized free-space sections being + tracked. + </p> + </td> + </tr> + <tr> + <td><p>Number of Un-Serialized Sections</p></td> + <td> + <p>This is the number of un-serialized free-space sections being + managed. Un-serialized sections are created by the free-space + client when the list of sections is read in. + </p> + </td> + </tr> + + <tr> + <td><p>Number of Section Classes</p></td> + <td> + <p>This is the number of section classes handled by this free space + manager for the free-space client. + </p> + </td> + </tr> + + <tr> + <td><p>Shrink Percent</p></td> + <td> + <p>This is the percent of current size to shrink the allocated + serialized free-space section list. + </p> + </td> + </tr> + + <tr> + <td><p>Expand Percent</p></td> + <td> + <p>This is the percent of current size to expand the allocated + serialized free-space section list. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Address Space</p></td> + <td> + <p>This is the size of the address space that free-space sections + are within. This is stored as the log<sub>2</sub> of the + actual value (in other words, the number of bits required + to store values within that address space). + </p> + </td> + </tr> + + <tr> + <td><p>Maximum Section Size</p></td> + <td> + <p>This is the maximum size of a section to be tracked. + </p> + </td> + </tr> + + <tr> + <td><p>Address of Serialized Section List</p></td> + <td> + <p>This is the address where the serialized free-space section + list is stored. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Size of Serialized Section List Used</p></td> + <td> + <p>This is the size of the serialized free-space section + list used (in bytes). This value must be less than + or equal to the <em>allocated size of serialized section + list</em>, below. + </p> + </td> + </tr> + + <tr> + <td><p>Allocated Size of Serialized Section List</p></td> + <td> + <p>This is the size of serialized free-space section list + actually allocated (in bytes). + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the free-space manager header.</p> + </td> + </tr> + + </table> + </div> + + <br /> + <p>The free-space sections being managed are stored in a + <em>free-space section list</em>, described below. The sections + in the free-space section list are stored in the following way: + a count of the number of sections describing a particular size of + free space and the size of the free-space described (in bytes), + followed by a list of section description records; then another + section count and size, followed by the list of section + descriptions for that size; and so on.</p> + + + <div align="center"> + <table class="format"> + <caption> + Free-space Section List + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Free-space Manager Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Number of Section Records in Set #0 <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Size of Free-space Section Described in Record Set #0 <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #0 Section Record #0 Offset<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="1">Record Set #0 Section 
Record #0 Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #0 Section Record #0 Data <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Record Set #0 Section Record #K-1 Offset<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="1">Record Set #0 Section Record #K-1 Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #0 Section Record #K-1 Data <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Number of Section Records in Set #1 <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Size of Free-space Section Described in Record Set #1 <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #1 Section Record #0 Offset<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="1">Record Set #1 Section Record #0 Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #1 Section Record #0 Data <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Record Set #1 Section Record #K-1 Offset<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="1">Record Set #1 Section Record #K-1 Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #1 Section Record #K-1 Data <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4"><strong>...</strong></td> + </tr> + + <tr> + <td colspan="4"><strong>...</strong></td> + </tr> + + <tr> + <td colspan="4">Number of Section Records in Set #N-1 <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Size of Free-space Section Described in Record Set #N-1 
<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #N-1 Section Record #0 Offset<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="1">Record Set #N-1 Section Record #0 Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #N-1 Section Record #0 Data <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Record Set #N-1 Section Record #K-1 Offset<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="1">Record Set #N-1 Section Record #K-1 Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #N-1 Section Record #K-1 Data <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="35%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>FSSE</code>” is used to + indicate the beginning of the Free-space Section Information. + This gives file consistency checking utilities a better chance of + reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This is the version number for the Free-space Section List + and this document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Free-space Manager Header Address</p></td> + <td> + <p>This is the address of the <em>Free-space Manager Header</em>. + This field is principally used for file + integrity checking. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Number of Section Records for Set #N</p></td> + <td> + <p>This is the number of free-space section records for set #N. + The length of this field is the minimum number of bytes needed + to store the <em>number of serialized sections</em> (from the + <em>free-space manager header</em>). + </p> + + <p> + The number of sets of free-space section records is + determined by the <em>size of serialized section list</em> in + the <em>free-space manager header</em>. + </p> + </td> + </tr> + + <tr> + <td><p>Section Size for Record Set #N</p></td> + <td> + <p>This is the size (in bytes) of the free-space section described + for <em>all</em> the section records in set #N. + </p> + + <p> + The length of this field is the minimum number of bytes needed + to store the <em>maximum section size</em> (from the + <em>free-space manager header</em>). + </p> + </td> + </tr> + + <tr> + <td><p>Record Set #N Section #K Offset</p></td> + <td> + <p>This is the offset (in bytes) of the free-space section within + the client for the free-space manager. + </p> + + <p> + The length of this field is the minimum number of bytes needed + to store the <em>size of address space</em> (from the + <em>free-space manager header</em>). + </p> + </td> + </tr> + + <tr> + <td><p>Record Set #N Section #K Type</p></td> + <td> + <p>This is the type of the section record, used to decode the + <em>record set #N section #K data</em> information. The defined + record type for <em>file</em> client is: + + <table class="list"> + <tr> + <th width="20%" align="center">Type</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>File’s section (a range of actual bytes in file) + </td> + </tr> + <tr> + <td align="center"><code>1+</code></td> + <td>Reserved. 
+ </td> + </tr> + </table></p> + + <p>The defined record types for a <em>fractal heap</em> client are: + + <table class="list"> + <tr> + <th width="20%" align="center">Type</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Fractal heap “single” section + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Fractal heap “first row” section + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Fractal heap “normal row” section + </td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>Fractal heap “indirect” section + </td> + </tr> + + <tr> + <td align="center"><code>4+</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Record Set #N Section #K Data</p></td> + <td> + <p>This is the section-type specific information for each record + in the record set, described below. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the <em>Free-space Section List</em>. 
+ </p> + </td> + </tr> + + </table> + </div> + + <br /> + <p> + The section-type specific data for each free-space section record is + described below: + </p> + + <div align="center"> + <table class="format"> + <caption> + File’s Section Data Record + </caption> + + <tr> + <td colspan="4"><em>No additional record data stored</em></td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Fractal Heap “Single” Section Data Record + </caption> + + <tr> + <td colspan="4"><em>No additional record data stored</em></td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Fractal Heap “First Row” Section Data Record + </caption> + + <tr> + <td colspan="4"><em>Same format as “indirect” section data</em></td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Fractal Heap “Normal Row” Section Data Record + </caption> + + <tr> + <td colspan="4"><em>No additional record data stored</em></td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Fractal Heap “Indirect” Section Data Record + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Fractal Heap Indirect Block Offset <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="2">Block Start Row</td> + <td colspan="2">Block Start Column</td> + </tr> + + <tr> + <td colspan="2">Number of Blocks</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Fractal Heap Block Offset</p></td> + <td> + <p>The offset of the indirect block in the fractal heap’s address + space containing the empty blocks. 
+ </p> + <p> + The number of bytes used to encode this field is the minimum + number of bytes needed to encode values for the <em>Maximum + Heap Size</em> (in the fractal heap’s header). + </p> + </td> + </tr> + + <tr> + <td><p>Block Start Row</p></td> + <td> + <p>This is the row that the empty blocks start in. + </p> + </td> + </tr> + + <tr> + <td><p>Block Start Column</p></td> + <td> + <p>This is the column that the empty blocks start in. + </p> + </td> + </tr> + + <tr> + <td><p>Number of Blocks</p></td> + <td> + <p>This is the number of empty blocks covered by the section. + </p> + </td> + </tr> + </table> + </div> + +<br /> +<h3><a name="SOHMTable"> +III.H. Disk Format: Level 1H - Shared Object Header Message Table</a></h3> + + <p> + The <em>shared object header message table</em> is used to locate + object + header messages that are shared between two or more object headers + in the file. Shared object header messages are stored and indexed + in the file in one of two ways: indexed sequentially in a + <em>shared header message list</em> or indexed with a v2 B-tree. + The shared messages themselves are either stored in a fractal + heap (when two or more objects share the message), or remain in an + object’s header (when only one object uses the message currently, + but the message can be shared in the future). + </p> + + <p> + The <em>shared object header message table</em> + contains a list of shared message index headers. Each index header + records information about the version of the index format, the index + storage type, flags for the message types indexed, the number of + messages in the index, the address where the index resides, + and the fractal heap address if shared messages are stored there. + </p> + + <p> + Each index can be either a list or a v2 B-tree and may transition + between those two forms as the number of messages in the index + varies. 
Each shared message record contains information used to + locate the shared message from either a fractal heap or an object + header. The types of messages that can be shared are: <em>Dataspace, + Datatype, Fill Value, Filter Pipeline and Attribute</em>. + </p> + + <p> + The <em>shared object header message table</em> is pointed to + from a <a href="#SOHMTableMessage">shared message table</a> message + in the superblock extension for a file. This message stores the + version of the table format, along with the number of index headers + in the table. + </p> + + <div align="center"> + <table class="format"> + <caption> + Shared Object Header Message Table + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version for index #0</td> + <td>Index Type for index #0</td> + <td colspan="2">Message Type Flags for index #0</td> + </tr> + + <tr> + <td colspan="4">Minimum Message Size for index #0</td> + </tr> + + <tr> + <td colspan="2">List Cutoff for index #0</td> + <td colspan="2">v2 B-tree Cutoff for index #0</td> + </tr> + + <tr> + <td colspan="2">Number of Messages for index #0</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Index Address<sup>O</sup> for index #0<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Fractal Heap Address<sup>O</sup> for index #0<br /><br /></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td>Version for index #N-1</td> + <td>Index Type for index #N-1</td> + <td colspan="2">Message Type Flags for index #N-1</td> + </tr> + + <tr> + <td colspan="4">Minimum Message Size for index #N-1</td> + </tr> + + <tr> + <td colspan="2">List Cutoff for index #N-1</td> + <td colspan="2">v2 B-tree Cutoff for index #N-1</td> + </tr> + + <tr> + <td colspan="2">Number of Messages for index 
#N-1</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Index Address<sup>O</sup> for index #N-1<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Fractal Heap Address<sup>O</sup> for index #N-1<br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="35%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>SMTB</code>” is used to + indicate the beginning of the Shared Object Header Message table. + This gives file consistency checking utilities a better chance of + reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version for index #N</p></td> + <td> + <p>This is the version number for the list of shared object header message + indexes and this document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Index Type for index #N</p></td> + <td> + <p>The type of index can be an unsorted list or a v2 B-tree. + </p> + </td> + </tr> + + <tr> + <td><p>Message Type Flags for index #N</p></td> + <td> + <p>This field indicates the type of messages tracked in the index, + as follows: + <table class="list"> + <tr> + <th width="20%" align="center">Bits</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, the index tracks <em>Dataspace Messages</em>. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>If set, the index tracks <em>Datatype Messages</em>. 
+ </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>If set, the index tracks <em>Fill Value Messages</em>. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>If set, the index tracks <em>Filter Pipeline Messages</em>. + </td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>If set, the index tracks <em>Attribute Messages</em>. + </td> + </tr> + <tr> + <td align="center"><code>5-15</code></td> + <td>Reserved (zero). + </td> + </tr> + </table></p> + + + <p> + An index can track more than one type of message, but each type + of message can only be in one index. + </p> + </td> + </tr> + + <tr> + <td><p>Minimum Message Size for index #N</p></td> + <td> + <p>This is the message size sharing threshold for the index. + If the encoded size of the message is less than this value, the + message is not shared. + </p> + </td> + </tr> + + <tr> + <td><p>List Cutoff for index #N</p></td> + <td> + <p>This is the cutoff value for the indexing of messages to + switch from a list to a v2 B-tree. If the number of messages + is greater than this value, the index should be a v2 B-tree. + </p> + </td> + </tr> + <tr> + <td><p>v2 B-tree Cutoff for index #N</p></td> + <td> + <p>This is the cutoff value for the indexing of messages to + switch from a v2 B-tree back to a list. If the number of + messages is less than this value, the index should be a list. + </p> + </td> + </tr> + + <tr> + <td><p>Number of Messages for index #N</p></td> + <td> + <p>The number of shared messages being tracked for the index. + </p> + </td> + </tr> + + <tr> + <td><p>Index Address for index #N</p></td> + <td> + <p>This field is the address of the list or v2 B-tree where the + index nodes reside. + </p> + </td> + </tr> + + <tr> + <td><p>Fractal Heap Address for index #N</p></td> + <td> + <p>This field is the address of the fractal heap if shared messages + are stored there. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the table.</p> + </td> + </tr> + + </table> + </div> + + <br /> + <p> + Shared messages are indexed either with a <em>shared message record + list</em>, described below, or using a v2 B-tree (using record type 7). + The number of records in the <em>shared message record list</em> is + determined in the index’s entry in the <em>shared object header message + table</em>. + </p> + + <div align="center"> + <table class="format"> + <caption> + Shared Message Record List + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td colspan="4">Shared Message Record #0</td> + </tr> + + <tr> + <td colspan="4">Shared Message Record #1</td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Shared Message Record #N-1</td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>SMLI</code>” is used to + indicate the beginning of a list of index nodes. + This gives file consistency checking utilities a better chance of + reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Shared Message Record #N</p></td> + <td> + <p>The record for locating the shared message, either in the + fractal heap for the index, or an object header (see format for + <em>index nodes</em> below). + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the list. 
+ </p> + </td> + </tr> + + </table> + </div> + + <br /> + <p> + The record for each shared message in an index is stored in one of the + following forms: + </p> + + <div align="center"> + <table class="format"> + <caption> + Shared Message Record, for messages stored in a fractal heap + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Message Location</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Hash Value</td> + </tr> + + <tr> + <td colspan="4">Reference Count</td> + </tr> + + <tr> + <td colspan="4"><br />Fractal Heap ID<br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Message Location</p></td> + <td> + <p>This has a value of 0 indicating that the message is stored in + the heap. + </p> + </td> + </tr> + + <tr> + <td><p>Hash Value</p></td> + <td> + <p>This is the hash value for the message. + </p> + </td> + </tr> + + <tr> + <td><p>Reference Count</p></td> + <td> + <p>This is the number of times the message is used in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Fractal Heap ID</p></td> + <td> + <p>This is an 8-byte fractal heap ID for the message as stored in + the fractal heap for the index. 
+ </p> + </td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Shared Message Record, for messages stored in an object header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Message Location</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Hash Value</td> + </tr> + + <tr> + <td>Reserved</td> + <td>Message Type</td> + <td colspan="2">Creation Index</td> + </tr> + + <tr> + <td colspan="4"><br />Object Header Address<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Message Location</p></td> + <td> + <p>This has a value of 1 indicating that the message is stored in + an object header. + </p> + </td> + </tr> + + <tr> + <td><p>Hash Value</p></td> + <td> + <p>This is the hash value for the message. + </p> + </td> + </tr> + + <tr> + <td><p>Message Type</p></td> + <td> + <p>This is the message type in the object header. + </p> + </td> + </tr> + + <tr> + <td><p>Creation Index</p></td> + <td> + <p>This is the creation index of the message within the object + header. + </p> + </td> + </tr> + + <tr> + <td><p>Object Header Address</p></td> + <td> + <p>This is the address of the object header where the message is + located. + </p> + </td> + </tr> + </table> + </div> + + + +<br /> +<br /> +<hr /> +<h2><a name="DataObject"> +IV. Disk Format: Level 2 - Data Objects </a></h2> + + <p>Data objects contain the “real” user-visible information in the file. 
+ These objects compose the scientific data and other information which + are generally thought of as “data” by the end-user. All the + other information in the file is provided as a framework for + storing and accessing these data objects. + </p> + + <p>A data object is composed of header and data + information. The header information contains the information + needed to interpret the data information for the object as + well as additional “metadata” or pointers to additional + “metadata” used to describe or annotate each object. + </p> + +<br /> +<h3><a name="ObjectHeader"> +IV.A. Disk Format: Level 2A - Data Object Headers</a></h3> + + <p>The header information of an object is designed to encompass + all of the information about an object, except for the data itself. + This information includes the dataspace, the datatype, information + about how the data is stored on disk (in external files, compressed, + broken up in blocks, and so on), as well as other information used + by the library to speed up access to the data objects or maintain + a file’s integrity. Information stored by user applications + as attributes is also stored in the object’s header. The header + of each object is not necessarily located immediately prior to the + object’s data in the file and in fact may be located in any + position in the file. The order of the messages in an object header + is not significant.</p> + + <p>Object headers are composed of a prefix and a set of messages. The + prefix contains the information needed to interpret the messages and + a small amount of metadata about the object, and the messages contain + the majority of the metadata about the object. + </p> + +<br /> +<h3><a name="ObjectHeaderPrefix"> +IV.A.1. Disk Format: Level 2A1 - Data Object Header Prefix</a></h3> + +<br /> +<h4><a name="V1ObjectHeaderPrefix"> +IV.A.1.a. Version 1 Data Object Header Prefix</a></h4> + + <p>Header messages are aligned on 8-byte boundaries for version 1 + object headers. 
+ </p> + + <div align="center"> + <table class="format"> + <caption> + Version 1 Object Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Reserved (zero)</td> + <td colspan="2">Total Number of Header Messages</td> + </tr> + + <tr> + <td colspan="4">Object Reference Count</td> + </tr> + + <tr> + <td colspan="4">Object Header Size</td> + </tr> + + <tr> + <td colspan="2">Header Message Type #1</td> + <td colspan="2">Size of Header Message Data #1</td> + </tr> + + <tr> + <td>Header Message #1 Flags</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Header Message Data #1<br /><br /></td> + </tr> + + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + + <tr> + <td colspan="2">Header Message Type #n</td> + <td colspan="2">Size of Header Message Data #n</td> + </tr> + + <tr> + <td>Header Message #n Flags</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Header Message Data #n<br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This value is used to determine the format of the + information in the object header. When the format of the + object header is changed, the version number + is incremented and can be used to determine how the + information in the object header is formatted. This + is version one (1) (there was no version zero (0)) of the + object header. + </p> + </td> + </tr> + + <tr> + <td><p>Total Number of Header Messages</p></td> + <td> + <p>This value determines the total number of messages listed in + object headers for this object. This value includes the messages + in continuation messages for this object. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Object Reference Count</p></td> + <td> + <p>This value specifies the number of “hard links” to this object + within the current file. References to the object from external + files, “soft links” in this file and object references in this + file are not tracked. + </p> + </td> + </tr> + + <tr> + <td><p>Object Header Size</p></td> + <td> + <p>This value specifies the number of bytes of header message data + following this length field that contain object header messages + for this object header. This value does not include the size of + object header continuation blocks for this object elsewhere in the + file. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Type</p></td> + <td> + <p>This value specifies the type of information included in the + following header message data. The message types for + header messages are defined in sections below. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Header Message #n Data</p></td> + <td> + <p>This value specifies the number of bytes of header + message data following the header message type and length + information for the current message. The size includes + padding bytes to make the message a multiple of eight + bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Flags</p></td> + <td> + <p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, the message data is constant. This is used + for messages like the datatype message of a dataset. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>If set, the message is <em>shared</em> and stored + in another location than the object header. 
The Header + Message Data field contains a Shared Message + (described in the <a href="#ObjectHeaderMessages">Data Object Header Messages</a> + section below) + and the Size of Header Message Data field + contains the size of that Shared Message. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>If set, the message should not be shared. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>If set, the HDF5 decoder should fail to open this object + if it does not understand the message’s type and the file + is open with permissions allowing write access to the file. + (Normally, unknown messages can just be ignored by HDF5 + decoders) + </td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>If set, the HDF5 decoder should set bit 5 of this + message’s flags (in other words, this bit field) + if it does not understand the message’s type + and the object is modified in any way. (Normally, + unknown messages can just be ignored by HDF5 + decoders) + </td> + </tr> + <tr> + <td align="center"><code>5</code></td> + <td>If set, this object was modified by software that did not + understand this message. + (Normally, unknown messages should just be ignored by HDF5 + decoders) (Can be used to invalidate an index or a similar + feature) + </td> + </tr> + <tr> + <td align="center"><code>6</code></td> + <td>If set, this message is shareable. + </td> + </tr> + <tr> + <td align="center"><code>7</code></td> + <td>If set, the HDF5 decoder should always fail to open this + object if it does not understand the message’s type (whether + it is open for read-only or read-write access). (Normally, + unknown messages can just be ignored by HDF5 decoders) + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Header Message #n Data</p></td> + <td> + <p>The format and length of this field is determined by the + header message type and size respectively. 
Some header + message types do not require any data and this information + can be eliminated by setting the length of the message to + zero. The data is padded with enough zeroes to make the + size a multiple of eight. + </p> + </td> + </tr> + </table> + </div> + +<br /> +<h4><a name="V2ObjectHeaderPrefix"> +IV.A.1.b. Version 2 Data Object Header Prefix</a></h4> + + <p>Note that the “total number of messages” field has been dropped from + the data object header prefix in this version. The number of messages + in the data object header is just determined by the messages encountered + in all the object header blocks.</p> + + <p>Note also that the fields and messages in this version of data object + headers have <em>no</em> alignment or padding bytes inserted - they are + stored packed together.</p> + + <div align="center"> + <table class="format"> + <caption> + Version 2 Object Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Access time <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4">Modification Time <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4">Change Time <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4">Birth Time <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="2">Maximum # of compact attributes <em>(optional)</em></td> + <td colspan="2">Minimum # of dense attributes <em>(optional)</em></td> + </tr> + + <tr> + <td>Size of Chunk #0 <em>(variable size)</em></td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td>Header Message Type #1</td> + <td colspan="2">Size of Header Message Data #1</td> + <td>Header Message #1 Flags</td> + </tr> + + <tr> + <td colspan="2">Header Message #1 
Creation Order <em>(optional)</em></td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Header Message Data #1<br /><br /></td> + </tr> + + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + + <tr> + <td>Header Message Type #n</td> + <td colspan="2">Size of Header Message Data #n</td> + <td>Header Message #n Flags</td> + </tr> + + <tr> + <td colspan="2">Header Message #n Creation Order <em>(optional)</em></td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Header Message Data #n<br /><br /></td> + </tr> + + <tr> + <td colspan="4">Gap <em>(optional, variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>OHDR</code>” + is used to indicate the + beginning of an object header. This gives file consistency + checking utilities a better chance of reconstructing a + damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This field has a value of 2 indicating version 2 of the object header. + </p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td> + <p>This field is a bit field indicating additional information + about the object header. + <table class="list"> + <tr> + <th width="20%" align="center">Bit(s)</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0-1</code></td> + <td>This two bit field determines the size of the + <em>Size of Chunk #0</em> field. 
The values are: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>The <em>Size of Chunk #0</em> field is 1 byte. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>The <em>Size of Chunk #0</em> field is 2 bytes. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>The <em>Size of Chunk #0</em> field is 4 bytes. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>The <em>Size of Chunk #0</em> field is 8 bytes. + </td> + </tr> + </table></p> + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>If set, attribute creation order is tracked.</td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>If set, attribute creation order is indexed.</td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>If set, non-default attribute storage phase change + values are stored.</td> + </tr> + <tr> + <td align="center"><code>5</code></td> + <td>If set, access, modification, change and birth times + are stored.</td> + </tr> + <tr> + <td align="center"><code>6-7</code></td> + <td>Reserved</td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Access Time</p></td> + <td> + <p>This 32-bit value represents the number of seconds after the + UNIX epoch when the object’s raw data was last accessed + (in other words, read or written). + </p> + <p>This field is present if bit 5 of <em>flags</em> is set. + </p> + </td> + </tr> + + <tr> + <td><p>Modification Time</p></td> + <td> + <p>This 32-bit value represents the number of seconds after + the UNIX epoch when the object’s raw data was last + modified (in other words, written). + </p> + <p>This field is present if bit 5 of <em>flags</em> is set. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Change Time</p></td> + <td> + <p>This 32-bit value represents the number of seconds after the + UNIX epoch when the object’s metadata was last changed. + </p> + <p>This field is present if bit 5 of <em>flags</em> is set. + </p> + </td> + </tr> + + <tr> + <td><p>Birth Time</p></td> + <td> + <p>This 32-bit value represents the number of seconds after the + UNIX epoch when the object was created. + </p> + <p>This field is present if bit 5 of <em>flags</em> is set. + </p> + </td> + </tr> + + <tr> + <td><p>Maximum # of compact attributes</p></td> + <td> + <p>This is the maximum number of attributes to store in the compact + format before switching to the indexed format. + </p> + <p>This field is present if bit 4 of <em>flags</em> is set. + </p> + </td> + </tr> + + <tr> + <td><p>Minimum # of dense attributes</p></td> + <td> + <p>This is the minimum number of attributes to store in the indexed + format before switching to the compact format. + </p> + <p>This field is present if bit 4 of <em>flags</em> is set. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Chunk #0</p></td> + <td> + <p> + This unsigned value specifies the number of bytes of header + message data following this field that contain object header + information. + </p> + <p> + This value does not include the size of object header + continuation blocks for this object elsewhere in the file. + </p> + <p> + The length of this field varies depending on bits 0 and 1 of + the <em>flags</em> field. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Type</p></td> + <td> + <p>Same format as version 1 of the object header, described above. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Header Message #n Data</p></td> + <td> + <p>This value specifies the number of bytes of header + message data following the header message type and length + information for the current message. The size of messages + in this version does <em>not</em> include any padding bytes. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Flags</p></td> + <td> + <p>Same format as version 1 of the object header, described above. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Creation Order</p></td> + <td> + <p>This field stores the order that a message of a given type + was created in. + </p> + <p>This field is present if bit 2 of <em>flags</em> is set. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Data</p></td> + <td> + <p>Same format as version 1 of the object header, described above. + </p> + </td> + </tr> + + <tr> + <td><p>Gap</p></td> + <td> + <p>A gap in an object header chunk is inferred by the end of the + messages for the chunk before the beginning of the chunk’s + checksum. Gaps are always smaller than the size of an + object header message prefix (message type + message size + + message flags). + </p> + <p>Gaps are formed when a message (typically an attribute message) + in an earlier chunk is deleted and a message from a later + chunk that does not quite fit into the free space is moved + into the earlier chunk. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the object header chunk. + </p> + </td> + </tr> + </table> + </div> + + <p>The header message types and the message data associated with + them compose the critical “metadata” about each object. Some + header messages are required for each object while others are + optional. Some optional header messages may also be repeated + several times in the header itself; the requirements and number + of times allowed in the header are noted in each header + message description below. + </p> + + +<br /> +<h3><a name="ObjectHeaderMessages"> +IV.A.2. Disk Format: Level 2A2 - Data Object Header Messages</a></h3> + + <p>Data object header messages are small pieces of metadata that are + stored in the data object header for each object in an HDF5 file. 
+ Data object header messages provide the metadata required to describe + an object and its contents, as well as optional pieces of metadata + that annotate the meaning or purpose of the object. + </p> + + <p>Data object header messages are either stored directly in the data + object header for the object or are shared between multiple objects + in the file. When a message is shared, a flag in the <em>Message Flags</em> + indicates that the actual <em>Message Data</em> + portion of that message is stored in another location (such as another + data object header, or a heap in the file) and the <em>Message Data</em> + field contains the information needed to locate the actual information + for the message. + </p> + + <p> + The format of shared message data is described here:</p> + + <div align="center"> + <table class="format"> + <caption> + Shared Message (Version 1) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Type</td> + <td colspan="2">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) 
+ </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number is used when there are changes in the format + of a shared object message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used.</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Used by the library before version 1.6.1. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td><p>The type of shared message location: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Message stored in another object’s header (a <em>committed</em> + message). + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>The address of the object header + containing the message to be shared.</p> + </td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Shared Message (Version 2) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Type</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) 
+ </td></tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number is used when there are changes in the format + of a shared object message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Used by the library of version 1.6.1 and after. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td><p>The type of shared message location: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Message stored in another object’s header (a <em>committed</em> + message). + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>The address of the object header + containing the message to be shared.</p></td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Shared Message (Version 3) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Type</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Location <em>(variable size)</em></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number indicates changes in the format of shared + object message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th 
width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>Used by the library of version 1.8 and after. In this + version, the <em>Type</em> field can indicate that + the message is stored in the fractal heap. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td><p>The type of shared message location: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Message is not shared and is not shareable. + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Message stored in file’s <em>shared object header message</em> + heap (a <em>shared</em> message). + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Message stored in another object’s header (a <em>committed</em> + message). + </td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>Message is not shared, but is shareable. + </td> + </tr> + + </table></p> + </td> + </tr> + + <tr> + <td><p>Location</p></td> + <td><p>This field contains either a <em>Size of Offsets</em>-bytes + address of the object header + containing the message to be shared, or an 8-byte fractal heap ID + for the message in the file’s <em>shared object header message</em> + heap. + </p> + </td> + </tr> + </table> + </div> + + + <p>The following is a list of currently defined header messages: + </p> + +<br /> +<h4><a name="NILMessage">IV.A.2.a. 
The NIL Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> NIL</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0000</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may be repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The NIL message is used to indicate a message which is to be + ignored when reading the header messages for a data object. + [Possibly one which has been deleted for some reason.] + </td></tr> + <tr><td colspan="2"><b>Format of Data:</b> Unspecified</td></tr> + </table></center> + <!-- end msgdesc table --> + + +<br /> +<h4><a name="DataspaceMessage">IV.A.2.b. The Dataspace Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Dataspace</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0001</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies according to the number of + dimensions, as described in the following table.</td></tr> + <tr><td colspan="2"><b>Status:</b> Required for dataset objects; + may not be repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The dataspace message describes the number of dimensions (in + other words, “rank”) and size of each dimension that + the data object has. This message is only used for datasets which + have a simple, rectilinear, array-like layout; datasets requiring + a more complex layout are not yet supported. 
+ </td> + </tr> + + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Dataspace Message - Version 1 + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Dimensionality</td> + <td>Flags</td> + <td>Reserved</td> + </tr> + + <tr> + <td colspan="4">Reserved</td> + </tr> + + <tr> + <td colspan="4"><br />Dimension #1 Size<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4"><br />Dimension #n Size<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Dimension #1 Maximum Size<sup>L</sup> <em>(optional)</em><br /><br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4"><br />Dimension #n Maximum Size<sup>L</sup> <em>(optional)</em><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Permutation Index #1<sup>L</sup> <em>(optional)</em><br /><br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4"><br />Permutation Index #n<sup>L</sup> <em>(optional)</em><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This value is used to determine the format of the + Dataspace Message. When the format of the + information in the message is changed, the version number + is incremented and can be used to determine how the + information in the object header is formatted. 
This + document describes version one (1) (there was no version + zero (0)). + </p> + </td> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td> + <p>This value is the number of dimensions that the data + object has. + </p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td> + <p>This field is used to store flags to indicate the + presence of parts of this message. Bit 0 (the least + significant bit) is used to indicate that maximum + dimensions are present. Bit 1 is used to indicate that + permutation indices are present. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td> + <p>This value is the current size of the dimension of the + data as stored in the file. The first dimension stored in + the list of dimensions is the slowest changing dimension + and the last dimension stored is the fastest changing + dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Maximum Size</p></td> + <td> + <p>This value is the maximum size of the dimension of the + data as stored in the file. This value may be the special + “<a href="#UnlimitedDim">unlimited</a>” size which indicates + that the data may expand along this dimension indefinitely. + If these values are not stored, the maximum size of each + dimension is assumed to be the dimension’s current size. + </p> + </td> + </tr> + + <tr> + <td><p>Permutation Index #n</p></td> + <td> + <p>This value is the index permutation used to map + each dimension from the canonical representation to an + alternate axis for each dimension. If these values are + not stored, the first dimension stored in the list of + dimensions is the slowest changing dimension and the last + dimension stored is the fastest changing dimension. 
+ </p> + </td> + </tr> + </table> + </div> + + + + <br /> + <p>Version 2 of the dataspace message dropped the optional + permutation index value support, as it was never implemented in the + HDF5 Library:</p> + + <div align="center"> + <table class="format"> + <caption> + Dataspace Message - Version 2 + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Dimensionality</td> + <td>Flags</td> + <td>Type</td> + </tr> + + <tr> + <td colspan="4"><br />Dimension #1 Size<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4"><br />Dimension #n Size<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Dimension #1 Maximum Size<sup>L</sup> <em>(optional)</em><br /><br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4"><br />Dimension #n Maximum Size<sup>L</sup> <em>(optional)</em><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This value is used to determine the format of the + Dataspace Message. This field should be ‘2’ for version 2 + format messages. + </p> + </td> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td> + <p>This value is the number of dimensions that the data object has. + </p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td> + <p>This field is used to store flags to indicate the + presence of parts of this message. Bit 0 (the least + significant bit) is used to indicate that maximum + dimensions are present. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td> + <p>This field indicates the type of the dataspace: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>A <em>scalar</em> dataspace; in other words, + a dataspace with a single, dimensionless element. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>A <em>simple</em> dataspace; in other words, + a dataspace with a rank > 0 and an appropriate # of + dimensions. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>A <em>null</em> dataspace; in other words, + a dataspace with no elements. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td> + <p>This value is the current size of the dimension of the + data as stored in the file. The first dimension stored in + the list of dimensions is the slowest changing dimension + and the last dimension stored is the fastest changing + dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Maximum Size</p></td> + <td> + <p>This value is the maximum size of the dimension of the + data as stored in the file. This value may be the special + “<a href="#UnlimitedDim">unlimited</a>” size which indicates + that the data may expand along this dimension indefinitely. + If these values are not stored, the maximum size of each + dimension is assumed to be the dimension’s current size. 
+ </p> + </td> + </tr> + + </table> + </div> + + + +<!-- +<br /> +<h4><a name="DataSpaceMessage">Header Message Name: Complex Dataspace (Fiber Bundle?)</a></h4> + + <!-- start msgdesc table -- + <center> + <table class="msgdesc"> + <p><b>Header Message Name: ???????</b></td></tr> + <b>Header Message Type: </b>0x0002<br /> + <b>Length:</b> Varies</td></tr> + + <b>Status:</b> One of the <em>Simple Dataspace</em> or + <em>Complex Dataspace</em> messages is required (but not both) and may + not be repeated.<br /> <b>Description:</b> The + <em>Dataspace</em> message describes space that the dataset is + mapped onto in a more comprehensive way than the <em>Simple + Dimensionality</em> message is capable of handling. The + dataspace of a dataset encompasses the type of coordinate system + used to locate the dataset’s elements as well as the structure and + regularity of the coordinate system. The dataspace also + describes the number of dimensions which the dataset inhabits as + well as a possible higher dimensional space in which the dataset + is located within. + + <br /> + <p><b>Format of Data:</b></p> + + <center> + <table border cellpadding="4" width="80%"> + <caption align="bottom"> + <b>HDF5 Dataspace Message Layout</b> + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align="center"> + <td colspan="4">Mesh Type</td> + </tr> + <tr align="center"> + <td colspan="4">Logical Dimensionality</td> + </tr> + </table> + </center> + + <br /> + <dl> + <dt>The elements of the dimensionality message are described below: + <dd> + <dl> + <dt>Mesh Type: (unsigned 32-bit integer) + <dd>This value indicates whether the grid is + polar/spherical/cartesion, + structured/unstructured and regular/irregular. 
<br /> + The mesh type value is broken up as follows: <br /> + + <br /> + <center> + <table border cellpadding="4" width="80%"> + <caption align="bottom"> + <b>HDF5 Mesh-type Layout</b> + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align="center"> + <td colspan="1">Mesh Embedding</td> + <td colspan="1">Coordinate System</td> + <td colspan="1">Structure</td> + <td colspan="1">Regularity</td> + </tr> + </table> + </center> + The following are the definitions of mesh-type bytes: + <dl> + <dt>Mesh Embedding + <dd>This value indicates whether the dataset dataspace + is located within + another dataspace or not: + <dl> <dl> + <dt><STANDALONE> + <dd>The dataset mesh is self-contained and is not + embedded in another mesh. + <dt><EMBEDDED> + <dd>The dataset’s dataspace is located within + another dataspace, as + described in information below. + </dl> </dl> + <dt>Coordinate System + <dd>This value defines the type of coordinate system + used for the mesh: + <dl> <dl> + <dt><POLAR> + <dd>The last two dimensions are in polar + coordinates, higher dimensions are + cartesian. + <dt><SPHERICAL> + <dd>The last three dimensions are in spherical + coordinates, higher dimensions + are cartesian. + <dt><CARTESIAN> + <dd>All dimensions are in cartesian coordinates. + </dl> </dl> + <dt>Structure + <dd>This value defines the locations of the grid-points + on the axes: + <dl> <dl> + <dt><STRUCTURED> + <dd>All grid-points are on integral, sequential + locations, starting from 0. + <dt><UNSTRUCTURED> + <dd>Grid-points locations in each dimension are + explicitly defined and + may be of any numeric datatype. + </dl> </dl> + <dt>Regularity + <dd>This value defines the locations of the dataset + points on the grid: + <dl> <dl> + <dt><REGULAR> + <dd>All dataset elements are located at the + grid-points defined. 
+ <dt><IRREGULAR> + <dd>Each dataset element has a particular + grid-location defined. + </dl> </dl> + </dl> + <p>The following grid combinations are currently allowed:</p> + <dl> <dl> + <dt><POLAR-STRUCTURED-REGULAR> + <dt><SPHERICAL-STRUCTURED-REGULAR> + <dt><CARTESIAN-STRUCTURED-REGULAR> + <dt><POLAR-UNSTRUCTURED-REGULAR> + <dt><SPHERICAL-UNSTRUCTURED-REGULAR> + <dt><CARTESIAN-UNSTRUCTURED-REGULAR> + <dt><CARTESIAN-UNSTRUCTURED-IRREGULAR> + </dl> </dl> + All of the above grid types can be embedded within another + dataspace. + <br /> <br /> + <dt>Logical Dimensionality: (unsigned 32-bit integer) + <dd>This value is the number of dimensions that the dataset occupies. + + <br /> + <center> + <table border cellpadding="4" width="80%"> + <caption align="bottom"> + <b>HDF5 Dataspace Embedded Dimensionality Information</b> + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align="center"> + <td colspan="4">Embedded Dimensionality</td> + </tr> + <tr align="center"> + <td colspan="4">Embedded Dimension Size #1</td> + </tr> + <tr align="center"> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr align="center"> + <td colspan="4">Embedded Dimension Size #n</td> + </tr> + <tr align="center"> + <td colspan="4">Embedded Origin Location #1</td> + </tr> + <tr align="center"> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr align="center"> + <td colspan="4">Embedded Origin Location #n</td> + </tr> + </table> + </center> + + <dt>Embedded Dimensionality: (unsigned 32-bit integer) + <dd>This value is the number of dimensions of the space the + dataset is located within: in other words, a planar dataset + located within a 3-D space, a 3-D dataset + which is a subset of another 3-D space, and so on. 
+ <dt>Embedded Dimension Size: (unsigned 32-bit integer) + <dd>These values are the sizes of the dimensions of the + embedded dataspace + that the dataset is located within. + <dt>Embedded Origin Location: (unsigned 32-bit integer) + <dd>These values comprise the location of the dataset’s + origin within the embedded dataspace. + </dl> + </dl> + [Comment: need some way to handle different orientations of the + dataset dataspace + within the embedded dataspace]<br /> + + <br /> + <center> + <table border cellpadding="4" width="80%"> + <caption align="bottom"> + <b>HDF5 Dataspace Structured/Regular Grid Information</b> + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align="center"> + <td colspan="4">Logical Dimension Size #1</td> + </tr> + <tr align="center"> + <td colspan="4">Logical Dimension Maximum #1</td> + </tr> + <tr align="center"> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr align="center"> + <td colspan="4">Logical Dimension Size #n</td> + </tr> + <tr align="center"> + <td colspan="4">Logical Dimension Maximum #n</td> + </tr> + </table> + </center> + + <br /> + <dl> + <dt>The elements of the dimensionality message are described below: + <dd> + <dl> + <dt>Logical Dimension Size #n: (unsigned 32-bit integer) + <dd>This value is the current size of the dimension of the + data as stored in + the file. The first dimension stored in the list of + dimensions is the slowest + changing dimension and the last dimension stored is the + fastest changing + dimension. + <dt>Logical Dimension Maximum #n: (unsigned 32-bit integer) + <dd>This value is the maximum size of the dimension of the + data as stored in + the file. This value may be the special value + <UNLIMITED> which + indicates that the data may expand along this dimension + indefinitely. 
+ </dl> + </dl> + <br /> + <center> + <table border cellpadding="4" width="80%"> + <caption align="bottom"> + <b>HDF5 Dataspace Structured/Irregular Grid Information</b> + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align="center"> + <td colspan="4"># of Grid Points in Dimension #1</td> + </tr> + <tr align="center"> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr align="center"> + <td colspan="4"># of Grid Points in Dimension #n</td> + </tr> + <tr align="center"> + <td colspan="4">Datatype of Grid Point Locations</td> + </tr> + <tr align="center"> + <td colspan="4">Location of Grid Points in Dimension #1</td> + </tr> + <tr align="center"> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr align="center"> + <td colspan="4">Location of Grid Points in Dimension #n</td> + </tr> + </table> + </center> + + <br /> + <center> + <table border cellpadding="4" width="80%"> + <caption align="bottom"> + <b>HDF5 Dataspace Unstructured Grid Information</b> + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align="center"> + <td colspan="4"># of Grid Points</td> + </tr> + <tr align="center"> + <td colspan="4">Datatype of Grid Point Locations</td> + </tr> + <tr align="center"> + <td colspan="4">Grid Point Locations<br />.<br />.<br /></td> + </tr> + </table> + </center> +--> + +<br /> +<h4><a name="LinkInfoMessage">IV.A.2.c. The Link Info Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Link Info</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0002 </td></tr> + <tr><td colspan="2"><b>Length:</b> Varies </td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated. 
</td></tr> + <tr><td><b>Description:</b></td> + <td>The link info message tracks variable information about the + current state of the links for a “new style” + group’s behavior. Variable information will be stored in + this message and constant information will be stored in the + <a href="#GroupInfoMessage">Group Info</a> message. + </td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Link Info + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Maximum Creation Index <em>(8 bytes, optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Fractal Heap Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of v2 B-tree for Name Index<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of v2 B-tree for Creation Order Index<sup>O</sup> <em>(optional)</em><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number for this message. 
This document describes + version 0.</p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This field determines various optional aspects of the link + info message: + + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, creation order for the links is tracked. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>If set, creation order for the links is indexed. + </td> + </tr> + <tr> + <td align="center"><code>2-7</code></td> + <td>Reserved</td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Maximum Creation Index</p></td> + <td><p>This 64-bit value is the maximum creation order index value + stored for a link in this group.</p> + <p>This field is present if bit 0 of <em>flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Fractal Heap Address</p></td> + <td> + <p> + This is the address of the fractal heap to store dense links. + Each link stored in the fractal heap is stored as a + <a href="#LinkMessage">Link Message</a>. + </p> + <p> + If there are no links in the group, or the group’s links + are stored “compactly” (as object header messages), this + value will be the <a href="#UndefinedAddress">undefined + address</a>. + </p> + </td> + </tr> + + <tr> + <td><p>Address of v2 B-tree for Name Index</p></td> + <td><p>This is the address of the version 2 B-tree to index names of links.</p> + <p>If there are no links in the group, or the group’s links + are stored “compactly” (as object header messages), this + value will be the <a href="#UndefinedAddress">undefined + address</a>. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Address of v2 B-tree for Creation Order Index</p></td> + <td><p>This is the address of the version 2 B-tree to index creation order of links.</p> + <p>If there are no links in the group, or the group’s links + are stored “compactly” (as object header messages), this + value will be the <a href="#UndefinedAddress">undefined + address</a>. + </p> + <p>This field exists if bit 1 of <em>flags</em> is set.</p> + </td> + </tr> + + </table> + </div> + + +<br /> +<h4><a name="DatatypeMessage">IV.A.2.d. The Datatype Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Datatype</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0003 + </td></tr> + <tr><td colspan="2"><b>Length:</b> Variable</td></tr> + <tr><td colspan="2"><b>Status:</b> Required for dataset or committed + datatype (formerly named datatype) objects; may not be repeated. + </td></tr> + <tr><td><b>Description:</b></td> + <td><p>The datatype message defines the datatype for each element + of a dataset or a common datatype for sharing between multiple + datasets. A datatype can describe an atomic type like a fixed- + or floating-point type or more complex types like a C struct + (compound datatype), array (array datatype) or C++ vector + (variable-length datatype).</p> + <p>Datatype messages that are part of a dataset object do not + describe how elements are related to one another; the dataspace + message is used for that purpose. 
Datatype messages that are part of + a committed datatype (formerly named datatype) message describe + a common datatype that can be shared by multiple datasets in the + file.</p> + </td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Datatype Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Class and Version</td> + <td>Class Bit Field, Bits 0-7</td> + <td>Class Bit Field, Bits 8-15</td> + <td>Class Bit Field, Bits 16-23</td> + </tr> + + <tr> + <td colspan="4">Size</td> + </tr> + + <tr> + <td colspan="4"><br /><br />Properties<br /><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Class and Version</p></td> + <td> + <p>The version of the datatype message and the datatype’s class + information are packed together in this field. The version + number is packed in the top 4 bits of the field and the class + is contained in the bottom 4 bits. + </p> + <p>The version number information is used for changes in the + format of the datatype message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Used by early versions of the library to encode + compound datatypes with explicit array fields. + See the compound datatype description below for + further details. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>Used when an array datatype needs to be encoded. 
+ </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>Used when a VAX byte-ordered type needs to be + encoded. Packs various other datatype classes more + efficiently also. + </td> + </tr> + </table></p> + + <p>The class of the datatype determines the format for the class + bit field and properties portion of the datatype message, which + are described below. The + following classes are currently defined: + + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Fixed-Point</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Floating-Point</td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Time</td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>String</td> + </tr> + + <tr> + <td align="center"><code>4</code></td> + <td>Bit field</td> + </tr> + + <tr> + <td align="center"><code>5</code></td> + <td>Opaque</td> + </tr> + + <tr> + <td align="center"><code>6</code></td> + <td>Compound</td> + </tr> + + <tr> + <td align="center"><code>7</code></td> + <td>Reference</td> + </tr> + + <tr> + <td align="center"><code>8</code></td> + <td>Enumerated</td> + </tr> + + <tr> + <td align="center"><code>9</code></td> + <td>Variable-Length</td> + </tr> + + <tr> + <td align="center"><code>10</code></td> + <td>Array</td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Class Bit Fields</p></td> + <td> + <p>The information in these bit fields is specific to each datatype + class and is described below. All bits not defined for a + datatype class are set to zero. + </p> + </td> + </tr> + + <tr> + <td><p>Size</p></td> + <td> + <p>The size of a datatype element in bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Properties</p></td> + <td> + <p>This variable-sized sequence of bytes encodes information + specific to each datatype class and is described for each class + below. 
If there is no property information specified for a + datatype class, the size of this field is zero bytes. + </p> + </td> + </tr> + + </table> + </div> + + + <br /> + <p>Class specific information for Fixed-Point Numbers (Class 0):</p> + + <div align="center"> + <table class="desc"> + <caption> + Fixed-point Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0</p></td> + <td><p><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big endian.</p></td> + </tr> + + <tr> + <td><p>1, 2</p></td> + <td><p><b>Padding type.</b> Bit 1 is the lo_pad bit and bit 2 + is the hi_pad bit. If a datum has unused bits at either + end, then the lo_pad or hi_pad bit is copied to those + locations.</p></td> + </tr> + + <tr> + <td><p>3</p></td> + <td><p><b>Signed.</b> If this bit is set then the fixed-point + number is in 2’s complement form.</p></td> + </tr> + + <tr> + <td><p>4-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Fixed-Point Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="2">Bit Offset</td> + <td colspan="2">Bit Precision</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Bit Offset</p></td> + <td> + <p>The bit offset of the first significant bit of the fixed-point + value within the datatype. The bit offset specifies the number + of bits “to the right of” the value (which are set to the + lo_pad bit value). + </p> + </td> + </tr> + + <tr> + <td><p>Bit Precision</p></td> + <td> + <p>The number of bits of precision of the fixed-point value + within the datatype. 
This value, combined with the datatype + element’s size and the Bit Offset field specifies the number + of bits “to the left of” the value (which are set to the + hi_pad bit value). + </p> + </td> + </tr> + + </table> + </div> + + + <br /> + <p>Class specific information for Floating-Point Numbers (Class 1):</p> + + <div align="center"> + <table class="desc"> + <caption> + Floating-Point Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0, 6</p></td> + <td><p><b>Byte Order.</b> These two non-contiguous bits specify the + “endianness” of the bytes in the datatype element. + <table class="list"> + <tr> + <th width="10%" align="center">Bit 6</th> + <th width="10%" align="center">Bit 0</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td align="center"><code>0</code></td> + <td>Byte order is little-endian + </td> + </tr> + <tr> + <td align="center"><code>0</code></td> + <td align="center"><code>1</code></td> + <td>Byte order is big-endian + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td align="center"><code>0</code></td> + <td>Reserved + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td align="center"><code>1</code></td> + <td>Byte order is VAX-endian + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>1, 2, 3</p></td> + <td><p><b>Padding type.</b> Bit 1 is the low bits pad type, bit 2 + is the high bits pad type, and bit 3 is the internal bits + pad type. If a datum has unused bits at either end or between + the sign bit, exponent, or mantissa, then the value of bit + 1, 2, or 3 is copied to those locations.</p></td> + </tr> + + <tr> + <td><p>4-5</p></td> + <td><p><b>Mantissa Normalization.</b> This 2-bit bit field specifies + how the most significant bit of the mantissa is managed. 
+ <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>No normalization + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>The most significant bit of the mantissa is always set + (except for 0.0). + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>The most significant bit of the mantissa is not stored, + but is implied to be set. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>7</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + + <tr> + <td><p>8-15</p></td> + <td><p><b>Sign Location.</b> This is the bit position of the sign + bit. Bits are numbered with the least significant bit zero.</p></td> + </tr> + + <tr> + <td><p>16-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Floating-Point Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="2">Bit Offset</td> + <td colspan="2">Bit Precision</td> + </tr> + + <tr> + <td>Exponent Location</td> + <td>Exponent Size</td> + <td>Mantissa Location</td> + <td>Mantissa Size</td> + </tr> + + <tr> + <td colspan="4">Exponent Bias</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Bit Offset</p></td> + <td> + <p>The bit offset of the first significant bit of the floating-point + value within the datatype. The bit offset specifies the number + of bits “to the right of” the value. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Bit Precision</p></td> + <td> + <p>The number of bits of precision of the floating-point value + within the datatype. + </p> + </td> + </tr> + + <tr> + <td><p>Exponent Location</p></td> + <td> + <p>The bit position of the exponent field. Bits are numbered with + the least significant bit number zero. + </p> + </td> + </tr> + + <tr> + <td><p>Exponent Size</p></td> + <td> + <p>The size of the exponent field in bits. + </p> + </td> + </tr> + + <tr> + <td><p>Mantissa Location</p></td> + <td> + <p>The bit position of the mantissa field. Bits are numbered with + the least significant bit number zero. + </p> + </td> + </tr> + + <tr> + <td><p>Mantissa Size</p></td> + <td> + <p>The size of the mantissa field in bits. + </p> + </td> + </tr> + + <tr> + <td><p>Exponent Bias</p></td> + <td> + <p>The bias of the exponent field. + </p> + </td> + </tr> + + </table> + </div> + + + <br /> + <p>Class specific information for Time (Class 2):</p> + + + <div align="center"> + <table class="desc"> + <caption> + Time Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0</p></td> + <td><p><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big endian.</p></td> + </tr> + + <tr> + <td><p>1-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Time Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="2">Bit Precision</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Bit Precision</p></td> + <td> + <p>The number of bits of precision of the time value. 
+ </p> + </td> + </tr> + + </table> + </div> + + + <br /> + <p>Class specific information for Strings (Class 3):</p> + + + <div align="center"> + <table class="desc"> + <caption> + String Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-3</p></td> + <td><p><b>Padding type.</b> This four-bit value determines the + type of padding to use for the string. The values are: + + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Null Terminate: A zero byte marks the end of the + string and is guaranteed to be present after + converting a long string to a short string. When + converting a short string to a long string the value is + padded with additional null characters as necessary. + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Null Pad: Null characters are added to the end of + the value during conversions from short values to long + values but conversion in the opposite direction simply + truncates the value. + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Space Pad: Space characters are added to the end of + the value during conversions from short values to long + values but conversion in the opposite direction simply + truncates the value. This is the Fortran + representation of the string. + </td> + </tr> + + <tr> + <td align="center"><code>3-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>4-7</p></td> + <td><p><b>Character Set.</b> The character set used to + encode the string. 
+ <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>ASCII character set encoding + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>UTF-8 character set encoding + </td> + </tr> + + <tr> + <td align="center"><code>2-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>8-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <p>There are no properties defined for the string class. + </p> + + + <p>Class specific information for bit fields (Class 4):</p> + + <div align="center"> + <table class="desc"> + <caption> + Bitfield Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0</p></td> + <td><p><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big endian.</p></td> + </tr> + + <tr> + <td><p>1, 2</p></td> + <td><p><b>Padding type.</b> Bit 1 is the lo_pad type and bit 2 + is the hi_pad type. If a datum has unused bits at either + end, then the lo_pad or hi_pad bit is copied to those + locations.</p></td> + </tr> + + <tr> + <td><p>3-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Bit Field Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="2">Bit Offset</td> + <td colspan="2">Bit Precision</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Bit Offset</p></td> + <td> + <p>The bit offset of the first significant bit of the bit field + within the datatype. 
The bit offset specifies the number + of bits “to the right of” the value. + </p> + </td> + </tr> + + <tr> + <td><p>Bit Precision</p></td> + <td> + <p>The number of bits of precision of the bit field + within the datatype. + </p> + </td> + </tr> + </table> + </div> + + + <br /> + <p>Class specific information for Opaque (Class 5):</p> + + <div align="center"> + <table class="desc"> + <caption> + Opaque Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-7</p></td> + <td><p>Length of ASCII tag in bytes.</p></td> + </tr> + + <tr> + <td><p>8-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Opaque Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />ASCII Tag<br /> + <br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>ASCII Tag</p></td> + <td> + <p>This NUL-terminated string provides a description for the + opaque type. It is NUL-padded to a multiple of 8 bytes. + </p> + </td> + </tr> + </table> + </div> + + + <br /> + <p>Class specific information for Compound (Class 6):</p> + + <div align="center"> + <table class="desc"> + <caption> + Compound Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-15</p></td> + <td><p><b>Number of Members.</b> This field contains the number + of members defined for the compound datatype. 
The member + definitions are listed in the Properties field of the data + type message.</p></td> + </tr> + + <tr> + <td><p>16-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + + <p>The Properties field of a compound datatype is a list of the + member definitions of the compound datatype. The member + definitions appear one after another with no intervening bytes. + The member types are described with a (recursively) encoded datatype + message.</p> + + <p>Note that the property descriptions are different for different + versions of the datatype message. Additionally note that the version + 0 datatype encoding is deprecated and has been replaced with later + encodings in versions of the HDF5 Library from the 1.4 release + onward.</p> + + + <div align="center"> + <table class="format"> + <caption> + Compound Properties Description for Datatype Version 1 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />Name<br /><br /></td> + </tr> + + <tr> + <td colspan="4">Byte Offset of Member</td> + </tr> + + <tr> + <td>Dimensionality</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4">Dimension Permutation</td> + </tr> + + <tr> + <td colspan="4">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4">Dimension #1 Size (required)</td> + </tr> + + <tr> + <td colspan="4">Dimension #2 Size (required)</td> + </tr> + + <tr> + <td colspan="4">Dimension #3 Size (required)</td> + </tr> + + <tr> + <td colspan="4">Dimension #4 Size (required)</td> + </tr> + + <tr> + <td colspan="4"><br />Member Type Message<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Name</p></td> + <td> + <p>This NUL-terminated string is the name of the + member. 
It is NUL-padded to a multiple of 8 bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Byte Offset of Member</p></td> + <td> + <p>This is the byte offset of the member within the datatype. + </p> + </td> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td> + <p>If set to zero, this field indicates a scalar member. If set + to a value greater than zero, this field indicates that the + member is an array of values. For array members, the size of + the array is indicated by the ‘Dimension #n Size’ fields in + this message. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension Permutation</p></td> + <td> + <p>This field was intended to allow an array field to have + its dimensions permuted, but this was never implemented. + This field should always be set to zero. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td> + <p>This field is the size of a dimension of the array field as + stored in the file. The first dimension stored in the list of + dimensions is the slowest changing dimension and the last + dimension stored is the fastest changing dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Member Type Message</p></td> + <td> + <p>This field is a datatype message describing the datatype of + the member. 
+ </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Compound Properties Description for Datatype Version 2 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />Name<br /><br /></td> + </tr> + + <tr> + <td colspan="4">Byte Offset of Member</td> + </tr> + + <tr> + <td colspan="4"><br />Member Type Message<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Name</p></td> + <td> + <p>This NUL-terminated string is the name of the + member. It is NUL-padded to a multiple of 8 bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Byte Offset of Member</p></td> + <td> + <p>This is the byte offset of the member within the datatype. + </p> + </td> + </tr> + + <tr> + <td><p>Member Type Message</p></td> + <td> + <p>This field is a datatype message describing the datatype of + the member. + </p> + </td> + </tr> + + </table> + </div> + + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Compound Properties Description for Datatype Version 3 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />Name<br /><br /></td> + </tr> + + <tr> + <td colspan="4">Byte Offset of Member <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Member Type Message<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Name</p></td> + <td><p>This NUL-terminated string is the name of the + member. 
It is <em>not</em> NUL-padded to a multiple of 8 + bytes.</p></td> + </tr> + + <tr> + <td><p>Byte Offset of Member</p></td> + <td><p>This is the byte offset of the member within the datatype. + The field size is the minimum number of bytes necessary, + based on the size of the datatype element. For example, a + datatype element size of less than 256 bytes uses a 1 byte + length, a datatype element size of 256-65535 bytes uses a + 2 byte length, and so on.</p></td> + </tr> + + <tr> + <td><p>Member Type Message</p></td> + <td><p>This field is a datatype message describing the datatype of + the member.</p></td> + </tr> + + </table> + </div> + + + <br /> + <p>Class specific information for Reference (Class 7):</p> + + <div align="center"> + <table class="desc"> + <caption> + Reference Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-3</p></td> + <td><p><b>Type.</b> This four-bit value contains the type of reference + described. The values defined are: + + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Object Reference: A reference to another object in this + HDF5 file. + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Dataset Region Reference: A reference to a region within + a dataset in this HDF5 file. + </td> + </tr> + + <tr> + <td align="center"><code>2-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>4-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <p>There are no properties defined for the reference class. 
+ </p> + + + <br /> + <p>Class specific information for Enumeration (Class 8):</p> + + <div align="center"> + <table class="desc"> + <caption> + Enumeration Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-15</p></td> + <td><p><b>Number of Members.</b> The number of name/value + pairs defined for the enumeration type.</p></td> + </tr> + + <tr> + <td><p>16-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Enumeration Property Description for Datatype Versions 1 & 2 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />Base Type<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Names<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Values<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Base Type</p></td> + <td> + <p>Each enumeration type is based on some parent type, usually an + integer. The information for that parent type is described + recursively by this field. + </p> + </td> + </tr> + + <tr> + <td><p>Names</p></td> + <td> + <p>The name for each name/value pair. Each name is stored as a + null-terminated ASCII string padded to a multiple of eight bytes. The names + are in no particular order. + </p> + </td> + </tr> + + <tr> + <td><p>Values</p></td> + <td> + <p>The list of values in the same order as the names. The values + are packed (no inter-value padding) and the size of each value + is determined by the parent type. 
+ </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Enumeration Property Description for Datatype Version 3 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />Base Type<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Names<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Values<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Base Type</p></td> + <td> + <p>Each enumeration type is based on some parent type, usually an + integer. The information for that parent type is described + recursively by this field. + </p> + </td> + </tr> + + <tr> + <td><p>Names</p></td> + <td> + <p>The name for each name/value pair. Each name is stored as a null + terminated ASCII string, <em>not</em> padded to a multiple of + eight bytes. The names are in no particular order. + </p> + </td> + </tr> + + <tr> + <td><p>Values</p></td> + <td> + <p>The list of values in the same order as the names. The values + are packed (no inter-value padding) and the size of each value + is determined by the parent type. + </p> + </td> + </tr> + + </table> + </div> + + + + <br /> + <p>Class specific information for Variable-Length (Class 9):</p> + + <div align="center"> + <table class="desc"> + <caption> + Variable-Length Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-3</p></td> + <td><p><b>Type.</b> This four-bit value contains the type of + variable-length datatype described. 
The values defined are: + + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Sequence: A variable-length sequence of any datatype. + Variable-length sequences do not have padding or + character set information. + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>String: A variable-length sequence of characters. + Variable-length strings have padding and character set + information. + </td> + </tr> + + <tr> + <td align="center"><code>2-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>4-7</p></td> + <td><p><b>Padding type.</b> (variable-length string only) + This four-bit value determines the type of padding + used for variable-length strings. The values are the same + as for the string padding type, as follows: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Null terminate: A zero byte marks the end of a string + and is guaranteed to be present after converting a long + string to a short string. When converting a short string + to a long string, the value is padded with additional null + characters as necessary. + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Null pad: Null characters are added to the end of the + value during conversion from a short string to a longer + string. Conversion from a long string to a shorter string + simply truncates the value. + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Space pad: Space characters are added to the end of the + value during conversion from a short string to a longer + string. Conversion from a long string to a shorter string + simply truncates the value. This is the Fortran + representation of the string. 
+ </td> + </tr> + + <tr> + <td align="center"><code>3-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + + <p>This value is set to zero for variable-length sequences.</p> + + </td> + </tr> + + <tr> + <td><p>8-11</p></td> + <td><p><b>Character Set.</b> (variable-length string only) + This four-bit value specifies the character set + to be used for encoding the string: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>ASCII character set encoding + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>UTF-8 character set encoding + </td> + </tr> + + <tr> + <td align="center"><code>2-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + + <p>This value is set to zero for variable-length sequences.</p> + + </td> + </tr> + + <tr> + <td><p>12-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Variable-Length Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />Base Type<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="10%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Base Type</p></td> + <td> + <p>Each variable-length type is based on some parent type. The + information for that parent type is described recursively by + this field. + </p> + </td> + </tr> + + </table> + </div> + + + <br /> + <p>Class specific information for Array (Class 10):</p> + + <p>There are no bit fields defined for the array class. + </p> + + <p>Note that the dimension information defined in the property for this + datatype class is independent of dataspace information for a dataset. 
+ The dimension information here describes the dimensionality of the + information within a data element (or a component of an element, if the + array datatype is nested within another datatype) and the dataspace for a + dataset describes the size and locations of the elements in a dataset. + </p> + + + <div align="center"> + <table class="format"> + <caption> + Array Property Description for Datatype Version 2 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td>Dimensionality</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4">Dimension #1 Size</td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Dimension #n Size</td> + </tr> + + <tr> + <td colspan="4">Permutation Index #1</td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Permutation Index #n</td> + </tr> + + <tr> + <td colspan="4"><br />Base Type<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td> + <p>This value is the number of dimensions that the array has. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td> + <p>This value is the size of the dimension of the array + as stored in the file. The first dimension stored in + the list of dimensions is the slowest changing dimension + and the last dimension stored is the fastest changing + dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Permutation Index #n</p></td> + <td> + <p>This value is the index permutation used to map + each dimension from the canonical representation to an + alternate axis for each dimension. Currently, dimension + permutations are not supported, and these indices should + be set to the index position minus one. 
In other words, + the first dimension should be set to 0, the second dimension + should be set to 1, and so on. + </p> + </td> + </tr> + + <tr> + <td><p>Base Type</p></td> + <td> + <p>Each array type is based on some parent type. The + information for that parent type is described recursively by + this field. + </p> + </td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Array Property Description for Datatype Version 3 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td>Dimensionality</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Dimension #1 Size</td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Dimension #n Size</td> + </tr> + + <tr> + <td colspan="4"><br />Base Type<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td> + <p>This value is the number of dimensions that the array has. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td> + <p>This value is the size of the dimension of the array + as stored in the file. The first dimension stored in + the list of dimensions is the slowest changing dimension + and the last dimension stored is the fastest changing + dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Base Type</p></td> + <td> + <p>Each array type is based on some parent type. The + information for that parent type is described recursively by + this field. + </p> + </td> + </tr> + + </table> + </div> + + + +<br /> +<h4><a name="OldFillValueMessage">IV.A.2.e. 
The Data Storage - +Fill Value (Old) Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Fill Value + (old)</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0004</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td><p>The fill value message stores a single data value which + is returned to the application when an uninitialized data element + is read from a dataset. The fill value is interpreted with the + same datatype as the dataset. If no fill value message is present + then a fill value of all zero bytes is assumed.</p> + <p>This fill value message is deprecated in favor of the + “new” fill value message (Message Type 0x0005) and + is only written to the file for forward compatibility with + versions of the HDF5 Library before the 1.6.0 version. + Additionally, it only appears for datasets with a user-defined + fill value (as opposed to the library default fill value or an + explicitly set “undefined” fill value).</p> + </td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Fill Value Message (Old) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Size</td> + </tr> + + <tr> + <td colspan="4"><br />Fill Value <em>(optional, variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Size</p></td> + <td> + <p>This is the size of the Fill Value field in bytes. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Fill Value</p></td> + <td> + <p>The fill value. The bytes of the fill value are interpreted + using the same datatype as for the dataset. + </p> + </td> + </tr> + </table> + </div> + + +<br /> +<h4><a name="FillValueMessage">IV.A.2.f. The Data Storage - +Fill Value Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Fill + Value</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0005</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Required for dataset objects; + may not be repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The fill value message stores a single data value which is + returned to the application when an uninitialized data element + is read from a dataset. The fill value is interpreted with the + same datatype as the dataset.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Fill Value Message - Versions 1 & 2 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Space Allocation Time</td> + <td>Fill Value Write Time</td> + <td>Fill Value Defined</td> + </tr> + + <tr> + <td colspan="4">Size <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Fill Value <em>(optional, variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number information is used for changes in the + format of the fill value message and is described here: + <table class="list"> + <tr> + <th width="20%" 
align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Initial version of this message. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>In this version, the Size and Fill Value fields are + only present if the Fill Value Defined field is set + to 1. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>This version packs the other fields in the message + more efficiently than version 2. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Space Allocation Time</p></td> + <td> + <p>When the storage space for the dataset’s raw data will be + allocated. The allowed values are: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Not used. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Early allocation. Storage space for the entire dataset + should be allocated in the file when the dataset is + created. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>Late allocation. Storage space for the entire dataset + should not be allocated until the dataset is written + to. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>Incremental allocation. Storage space for the + dataset should not be allocated until the portion + of the dataset is written to. This is currently + used in conjunction with chunked data storage for + datasets. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Fill Value Write Time</p></td> + <td> + <p>At the time that storage space for the dataset’s raw data is + allocated, this value indicates whether the fill value should + be written to the raw data storage elements.
The allowed values + are: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>On allocation. The fill value is always written to + the raw data storage when the storage space is allocated. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Never. The fill value should never be written to + the raw data storage. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>Fill value written if set by user. The fill value + will be written to the raw data storage when the storage + space is allocated only if the user explicitly set + the fill value. If the fill value is the library + default or is undefined, it will not be written to + the raw data storage. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Fill Value Defined</p></td> + <td> + <p>This value indicates if a fill value is defined for this + dataset. If this value is 0, the fill value is undefined. + If this value is 1, a fill value is defined for this dataset. + For version 2 or later of the fill value message, this value + controls the presence of the Size and Fill Value fields. + </p> + </td> + </tr> + + <tr> + <td><p>Size</p></td> + <td> + <p>This is the size of the Fill Value field in bytes. This field + is not present if the Version field is greater than 1, + and the Fill Value Defined field is set to 0. + </p> + </td> + </tr> + + <tr> + <td><p>Fill Value</p></td> + <td> + <p>The fill value. The bytes of the fill value are interpreted + using the same datatype as for the dataset. This field is + not present if the Version field is greater than 1, + and the Fill Value Defined field is set to 0. 
+ </p> + </td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Fill Value Message - Version 3 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Size <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Fill Value <em>(optional, variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number information is used for changes in the + format of the fill value message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Initial version of this message. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>In this version, the Size and Fill Value fields are + only present if the Fill Value Defined field is set + to 1. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>This version packs the other fields in the message + more efficiently than version 2. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td> + <p>This field packs the space allocation time, fill value write time, + and fill value defined/undefined settings into the bits of a single byte.
The allowed values are: + <table class="list"> + <tr> + <th width="20%" align="center">Bits</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0-1</code></td> + <td>Space Allocation Time, with the same + values as versions 1 and 2 of the message. + </td> + </tr> + <tr> + <td align="center"><code>2-3</code></td> + <td>Fill Value Write Time, with the same + values as versions 1 and 2 of the message. + </td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>Fill Value Undefined, indicating that the fill + value has been marked as “undefined” for this dataset. + Bits 4 and 5 cannot both be set. + </td> + </tr> + <tr> + <td align="center"><code>5</code></td> + <td>Fill Value Defined, with the same values as + versions 1 and 2 of the message. + Bits 4 and 5 cannot both be set. + </td> + </tr> + <tr> + <td align="center"><code>6-7</code></td> + <td>Reserved (zero). + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Size</p></td> + <td> + <p>This is the size of the Fill Value field in bytes. This field + is not present if the Version field is greater than 1, + and the Fill Value Defined flag is set to 0. + </p> + </td> + </tr> + + <tr> + <td><p>Fill Value</p></td> + <td> + <p>The fill value. The bytes of the fill value are interpreted + using the same datatype as for the dataset. This field is + not present if the Version field is greater than 1, + and the Fill Value Defined flag is set to 0. + </p> + </td> + </tr> + </table> + </div> + + +<br /> +<h4><a name="LinkMessage">IV.A.2.g. The Link Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Link</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0006</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies </td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may be + repeated. 
</td></tr> + <tr><td><b>Description:</b></td> + <td><p>This message encodes the information for a link in a + group’s object header, when the group is storing its links + “compactly”, or in the group’s fractal heap, + when the group is storing its links “densely”.</p> + <p>A group is storing its links compactly when the fractal heap + address in the <em><a href="#LinkInfoMessage">Link Info + Message</a></em> is set to the “undefined address” + value.</p></td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Link Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td>Link type <em>(optional)</em></td> + <td bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4"><br />Creation Order <em>(8 bytes, optional)</em><br /><br /></td> + </tr> + <tr> + <td>Link Name Character Set <em>(optional)</em></td> + <td>Length of Link Name (variable size)</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Link Name (variable size)</td> + </tr> + <tr> + <td colspan="4"><br />Link Information (variable size)<br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This document describes version 1.</p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This field contains information about the link and controls + the presence of other fields below. 
+ <table class="list"> + <tr> + <th width="20%" align="center">Bits</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0-1</code></td> + <td>Determines the size of the <em>Length of Link Name</em> + field. + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>The size of the <em>Length of Link Name</em> + field is 1 byte. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>The size of the <em>Length of Link Name</em> + field is 2 bytes. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>The size of the <em>Length of Link Name</em> + field is 4 bytes. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>The size of the <em>Length of Link Name</em> + field is 8 bytes. + </td> + </tr> + </table> + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>Creation Order Field Present: if set, the <em>Creation + Order</em> field is present. If not set, creation order + information is not stored for links in this group. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>Link Type Field Present: if set, the link is not + a hard link and the <em>Link Type</em> field is present. + If not set, the link is a hard link. + </td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>Link Name Character Set Field Present: if set, the + link name is not represented with the ASCII character + set and the <em>Link Name Character Set</em> field is + present. If not set, the link name is represented with + the ASCII character set. + </td> + </tr> + <tr> + <td align="center"><code>5-7</code></td> + <td>Reserved (zero). 
+ </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Link type</p></td> + <td><p>This is the link class type and can be one of the following + values: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>A hard link (should never be stored in the file) + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>A soft link. + </td> + </tr> + <tr> + <td align="center"><code>2-63</code></td> + <td>Reserved for future HDF5 internal use. + </td> + </tr> + <tr> + <td align="center"><code>64</code></td> + <td>An external link. + </td> + </tr> + <tr> + <td align="center"><code>65-255</code></td> + <td>Reserved, but available for user-defined link types. + </td> + </tr> + </table></p> + + <p>This field is present if bit 3 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Creation Order</p></td> + <td><p>This 64-bit value is an index of the link’s creation time within + the group. Values start at 0 when the group is created and increment + by one for each link added to the group. Removing a link from a + group does not change existing links’ creation order field.
+ </p> + <p>This field is present if bit 2 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Link Name Character Set</p></td> + <td><p>This is the character set for encoding the link’s name: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>ASCII character set encoding (this should never be stored + in the file) + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>UTF-8 character set encoding + </td> + </tr> + </table></p> + + <p>This field is present if bit 4 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Length of link name</p></td> + <td><p>This is the length of the link’s name. The size of this field + depends on bits 0 and 1 of <em>Flags</em>.</p> + </td> + </tr> + + <tr> + <td><p>Link name</p></td> + <td><p>This is the name of the link, non-NULL terminated.</p> + </td> + </tr> + + <tr> + <td><p>Link information</p></td> + <td><p>The format of this field depends on the <em>link type</em>.</p> + <p>For <b>hard</b> links, the field is formatted as follows: + + <table class="list"> + <tr> + <td width="20%"><i>Size of Offsets</i> bytes:</td> + <td width="80%">The address of the object header for the object that the + link points to. + </td> + </tr> + </table> + </p> + + <p> + For <b>soft</b> links, the field is formatted as follows: + + <table class="list"> + <tr> + <td width="20%">Bytes 1-2:</td> + <td width="80%">Length of soft link value.</td> + </tr> + <tr> + <td><em>Length of soft link value</em> bytes:</td> + <td>A non-NULL-terminated string storing the value of the + soft link. 
+ </td> + </tr> + </table> + </p> + + <p> + For <b>external</b> links, the field is formatted as follows: + + <table class="list"> + <tr> + <td width="20%">Bytes 1-2:</td> + <td width="80%">Length of external link value.</td> + </tr> + <tr> + <td><em>Length of external link value</em> bytes:</td> + <td>The first byte contains the version number in the + upper 4 bits and flags in the lower 4 bits for the external + link. Both version and flags are defined to be zero in + this document. The remaining bytes consist of two + NULL-terminated strings, with no padding between them. + The first string is the name of the HDF5 file containing + the object linked to and the second string is the full path + to the object linked to, within the HDF5 file’s + group hierarchy. + </td> + </tr> + </table> + </p> + + <p> + For <b>user-defined</b> links, the field is formatted as follows: + + <table class="list"> + <tr> + <td width="20%">Bytes 1-2:</td> + <td width="80%">Length of user-defined data.</td> + </tr> + <tr> + <td><em>Length of user-defined link value</em> bytes:</td> + <td>The data supplied for the user-defined link type.</td> + </tr> + </table> + </p> + + </td> + </tr> + </table> + </div> + +<br /> +<h4><a name="ExternalFileListMessage">IV.A.2.h. The Data Storage - +External Data Files Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> External + Data Files</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0007</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The external data storage message indicates that the data + for an object is stored outside the HDF5 file. The filename of + the object is stored as a Uniform Resource Locator (URL) of + the actual filename containing the data.
An external file list + record also contains the byte offset of the start of the data + within the file and the amount of space reserved in the file + for that data.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + External File List Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="2">Allocated Slots</td> + <td colspan="2">Used Slots</td> + </tr> + + <tr> + <td colspan="4"><br />Heap Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Slot Definitions...<br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number information is used for changes in the format of + External Data Storage Message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center"><code>0</code></td> + <td>Never used.</td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>The current version used by the library.</td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Allocated Slots</p></td> + <td> + <p>The total number of slots allocated in the message. Its value must be at least as + large as the value contained in the Used Slots field. 
(The current library simply + uses the number of Used Slots for this message)</p> + </td> + </tr> + + <tr> + <td><p>Used Slots</p></td> + <td> + <p>The number of initial slots which contains valid information.</p> + </td> + </tr> + + <tr> + <td><p>Heap Address</p></td> + <td> + <p>This is the address of a local heap which contains the names for the external + files (The local heap information can be found in Disk Format Level 1D in this + document). The name at offset zero in the heap is always the empty string.</p> + </td> + </tr> + + <tr> + <td><p>Slot Definitions</p></td> + <td> + <p>The slot definitions are stored in order according to the array addresses they + represent.</p> + </td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + External File List Slot + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Name Offset in Local Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Offset in External Data File<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Data Size in External File<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Name Offset in Local Heap</p></td> + <td> + <p>The byte offset within the local name heap for the name + of the file. File names are stored as a URL which has a + protocol name, a host name, a port number, and a file + name: + <code><em>protocol</em>:<em>port</em>//<em>host</em>/<em>file</em></code>. 
+ If the protocol is omitted then “file:” is assumed. If + the port number is omitted then a default port for that + protocol is used. If both the protocol and the port + number are omitted then the colon can also be omitted. If + the double slash and host name are omitted then + “localhost” is assumed. The file name is the only + mandatory part, and if the leading slash is missing then + it is relative to the application’s current working + directory (the use of relative names is not + recommended). + </p> + </td> + </tr> + + <tr> + <td><p>Offset in External Data File</p></td> + <td> + <p>This is the byte offset to the start of the data in the + specified file. For files that contain data for a single + dataset this will usually be zero.</p> + </td> + </tr> + + <tr> + <td><p>Data Size in External File</p></td> + <td> + <p>This is the total number of bytes reserved in the + specified file for raw data storage. For a file that + contains exactly one complete dataset which is not + extendable, the size will usually be the exact size of the + dataset. However, by making the size larger one allows + HDF5 to extend the dataset. The size can be set to a value + larger than the entire file since HDF5 will read zeroes + past the end of the file without failing.</p> + </td> + </tr> + </table> + </div> + + +<br /> +<h4><a name="LayoutMessage">IV.A.2.i. The Data Storage - Layout +Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Data Storage - + Layout</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0008</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Required for datasets; may not + be repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>Data layout describes how the elements of a multi-dimensional + array are stored in the HDF5 file. 
Three types of data layout + are supported: + <ol> + <li>Contiguous: The array is stored in one contiguous area of + the file. This layout requires that the size of the array be + constant: data manipulations such as chunking, compression, + checksums, or encryption are not permitted. The message stores + the total storage size of the array. The offset of an element + from the beginning of the storage area is computed as in a C + array.</li> + <li>Chunked: The array domain is regularly decomposed into + chunks, and each chunk is allocated and stored separately. This + layout supports arbitrary element traversals, compression, + encryption, and checksums. (these features are described + in other messages). The message stores the size of a chunk + instead of the size of the entire array; the storage size of + the entire array can be calculated by traversing the B-tree + that stores the chunk addresses.</li> + <li>Compact: The array is stored in one contiguous block, as + part of this object header message.</li> + </ol></td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Data Layout Message (Versions 1 and 2) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Dimensionality</td> + <td>Layout Class</td> + <td>Reserved <em>(zero)</em></td> + </tr> + + <tr> + <td colspan="4">Reserved <em>(zero)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Data Address<sup>O</sup> <em>(optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Dimension 0 Size</td> + </tr> + + <tr> + <td colspan="4">Dimension 1 Size</td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Dimension #n Size</td> + </tr> + + <tr> + <td colspan="4">Dataset Element Size <em>(optional)</em></td> + 
</tr> + + <tr> + <td colspan="4">Compact Data Size <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Compact Data... <em>(variable size, optional)</em><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number information is used for changes in the format of the data + layout message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used.</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Used by version 1.4 and before of the library to encode layout information. + Data space is always allocated when the data set is created.</td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Used by version 1.6.x of the library to encode layout information. + Data space is allocated only when it is necessary.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td><p>An array has a fixed dimensionality. This field + specifies the number of dimension size fields later in the + message. The value stored for chunked storage is 1 greater than + the number of dimensions in the dataset’s dataspace. + For example, 2 is stored for a 1 dimensional dataset. + </p> + </td> + </tr> + + <tr> + <td><p>Layout Class</p></td> + <td><p>The layout class specifies the type of storage for the data + and how the other fields of the layout message are to be + interpreted. 
+ + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Compact Storage + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Contiguous Storage + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Chunked Storage + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Data Address</p></td> + <td><p>For contiguous storage, this is the address of the raw + data in the file. For chunked storage this is the address + of the v1 B-tree that is used to look up the addresses of the + chunks. This field is not present for compact storage. + If the version for this message is greater than 1, the address + may have the “undefined address” value, to indicate that + storage has not yet been allocated for this array.</p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td><p>For contiguous and compact storage the dimensions define + the entire size of the array while for chunked storage they define + the size of a single chunk. In all cases, they are in units of + array elements (not bytes). The first dimension stored in the list + of dimensions is the slowest changing dimension and the last + dimension stored is the fastest changing dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Dataset Element Size</p></td> + <td><p>The size of a dataset element, in bytes. This field is only + present for chunked storage. + </p> + </td> + </tr> + + <tr> + <td><p>Compact Data Size</p></td> + <td><p>This field is only present for compact data storage. + It contains the size of the raw data for the dataset array, in + bytes.</p> + </td> + </tr> + + <tr> + <td><p>Compact Data</p></td> + <td><p>This field is only present for compact data storage. 
+ It contains the raw data for the dataset array.</p> + </td> + </tr> + </table> + </div> + + <br /> + <p>Version 3 of this message re-structured the format into specific + properties that are required for each layout class.</p> + + + <div align="center"> + <table class="format"> + <caption> + <b>Data Layout Message (Version 3)</b> + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Layout Class</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Properties <em>(variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number information is used for changes in the format of layout message + and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>Used by the version 1.6.3 and later of the library to store properties + for each layout class.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Layout Class</p></td> + <td><p>The layout class specifies the type of storage for the data + and how the other fields of the layout message are to be + interpreted. 
+ <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Compact Storage + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Contiguous Storage + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Chunked Storage + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Properties</p></td> + <td><p>This variable-sized field encodes information specific to each + layout class and is described below. If there is no property + information specified for a layout class, the size of this field + is zero bytes.</p></td> + </tr> + </table> + </div> + + <br /> + <p>Class-specific information for compact layout (Class 0): (Note: The dimensionality information + is in the Dataspace message)</p> + + + <div align="center"> + <table class="format"> + <caption> + Compact Storage Property Description + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="2">Size</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Raw Data... <em>(variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Size</p></td> + <td><p>This field contains the size of the raw data for the dataset + array, in bytes. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Raw Data</p></td> + <td><p>This field contains the raw data for the dataset array.</p></td> + </tr> + </table> + </div> + + + <br /> + <p>Class-specific information for contiguous layout (Class 1): (Note: The dimensionality information + is in the Dataspace message)</p> + + + <div align="center"> + <table class="format"> + <caption> + Contiguous Storage Property Description + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Size<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>This is the address of the raw data in the file. + The address may have the “undefined address” value, to indicate + that storage has not yet been allocated for this array.</p></td> + </tr> + + <tr> + <td><p>Size</p></td> + <td><p>This field contains the size allocated to store the raw data, + in bytes. 
+ </p> + </td> + </tr> + </table> + </div> + + + <br /> + <p>Class-specific information for chunked layout (Class 2):</p> + + + <div align="center"> + <table class="format"> + <caption> + Chunked Storage Property Description + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Dimensionality</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Dimension 0 Size</td> + </tr> + + <tr> + <td colspan="4">Dimension 1 Size</td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Dimension #n Size</td> + </tr> + + <tr> + <td colspan="4">Dataset Element Size</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td><p>A chunk has a fixed dimensionality. This field specifies + the number of dimension size fields later in the message.</p></td> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>This is the address of the v1 B-tree that is used to look up the + addresses of the chunks that actually store portions of the array + data. The address may have the “undefined address” value, to + indicate that storage has not yet been allocated for this array.</p></td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td><p>These values define the dimension size of a single chunk, in + units of array elements (not bytes). 
The first dimension stored in + the list of dimensions is the slowest changing dimension and the + last dimension stored is the fastest changing dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Dataset Element Size</p></td> + <td><p>The size of a dataset element, in bytes. + </p> + </td> + </tr> + </table> + </div> + +<br /> +<h4><a name="BogusMessage">IV.A.2.j. The Bogus Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Bogus</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0009</td></tr> + <tr><td colspan="2"><b>Length:</b> 4 bytes</td></tr> + <tr><td colspan="2"><b>Status:</b> For testing only; should never + be stored in a valid file.</td></tr> + <tr><td><b>Description:</b></td> + <td>This message is used for testing the HDF5 Library’s + response to an “unknown” message type and should + never be encountered in a valid HDF5 file.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Bogus Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Bogus Value</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Bogus Value</p></td> + <td> + <p>This value should always be: <code>0xdeadbeef</code>.</p> + </td> + </tr> + </table> + </div> + +<br /> +<h4><a name="GroupInfoMessage">IV.A.2.k. 
The Group Info Message +</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Group Info</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x000A</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td><p>This message stores information for the constants defining + a “new style” group’s behavior. Constant + information will be stored in this message and variable + information will be stored in the + <a href="#LinkInfoMessage">Link Info</a> message.</p> + <p>Note: the “estimated entry” information below is + used when determining the size of the object header for the + group when it is created.</p></td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Group Info Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2">Link Phase Change: Maximum Compact Value <em>(optional)</em></td> + </tr> + <tr> + <td colspan="2">Link Phase Change: Minimum Dense Value <em>(optional)</em></td> + <td colspan="2">Estimated Number of Entries <em>(optional)</em></td> + </tr> + <tr> + <td colspan="2">Estimated Link Name Length of Entries <em>(optional)</em></td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. 
This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This is the group information flag with the following definition: + + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, link phase change values are stored. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>If set, the estimated entry information is non-default + and is stored. + </td> + </tr> + <tr> + <td align="center"><code>2-7</code></td> + <td>Reserved</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Link Phase Change: Maximum Compact Value</p></td> + <td><p>This is the maximum number of links to store “compactly” (in + the group’s object header).</p> + <p>This field is present if bit 0 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Link Phase Change: Minimum Dense Value</p></td> + <td><p>This is the minimum number of links to store “densely” (in + the group’s fractal heap). 
The fractal heap’s address is + located in the <a href="#LinkInfoMessage">Link Info</a> + message.</p> + <p>This field is present if bit 0 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Estimated Number of Entries</p></td> + <td><p>This is the estimated number of entries in groups.</p> + <p>If this field is not present, the default value of <code>4</code> + will be used for the estimated number of group entries.</p> + <p>This field is present if bit 1 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Estimated Link Name Length of Entries</p></td> + <td><p>This is the estimated length of an entry’s link name.</p> + <p>If this field is not present, the default value of <code>8</code> + will be used for the estimated link name length of group entries.</p> + <p>This field is present if bit 1 of <em>Flags</em> is set.</p> + </td> + </tr> + + </table> + </div> + +<br /> +<h4><a name="FilterMessage">IV.A.2.l. The Data Storage - Filter +Pipeline Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> + Data Storage - Filter Pipeline</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x000B</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td><p>This message describes the filter pipeline which should + be applied to the data stream by providing filter identification + numbers, flags, a name, and client data.</p> + <p>This message may be present in the object headers of both + dataset and group objects. For datasets, it specifies the + filters to apply to raw data. For groups, it specifies the + filters to apply to the group’s fractal heap. 
Currently, + only datasets using chunked data storage use the filter + pipeline on their raw data.</p></td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Filter Pipeline Message - Version 1 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Number of Filters</td> + <td colspan="2">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Filter Description List <em>(variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This table + describes version 1.</p></td> + </tr> + + <tr> + <td><p>Number of Filters</p></td> + <td><p>The total number of filters described in this + message. The maximum possible number of filters in a + message is 32.</p></td> + </tr> + + <tr> + <td><p>Filter Description List</p></td> + <td><p>A description of each filter. 
A filter description + appears in the next table.</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Filter Description + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="2">Filter Identification Value</td> + <td colspan="2">Name Length</td> + </tr> + + <tr> + <td colspan="2">Flags</td> + <td colspan="2">Number Client Data Values</td> + </tr> + + <tr> + <td colspan="4"><br />Name <em>(variable size, optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Client Data <em>(variable size, optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Padding <em>(variable size, optional)</em></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Filter Identification Value</p></td> + <td> + <p> + This value, often referred to as a filter identifier, + is designed to be a unique identifier for the filter. + Values from zero through 32,767 are reserved for filters + supported by The HDF Group in the HDF5 Library and for + filters requested and supported by third parties. + Filters supported by The HDF Group are documented immediately + below. Information on 3rd-party filters can be found at + The HDF Group’s + <a href="http://www.hdfgroup.org/services/contributions.html"> + Contributions</a> page.</p> + + <p> + To request a filter identifier, please contact + The HDF Group’s Help Desk at + <img src="Graphics/help.png" valign="middle" height="14" + alt="The HDF Group Help Desk">. 
+ You will be asked to provide the following information:</p> + <ol> + <li>Contact information for the developer requesting the + new identifier</li> + <li>A short description of the new filter</li> + <li>Links to any relevant information, including licensing + information</li> + </ol> + <p> + Values from 32768 to 65535 are reserved for non-distributed uses + (for example, internal company usage) or for application usage + when testing a feature. The HDF Group does not track or document + the use of the filters with identifiers from this range.</p> + + <p> + The filters currently in library version 1.8.0 are + listed below: + + <table class="list"> + <tr> + <th width="20%" align="center">Identification</th> + <th width="15%" align="left">Name</th> + <th width="65%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>N/A</td> + <td>Reserved</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>deflate</td> + <td>GZIP deflate compression</td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>shuffle</td> + <td>Data element shuffling</td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>fletcher32</td> + <td>Fletcher32 checksum</td> + </tr> + + <tr> + <td align="center"><code>4</code></td> + <td>szip</td> + <td>SZIP compression</td> + </tr> + + <tr> + <td align="center"><code>5</code></td> + <td>nbit</td> + <td>N-bit packing</td> + </tr> + + <tr> + <td align="center"><code>6</code></td> + <td>scaleoffset</td> + <td>Scale and offset encoded values</td> + </tr> + </table> + </p></td> + </tr> + + <tr> + <td><p>Name Length</p></td> + <td><p>Each filter has an optional null-terminated ASCII name + and this field holds the length of the name including the + null termination padded with nulls to be a multiple of + eight. 
If the filter has no name then a value of zero is + stored in this field.</p></td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>The flags indicate certain properties for a filter. The + bit values defined so far are: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set then the filter is an optional filter. + During output, if an optional filter fails it will be + silently skipped in the pipeline.</td> + </tr> + + <tr> + <td align="center"><code>1-15</code></td> + <td>Reserved (zero)</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Number of Client Data Values</p></td> + <td><p>Each filter can store integer values to control + how the filter operates. The number of entries in the + <em>Client Data</em> array is stored in this field.</p></td> + </tr> + + <tr> + <td><p>Name</p></td> + <td><p>If the <em>Name Length</em> field is non-zero then it will + contain the size of this field, padded to a multiple of eight. This + field contains a null-terminated, ASCII character + string to serve as a comment/name for the filter.</p></td> + </tr> + + <tr> + <td><p>Client Data</p></td> + <td><p>This is an array of four-byte integers which will be + passed to the filter function. 
The <em>Number of Client Data + Values</em> field determines the number of elements in the array.</p></td> + </tr> + + <tr> + <td><p>Padding</p></td> + <td><p>Four bytes of zeroes are added to the message at this + point if the <em>Number of Client Data Values</em> field contains + an odd number.</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Filter Pipeline Message - Version 2 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Number of Filters</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Filter Description List <em>(variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This table + describes version 2.</p></td> + </tr> + + <tr> + <td><p>Number of Filters</p></td> + <td><p>The total number of filters described in this + message. The maximum possible number of filters in a + message is 32.</p></td> + </tr> + + <tr> + <td><p>Filter Description List</p></td> + <td><p>A description of each filter. 
A filter description + appears in the next table.</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Filter Description + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="2">Filter Identification Value</td> + <td colspan="2">Name Length <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="2">Flags</td> + <td colspan="2">Number Client Data Values</td> + </tr> + + <tr> + <td colspan="4"><br />Name <em>(variable size, optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Client Data <em>(variable size, optional)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Filter Identification Value</p></td> + <td> + <p> + This value, often referred to as a filter identifier, + is designed to be a unique identifier for the filter. + Values from zero through 32,767 are reserved for filters + supported by The HDF Group in the HDF5 Library and for + filters requested and supported by third parties. + Filters supported by The HDF Group are documented immediately + below. Information on 3rd-party filters can be found at + The HDF Group’s + <a href="http://www.hdfgroup.org/services/contributions.html"> + Contributions</a> page.</p> + + <p> + To request a filter identifier, please contact + The HDF Group’s Help Desk at + <img src="Graphics/help.png" valign="middle" height="14" + alt="The HDF Group Help Desk">. 
+ You will be asked to provide the following information:</p> + <ol> + <li>Contact information for the developer requesting the + new identifier</li> + <li>A short description of the new filter</li> + <li>Links to any relevant information, including licensing + information</li> + </ol> + <p> + Values from 32768 to 65535 are reserved for non-distributed uses + (for example, internal company usage) or for application usage + when testing a feature. The HDF Group does not track or document + the use of the filters with identifiers from this range.</p> + + <p> + The filters currently in library version 1.8.0 are + listed below: + + <table class="list"> + <tr> + <th width="20%" align="center">Identification</th> + <th width="15%" align="left">Name</th> + <th width="65%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>N/A</td> + <td>Reserved</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>deflate</td> + <td>GZIP deflate compression</td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>shuffle</td> + <td>Data element shuffling</td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>fletcher32</td> + <td>Fletcher32 checksum</td> + </tr> + + <tr> + <td align="center"><code>4</code></td> + <td>szip</td> + <td>SZIP compression</td> + </tr> + + <tr> + <td align="center"><code>5</code></td> + <td>nbit</td> + <td>N-bit packing</td> + </tr> + + <tr> + <td align="center"><code>6</code></td> + <td>scaleoffset</td> + <td>Scale and offset encoded values</td> + </tr> + </table> + </p></td> + </tr> + + <tr> + <td><p>Name Length</p></td> + <td><p>Each filter has an optional null-terminated ASCII name + and this field holds the length of the name including the + null termination padded with nulls to be a multiple of + eight. 
If the filter has no name then a value of zero is + stored in this field.</p> + <p>Filters with IDs less than 256 (in other words, filters + that are defined in this format documentation) do not store + the <em>Name Length</em> or <em>Name</em> fields. + </p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>The flags indicate certain properties for a filter. The + bit values defined so far are: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set then the filter is an optional filter. + During output, if an optional filter fails it will be + silently skipped in the pipeline.</td> + </tr> + + <tr> + <td align="center"><code>1-15</code></td> + <td>Reserved (zero)</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Number of Client Data Values</p></td> + <td><p>Each filter can store integer values to control + how the filter operates. The number of entries in the + <em>Client Data</em> array is stored in this field.</p></td> + </tr> + + <tr> + <td><p>Name</p></td> + <td><p>If the <em>Name Length</em> field is non-zero then it will + contain the size of this field, <em>not</em> padded to a multiple + of eight. This field contains a <em>non-</em>null-terminated, + ASCII character string to serve as a comment/name for the filter. + </p> + <p>Filters that are defined in this format documentation + such as deflate and shuffle do not store the <em>Name + Length</em> or <em>Name</em> fields. + </p> + </td> + </tr> + + <tr> + <td><p>Client Data</p></td> + <td><p>This is an array of four-byte integers which will be + passed to the filter function. The <em>Number of Client Data + Values</em> field determines the number of elements in the array.</p> + </td> + </tr> + </table> + </div> + +<br /> +<h4><a name="AttributeMessage">IV.A.2.m. 
The Attribute Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Attribute</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x000C</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td><p>The <em>Attribute</em> message is used to store objects + in the HDF5 file which are used as attributes, or + “metadata” about the current object. An attribute + is a small dataset; it has a name, a datatype, a dataspace, and + raw data. Since attributes are stored in the object header, they + should be relatively small (in other words, less than 64KB). + They can be associated with any type of object which has an + object header (groups, datasets, or committed (named) + datatypes).</p> + <p>In 1.8.x versions of the library, attributes can be larger + than 64KB. See the + <a href="UG/HDF5_Users_Guide-Responsive%20HTML5/index.html#t=HDF5_Users_Guide%2FAttributes%2FHDF5_Attributes.htm%3Frhtocid%3Dtoc8.2_1%23TOC_8_5_Special_Issuesbc-13"> + “Special Issues”</a> section of the Attributes chapter + in the <cite>HDF5 User’s Guide</cite> for more information.</p> + <p>Note: Attributes on an object must have unique names: + the HDF5 Library currently enforces this by causing the + creation of an attribute with a duplicate name to fail. 
+ Attributes on different objects may have the same name, + however.</p></td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Attribute Message (Version 1) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Reserved (zero)</td> + <td colspan="2">Name Size</td> + </tr> + + <tr> + <td colspan="2">Datatype Size</td> + <td colspan="2">Dataspace Size</td> + </tr> + + <tr> + <td colspan="4"><br />Name <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Datatype <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Dataspace <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Data <em>(variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number information is used for changes in the format of the + attribute message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used.</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Used by the library before version 1.6 to encode attribute message. + This version does not support shared datatypes.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Name Size</p></td> + <td><p>The length of the attribute name in bytes including the + null terminator. 
Note that the <em>Name</em> field below may + contain additional padding not represented by this + field.</p></td> + </tr> + + <tr> + <td><p>Datatype Size</p></td> + <td><p>The length of the datatype description in the <em>Datatype</em> + field below. Note that the <em>Datatype</em> field may contain + additional padding not represented by this field.</p></td> + </tr> + + <tr> + <td><p>Dataspace Size</p></td> + <td><p>The length of the dataspace description in the <em>Dataspace</em> + field below. Note that the <em>Dataspace</em> field may contain + additional padding not represented by this field.</p></td> + </tr> + + <tr> + <td><p>Name</p></td> + <td><p>The null-terminated attribute name. This field is + padded with additional null characters to make it a + multiple of eight bytes.</p></td> + </tr> + + <tr> + <td><p>Datatype</p></td> + <td><p>The datatype description follows the same format as + described for the datatype object header message. This + field is padded with additional zero bytes to make it a + multiple of eight bytes.</p></td> + </tr> + + <tr> + <td><p>Dataspace</p></td> + <td><p>The dataspace description follows the same format as + described for the dataspace object header message. This + field is padded with additional zero bytes to make it a + multiple of eight bytes.</p></td> + </tr> + + <tr> + <td><p>Data</p></td> + <td><p>The raw data for the attribute. The size is determined + from the datatype and dataspace descriptions. 
This + field is <em>not</em> padded with additional bytes.</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Attribute Message (Version 2) + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2">Name Size</td> + </tr> + + <tr> + <td colspan="2">Datatype Size</td> + <td colspan="2">Dataspace Size</td> + </tr> + + <tr> + <td colspan="4"><br />Name <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Datatype <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Dataspace <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Data <em>(variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number information is used for changes in the + format of the attribute message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Used by the library of version 1.6.x and after to encode + attribute messages. + This version supports shared datatypes. The fields of + name, datatype, and dataspace are not padded with + additional bytes of zero. 
+ </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This bit field contains extra information about + interpreting the attribute message: + + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, datatype is shared.</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>If set, dataspace is shared.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Name Size</p></td> + <td><p>The length of the attribute name in bytes including the + null terminator.</p></td> + </tr> + + <tr> + <td><p>Datatype Size</p></td> + <td><p>The length of the datatype description in the <em>Datatype</em> + field below.</p></td> + </tr> + + <tr> + <td><p>Dataspace Size</p></td> + <td><p>The length of the dataspace description in the <em>Dataspace</em> + field below.</p></td> + </tr> + + <tr> + <td><p>Name</p></td> + <td><p>The null-terminated attribute name. This field is <em>not</em> + padded with additional bytes.</p></td> + </tr> + + <tr> + <td><p>Datatype</p></td> + <td><p>The datatype description follows the same format as + described for the datatype object header message. + </p> + <p>If the + <em>Flag</em> field indicates this attribute’s datatype is + shared, this field will contain a “shared message” encoding + instead of the datatype encoding. + </p> + <p>This field is <em>not</em> padded with additional bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Dataspace</p></td> + <td><p>The dataspace description follows the same format as + described for the dataspace object header message. + </p> + <p>If the + <em>Flag</em> field indicates this attribute’s dataspace is + shared, this field will contain a “shared message” encoding + instead of the dataspace encoding. 
+ </p> + <p>This field is <em>not</em> padded with additional bytes.</p> + </td> + </tr> + + <tr> + <td><p>Data</p></td> + <td><p>The raw data for the attribute. The size is determined + from the datatype and dataspace descriptions. + </p> + <p>This field is <em>not</em> padded with additional zero bytes. + </p> + </td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Attribute Message (Version 3) + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2">Name Size</td> + </tr> + + <tr> + <td colspan="2">Datatype Size</td> + <td colspan="2">Dataspace Size</td> + </tr> + + <tr> + <td>Name Character Set Encoding</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Name <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Datatype <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Dataspace <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Data <em>(variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number information is used for changes in the + format of the attribute message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>Used by the library of version 1.8.x and after to + encode attribute messages. + This version supports attributes with non-ASCII names. 
+ </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This bit field contains extra information about + interpreting the attribute message: + + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, datatype is shared.</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>If set, dataspace is shared.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Name Size</p></td> + <td><p>The length of the attribute name in bytes including the + null terminator.</p></td> + </tr> + + <tr> + <td><p>Datatype Size</p></td> + <td><p>The length of the datatype description in the <em>Datatype</em> + field below.</p></td> + </tr> + + <tr> + <td><p>Dataspace Size</p></td> + <td><p>The length of the dataspace description in the <em>Dataspace</em> + field below.</p></td> + </tr> + + <tr> + <td><p>Name Character Set Encoding</p></td> + <td><p>The character set encoding for the attribute’s name: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>ASCII character set encoding + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>UTF-8 character set encoding + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Name</p></td> + <td><p>The null-terminated attribute name. This field is <em>not</em> + padded with additional bytes.</p></td> + </tr> + + <tr> + <td><p>Datatype</p></td> + <td><p>The datatype description follows the same format as + described for the datatype object header message. + </p> + <p>If the + <em>Flag</em> field indicates this attribute’s datatype is + shared, this field will contain a “shared message” encoding + instead of the datatype encoding. + </p> + <p>This field is <em>not</em> padded with additional bytes. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Dataspace</p></td> + <td><p>The dataspace description follows the same format as + described for the dataspace object header message. + </p> + <p>If the + <em>Flag</em> field indicates this attribute’s dataspace is + shared, this field will contain a “shared message” encoding + instead of the dataspace encoding. + </p> + <p>This field is <em>not</em> padded with additional bytes.</p> + </td> + </tr> + + <tr> + <td><p>Data</p></td> + <td><p>The raw data for the attribute. The size is determined + from the datatype and dataspace descriptions. + </p> + <p>This field is <em>not</em> padded with additional zero bytes. + </p> + </td> + </tr> + </table> + </div> + +<br /> +<h4><a name="CommentMessage">IV.A.2.n. The Object Comment +Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Object + Comment</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x000D</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The object comment is designed to be a short description of + an object. 
An object comment is a sequence of non-zero + (<code>\0</code>) ASCII characters with no other formatting + included by the library.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Object Comment Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Comment <em>(variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Comment</p></td> + <td><p>A null-terminated ASCII character string.</p></td> + </tr> + </table> + </div> + +<br /> +<h4><a name="OldModificationTimeMessage">IV.A.2.o. The Object +Modification Time (Old) Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Object + Modification Time (Old)</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x000E</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td><p>The object modification date and time is a timestamp + which indicates (using ISO-8601 date and time format) the last + modification of an object. The time is updated when any object + header message changes according to the system clock where the + change was posted.
All fields of this message should be + interpreted as coordinated universal time (UTC).</p> + <p>This modification time message is deprecated in favor of + the “new” <a href="#ModificationTimeMessage">Object + Modification Time</a> message and is no longer written to the + file in versions of the HDF5 Library after the 1.6.0 + version.</p></td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Modification Time Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Year</td> + </tr> + + <tr> + <td colspan="2">Month</td> + <td colspan="2">Day of Month</td> + </tr> + + <tr> + <td colspan="2">Hour</td> + <td colspan="2">Minute</td> + </tr> + + <tr> + <td colspan="2">Second</td> + <td colspan="2">Reserved</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Year</p></td> + <td><p>The four-digit year as an ASCII string. For example, + <code>1998</code>. + </p></td> + </tr> + + <tr> + <td><p>Month</p></td> + <td><p>The month number as a two digit ASCII string where + January is <code>01</code> and December is <code>12</code>.</p></td> + </tr> + + <tr> + <td><p>Day of Month</p></td> + <td><p>The day number within the month as a two digit ASCII + string. 
The first day of the month is <code>01</code>.</p></td> + </tr> + + <tr> + <td><p>Hour</p></td> + <td><p>The hour of the day as a two digit ASCII string where + midnight is <code>00</code> and 11:00pm is <code>23</code>.</p></td> + </tr> + + <tr> + <td><p>Minute</p></td> + <td><p>The minute of the hour as a two digit ASCII string where + the first minute of the hour is <code>00</code> and + the last is <code>59</code>.</p></td> + </tr> + + <tr> + <td><p>Second</p></td> + <td><p>The second of the minute as a two digit ASCII string + where the first second of the minute is <code>00</code> + and the last is <code>59</code>.</p></td> + </tr> + + <tr> + <td><p>Reserved</p></td> + <td><p>This field is reserved and should always be zero.</p></td> + </tr> + </table> + </div> + +<br /> +<h4><a name="SOHMTableMessage">IV.A.2.p. The Shared Message Table +Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Shared Message + Table</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x000F</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>This message is used to locate the table of shared object + header message (SOHM) indexes. Each index consists of information + to find the shared messages from either the heap or object header. 
+ This message is <em>only</em> found in the superblock + extension.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Shared Message Table Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Shared Object Header Message Table Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td>Number of Indices</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This document describes version 0.</p></td> + </tr> + + <tr> + <td><p>Shared Object Header Message Table Address</p></td> + <td><p>This field is the address of the master table for shared + object header message indexes.</p> + </td> + </tr> + + <tr> + <td><p>Number of Indices</p></td> + <td><p>This field is the number of indices in the master table. + </p></td> + </tr> + + </table> + </div> + +<br /> +<h4><a name="ContinuationMessage">IV.A.2.q. 
The Object Header +Continuation Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Object Header + Continuation</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0010</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The object header continuation is the location in the file + of a block containing more header messages for the current data + object. This can be used when header blocks become too large or + are likely to change over time.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Object Header Continuation Message + </caption> + + <tr> + <th width=25%>byte</th> + <th width=25%>byte</th> + <th width=25%>byte</th> + <th width=25%>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Offset<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Length<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) 
+ </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Offset</p></td> + <td><p>This value is the address in the file where the + header continuation block is located.</p></td> + </tr> + + <tr> + <td><p>Length</p></td> + <td><p>This value is the length in bytes of the header continuation + block in the file.</p></td> + </tr> + </table> + </div> + <br /> + + <p>The format of the header continuation block that this message points + to depends on the version of the object header that the message is + contained within. + </p> + + <p> + Continuation blocks for version 1 object headers have no special + formatting information; they are merely a list of object header + message info sequences (type, size, flags, reserved bytes and data + for each message sequence). See the description + of <a href="#V1ObjectHeaderPrefix">Version 1 Data Object Header Prefix.</a> + </p> + + <p>Continuation blocks for version 2 object headers <em>do</em> have + special formatting information as described here + (see also the description of + <a href="#V2ObjectHeaderPrefix">Version 2 Data Object Header Prefix.</a>): + </p> + <div align="center"> + <table class="format"> + <caption> + Version 2 Object Header Continuation Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + <tr> + <td>Header Message Type #1</td> + <td colspan="2">Size of Header Message Data #1</td> + <td>Header Message #1 Flags</td> + </tr> + + <tr> + <td colspan="2">Header Message #1 Creation Order <em>(optional)</em></td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Header Message Data #1<br /><br /></td> + </tr> + + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + + <tr> + <td>Header Message Type 
#n</td> + <td colspan="2">Size of Header Message Data #n</td> + <td>Header Message #n Flags</td> + </tr> + + <tr> + <td colspan="2">Header Message #n Creation Order <em>(optional)</em></td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Header Message Data #n<br /><br /></td> + </tr> + + <tr> + <td colspan="4">Gap <em>(optional, variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>OCHK</code>” + is used to indicate the + beginning of an object header continuation block. This gives file + consistency checking utilities a better chance of reconstructing a + damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Type</p></td> + <td> + <p>Same format as version 1 of the object header, described above. + </p></td> + </tr> + + <tr> + <td><p>Size of Header Message #n Data</p></td> + <td> + <p>Same format as version 1 of the object header, described above. + </p></td> + </tr> + + <tr> + <td><p>Header Message #n Flags</p></td> + <td> + <p>Same format as version 1 of the object header, described above. + </p></td> + </tr> + + <tr> + <td><p>Header Message #n Creation Order</p></td> + <td> + <p>This field stores the order that a message of a given type + was created in.</p> + <p>This field is present if bit 2 of <em>flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Data</p></td> + <td> + <p>Same format as version 1 of the object header, described above. + </p></td> + </tr> + + <tr> + <td><p>Gap</p></td> + <td> + <p>A gap in an object header chunk is inferred by the end of the + messages for the chunk before the beginning of the chunk’s + checksum. 
Gaps are always smaller than the size of an + object header message prefix (message type + message size + + message flags).</p> + <p>Gaps are formed when a message (typically an attribute message) + in an earlier chunk is deleted and a message from a later + chunk that does not quite fit into the free space is moved + into the earlier chunk.</p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the object header chunk. + </p> + </td> + </tr> + </table> + </div> + +<br /> +<h4><a name="SymbolTableMessage">IV.A.2.r. The Symbol Table +Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Symbol Table + Message</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0011</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Required for + “old style” groups; may not be repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>Each “old style” group has a v1 B-tree and a + local heap for storing symbol table entries, which are located + with this message.</td></tr> + <tr><td colspan="2"><b>Format of data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + <b>Symbol Table Message</b> + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />v1 B-tree Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Local Heap Address<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) 
+ </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>v1 B-tree Address</p></td> + <td><p>This value is the address of the v1 B-tree containing the + symbol table entries for the group.</p></td> + </tr> + + <tr> + <td><p>Local Heap Address</p></td> + <td><p>This value is the address of the local heap containing + the link names for the symbol table entries for the group.</p></td> + </tr> + </table> + </div> + +<br /> +<h4><a name="ModificationTimeMessage">IV.A.2.s. The Object +Modification Time Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Object + Modification Time</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0012</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The object modification time is a timestamp which indicates + the time of the last modification of an object. 
The time is + updated when any object header message changes according to + the system clock where the change was posted.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Modification Time Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4">Seconds After UNIX Epoch</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number is used for changes in the format of Object Modification Time + and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used.</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Used by Version 1.6.1 and after of the library to encode time. In + this version, the time is the seconds after Epoch.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Seconds After UNIX Epoch</p></td> + <td><p>A 32-bit unsigned integer value that stores the number of + seconds since 0 hours, 0 minutes, 0 seconds, January 1, 1970, + Coordinated Universal Time.</p></td> + </tr> + </table> + </div> + +<br /> +<h4><a name="BtreeKValuesMessage">IV.A.2.t. 
The B-tree +‘K’ Values Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> B-tree + ‘K’ Values</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0013</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>This message stores non-default ‘K’ values + for internal and leaf nodes of a group or indexed storage v1 + B-trees. This message is <em>only</em> found in the superblock + extension.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + B-tree ‘K’ Values Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="2">Indexed Storage Internal Node K</td> + <td bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="2">Group Internal Node K</td> + <td colspan="2">Group Leaf Node K</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This document describes + version 0.</p> + </td> + </tr> + + <tr> + <td><p>Indexed Storage Internal Node K</p></td> + <td><p>This is the node ‘K’ value for each internal node of an + indexed storage v1 B-tree. See the description of this field + in version 0 and 1 of the superblock as well as the section on + v1 B-trees. + </p> + </td> + </tr> + + <tr> + <td><p>Group Internal Node K</p></td> + <td><p>This is the node ‘K’ value for each internal node of a group + v1 B-tree.
See the description of this field in version 0 and + 1 of the superblock as well as the section on v1 B-trees. + </p> + </td> + </tr> + + <tr> + <td><p>Group Leaf Node K</p></td> + <td><p>This is the node ‘K’ value for each leaf node of a group v1 + B-tree. See the description of this field in version 0 and 1 + of the superblock as well as the section on v1 B-trees. + </p> + </td> + </tr> + + </table> + </div> + +<br /> +<h4><a name="DrvInfoMessage">IV.A.2.u. The Driver Info +Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Driver + Info</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0014</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + + <tr><td> + <b>Description:</b></td> + <td>This message contains information needed by the file driver + to reopen a file. This message is <em>only</em> found in the + superblock extension: see the <a href="#SuperblockExt"> + “Disk Format: Level 0C - Superblock Extension”</a> + section for more information. 
For more information on the fields + in the driver info message, see the <a href="#DriverInfo"> + “Disk Format : Level 0B - File Driver Info”</a> + section; those who use the multi and family file drivers will + find this section particularly helpful.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Driver Info Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4"><br />Driver Identification</td> + </tr> + + <tr> + <td colspan="2">Driver Information Size</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br /><br />Driver Information <em>(variable size)</em><br /><br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This document describes + version 0.</p> + </td> + </tr> + + <tr> + <td><p>Driver Identification</p></td> + <td><p>This is an eight-byte ASCII string without null termination which + identifies the driver. + </p> + </td> + </tr> + + <tr> + <td><p>Driver Information Size</p></td> + <td><p>The size in bytes of the <em>Driver Information</em> field of this + message.</p> + </td> + </tr> + + <tr> + <td><p>Driver Information</p></td> + <td><p>Driver information is stored in a format defined by the file driver.</p> + </td> + </tr> + </table> + </div> + +<br /> +<h4><a name="AinfoMessage">IV.A.2.v. 
The Attribute Info +Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Attribute + Info</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0015</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>This message stores information about the attributes on an + object, such as the maximum creation index for the attributes + created and the location of the attribute storage when the + attributes are stored “densely”.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Attribute Info Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2">Maximum Creation Index <em>(optional)</em></td> + </tr> + <tr> + <td colspan="4"><br />Fractal Heap Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Attribute Name v2 B-tree Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Attribute Creation Order v2 B-tree Address<sup>O</sup> <em>(optional)</em><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. 
This document describes + version 0.</p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This is the attribute index information flag with the + following definition: + + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, creation order for attributes is tracked. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>If set, creation order for attributes is indexed. + </td> + </tr> + <tr> + <td align="center"><code>2-7</code></td> + <td>Reserved</td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Maximum Creation Index</p></td> + <td><p>This is the maximum creation order index value for the + attributes on the object.</p> + <p>This field is present if bit 0 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Fractal Heap Address</p></td> + <td><p>This is the address of the fractal heap to store dense + attributes.</p> + </td> + </tr> + + <tr> + <td><p>Attribute Name v2 B-tree Address</p></td> + <td><p>This is the address of the version 2 B-tree to index the + names of densely stored attributes.</p> + </td> + </tr> + + <tr> + <td><p>Attribute Creation Order v2 B-tree Address</p></td> + <td><p>This is the address of the version 2 B-tree to index the + creation order of densely stored attributes.</p> + <p>This field is present if bit 1 of <em>Flags</em> is set.</p> + </td> + </tr> + + </table> + </div> + +<br /> +<h4><a name="RefCountMessage">IV.A.2.w. 
The Object Reference +Count Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Object Reference + Count</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0016</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>This message stores the number of hard links (in groups or + objects) pointing to an object: in other words, its + <em>reference count</em>.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Object Reference Count + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Reference count</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This document describes + version 0.</p> + </td> + </tr> + + <tr> + <td><p>Reference Count</p></td> + <td><p>The unsigned 32-bit integer is the reference count for the + object. This message is only present in “version 2” + (or later) object headers, and if not present in those object + header versions, the reference count for the object is assumed + to be 1.</p> + </td> + </tr> + + </table> + </div> + +<br /> +<h4><a name="FsinfoMessage">IV.A.2.x. 
The File Space Info +Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> File Space + Info</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0018</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td> + <b>Description:</b></td> + <td>This message stores the file space management strategy (see + description below) that the library uses in handling file space + requests for the file. It also contains the free-space section + threshold used by the library’s free-space managers for + the file. If the strategy is 1, this message also contains the + addresses of the file’s free-space managers which track + free space for each type of file space allocation. There are + six basic types of file space allocation: superblock, B-tree, + raw data, global heap, local heap, and object header. See the + description of <a href="#FreeSpaceManager">Free-space + Manager</a> as well as the description of allocation types in + <a href="#AppendixB">Appendix B</a>.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + File Space Info + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Strategy</td> + <td colspan="2">Threshold<sup>L</sup></td> + </tr> + <tr> + <td colspan="4">Superblock Free-space Manager Address<sup>O</sup></td> + </tr> + <tr> + <td colspan="4">B-tree Free-space Manager Address<sup>O</sup></td> + </tr> + <tr> + <td colspan="4">Raw Data Free-space Manager Address<sup>O</sup></td> + </tr> + <tr> + <td colspan="4">Global Heap Free-space Manager Address<sup>O</sup></td> + </tr> + <tr> + <td colspan="4">Local Heap 
Free-space Manager Address<sup>O</sup></td> + </tr> + <tr> + <td colspan="4">Object Header Free-space Manager Address<sup>O</sup></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are of the size + specified in “Size of Offsets” field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are of the size + specified in “Size of Lengths” field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>This is the version number of this message. This document describes + version 0.</p> + </td> + </tr> + + <tr> + <td><p>Strategy</p></td> + <td><p>This is the file space management strategy for the file. + There are four types of strategies: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>With this strategy, the HDF5 Library’s free-space managers track the + free space that results from the manipulation of HDF5 objects + in the HDF5 file. The free space information is saved when the + file is closed, and reloaded when the file is reopened. + <br /> + When space is needed for file metadata or raw data, + the HDF5 Library first requests space from the library’s free-space + managers. If the request is not satisfied, the library requests space + from the aggregators. If the request is still not satisfied, + the library requests space from the virtual file driver. + That is, the library will use all of the mechanisms for allocating + space. + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>This is the HDF5 Library’s default file space management strategy. 
+ With this strategy, the library’s free-space managers track the free space + that results from the manipulation of HDF5 objects in the HDF5 file. + The free space information is NOT saved when the file is closed and + the free space that exists upon file closing becomes unaccounted + space in the file. + <br /> + As with strategy #1, the library will try all of the mechanisms + for allocating space. When space is needed for file metadata or + raw data, the library first requests space from the free-space + managers. If the request is not satisfied, the library requests + space from the aggregators. If the request is still not satisfied, + the library requests space from the virtual file driver. + </td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>With this strategy, the HDF5 Library does not track free space that results + from the manipulation of HDF5 objects in the HDF5 file and + the free space becomes unaccounted space in the file. + <br /> + When space is needed for file metadata or raw data, + the library first requests space from the aggregators. + If the request is not satisfied, the library requests space from + the virtual file driver. + </td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>With this strategy, the HDF5 Library does not track free space that results + from the manipulation of HDF5 objects in the HDF5 file and + the free space becomes unaccounted space in the file. + <br /> + When space is needed for file metadata or raw data, + the library requests space from the virtual file driver. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Threshold</p></td> + <td><p>This is the free-space section threshold. + The library’s free-space managers will track only + free-space sections with size greater than or equal to + <em>threshold</em>. 
The default is to track free-space + sections of all sizes.</p> + </td> + </tr> + <tr> + <td><p>Superblock Free-space Manager Address</p></td> + <td><p>This is the address of the free-space manager for + H5FD_MEM_SUPER allocation type. + </p> + </td> + </tr> + + <tr> + <td><p>B-tree Free-space Manager Address</p></td> + <td><p>This is the address of the free-space manager for + H5FD_MEM_BTREE allocation type. + </p> + </td> + </tr> + + <tr> + <td><p>Raw Data Free-space Manager Address</p></td> + <td><p>This is the address of the free-space manager for + H5FD_MEM_DRAW allocation type. + </p> + </td> + </tr> + + <tr> + <td><p>Global Heap Free-space Manager Address</p></td> + <td><p>This is the address of the free-space manager for + H5FD_MEM_GHEAP allocation type. + </p> + </td> + </tr> + + <tr> + <td><p>Local Heap Free-space Manager Address</p></td> + <td><p>This is the address of the free-space manager for + H5FD_MEM_LHEAP allocation type. + </p> + </td> + </tr> + + <tr> + <td><p>Object Header Free-space Manager Address</p></td> + <td><p>This is the address of the free-space manager for + H5FD_MEM_OHDR allocation type. + </p> + </td> + </tr> + </table> + </div> + <br /> + + +<br /> +<h3><a name="DataStorage"> +IV.B. Disk Format: Level 2B - Data Object Data Storage</a></h3> + +<p>The data for an object is stored separately from its header + information in the file and may not actually be located in the HDF5 file + itself if the header indicates that the data is stored externally. The + information for each record in the object is stored according to the + dimensionality of the object (indicated in the dataspace header message). 
+ Multi-dimensional array data is stored in C order; in other words, the + “last” dimension changes fastest.</p> + +<p>Data whose elements are composed of atomic datatypes are stored in IEEE + format, unless they are specifically defined as being stored in a different + machine format with the architecture-type information from the datatype + header message. This means that each architecture will need to [potentially] + byte-swap data values into the internal representation for that particular + machine.</p> + +<p> Data with a variable-length datatype is stored in the global heap + of the HDF5 file. Global heap identifiers are stored in the + data object storage.</p> + +<p>Data whose elements are composed of reference datatypes are stored in + several different ways depending on the particular reference type involved. + Object pointers are just stored as the offset of the object header being + pointed to with the size of the pointer being the same number of bytes as + offsets in the file.</p> + +<p>Dataset region references are stored as a heap-ID which points to +the following information within the file-heap: an offset of the object +pointed to, number-type information (same format as header message), +dimensionality information (same format as header message), sub-set start +and end information (in other words, a coordinate location for each), +and field start and end names (in other words, a [pointer to the] string +indicating the first field included and a [pointer to the] string name +for the last field). </p> + +<p>Data of a compound datatype is stored as a contiguous stream of the items + in the structure, with each item formatted according to its datatype.</p> + + + +<br /> +<br /> +<hr /> +<h2><a name="AppendixA"> +V. 
Appendix A: Definitions</a></h2> + +<p>Definitions of various terms used in this document are included in +this section.</p> + + <div align="center"> + <table class="glossary"> + <tr> + <th width="20%">Term</th> + <th>Definition</th> + </tr> + + <tr> + <td>Undefined Address</td> + <td>The <a name="UndefinedAddress">undefined + address</a> for a file is a file address with all bits + set: in other words, <code>0xffff...ff</code>.</td> + </tr> + + <tr> + <td>Unlimited Size</td> + <td>The <a name="UnlimitedDim">unlimited size</a> + for a size is a value with all bits set: in other words, + <code>0xffff...ff</code>.</td> + </tr> + + </table> + </div> + + + +<br /> +<br /> +<hr /> +<h2><a name="AppendixB"> +VI. Appendix B: File Memory Allocation Types</a></h2> + +<p>There are six basic types of file memory allocation as follows: +</p> + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Basic Allocation Type</th> + <th>Description</th> + </tr> + + <tr> + <td>H5FD_MEM_SUPER</td> + <td>File memory allocated for <em>Superblock.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_BTREE</td> + <td>File memory allocated for <em>B-tree.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_DRAW</td> + <td>File memory allocated for raw data.</td> + </tr> + + <tr> + <td>H5FD_MEM_GHEAP</td> + <td>File memory allocated for <em>Global Heap.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_LHEAP</td> + <td>File memory allocated for <em>Local Heap.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_OHDR</td> + <td>File memory allocated for <em>Object Header.</em></td> + </tr> + </table> + </div> + +<p>There are other file memory allocation types that are mapped to the +above six basic allocation types because they are similar in nature. 
+The mapping is listed in the following table: +</p> + + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Basic Allocation Type</th> + <th>Mapping of Allocation Types to Basic Allocation Types</th> + </tr> + + <tr> + <td>H5FD_MEM_SUPER</td> + <td><em>none</em></td> + </tr> + + <tr> + <td>H5FD_MEM_BTREE</td> + <td>H5FD_MEM_SOHM_INDEX</td> + </tr> + + <tr> + <td>H5FD_MEM_DRAW</td> + <td>H5FD_MEM_FHEAP_HUGE_OBJ</td> + </tr> + + <tr> + <td>H5FD_MEM_GHEAP</td> + <td><em>none</em></td> + </tr> + + <tr> + <td>H5FD_MEM_LHEAP</td> + <td>H5FD_MEM_FHEAP_DBLOCK, H5FD_MEM_FSPACE_SINFO</td> + </tr> + + <tr> + <td>H5FD_MEM_OHDR</td> + <td>H5FD_MEM_FHEAP_HDR, H5FD_MEM_FHEAP_IBLOCK, H5FD_MEM_FSPACE_HDR, H5FD_MEM_SOHM_TABLE</td> + </tr> + </table> + </div> + +<p>Allocation types that are mapped to basic allocation types are described below: +</p> + + <div align="center"> + <table class="desc"> + <tr> + <th width="30%">Allocation Type</th> + <th>Description</th> + </tr> + + <tr> + <td>H5FD_MEM_FHEAP_HDR</td> + <td>File memory allocated for <em>Fractal Heap Header.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_FHEAP_DBLOCK</td> + <td>File memory allocated for <em>Fractal Heap Direct Blocks.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_FHEAP_IBLOCK</td> + <td>File memory allocated for <em>Fractal Heap Indirect Blocks.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_FHEAP_HUGE_OBJ</td> + <td>File memory allocated for huge objects in the fractal heap.</td> + </tr> + + <tr> + <td>H5FD_MEM_FSPACE_HDR</td> + <td>File memory allocated for <em>Free-space Manager Header.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_FSPACE_SINFO</td> + <td>File memory allocated for <em>Free-space Section List</em> of the free-space manager.</td> + </tr> + <tr> + <td>H5FD_MEM_SOHM_TABLE</td> + <td>File memory allocated for <em>Shared Object Header Message Table.</em></td> + </tr> + <tr> + <td>H5FD_MEM_SOHM_INDEX</td> + <td>File memory allocated for <em>Shared Message Record List.</em></td> + </tr> + </table> 
+ </div> +</body> +</html> diff --git a/doxygen/examples/H5.format.html b/doxygen/examples/H5.format.html new file mode 100644 index 0000000..e16805f --- /dev/null +++ b/doxygen/examples/H5.format.html @@ -0,0 +1,20400 @@ +<html> + <head> + <title> + HDF5 File Format Specification Version 3.0 + </title> + + <style> + h1 { display: block; + margin-top: 24px; + margin-bottom: 24px; + margin-left: 0px; + margin-right: 0px; + text-indent: 0px; + font-size: 300%; + } + + h2 { display: block; + margin-top: 60px; + margin-bottom: 8px; + margin-left: 0px; + margin-right: 0px; + text-indent: 0px; + border-style: solid; + border-top-style: medium; + border-top-color: #A9A9A9; + border-bottom: none; + border-left: none; + border-right: none; + font-size: 250%; + } + + h3 { display: block; + margin-top: 40px; + margin-bottom: 8px; + margin-left: 0px; + margin-right: 0px; + text-indent: 0px; + font-size: 200%; + } + + h4 { display: block; + margin-top: 32px; + margin-bottom: 8px; + margin-left: 0px; + margin-right: 0px; + text-indent: 0px; + font-size: 150%; + } + + p { display: block; + margin-top: 8px; + margin-bottom: 8px; + margin-left: 0px; + margin-right: 0px; + text-indent: 0px; + font-size: 100%; + } + <!-- + p.item { margin-left: 2em; + text-indent: -2em + } --> + <!-- p.item2 { margin-left: 2em; text-indent: 2em} --> + + table.format { border:solid; + border-collapse:collapse; + caption-side:top; + text-align:center; + width:80%; + } + table.format th { border:ridge; + padding:4px; + width:25%; + } + table.format td { border:ridge; + padding:4px; + } + table.format caption { font-weight:bold; + font-size:larger; + } + + table.note {border:none; + text-align:right; + width:80%; + } + + table.desc { border:solid; + border-collapse:collapse; + caption-size:top; + text-align:left; + width:80%; + } + table.desc tr { vertical-align:top; + } + table.desc th { border-style:ridge; + font-size:larger; + padding:4px; + <!-- text-decoration:underline; --> + } + table.desc td { 
border-style:ridge; + <!-- padding: 4px; --> + vertical-align:text-top; + } + table.desc caption { font-weight:bold; + font-size:larger; + } + + table.list { border:none; + width:100% + } + table.list tr { vertical-align:text-top; + } + table.list th { border:none; + text-decoration:underline; + vertical-align:text-top; + } + table.list td { border:none; + vertical-align:text-top; + } + + table.msgdesc { border:none; + text-align:left; + width: 80% + } + table.msgdesc tr { vertical-align:text-top; + border-spacing:0; + padding:0; } + table.msgdesc th { border:none; + text-decoration:underline; + vertical-align:text-top; } + table.msgdesc td { border:none; + vertical-align:text-top; + } + + table.list80 { border:none; + width:80% + } + table.list80 tr { vertical-align:text-top; + } + table.list80 th { border:none; + text-decoration:underline; + vertical-align:text-top; + } + table.list80 td { border:none; + vertical-align:text-top; + } + + table.glossary { border:none; + text-align:left; + width: 80% + } + table.glossary tr { vertical-align:text-top; + border-spacing:0; + padding:0; } + table.glossary th { border:none; + text-align:left; + text-decoration:underline; + vertical-align:text-top; } + table.glossary td { border:none; + text-align:left; + vertical-align:text-top; + } + + div { page-break-inside:avoid; + page-break-after:auto + } + + </style> + + <!-- #BeginLibraryItem "/ed_libs/styles_Format.lbi" --> + <!-- + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. 
COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. It can also be found at * + * http://www.hdfgroup.org/HDF5/doc/Copyright.html. If you do not have * + * access to either file, you may request a copy from help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + --> + <!-- #EndLibraryItem --><!-- #BeginLibraryItem "/ed_libs/NavBar_ADevG.lbi" --> + </head> + <body> + <!-- #EndLibraryItem --> + + <center> + <table border="0" width="90%"> + <tr> + <td valign="top"> + <ol type="I"> + <li><a href="#Intro">Introduction</a></li> + <font size="-1"> + <ol type="A"> + <li><a href="#ThisDocument">This Document</a></li> + <li><a href="#ChangesForHdf5_1_12">Changes for HDF5 1.12</a></li> + <li><a href="#ChangesForHdf5_1_10">Changes for HDF5 1.10</a></li> + </ol> + </font> + + <li><a href="#FileMetaData">Disk Format: Level 0 - File Metadata</a></li> + <font size="-1"> + <ol type="A"> + <li><a href="#Superblock">Disk Format: Level 0A - Format Signature + and Superblock</a></li> + <li><a href="#DriverInfo">Disk Format: Level 0B - File Driver + Info</a></li> + <li><a href="#SuperblockExt">Disk Format: Level 0C - Superblock + Extension</a></li> + </ol> + </font> + <li><a href="#FileInfra">Disk Format: Level 1 - File Infrastructure</a></li> + <font size="-1"> + <ol type="A"> + <li><a href="#Btrees">Disk Format: Level 1A - B-trees and B-tree + Nodes</a> + <ol type="1"> + <li><a href="#V1Btrees">Disk Format: Level 1A1 - Version 1 + B-trees</a></li> + <li><a href="#V2Btrees">Disk Format: Level 1A2 - Version 2 + B-trees</a></li> + </ol> + </li> + <li><a href="#SymbolTable">Disk Format: Level 1B - Group Symbol + Table Nodes</a></li> + <li><a href="#SymbolTableEntry">Disk Format: Level 1C - Symbol + Table Entry</a></li> + <li><a href="#LocalHeap">Disk 
Format: Level 1D - Local Heaps</a></li> + <li><a href="#GlobalHeap">Disk Format: Level 1E - Global Heap</a></li> + <li><a href="#GlobalHeapVDS">Disk Format: Level 1F - Global Heap + Block for Virtual Datasets</a></li> + <li><a href="#FractalHeap">Disk Format: Level 1G - Fractal Heap</a></li> + <li><a href="#FreeSpaceManager">Disk Format: Level 1H - Free-space + Manager</a></li> + <li><a href="#SOHMTable">Disk Format: Level 1I - Shared Object + Header Message Table</a></li> + </ol> + </font> + <li><a href="#DataObject">Disk Format: Level 2 - Data Objects</a></li> + <font size="-1"> + <ol type="A"> + <li><a href="#ObjectHeader">Disk Format: Level 2A - Data Object Headers</a></li> + <ol type="1"> + <li><a href="#ObjectHeaderPrefix">Disk Format: Level 2A1 - + Data Object Header Prefix</a> + <ol type="a"> + <li><a href="#V1ObjectHeaderPrefix">Version 1 Data + Object Header Prefix</a></li> + <li><a href="#V2ObjectHeaderPrefix">Version 2 Data + Object Header Prefix</a></li> + </ol> + </li> + <li><a href="#ObjectHeaderMessages">Disk Format: Level 2A2 - + Data Object Header Messages</a></li> + <ol type="a"> + <li><a href="#NILMessage">The NIL Message</a></li> <!-- 0x0000 --> + <li><a href="#DataspaceMessage">The Dataspace Message</a></li> <!-- 0x0001 --> + <li><a href="#LinkInfoMessage">The Link Info Message</a></li> <!-- 0x0002 --> + <li><a href="#DatatypeMessage">The Datatype Message</a></li> <!-- 0x0003 --> + <li><a href="#OldFillValueMessage">The Data Storage - + Fill Value (Old) Message</a></li> <!-- 0x0004 --> + </ol> + </ol> + </ol> + </font> + </ol> + </td> + + <td> </td> + + <td valign="top"> + <ol type="I" start="4"> + <li><a href="#DataObject">Disk Format: Level 2 - Data + Objects</a><font size="-1"><i> (Continued)</i></li> + <ol type="A"> + <li><a href="#ObjectHeader">Disk Format: Level 2A - Data Object + Headers</a><i> (Continued)</i> + <ol type="1" start="2"> + <li><a href="#ObjectHeaderMessages">Disk Format: Level 2A2 - + Data Object Header Messages</a><i> 
(Continued)</i></li> + <ol type="a" start="6"> + <li><a href="#FillValueMessage">The Data Storage - + Fill Value Message</a></li> <!-- 0x0005 --> + <li><a href="#LinkMessage">The Link Message</a></li> <!-- 0x0006 --> + <li><a href="#ExternalFileListMessage">The Data Storage - + External Data Files Message</a></li> <!-- 0x0007 --> + <li><a href="#LayoutMessage">The Data Layout Message</a></li> <!-- 0x0008 --> + <li><a href="#BogusMessage">The Bogus Message</a></li> <!-- 0x0009 --> + <li><a href="#GroupInfoMessage">The Group Info + Message</a></li> <!-- 0x000a --> + <li><a href="#FilterMessage">The Data Storage - + Filter Pipeline Message</a></li> <!-- 0x000b --> + <li><a href="#AttributeMessage">The Attribute + Message</a></li> <!-- 0x000c --> + <li><a href="#CommentMessage">The Object Comment + Message</a></li> <!-- 0x000d --> + <li><a href="#OldModificationTimeMessage">The Object + Modification Time (Old) Message</a></li> <!-- 0x000e --> + <li><a href="#SOHMTableMessage">The Shared Message + Table Message</a></li> <!-- 0x000f --> + <li><a href="#ContinuationMessage">The Object Header + Continuation Message</a></li> <!-- 0x0010 --> + <li><a href="#SymbolTableMessage">The Symbol + Table Message</a></li> <!-- 0x0011 --> + <li><a href="#ModificationTimeMessage">The Object + Modification Time Message</a></li> <!-- 0x0012 --> + <li><a href="#BtreeKValuesMessage">The B-tree + ‘K’ Values Message</a></li> <!-- 0x0013 --> + <li><a href="#DrvInfoMessage">The Driver Info + Message</a></li> <!-- 0x0014 --> + <li><a href="#AinfoMessage">The Attribute Info + Message</a></li> <!-- 0x0015 --> + <li><a href="#RefCountMessage">The Object Reference + Count Message</a></li> <!-- 0x0016 --> + <li><a href="#FsinfoMessage">The File Space Info + Message</a></li> <!-- 0x0017 --> + </ol> + </ol> + </li> + <li><a href="#DataStorage">Disk Format: Level 2B - Data Object Data Storage</a></li> + </ol> + </font> + <li><a href="#AppendixA">Appendix A: Definitions</a></li> + <li><a 
href="#AppendixB">Appendix B: File Space Allocation + Types</a></li> + <li><a href="#AppendixC"> + Appendix C: Types of Indexes for Dataset Chunks</a></li> + <font size="-1"> + <ol type="A"> + <li><a href="#SingleChunk">The Single Chunk Index</a></li> + <li><a href="#Implicit">The Implicit Index</a></li> + <li><a href="#FixedArray">The Fixed Array Index</a></li> + <li><a href="#ExtensibleArray">The Extensible Array Index</a></li> + <li><a href="#AppendV2Btrees">The Version 2 B-trees Index</a></li> + </ol> + </font> + <li><a href="#AppendixD"> + Appendix D: Encoding for Dataspace and Reference</a></li> + <font size="-1"> + <ol type="A"> + <li><a href="#DataspaceEncode">Dataspace Encoding</a></li> + <li><a href="#ReferenceEncodeRV">Reference Encoding (Revised)</a></li> + <li><a href="#ReferenceEncodeDP">Reference Encoding (Backward Compatibility)</a></li> + </ol> + </font> + </ol> + </td></tr> + </table> + </center> + + + <a name="Intro"><h2>I. Introduction</h2></a> + + <table align="right" width="100"> + <tr><td> </td><td align="center"> + <hr /> + <img src="FF-IH_FileGroup.gif" alt="HDF5 Groups" hspace="15" vspace="15"> + </td><td> </td></tr> + <tr><td> </td><td align="center"> + <strong>Figure 1:</strong> Relationships among the HDF5 root group, other groups, and objects + <hr /> + </td><td> </td></tr> + + <tr><td> </td><td align="center"> + <img src="FF-IH_FileObject.gif" alt="HDF5 Objects" hspace="15" vspace="15"> + </td><td> </td></tr> + <tr><td> </td><td align="center"> + <strong>Figure 2:</strong> HDF5 objects -- datasets, datatypes, or dataspaces + <hr /> + </td><td> </td></tr> + </table> + + + <p>The format of an HDF5 file on disk encompasses several + key ideas of the HDF4 and AIO file formats as well as + addressing some shortcomings therein. The new format is + more self-describing than the HDF4 format and is more + uniformly applied to data objects in the file.</p> + + <p>An HDF5 file appears to the user as a directed graph. 
+ The nodes of this graph are the higher-level HDF5 objects + that are exposed by the HDF5 APIs:</p> + + <ul> + <li>Groups</li> + <li>Datasets</li> + <li>Committed (formerly Named) datatypes</li> + </ul> + + <p>At the lowest level, as information is actually written to the disk, + an HDF5 file is made up of the following objects:</p> + <ul> + <li>A superblock</li> + <li>B-tree nodes</li> + <li>Heap blocks</li> + <li>Object headers</li> + <li>Object data</li> + <li>Free space</li> + </ul> + + <p>The HDF5 Library uses these low-level objects to represent the + higher-level objects that are then presented to the user or + to applications through the APIs. For instance, a group is an + object header that contains a message that points to a local + heap (for storing the links to objects in the group) and to a + B-tree (which indexes the links). A dataset is an object header + that contains messages that describe the datatype, dataspace, + layout, filters, external files, fill value, and other elements + with the layout message pointing to either a raw data chunk or + to a B-tree that points to raw data chunks.</p> + + + <a name="ThisDocument"><h3>I.A. This Document</h3></a> + + <p>This document describes the lower-level data objects; + the higher-level objects and their properties are described + in the <a href="UG/HDF5_Users_Guide-Responsive HTML5/index.html"><cite>HDF5 User’s Guide</cite></a>.</p> + + <p>Three levels of information comprise the file format. + Level 0 contains basic information for identifying and + defining information about the file. Level 1 information contains + the information about the pieces of a file shared by many objects + in the file (such as B-trees and heaps). Level 2 is the rest + of the file and contains all of the data objects with each object + partitioned into header information, also known as + <em>metadata</em>, and data.</p> + + <p>The various components of the lower-level data objects are + described in pairs of tables. 
The first table shows the format + layout, and the second table describes the fields. The titles + of format layout tables begin with “Layout”. The + titles of the tables where the fields are described begin with + “Fields”. For example, the table that describes the + format of the <a href="#V2Btrees">version 2 B-tree header</a> has + a title of “Layout: Version 2 B-tree Header”, and the + fields in the version 2 B-tree header are described in the table + titled “Fields: Version 2 B-tree Header”. + + <p>The sizes of various fields in the following layout tables are + determined by looking at the number of columns the field spans + in the table. There are exceptions: </p> + <ul> + <li> The size may be overridden by specifying a size in + parentheses</li> + <li> The size of addresses is determined by the + <em><a href="#SizeOfOffsetsV0">Size of Offsets</a></em> field + in the superblock and is indicated in this document with a + superscripted ‘O’</li> + <li> The size of length fields is determined by the + <em><a href="#SizeOfLengthsV0">Size of Lengths</a></em> field in + the superblock and is indicated in this document with a + superscripted ‘L’</li> + </ul> + + <p>Values for all fields in this document should be treated as unsigned + integers, unless otherwise noted in the description of a field. + Additionally, all metadata fields are stored in little-endian byte + order. + </p> + + <p>All checksums used in the format are computed with the + <a href="http://www.burtleburtle.net/bob/hash/doobs.html">Jenkins’ + lookup3</a> algorithm. + </p> + + <p>Whenever a bit flag or field is mentioned for an entry, bits are + numbered from the lowest bit position in the entry. + </p> + + <p>Various format tables in this document have cells with + “This space inserted only to align table nicely”. These + entries in the table are just to make the table presentation nicer + and do not represent any values or padding in the file. + </p> + + <a name="ChangesForHdf5_1_12"> + <h3>I.B. 
Changes for HDF5 1.12</h3></a> + <p>The following sections have been + changed or added for the 1.12 release:</p> + <ul> + <li>Under <a href="#DatatypeMessage">“The Datatype Message”</a>, + in the Description for “Fields:Datatype Message”, + version 4 was added and Reference class (7) of the datatype was updated to describe version 4.</li> + <li><a href="#AppendixD"> + “Appendix D: Encoding for Dataspace and Reference”</a> + was added. </li> + </ul> + + + <a name="ChangesForHdf5_1_10"> + <h3>I.C. Changes for HDF5 1.10</h3></a> + + <p>The following sections have been + changed or added for the 1.10 release:</p> + <ul> + <li>In the <a href="#Superblock"> + “Disk Format: Level 0A - Format Signature and + Superblock”</a> section, version 3 of the superblock was + added. </li> + <li>In the <a href="#SuperblockExt"> + “Disk Format: Level 0C - Superblock Extension”</a> + section, a link to the Data Storage message was added. </li> + <li>In the <a href="#V2Btrees"> + “Disk Format: Level 1A2 - Version 2 B-trees”</a> + section, additional B-tree types were added. Tables that + describe the <a href="#V2BtreesType10">type 10</a> and + <a href="#V2BtreesType11">11</a> record layouts were added at + the end of the section.</li> + <li>The <a href="#GlobalHeapVDS">“Disk Format: Level 1F - + Global Heap Block for Virtual Datasets”</a> was added. + </li> + <li><a href="#LayoutMessage"> + “The Data Layout Message”</a> section was changed. + The name was changed, and <a href="#DataLayoutV4">version 4</a> + of the data layout message was added for the virtual type.</li> + <li>The <a href="#FsinfoMessage"> + “The File Space Info Message”</a> header message + type was added.</li> + <li><a href="#AppendixC"> + “Appendix C: Types of Indexes for Dataset Chunks”</a> + was added. Five indexing types were added.</li> + </ul> + + + + <h2><a name="FileMetaData"> + II. Disk Format: Level 0 - File Metadata</a></h2> + + + + <h3><a name="Superblock"> + II.A. 
Disk Format: Level 0A - Format Signature and Superblock</a></h3> + + <p>The superblock may begin at certain predefined offsets within + the HDF5 file, allowing a block of unspecified content for + users to place additional information at the beginning (and + end) of the HDF5 file without limiting the HDF5 Library’s + ability to manage the objects within the file itself. This + feature was designed to accommodate wrapping an HDF5 file in + another file format or adding descriptive information to an HDF5 + file without requiring the modification of the actual file’s + information. The superblock is located by searching for the + HDF5 format signature at byte offset 0, byte offset 512, and at + successive locations in the file, each double the + previous location; in other words, at these byte offsets: + 0, 512, 1024, 2048, and so on.</p> + + <p>The superblock is composed of the format signature, followed by a + superblock version number and information that is specific to each + version of the superblock.</p> + + <p>Currently, there are four versions of the superblock format:</p> + <ul> + <li>Version 0 is the default format.</li> + <li>Version 1 is the same as version 0 but with the + “<em>Indexed Storage Internal Node K</em>” field + for storing a non-default B-tree ‘K’ value.</li> + <li>Version 2 has some fields eliminated and compressed from + superblock format versions 0 and 1. It has added checksum support + and superblock extension to store additional superblock + metadata.</li> + <li>Version 3 is the same as version 2 except that the field + “<em>File Consistency Flags</em>” is used for file + locking.
This format version will enable support for the latest + version.</li> + </ul> + + <p>Versions 0 and 1 of the superblock are described below:</p> + + + <div align="center"> + <table class="format"> + <caption> + Layout: Superblock (Versions 0 and 1) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Format Signature + <em>(8 bytes)</em><br /><br /></td> + </tr> + + <tr> + <td>Version # of Superblock</td> + <td>Version # of File’s Free Space Storage</td> + <td>Version # of Root Group Symbol Table Entry</td> + <td>Reserved <em>(zero)</em></td> + </tr> + + <tr> + <td>Version Number of Shared Header Message Format</td> + <td>Size of Offsets</td> + <td>Size of Lengths</td> + <td>Reserved <em>(zero)</em></td> + </tr> + + <tr> + <td colspan="2">Group Leaf Node K</td> + <td colspan="2">Group Internal Node K</td> + </tr> + + <tr> + <td colspan="4">File Consistency Flags</td> + </tr> + + <tr> + <td colspan="2" style="border:dotted;">Indexed Storage Internal Node K<sup>1</sup></td> + <td colspan="2" style="border:dotted;">Reserved + <em>(zero)</em><sup>1</sup></td> + </tr> + + <tr> + <td colspan="4"><br />Base Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of File Free space Info<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />End of File Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Driver Information Block Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Root Group Symbol Table Entry</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with a ‘1’ in the above table are + new in version 1 of the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) 
+ </td></tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Superblock (Versions 0 and 1) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Format Signature</p></td> + <td><p>This field contains a constant value and can be used to + quickly identify a file as being an HDF5 file. The + constant value is designed to allow easy identification of + an HDF5 file and to allow certain types of data corruption + to be detected. The file signature of an HDF5 file always + contains the following values:</p> + <center> + <table border align="center" cellpadding="4"> + <tr align="center"> + <td align="right">Decimal:</td> + <td width="8%">137</td> + <td width="8%">72</td> + <td width="8%">68</td> + <td width="8%">70</td> + <td width="8%">13</td> + <td width="8%">10</td> + <td width="8%">26</td> + <td width="8%">10</td> + </tr> + + <tr align="center"> + <td align="right">Hexadecimal:</td> + <td>89</td> + <td>48</td> + <td>44</td> + <td>46</td> + <td>0d</td> + <td>0a</td> + <td>1a</td> + <td>0a</td> + </tr> + + <tr align="center"> + <td align="right">ASCII C Notation:</td> + <td>\211</td> + <td>H</td> + <td>D</td> + <td>F</td> + <td>\r</td> + <td>\n</td> + <td>\032</td> + <td>\n</td> + </tr> + </table> + </center> + <p>This signature both identifies the file as an HDF5 file + and provides for immediate detection of common + file-transfer problems. The first two bytes distinguish + HDF5 files on systems that expect the first two bytes to + identify the file type uniquely. The first byte is + chosen as a non-ASCII value to reduce the probability + that a text file may be misrecognized as an HDF5 file; + also, it catches bad file transfers that clear bit + 7. Bytes two through four name the format. The CR-LF + sequence catches bad file transfers that alter newline + sequences. The control-Z character stops file display + under MS-DOS. 
The final line feed checks for the inverse + of the CR-LF translation problem. (This is a direct + descendant of the + <a href="http://www.libpng.org/pub/png/spec/iso/index-object.html#5PNG-file-signature">PNG</a> file + signature.)</p> + <p><em>This field is present in version 0+ of the superblock.</em> + </p></td> + </tr> + + <tr> + <td><p>Version Number of the Superblock</p></td> + <td><p>This value is used to determine the format of the + information in the superblock. When the format of the + information in the superblock is changed, the version number + is incremented to the next integer and can be used to + determine how the information in the superblock is + formatted.</p> + + <p>Values of 0, 1, 2, and 3 are defined for this field (the + format of versions 2 and 3 is described below, not here). + </p> + + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Version Number of the File’s Free Space + Information</p></td> + <td> + <p>This value is used to determine the format of the + file’s free space information. + </p> + <p>The only value currently valid in this field is ‘0’, which + indicates that the file’s free space is as described + <a href="#FreeSpaceManager">below</a>. + </p> + + <p><em>This field is present in versions 0 and 1 of the + superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Version Number of the Root Group Symbol Table + Entry</p></td> + <td><p>This value is used to determine the format of the + information in the Root Group Symbol Table Entry.
When the + format of the information in that field is changed, the + version number is incremented to the next integer and can be + used to determine how the information in the field + is formatted.</p> + <p>The only value currently valid in this field is ‘0’, + which indicates that the root group symbol table entry is + formatted as described <a href="#SymbolTableEntry">below</a>.</p> + <p><em>This field is present in version 0 and 1 of the + superblock.</em></p> + </td> + </tr> + + <tr> + <td><p>Version Number of the Shared Header Message Format</p></td> + <td><p>This value is used to determine the format of the + information in a shared object header message. Since the format + of the shared header messages differs from the other private + header messages, a version number is used to identify changes + in the format. + </p> + <p>The only value currently valid in this field is ‘0’, which + indicates that shared header messages are formatted as + described <a href="#ObjectHeaderMessages">below</a>. + </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p><a name="SizeOfOffsetsV0">Size of Offsets</a></p></td> + <td><p>This value contains the number of bytes used to store + addresses in the file. The values for the addresses of + objects in the file are offsets relative to a base address, + usually the address of the superblock signature. This + allows a wrapper to be added after the file is created + without invalidating the internal offset locations. + </p> + + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p><a name="SizeOfLengthsV0">Size of Lengths</a></p></td> + <td><p>This value contains the number of bytes used to store + the size of an object. 
+ </p> + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Group Leaf Node K</p></td> + <td> + <p>Each leaf node of a group B-tree will have at + least this many entries but not more than twice this + many. If a group has a single leaf node then it + may have fewer entries. + </p> + <p>This value must be greater than zero. + </p> + <p>See the <a href="#Btrees">description</a> of B-trees below. + </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Group Internal Node K</p></td> + <td> + <p>Each internal node of a group B-tree will have at + least this many entries but not more than twice this + many. If the group has only one internal + node then it might have fewer entries. + </p> + <p>This value must be greater than zero. + </p> + <p>See the <a href="#Btrees">description</a> of B-trees below. + </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>File Consistency Flags</p></td> + <td> + <p>This field is unused and should be ignored. + </p> + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Indexed Storage Internal Node K</p></td> + <td> + <p>Each internal node of an indexed storage B-tree will have at + least this many entries but not more than twice this + many. If the index storage B-tree has only one internal + node then it might have fewer entries. + </p> + <p>This value must be greater than zero. + </p> + <p>See the <a href="#Btrees">description</a> of B-trees below. + </p> + + <p><em>This field is present in version 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Base Address</p></td> + <td> + <p>This is the absolute file address of the first byte of + the HDF5 data within the file. 
The library currently + constrains this value to be the absolute file address + of the superblock itself when creating new files; + future versions of the library may provide greater + flexibility. When an existing file is opened and this address does + not match the offset of the superblock, the library assumes + that the entire contents of the HDF5 file have been adjusted in + the file and adjusts the base address and end of file address to + reflect their new positions in the file. Unless otherwise noted, + all other file addresses are relative to this base + address. + </p> + + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Address of File Free space Info</p></td> + <td> + <p>The file’s free space is not persistent for version 0 and 1 of + the superblock. + Currently this field always contains the + <a href="#UndefinedAddress">undefined address</a>. + </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>End of File Address</p></td> + <td> + <p>This is the absolute file address of the first byte past + the end of all HDF5 data. It is used to determine whether a + file has been accidentally truncated and as an address where + file data allocation can occur if space from the free list is + not used. + </p> + + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Driver Information Block Address</p></td> + <td> + <p>This is the relative file address of the file driver + information block which contains driver-specific + information needed to reopen the file. If there is no + driver information block then this entry should be the + <a href="#UndefinedAddress">undefined address</a>.
+ </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + + <tr> + <td><p>Root Group Symbol Table Entry</p></td> + <td> + <p>This is the <a href="#SymbolTableEntry">symbol table entry</a> + of the root group, which serves as the entry point into + the group graph for the file. + </p> + + <p><em>This field is present in version 0 and 1 of the superblock.</em> + </p> + </td> + </tr> + </table> + </div> + + <br /> + <br /> + <br /> + <p>Versions 2 and 3 of the superblock are described below:</p> + + <div align="center"> + <table class="format"> + <caption> + Layout: Superblock (Versions 2 and 3) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Format Signature + <em>(8 bytes)</em><br /><br /></td> + </tr> + + <tr> + <td>Version # of Superblock</td> + <td>Size of Offsets</td> + <td>Size of Lengths</td> + <td>File Consistency Flags</td> + </tr> + + <tr> + <td colspan="4"><br />Base Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Superblock Extension Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />End of File Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Root Group Object Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Superblock Checksum</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) 
+ </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Superblock (Versions 2 and 3) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Format Signature</p></td> + <td> + <p>This field is the same as described for versions 0 and 1 of the + superblock. + </p></td> + </tr> + + <tr> + <td><p>Version Number of the Superblock</p></td> + <td> + <p>This field has a value of 2 or 3 and has the same meaning as for + versions 0 and 1. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Offsets</p></td> + <td> + <p>This field is the same as described for + <a href="#SizeOfOffsetsV0">versions 0 and 1</a> of the + superblock. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Lengths</p></td> + <td> + <p>This field is the same as described for + <a href="#SizeOfLengthsV0">versions 0 and 1</a> of the + superblock. + </p> + </td> + </tr> + + <tr> + <td><p>File Consistency Flags</p></td> + + <td> + <p>For superblock version + 2: This field is unused and should be ignored.</p> + <p>For superblock version + 3: This value contains flags to ensure file consistency for + file locking. Currently, the following bit flags are defined:</p> + <ul> + <li>Bit 0 if set indicates that the file has been opened for + write access.</li> + <li>Bit 1 is reserved for future use.</li> + <li>Bit 2 if set indicates that the file has been opened for + single-writer/multiple-reader (SWMR) write access.</li> + <li>Bits 3-7 are reserved for future use.</li> + </ul> + <p> + Bit 0 should be set as the first action when a file has been + opened for write access. Bit 2 should be set when a file + has been opened for SWMR write access. These two bits should + be cleared only as the final action when closing a file.
+ </p> + <p><em>This field is present in version 0+ of the superblock.</em> + </p> + <p><em>The size of this + field has been reduced from 4 bytes in superblock format + versions 0 and 1 to 1 byte.</em> + </p> + </td> + + </tr> + + <tr> + <td><p>Base Address</p></td> + <td> + <p>This field is the same as described for versions 0 and + 1 of the superblock. + </p> + </td> + </tr> + + <tr> + <td><p>Superblock Extension Address</p></td> + <td> + <p>The field is the address of the object header for the + <a href="#SuperblockExt">superblock extension</a>. + If there is no extension then this entry should be the + <a href="#UndefinedAddress">undefined address</a>. + </p> + </td> + </tr> + + <tr> + <td><p>End of File Address</p></td> + <td> + <p>This field is the same as described for versions 0 and 1 of the + superblock. + </p> + </td> + </tr> + + <tr> + <td><p>Root Group Object Header Address</p></td> + <td> + <p>This is the address of + the <a href="#DataObject">root group object header</a>, + which serves as the entry point into the group graph for the file. + </p> + </td> + </tr> + + <tr> + <td><p>Superblock Checksum</p></td> + <td> + <p>The checksum for the superblock. + </p> + </td> + </tr> + + </table> + </div> + + <br /> + + <h3><a name="DriverInfo"> + II.B. Disk Format: Level 0B - File Driver Info</a></h3> + + <p>The <b>driver information block</b> is an optional region of the + file which contains information needed by the file driver + to reopen a file. 
The format is described below:</p> + + + <div align="center"> + <table class="format"> + <caption> + Layout: Driver Information Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="3">Reserved</td> + </tr> + + <tr> + <td colspan="4">Driver Information Size</td> + </tr> + + <tr> + <td colspan="4"><br />Driver Identification + <em>(8 bytes)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br /><br />Driver Information + <em>(variable size)</em><br /><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Driver Information Block + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number of the Driver Information Block. + This document describes version 0. + </p> + </td> + </tr> + + <tr> + <td><p>Driver Information Size</p></td> + <td> + <p>The size in bytes of the <em>Driver Information</em> field. + </p> + </td> + </tr> + + <tr> + <td><p>Driver Identification</p></td> + <td> + <p>This is an eight-byte ASCII string without null + termination which identifies the driver and/or version number + of the Driver Information Block. The predefined driver encoded + in this field by the HDF5 Library is identified by the + letters <code>NCSA</code> followed by the first four characters of + the driver name. If the Driver Information block is not + the original version then the last letter(s) of the + identification will be replaced by a version number in + ASCII, starting with 0. + </p> + <p> + Identification for user-defined drivers is also eight bytes long. + It can be arbitrary but should be unique and should not begin with + the four-character prefix “NCSA”.
+ </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Driver Information</p></td> + <td>Driver information is stored in a format defined by the + file driver (see description below).</td> + </tr> + </table> + </div> + + <br /> + <p>The two drivers encoded in the <em>Driver Identification</em> + field are as follows:</p> + <ul> + <li> + Multi driver: + <p> + The identifier for this driver is “NCSAmult”. + This driver provides a mechanism for segregating raw data and different types of metadata + into multiple files. + These files are viewed by the library as a single virtual HDF5 file with a single file address space. + A maximum of 6 files will be created for the following data: + superblock, B-tree, raw data, global heap, local heap, and object header. + More than one type of data can be written to the same file. + </p></li> + <li> + Family driver: + <p> + The identifier for this driver is “NCSAfami” and is encoded in this field for library version 1.8 and after. + This driver is designed for systems that do not support files larger than 2 gigabytes + by splitting the HDF5 file address space across several smaller files. + It does nothing to segregate metadata and raw data; + they are mixed in the address space just as they would be in a single contiguous file.
+ </p></li> + </ul> + <p>The format of the <em>Driver Information</em> field for the + above two drivers are described below:</p> + + <div align="center"> + <table class="format"> + <caption> + Layout: Multi Driver Information + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Member Mapping</td> + <td>Member Mapping</td> + <td>Member Mapping</td> + <td>Member Mapping</td> + </tr> + + <tr> + <td>Member Mapping</td> + <td>Member Mapping</td> + <td>Reserved</td> + <td>Reserved</td> + </tr> + + <tr> + <td colspan="4"><br />Address of Member File 1<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />End of Address for Member File 1<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Member File 2<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />End of Address for Member File 2<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />... ...<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Member File N<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />End of Address for Member File N<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Name of Member File 1 + <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Name of Member File 2 + <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />... ...<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Name of Member File N + <em>(variable size)</em><br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Multi Driver Information + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Member Mapping</p></td> + <td><p>These fields are integer values from 1 to 6 + indicating how the data can be mapped to or merged with another type of + data. 
+ <table class="list"> + <tr> + <th width="20%" align="center">Member Mapping</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center">1</td> + <td>The superblock data.</td> + </tr> + <tr> + <td align="center">2</td> + <td>The B-tree data.</td> + </tr> + <tr> + <td align="center">3</td> + <td>The raw data.</td> + </tr> + <tr> + <td align="center">4</td> + <td>The global heap data.</td> + </tr> + <tr> + <td align="center">5</td> + <td>The local heap data.</td> + </tr> + <tr> + <td align="center">6</td> + <td>The object header data.</td> + </tr> + </table></p> + <p>For example, if the third field has the value 3 and all the rest have the + value 1, it means there are two files: one for raw data, and one for superblock, + B-tree, global heap, local heap, and object header.</p> + </td> + </tr> + + <tr> + <td><p>Reserved</p></td> + <td><p>These fields are reserved and should always be zero.</p></td> + </tr> + + <tr> + <td><p>Address of Member File N</p></td> + <td><p>This field specifies the virtual address at which the member file starts.</p> + <p>N is the number of member files.</p> + </td> + </tr> + + <tr> + <td><p>End of Address for Member File N</p></td> + <td><p>This field is the end of the allocated address for the member file. + </p></td> + </tr> + + <tr> + <td><p>Name of Member File N</p></td> + <td><p>This field is the null-terminated name of the member file and + its length should be a multiple of 8 bytes. + Additional bytes will be padded with <em>NULL</em>s. The default naming + convention is <em>%s-X.h5</em>, where <em>X</em> is one of the letters + <em>s</em> (for superblock), <em>b</em> (for B-tree), <em>r</em> (for raw data), + <em>g</em> (for global heap), <em>l</em> (for local heap), and <em>o</em> (for + object header). The name of the whole HDF5 file will be substituted for the <em>%s</em> + in the string.
+ </p> + </td> + </tr> + </table> + </div> + + <br /> + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Family Driver Information + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="8"><br />Size of Member File<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Family Driver Information + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Size of Member File</p></td> + <td><p>This field is the size of the member file in the family of files.</p></td> + </tr> + </table> + </div> + + <h3><a name="SuperblockExt"> + II.C. Disk Format: Level 0C - Superblock Extension</a></h3> + + <p>The <em>superblock extension</em> is used to store superblock metadata + which is either optional, or added after the version of the superblock + was defined. Superblock extensions may only exist when version 2 + or later of the superblock is used. A superblock extension is an object + header which may hold the following messages:</p> + <ul> + <li> + <a href="#SOHMTableMessage">Shared Message Table message</a> containing + information to locate the master table of shared object header message + indices.</li> + <li> + <a href="#BtreeKValuesMessage">B-tree ‘K’ Values message</a> containing + non-default B-tree ‘K’ values.</li> + <li> + <a href="#DrvInfoMessage">Driver Info message</a> containing information + needed by the file driver in order to reopen a file. + See also the + <a href="#DriverInfo">“Disk Format: Level 0B - File Driver + Info”</a> section above.</li> + <li> + <a href="#FsinfoMessage">File Space Info message</a> containing + information about file space handling in the file.</li> + </ul> + + + + <h2><a name="FileInfra"> + III. 
Disk Format: Level 1 - File Infrastructure</a></h2> + + <h3><a name="Btrees"> + III.A. Disk Format: Level 1A - B-trees and B-tree Nodes</a></h3> + + <p>B-trees allow flexible storage for objects which tend to grow + in ways that cause the object to be stored discontiguously. B-trees + are described in various algorithms books including “Introduction to + Algorithms” by Thomas H. Cormen, Charles E. Leiserson, and Ronald + L. Rivest. B-trees are used in several places in the HDF5 file format, + when an index is needed for another data structure.</p> + + <p>The version 1 B-tree structure described below is the original + index structure. The version 1 B-trees are being phased out in + favor of the version 2 B-trees described below. Note that both + types of structures may be found in the same file depending on + the application settings when creating the file.</p> + + <h4><a name="V1Btrees"> + III.A.1. Disk Format: Level 1A1 - Version 1 B-trees</a></h4> + + <p>Version 1 B-trees in HDF5 files are an implementation of the + B-link tree. The sibling nodes at a particular level in + the tree are stored in a doubly-linked list. See the + “Efficient Locking for Concurrent Operations on B-trees” + paper by Phillip Lehman and S. Bing Yao as published in the + <cite>ACM Transactions on Database Systems</cite>, Vol. 6, No. 4, + December 1981.</p> + + <p>The B-trees implemented by the file format contain one more + key than the number of children. In other words, each child + pointer out of a B-tree node has a left key and a right key. + The pointers out of internal nodes point to sub-trees while + the pointers out of leaf nodes point to symbol nodes and + raw data chunks. 
+ Aside from that difference, internal nodes and leaf nodes + are identical.</p> + + <div align="center"> + <table class="format"> + <caption> + Layout: B-tree Nodes + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Node Type</td> + <td>Node Level</td> + <td colspan="2">Entries Used</td> + </tr> + + <tr> + <td colspan="4"><br />Address of Left Sibling<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Right Sibling<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Key 1 <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Child 1<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Key 2 <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Child 2<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Key 2<em>K</em> <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Child 2<em>K</em><sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Key 2<em>K</em>+1 + <em>(variable size)</em></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: B-tree Nodes + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>TREE</code>” + is used to indicate the beginning of a B-tree node. This + gives file consistency checking utilities a better chance + of reconstructing a damaged file. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Node Type</p></td> + <td> + <p>Each B-tree points to a particular type of data. + This field indicates the type of data as well as + implying the maximum degree <em>K</em> of the tree and + the size of each Key field. + + + <table class="list"> + <tr> + <th width="20%" align="center">Node Type</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center">0</td> + <td>This tree points to group nodes.</td> + </tr> + <tr> + <td align="center">1</td> + <td>This tree points to raw data chunk nodes.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Node Level</p></td> + <td> + <p>The node level indicates the level at which this node + appears in the tree (leaf nodes are at level zero). Not + only does the level indicate whether child pointers + point to sub-trees or to data, but it can also be used + to help file consistency checking utilities reconstruct + damaged trees. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Entries Used</p></td> + <td> + <p>This determines the number of children to which this + node points. All nodes of a particular type of tree + have the same maximum degree, but most nodes will point + to less than that number of children. The valid child + pointers and keys appear at the beginning of the node + and the unused pointers and keys appear at the end of + the node. The unused pointers and keys have undefined + values. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Address of Left Sibling</p></td> + <td> + <p>This is the relative file address of the left sibling of + the current node. If the current + node is the left-most node at this level then this field + is the <a href="#UndefinedAddress">undefined address</a>. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Address of Right Sibling</p></td> + <td> + <p>This is the relative file address of the right sibling of + the current node. 
If the current + node is the right-most node at this level then this + field is the <a href="#UndefinedAddress">undefined address</a>. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Keys and Child Pointers</p></td> + <td> + <p>Each tree has 2<em>K</em>+1 keys with 2<em>K</em> + child pointers interleaved between the keys. The number + of keys and child pointers actually containing valid + values is determined by the node’s <em>Entries + Used</em> field. If that field is <em>N</em>, then the + B-tree contains <em>N</em> child pointers and + <em>N</em>+1 keys. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Key</p></td> + <td> + <p>The format and size of the key values are determined by + the type of data to which this tree points. The keys are + ordered and are boundaries for the contents of the child + pointer; that is, the key values represented by child + <em>N</em> fall between Key <em>N</em> and Key + <em>N</em>+1. Whether the interval is open or closed on + each end is determined by the type of data to which the + tree points. + </p> + + <p> + The format of the key depends on the node type. + For nodes of node type 0 (group nodes), the key is formatted as + follows: + + <table class="list"> + <tr> + <td width="20%">A single field of + <i><a href="#SizeOfLengthsV0">Size of Lengths</a></i> + bytes:</td> + <td width="80%">Indicates the byte offset into the local heap + for the first object name in the subtree which + that key describes. + </td> + </tr> + </table> + </p> + + + <p> + For nodes of node type 1 (chunked raw data nodes), the key is + formatted as follows: + + <table class="list"> + <tr> + <td width="20%">Bytes 1-4:</td> + <td width="80%">Size of chunk in bytes.</td> + </tr> + <tr> + <td>Bytes 5-8:</td> + <td>Filter mask, a 32-bit bit field indicating which + filters have been skipped for this chunk. 
Each filter + has an index number in the pipeline (starting at 0, with + the first filter to apply) and if that filter is skipped, + the bit corresponding to its index is set.</td> + </tr> + <tr> + <td>(<em>D + 1</em>) 64-bit fields:</td> + <td>The offset of the + chunk within the dataset where <i>D</i> is the number + of dimensions of the dataset, and the last value is the + offset within the dataset’s datatype and should + always be zero. For example, if + a chunk in a 3-dimensional dataset begins at the + position <code>[5,5,5]</code>, there will be three + such 64-bit values, each with the value of + <code>5</code>, followed by a <code>0</code> value.</td> + </tr> + </table> + </p> + + </td> + </tr> + + <tr valign="top"> + <td><p>Child Pointer</p></td> + <td> + <p>The tree node contains file addresses of subtrees or + data depending on the node level. Nodes at Level 0 point + to data addresses, either raw data chunks or group nodes. + Nodes at non-zero levels point to other nodes of the + same B-tree. + </p> + <p>For raw data chunk nodes, the child pointer is the address + of a single raw data chunk. For group nodes, the child pointer + points to a <a href="#SymbolTable">symbol table</a>, which contains + information for multiple symbol table entries. + </p> + </td> + </tr> + </table> + </div> + + <p> + Conceptually, each B-tree node looks like this:</p> + <center> + <table> + <tr valign="top" align="center"> + <td>key[0]</td><td> </td> + <td>child[0]</td><td> </td> + <td>key[1]</td><td> </td> + <td>child[1]</td><td> </td> + <td>key[2]</td><td> </td> + <td>...</td><td> </td> + <td>...</td><td> </td> + <td>key[<i>N</i>-1]</td><td> </td> + <td>child[<i>N</i>-1]</td><td> </td> + <td>key[<i>N</i>]</td> + </tr> + </table> + </center> + <br /> + + where child[<i>i</i>] is a pointer to a sub-tree (at a level + above Level 0) or to data (at Level 0). + Each key[<i>i</i>] describes an <i>item</i> stored by the B-tree + (a chunk or an object of a group node). 
The range of values + represented by child[<i>i</i>] is indicated by key[<i>i</i>] + and key[<i>i</i>+1]. + + + <p>The following question must next be answered: + “Is the value described by key[<i>i</i>] contained in + child[<i>i</i>-1] or in child[<i>i</i>]?” + The answer depends on the type of tree. + In trees for groups (node type 0), the object described by + key[<i>i</i>] is the greatest object contained in + child[<i>i</i>-1] while in chunk trees (node type 1) the + chunk described by key[<i>i</i>] is the least chunk in + child[<i>i</i>].</p> + + <p>That means that key[0] for group trees is sometimes unused; + it points to offset zero in the heap, which is always the + empty string and compares as “less-than” any valid + object name.</p> + + <p>And key[<i>N</i>] for chunk trees is sometimes unused; + it contains a chunk offset which compares as “greater-than” + any other chunk offset and has a chunk byte size of zero + to indicate that it is not actually allocated.</p> + + <h4><a name="V2Btrees"> + III.A.2. Disk Format: Level 1A2 - Version 2 B-trees</a></h4> + + <p>Version 2 (v2) B-trees are “traditional” B-trees + with one major difference. Instead of just using a simple pointer + (or address in the file) to a child of an internal node, the pointer + to the child node contains two additional pieces of information: + the number of records in the child node itself, and the total number + of records in the child node and all its descendants. Storing this + additional information allows fast array-like indexing to locate + the n<sup>th</sup> record in the B-tree.</p> + + <p>The entry into a version 2 B-tree is a header which contains global + information about the structure of the B-tree. The <em>root node + address</em> + field in the header points to the B-tree root node, which is either an + internal or leaf node, depending on the value in the header’s + <em>depth</em> field. 
An internal node consists of records plus + pointers to further leaf or internal nodes in the tree. A leaf node + consists solely of records. The format of the records depends on + the B-tree type (stored in the header).</p> + + <div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + <tr> + <td>Version</td> + <td>Type</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Node Size</td> + </tr> + <tr> + <td colspan="2">Record Size</td> + <td colspan="2">Depth</td> + </tr> + <tr> + <td>Split Percent</td> + <td>Merge Percent</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4"><br />Root Node Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="2">Number of Records in Root Node</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4"><br />Total Number of Records in B-tree<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) 
+ </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree Header + </caption> + + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>BTHD</code>” + is used to indicate the header of a version 2 (v2) B-tree + node. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number for this B-tree header. This document + describes version 0. + </p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td> + <p>This field indicates the type of B-tree: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center">0</td> + <td>This B-tree is used for testing only. This + value should <em>not</em> be used for storing + records in actual HDF5 files. + </td> + </tr> + <tr> + <td align="center">1</td> + <td>This B-tree is used for indexing indirectly accessed, + non-filtered ‘huge’ fractal heap objects. + </td> + </tr> + <tr> + <td align="center">2</td> + <td>This B-tree is used for indexing indirectly accessed, + filtered ‘huge’ fractal heap objects. + </td> + </tr> + <tr> + <td align="center">3</td> + <td>This B-tree is used for indexing directly accessed, + non-filtered ‘huge’ fractal heap objects. + </td> + </tr> + <tr> + <td align="center">4</td> + <td>This B-tree is used for indexing directly accessed, + filtered ‘huge’ fractal heap objects. + </td> + </tr> + <tr> + <td align="center">5</td> + <td>This B-tree is used for indexing the ‘name’ field for + links in indexed groups. + </td> + </tr> + <tr> + <td align="center">6</td> + <td>This B-tree is used for indexing the ‘creation order’ + field for links in indexed groups. + </td> + </tr> + <tr> + <td align="center">7</td> + <td>This B-tree is used for indexing shared object header + messages. 
+ </td> + </tr> + <tr> + <td align="center">8</td> + <td>This B-tree is used for indexing the ‘name’ field for + indexed attributes. + </td> + </tr> + <tr> + <td align="center">9</td> + <td>This B-tree is used for indexing the ‘creation order’ + field for indexed attributes. + </td> + </tr> + + <tr> + <td align="center">10</td> + <td>This B-tree is used for indexing chunks of + datasets with no filters and with more than one + dimension of unlimited extent. + </td> + </tr> + + <tr> + <td align="center">11</td> + <td>This B-tree is used for indexing chunks of + datasets with filters and more than one dimension + of unlimited extent. + </td> + </tr> + </table></p> + <p>The format of records for each type is described below.</p> + </td> + </tr> + + <tr valign="top"> + <td><p>Node Size</p></td> + <td> + <p>This is the size in bytes of all B-tree nodes. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Record Size</p></td> + <td> + <p>This field is the size in bytes of the B-tree record. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Depth</p></td> + <td> + <p>This is the depth of the B-tree. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Split Percent</p></td> + <td> + <p>The percent full that a node needs to increase above before it + is split. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Merge Percent</p></td> + <td> + <p>The percent full that a node needs to decrease below before it + is merged. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Root Node Address</p></td> + <td> + <p>This is the address of the root B-tree node. A B-tree with + no records will have the <a href="#UndefinedAddress">undefined + address</a> in this field. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Number of Records in Root Node</p></td> + <td> + <p>This is the number of records in the root node. 
+ </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Total Number of Records in B-tree</p></td> + <td> + <p>This is the total number of records in the entire B-tree. + </p> + </td> + </tr> + + <tr valign="top"> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the B-tree header. + </p> + </td> + </tr> + </table> + </div> + + <br /> + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree Internal Node + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + <tr> + <td>Version</td> + <td>Type</td> + <td colspan="2">Records 0, 1, 2...N-1 <em>(variable size)</em></td> + </tr> + <tr> + <td colspan="4"><br />Child Node Pointer 0<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Number of Records N<sub>0</sub> for Child + Node 0 <em>(variable size)</em></td> + </tr> + <tr> + <td colspan="4"><br />Total Number of Records for Child Node 0 + <em>(optional, variable size)</em></td> + </tr> + <tr> + <td colspan="4"><br />Child Node Pointer 1<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Number of Records N<sub>1</sub> for + Child Node 1 <em>(variable size)</em></td> +</tr> +<tr> + <td colspan="4"><br />Total Number of Records for Child Node 1 + <em>(optional, variable size)</em></td> +</tr> +<tr> + <td colspan="4">...</td> +</tr> +<tr> + <td colspan="4"><br />Child Node Pointer N<sup>O</sup><br /><br /></td> +</tr> +<tr> + <td colspan="4"><br />Number of Records N<sub>n</sub> for + Child Node N <em>(variable size)</em></td> +</tr> +<tr> + <td colspan="4"><br />Total Number of Records for Child Node N + <em>(optional, variable size)</em></td> +</tr> +<tr> + <td colspan="4">Checksum</td> +</tr> +</table> + +<table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of 
Offsets</a> field in the superblock.) + </td></tr> +</table> +</div> + + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree Internal Node + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>BTIN</code>” is + used to indicate the internal node of a B-tree. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number for this B-tree internal node. + This document describes version 0. + </p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td> + <p>This field is the type of the B-tree node. It should always + be the same as the B-tree type in the header. + </p> + </td> + </tr> + + <tr> + <td><p>Records</p></td> + <td> + <p>The size of this field is determined by the number of records + for this node and the record size (from the header). The format + of records depends on the type of B-tree. + </p> + </td> + </tr> + + <tr> + <td><p>Child Node Pointer</p></td> + <td> + <p>This field is the address of the child node pointed to by the + internal node. + </p> + </td> + </tr> + + <tr> + <td><p>Number of Records in Child Node</p></td> + <td> + <p>This is the number of records in the child node pointed to by + the corresponding <em>Node Pointer</em>. + </p> + <p>The number of bytes used to store this field is determined by + the maximum possible number of records able to be stored in the + child node. + </p> + <p> + The maximum number of records in a child node is computed + in the following way: + + <ul> + <li>Subtract the fixed size overhead for + the child node (for example, its signature, version, + checksum, and so on and <em>one</em> pointer triplet + of information for the child node (because there is one + more pointer triplet than records in each internal node)) + from the size of nodes for the B-tree. 
</li> + <li>Divide that result by the size of a record plus the + pointer triplet of information stored to reach each + child node from this node.</li> + </ul> + + </p> + <p> + Note that leaf nodes do not encode any + child pointer triplets, so the maximum number of records in a + leaf node is just the node size minus the leaf node overhead, + divided by the record size. + </p> + <p> + Also note that the first level of internal nodes above the + leaf nodes do not encode the <em>Total Number of Records in Child + Node</em> value in the child pointer triplets (since it is the + same as the <em>Number of Records in Child Node</em>), so the + maximum number of records in these nodes is computed with the + equation above, but using (<em>Child Pointer</em>, <em>Number of + Records in Child Node</em>) pairs instead of triplets. + </p> + <p> + The number of + bytes used to encode this field is the least number of bytes + required to encode the maximum number of records in a child + node value for the child nodes below this level + in the B-tree. + </p> + <p> + For example, if the maximum number of child records is + 123, one byte will be used to encode these values in this + node; if the maximum number of child records is + 20000, two bytes will be used to encode these values in this + node; and so on. The maximum number of bytes used to + encode these values is 8 (in other words, an unsigned + 64-bit integer). + </p> + </td> + </tr> + + <tr> + <td><p>Total Number of Records in Child Node</p></td> + <td> + <p>This is the total number of records for the node pointed to by + the corresponding <em>Node Pointer</em> and all its children. + This field exists only in nodes whose depth in the B-tree node + is greater than 1 (in other words, the “twig” + internal nodes, just above leaf nodes, do not store this + field in their child node pointers). 
+ </p> + <p>The number of bytes used to store this field is determined by + the maximum possible number of records able to be stored in the + child node and its descendants. + </p> + <p> + The maximum possible number of records able to be stored in a + child node and its descendants is computed iteratively, in the + following way: The maximum number of records in a leaf node + is computed, then that value is used to compute the maximum + possible number of records in the first level of internal nodes + above the leaf nodes. Multiplying these two values together + determines the maximum possible number of records in child node + pointers for the level of nodes two levels above leaf nodes. + This process is continued up to any level in the B-tree. + </p> + <p> + The number of bytes used to encode this value is computed in + the same way as for the <em>Number of Records in Child Node</em> + field. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for this node. + </p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree Leaf Node + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + <tr> + <td>Version</td> + <td>Type</td> + <td colspan="2">Records 0, 1, 2...N-1 <em>(variable size)</em></td> + </tr> + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree Leaf Node + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>BTLF</code>” + is used to indicate the leaf node of a version 2 (v2) B-tree. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number for this B-tree leaf node. 
+ This document describes version 0. + </p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td> + <p>This field is the type of the B-tree node. It should always + be the same as the B-tree type in the header. + </p> + </td> + </tr> + + <tr> + <td><p>Records</p></td> + <td> + <p>The size of this field is determined by the number of records + for this node and the record size (from the header). The format + of records depends on the type of B-tree. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for this node. + </p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<p>The record layout for each stored (in other words, non-testing) + B-tree type is as follows:</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree, Type 1 Record Layout - Indirectly + Accessed, Non-filtered, ‘Huge’ Fractal Heap Objects + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Huge Object Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Huge Object Length<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Huge Object ID<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) 
+ </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree, Type 1 Record Layout - Indirectly + Accessed, Non-filtered, ‘Huge’ Fractal Heap Objects + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Huge Object Address</p></td> + <td> + <p>The address of the huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Huge Object Length</p></td> + <td> + <p>The length of the huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Huge Object ID</p></td> + <td> + <p>The heap ID for the huge object. + </p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree, Type 2 Record Layout - Indirectly + Accessed, Filtered, ‘Huge’ Fractal Heap Objects + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Filtered Huge Object Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Filtered Huge Object Length<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">Filter Mask</td> + </tr> + <tr> + <td colspan="4"><br />Filtered Huge Object Memory Size<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Huge Object ID<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) 
+ </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree, Type 2 Record Layout - Indirectly + Accessed, Filtered, ‘Huge’ Fractal Heap Objects + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Filtered Huge Object Address</p></td> + <td> + <p>The address of the filtered huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Filtered Huge Object Length</p></td> + <td> + <p>The length of the filtered huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Filter Mask</p></td> + <td> + <p>A 32-bit bit field indicating which filters have been skipped for + this chunk. Each filter has an index number in the pipeline + (starting at 0, with the first filter to apply) and if that + filter is skipped, the bit corresponding to its index is set. + </p> + </td> + </tr> + + <tr> + <td><p>Filtered Huge Object Memory Size</p></td> + <td> + <p>The size of the de-filtered huge object in memory. + </p> + </td> + </tr> + + <tr> + <td><p>Huge Object ID</p></td> + <td> + <p>The heap ID for the huge object. + </p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree, Type 3 Record Layout - Directly + Accessed, Non-filtered, ‘Huge’ Fractal Heap Objects + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Huge Object Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Huge Object Length<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) 
+ </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree, Type 3 Record Layout - Directly + Accessed, Non-filtered, ‘Huge’ Fractal Heap Objects + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Huge Object Address</p></td> + <td> + <p>The address of the huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Huge Object Length</p></td> + <td> + <p>The length of the huge object in the file. + </p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree, Type 4 Record Layout - Directly + Accessed, Filtered, ‘Huge’ Fractal Heap Objects + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Filtered Huge Object Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Filtered Huge Object Length<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">Filter Mask</td> + </tr> + <tr> + <td colspan="4"><br />Filtered Huge Object Memory Size<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) 
+ </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree, Type 4 Record Layout - Directly + Accessed, Filtered, ‘Huge’ Fractal Heap Objects + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Filtered Huge Object Address</p></td> + <td> + <p>The address of the filtered huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Filtered Huge Object Length</p></td> + <td> + <p>The length of the filtered huge object in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Filter Mask</p></td> + <td> + <p>A 32-bit bit field indicating which filters have been skipped for + this chunk. Each filter has an index number in the pipeline + (starting at 0, with the first filter to apply) and if that + filter is skipped, the bit corresponding to its index is set. + </p> + </td> + </tr> + + <tr> + <td><p>Filtered Huge Object Memory Size</p></td> + <td> + <p>The size of the de-filtered huge object in memory. + </p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree, Type 5 Record Layout - Link Name + for Indexed Group + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Hash of Name</td> + </tr> + <tr> + <td colspan="4">ID <em>(bytes 1-4)</em></td> + </tr> + + <tr> + <td colspan="3">ID <em>(bytes 5-7)</em></td> + </tr> + + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree, Type 5 Record Layout - Link Name + for Indexed Group + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Hash</p></td> + <td> + <p>This field is hash value of the name for the link. The hash + value is the Jenkins’ lookup3 checksum algorithm applied to + the link’s name. 
+ </p> + </td> + </tr> + + <tr> + <td><p>ID</p></td> + <td> + <p>This is a 7-byte sequence of bytes and is the heap ID for the + link record in the group’s fractal heap.</p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree, Type 6 Record Layout - Creation + Order for Indexed Group + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Creation Order + <em>(8 bytes)</em><br /><br /></td> + </tr> + <tr> + <td colspan="4">ID <em>(bytes 1-4)</em></td> + </tr> + <tr> + <td colspan="3">ID <em>(bytes 5-7)</em></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree, Type 6 Record Layout - Creation + Order for Indexed Group + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Creation Order</p></td> + <td> + <p>This field is the creation order value for the link. 
+ </p> + </td> + </tr> + + <tr> + <td><p>ID</p></td> + <td> + <p>This is a 7-byte sequence of bytes and is the heap ID for the + link record in the group’s fractal heap.</p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree, Type 7 Record Layout - Shared + Object Header Messages (Sub-type 0 - Message in Heap) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Message Location</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Hash</td> + </tr> + <tr> + <td colspan="4">Reference Count</td> + </tr> + <tr> + <td colspan="4"><br />Heap ID <em>(8 bytes)</em><br /><br /></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree, Type 7 Record Layout - Shared + Object Header Messages (Sub-type 0 - Message in Heap) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Message Location</p></td> + <td> + <p>This field indicates the location where the message is stored: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center">0</td> + <td>Shared message is stored in shared message index heap. + </td> + </tr> + <tr> + <td align="center">1</td> + <td>Shared message is stored in object header. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Hash</p></td> + <td> + <p>This field is the hash value of the shared message. 
The hash + value is the Jenkins’ lookup3 checksum algorithm applied to + the shared message.</p> + </td> + </tr> + + <tr> + <td><p>Reference Count</p></td> + <td> + <p>The number of objects which reference this message.</p> + </td> + </tr> + + <tr> + <td><p>Heap ID</p></td> + <td> + <p>This is an 8-byte sequence of bytes and is the heap ID for the + shared message in the shared message index’s fractal heap.</p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree, Type 7 Record Layout - Shared + Object Header Messages (Sub-type 1 - Message in Object Header) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Message Location</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Hash</td> + </tr> + <tr> + <td>Reserved (zero)</td> + <td>Message Type</td> + <td colspan="2">Object Header Index</td> + </tr> + <tr> + <td colspan="4"><br />Object Header Address<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) 
+ </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree, Type 7 Record Layout - Shared + Object Header Messages (Sub-type 1 - Message in Object Header) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Message Location</p></td> + <td> + <p>This field Indicates the location where the message is stored: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center">0</td> + <td>Shared message is stored in shared message index heap. + </td> + </tr> + <tr> + <td align="center">1</td> + <td>Shared message is stored in object header. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Hash</p></td> + <td> + <p>This field is hash value of the shared message. The hash + value is the Jenkins’ lookup3 checksum algorithm applied to + the shared message.</p> + </td> + </tr> + + <tr> + <td><p>Message Type</p></td> + <td> + <p>The object header message type of the shared message.</p> + </td> + </tr> + + <tr> + <td><p>Object Header Index</p></td> + <td> + <p>This field indicates that the shared message is the n<sup>th</sup> message + of its type in the specified object header.</p> + </td> + </tr> + + <tr> + <td><p>Object Header Address</p></td> + <td> + <p>The address of the object header containing the shared message.</p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree, Type 8 Record Layout - Attribute + Name for Indexed Attributes + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Heap ID <em>(8 bytes)</em><br /><br /></td> + </tr> + <tr> + <td colspan>Message Flags</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + 
<tr> + <td colspan="4">Creation Order</td> + </tr> + <tr> + <td colspan="4">Hash of Name</td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree, Type 8 Record Layout - Attribute + Name for Indexed Attributes + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Heap ID</p></td> + <td> + <p>This is an 8-byte sequence of bytes and is the heap ID for the + attribute in the object’s attribute fractal heap.</p> + </td> + </tr> + + <tr> + <td><p>Message Flags</p></td> + <td><p>The object header message flags for the attribute message.</p> + </td> + </tr> + + <tr> + <td><p>Creation Order</p></td> + <td> + <p>This field is the creation order value for the attribute. + </p> + </td> + </tr> + + <tr> + <td><p>Hash</p></td> + <td> + <p>This field is hash value of the name for the attribute. The hash + value is the Jenkins’ lookup3 checksum algorithm applied to + the attribute’s name. 
+ </p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree, Type 9 Record Layout - Creation + Order for Indexed Attributes + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Heap ID <em>(8 bytes)</em><br /><br /></td> + </tr> + <tr> + <td colspan>Message Flags</td> + <td colspan="3" bgcolor="#DDDDDD"> + <em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Creation Order</td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree, Type 9 Record Layout - Creation + Order for Indexed Attributes + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Heap ID</p></td> + <td> + <p>This is an 8-byte sequence of bytes and is the heap ID for the + attribute in the object’s attribute fractal heap.</p> + </td> + </tr> + + <tr> + <td><p>Message Flags</p></td> + <td> + <p>The object header message flags for the attribute message.</p> + </td> + </tr> + + <tr> + <td><p>Creation Order</p></td> + <td> + <p>This field is the creation order value for the attribute. 
+ </p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<a name="V2BtType10"></a> + <div align="center"> + <table class="format"> + <caption> + <a name="V2BtreesType10"></a> + Layout: Version 2 B-tree, Type 10 Record Layout - + Non-filtered Dataset Chunks + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Dimension 0 Scaled Offset + <em>(8 bytes)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Dimension 1 Scaled Offset + <em>(8 bytes)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />...<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Dimension #n Scaled Offset + <em>(8 bytes)</em><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree, Type 10 Record Layout - + Non-filtered Dataset Chunks +</caption> +<tr> + <th width="30%">Field Name</th> + <th>Description</th> +</tr> + +<tr> + <td><p>Address</p></td> + <td> + <p>This field is the address of the dataset chunk in the file.</p> + </td> +</tr> + +<tr> + <td><p>Dimension #n Scaled Offset</p></td> + <td> + <p>This field is the scaled offset of the chunk within the + dataset. <em>n</em> is the number of dimensions for the + dataset. The first scaled offset stored in the list is for + the slowest changing dimension, and the last scaled offset + stored is for the fastest changing dimension. 
Scaled offset + is calculated by dividing the chunk dimension sizes into + the chunk offsets.</p> + </td> +</tr> + +</table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + <a name="V2BtreesType11"></a> + Layout: Version 2 B-tree, Type 11 Record Layout - Filtered + Dataset Chunks + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Chunk Size + <em>(variable size; at most 8 bytes)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Filter Mask</td> + </tr> + + <tr> + <td colspan="4"><br />Dimension 0 Scaled Offset + <em>(8 bytes)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Dimension 1 Scaled Offset + <em>(8 bytes)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />...<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Dimension #n Scaled Offset + <em>(8 bytes)</em><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree, Type 11 Record Layout - Filtered + Dataset Chunks +</caption> +<tr> + <th width="30%">Field Name</th> + <th>Description</th> +</tr> + +<tr> + <td><p>Address</p></td> + <td> + <p>This field is the address of the dataset chunk in the file.</p> + </td> +</tr> + +<tr> + <td><p>Chunk Size</p></td> + <td> + <p>This field is the size of the dataset chunk in bytes.</p> + </td> +</tr> + +<tr> + <td><p>Filter Mask</p></td> + <td> + <p>This field is the filter mask which indicates the filter + to skip for the dataset chunk. 
Each filter has an index + number in the pipeline and if that filter is skipped, + the bit corresponding to its index is set.</p> + </td> +</tr> + +<tr> + <td><p>Dimension #n Scaled Offset</p></td> + <td> + <p>This field is the scaled offset of the chunk within + the dataset. <em>n</em> is the number of dimensions for + the dataset. The first scaled offset stored in the list + is for the slowest changing dimension, and the last scaled + offset stored is for the fastest changing dimension.</p> + </td> +</tr> + +</table> +</div> + +<h3><a name="SymbolTable"> + III.B. Disk Format: Level 1B - Group Symbol Table Nodes</a></h3> + +<p>A group is an object internal to the file that allows + arbitrary nesting of objects within the file (including other + groups). A group maps a set of link names in the group to a set + of relative file addresses of objects in the file. Certain metadata + for an object to which the group points can be cached in the + group’s symbol table entry in addition to being in the + object’s header.</p> + +<p>An HDF5 object name space can be stored hierarchically by + partitioning the name into components and storing each + component as a link in a group. The link for a + non-ultimate component points to the group containing + the next component. The link for the last + component points to the object being named.</p> + +<p>One implementation of a group is a collection of symbol table + nodes indexed by a B-tree. Each symbol table node contains entries + for one or more links. 
If an attempt is made to add a link to an + already full symbol table node containing 2<em>K</em> entries, then + the node is split and one node contains <em>K</em> symbols and the + other contains <em>K</em>+1 symbols.</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Symbol Table Node (A Leaf of a B-tree) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version Number</td> + <td>Reserved <em>(zero)</em></td> + <td colspan="2">Number of Symbols</td> + </tr> + + <tr> + <td colspan="4"><br /><br />Group Entries<br /><br /><br /></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Symbol Table Node (A Leaf of a B-tree) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>SNOD</code>” is + used to indicate the + beginning of a symbol table node. This gives file + consistency checking utilities a better chance of + reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version Number</p></td> + <td> + <p>The version number for the symbol table node. This + document describes version 1. (There is no version ‘0’ + of the symbol table node) + </p> + </td> + </tr> + + <tr> + <td><p>Number of Entries</p></td> + <td> + <p>Although all symbol table nodes have the same length, + most contain fewer than the maximum possible number of + link entries. This field indicates how many entries + contain valid data. The valid entries are packed at the + beginning of the symbol table node while the remaining + entries contain undefined values. + </p> + </td> + </tr> + + <tr> + <td><p>Symbol Table Entries</p></td> + <td> + <p>Each link has an entry in the symbol table node. + The format of the entry is described below. 
+ There are 2<em>K</em> entries in each group node, where + <em>K</em> is the “Group Leaf Node K” value from the + <a href="#Superblock">superblock</a>. + </p> + </td> + </tr> + </table> +</div> + +<h3><a name="SymbolTableEntry"> + III.C. Disk Format: Level 1C - Symbol Table Entry </a></h3> + +<p>Each symbol table entry in a symbol table node is designed + to allow for very fast browsing of stored objects. + Toward that design goal, the symbol table entries + include space for caching certain constant metadata from the + object header.</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Symbol Table Entry + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Link Name Offset<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Object Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Cache Type</td> + </tr> + + <tr> + <td colspan="4">Reserved <em>(zero)</em></td> + </tr> + + <tr> + <td colspan="4"><br /><br />Scratch-pad Space + <em>(16 bytes)</em><br /><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Symbol Table Entry + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Link Name Offset</p></td> + <td> + <p>This is the byte offset into the group’s local + heap for the name of the link. The name is null + terminated. + </p> + </td> + </tr> + + <tr> + <td><p>Object Header Address</p></td> + <td> + <p>Every object has an object header which serves as a + permanent location for the object’s metadata. 
In addition + to appearing in the object header, some of the object’s metadata + can be cached in the scratch-pad space. + </p> + </td> + </tr> + + <tr> + <td><p>Cache Type</p></td> + <td> + <p>The cache type is determined from the object header. + It also determines the format for the scratch-pad space: + + <table class="list"> + <tr> + <th width="20%" align="center">Type</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center">0</td> + <td>No data is cached by the group entry. This + is guaranteed to be the case when an object header + has a link count greater than one. + </td> + </tr> + <tr> + <td align="center">1</td> + <td>Group object header metadata is cached in the + scratch-pad space. This implies that the symbol table + entry refers to another group. + </td> + </tr> + <tr> + <td align="center">2</td> + <td>The entry is a symbolic link. The first four bytes + of the scratch-pad space are the offset into the local + heap for the link value. The object header address + will be undefined. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Reserved</p></td> + <td> + <p>These four bytes are present so that the scratch-pad + space is aligned on an eight-byte boundary. They are + always set to zero. + </p> + </td> + </tr> + + <tr> + <td><p>Scratch-pad Space</p></td> + <td> + <p>This space is used for different purposes, depending + on the value of the Cache Type field. Any metadata + about an object represented in the scratch-pad + space is duplicated in the object header for that + object. + </p> + <p> + Furthermore, no data is cached in the group + entry scratch-pad space if the object header for + the object has a link count greater than one. 
+ </p> + </td> + </tr> + </table> +</div> + +<h4>Format of the Scratch-pad Space</h4> + +<p>The symbol table entry scratch-pad space is formatted + according to the value in the Cache Type field.</p> + +<p>If the Cache Type field contains the value zero + <code>(0)</code> then no information is + stored in the scratch-pad space.</p> + +<p>If the Cache Type field contains the value one + <code>(1)</code>, then the scratch-pad space + contains cached metadata for another object header + in the following format:</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Object Header Scratch-pad Format + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Address of B-tree<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Name Heap<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Object Header Scratch-pad Format + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Address of B-tree</p></td> + <td> + <p>This is the file address for the root of the + group’s B-tree. + </p> + </td> + </tr> + + <tr> + <td><p>Address of Name Heap</p></td> + <td> + <p>This is the file address for the group’s local + heap, in which are stored the group’s symbol names. 
+ </p> + </td> + </tr> + </table> +</div> + + +<br /> +<br /> +<br /> +<p>If the Cache Type field contains the value two + <code>(2)</code>, then the scratch-pad space + contains cached metadata for a symbolic link + in the following format:</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Symbolic Link Scratch-pad Format + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Offset to Link Value</td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Symbolic Link Scratch-pad Format + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Offset to Link Value</p></td> + <td> + <p>The value of a symbolic link (that is, the name of the + thing to which it points) is stored in the local heap. + This field is the 4-byte offset into the local heap for + the start of the link value, which is null terminated. + </p> + </td> + </tr> + </table> +</div> + +<h3><a name="LocalHeap"> + III.D. Disk Format: Level 1D - Local Heaps</a></h3> + +<p>A local heap is a collection of small pieces of data that are particular + to a single object in the HDF5 file. Objects can be + inserted and removed from the heap at any time. + The address of a heap does not change once the heap is created. + For example, a group stores addresses of objects in symbol table nodes + with the names of links stored in the group’s local heap. 
+</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Local Heap + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan="3">Reserved <em>(zero)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Data Segment Size<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Offset to Head of Free-list<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Data Segment<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Local Heap + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>HEAP</code>” + is used to indicate the + beginning of a heap. This gives file consistency + checking utilities a better chance of reconstructing a + damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>Each local heap has its own version number so that new + heaps can be added to old files. This document + describes version zero (0) of the local heap. + </p> + </td> + </tr> + + <tr> + <td><p>Data Segment Size</p></td> + <td> + <p>The total amount of disk memory allocated for the heap + data. This may be larger than the amount of space + required by the objects stored in the heap. 
The extra + unused space in the heap holds a linked list of free blocks. + </p> + </td> + </tr> + + <tr> + <td><p>Offset to Head of Free-list</p></td> + <td> + <p>This is the offset within the heap data segment of the + first free block (or the + <a href="#UndefinedAddress">undefined address</a> if there is no + free block). The free block contains + <a href="#SizeOfLengthsV0">Size of Lengths</a> bytes that + are the offset of the next free block (or the + value ‘1’ if this is the + last free block) followed by Size of Lengths bytes that store + the size of this free block. The size of the free block includes + the space used to store the offset of the next free block and + the size of the current block, making the minimum size of a free + block 2 * Size of Lengths. + </p> + </td> + </tr> + + <tr> + <td><p>Address of Data Segment</p></td> + <td> + <p>The data segment originally starts immediately after + the heap header, but if the data segment must grow as a + result of adding more objects, then the data segment may + be relocated, in its entirety, to another part of the + file. + </p> + </td> + </tr> + </table> +</div> + +<p>Objects within a local heap should be aligned on an 8-byte boundary.</p> + +<h3><a name="GlobalHeap"> + III.E. Disk Format: Level 1E - Global Heap</a></h3> + +<p>Each HDF5 file has a global heap which stores various types of + information which is typically shared between datasets. The + global heap was designed to satisfy these goals:</p> + +<ol type="A"> + <li>Repeated access to a heap object must be efficient without + resulting in repeated file I/O requests. Since global heap + objects will typically be shared among several datasets, it is + probable that the object will be accessed repeatedly.</li> + <li>Collections of related global heap objects should result in + fewer and larger I/O requests. For instance, a dataset of + object references will have a global heap object for each + reference. 
Reading the entire set of object references + should result in a few large I/O requests instead of one small + I/O request for each reference.</li> + <li>It should be possible to remove objects from the global heap + and the resulting file hole should be eligible to be reclaimed + for other uses.</li> +</ol> + + +<p>The implementation of the heap makes use of the memory management + already available at the file level and combines that with a new + object called a <em>collection</em> to achieve goal B. The global heap + is the set of all collections. Each global heap object belongs to + exactly one collection, and each collection contains one or more global + heap objects. For the purposes of disk I/O and caching, a collection is + treated as an atomic object, addressing goal A. +</p> + +<p>When a global heap object is deleted from a collection (which + occurs when its reference count falls to zero), objects located + after the deleted object in the collection are packed down toward + the beginning of the collection, and the collection’s + global heap object 0 is created (if possible), or its size is + increased to account for the recently freed space. There are + no gaps between objects in each collection, with the possible + exception of the final space in the collection, if it is not + large enough to hold the header for the collection’s + global heap object 0. These features address goal C. +</p> + +<p>The HDF5 Library creates global heap collections as needed, so there may + be multiple collections throughout the file. The set of all of them is + abstractly called the “global heap”, although they do not actually link + to each other, and there is no global place in the file where you can + discover all of the collections. The collections are found simply by + finding a reference to one through another object in the file. For + example, data of variable-length datatype elements is stored in the + global heap and is accessed via a global heap ID. 
The format for + global heap IDs is described at the end of this section. +</p> + +<p>For more information on global heaps for virtual datasets, see + <a href="#GlobalHeapVDS">“Disk Format: Level 1F - Global Heap + Block for Virtual Datasets.”</a></p> +<br /> + +<div align="center"> + <table class="format"> + <caption> + Layout: A Global Heap Collection + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Collection Size<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Global Heap Object 1<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Global Heap Object 2<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />...<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Global Heap Object <em>N</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Global Heap Object 0 (free space)<br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: A Global Heap Collection + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>GCOL</code>” + is used to indicate the + beginning of a collection. This gives file consistency + checking utilities a better chance of reconstructing a + damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>Each collection has its own version number so that new + collections can be added to old files. 
This document + describes version one (1) of the collections (there is no + version zero (0)). + </p> + </td> + </tr> + + <tr> + <td><p>Collection Size</p></td> + <td> + <p>This is the size in bytes of the entire collection + including this field. The default (and minimum) + collection size is 4096 bytes which is a typical file + system block size. This allows for 127 16-byte heap + objects plus their overhead (the collection header of 16 bytes + and the 16 bytes of information about each heap object). + </p> + </td> + </tr> + + <tr> + <td><p>Global Heap Object 1 through <em>N</em></p></td> + <td> + <p>The objects are stored in any order with no + intervening unused space. + </p> + </td> + </tr> + + <tr> + <td><p>Global Heap Object 0</p></td> + <td> + <p>Global Heap Object 0 (zero), when present, represents the free + space in the collection. Free space always appears at the end of + the collection. If the free space is too small to store the header + for Object 0 (described below) then the header is implied and is not + written. + <p> + The field <em>Object Size</em> for Object 0 indicates the + amount of possible free space in the collection including the 16-byte + header size of Object 0. 
+ </p> + </td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Global Heap Object + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="2">Heap Object Index</td> + <td colspan="2">Reference Count</td> + </tr> + + <tr> + <td colspan="4">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Object Size<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Object Data<br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Global Heap Object + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Heap Object Index</p></td> + <td> + <p>Each object has a unique identification number within a + collection. The identification numbers are chosen so that + new objects have the smallest value possible with the + exception that the identifier <code>0</code> always refers to the + object which represents all free space within the + collection. + </p> + </td> + </tr> + + <tr> + <td><p>Reference Count</p></td> + <td> + <p>All heap objects have a reference count field. An + object which is referenced from some other part of the + file will have a positive reference count. The reference + count for Object 0 is always zero. + </p> + </td> + </tr> + + <tr> + <td><p>Reserved</p></td> + <td> + <p>Zero padding to align next field on an 8-byte boundary. + </p> + </td> + </tr> + + <tr> + <td><p>Object Size</p></td> + <td> + <p>This is the size of the object data stored for the object. 
+ The actual storage space allocated for the object data is rounded + up to a multiple of eight. + </p> + </td> + </tr> + + <tr> + <td><p>Object Data</p></td> + <td> + <p>The object data is treated as a one-dimensional array + of bytes to be interpreted by the caller. + </p> + </td> + </tr> + </table> + +</div> + +<br /> +<br /> +<br /> +<p> + <a name="GlobalHeapID"></a> + The format for the ID used to locate an object in the global heap is + described here:</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Global Heap ID + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Collection Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Object Index</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Global Heap ID + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Collection Address</p></td> + <td> + <p>This field is the address of the global heap collection + where the data object is stored. + </p> + </td> + </tr> + + <tr> + <td><p>ID</p></td> + <td> + <p>This field is the index of the data object within the + global heap collection. + </p> + </td> + </tr> + + </table> +</div> + + + +<h3><a name="GlobalHeapVDS"> III.F. Disk Format: Level 1F - Global + Heap Block for Virtual Datasets</a></h3> + +<p>The layout for the global heap block used with virtual datasets is + described below. 
For more information on global heaps, see + <a href="#GlobalHeap">“Disk Format: Level 1E - Global Heap.”</a></p>
+ </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Global Heap Block for Virtual Dataset + </caption> + <tr> + <th width="40%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number for the block; the value is 0.</p> + </td> + </tr> + + <tr> + <td><p>Num Entries</p></td> + <td><p>The number of entries in the block.</p> + </td> + </tr> + + <tr> + <td><p>Source Filename #n</p></td> + <td> + <p>The source file name where the source dataset is located. + </p> + </td> + </tr> + + <tr> + <td><p>Source Dataset #n</p></td> + <td><p>The source dataset name that is mapped to the + virtual dataset.</p></td> + </tr> + + <tr> + <td><p>Source Selection #n</p></td> + <td> + <p>The <a href="#DataspaceSEL">dataspace selection</a> in the + source dataset that is mapped to the virtual selection. + </p> + </td> + </tr> + + <tr> + <td><p>Virtual Selection #n</p></td> + <td> + <p>This is the <a href="#DataspaceSEL">dataspace selection</a> in the virtual dataset that is + mapped to the source selection. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the block.</p> + </td> + </tr> + + </table> +</div> +<br> + +<h3><a name="FractalHeap"> + III.G. Disk Format: Level 1G - Fractal Heap</a></h3> + +<p> + Each fractal heap consists of a header and zero or more direct and + indirect blocks (described below). The header contains general + information as well as + initialization parameters for the doubling table. The <em>Address + of Root Block</em> field in the header points to the first direct or + indirect block in the heap. +</p> + +<p> + Fractal heaps are based on a data structure called a <em>doubling + table</em>. A doubling table provides a mechanism for quickly + extending an array-like data structure that minimizes the number of + empty blocks in the heap, while retaining very fast lookup of any + element within the array. 
More information on fractal heaps and + doubling tables can be found in the RFC + “<a href="Supplements/FractalHeap/PrivateHeap.pdf">Private + Heaps in HDF5</a>.” +</p> + +<p> + The fractal heap implements the doubling table structure with + indirect and direct blocks. + Indirect blocks in the heap do not actually contain data for + objects in the heap; their “size” is abstract - + they represent the indexing structure for locating the + direct blocks in the doubling table. + Direct blocks + contain the actual data for objects stored in the heap. +</p> + +<p> + All indirect blocks have a constant number of block entries in each + row, called the <em>width</em> of the doubling table + (see the <em>Table Width</em> field in the header). + + The number + of rows for each indirect block in the heap is determined by the + size of the block that the indirect block represents in the + doubling table (calculation of this is shown below) and is + constant, except for the “root” + indirect block, which expands and shrinks its number of rows as + needed. +</p> + +<p> + Blocks in the first <em>two</em> rows of an indirect block + are <em>Starting Block Size</em> number of bytes in size. + For example, if the row <em>width</em> of the doubling table is 4, + then the first eight block entries in the + indirect block are <em>Starting Block Size</em> number of bytes in size. + The blocks in each subsequent row are twice the size of + the blocks in the previous row. In other words, blocks in + the third row are twice the <em>Starting Block Size</em>, + blocks in the fourth row are four times the + <em>Starting Block Size</em>, and so on. Entries for + blocks up to the <em>Maximum Direct Block Size</em> point to + direct blocks, and entries for blocks greater than that size + point to further indirect blocks (which have their own + entries for direct and indirect blocks). + <em>Starting Block Size</em> and + <em>Maximum Direct Block Size</em> are fields + stored in the header.
+</p> + +<p> + The number of rows of blocks, <em>nrows</em>, in an + indirect block is calculated by the following expression: + <br /> <br /> + <em>nrows</em> = (log<sub>2</sub>(<em>block_size</em>) - + log<sub>2</sub>(<em>&lt;Starting Block Size&gt;</em>)) + 1 +</p> +where <em>block_size</em> is the size of the block that the indirect block +represents in the doubling table. +For example, to represent a block with <em>block_size</em> equal to 1024, +and <em>Starting Block Size</em> equal to 256, +three rows are needed. +<p> + The maximum number of rows of direct blocks, <em>max_dblock_rows</em>, + in any indirect block of a fractal heap is given by the + following expression: + <br /> <br /> + <em>max_dblock_rows</em> = + (log<sub>2</sub>(<em>&lt;Maximum Direct Block Size&gt;</em>) - + log<sub>2</sub>(<em>&lt;Starting Block Size&gt;</em>)) + 2 +</p> +<p> + Using the computed values for <em>nrows</em> and + <em>max_dblock_rows</em>, along with the <em>width</em> of the + doubling table, the number of direct and indirect block entries + (<em>K</em> and <em>N</em> in the indirect block description, below) + in an indirect block can be computed: + <br /> <br /> + <em>K</em> = MIN(<em>nrows</em>, <em>max_dblock_rows</em>) * + <em>&lt;Table Width&gt;</em> + + <br /> <br /> + If <em>nrows</em> is less than or equal to <em>max_dblock_rows</em>, + <em>N</em> is 0. Otherwise, <em>N</em> is simply computed: + <br /> <br /> + <em>N</em> = (<em>nrows</em> * + <em>&lt;Table Width&gt;</em>) - <em>K</em> +</p> + +<p> + The size of indirect blocks on disk is determined by the number + of rows in the indirect block (computed above). The size of direct + blocks on disk is exactly the size of the block in the doubling + table.
+</p> +<br> + +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="2">Heap ID Length</td> + <td colspan="2">I/O Filters’ Encoded Length</td> + </tr> + + <tr> + <td>Flags</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Maximum Size of Managed Objects</td> + </tr> + + <tr> + <td colspan="4"><br />Next Huge Object ID<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />v2 B-tree Address of Huge Objects<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Amount of Free Space in Managed Blocks<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Managed Block Free Space Manager<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Amount of Managed Space in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Amount of Allocated Managed Space in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Offset of Direct Block Allocation Iterator in Managed Space<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Number of Managed Objects in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Size of Huge Objects in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Number of Huge Objects in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Size of Tiny Objects in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Number of Tiny Objects in Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="2">Table Width</td> + 
<td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Starting Block Size<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Maximum Direct Block Size<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="2">Maximum Heap Size</td> + <td colspan="2">Starting # of Rows in Root Indirect Block</td> + </tr> + + <tr> + <td colspan="4"><br />Address of Root Block<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="2">Current # of Rows in Root Indirect Block</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Size of Filtered Root Direct Block <em>(optional)</em><sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">I/O Filter Mask<em> (optional)</em></td> + </tr> + + <tr> + <td colspan="4">I/O Filter Information<em> (optional, variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fractal Heap Header + </caption> + <tr> + <th width="40%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>FRHP</code>” + is used to indicate the + beginning of a fractal heap header. This gives file consistency + checking utilities a better chance of reconstructing a + damaged file. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Heap ID Length</p></td> + <td> + <p>This is the length in bytes of heap object IDs for this heap.</p> + </td> + </tr> + + <tr> + <td><p>I/O Filters’ Encoded Length</p></td> + <td> + <p>This is the size in bytes of the encoded <em>I/O Filter Information</em>. + </p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td> + <p>This field is the heap status flag and is a bit field + indicating additional information about the fractal heap. + <table class="list"> + <tr> + <th width="20%" align="center">Bit(s)</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, the ID value to use for huge object has wrapped + around. If the value for the <em>Next Huge Object ID</em> + has wrapped around, each new huge object inserted into the + heap will require a search for an ID value. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>If set, the direct blocks in the heap are checksummed. + </td> + </tr> + <tr> + <td align="center"><code>2-7</code></td> + <td>Reserved</td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Maximum Size of Managed Objects</p></td> + <td> + <p>This is the maximum size of managed objects allowed in the heap. + Objects greater than this are ‘huge’ objects and will be + stored in the file directly, rather than in a direct block for + the heap. + </p> + </td> + </tr> + + <tr> + <td><p>Next Huge Object ID</p></td> + <td> + <p>This is the next ID value to use for a huge object in the heap. + </p> + </td> + </tr> + + <tr> + <td><p>v2 B-tree Address of Huge Objects</p></td> + <td> + <p>This is the address of the <a href="#V2Btrees">v2 B-tree</a> + used to track huge objects in the heap.
The type of records + stored in the <em>v2 B-tree</em> will + be determined by whether the address and length of a huge object + can fit into a heap ID (if yes, it is a “directly” accessed + huge object) and whether there is a filter used on objects + in the heap. + </p> + </td> + </tr> + + <tr> + <td><p>Amount of Free Space in Managed Blocks</p></td> + <td> + <p>This is the total amount of free space in managed direct blocks + (in bytes). + </p> + </td> + </tr> + + <tr> + <td><p>Address of Managed Block Free Space Manager</p></td> + <td> + <p>This is the address of the + <em><a href="#FreeSpaceManager">Free-space Manager</a></em> for + managed blocks. + </p> + </td> + </tr> + + <tr> + <td><p>Amount of Managed Space in Heap</p></td> + <td> + <p>This is the total amount of managed space in the heap (in bytes), + essentially the upper bound of the heap’s linear address space. + </p> + </td> + </tr> + + <tr> + <td><p>Amount of Allocated Managed Space in Heap</p></td> + <td> + <p>This is the total amount of managed space (in bytes) actually + allocated in + the heap. This can be less than the <em>Amount of Managed Space + in Heap</em> field, if some direct blocks in the heap’s linear + address space are not allocated. + </p> + </td> + </tr> + + <tr> + <td><p>Offset of Direct Block Allocation Iterator in Managed Space</p></td> + <td> + <p>This is the linear heap offset where the next direct + block should be allocated at (in bytes). This may be less than + the <em>Amount of Managed Space in Heap</em> value because the + heap’s address space is increased by a “row” of direct blocks + at a time, rather than by single direct block increments. + </p> + </td> + </tr> + + <tr> + <td><p>Number of Managed Objects in Heap</p></td> + <td> + <p>This is the number of managed objects in the heap. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Huge Objects in Heap</p></td> + <td> + <p>This is the total size of huge objects in the heap (in bytes). 
+ </p> + </td> + </tr> + + <tr> + <td><p>Number of Huge Objects in Heap</p></td> + <td> + <p>This is the number of huge objects in the heap. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Tiny Objects in Heap</p></td> + <td> + <p>This is the total size of tiny objects that are packed in heap + IDs (in bytes). + </p> + </td> + </tr> + + <tr> + <td><p>Number of Tiny Objects in Heap</p></td> + <td> + <p>This is the number of tiny objects that are packed in heap IDs. + </p> + </td> + </tr> + + <tr> + <td><p>Table Width</p></td> + <td> + <p>This is the number of columns in the doubling table for managed + blocks. This value must be a power of two. + </p> + </td> + </tr> + + <tr> + <td><p>Starting Block Size</p></td> + <td> + <p>This is the starting block size to use in the doubling table for + managed blocks (in bytes). This value must be a power of two. + </p> + </td> + </tr> + + <tr> + <td><p>Maximum Direct Block Size</p></td> + <td> + <p>This is the maximum size allowed for a managed direct block. + Objects inserted into the heap that are larger than this value + (less the number of bytes of direct block prefix/suffix) + are stored as ‘huge’ objects. This value must be a power of + two. + </p> + </td> + </tr> + + <tr> + <td><p>Maximum Heap Size</p></td> + <td> + <p>This is the maximum size of the heap’s linear address space for + managed objects (in bytes). The value stored is the log2 of + the actual value, that is: the number of bits of the address space. + ‘Huge’ and ‘tiny’ objects are not counted in this value, since + they do not store objects in the linear address space of the + heap. + </p> + </td> + </tr> + + <tr> + <td><p>Starting # of Rows in Root Indirect Block</p></td> + <td> + <p>This is the starting number of rows for the root indirect block. + A value of 0 indicates that the root indirect block will have + the maximum number of rows needed to address the heap’s <em>Maximum + Heap Size</em>. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Address of Root Block</p></td> + <td> + <p>This is the address of the root block for the heap. It can + be the <a href="#UndefinedAddress">undefined address</a> if + there is no data in the heap. It either points to a direct + block (if the <em>Current # of Rows in Root Indirect + Block</em> value is 0), or an indirect block. + </p> + </td> + </tr> + + <tr> + <td><p>Current # of Rows in Root Indirect Block</p></td> + <td> + <p>This is the current number of rows in the root indirect block. + A value of 0 indicates that <em>Address of Root Block</em> + points to a direct block instead of an indirect block. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Filtered Root Direct Block</p></td> + <td> + <p>This is the size of the root direct block, if filters are + applied to heap objects (in bytes). This field is only + stored in the header if the <em>I/O Filters’ Encoded Length</em> + is greater than 0. + </p> + </td> + </tr> + + <tr> + <td><p>I/O Filter Mask</p></td> + <td> + <p>This is the filter mask for the root direct block, if filters + are applied to heap objects. This mask has the same format as + that used for the filter mask in chunked raw data records in a + <a href="#V1Btrees">v1 B-tree</a>. + This field is only + stored in the header if the <em>I/O Filters’ Encoded Length</em> + is greater than 0. + </p> + </td> + </tr> + + <tr> + <td><p>I/O Filter Information</p></td> + <td> + <p>This is the I/O filter information encoding direct blocks and + huge objects, if filters are applied to heap objects. This + field is encoded as a <a href="#FilterMessage">Filter Pipeline</a> + message. + The size of this field is determined by <em>I/O Filters’ + Encoded Length</em>.
+ </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the header.</p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap Direct Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Heap Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Block Offset <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Checksum <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Object Data <em>(variable size)</em><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fractal Heap Direct Block + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>FHDB</code>” + is used to indicate the + beginning of a fractal heap direct block. This gives file consistency + checking utilities a better chance of reconstructing a + damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Heap Header Address</p></td> + <td> + <p>This is the address for the fractal heap header that this + block belongs to. This field is principally used for file + integrity checking. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Block Offset</p></td> + <td> + <p>This is the offset of the block within the fractal heap’s + address space (in bytes). The number of bytes used to encode + this field is the <em>Maximum Heap Size</em> (in the heap’s + header) divided by 8 and rounded up to the next highest integer, + for values that are not a multiple of 8. This value is + principally used for file integrity checking. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the direct block.</p> + <p>This field is only present if bit 1 of <em>Flags</em> in the + heap’s header is set.</p> + </td> + </tr> + + <tr> + <td><p>Object Data</p></td> + <td> + <p>This section of the direct block stores the actual data for + objects in the heap. The size of this section is determined by + the direct block’s size minus the size of the other fields + stored in the direct block (for example, the <em>Signature</em>, + <em>Version</em>, and others including the <em>Checksum</em> if it is + present). 
+ </p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap Indirect Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Heap Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Block Offset <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Child Direct Block #0 Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Size of Filtered Direct Block #0 <em>(optional)</em> <sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">Filter Mask for Direct Block #0 <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Child Direct Block #1 Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Size of Filtered Direct Block #1 <em>(optional)</em><sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">Filter Mask for Direct Block #1 <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4"><br />Child Direct Block #K-1 Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Size of Filtered Direct Block #K-1 <em>(optional)</em><sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">Filter Mask for Direct Block #K-1 <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Child Indirect Block #0 Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Child Indirect Block #1 Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4"><br />Child Indirect Block #N-1 Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + 
</table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fractal Heap Indirect Block + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>FHIB</code>” is used to + indicate the beginning of a fractal heap indirect block. This + gives file consistency checking utilities a better chance of + reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Heap Header Address</p></td> + <td> + <p>This is the address for the fractal heap header that this + block belongs to. This field is principally used for file + integrity checking. + </p> + </td> + </tr> + + <tr> + <td><p>Block Offset</p></td> + <td> + <p>This is the offset of the block within the fractal heap’s + address space (in bytes). The number of bytes used to encode + this field is the <em>Maximum Heap Size</em> (in the heap’s + header) divided by 8 and rounded up to the next highest integer, + for values that are not a multiple of 8. This value is + principally used for file integrity checking. + </p> + </td> + </tr> + + <tr> + <td><p>Child Direct Block #K Address</p></td> + <td> + <p>This field is the address of the child direct block. + The size of the [uncompressed] direct block can be computed by + its offset in the heap’s linear address space. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Size of Filtered Direct Block #K</p></td> + <td> + <p>This is the size of the child direct block after passing through + the I/O filters defined for this heap (in bytes). If no I/O + filters are present for this heap, this field is not present. + </p> + </td> + </tr> + <tr> + <td><p>Filter Mask for Direct Block #K</p></td> + <td> + <p>This is the I/O filter mask for the filtered direct block. + This mask has the same format as that used for the filter mask + in chunked raw data records in a <a href="#V1Btrees">v1 B-tree</a>. + If no I/O filters are present for this heap, this field is not + present. + </p> + </td> + </tr> + + <tr> + <td><p>Child Indirect Block #N Address</p></td> + <td> + <p>This field is the address of the child indirect block. + The size of the indirect block can be computed by + its offset in the heap’s linear address space. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the indirect block.</p> + </td> + </tr> + + </table> + +</div> + +<br /> +<p>An object in the fractal heap is identified by means of a fractal heap ID, + which encodes information to locate the object in the heap. + Currently, the fractal heap stores an object in one of three ways, + depending on the object’s size:</p> + +<div align="center"> + <table class="list80"> + <tr> + <th width="20%">Type</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center">Tiny</td> + <td> + <p>When an object is small enough to be encoded in the + heap ID, the object’s data is embedded in the fractal + heap ID itself. There are two sub-types for this type of + object: normal and extended. The sub-type for tiny heap + IDs depends on whether the heap ID is large enough to + store objects greater than 16 bytes or not. If the + heap ID length is 18 bytes or smaller, the + ‘normal’ tiny heap ID form is used. 
If the + heap ID length is greater than 18 bytes, the + “extended” form is used. See the format + description below for both sub-types. + </p> + </td> + </tr> + + <tr> + <td align="center">Huge</td> + <td> + <p>When the size of an object is larger than <em>Maximum Size of + Managed Objects</em> in the <em>Fractal Heap Header</em>, the + object’s data is stored on its own in the file and the object + is tracked/indexed via a version 2 B-tree. All huge objects + for a particular fractal heap use the same v2 B-tree. All huge + objects for a particular fractal heap use the same format for + their huge object IDs. + </p> + + <p>Depending on whether the IDs for a heap are large enough to hold + the object’s retrieval information and whether I/O pipeline filters + are applied to the heap’s objects, 4 sub-types are derived for + huge object IDs for this heap:</p> + + <div align="center"> + <table class="list"> + <tr> + <th align="left" width="35%">Sub-type</th> + <th align="left">Description</th> + </tr> + + <tr> + <td align="left">Directly accessed, non-filtered</td> + <td> + <p>The object’s address and length are embedded in the + fractal heap ID itself and the object is directly accessed + from them. This allows the object to be accessed without + resorting to the B-tree. + </p> + </td> + </tr> + + <tr> + <td align="left">Directly accessed, filtered</td> + <td> + <p>The filtered object’s address, length, filter mask and + de-filtered size are embedded in the fractal heap ID itself + and the object is accessed directly with them. This allows + the object to be accessed without resorting to the B-tree. + </p> + </td> + </tr> + + <tr> + <td align="left">Indirectly accessed, non-filtered</td> + <td> + <p>The object is located by using a B-tree key embedded in + the fractal heap ID to retrieve the address and length from + the version 2 B-tree for huge objects. Then, the address + and length are used to access the object.
+ </p> + </td> + </tr> + + <tr> + <td align="left">Indirectly accessed, filtered</td> + <td> + <p>The object is located by using a B-tree key embedded in + the fractal heap ID to retrieve the filtered object’s + address, length, filter mask and de-filtered size from the + version 2 B-tree for huge objects. Then, this information + is used to access the object. + </p> + </td> + </tr> + </table> + </div> + + </td> + </tr> + + <tr> + <td align="center">Managed</td> + <td> + <p>When the size of an object does not meet the above two + conditions, the object is stored and managed via the direct and + indirect blocks based on the doubling table. + </p> + </td> + </tr> + </table> +</div> + + +<br /> +<p>The specific format for each type of heap ID is described below: +</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap ID for Tiny Objects (Sub-type 1 - + ‘Normal’) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version, Type, and Length</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Data <em>(variable size)</em></td> + </tr> + + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fractal Heap ID for Tiny Objects (Sub-type 1 - + ‘Normal’) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version, Type, and Length</p></td> + <td> + <p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>6-7</code></td> + <td>The current version of ID format. This document + describes version 0. + </td> + </tr> + <tr> + <td align="center"><code>4-5</code></td> + <td>The ID type. Tiny objects have a value of <code>2</code>. 
+ </td> + </tr> + <tr> + <td align="center"><code>0-3</code></td> + <td>The length of the tiny object. The value stored + is one less than the actual length (since zero-length + objects are not allowed to be stored in the heap). + For example, an object of actual length 1 has an + encoded length of 0, an object of actual length 2 + has an encoded length of 1, and so on. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Data</p></td> + <td> + <p>This is the data for the object. + </p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap ID for Tiny Objects (Sub-type 2 - + ‘Extended’) + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version, Type, and Length</td> + <td>Extended Length</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Data <em>(variable size)</em></td> + </tr> + + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fractal Heap ID for Tiny Objects (Sub-type 2 - + ‘Extended’) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version, Type, and Length</p></td> + <td> + <p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>6-7</code></td> + <td>The current version of ID format. This document + describes version 0. + </td> + </tr> + <tr> + <td align="center"><code>4-5</code></td> + <td>The ID type. Tiny objects have a value of <code>2</code>. + </td> + </tr> + <tr> + <td align="center"><code>0-3</code></td> + <td>These 4 bits, together with the next byte, form an + unsigned 12-bit integer for holding the length of the + object. 
These 4 bits are bits 8-11 of the 12-bit integer. + See description for the <em>Extended Length</em> field below. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Extended Length</p></td> + <td> + <p>This byte, together with the 4 bits in the previous byte, + forms an unsigned 12-bit integer for holding the length of + the tiny object. These 8 bits are bits 0-7 of the 12-bit + integer formed. The value stored is one less than the actual + length (since zero-length objects are not allowed to be + stored in the heap). For example, an object of actual length + 1 has an encoded length of 0, an object of actual length + 2 has an encoded length of 1, and so on. + </p> + </td> + </tr> + + <tr> + <td><p>Data</p></td> + <td> + <p>This is the data for the object. + </p> + </td> + </tr> + + </table> +</div> + + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap ID for Huge Objects (Sub-types 1 and 2): + Indirectly Accessed, Non-filtered/Filtered + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version and Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />v2 B-tree Key<sup>L</sup><em> (variable size)</em><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.)
+ </td></tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fractal Heap ID for Huge Objects (Sub-types 1 and 2): + Indirectly Accessed, Non-filtered/Filtered + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version and Type</p></td> + <td> + <p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>6-7</code></td> + <td>The current version of ID format. This document + describes version 0. + </td> + </tr> + <tr> + <td align="center"><code>4-5</code></td> + <td>The ID type. Huge objects have a value of <code>1</code>. + </td> + </tr> + <tr> + <td align="center"><code>0-3</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>v2 B-tree Key</p></td> + <td><p>This field is the B-tree key for retrieving the information + from the version 2 B-tree for huge objects needed to access the + object. See the description of <a href="#V2Btrees">v2 B-tree</a> + records sub-types 1 and 2 for a description of the fields. 
New key + values are derived from <em>Next Huge Object ID</em> in the + <em>Fractal Heap Header</em>.</p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap ID for Huge Objects (Sub-type 3): + Directly Accessed, Non-filtered + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version and Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Address <sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Length <sup>L</sup><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fractal Heap ID for Huge Objects (Sub-type 3): + Directly Accessed, Non-filtered + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version and Type</p></td> + <td> + <p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>6-7</code></td> + <td>The current version of ID format. This document + describes version 0. + </td> + </tr> + <tr> + <td align="center"><code>4-5</code></td> + <td>The ID type. Huge objects have a value of <code>1</code>. 
+ </td> + </tr> + <tr> + <td align="center"><code>0-3</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>This field is the address of the object in the file.</p> + </td> + </tr> + + <tr> + <td><p>Length</p></td> + <td><p>This field is the length of the object in the file.</p> + </td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap ID for Huge Objects (Sub-type 4): + Directly Accessed, Filtered + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version and Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Address <sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Length <sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Filter Mask</td> + </tr> + + <tr> + <td colspan="4"><br />De-filtered Size <sup>L</sup><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) 
+ </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fractal Heap ID for Huge Objects (Sub-type 4): + Directly Accessed, Filtered + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version and Type</p></td> + <td> + <p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>6-7</code></td> + <td>The current version of ID format. This document + describes version 0. + </td> + </tr> + <tr> + <td align="center"><code>4-5</code></td> + <td>The ID type. Huge objects have a value of <code>1</code>. + </td> + </tr> + <tr> + <td align="center"><code>0-3</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>This field is the address of the filtered object in the file.</p> + </td> + </tr> + + <tr> + <td><p>Length</p></td> + <td><p>This field is the length of the filtered object in the file.</p> + </td> + </tr> + + <tr> + <td><p>Filter Mask</p></td> + <td><p>This field is the I/O pipeline filter mask for the + filtered object in the file.</p> + </td> + </tr> + + <tr> + <td><p>De-filtered Size</p></td> + <td><p>This field is the size of the de-filtered object in the file.</p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap ID for Managed Objects + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version and Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Offset <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Length <em>(variable size)</em></td> + </tr> + </table> +</div> + +<br /> +<div
align="center"> + <table class="desc"> + <caption> + Fields: Fractal Heap ID for Managed Objects + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version and Type</p></td> + <td><p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>6-7</code></td> + <td>The current version of ID format. This document + describes version 0. + </td> + </tr> + <tr> + <td align="center"><code>4-5</code></td> + <td>The ID type. Managed objects have a value of <code>0</code>. + </td> + </tr> + <tr> + <td align="center"><code>0-3</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Offset</p></td> + <td><p>This field is the offset of the object in the heap. + This field’s size is the minimum number of bytes + necessary to encode the <em>Maximum Heap Size</em> value + (from the <em>Fractal Heap Header</em>). For example, if the + value of the <em>Maximum Heap Size</em> is less than 256 bytes, + this field is 1 byte in length, a <em>Maximum Heap Size</em> + of 256-65535 bytes uses a 2 byte length, and so on.</p></td> + </tr> + + <tr> + <td><p>Length</p></td> + <td><p>This field is the length of the object in the heap. It + is determined by taking the minimum value of <em>Maximum + Direct Block Size</em> and <em>Maximum Size of Managed + Objects</em> in the <em>Fractal Heap Header</em>. Again, + the minimum number of bytes needed to encode that value is + used for the size of this field.</p></td> + </tr> + </table> +</div> + +<h3><a name="FreeSpaceManager"> + III.H. Disk Format: Level 1H - Free-space Manager</a></h3> + +<p> + Free-space managers are used to describe space within a heap or + the entire HDF5 file that is not currently used for that heap or + file. 
+</p> + +<p> + The <em>free-space manager header</em> contains metadata information + about the space being tracked, along with the address of the list + of <em>free space sections</em> which actually describes the free + space. The header records information about free-space sections being + tracked, creation parameters for handling free-space sections of a + client, and section information used to locate the collection of + free-space sections. +</p> + +<p> + The <em>free-space section list</em> stores a collection of + free-space sections that is specific to each <em>client</em> of the + free-space manager. + + For example, the fractal heap is a client of the free space manager + and uses it to track unused space within the heap. There are 4 + types of section records for the fractal heap, each of which has + its own format, listed below. +</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Free-space Manager Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td>Client ID</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Total Space Tracked<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Total Number of Sections<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Number of Serialized Sections<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Number of Un-Serialized Sections<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="2">Number of Section Classes</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="2">Shrink Percent</td> + <td colspan="2">Expand Percent</td> + </tr> + + <tr> + <td colspan="2">Size of Address Space</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This 
space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Maximum Section Size <sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of Serialized Section List<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Size of Serialized Section List Used<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Allocated Size of Serialized Section List<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Free-space Manager Header + </caption> + <tr> + <th width="35%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>FSHD</code>” + is used to indicate the beginning of the Free-space Manager + Header. This gives file consistency checking utilities a + better chance of reconstructing a damaged file. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This is the version number for the Free-space Manager Header + and this document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Client ID</p></td> + <td> + <p>This is the client ID for identifying the user of this + free-space manager: + + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Fractal heap + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>File + </td> + </tr> + <tr> + <td align="center"><code>2+</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Total Space Tracked</p></td> + <td> + <p>This is the total amount of free space being tracked, in bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Total Number of Sections</p></td> + <td> + <p>This is the total number of free-space sections being tracked. + </p> + </td> + </tr> + + <tr> + <td><p>Number of Serialized Sections</p></td> + <td> + <p>This is the number of serialized free-space sections being + tracked. + </p> + </td> + </tr> + <tr> + <td><p>Number of Un-Serialized Sections</p></td> + <td> + <p>This is the number of un-serialized free-space sections being + managed. Un-serialized sections are created by the free-space + client when the list of sections is read in. + </p> + </td> + </tr> + + <tr> + <td><p>Number of Section Classes</p></td> + <td> + <p>This is the number of section classes handled by this free space + manager for the free-space client. + </p> + </td> + </tr> + + <tr> + <td><p>Shrink Percent</p></td> + <td> + <p>This is the percent of current size to shrink the allocated + serialized free-space section list. + </p> + </td> + </tr> + + <tr> + <td><p>Expand Percent</p></td> + <td> + <p>This is the percent of current size to expand the allocated + serialized free-space section list. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Size of Address Space</p></td> + <td> + <p>This is the size of the address space that free-space sections + are within. This is stored as the log<sub>2</sub> of the + actual value (in other words, the number of bits required + to store values within that address space). + </p> + </td> + </tr> + + <tr> + <td><p>Maximum Section Size</p></td> + <td> + <p>This is the maximum size of a section to be tracked. + </p> + </td> + </tr> + + <tr> + <td><p>Address of Serialized Section List</p></td> + <td> + <p>This is the address where the serialized free-space section + list is stored. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Serialized Section List Used</p></td> + <td> + <p>This is the size of the serialized free-space section + list used (in bytes). This value must be less than + or equal to the <em>allocated size of serialized section + list</em>, below. + </p> + </td> + </tr> + + <tr> + <td><p>Allocated Size of Serialized Section List</p></td> + <td> + <p>This is the size of serialized free-space section list + actually allocated (in bytes). + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the free-space manager header.</p> + </td> + </tr> + + </table> +</div> + +<br /> +<p>The free-space sections being managed are stored in a + <em>free-space section list</em>, described below. 
The sections + in the free-space section list are stored in the following way: + a count of the number of sections describing a particular size of + free space and the size of the free-space described (in bytes), + followed by a list of section description records; then another + section count and size, followed by the list of section + descriptions for that size; and so on.</p> + + +<div align="center"> + <table class="format"> + <caption> + Layout: Free-space Section List + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Free-space Manager Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Number of Section Records in Set #0 <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Size of Free-space Section Described in Record Set #0 <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #0 Section Record #0 Offset<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="1">Record Set #0 Section Record #0 Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #0 Section Record #0 Data <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Record Set #0 Section Record #K-1 Offset<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="1">Record Set #0 Section Record #K-1 Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #0 Section Record #K-1 Data <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Number of Section Records in Set #1 <em>(variable size)</em></td> + </tr> + + <tr> + 
<td colspan="4">Size of Free-space Section Described in Record Set #1 <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #1 Section Record #0 Offset<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="1">Record Set #1 Section Record #0 Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #1 Section Record #0 Data <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Record Set #1 Section Record #K-1 Offset<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="1">Record Set #1 Section Record #K-1 Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #1 Section Record #K-1 Data <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4"><strong>...</strong></td> + </tr> + + <tr> + <td colspan="4"><strong>...</strong></td> + </tr> + + <tr> + <td colspan="4">Number of Section Records in Set #N-1 <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Size of Free-space Section Described in Record Set #N-1 <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #N-1 Section Record #0 Offset<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="1">Record Set #N-1 Section Record #0 Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Record Set #N-1 Section Record #0 Data <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Record Set #N-1 Section Record #K-1 Offset<em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="1">Record Set #N-1 Section Record #K-1 Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td 
colspan="4">Record Set #N-1 Section Record #K-1 Data <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Free-space Section List + </caption> + <tr> + <th width="35%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>FSSE</code>” + is used to indicate the beginning of the Free-space Section + Information. This gives file consistency checking utilities + a better chance of reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This is the version number for the Free-space Section List + and this document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Free-space Manager Header Address</p></td> + <td> + <p>This is the address of the <em>Free-space Manager Header</em>. + This field is principally used for file + integrity checking. + </p> + </td> + </tr> + + <tr> + <td><p>Number of Section Records for Set #N</p></td> + <td> + <p>This is the number of free-space section records for set #N. + The length of this field is the minimum number of bytes needed + to store the <em>number of serialized sections</em> (from the + <em>free-space manager header</em>). + </p> + + <p> + The number of sets of free-space section records is + determined by the <em>size of serialized section list</em> in + the <em>free-space manager header</em>. + </p> + </td> + </tr> + + <tr> + <td><p>Section Size for Record Set #N</p></td> + <td> + <p>This is the size (in bytes) of the free-space section described + for <em>all</em> the section records in set #N.
+ </p> + + <p> + The length of this field is the minimum number of bytes needed + to store the <em>maximum section size</em> (from the + <em>free-space manager header</em>). + </p> + </td> + </tr> + + <tr> + <td><p>Record Set #N Section #K Offset</p></td> + <td> + <p>This is the offset (in bytes) of the free-space section within + the client for the free-space manager. + </p> + + <p> + The length of this field is the minimum number of bytes needed + to store the <em>size of address space</em> (from the + <em>free-space manager header</em>). + </p> + </td> + </tr> + + <tr> + <td><p>Record Set #N Section #K Type</p></td> + <td> + <p>This is the type of the section record, used to decode the + <em>record set #N section #K data</em> information. The defined + record type for <em>file</em> client is: + + <table class="list"> + <tr> + <th width="20%" align="center">Type</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>File’s section (a range of actual bytes in file) + </td> + </tr> + <tr> + <td align="center"><code>1+</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + + <p>The defined record types for a <em>fractal heap</em> client are: + + <table class="list"> + <tr> + <th width="20%" align="center">Type</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Fractal heap “single” section + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Fractal heap “first row” section + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Fractal heap “normal row” section + </td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>Fractal heap “indirect” section + </td> + </tr> + + <tr> + <td align="center"><code>4+</code></td> + <td>Reserved. 
+ </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Record Set #N Section #K Data</p></td> + <td> + <p>This is the section-type specific information for each record + in the record set, described below. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the <em>Free-space Section List</em>. + </p> + </td> + </tr> + + </table> +</div> + +<br /> +<p> + The section-type specific data for each free-space section record is + described below: +</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: File’s Section Data Record + </caption> + + <tr> + <td colspan="4"><em>No additional record data stored</em></td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap “Single” Section Data Record + </caption> + + <tr> + <td colspan="4"><em>No additional record data stored</em></td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap “First Row” Section Data + Record + </caption> + + <tr> + <td colspan="4"><em>Same format as “indirect” + section data</em></td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap “Normal Row” Section Data + Record + </caption> + + <tr> + <td colspan="4"><em>No additional record data stored</em></td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fractal Heap “Indirect” Section + Data Record + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Fractal Heap Indirect Block Offset <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="2">Block Start Row</td> + <td colspan="2">Block Start Column</td> + </tr> + + <tr> + <td colspan="2">Number of Blocks</td> + <td colspan="2" 
bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fractal Heap “Indirect” Section + Data Record + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Fractal Heap Indirect Block Offset</p></td> + <td> + <p>The offset of the indirect block in the fractal heap’s address + space containing the empty blocks. + </p> + <p> + The number of bytes used to encode this field is the minimum + number of bytes needed to encode values for the <em>Maximum + Heap Size</em> (in the fractal heap’s header). + </p> + </td> + </tr> + + <tr> + <td><p>Block Start Row</p></td> + <td> + <p>This is the row that the empty blocks start in. + </p> + </td> + </tr> + + <tr> + <td><p>Block Start Column</p></td> + <td> + <p>This is the column that the empty blocks start in. + </p> + </td> + </tr> + + <tr> + <td><p>Number of Blocks</p></td> + <td> + <p>This is the number of empty blocks covered by the section. + </p> + </td> + </tr> + </table> +</div> + +<h3><a name="SOHMTable"> + III.I. Disk Format: Level 1I - Shared Object Header Message Table</a></h3> + +<p> + The <em>shared object header message table</em> is used to locate + object + header messages that are shared between two or more object headers + in the file. Shared object header messages are stored and indexed + in the file in one of two ways: indexed sequentially in a + <em>shared message record list</em> or indexed with a v2 B-tree. + The shared messages themselves are either stored in a fractal + heap (when two or more objects share the message), or remain in an + object’s header (when only one object uses the message currently, + but the message can be shared in the future). +</p> + +<p> + The <em>shared object header message table</em> + contains a list of shared message index headers.
Each index header + records information about the version of the index format, the index + storage type, flags for the message types indexed, the number of + messages in the index, the address where the index resides, + and the fractal heap address if shared messages are stored there. +</p> + +<p> + Each index can be either a list or a v2 B-tree and may transition + between those two forms as the number of messages in the index + varies. Each shared message record contains information used to + locate the shared message from either a fractal heap or an object + header. The types of messages that can be shared are: <em>Dataspace, + Datatype, Fill Value, Filter Pipeline and Attribute</em>. +</p> + +<p> + The <em>shared object header message table</em> is pointed to + from a <a href="#SOHMTableMessage">shared message table</a> message + in the superblock extension for a file. This message stores the + version of the table format, along with the number of index headers + in the table. +</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Shared Object Header Message Table + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version for index #0</td> + <td>Index Type for index #0</td> + <td colspan="2">Message Type Flags for index #0</td> + </tr> + + <tr> + <td colspan="4">Minimum Message Size for index #0</td> + </tr> + + <tr> + <td colspan="2">List Cutoff for index #0</td> + <td colspan="2">v2 B-tree Cutoff for index #0</td> + </tr> + + <tr> + <td colspan="2">Number of Messages for index #0</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Index Address<sup>O</sup> for index #0<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Fractal Heap Address<sup>O</sup> for index #0<br /><br /></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + 
+ <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td>Version for index #N-1</td> + <td>Index Type for index #N-1</td> + <td colspan="2">Message Type Flags for index #N-1</td> + </tr> + + <tr> + <td colspan="4">Minimum Message Size for index #N-1</td> + </tr> + + <tr> + <td colspan="2">List Cutoff for index #N-1</td> + <td colspan="2">v2 B-tree Cutoff for index #N-1</td> + </tr> + + <tr> + <td colspan="2">Number of Messages for index #N-1</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Index Address<sup>O</sup> for index #N-1<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Fractal Heap Address<sup>O</sup> for index #N-1<br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Shared Object Header Message Table + </caption> + <tr> + <th width="35%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>SMTB</code>” + is used to indicate the beginning of the Shared Object + Header Message table. This gives file consistency checking + utilities a better chance of reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version for index #N</p></td> + <td> + <p>This is the version number for the list of shared object header message + indexes and this document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Index Type for index #N</p></td> + <td> + <p>The type of index can be an unsorted list or a v2 B-tree. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Message Type Flags for index #N</p></td> + <td> + <p>This field indicates the type of messages tracked in the index, + as follows: + <table class="list"> + <tr> + <th width="20%" align="center">Bits</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, the index tracks <em>Dataspace Messages</em>. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>If set, the index tracks <em>Datatype Messages</em>. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>If set, the index tracks <em>Fill Value Messages</em>. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>If set, the index tracks <em>Filter Pipeline Messages</em>. + </td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>If set, the index tracks <em>Attribute Messages</em>. + </td> + </tr> + <tr> + <td align="center"><code>5-15</code></td> + <td>Reserved (zero). + </td> + </tr> + </table></p> + + + <p> + An index can track more than one type of message, but each type + of message can only be in one index. + </p> + </td> + </tr> + + <tr> + <td><p>Minimum Message Size for index #N</p></td> + <td> + <p>This is the message size sharing threshold for the index. + If the encoded size of the message is less than this value, the + message is not shared. + </p> + </td> + </tr> + + <tr> + <td><p>List Cutoff for index #N</p></td> + <td> + <p>This is the cutoff value for the indexing of messages to + switch from a list to a v2 B-tree. If the number of messages + is greater than this value, the index should be a v2 B-tree. + </p> + </td> + </tr> + <tr> + <td><p>v2 B-tree Cutoff for index #N</p></td> + <td> + <p>This is the cutoff value for the indexing of messages + to switch from a v2 B-tree back to a list. If the number + of messages is less than this value, the index should be + a list.
+ </p> + </td> + </tr> + + <tr> + <td><p>Number of Messages for index #N</p></td> + <td> + <p>The number of shared messages being tracked for the index. + </p> + </td> + </tr> + + <tr> + <td><p>Index Address for index #N</p></td> + <td> + <p>This field is the address of the list or v2 B-tree where the + index nodes reside. + </p> + </td> + </tr> + + <tr> + <td><p>Fractal Heap Address for index #N</p></td> + <td> + <p>This field is the address of the fractal heap if shared messages + are stored there. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the table.</p> + </td> + </tr> + + </table> +</div> + +<br /> +<p> + Shared messages are indexed either with a <em>shared message record + list</em>, described below, or using a v2 B-tree (using record type 7). + The number of records in the <em>shared message record list</em> is + determined in the index’s entry in the <em>shared object header message + table</em>. +</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Shared Message Record List + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td colspan="4">Shared Message Record #0</td> + </tr> + + <tr> + <td colspan="4">Shared Message Record #1</td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Shared Message Record #N-1</td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Shared Message Record List + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>SMLI</code>” + is used to indicate the beginning of a list of index nodes. + This gives file consistency checking utilities a better + chance of reconstructing a damaged file. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Shared Message Record #N</p></td> + <td> + <p>The record for locating the shared message, either in the + fractal heap for the index, or an object header (see format for + <em>index nodes</em> below). + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the list. + </p> + </td> + </tr> + + </table> +</div> + +<br /> +<p> + The record for each shared message in an index is stored in one + of the following forms: +</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Shared Message Record for Messages Stored in a + Fractal Heap + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Message Location</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Hash Value</td> + </tr> + + <tr> + <td colspan="4">Reference Count</td> + </tr> + + <tr> + <td colspan="4"><br />Fractal Heap ID<br /><br /></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Shared Message Record for Messages Stored in a + Fractal Heap + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Message Location</p></td> + <td> + <p>This has a value of 0 indicating that the message is stored in + the heap. + </p> + </td> + </tr> + + <tr> + <td><p>Hash Value</p></td> + <td> + <p>This is the hash value for the message. + </p> + </td> + </tr> + + <tr> + <td><p>Reference Count</p></td> + <td> + <p>This is the number of times the message is used in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Fractal Heap ID</p></td> + <td> + <p>This is an 8-byte fractal heap ID for the message as stored in + the fractal heap for the index. 
+ </p> + </td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Shared Message Record for Messages Stored in an + Object Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Message Location</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Hash Value</td> + </tr> + + <tr> + <td>Reserved</td> + <td>Message Type</td> + <td colspan="2">Creation Index</td> + </tr> + + <tr> + <td colspan="4"><br />Object Header Address<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Shared Message Record for Messages Stored in an + Object Header + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Message Location</p></td> + <td> + <p>This has a value of 1 indicating that the message is stored in + an object header. + </p> + </td> + </tr> + + <tr> + <td><p>Hash Value</p></td> + <td> + <p>This is the hash value for the message. + </p> + </td> + </tr> + + <tr> + <td><p>Message Type</p></td> + <td> + <p>This is the message type in the object header. + </p> + </td> + </tr> + + <tr> + <td><p>Creation Index</p></td> + <td> + <p>This is the creation index of the message within the object + header. + </p> + </td> + </tr> + + <tr> + <td><p>Object Header Address</p></td> + <td> + <p>This is the address of the object header where the message is + located. + </p> + </td> + </tr> + </table> +</div> + +<h2><a name="DataObject"> + IV. 
Disk Format: Level 2 - Data Objects </a></h2> + +<p>Data objects contain the “real” user-visible information in the file. + These objects compose the scientific data and other information which + are generally thought of as “data” by the end-user. All the + other information in the file is provided as a framework for + storing and accessing these data objects. +</p> + +<p>A data object is composed of header and data + information. The header information contains the information + needed to interpret the data information for the object as + well as additional “metadata” or pointers to additional + “metadata” used to describe or annotate each object. +</p> + +<h3><a name="ObjectHeader"> + IV.A. Disk Format: Level 2A - Data Object Headers</a></h3> + +<p>The header information of an object is designed to encompass + all of the information about an object, except for the data itself. + This information includes the dataspace, the datatype, information + about how the data is stored on disk (in external files, compressed, + broken up in blocks, and so on), as well as other information used + by the library to speed up access to the data objects or maintain + a file’s integrity. Information stored by user applications + as attributes is also stored in the object’s header. The header + of each object is not necessarily located immediately prior to the + object’s data in the file and in fact may be located in any + position in the file. The order of the messages in an object header + is not significant.</p> + +<p>Object headers are composed of a prefix and a set of messages. The + prefix contains the information needed to interpret the messages and + a small amount of metadata about the object, and the messages contain + the majority of the metadata about the object. +</p> + +<h3><a name="ObjectHeaderPrefix"> + IV.A.1. Disk Format: Level 2A1 - Data Object Header Prefix</a></h3> + + + +<h4><a name="V1ObjectHeaderPrefix"> + IV.A.1.a. 
Version 1 Data Object Header Prefix</a></h4> + +<p>Header messages are aligned on 8-byte boundaries for version 1 + object headers. +</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Version 1 Object Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Reserved (zero)</td> + <td colspan="2">Total Number of Header Messages</td> + </tr> + + <tr> + <td colspan="4">Object Reference Count</td> + </tr> + + <tr> + <td colspan="4">Object Header Size</td> + </tr> + + <tr> + <td colspan="4">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="2">Header Message Type #1</td> + <td colspan="2">Size of Header Message Data #1</td> + </tr> + + <tr> + <td>Header Message #1 Flags</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Header Message Data #1<br /><br /></td> + </tr> + + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + + <tr> + <td colspan="2">Header Message Type #n</td> + <td colspan="2">Size of Header Message Data #n</td> + </tr> + + <tr> + <td>Header Message #n Flags</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Header Message Data #n<br /><br /></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 1 Object Header + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This value is used to determine the format of the + information in the object header. When the format of the + object header is changed, the version number + is incremented and can be used to determine how the + information in the object header is formatted. This + is version one (1) (there was no version zero (0)) of the + object header. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Total Number of Header Messages</p></td> + <td> + <p>This value determines the total number of messages listed in + object headers for this object. This value includes the messages + in continuation messages for this object. + </p> + </td> + </tr> + + <tr> + <td><p>Object Reference Count</p></td> + <td> + <p>This value specifies the number of “hard links” to this object + within the current file. References to the object from external + files, “soft links” in this file and object references in this + file are not tracked. + </p> + </td> + </tr> + + <tr> + <td><p>Object Header Size</p></td> + <td> + <p>This value specifies the number of bytes of header message data + following this length field that contain object header messages + for this object header. This value does not include the size of + object header continuation blocks for this object elsewhere in the + file. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Type</p></td> + <td> + <p>This value specifies the type of information included in the + following header message data. The message types for + header messages are defined in sections below. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Header Message #n Data</p></td> + <td> + <p>This value specifies the number of bytes of header + message data following the header message type and length + information for the current message. The size includes + padding bytes to make the message a multiple of eight + bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Flags</p></td> + <td> + <p>This is a bit field with the following definition: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, the message data is constant. This is used + for messages like the datatype message of a dataset. 
+ </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>If set, the message is <em>shared</em> and stored + in another location than the object header. The Header + Message Data field contains a Shared Message + (described in the <a href="#ObjectHeaderMessages">Data Object Header Messages</a> + section below) + and the Size of Header Message Data field + contains the size of that Shared Message. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>If set, the message should not be shared. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>If set, the HDF5 decoder should fail to open this object + if it does not understand the message’s type and the file + is open with permissions allowing write access to the file. + (Normally, unknown messages can just be ignored by HDF5 + decoders) + </td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>If set, the HDF5 decoder should set bit 5 of this + message’s flags (in other words, this bit field) + if it does not understand the message’s type + and the object is modified in any way. (Normally, + unknown messages can just be ignored by HDF5 + decoders) + </td> + </tr> + <tr> + <td align="center"><code>5</code></td> + <td>If set, this object was modified by software that did not + understand this message. + (Normally, unknown messages should just be ignored by HDF5 + decoders) (Can be used to invalidate an index or a similar + feature) + </td> + </tr> + <tr> + <td align="center"><code>6</code></td> + <td>If set, this message is shareable. + </td> + </tr> + <tr> + <td align="center"><code>7</code></td> + <td>If set, the HDF5 decoder should always fail to open this + object if it does not understand the message’s type (whether + it is open for read-only or read-write access). 
(Normally, + unknown messages can just be ignored by HDF5 decoders) + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Header Message #n Data</p></td> + <td> + <p>The format and length of this field is determined by the + header message type and size respectively. Some header + message types do not require any data and this information + can be eliminated by setting the length of the message to + zero. The data is padded with enough zeroes to make the + size a multiple of eight. + </p> + </td> + </tr> + </table> +</div> + +<h4><a name="V2ObjectHeaderPrefix"> + IV.A.1.b. Version 2 Data Object Header Prefix</a></h4> + +<p>Note that the “total number of messages” field has been dropped from + the data object header prefix in this version. The number of messages + in the data object header is just determined by the messages encountered + in all the object header blocks.</p> + +<p>Note also that the fields and messages in this version of data object + headers have <em>no</em> alignment or padding bytes inserted - they are + stored packed together.</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 Object Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Access time <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4">Modification Time <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4">Change Time <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4">Birth Time <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="2">Maximum # of compact attributes <em>(optional)</em></td> + <td colspan="2">Minimum # of dense attributes <em>(optional)</em></td> + </tr> + + <tr> + <td>Size of Chunk #0 <em>(variable size)</em></td> + <td 
colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td>Header Message Type #1</td> + <td colspan="2">Size of Header Message Data #1</td> + <td>Header Message #1 Flags</td> + </tr> + + <tr> + <td colspan="2">Header Message #1 Creation Order <em>(optional)</em></td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Header Message Data #1<br /><br /></td> + </tr> + + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + + <tr> + <td>Header Message Type #n</td> + <td colspan="2">Size of Header Message Data #n</td> + <td>Header Message #n Flags</td> + </tr> + + <tr> + <td colspan="2">Header Message #n Creation Order <em>(optional)</em></td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Header Message Data #n<br /><br /></td> + </tr> + + <tr> + <td colspan="4">Gap <em>(optional, variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 Object Header + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>OHDR</code>” + is used to indicate the beginning of an object header. This + gives file consistency checking utilities a better chance + of reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This field has a value of 2 indicating version 2 of the object header. + </p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td> + <p>This field is a bit field indicating additional information + about the object header. 
+ <table class="list"> + <tr> + <th width="20%" align="center">Bit(s)</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0-1</code></td> + <td>This two bit field determines the size of the + <em>Size of Chunk #0</em> field. The values are: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>The <em>Size of Chunk #0</em> field is 1 byte. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>The <em>Size of Chunk #0</em> field is 2 bytes. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>The <em>Size of Chunk #0</em> field is 4 bytes. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>The <em>Size of Chunk #0</em> field is 8 bytes. + </td> + </tr> + </table> + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>If set, attribute creation order is tracked.</td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>If set, attribute creation order is indexed.</td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>If set, non-default attribute storage phase change + values are stored.</td> + </tr> + <tr> + <td align="center"><code>5</code></td> + <td>If set, access, modification, change and birth times + are stored.</td> + </tr> + <tr> + <td align="center"><code>6-7</code></td> + <td>Reserved</td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Access Time</p></td> + <td> + <p>This 32-bit value represents the number of seconds after the + UNIX epoch when the object’s raw data was last accessed + (in other words, read or written). + </p> + <p>This field is present if bit 5 of <em>flags</em> is set. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Modification Time</p></td> + <td> + <p>This 32-bit value represents the number of seconds after + the UNIX epoch when the object’s raw data was last + modified (in other words, written). + </p> + <p>This field is present if bit 5 of <em>flags</em> is set. + </p> + </td> + </tr> + + <tr> + <td><p>Change Time</p></td> + <td> + <p>This 32-bit value represents the number of seconds after the + UNIX epoch when the object’s metadata was last changed. + </p> + <p>This field is present if bit 5 of <em>flags</em> is set. + </p> + </td> + </tr> + + <tr> + <td><p>Birth Time</p></td> + <td> + <p>This 32-bit value represents the number of seconds after the + UNIX epoch when the object was created. + </p> + <p>This field is present if bit 5 of <em>flags</em> is set. + </p> + </td> + </tr> + + <tr> + <td><p>Maximum # of compact attributes</p></td> + <td> + <p>This is the maximum number of attributes to store in the compact + format before switching to the indexed format. + </p> + <p>This field is present if bit 4 of <em>flags</em> is set. + </p> + </td> + </tr> + + <tr> + <td><p>Minimum # of dense attributes</p></td> + <td> + <p>This is the minimum number of attributes to store in the indexed + format before switching to the compact format. + </p> + <p>This field is present if bit 4 of <em>flags</em> is set. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Chunk #0</p></td> + <td> + <p> + This unsigned value specifies the number of bytes of header + message data following this field that contain object header + information. + </p> + <p> + This value does not include the size of object header + continuation blocks for this object elsewhere in the file. + </p> + <p> + The length of this field varies depending on bits 0 and 1 of + the <em>flags</em> field. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Type</p></td> + <td> + <p>Same format as version 1 of the object header, described above. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Size of Header Message #n Data</p></td> + <td> + <p>This value specifies the number of bytes of header + message data following the header message type and length + information for the current message. The size of messages + in this version does <em>not</em> include any padding bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Flags</p></td> + <td> + <p>Same format as version 1 of the object header, described above. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Creation Order</p></td> + <td> + <p>This field stores the order that a message of a given type + was created in. + </p> + <p>This field is present if bit 2 of <em>flags</em> is set. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Data</p></td> + <td> + <p>Same format as version 1 of the object header, described above. + </p> + </td> + </tr> + + <tr> + <td><p>Gap</p></td> + <td> + <p>A gap in an object header chunk is inferred by the end of the + messages for the chunk before the beginning of the chunk’s + checksum. Gaps are always smaller than the size of an + object header message prefix (message type + message size + + message flags). + </p> + <p>Gaps are formed when a message (typically an attribute message) + in an earlier chunk is deleted and a message from a later + chunk that does not quite fit into the free space is moved + into the earlier chunk. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the object header chunk. + </p> + </td> + </tr> + </table> +</div> + +<p>The header message types and the message data associated with + them compose the critical “metadata” about each object. Some + header messages are required for each object while others are + optional. Some optional header messages may also be repeated + several times in the header itself; the requirements and number + of times allowed in the header will be noted in each header + message description below. 
+</p> + + +<h3><a name="ObjectHeaderMessages"> + IV.A.2. Disk Format: Level 2A2 - Data Object Header Messages</a></h3> + +<p>Data object header messages are small pieces of metadata that are + stored in the data object header for each object in an HDF5 file. + Data object header messages provide the metadata required to describe + an object and its contents, as well as optional pieces of metadata + that annotate the meaning or purpose of the object. +</p> + +<p>Data object header messages are either stored directly in the data + object header for the object or are shared between multiple objects + in the file. When a message is shared, a flag in the <em>Message Flags</em> + indicates that the actual <em>Message Data</em> + portion of that message is stored in another location (such as another + data object header, or a heap in the file) and the <em>Message Data</em> + field contains the information needed to locate the actual information + for the message. +</p> + +<p> + The format of shared message data is described here:</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Shared Message (Version 1) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Type</td> + <td colspan="2">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) 
+ </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Shared Message (Version 1) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number is used when there are changes in the format + of a shared object message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used.</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Used by the library before version 1.6.1. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td><p>The type of shared message location: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Message stored in another object’s header (a <em>committed</em> + message). 
+ </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>The address of the object header + containing the message to be shared.</p> + </td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + + <caption> + Layout: Shared Message (Version 2) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Type</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Shared Message (Version 2) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number is used when there are changes in the format + of a shared object message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Used by the library of version 1.6.1 and after. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td><p>The type of shared message location: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Message stored in another object’s header (a <em>committed</em> + message). 
+ </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>The address of the object header + containing the message to be shared.</p></td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Shared Message (Version 3) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Type</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Location <em>(variable size)</em></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Shared Message (Version 3) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number indicates changes in the format of shared + object message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>Used by the library of version 1.8 and after. In this + version, the <em>Type</em> field can indicate that + the message is stored in the fractal heap. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td><p>The type of shared message location: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Message is not shared and is not shareable. + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Message stored in file’s <em>shared object header message</em> + heap (a <em>shared</em> message). 
+ </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Message stored in another object’s header (a <em>committed</em> + message). + </td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>Message is not shared, but is shareable. + </td> + </tr> + + </table></p> + </td> + </tr> + + <tr> + <td><p>Location</p></td> + <td><p>This field contains either a <a href="#SizeOfOffsetsV0"> + <em>Size of Offsets</em></a>-byte address of the object header + containing the message to be shared, or an 8-byte fractal heap + ID for the message in the file’s <em>shared object header + message</em> heap. + </p> + </td> + </tr> + </table> +</div> + + +<p>The following is a list of currently defined header messages: +</p> + +<h4><a name="NILMessage">IV.A.2.a. The NIL Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> NIL</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0000</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may be repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The NIL message is used to indicate a message which is to be + ignored when reading the header messages for a data object. + [Possibly one which has been deleted for some reason.] + </td></tr> + <tr><td colspan="2"><b>Format of Data:</b> Unspecified</td></tr> +</table></center> +<!-- end msgdesc table --> + + +<h4><a name="DataspaceMessage">IV.A.2.b. 
The Dataspace Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Dataspace</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0001</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies according to the number of + dimensions, as described in the following table.</td></tr> + <tr><td colspan="2"><b>Status:</b> Required for dataset objects; + may not be repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The dataspace message describes the number of dimensions (in + other words, “rank”) and size of each dimension that + the data object has. This message is only used for datasets which + have a simple, rectilinear, array-like layout; datasets requiring + a more complex layout are not yet supported. + </td> + </tr> + + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Dataspace Message - Version 1 + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Dimensionality</td> + <td>Flags</td> + <td>Reserved</td> + </tr> + + <tr> + <td colspan="4">Reserved</td> + </tr> + + <tr> + <td colspan="4"><br />Dimension #1 Size<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4"><br />Dimension #n Size<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Dimension #1 Maximum Size<sup>L</sup> <em>(optional)</em><br /><br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4"><br />Dimension #n Maximum Size<sup>L</sup> <em>(optional)</em><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Permutation Index #1<sup>L</sup> <em>(optional)</em><br /><br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td 
colspan="4"><br />Permutation Index #n<sup>L</sup> <em>(optional)</em><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Dataspace Message - Version 1 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This value is used to determine the format of the + Dataspace Message. When the format of the + information in the message is changed, the version number + is incremented and can be used to determine how the + information in the object header is formatted. This + document describes version one (1) (there was no version + zero (0)). + </p> + </td> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td> + <p>This value is the number of dimensions that the data + object has. + </p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td> + <p>This field is used to store flags to indicate the + presence of parts of this message. Bit 0 (the least + significant bit) is used to indicate that maximum + dimensions are present. Bit 1 is used to indicate that + permutation indices are present. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td> + <p>This value is the current size of the dimension of the + data as stored in the file. The first dimension stored in + the list of dimensions is the slowest changing dimension + and the last dimension stored is the fastest changing + dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Maximum Size</p></td> + <td> + <p>This value is the maximum size of the dimension of the + data as stored in the file. 
This value may be the special + “<a href="#UnlimitedDim">unlimited</a>” size which indicates + that the data may expand along this dimension indefinitely. + If these values are not stored, the maximum size of each + dimension is assumed to be the dimension’s current size. + </p> + </td> + </tr> + + <tr> + <td><p>Permutation Index #n</p></td> + <td> + <p>This value is the index permutation used to map + each dimension from the canonical representation to an + alternate axis for each dimension. If these values are + not stored, the first dimension stored in the list of + dimensions is the slowest changing dimension and the last + dimension stored is the fastest changing dimension. + </p> + </td> + </tr> + </table> +</div> + + + +<br /> +<p>Version 2 of the dataspace message dropped the optional + permutation index value support, as it was never implemented in the + HDF5 Library:</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Dataspace Message - Version 2 + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Dimensionality</td> + <td>Flags</td> + <td>Type</td> + </tr> + + <tr> + <td colspan="4"><br />Dimension #1 Size<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4"><br />Dimension #n Size<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Dimension #1 Maximum Size<sup>L</sup> <em>(optional)</em><br /><br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4"><br />Dimension #n Maximum Size<sup>L</sup> <em>(optional)</em><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) 
+ </td></tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Dataspace Message - Version 2 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This value is used to determine the format of the + Dataspace Message. This field should be ‘2’ for version 2 + format messages. + </p> + </td> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td> + <p>This value is the number of dimensions that the data object has. + </p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td> + <p>This field is used to store flags to indicate the + presence of parts of this message. Bit 0 (the least + significant bit) is used to indicate that maximum + dimensions are present. + </p> + </td> + </tr> + + <tr> + <td><p>Type</p></td> + <td> + <p>This field indicates the type of the dataspace: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>A <em>scalar</em> dataspace; in other words, + a dataspace with a single, dimensionless element. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>A <em>simple</em> dataspace; in other words, + a dataspace with a rank greater than 0 and an + appropriate number of dimensions. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>A <em>null</em> dataspace; in other words, + a dataspace with no elements. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td> + <p>This value is the current size of the dimension of the + data as stored in the file. The first dimension stored in + the list of dimensions is the slowest changing dimension + and the last dimension stored is the fastest changing + dimension. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Maximum Size</p></td> + <td> + <p>This value is the maximum size of the dimension of the + data as stored in the file. This value may be the special + “<a href="#UnlimitedDim">unlimited</a>” size which indicates + that the data may expand along this dimension indefinitely. + If these values are not stored, the maximum size of each + dimension is assumed to be the dimension’s current size. + </p> + </td> + </tr> + + </table> +</div> + + + +<!-- + <h4><a name="DataSpaceMessage">Header Message Name: Complex Dataspace (Fiber Bundle?)</a></h4> + + <!-- start msgdesc table -- + <center> + <table class="msgdesc"> + <p><b>Header Message Name: ???????</b></td></tr> +<b>Header Message Type: </b>0x0002<br /> +<b>Length:</b> Varies</td></tr> + +<b>Status:</b> One of the <em>Simple Dataspace</em> or +<em>Complex Dataspace</em> messages is required (but not both) and may +not be repeated.<br /> <b>Description:</b> The +<em>Dataspace</em> message describes space that the dataset is +mapped onto in a more comprehensive way than the <em>Simple + Dimensionality</em> message is capable of handling. The +dataspace of a dataset encompasses the type of coordinate system +used to locate the dataset’s elements as well as the structure and +regularity of the coordinate system. The dataspace also +describes the number of dimensions which the dataset inhabits as +well as a possible higher dimensional space in which the dataset +is located within. 
+ +<br /> +<p><b>Format of Data:</b></p> + +<center> + <table border cellpadding="4" width="80%"> + <caption align="bottom"> + <b>HDF5 Dataspace Message Layout</b> + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align="center"> + <td colspan="4">Mesh Type</td> + </tr> + <tr align="center"> + <td colspan="4">Logical Dimensionality</td> + </tr> + </table> +</center> + +<br /> +<dl> + <dt>The elements of the dimensionality message are described below: + <dd> + <dl> + <dt>Mesh Type: (unsigned 32-bit integer) + <dd>This value indicates whether the grid is + polar/spherical/cartesian, + structured/unstructured and regular/irregular. <br /> + The mesh type value is broken up as follows: <br /> + + <br /> + <center> + <table border cellpadding="4" width="80%"> + <caption align="bottom"> + <b>HDF5 Mesh-type Layout</b> + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align="center"> + <td colspan="1">Mesh Embedding</td> + <td colspan="1">Coordinate System</td> + <td colspan="1">Structure</td> + <td colspan="1">Regularity</td> + </tr> + </table> + </center> + The following are the definitions of mesh-type bytes: + <dl> + <dt>Mesh Embedding + <dd>This value indicates whether the dataset dataspace + is located within + another dataspace or not: + <dl> <dl> + <dt><STANDALONE> + <dd>The dataset mesh is self-contained and is not + embedded in another mesh. + <dt><EMBEDDED> + <dd>The dataset’s dataspace is located within + another dataspace, as + described in information below. + </dl> </dl> + <dt>Coordinate System + <dd>This value defines the type of coordinate system + used for the mesh: + <dl> <dl> + <dt><POLAR> + <dd>The last two dimensions are in polar + coordinates, higher dimensions are + cartesian. 
+ <dt><SPHERICAL> + <dd>The last three dimensions are in spherical + coordinates, higher dimensions + are cartesian. + <dt><CARTESIAN> + <dd>All dimensions are in cartesian coordinates. + </dl> </dl> + <dt>Structure + <dd>This value defines the locations of the grid-points + on the axes: + <dl> <dl> + <dt><STRUCTURED> + <dd>All grid-points are on integral, sequential + locations, starting from 0. + <dt><UNSTRUCTURED> + <dd>Grid-points locations in each dimension are + explicitly defined and + may be of any numeric datatype. + </dl> </dl> + <dt>Regularity + <dd>This value defines the locations of the dataset + points on the grid: + <dl> <dl> + <dt><REGULAR> + <dd>All dataset elements are located at the + grid-points defined. + <dt><IRREGULAR> + <dd>Each dataset element has a particular + grid-location defined. + </dl> </dl> + </dl> + <p>The following grid combinations are currently allowed:</p> + <dl> <dl> + <dt><POLAR-STRUCTURED-REGULAR> + <dt><SPHERICAL-STRUCTURED-REGULAR> + <dt><CARTESIAN-STRUCTURED-REGULAR> + <dt><POLAR-UNSTRUCTURED-REGULAR> + <dt><SPHERICAL-UNSTRUCTURED-REGULAR> + <dt><CARTESIAN-UNSTRUCTURED-REGULAR> + <dt><CARTESIAN-UNSTRUCTURED-IRREGULAR> + </dl> </dl> + All of the above grid types can be embedded within another + dataspace. + <br /> <br /> + <dt>Logical Dimensionality: (unsigned 32-bit integer) + <dd>This value is the number of dimensions that the dataset occupies. 
+ + <br /> + <center> + <table border cellpadding="4" width="80%"> + <caption align="bottom"> + <b>HDF5 Dataspace Embedded Dimensionality Information</b> + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align="center"> + <td colspan="4">Embedded Dimensionality</td> + </tr> + <tr align="center"> + <td colspan="4">Embedded Dimension Size #1</td> + </tr> + <tr align="center"> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr align="center"> + <td colspan="4">Embedded Dimension Size #n</td> + </tr> + <tr align="center"> + <td colspan="4">Embedded Origin Location #1</td> + </tr> + <tr align="center"> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr align="center"> + <td colspan="4">Embedded Origin Location #n</td> + </tr> + </table> + </center> + + <dt>Embedded Dimensionality: (unsigned 32-bit integer) + <dd>This value is the number of dimensions of the space the + dataset is located within: in other words, a planar dataset + located within a 3-D space, a 3-D dataset + which is a subset of another 3-D space, and so on. + <dt>Embedded Dimension Size: (unsigned 32-bit integer) + <dd>These values are the sizes of the dimensions of the + embedded dataspace + that the dataset is located within. + <dt>Embedded Origin Location: (unsigned 32-bit integer) + <dd>These values comprise the location of the dataset’s + origin within the embedded dataspace. 
+ </dl> +</dl> +[Comment: need some way to handle different orientations of the +dataset dataspace +within the embedded dataspace]<br /> + +<br /> +<center> + <table border cellpadding="4" width="80%"> + <caption align="bottom"> + <b>HDF5 Dataspace Structured/Regular Grid Information</b> + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align="center"> + <td colspan="4">Logical Dimension Size #1</td> + </tr> + <tr align="center"> + <td colspan="4">Logical Dimension Maximum #1</td> + </tr> + <tr align="center"> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr align="center"> + <td colspan="4">Logical Dimension Size #n</td> + </tr> + <tr align="center"> + <td colspan="4">Logical Dimension Maximum #n</td> + </tr> + </table> +</center> + +<br /> +<dl> + <dt>The elements of the dimensionality message are described below: + <dd> + <dl> + <dt>Logical Dimension Size #n: (unsigned 32-bit integer) + <dd>This value is the current size of the dimension of the + data as stored in + the file. The first dimension stored in the list of + dimensions is the slowest + changing dimension and the last dimension stored is the + fastest changing + dimension. + <dt>Logical Dimension Maximum #n: (unsigned 32-bit integer) + <dd>This value is the maximum size of the dimension of the + data as stored in + the file. This value may be the special value + <UNLIMITED> which + indicates that the data may expand along this dimension + indefinitely. 
+ </dl> +</dl> +<br /> +<center> + <table border cellpadding="4" width="80%"> + <caption align="bottom"> + <b>HDF5 Dataspace Structured/Irregular Grid Information</b> + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align="center"> + <td colspan="4"># of Grid Points in Dimension #1</td> + </tr> + <tr align="center"> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr align="center"> + <td colspan="4"># of Grid Points in Dimension #n</td> + </tr> + <tr align="center"> + <td colspan="4">Datatype of Grid Point Locations</td> + </tr> + <tr align="center"> + <td colspan="4">Location of Grid Points in Dimension #1</td> + </tr> + <tr align="center"> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr align="center"> + <td colspan="4">Location of Grid Points in Dimension #n</td> + </tr> + </table> +</center> + +<br /> +<center> + <table border cellpadding="4" width="80%"> + <caption align="bottom"> + <b>HDF5 Dataspace Unstructured Grid Information</b> + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr align="center"> + <td colspan="4"># of Grid Points</td> + </tr> + <tr align="center"> + <td colspan="4">Datatype of Grid Point Locations</td> + </tr> + <tr align="center"> + <td colspan="4">Grid Point Locations<br />.<br />.<br /></td> + </tr> + </table> +</center> +--> + +<h4><a name="LinkInfoMessage">IV.A.2.c. The Link Info Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Link Info</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0002 </td></tr> + <tr><td colspan="2"><b>Length:</b> Varies </td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated. 
</td></tr> + <tr><td><b>Description:</b></td> + <td>The link info message tracks variable information about the + current state of the links for a “new style” + group’s behavior. Variable information will be stored in + this message and constant information will be stored in the + <a href="#GroupInfoMessage">Group Info</a> message. + </td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Link Info + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Maximum Creation Index <em>(8 bytes, optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Fractal Heap Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of v2 B-tree for Name Index<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address of v2 B-tree for Creation Order Index<sup>O</sup> <em>(optional)</em><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Link Info + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number for this message. 
This document describes + version 0.</p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This field determines various optional aspects of the link + info message: + + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, creation order for the links is tracked. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>If set, creation order for the links is indexed. + </td> + </tr> + <tr> + <td align="center"><code>2-7</code></td> + <td>Reserved</td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Maximum Creation Index</p></td> + <td><p>This 64-bit value is the maximum creation order index value + stored for a link in this group.</p> + <p>This field is present if bit 0 of <em>flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Fractal Heap Address</p></td> + <td> + <p> + This is the address of the fractal heap to store dense links. + Each link stored in the fractal heap is stored as a + <a href="#LinkMessage">Link Message</a>. + </p> + <p> + If there are no links in the group, or the group’s links + are stored “compactly” (as object header messages), this + value will be the <a href="#UndefinedAddress">undefined + address</a>. + </p> + </td> + </tr> + + <tr> + <td><p>Address of v2 B-tree for Name Index</p></td> + <td><p>This is the address of the version 2 B-tree to index names of links.</p> + <p>If there are no links in the group, or the group’s links + are stored “compactly” (as object header messages), this + value will be the <a href="#UndefinedAddress">undefined + address</a>. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Address of v2 B-tree for Creation Order Index</p></td> + <td><p>This is the address of the version 2 B-tree to index creation order of links.</p> + <p>If there are no links in the group, or the group’s links + are stored “compactly” (as object header messages), this + value will be the <a href="#UndefinedAddress">undefined + address</a>. + </p> + <p>This field exists if bit 1 of <em>flags</em> is set.</p> + </td> + </tr> + + </table> +</div> + + +<h4><a name="DatatypeMessage">IV.A.2.d. The Datatype Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Datatype</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0003 + </td></tr> + <tr><td colspan="2"><b>Length:</b> Variable</td></tr> + <tr><td colspan="2"><b>Status:</b> Required for dataset or committed + datatype (formerly named datatype) objects; may not be repeated. + </td></tr> + <tr><td><b>Description:</b></td> + <td><p>The datatype message defines the datatype for each element + of a dataset or a common datatype for sharing between multiple + datasets. A datatype can describe an atomic type like a fixed- + or floating-point type or more complex types like a C struct + (compound datatype), array (array datatype), or C++ vector + (variable-length datatype).</p> + <p>Datatype messages that are part of a dataset object do not + describe how elements are related to one another; the dataspace + message is used for that purpose. 
Datatype messages that are part of + a committed datatype (formerly named datatype) message describe + a common datatype that can be shared by multiple datasets in the + file.</p> + </td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Datatype Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Class and Version</td> + <td>Class Bit Field, Bits 0-7</td> + <td>Class Bit Field, Bits 8-15</td> + <td>Class Bit Field, Bits 16-23</td> + </tr> + + <tr> + <td colspan="4">Size</td> + </tr> + + <tr> + <td colspan="4"><br /><br />Properties<br /><br /><br /></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Datatype Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Class and Version</p></td> + <td> + <p>The version of the datatype message and the datatype’s class + information are packed together in this field. The version + number is packed in the top 4 bits of the field and the class + is contained in the bottom 4 bits. + </p> + <p>The version number information is used for changes in the + format of the datatype message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Used by early versions of the library to encode + compound datatypes with explicit array fields. + See the compound datatype description below for + further details. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>Used when an array datatype needs to be encoded. 
+ </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>Used when a VAX byte-ordered type needs to be + encoded. Packs various other datatype classes more + efficiently also. + </td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>Used to encode the revised reference datatype. + </td> + </tr> + </table></p> + + <p>The class of the datatype determines the format for the class + bit field and properties portion of the datatype message, which + are described below. The + following classes are currently defined: + + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td><a href="#ClassFixedPoint">Fixed-Point</a></td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td><a href="#ClassFloatingPoint">Floating-Point</a></td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td> <a href="#ClassTime">Time</a></td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td><a href="#ClassString">String</a></td> + </tr> + + <tr> + <td align="center"><code>4</code></td> + <td><a href="#ClassBitField">Bit field</a></td> + </tr> + + <tr> + <td align="center"><code>5</code></td> + <td><a href="#ClassOpaque">Opaque</a></td> + </tr> + + <tr> + <td align="center"><code>6</code></td> + <td><a href="#ClassCompound">Compound</a></td> + </tr> + + <tr> + <td align="center"><code>7</code></td> + <td><a href="#ClassReference">Reference</a></td> + </tr> + + <tr> + <td align="center"><code>8</code></td> + <td><a href="#ClassEnum">Enumerated</a></td> + </tr> + + <tr> + <td align="center"><code>9</code></td> + <td><a href="#ClassVarLen">Variable-Length</a></td> + </tr> + + <tr> + <td align="center"><code>10</code></td> + <td><a href="#ClassArray">Array</a></td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Class Bit Fields</p></td> + <td> + <p>The information in these bit fields is specific to each 
datatype + class and is described below. All bits not defined for a + datatype class are set to zero. + </p> + </td> + </tr> + + <tr> + <td><p>Size</p></td> + <td> + <p>The size of a datatype element in bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Properties</p></td> + <td> + <p>This variable-sized sequence of bytes encodes information + specific to each datatype class and is described for each class + below. If there is no property information specified for a + datatype class, the size of this field is zero bytes. + </p> + </td> + </tr> + + </table> +</div> + + +<br /> +<br /> +<a name="ClassFixedPoint"></a> + <p>Class specific information for the Fixed-point Numbers class + (Class 0):</p> + + <div align="center"> + <table class="desc"> + <caption> + Bits: Fixed-point Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0</p></td> + <td><p><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big-endian.</p></td> + </tr> + + <tr> + <td><p>1, 2</p></td> + <td><p><b>Padding type.</b> Bit 1 is the lo_pad bit and bit 2 + is the hi_pad bit. 
If a datum has unused bits at either + end, then the lo_pad or hi_pad bit is copied to those + locations.</p></td> + </tr> + + <tr> + <td><p>3</p></td> + <td><p><b>Signed.</b> If this bit is set then the fixed-point + number is in 2’s complement form.</p></td> + </tr> + + <tr> + <td><p>4-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Fixed-point Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="2">Bit Offset</td> + <td colspan="2">Bit Precision</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Fixed-point Property Description + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Bit Offset</p></td> + <td> + <p>The bit offset of the first significant bit of the fixed-point + value within the datatype. The bit offset specifies the number + of bits “to the right of” the value (which are set to the + lo_pad bit value). + </p> + </td> + </tr> + + <tr> + <td><p>Bit Precision</p></td> + <td> + <p>The number of bits of precision of the fixed-point value + within the datatype. This value, combined with the datatype + element’s size and the Bit Offset field specifies the number + of bits “to the left of” the value (which are set to the + hi_pad bit value). 
+ </p> + </td> + </tr> + + </table> + </div> + + + <br /> + <br /> + <a name="ClassFloatingPoint"></a> + <p>Class specific information for the Floating-point Numbers class + (Class 1):</p> + + <div align="center"> + <table class="desc"> + <caption> + Bits: Floating-point Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0, 6</p></td> + <td><p><b>Byte Order.</b> These two non-contiguous bits specify the + “endianness” of the bytes in the datatype element. + <table class="list"> + <tr> + <th width="10%" align="center">Bit 6</th> + <th width="10%" align="center">Bit 0</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td align="center"><code>0</code></td> + <td>Byte order is little-endian + </td> + </tr> + <tr> + <td align="center"><code>0</code></td> + <td align="center"><code>1</code></td> + <td>Byte order is big-endian + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td align="center"><code>0</code></td> + <td>Reserved + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td align="center"><code>1</code></td> + <td>Byte order is VAX-endian + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>1, 2, 3</p></td> + <td><p><b>Padding type.</b> Bit 1 is the low bits pad type, bit 2 + is the high bits pad type, and bit 3 is the internal bits + pad type. If a datum has unused bits at either end or between + the sign bit, exponent, or mantissa, then the value of bit + 1, 2, or 3 is copied to those locations.</p></td> + </tr> + + <tr> + <td><p>4-5</p></td> + <td><p><b>Mantissa Normalization.</b> This 2-bit bit field specifies + how the most significant bit of the mantissa is managed. 
+ <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>No normalization + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>The most significant bit of the mantissa is always set + (except for 0.0). + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>The most significant bit of the mantissa is not stored, + but is implied to be set. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>Reserved. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>7</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + + <tr> + <td><p>8-15</p></td> + <td><p><b>Sign Location.</b> This is the bit position of the sign + bit. Bits are numbered with the least significant bit zero.</p></td> + </tr> + + <tr> + <td><p>16-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Floating-point Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="2">Bit Offset</td> + <td colspan="2">Bit Precision</td> + </tr> + + <tr> + <td>Exponent Location</td> + <td>Exponent Size</td> + <td>Mantissa Location</td> + <td>Mantissa Size</td> + </tr> + + <tr> + <td colspan="4">Exponent Bias</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Floating-point Property Description + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Bit Offset</p></td> + <td> + <p>The bit offset of the first significant bit of the floating-point + value within the datatype. The bit offset specifies the number + of bits “to the right of” the value. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Bit Precision</p></td> + <td> + <p>The number of bits of precision of the floating-point value + within the datatype. + </p> + </td> + </tr> + + <tr> + <td><p>Exponent Location</p></td> + <td> + <p>The bit position of the exponent field. Bits are numbered with + the least significant bit number zero. + </p> + </td> + </tr> + + <tr> + <td><p>Exponent Size</p></td> + <td> + <p>The size of the exponent field in bits. + </p> + </td> + </tr> + + <tr> + <td><p>Mantissa Location</p></td> + <td> + <p>The bit position of the mantissa field. Bits are numbered with + the least significant bit number zero. + </p> + </td> + </tr> + + <tr> + <td><p>Mantissa Size</p></td> + <td> + <p>The size of the mantissa field in bits. + </p> + </td> + </tr> + + <tr> + <td><p>Exponent Bias</p></td> + <td> + <p>The bias of the exponent field. + </p> + </td> + </tr> + + </table> + </div> + + + <br /> + <br /> + <a name="ClassTime"></a> + <p>Class specific information for the Time class (Class 2):</p> + + + <div align="center"> + <table class="desc"> + <caption> + Bits: Time Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0</p></td> + <td><p><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big-endian.</p></td> + </tr> + + <tr> + <td><p>1-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Time Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="2">Bit Precision</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Time Property Description + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Bit Precision</p></td> + <td> + <p>The number of bits of precision of the 
time value. + </p> + </td> + </tr> + + </table> + </div> + + + <br /> + <a name="ClassString"></a> + <p>Class specific information for the Strings class (Class 3):</p> + + + <div align="center"> + <table class="desc"> + <caption> + Bits: String Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-3</p></td> + <td><p><b>Padding type.</b> This four-bit value determines the + type of padding to use for the string. The values are: + + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Null Terminate: A zero byte marks the end of the + string and is guaranteed to be present after + converting a long string to a short string. When + converting a short string to a long string the value is + padded with additional null characters as necessary. + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Null Pad: Null characters are added to the end of + the value during conversions from short values to long + values but conversion in the opposite direction simply + truncates the value. + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Space Pad: Space characters are added to the end of + the value during conversions from short values to long + values but conversion in the opposite direction simply + truncates the value. This is the Fortran + representation of the string. + </td> + </tr> + + <tr> + <td align="center"><code>3-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>4-7</p></td> + <td><p><b>Character Set.</b> The character set used to + encode the string. 
+ <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>ASCII character set encoding + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>UTF-8 character set encoding + </td> + </tr> + + <tr> + <td align="center"><code>2-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>8-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <p>There are no properties defined for the string class. + </p> + + <br /> + <br /> + <a name="ClassBitField"></a> + <p>Class specific information for the Bit Fields class (Class 4):</p> + + <div align="center"> + <table class="desc"> + <caption> + Bits: Bitfield Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0</p></td> + <td><p><b>Byte Order.</b> If zero, byte order is little-endian; + otherwise, byte order is big endian.</p></td> + </tr> + + <tr> + <td><p>1, 2</p></td> + <td><p><b>Padding type.</b> Bit 1 is the lo_pad type and bit 2 + is the hi_pad type. 
If a datum has unused bits at either + end, then the lo_pad or hi_pad bit is copied to those + locations.</p></td> + </tr> + + <tr> + <td><p>3-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Bit Field Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="2">Bit Offset</td> + <td colspan="2">Bit Precision</td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Bit Field Property Description + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Bit Offset</p></td> + <td> + <p>The bit offset of the first significant bit of the bit field + within the datatype. The bit offset specifies the number + of bits “to the right of” the value. + </p> + </td> + </tr> + + <tr> + <td><p>Bit Precision</p></td> + <td> + <p>The number of bits of precision of the bit field + within the datatype. 
+ </p> + </td> + </tr> + </table> + </div> + + + <br /> + <br /> + <a name="ClassOpaque"></a> + <p>Class specific information for the Opaque class (Class 5):</p> + + <div align="center"> + <table class="desc"> + <caption> + Bits: Opaque Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-7</p></td> + <td><p>Length of ASCII tag in bytes.</p></td> + </tr> + + <tr> + <td><p>8-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Opaque Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />ASCII Tag<br /> + <br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Opaque Property Description + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>ASCII Tag</p></td> + <td> + <p>This NUL-terminated string provides a description for the + opaque type. It is NUL-padded to a multiple of 8 bytes. + </p> + </td> + </tr> + </table> + </div> + + + <br /> + <br /> + <a name="ClassCompound"></a> + <p>Class specific information for the Compound class (Class 6):</p> + + <div align="center"> + <table class="desc"> + <caption> + Bits: Compound Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-15</p></td> + <td><p><b>Number of Members.</b> This field contains the number + of members defined for the compound datatype. 
The member + definitions are listed in the Properties field of the + datatype message.</p></td> + </tr> + + <tr> + <td><p>16-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + + <p>The Properties field of a compound datatype is a list of the + member definitions of the compound datatype. The member + definitions appear one after another with no intervening bytes. + The member types are described with a (recursively) encoded datatype + message.</p> + + <p>Note that the property descriptions are different for different + versions of the datatype message. Additionally note that the version + 0 datatype encoding is deprecated and has been replaced with later + encodings in versions of the HDF5 Library from the 1.4 release + onward.</p> + + + <div align="center"> + <table class="format"> + <caption> + Layout: Compound Properties Description for Datatype Version 1 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />Name<br /><br /></td> + </tr> + + <tr> + <td colspan="4">Byte Offset of Member</td> + </tr> + + <tr> + <td>Dimensionality</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4">Dimension Permutation</td> + </tr> + + <tr> + <td colspan="4">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4">Dimension #1 Size (required)</td> + </tr> + + <tr> + <td colspan="4">Dimension #2 Size (required)</td> + </tr> + + <tr> + <td colspan="4">Dimension #3 Size (required)</td> + </tr> + + <tr> + <td colspan="4">Dimension #4 Size (required)</td> + </tr> + + <tr> + <td colspan="4"><br />Member Type Message<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Compound Properties Description for Datatype Version 1 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Name</p></td> + 
<td> + <p>This NUL-terminated string provides the name of the + member. It is NUL-padded to a multiple of 8 bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Byte Offset of Member</p></td> + <td> + <p>This is the byte offset of the member within the datatype. + </p> + </td> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td> + <p>If set to zero, this field indicates a scalar member. If set + to a value greater than zero, this field indicates that the + member is an array of values. For array members, the size of + the array is indicated by the ‘Dimension #n Size’ fields in + this message. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension Permutation</p></td> + <td> + <p>This field was intended to allow an array field to have + its dimensions permuted, but this was never implemented. + This field should always be set to zero. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td> + <p>This field is the size of a dimension of the array field as + stored in the file. The first dimension stored in the list of + dimensions is the slowest changing dimension and the last + dimension stored is the fastest changing dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Member Type Message</p></td> + <td> + <p>This field is a datatype message describing the datatype of + the member. 
+ </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Compound Properties Description for Datatype Version 2 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />Name<br /><br /></td> + </tr> + + <tr> + <td colspan="4">Byte Offset of Member</td> + </tr> + + <tr> + <td colspan="4"><br />Member Type Message<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Compound Properties Description for Datatype Version 2 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Name</p></td> + <td> + <p>This NUL-terminated string provides the name of the + member. It is NUL-padded to a multiple of 8 bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Byte Offset of Member</p></td> + <td> + <p>This is the byte offset of the member within the datatype. + </p> + </td> + </tr> + + <tr> + <td><p>Member Type Message</p></td> + <td> + <p>This field is a datatype message describing the datatype of + the member. 
+ </p> + </td> + </tr> + + </table> + </div> + + + <br /> + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Compound Properties Description for Datatype Version 3 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />Name<br /><br /></td> + </tr> + + <tr> + <td colspan="4">Byte Offset of Member <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Member Type Message<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Compound Properties Description for Datatype Version 3 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Name</p></td> + <td><p>This NUL-terminated string provides the name of the + member. It is <em>not</em> NUL-padded to a multiple of 8 + bytes.</p></td> + </tr> + + <tr> + <td><p>Byte Offset of Member</p></td> + <td><p>This is the byte offset of the member within the datatype. + The field size is the minimum number of bytes necessary, + based on the size of the datatype element. 
For example, a + datatype element size of less than 256 bytes uses a 1-byte + length, a datatype element size of 256-65535 bytes uses a + 2-byte length, and so on.</p></td> + </tr> + + <tr> + <td><p>Member Type Message</p></td> + <td><p>This field is a datatype message describing the datatype of + the member.</p></td> + </tr> + + </table> + </div> + + + <br /> + <br /> + <a name="ClassReference"></a> + <p>Class specific information for the Reference class (Class 7):</p> + + <div align="center"> + <table class="desc"> + <caption> + Bits: Reference Bit Field Description for Datatype Version &lt; 4 + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-3</p></td> + <td><p><b>Type.</b> This four-bit value contains the reference types which are supported for + backward compatibility. The values defined are: + + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Object Reference (H5R_OBJECT1): A reference to another object in this + HDF5 file. + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Dataset Region Reference (H5R_DATASET_REGION1): A reference to a region within + a dataset in this HDF5 file. + </td> + </tr> + + <tr> + <td align="center"><code>2-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>4-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Bits: Reference Bit Field Description for Datatype Version 4 + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-3</p></td> + <td><p><b>Type.</b> This four-bit value contains the revised reference types. 
+ The values defined are: + + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Object Reference (H5R_OBJECT2): A reference to another object + in this file or an external file. + </td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>Dataset Region Reference (H5R_DATASET_REGION2): A reference to a region within + a dataset in this file or an external file. + </td> + </tr> + + <tr> + <td align="center"><code>4</code></td> + <td>Attribute Reference (H5R_ATTR): A reference to an attribute attached to an + object in this file or an external file. + </td> + </tr> + + <tr> + <td align="center"><code>5-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>4-7</p></td> + <td><p><b>Version.</b> This four-bit value contains the version for encoding + the revised reference types. The values defined are: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Unused + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>The version for encoding the revised reference types: Object Reference (2), + Dataset Region Reference (3) and Attribute Reference (4). + </td> + </tr> + + <tr> + <td align="center"><code>2-15</code></td> + <td>Reserved + </td> + </tr> + + </table></p> + + </td> + </tr> + + <tr> + <td><p>8-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <p>There are no properties defined for the reference class. 
+ </p> + + + <br /> + <br /> + <a name="ClassEnum"></a> + <p>Class specific information for the Enumeration class (Class 8):</p> + + <div align="center"> + <table class="desc"> + <caption> + Bits: Enumeration Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-15</p></td> + <td><p><b>Number of Members.</b> The number of name/value + pairs defined for the enumeration type.</p></td> + </tr> + + <tr> + <td><p>16-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Enumeration Property Description for Datatype Versions + 1 and 2 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />Base Type<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Names<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Values<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Enumeration Property Description for Datatype Versions + 1 and 2 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Base Type</p></td> + <td> + <p>Each enumeration type is based on some parent type, usually an + integer. The information for that parent type is described + recursively by this field. + </p> + </td> + </tr> + + <tr> + <td><p>Names</p></td> + <td> + <p>The name for each name/value pair. Each name is stored as a null + terminated ASCII string in a multiple of eight bytes. The names + are in no particular order. + </p> + </td> + </tr> + + <tr> + <td><p>Values</p></td> + <td> + <p>The list of values in the same order as the names. The values + are packed (no inter-value padding) and the size of each value + is determined by the parent type. 
+ </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Enumeration Property Description for Datatype Version 3 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />Base Type<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Names<br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Values<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Enumeration Property Description for Datatype Version 3 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Base Type</p></td> + <td> + <p>Each enumeration type is based on some parent type, usually an + integer. The information for that parent type is described + recursively by this field. + </p> + </td> + </tr> + + <tr> + <td><p>Names</p></td> + <td> + <p>The name for each name/value pair. Each name is stored as a null + terminated ASCII string, <em>not</em> padded to a multiple of + eight bytes. The names are in no particular order. + </p> + </td> + </tr> + + <tr> + <td><p>Values</p></td> + <td> + <p>The list of values in the same order as the names. The values + are packed (no inter-value padding) and the size of each value + is determined by the parent type. + </p> + </td> + </tr> + + </table> + </div> + + + + <br /> + <a name="ClassVarLen"></a> + <p>Class specific information for the Variable-length class (Class 9):</p> + + <div align="center"> + <table class="desc"> + <caption> + Bits: Variable-length Bit Field Description + </caption> + + <tr> + <th width="10%">Bits</th> + <th>Meaning</th> + </tr> + + <tr> + <td><p>0-3</p></td> + <td><p><b>Type.</b> This four-bit value contains the type of + variable-length datatype described. 
The values defined are: + + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Sequence: A variable-length sequence of any datatype. + Variable-length sequences do not have padding or + character set information. + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>String: A variable-length sequence of characters. + Variable-length strings have padding and character set + information. + </td> + </tr> + + <tr> + <td align="center"><code>2-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>4-7</p></td> + <td><p><b>Padding type.</b> (variable-length string only) + This four-bit value determines the type of padding + used for variable-length strings. The values are the same + as for the string padding type, as follows: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Null terminate: A zero byte marks the end of a string + and is guaranteed to be present after converting a long + string to a short string. When converting a short string + to a long string, the value is padded with additional null + characters as necessary. + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Null pad: Null characters are added to the end of the + value during conversion from a short string to a longer + string. Conversion from a long string to a shorter string + simply truncates the value. + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Space pad: Space characters are added to the end of the + value during conversion from a short string to a longer + string. Conversion from a long string to a shorter string + simply truncates the value. This is the Fortran + representation of the string. 
+ </td> + </tr> + + <tr> + <td align="center"><code>3-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + + <p>This value is set to zero for variable-length sequences.</p> + + </td> + </tr> + + <tr> + <td><p>8-11</p></td> + <td><p><b>Character Set.</b> (variable-length string only) + This four-bit value specifies the character set + to be used for encoding the string: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>ASCII character set encoding + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>UTF-8 character set encoding + </td> + </tr> + + <tr> + <td align="center"><code>2-15</code></td> + <td>Reserved + </td> + </tr> + </table></p> + + <p>This value is set to zero for variable-length sequences.</p> + + </td> + </tr> + + <tr> + <td><p>12-23</p></td> + <td><p>Reserved (zero).</p></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Variable-length Property Description + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td colspan="4"><br />Base Type<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Variable-length Property Description + </caption> + <tr> + <th width="10%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Base Type</p></td> + <td> + <p>Each variable-length type is based on some parent type. The + information for that parent type is described recursively by + this field. + </p> + </td> + </tr> + + </table> + </div> + + + <br /> + <br /> + <a name="ClassArray"></a> + <p>Class specific information for the Array class (Class 10):</p> + + <p>There are no bit fields defined for the array class. 
+ </p> + + <p>Note that the dimension information defined in the property for this + datatype class is independent of dataspace information for a dataset. + The dimension information here describes the dimensionality of the + information within a data element (or a component of an element, if the + array datatype is nested within another datatype) and the dataspace for a + dataset describes the size and locations of the elements in a dataset. + </p> + + + <div align="center"> + <table class="format"> + <caption> + Layout: Array Property Description for Datatype Version 2 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td>Dimensionality</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4">Dimension #1 Size</td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Dimension #n Size</td> + </tr> + + <tr> + <td colspan="4">Permutation Index #1</td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Permutation Index #n</td> + </tr> + + <tr> + <td colspan="4"><br />Base Type<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Array Property Description for Datatype Version 2 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td> + <p>This value is the number of dimensions that the array has. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td> + <p>This value is the size of the dimension of the array + as stored in the file. The first dimension stored in + the list of dimensions is the slowest changing dimension + and the last dimension stored is the fastest changing + dimension. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Permutation Index #n</p></td> + <td> + <p>This value is the index permutation used to map + each dimension from the canonical representation to an + alternate axis for each dimension. Currently, dimension + permutations are not supported, and these indices should + be set to the index position minus one. In other words, + the first dimension should be set to 0, the second dimension + should be set to 1, and so on. + </p> + </td> + </tr> + + <tr> + <td><p>Base Type</p></td> + <td> + <p>Each array type is based on some parent type. The + information for that parent type is described recursively by + this field. + </p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Array Property Description for Datatype Version 3 + </caption> + + <tr> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + <th width="25%">Byte</th> + </tr> + + <tr> + <td>Dimensionality</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Dimension #1 Size</td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Dimension #n Size</td> + </tr> + + <tr> + <td colspan="4"><br />Base Type<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Array Property Description for Datatype Version 3 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td> + <p>This value is the number of dimensions that the array has. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td> + <p>This value is the size of the dimension of the array + as stored in the file. 
The first dimension stored in + the list of dimensions is the slowest changing dimension + and the last dimension stored is the fastest changing + dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Base Type</p></td> + <td> + <p>Each array type is based on some parent type. The + information for that parent type is described recursively by + this field. + </p> + </td> + </tr> + + </table> + </div> + + + + <h4><a name="OldFillValueMessage">IV.A.2.e. The Data Storage - + Fill Value (Old) Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Fill Value + (old)</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0004</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td><p>The fill value message stores a single data value which + is returned to the application when an uninitialized data element + is read from a dataset. The fill value is interpreted with the + same datatype as the dataset. If no fill value message is present + then a fill value of all zero bytes is assumed.</p> + <p>This fill value message is deprecated in favor of the + “new” fill value message (Message Type 0x0005) and + is only written to the file for forward compatibility with + versions of the HDF5 Library before the 1.6.0 version. 
+ Additionally, it only appears for datasets with a user-defined + fill value (as opposed to the library default fill value or an + explicitly set “undefined” fill value).</p> + </td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Layout: Fill Value Message (Old) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Size</td> + </tr> + + <tr> + <td colspan="4"><br />Fill Value <em>(optional, variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Fill Value Message (Old) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Size</p></td> + <td> + <p>This is the size of the Fill Value field in bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Fill Value</p></td> + <td> + <p>The fill value. The bytes of the fill value are interpreted + using the same datatype as for the dataset. + </p> + </td> + </tr> + </table> + </div> + + + <h4><a name="FillValueMessage">IV.A.2.f. The Data Storage - + Fill Value Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Fill + Value</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0005</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Required for dataset objects; + may not be repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The fill value message stores a single data value which is + returned to the application when an uninitialized data element + is read from a dataset. 
The fill value is interpreted with the + same datatype as the dataset.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Layout: Fill Value Message - Versions 1 and 2 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Space Allocation Time</td> + <td>Fill Value Write Time</td> + <td>Fill Value Defined</td> + </tr> + + <tr> + <td colspan="4">Size <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Fill Value <em>(optional, variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Fill Value Message - Versions 1 and 2 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number information is used for changes in the + format of the fill value message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Initial version of this message. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>In this version, the Size and Fill Value fields are + only present if the Fill Value Defined field is set + to 1. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>This version packs the other fields in the message + more efficiently than version 2. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Space Allocation Time</p></td> + <td> + <p>When the storage space for the dataset’s raw data will be + allocated. 
The allowed values are: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Not used. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Early allocation. Storage space for the entire dataset + should be allocated in the file when the dataset is + created. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>Late allocation. Storage space for the entire dataset + should not be allocated until the dataset is written + to. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>Incremental allocation. Storage space for the + dataset should not be allocated until the portion + of the dataset is written to. This is currently + used in conjunction with chunked data storage for + datasets. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Fill Value Write Time</p></td> + <td> + <p>At the time that storage space for the dataset’s raw data is + allocated, this value indicates whether the fill value should + be written to the raw data storage elements. The allowed values + are: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>On allocation. The fill value is always written to + the raw data storage when the storage space is allocated. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Never. The fill value should never be written to + the raw data storage. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>Fill value written if set by user. The fill value + will be written to the raw data storage when the storage + space is allocated only if the user explicitly set + the fill value. If the fill value is the library + default or is undefined, it will not be written to + the raw data storage. 
+ </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Fill Value Defined</p></td> + <td> + <p>This value indicates if a fill value is defined for this + dataset. If this value is 0, the fill value is undefined. + If this value is 1, a fill value is defined for this dataset. + For version 2 or later of the fill value message, this value + controls the presence of the Size and Fill Value fields. + </p> + </td> + </tr> + + <tr> + <td><p>Size</p></td> + <td> + <p>This is the size of the Fill Value field in bytes. This field + is not present if the Version field is greater than 1, + and the Fill Value Defined field is set to 0. + </p> + </td> + </tr> + + <tr> + <td><p>Fill Value</p></td> + <td> + <p>The fill value. The bytes of the fill value are interpreted + using the same datatype as for the dataset. This field is + not present if the Version field is greater than 1, + and the Fill Value Defined field is set to 0. + </p> + </td> + </tr> + </table> + </div> + + <br /> + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Fill Value Message - Version 3 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Size <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Fill Value <em>(optional, variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Fill Value Message - Version 3 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number information is used for changes in the + format of the fill value message and is described here: + <table class="list"> + <tr> + <th width="20%" 
align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Initial version of this message. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>In this version, the Size and Fill Value fields are + only present if the Fill Value Defined field is set + to 1. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>This version packs the other fields in the message + more efficiently than version 2. + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td> + <p>This field packs the space allocation time, the fill value write + time, and the fill value status into a single byte. The bits are: + <table class="list"> + <tr> + <th width="20%" align="center">Bits</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0-1</code></td> + <td>Space Allocation Time, with the same + values as versions 1 and 2 of the message. + </td> + </tr> + <tr> + <td align="center"><code>2-3</code></td> + <td>Fill Value Write Time, with the same + values as versions 1 and 2 of the message. + </td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>Fill Value Undefined, indicating that the fill + value has been marked as “undefined” for this dataset. + Bits 4 and 5 cannot both be set. + </td> + </tr> + <tr> + <td align="center"><code>5</code></td> + <td>Fill Value Defined, with the same values as + versions 1 and 2 of the message. + Bits 4 and 5 cannot both be set. + </td> + </tr> + <tr> + <td align="center"><code>6-7</code></td> + <td>Reserved (zero). + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Size</p></td> + <td> + <p>This is the size of the Fill Value field in bytes. This field + is not present if the Version field is greater than 1, + and the Fill Value Defined flag is set to 0. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Fill Value</p></td> + <td> + <p>The fill value. The bytes of the fill value are interpreted + using the same datatype as for the dataset. This field is + not present if the Version field is greater than 1, + and the Fill Value Defined flag is set to 0. + </p> + </td> + </tr> + </table> + </div> + + + <h4><a name="LinkMessage">IV.A.2.g. The Link Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Link</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0006</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies </td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may be + repeated. </td></tr> + <tr><td><b>Description:</b></td> + <td><p>This message encodes the information for a link in a + group’s object header, when the group is storing its links + “compactly”, or in the group’s fractal heap, + when the group is storing its links “densely”.</p> + <p>A group is storing its links compactly when the fractal heap + address in the <em><a href="#LinkInfoMessage">Link Info + Message</a></em> is set to the “undefined address” + value.</p></td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Layout: Link Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td>Link type <em>(optional)</em></td> + <td bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4"><br />Creation Order <em>(8 bytes, optional)</em><br /><br /></td> + </tr> + <tr> + <td>Link Name Character Set <em>(optional)</em></td> + <td>Length of Link Name (variable size)</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted 
only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Link Name (variable size)</td> + </tr> + <tr> + <td colspan="4"><br />Link Information (variable size)<br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Link Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This document describes version 1.</p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This field contains information about the link and controls + the presence of other fields below. + <table class="list"> + <tr> + <th width="20%" align="center">Bits</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0-1</code></td> + <td>Determines the size of the <em>Length of Link Name</em> + field. + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>The size of the <em>Length of Link Name</em> + field is 1 byte. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>The size of the <em>Length of Link Name</em> + field is 2 bytes. + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>The size of the <em>Length of Link Name</em> + field is 4 bytes. + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>The size of the <em>Length of Link Name</em> + field is 8 bytes. + </td> + </tr> + </table> + </td> + </tr> + <tr> + <td align="center"><code>2</code></td> + <td>Creation Order Field Present: if set, the <em>Creation + Order</em> field is present. If not set, creation order + information is not stored for links in this group. 
+ </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>Link Type Field Present: if set, the link is not + a hard link and the <em>Link Type</em> field is present. + If not set, the link is a hard link. + </td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>Link Name Character Set Field Present: if set, the + link name is not represented with the ASCII character + set and the <em>Link Name Character Set</em> field is + present. If not set, the link name is represented with + the ASCII character set. + </td> + </tr> + <tr> + <td align="center"><code>5-7</code></td> + <td>Reserved (zero). + </td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Link type</p></td> + <td><p>This is the link class type and can be one of the following + values: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>A hard link (should never be stored in the file) + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>A soft link. + </td> + </tr> + <tr> + <td align="center"><code>2-63</code></td> + <td>Reserved for future HDF5 internal use. + </td> + </tr> + <tr> + <td align="center"><code>64</code></td> + <td>An external link. + </td> + </tr> + <tr> + <td align="center"><code>65-255</code></td> + <td>Reserved, but available for user-defined link types. + </td> + </tr> + </table></p> + + <p>This field is present if bit 3 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Creation Order</p></td> + <td><p>This 64-bit value is an index of the link’s creation time within + the group. Values start at 0 when the group is created and increment + by one for each link added to the group. Removing a link from a + group does not change existing links’ creation order field. 
+ </p> + <p>This field is present if bit 2 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Link Name Character Set</p></td> + <td><p>This is the character set for encoding the link’s name: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>ASCII character set encoding (this should never be stored + in the file) + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>UTF-8 character set encoding + </td> + </tr> + </table></p> + + <p>This field is present if bit 4 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Length of link name</p></td> + <td><p>This is the length of the link’s name. The size of this field + depends on bits 0 and 1 of <em>Flags</em>.</p> + </td> + </tr> + + <tr> + <td><p>Link name</p></td> + <td><p>This is the name of the link, non-NULL terminated.</p> + </td> + </tr> + + <tr> + <td><p>Link information</p></td> + <td><p>The format of this field depends on the <em>link type</em>.</p> + <p>For <b>hard</b> links, the field is formatted as follows: + + <table class="list"> + <tr> + <td width="20%"><i><a href="#SizeOfOffsetsV0"> + Size of Offsets</a></i> bytes:</td> + <td width="80%">The address of the object header for the object that the + link points to. + </td> + </tr> + </table> + </p> + + <p> + For <b>soft</b> links, the field is formatted as follows: + + <table class="list"> + <tr> + <td width="20%">Bytes 1-2:</td> + <td width="80%">Length of soft link value.</td> + </tr> + <tr> + <td><em>Length of soft link value</em> bytes:</td> + <td>A non-NULL-terminated string storing the value of the + soft link. 
+ </td> + </tr> + </table> + </p> + + <p> + For <b>external</b> links, the field is formatted as follows: + + <table class="list"> + <tr> + <td width="20%">Bytes 1-2:</td> + <td width="80%">Length of external link value.</td> + </tr> + <tr> + <td><em>Length of external link value</em> bytes:</td> + <td>The first byte contains the version number in the + upper 4 bits and flags in the lower 4 bits for the external + link. Both version and flags are defined to be zero in + this document. The remaining bytes consist of two + NULL-terminated strings, with no padding between them. + The first string is the name of the HDF5 file containing + the object linked to and the second string is the full path + to the object linked to, within the HDF5 file’s + group hierarchy. + </td> + </tr> + </table> + </p> + + <p> + For <b>user-defined</b> links, the field is formatted as follows: + + <table class="list"> + <tr> + <td width="20%">Bytes 1-2:</td> + <td width="80%">Length of user-defined data.</td> + </tr> + <tr> + <td><em>Length of user-defined data</em> bytes:</td> + <td>The data supplied for the user-defined link type.</td> + </tr> + </table> + </p> + + </td> + </tr> + </table> + </div> + + <h4><a name="ExternalFileListMessage">IV.A.2.h. The Data Storage - + External Data Files Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> External + Data Files</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0007</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The external data storage message indicates that the data + for an object is stored outside the HDF5 file. The filename of + the object is stored as a Uniform Resource Locator (URL) of + the actual filename containing the data. 
An external file list + record also contains the byte offset of the start of the data + within the file and the amount of space reserved in the file + for that data.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Layout: External File List Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="3">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="2">Allocated Slots</td> + <td colspan="2">Used Slots</td> + </tr> + + <tr> + <td colspan="4"><br />Heap Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Slot Definitions...<br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: External File List Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number information is used for changes in the format of + External Data Storage Message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center"><code>0</code></td> + <td>Never used.</td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>The current version used by the library.</td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Allocated Slots</p></td> + <td> + <p>The total number of slots allocated in the message. 
Its value must be at least as + large as the value contained in the Used Slots field. (The current library simply + uses the number of Used Slots for this message)</p> + </td> + </tr> + + <tr> + <td><p>Used Slots</p></td> + <td> + <p>The number of initial slots which contains valid information.</p> + </td> + </tr> + + <tr> + <td><p>Heap Address</p></td> + <td> + <p>This is the address of a local heap which contains the names for the external + files (The local heap information can be found in Disk Format Level 1D in this + document). The name at offset zero in the heap is always the empty string.</p> + </td> + </tr> + + <tr> + <td><p>Slot Definitions</p></td> + <td> + <p>The slot definitions are stored in order according to the array addresses they + represent.</p> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: External File List Slot + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Name Offset in Local Heap<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Offset in External Data File<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Data Size in External File<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: External File List Slot + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Name Offset in Local Heap</p></td> + <td> + <p>The byte offset within the local name heap for the name + of the file. 
File names are stored as a URL which has a + protocol name, a host name, a port number, and a file + name: + <code><em>protocol</em>:<em>port</em>//<em>host</em>/<em>file</em></code>. + If the protocol is omitted then “file:” is assumed. If + the port number is omitted then a default port for that + protocol is used. If both the protocol and the port + number are omitted then the colon can also be omitted. If + the double slash and host name are omitted then + “localhost” is assumed. The file name is the only + mandatory part, and if the leading slash is missing then + it is relative to the application’s current working + directory (the use of relative names is not + recommended). + </p> + </td> + </tr> + + <tr> + <td><p>Offset in External Data File</p></td> + <td> + <p>This is the byte offset to the start of the data in the + specified file. For files that contain data for a single + dataset this will usually be zero.</p> + </td> + </tr> + + <tr> + <td><p>Data Size in External File</p></td> + <td> + <p>This is the total number of bytes reserved in the + specified file for raw data storage. For a file that + contains exactly one complete dataset which is not + extendable, the size will usually be the exact size of the + dataset. However, by making the size larger one allows + HDF5 to extend the dataset. The size can be set to a value + larger than the entire file since HDF5 will read zeroes + past the end of the file without failing.</p> + </td> + </tr> + </table> + </div> + + + <h4><a name="LayoutMessage">IV.A.2.i. 
The Data Layout Message</a></h4> + + <!-- start msgdesc table --> + <center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Data Layout</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0008</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Required for datasets; may not + be repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The Data Layout message + describes how the elements of a multi-dimensional array are stored + in the HDF5 file. Four types of data layout are supported: + <ol> + <li>Contiguous: The array is stored in one contiguous area of + the file. This layout requires that the size of the array be + constant: data manipulations such as chunking, compression, + checksums, or encryption are not permitted. The message stores + the total storage size of the array. The offset of an element + from the beginning of the storage area is computed as in a C + array.</li> + <li>Chunked: The array domain is regularly decomposed into + chunks, and each chunk is allocated and stored separately. This + layout supports arbitrary element traversals, compression, + encryption, and checksums (these features are described + in other messages). The message stores the size of a chunk + instead of the size of the entire array; the storage size of + the entire array can be calculated by traversing the chunk index + that stores the chunk addresses.</li> + <li>Compact: The array is stored in one contiguous block as + part of this object header message.</li> + <li>Virtual: This is only supported for version 4 of the Data + Layout message. The message stores information that is used to + locate the global heap collection containing the Virtual Dataset + (VDS) mapping information. 
The mapping associates the VDS to + the source dataset elements that are stored across a collection + of HDF5 files.</li> + </ol></td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> + </table></center> + <!-- end msgdesc table --> + + <div align="center"> + <table class="format"> + <caption> + Layout: Data Layout Message (Versions 1 and 2) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Dimensionality</td> + <td>Layout Class</td> + <td>Reserved <em>(zero)</em></td> + </tr> + + <tr> + <td colspan="4">Reserved <em>(zero)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Data Address<sup>O</sup> <em>(optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Dimension 1 Size</td> + </tr> + + <tr> + <td colspan="4">Dimension 2 Size</td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Dimension #n Size</td> + </tr> + + <tr> + <td colspan="4">Dataset Element Size <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4">Compact Data Size <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Compact Data... <em>(variable size, optional)</em><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) 
+ </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Data Layout Message (Versions 1 and 2) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number information is used for changes in the format of the data + layout message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used.</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Used by version 1.4 and before of the library to encode layout information. + Data space is always allocated when the data set is created.</td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Used by version 1.6.[0,1,2] of the library to encode layout information. + Data space is allocated only when it is necessary.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td><p>An array has a fixed dimensionality. This field + specifies the number of dimension size fields later in the + message. The value stored for chunked storage is 1 greater than + the number of dimensions in the dataset’s dataspace. + For example, 2 is stored for a 1 dimensional dataset. + </p> + </td> + </tr> + + <tr> + <td><p>Layout Class</p></td> + <td><p>The layout class specifies the type of storage for the data + and how the other fields of the layout message are to be + interpreted. 
+ + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Compact Storage + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Contiguous Storage + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Chunked Storage + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Data Address</p></td> + <td><p>For contiguous storage, this is the address of the raw + data in the file. For chunked storage this is the address + of the <a href="#V1Btrees">v1 B-tree</a> that is used to look up the addresses of the + chunks. This field is not present for compact storage. + If the version for this message is greater than 1, the address + may have the “undefined address” value, to indicate that + storage has not yet been allocated for this array.</p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td><p>For contiguous and compact storage the dimensions define + the entire size of the array while for chunked storage they define + the size of a single chunk. In all cases, they are in units of + array elements (not bytes). The first dimension stored in the list + of dimensions is the slowest changing dimension and the last + dimension stored is the fastest changing dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Dataset Element Size</p></td> + <td><p>The size of a dataset element, in bytes. This field is only + present for chunked storage. + </p> + </td> + </tr> + + <tr> + <td><p>Compact Data Size</p></td> + <td><p>This field is only present for compact data storage. + It contains the size of the raw data for the dataset array, in + bytes.</p> + </td> + </tr> + + <tr> + <td><p>Compact Data</p></td> + <td><p>This field is only present for compact data storage. 
+ It contains the raw data for the dataset array.</p> + </td> + </tr> + </table> + </div> + + <br /> + <p>Version 3 of this message re-structured the format into specific + properties that are required for each layout class.</p> + + + <div align="center"> + <table class="format"> + <caption> + Layout: Data Layout Message (Version 3) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Layout Class</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Properties <em>(variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Data Layout Message (Version 3) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The version number information is used for changes in the format of layout message + and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>Used by the version 1.6.3 and later of the library to store properties + for each layout class.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Layout Class</p></td> + <td><p>The layout class specifies the type of storage for the data + and how the other fields of the layout message are to be + interpreted. 
+ <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Compact Storage + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Contiguous Storage + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Chunked Storage + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Properties</p></td> + <td><p>This variable-sized field encodes information specific to each + layout class and is described below. If there is no property + information specified for a layout class, the size of this field + is zero bytes.</p></td> + </tr> + </table> + </div> + + <br /> + <a name="CompactStorage"></a> + <p>Class-specific information for compact storage (layout class 0): (Note: The dimensionality information + is in the Dataspace message)</p> + + + <div align="center"> + <table class="format"> + <caption> + Layout: Compact Storage Property Description + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="2">Size</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Raw Data... <em>(variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Compact Storage Property Description + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Size</p></td> + <td><p>This field contains the size of the raw data for the dataset + array, in bytes. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Raw Data</p></td> + <td><p>This field contains the raw data for the dataset array.</p></td> + </tr> + </table> + </div> + + + <br /> + <a name="ContiguousStorage"></a> + <p>Class-specific information for contiguous storage (layout class 1): + (Note: The dimensionality information is in the Dataspace message)</p> + + + <div align="center"> + <table class="format"> + <caption> + Layout: Contiguous Storage Property Description + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Size<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Contiguous Storage Property Description + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>This is the address of the raw data in the file. + The address may have the “undefined address” value, to indicate + that storage has not yet been allocated for this array.</p></td> + </tr> + + <tr> + <td><p>Size</p></td> + <td><p>This field contains the size allocated to store the raw data, + in bytes. 
+ </p> + </td> + </tr> + </table> + </div> + + + <br /> + <p>Class-specific information for chunked storage (layout class 2):</p> + + + <div align="center"> + <table class="format"> + <caption> + Layout: Chunked Storage Property Description + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Dimensionality</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Dimension 0 Size</td> + </tr> + + <tr> + <td colspan="4">Dimension 1 Size</td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Dimension #n Size</td> + </tr> + + <tr> + <td colspan="4">Dataset Element Size</td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Chunked Storage Property Description + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td><p>A chunk has a fixed dimensionality. This field specifies + the number of dimension size fields later in the message.</p></td> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>This is the address of the <a href="#V1Btrees">v1 B-tree</a> + that is used to look up the + addresses of the chunks that actually store portions of the array + data. 
The address may have the “undefined address” value, to + indicate that storage has not yet been allocated for this array.</p></td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td><p>These values define the dimension size of a single chunk, in + units of array elements (not bytes). The first dimension stored in + the list of dimensions is the slowest changing dimension and the + last dimension stored is the fastest changing dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Dataset Element Size</p></td> + <td><p>The size of a dataset element, in bytes. + </p> + </td> + </tr> + </table> + </div> + + + <br /> + + <p><a name="DataLayoutV4"> + Version 4</a> of this message is similar to version 3 but has + additional information for the virtual layout class as well as + indexing information for the chunked layout class.</p> + + <div align="center"> + <table class="format"> + <caption> + Layout: Data Layout Message (Version 4) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Layout Class</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Properties <em>(variable size)</em><br /><br /></td> + </tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Data Layout Message (Version 4) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>The value for this field is 4 and is used by version 1.10.0 + and later of the library to store properties for each layout + class and indexing information for the chunked layout. + </p> + </td> + </tr> + + <tr> + <td><p>Layout Class</p></td> + <td><p>The layout class specifies the type of storage for the data + and how the other fields of the layout message are to be + interpreted. 
+ <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Compact Storage + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Contiguous Storage + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Chunked Storage + </td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>Virtual Storage + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Properties</p></td> + <td><p>This variable-sized field encodes information specific to a + layout class as follows: + <table class="list"> + <tr> + <th align="left" width="20%">Layout Class</th> + <th align="left" width="80%">Description</th> + </tr> + + <tr> + <td align="left">Compact Storage</td> + <td>See <a href="#CompactStorage">Compact Storage + Property Description</a> for the version 3 +Data Layout message. +</td> +</tr> + +<tr> + <td align="left">Contiguous Storage</td> + <td>See <a href="#ContiguousStorage">Contiguous Storage + Property Description</a> for the version 3 +Data Layout message. +</td> +</tr> + +<tr> + <td align="left">Chunked Storage</td> + <td>See <a href="#ChunkedStorage">Chunked Storage + Property Description</a> below. +</td> +</tr> + +<tr> + <td align="left">Virtual Storage</td> + <td>See <a href="#VirtualStorage">Virtual Storage + Property Description</a> below. 
+</td> +</tr> +</table> + +</p></td> +</tr> +</table> +</div> + +<br /> +<a name="ChunkedStorage"></a> +<p>Class-specific information for chunked storage (layout + class 2):</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Chunked Storage Property Description + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Flags</td> + <td>Dimensionality</td> + <td>Dimension Size Encoded Length</td> + <td colspan="1" bgcolor="#DDDDDD"><em>This space inserted to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Dimension 0 Size <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Dimension 1 Size <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4">...</td> + </tr> + + <tr> + <td colspan="4">Dimension #n Size <em>(variable size)</em></td> + </tr> + + <tr> + <td>Chunk Indexing Type</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Indexing Type Information <em>(variable size)</em></td> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Chunked Storage Property Description + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This is the chunked layout feature flag:</p> + + <table class="list"> + <tr> + <th width="55%" align="left">Value</th> + <th width="45%" align="left">Description</th> + </tr> + + <tr> + <td align="left"><code>DONT_FILTER_PARTIAL_BOUND_CHUNKS (bit 0)</code></td> + <td>Do not apply filter to a partial edge chunk. + + </td> + </tr> + + <tr> + <td align="left"><code>SINGLE_INDEX_WITH_FILTER (bit 1)</code></td> + <td>A filtered chunk for <i>Single Chunk</i> indexing. 
+ </td> + </tr> + + </table> + + </td> + + </tr> + + <tr> + <td><p>Dimensionality</p></td> + <td><p>A chunk has fixed dimension. This field specifies + the number of <em>Dimension Size</em> fields later in the message.</p></td> + </tr> + + <tr> + <td><p>Dimension Size Encoded Length</p></td> + <td> + <p>This is the size in bytes used to encode <em>Dimension Size</em>. + </p> + </td> + </tr> + + <tr> + <td><p>Dimension #n Size</p></td> + <td><p>These values define the dimension size of a single chunk, in + units of array elements (not bytes). The first dimension stored in + the list of dimensions is the slowest changing dimension and the + last dimension stored is the fastest changing dimension. + </p> + </td> + </tr> + + <tr> + <td><p>Chunk Indexing Type</p></td> + <td><p>There are five indexing types used to look up addresses + of the chunks. For more information on each type, see + <a href="#AppendixC">“Appendix C: Types of Indexes for + Dataset Chunks.”</a> + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td><a href="#SingleChunk"><i>Single Chunk</i></a> indexing type. + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td><a href="#Implicit"><i>Implicit</i></a> indexing type. + </td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td><a href="#FixedArray"><i>Fixed Array</i></a> indexing type. + </td> + </tr> + + <tr> + <td align="center"><code>4</code></td> + <td><a href="#ExtensibleArray"><i>Extensible Array</i></a> indexing type. + </td> + </tr> + + <tr> + <td align="center"><code>5</code></td> + <td><a href="#V2Btrees"><i>Version 2 B-tree</i></a> indexing type. + </td> + </tr> + + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Indexing Type Information</p></td> + <td><p>This variable-sized field encodes information specific to + an indexing type. 
More information on what is encoded with + each type can be found below this table. + <ul> + <li>See <a href="#IndexInfoSingle"><i>Single Chunk</i></a> below.</li> + <li>See <a href="#IndexInfoImplicit"><i>Implicit</i></a> below.</li> + <li>See <a href="#IndexInfoFixed"><i>Fixed Array</i></a> below.</li> + <li>See <a href="#IndexInfoExtensible"><i>Extensible Array</i></a> below.</li> + <li>See <a href="#IndexInfoV2Btrees"><i>Version 2 B-tree</i></a> below.</li> + </ul> + </p> + </td> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>This is the address specific to an indexing type. + The address may be undefined if the chunk or index storage is not allocated yet. + <table class="list"> + <tr> + <th width="40%" align="left">Value</th> + <th width="60%" align="left">Description</th> + </tr> + + <tr> + <td align="left"><i>Single Chunk index</i></td> + <td align="left">Address of the single chunk.</td> + </tr> + + <tr> + <td align="left"><i>Implicit index</i></td> + <td align="left">Address of the array of dataset chunks.</td> +</tr> + +<tr> + <td align="left"><i>Fixed Array index</i></td> + <td align="left">Address of the index.</td> +</tr> + +<tr> + <td align="left"><i>Extensible Array index</i></td> + <td align="left">Address of the index.</td> +</tr> + +<tr> + <td align="left"><i>Version 2 B-tree index</i></td> + <td align="left">Address of the index.</td> +</tr> + +</table> + +</p> +</td> +</tr> + +</table> +</div> + +<br /> + +<ol> + <li> + <a name="IndexInfoSingle"></a> + Index-specific information for <i>Single Chunk</i>: + </li> + + <p>The following information exists only when the chunk is filtered. 
+ In other words, when <code>SINGLE_INDEX_WITH_FILTER</code> + (bit 1) is enabled in the field <em>flags</em>.</p> + + <div align="center"> + <table class="format"> + <caption> + Layout: Single Chunk Indexing Information + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Size of filtered chunk<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Filters for chunk</td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="55%"> </td> + <td width="45%"> <!-- width is slightly different: these + tables are part of an ordered list; see <ol> tags. --> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + </table> + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Single Chunk Indexing Information + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Size of filtered chunk</p></td> + <td><p>This field is the size of a filtered chunk.</p></td> + </tr> + + <tr> + <td><p>Filters for chunk</p></td> + <td><p>This field contains filters for the chunk.</p></td> + </tr> + </table> + </div> +</p> + +<br /> + +<li> + <a name="IndexInfoImplicit"></a> + Index-specific information for <i>Implicit</i>: +</li> + +<div align="center"> + <table class="format"> + <caption> + Layout: Implicit Indexing Information + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4" bgcolor="#DDDDDD"> + <em>No specific indexing information</em></td> + </tr> + + </table> +</div> + +<br /> +<li> + <a name="IndexInfoFixed"></a> + Index-specific information for <i>Fixed Array</i>: +</li> + +<div align="center"> + <table class="format"> + 
<caption> + Layout: Fixed Array Indexing Information + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="1">Page Bits</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fixed Array Indexing Information + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Page Bits</p></td> + <td><p>This field contains the number of bits needed to store the + maximum number of elements in a data block page.</p></td> + </tr> + + </table> +</div> +</p> + +<br /> +<li> + <a name="IndexInfoExtensible"></a> + Index-specific information for <i>Extensible Array</i>: +</li> + +<div align="center"> + <table class="format"> + <caption> + Layout: Extensible Array Indexing Information + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Max Bits</td> + <td>Index Elements</td> + <td>Min Pointers</td> + <td>Min Elements</td> + </tr> + + <tr> + <td colspan="2">Page Bits</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> +</tr> + +</table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Extensible Array Indexing Information + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Max Bits</p></td> + <td><p>This field contains the number of bits needed to store the maximum number of elements + in the array. + </p> + </td> + </tr> + + <tr> + <td><p>Index Elements</p></td> + <td><p>This field contains the number of elements to store in the + index block. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Min Pointers</p></td> + <td><p>This field contains the minimum number of data block pointers + for a superblock. + </p> + </td> + </tr> + + <tr> + <td><p>Min Elements</p></td> + <td><p>This field contains the minimum number of elements per data block. + </p> + </td> + </tr> + + <tr> + <td><p>Page Bits</p></td> + <td><p>This field contains the number of bits needed to store the + maximum number of elements in a data block page. + </p> + </td> + </tr> + + </table> +</div> +</p> +<br /> + +<li> + <a name="IndexInfoV2Btrees"></a> + Index-specific information for <i>Version 2 B-tree</i>: +</li> + +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 B-tree Indexing Information + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Node Size</td> + </tr> + + <tr> + <td>Split Percent</td> + <td>Merge Percent</td> + <td colspan="2" bgcolor="#DDDDDD"> + <em>This space inserted only to align table nicely</em></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 B-tree Indexing Information + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Node Size</p></td> + <td><p>This field is the size in bytes of a B-tree node. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Split Percent</p></td> + <td><p>This field is the percentage full of a B-tree node at which to split the node.</p></td> + </tr> + + <tr> + <td><p>Merge Percent</p></td> + <td><p>This field is the percentage full of a B-tree node at which to merge the node.</p></td> + </tr> + </table> +</div> +</ol> + + + +<br /> +<a name="VirtualStorage"></a> +<p> + Class-specific information for virtual storage (layout class 3):</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Virtual Storage Property Description + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Index</td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Virtual Storage Property Description + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>This is the address of the global heap collection where + the VDS mapping entries are stored. + See <a href="#GlobalHeapVDS">“Disk Format: Level 1F - + Global Heap Block for Virtual Datasets.”</a> + </p></td> + </tr> + + <tr> + <td><p>Index</p></td> + <td><p>This is the index of the data object within the global heap collection. + </p> + </td> + </tr> + </table> +</div> + +<h4><a name="BogusMessage">IV.A.2.j. 
The Bogus Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Bogus</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0009</td></tr> + <tr><td colspan="2"><b>Length:</b> 4 bytes</td></tr> + <tr><td colspan="2"><b>Status:</b> For testing only; should never + be stored in a valid file.</td></tr> + <tr><td><b>Description:</b></td> + <td>This message is used for testing the HDF5 Library’s + response to an “unknown” message type and should + never be encountered in a valid HDF5 file.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Bogus Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Bogus Value</td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Bogus Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Bogus Value</p></td> + <td> + <p>This value should always be: <code>0xdeadbeef</code>.</p> + </td> + </tr> + </table> +</div> + +<h4><a name="GroupInfoMessage">IV.A.2.k. The Group Info Message +</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Group Info</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x000A</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td><p>This message stores information for the constants defining + a “new style” group’s behavior. 
Constant + information will be stored in this message and variable + information will be stored in the + <a href="#LinkInfoMessage">Link Info</a> message.</p> + <p>Note: the “estimated entry” information below is + used when determining the size of the object header for the + group when it is created.</p></td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Group Info Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2">Link Phase Change: Maximum Compact Value <em>(optional)</em></td> + </tr> + <tr> + <td colspan="2">Link Phase Change: Minimum Dense Value <em>(optional)</em></td> + <td colspan="2">Estimated Number of Entries <em>(optional)</em></td> + </tr> + <tr> + <td colspan="2">Estimated Link Name Length of Entries <em>(optional)</em></td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Group Info Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This is the group information flag with the following definition: + + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, link phase change values are stored. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>If set, the estimated entry information is non-default + and is stored. 
+ </td> + </tr> + <tr> + <td align="center"><code>2-7</code></td> + <td>Reserved</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Link Phase Change: Maximum Compact Value</p></td> + <td><p>This is the maximum number of links to store “compactly” (in + the group’s object header).</p> + <p>This field is present if bit 0 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Link Phase Change: Minimum Dense Value</p></td> + <td><p>This is the minimum number of links to store “densely” (in + the group’s fractal heap). The fractal heap’s address is + located in the <a href="#LinkInfoMessage">Link Info</a> + message.</p> + <p>This field is present if bit 0 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Estimated Number of Entries</p></td> + <td><p>This is the estimated number of entries in groups.</p> + <p>If this field is not present, the default value of <code>4</code> + will be used for the estimated number of group entries.</p> + <p>This field is present if bit 1 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Estimated Link Name Length of Entries</p></td> + <td><p>This is the estimated length of entry name.</p> + <p>If this field is not present, the default value of <code>8</code> + will be used for the estimated link name length of group entries.</p> + <p>This field is present if bit 1 of <em>Flags</em> is set.</p> + </td> + </tr> + + </table> +</div> +<!-- </p> --> + +<h4><a name="FilterMessage">IV.A.2.l. 
The Data Storage - Filter + Pipeline Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> + Data Storage - Filter Pipeline</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x000B</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td><p>This message describes the filter pipeline which should + be applied to the data stream by providing filter identification + numbers, flags, a name, and client data.</p> + <p>This message may be present in the object headers of both + dataset and group objects. For datasets, it specifies the + filters to apply to raw data. For groups, it specifies the + filters to apply to the group’s fractal heap. Currently, + only datasets using chunked data storage use the filter + pipeline on their raw data.</p></td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Filter Pipeline Message - Version 1 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Number of Filters</td> + <td colspan="2">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4">Reserved (zero)</td> + </tr> + + <tr> + <td colspan="4"><br />Filter Description List <em>(variable size)</em><br /><br /></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Filter Pipeline Message - Version 1 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. 
This table + describes version 1.</p></td> + </tr> + + <tr> + <td><p>Number of Filters</p></td> + <td><p>The total number of filters described in this + message. The maximum possible number of filters in a + message is 32.</p></td> + </tr> + + <tr> + <td><p>Filter Description List</p></td> + <td><p>A description of each filter. A filter description + appears in the next table.</p></td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Filter Description - Version 1 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="2">Filter Identification Value</td> + <td colspan="2">Name Length</td> + </tr> + + <tr> + <td colspan="2">Flags</td> + <td colspan="2">Number Client Data Values</td> + </tr> + + <tr> + <td colspan="4"><br />Name <em>(variable size, optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Client Data <em>(variable size, optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Padding <em>(variable size, optional)</em></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Filter Description - Version 1 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Filter Identification Value</p></td> + <td> + <p> + This value, often referred to as a filter identifier, + is designed to be a unique identifier for the filter. + Values from zero through 32,767 are reserved for filters + supported by The HDF Group in the HDF5 Library and for + filters requested and supported by third parties. + Filters supported by The HDF Group are documented immediately + below. 
Information on 3rd-party filters can be found at + The HDF Group’s + <a href="http://www.hdfgroup.org/services/contributions.html"> + Contributions</a> page.</p> + + <p> + To request a filter identifier, please contact + The HDF Group’s Help Desk at + <img src="Graphics/help.png" valign="middle" height="14" + alt="The HDF Group Help Desk">. + You will be asked to provide the following information:</p> + <ol> + <li>Contact information for the developer requesting the + new identifier</li> + <li>A short description of the new filter</li> + <li>Links to any relevant information, including licensing + information</li> + </ol> + <p> + Values from 32768 to 65535 are reserved for non-distributed uses + (for example, internal company usage) or for application usage + when testing a feature. The HDF Group does not track or document + the use of the filters with identifiers from this range.</p> + + <p> + The filters currently in library version 1.8.0 are + listed below: + + <table class="list"> + <tr> + <th width="20%" align="center">Identification</th> + <th width="15%" align="left">Name</th> + <th width="65%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>N/A</td> + <td>Reserved</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>deflate</td> + <td>GZIP deflate compression</td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>shuffle</td> + <td>Data element shuffling</td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>fletcher32</td> + <td>Fletcher32 checksum</td> + </tr> + + <tr> + <td align="center"><code>4</code></td> + <td>szip</td> + <td>SZIP compression</td> + </tr> + + <tr> + <td align="center"><code>5</code></td> + <td>nbit</td> + <td>N-bit packing</td> + </tr> + + <tr> + <td align="center"><code>6</code></td> + <td>scaleoffset</td> + <td>Scale and offset encoded values</td> + </tr> + </table> + </p></td> + </tr> + + <tr> + <td><p>Name Length</p></td> + <td><p>Each 
filter has an optional null-terminated ASCII name + and this field holds the length of the name including the + null termination padded with nulls to be a multiple of + eight. If the filter has no name then a value of zero is + stored in this field.</p></td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>The flags indicate certain properties for a filter. The + bit values defined so far are: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set then the filter is an optional filter. + During output, if an optional filter fails it will be + silently skipped in the pipeline.</td> + </tr> + + <tr> + <td align="center"><code>1-15</code></td> + <td>Reserved (zero)</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Number of Client Data Values</p></td> + <td><p>Each filter can store integer values to control + how the filter operates. The number of entries in the + <em>Client Data</em> array is stored in this field.</p></td> + </tr> + + <tr> + <td><p>Name</p></td> + <td><p>If the <em>Name Length</em> field is non-zero then it will + contain the size of this field, padded to a multiple of eight. This + field contains a null-terminated, ASCII character string to serve + as a comment/name for the filter.</p></td> + </tr> + + <tr> + <td><p>Client Data</p></td> + <td><p>This is an array of four-byte integers which will be + passed to the filter function. 
The <em>Number of Client Data + Values</em> field determines the number of elements in the array.</p></td> + </tr> + + <tr> + <td><p>Padding</p></td> + <td><p>Four bytes of zeroes are added to the message at this + point if the <em>Number of Client Data Values</em> field contains + an odd number.</p></td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Filter Pipeline Message - Version 2 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Number of Filters</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Filter Description List <em>(variable size)</em><br /><br /></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Filter Pipeline Message - Version 2 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This table + describes version 2.</p></td> + </tr> + + <tr> + <td><p>Number of Filters</p></td> + <td><p>The total number of filters described in this + message. The maximum possible number of filters in a + message is 32.</p></td> + </tr> + + <tr> + <td><p>Filter Description List</p></td> + <td><p>A description of each filter. 
A filter description + appears in the next table.</p></td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Filter Description - Version 2 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="2">Filter Identification Value</td> + <td colspan="2">Name Length <em>(optional)</em></td> + </tr> + + <tr> + <td colspan="2">Flags</td> + <td colspan="2">Number Client Data Values</td> + </tr> + + <tr> + <td colspan="4"><br />Name <em>(variable size, optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Client Data <em>(variable size, optional)</em><br /><br /></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Filter Description - Version 2 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Filter Identification Value</p></td> + <td> + <p> + This value, often referred to as a filter identifier, + is designed to be a unique identifier for the filter. + Values from zero through 32,767 are reserved for filters + supported by The HDF Group in the HDF5 Library and for + filters requested and supported by third parties. + Filters supported by The HDF Group are documented immediately + below. Information on 3rd-party filters can be found at + The HDF Group’s + <a href="http://www.hdfgroup.org/services/contributions.html"> + Contributions</a> page.</p> + + <p> + To request a filter identifier, please contact + The HDF Group’s Help Desk at + <img src="Graphics/help.png" valign="middle" height="14" + alt="The HDF Group Help Desk">. 
+ You will be asked to provide the following information:</p> + <ol> + <li>Contact information for the developer requesting the + new identifier</li> + <li>A short description of the new filter</li> + <li>Links to any relevant information, including licensing + information</li> + </ol> + <p> + Values from 32768 to 65535 are reserved for non-distributed uses + (for example, internal company usage) or for application usage + when testing a feature. The HDF Group does not track or document + the use of the filters with identifiers from this range.</p> + + <p> + The filters currently in library version 1.8.0 are + listed below: + + <table class="list"> + <tr> + <th width="20%" align="center">Identification</th> + <th width="15%" align="left">Name</th> + <th width="65%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>N/A</td> + <td>Reserved</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>deflate</td> + <td>GZIP deflate compression</td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>shuffle</td> + <td>Data element shuffling</td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>fletcher32</td> + <td>Fletcher32 checksum</td> + </tr> + + <tr> + <td align="center"><code>4</code></td> + <td>szip</td> + <td>SZIP compression</td> + </tr> + + <tr> + <td align="center"><code>5</code></td> + <td>nbit</td> + <td>N-bit packing</td> + </tr> + + <tr> + <td align="center"><code>6</code></td> + <td>scaleoffset</td> + <td>Scale and offset encoded values</td> + </tr> + </table> + </p></td> + </tr> + + <tr> + <td><p>Name Length</p></td> + <td><p>Each filter has an optional null-terminated ASCII name + and this field holds the length of the name including the + null termination padded with nulls to be a multiple of + eight. 
If the filter has no name then a value of zero is + stored in this field.</p> + <p>Filters with IDs less than 256 (in other words, filters + that are defined in this format documentation) do not store + the <em>Name Length</em> or <em>Name</em> fields. + </p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>The flags indicate certain properties for a filter. The + bit values defined so far are: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set then the filter is an optional filter. + During output, if an optional filter fails it will be + silently skipped in the pipeline.</td> + </tr> + + <tr> + <td align="center"><code>1-15</code></td> + <td>Reserved (zero)</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Number of Client Data Values</p></td> + <td><p>Each filter can store integer values to control + how the filter operates. The number of entries in the + <em>Client Data</em> array is stored in this field.</p></td> + </tr> + + <tr> + <td><p>Name</p></td> + <td><p>If the <em>Name Length</em> field is non-zero, then it will + contain the size of this field, <em>not</em> padded to a multiple + of eight. This field contains a <em>non-</em>null-terminated, + ASCII character string to serve as a comment/name for the filter. + </p> + <p>Filters that are defined in this format documentation + such as deflate and shuffle do not store the <em>Name + Length</em> or <em>Name</em> fields. + </p> + </td> + </tr> + + <tr> + <td><p>Client Data</p></td> + <td><p>This is an array of four-byte integers which will be + passed to the filter function. The <em>Number of Client Data + Values</em> determines the number of elements in the array.</p> +</td> +</tr> +</table> +</div> + +<h4><a name="AttributeMessage">IV.A.2.m. 
The Attribute Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Attribute</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x000C</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td><p>The <em>Attribute</em> message is used to store objects + in the HDF5 file which are used as attributes, or + “metadata” about the current object. An attribute + is a small dataset; it has a name, a datatype, a dataspace, and + raw data. Since attributes are stored in the object header, they + should be relatively small (in other words, less than 64KB). + They can be associated with any type of object which has an + object header (groups, datasets, or committed (named) + datatypes).</p> + <p>In 1.8.x versions of the library, attributes can be larger + than 64KB. See the + <a href="UG/HDF5_Users_Guide-Responsive%20HTML5/index.html#t=HDF5_Users_Guide%2FAttributes%2FHDF5_Attributes.htm%3Frhtocid%3Dtoc8.2_1%23TOC_8_5_Special_Issuesbc-13"> + “Special Issues”</a> section of the Attributes chapter + in the <cite>HDF5 User’s Guide</cite> for more information.</p> + <p>Note: Attributes on an object must have unique names: + the HDF5 Library currently enforces this by causing the + creation of an attribute with a duplicate name to fail. 
+ Attributes on different objects may have the same name, + however.</p></td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Attribute Message (Version 1) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Reserved (zero)</td> + <td colspan="2">Name Size</td> + </tr> + + <tr> + <td colspan="2">Datatype Size</td> + <td colspan="2">Dataspace Size</td> + </tr> + + <tr> + <td colspan="4"><br />Name <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Datatype <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Dataspace <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Data <em>(variable size)</em><br /><br /></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Attribute Message (Version 1) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number information is used for changes in the format of the + attribute message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used.</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Used by the library before version 1.6 to encode attribute message. + This version does not support shared datatypes.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Name Size</p></td> + <td><p>The length of the attribute name in bytes including the + null terminator. 
Note that the <em>Name</em> field below may + contain additional padding not represented by this + field.</p></td> + </tr> + + <tr> + <td><p>Datatype Size</p></td> + <td><p>The length of the datatype description in the <em>Datatype</em> + field below. Note that the <em>Datatype</em> field may contain + additional padding not represented by this field.</p></td> + </tr> + + <tr> + <td><p>Dataspace Size</p></td> + <td><p>The length of the dataspace description in the <em>Dataspace</em> + field below. Note that the <em>Dataspace</em> field may contain + additional padding not represented by this field.</p></td> + </tr> + + <tr> + <td><p>Name</p></td> + <td><p>The null-terminated attribute name. This field is + padded with additional null characters to make it a + multiple of eight bytes.</p></td> + </tr> + + <tr> + <td><p>Datatype</p></td> + <td><p>The datatype description follows the same format as + described for the datatype object header message. This + field is padded with additional zero bytes to make it a + multiple of eight bytes.</p></td> + </tr> + + <tr> + <td><p>Dataspace</p></td> + <td><p>The dataspace description follows the same format as + described for the dataspace object header message. This + field is padded with additional zero bytes to make it a + multiple of eight bytes.</p></td> + </tr> + + <tr> + <td><p>Data</p></td> + <td><p>The raw data for the attribute. The size is determined + from the datatype and dataspace descriptions. 
This + field is <em>not</em> padded with additional bytes.</p></td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Attribute Message (Version 2) + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2">Name Size</td> + </tr> + + <tr> + <td colspan="2">Datatype Size</td> + <td colspan="2">Dataspace Size</td> + </tr> + + <tr> + <td colspan="4"><br />Name <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Datatype <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Dataspace <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Data <em>(variable size)</em><br /><br /></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Attribute Message (Version 2) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number information is used for changes in the + format of the attribute message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Used by the library of version 1.6.x and after to encode + attribute messages. + This version supports shared datatypes. The fields of + name, datatype, and dataspace are not padded with + additional bytes of zero. 
+ </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This bit field contains extra information about + interpreting the attribute message: + + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, datatype is shared.</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>If set, dataspace is shared.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Name Size</p></td> + <td><p>The length of the attribute name in bytes including the + null terminator.</p></td> + </tr> + + <tr> + <td><p>Datatype Size</p></td> + <td><p>The length of the datatype description in the <em>Datatype</em> + field below.</p></td> + </tr> + + <tr> + <td><p>Dataspace Size</p></td> + <td><p>The length of the dataspace description in the <em>Dataspace</em> + field below.</p></td> + </tr> + + <tr> + <td><p>Name</p></td> + <td><p>The null-terminated attribute name. This field is <em>not</em> + padded with additional bytes.</p></td> + </tr> + + <tr> + <td><p>Datatype</p></td> + <td><p>The datatype description follows the same format as + described for the datatype object header message. + </p> + <p>If the + <em>Flag</em> field indicates this attribute’s datatype is + shared, this field will contain a “shared message” encoding + instead of the datatype encoding. + </p> + <p>This field is <em>not</em> padded with additional bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Dataspace</p></td> + <td><p>The dataspace description follows the same format as + described for the dataspace object header message. + </p> + <p>If the + <em>Flag</em> field indicates this attribute’s dataspace is + shared, this field will contain a “shared message” encoding + instead of the dataspace encoding. 
+ </p> + <p>This field is <em>not</em> padded with additional bytes.</p> + </td> + </tr> + + <tr> + <td><p>Data</p></td> + <td><p>The raw data for the attribute. The size is determined + from the datatype and dataspace descriptions. + </p> + <p>This field is <em>not</em> padded with additional zero bytes. + </p> + </td> + </tr> + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Attribute Message (Version 3) + </caption> + + <tr align="center"> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2">Name Size</td> + </tr> + + <tr> + <td colspan="2">Datatype Size</td> + <td colspan="2">Dataspace Size</td> + </tr> + + <tr> + <td>Name Character Set Encoding</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Name <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Datatype <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Dataspace <em>(variable size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Data <em>(variable size)</em><br /><br /></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Attribute Message (Version 3) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number information is used for changes in the + format of the attribute message and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>Used by the library of version 1.8.x and after to + encode attribute messages. 
+ This version supports attributes with non-ASCII names. + </td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This bit field contains extra information about + interpreting the attribute message: + + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, datatype is shared.</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>If set, dataspace is shared.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Name Size</p></td> + <td><p>The length of the attribute name in bytes including the + null terminator.</p></td> + </tr> + + <tr> + <td><p>Datatype Size</p></td> + <td><p>The length of the datatype description in the <em>Datatype</em> + field below.</p></td> + </tr> + + <tr> + <td><p>Dataspace Size</p></td> + <td><p>The length of the dataspace description in the <em>Dataspace</em> + field below.</p></td> + </tr> + + <tr> + <td><p>Name Character Set Encoding</p></td> + <td><p>The character set encoding for the attribute’s name: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>ASCII character set encoding + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>UTF-8 character set encoding + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Name</p></td> + <td><p>The null-terminated attribute name. This field is <em>not</em> + padded with additional bytes.</p></td> + </tr> + + <tr> + <td><p>Datatype</p></td> + <td><p>The datatype description follows the same format as + described for the datatype object header message. + </p> + <p>If the + <em>Flag</em> field indicates this attribute’s datatype is + shared, this field will contain a “shared message” encoding + instead of the datatype encoding. 
+ </p> + <p>This field is <em>not</em> padded with additional bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Dataspace</p></td> + <td><p>The dataspace description follows the same format as + described for the dataspace object header message. + </p> + <p>If the + <em>Flag</em> field indicates this attribute’s dataspace is + shared, this field will contain a “shared message” encoding + instead of the dataspace encoding. + </p> + <p>This field is <em>not</em> padded with additional bytes.</p> + </td> + </tr> + + <tr> + <td><p>Data</p></td> + <td><p>The raw data for the attribute. The size is determined + from the datatype and dataspace descriptions. + </p> + <p>This field is <em>not</em> padded with additional zero bytes. + </p> + </td> + </tr> + </table> +</div> + +<h4><a name="CommentMessage">IV.A.2.n. The Object Comment + Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Object + Comment</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x000D</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The object comment is designed to be a short description of + an object. 
An object comment is a sequence of non-zero + (that is, non-<code>\0</code>) ASCII characters with no other formatting + included by the library.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Object Comment Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Comment <em>(variable size)</em><br /><br /></td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Object Comment Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Comment</p></td> + <td><p>A null-terminated ASCII character string.</p></td> + </tr> + </table> +</div> + +<h4><a name="OldModificationTimeMessage">IV.A.2.o. The Object + Modification Time (Old) Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Object + Modification Time (Old)</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x000E</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td><p>The object modification date and time is a timestamp + which indicates (using ISO-8601 date and time format) the last + modification of an object. The time is updated when any object + header message changes according to the system clock where the + change was posted. 
All fields of this message should be + interpreted as coordinated universal time (UTC).</p> + <p>This modification time message is deprecated in favor of + the “new” <a href="#ModificationTimeMessage">Object + Modification Time</a> message and is no longer written to the + file in versions of the HDF5 Library after the 1.6.0 + version.</p></td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Modification Time Message (Old) + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Year</td> + </tr> + + <tr> + <td colspan="2">Month</td> + <td colspan="2">Day of Month</td> + </tr> + + <tr> + <td colspan="2">Hour</td> + <td colspan="2">Minute</td> + </tr> + + <tr> + <td colspan="2">Second</td> + <td colspan="2">Reserved</td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Modification Time Message (Old) + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Year</p></td> + <td><p>The four-digit year as an ASCII string. For example, + <code>1998</code>. + </p></td> + </tr> + + <tr> + <td><p>Month</p></td> + <td><p>The month number as a two digit ASCII string where + January is <code>01</code> and December is <code>12</code>.</p></td> + </tr> + + <tr> + <td><p>Day of Month</p></td> + <td><p>The day number within the month as a two digit ASCII + string. 
The first day of the month is <code>01</code>.</p></td> + </tr> + + <tr> + <td><p>Hour</p></td> + <td><p>The hour of the day as a two digit ASCII string where + midnight is <code>00</code> and 11:00pm is <code>23</code>.</p></td> + </tr> + + <tr> + <td><p>Minute</p></td> + <td><p>The minute of the hour as a two digit ASCII string where + the first minute of the hour is <code>00</code> and + the last is <code>59</code>.</p></td> + </tr> + + <tr> + <td><p>Second</p></td> + <td><p>The second of the minute as a two digit ASCII string + where the first second of the minute is <code>00</code> + and the last is <code>59</code>.</p></td> + </tr> + + <tr> + <td><p>Reserved</p></td> + <td><p>This field is reserved and should always be zero.</p></td> + </tr> + </table> +</div> + +<h4><a name="SOHMTableMessage">IV.A.2.p. The Shared Message Table + Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Shared Message + Table</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x000F</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>This message is used to locate the table of shared object + header message (SOHM) indexes. Each index consists of information + to find the shared messages from either the heap or object header. 
+ This message is <em>only</em> found in the superblock + extension.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Shared Message Table Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Shared Object Header Message Table Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td>Number of Indices</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Shared Message Table Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This document describes version 0.</p></td> + </tr> + + <tr> + <td><p>Shared Object Header Message Table Address</p></td> + <td><p>This field is the address of the master table for shared + object header message indexes.</p> + </td> + </tr> + + <tr> + <td><p>Number of Indices</p></td> + <td><p>This field is the number of indices in the master table. + </p></td> + </tr> + + </table> +</div> + +<h4><a name="ContinuationMessage">IV.A.2.q. 
The Object Header + Continuation Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Object Header + Continuation</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0010</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The object header continuation is the location in the file + of a block containing more header messages for the current data + object. This can be used when header blocks become too large or + are likely to change over time.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Object Header Continuation Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Offset<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Length<sup>L</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) 
+ </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Object Header Continuation Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Offset</p></td> + <td><p>This value is the address in the file where the + header continuation block is located.</p></td> + </tr> + + <tr> + <td><p>Length</p></td> + <td><p>This value is the length in bytes of the header continuation + block in the file.</p></td> + </tr> + </table> +</div> +<br /> + +<p>The format of the header continuation block that this message points + to depends on the version of the object header that the message is + contained within. +</p> + +<p> + Continuation blocks for version 1 object headers have no special + formatting information; they are merely a list of object header + message info sequences (type, size, flags, reserved bytes and data + for each message sequence). See the description + of <a href="#V1ObjectHeaderPrefix">Version 1 Data Object Header Prefix.</a> +</p> + +<p>Continuation blocks for version 2 object headers <em>do</em> have + special formatting information as described here + (see also the description of + <a href="#V2ObjectHeaderPrefix">Version 2 Data Object Header Prefix.</a>): +</p> +<div align="center"> + <table class="format"> + <caption> + Layout: Version 2 Object Header Continuation Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + <tr> + <td>Header Message Type #1</td> + <td colspan="2">Size of Header Message Data #1</td> + <td>Header Message #1 Flags</td> + </tr> + + <tr> + <td colspan="2">Header Message #1 Creation Order <em>(optional)</em></td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Header Message Data #1<br /><br /></td> + </tr> + + <tr> + <td colspan="4">.<br 
/>.<br />.<br /></td> + </tr> + + <tr> + <td>Header Message Type #n</td> + <td colspan="2">Size of Header Message Data #n</td> + <td>Header Message #n Flags</td> + </tr> + + <tr> + <td colspan="2">Header Message #n Creation Order <em>(optional)</em></td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Header Message Data #n<br /><br /></td> + </tr> + + <tr> + <td colspan="4">Gap <em>(optional, variable size)</em></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 Object Header Continuation Block + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>OCHK</code>” + is used to indicate the beginning of an object header + continuation block. This gives file consistency checking + utilities a better chance of reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Type</p></td> + <td> + <p>Same format as version 1 of the object header, described above. + </p></td> + </tr> + + <tr> + <td><p>Size of Header Message #n Data</p></td> + <td> + <p>Same format as version 1 of the object header, described above. + </p></td> + </tr> + + <tr> + <td><p>Header Message #n Flags</p></td> + <td> + <p>Same format as version 1 of the object header, described above. + </p></td> + </tr> + + <tr> + <td><p>Header Message #n Creation Order</p></td> + <td> + <p>This field stores the order that a message of a given type + was created in.</p> + <p>This field is present if bit 2 of <em>flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Header Message #n Data</p></td> + <td> + <p>Same format as version 1 of the object header, described above. 
+ </p></td> + </tr> + + <tr> + <td><p>Gap</p></td> + <td> + <p>A gap in an object header chunk is inferred by the end of the + messages for the chunk before the beginning of the chunk’s + checksum. Gaps are always smaller than the size of an + object header message prefix (message type + message size + + message flags).</p> + <p>Gaps are formed when a message (typically an attribute message) + in an earlier chunk is deleted and a message from a later + chunk that does not quite fit into the free space is moved + into the earlier chunk.</p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>This is the checksum for the object header chunk. + </p> + </td> + </tr> + </table> +</div> + +<h4><a name="SymbolTableMessage">IV.A.2.r. The Symbol Table + Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Symbol Table + Message</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0011</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Required for + “old style” groups; may not be repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>Each “old style” group has a v1 B-tree and a + local heap for storing symbol table entries, which are located + with this message.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Symbol Table Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />v1 B-tree Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Local Heap Address<sup>O</sup><br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ 
in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Symbol Table Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>v1 B-tree Address</p></td> + <td><p>This value is the address of the v1 B-tree containing the + symbol table entries for the group.</p></td> + </tr> + + <tr> + <td><p>Local Heap Address</p></td> + <td><p>This value is the address of the local heap containing + the link names for the symbol table entries for the group.</p></td> + </tr> + </table> +</div> + +<h4><a name="ModificationTimeMessage">IV.A.2.s. The Object + Modification Time Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Object + Modification Time</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0012</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>The object modification time is a timestamp which indicates + the time of the last modification of an object. 
The time is + updated when any object header message changes according to + the system clock where the change was posted.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Modification Time Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="3">Reserved <em>(zero)</em></td> + </tr> + + <tr> + <td colspan="4">Seconds After UNIX Epoch</td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Modification Time Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number is used for changes in the format of Object Modification Time + and is described here: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Never used.</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Used by Version 1.6.1 and after of the library to encode time. In + this version, the time is the seconds after Epoch.</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Seconds After UNIX Epoch</p></td> + <td><p>A 32-bit unsigned integer value that stores the number of + seconds since 0 hours, 0 minutes, 0 seconds, January 1, 1970, + Coordinated Universal Time.</p></td> + </tr> + </table> +</div> + +<h4><a name="BtreeKValuesMessage">IV.A.2.t. 
The B-tree + ‘K’ Values Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> B-tree + ‘K’ Values</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0013</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>This message stores non-default ‘K’ values + for internal and leaf nodes of a group or indexed storage v1 + B-trees. This message is <em>only</em> found in the superblock + extension.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: B-tree ‘K’ Values Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="2">Indexed Storage Internal Node K</td> + <td bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="2">Group Internal Node K</td> + <td colspan="2">Group Leaf Node K</td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: B-tree ‘K’ Values Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This document describes + version 0.</p> + </td> + </tr> + + <tr> + <td><p>Indexed Storage Internal Node K</p></td> + <td><p>This is the node ‘K’ value for each internal node of an + indexed storage v1 B-tree. See the description of this field + in version 0 and 1 of the superblock as well as the section on + v1 B-trees. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Group Internal Node K</p></td> + <td><p>This is the node ‘K’ value for each internal node of a group + v1 B-tree. See the description of this field in version 0 and + 1 of the superblock as well as the section on v1 B-trees. + </p> + </td> + </tr> + + <tr> + <td><p>Group Leaf Node K</p></td> + <td><p>This is the node ‘K’ value for each leaf node of a group v1 + B-tree. See the description of this field in version 0 and 1 + of the superblock as well as the section on v1 B-trees. + </p> + </td> + </tr> + + </table> +</div> + +<h4><a name="DrvInfoMessage">IV.A.2.u. The Driver Info + Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Driver + Info</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0014</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + + <tr><td> + <b>Description:</b></td> + <td>This message contains information needed by the file driver + to reopen a file. This message is <em>only</em> found in the + superblock extension: see the <a href="#SuperblockExt"> + “Disk Format: Level 0C - Superblock Extension”</a> + section for more information. 
For more information on the fields + in the driver info message, see the <a href="#DriverInfo"> + “Disk Format: Level 0B - File Driver Info”</a> + section; those who use the multi and family file drivers will + find this section particularly helpful.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Driver Info Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4"><br />Driver Identification</td> + </tr> + + <tr> + <td colspan="2">Driver Information Size</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br /><br />Driver Information <em>(variable size)</em><br /><br /><br /></td> + </tr> + + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Driver Info Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This document describes + version 0.</p> + </td> + </tr> + + <tr> + <td><p>Driver Identification</p></td> + <td><p>This is an eight-byte ASCII string without null termination which + identifies the driver. + </p> + </td> + </tr> + + <tr> + <td><p>Driver Information Size</p></td> + <td><p>The size in bytes of the <em>Driver Information</em> field of this + message.</p> + </td> + </tr> + + <tr> + <td><p>Driver Information</p></td> + <td><p>Driver information is stored in a format defined by the file driver.</p> + </td> + </tr> + </table> +</div> + +<h4><a name="AinfoMessage">IV.A.2.v. 
The Attribute Info + Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Attribute + Info</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0015</td></tr> + <tr><td colspan="2"><b>Length:</b> Varies</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>This message stores information about the attributes on an + object, such as the maximum creation index for the attributes + created and the location of the attribute storage when the + attributes are stored “densely”.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Attribute Info Message + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Flags</td> + <td colspan="2">Maximum Creation Index <em>(optional)</em></td> + </tr> + <tr> + <td colspan="4"><br />Fractal Heap Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Attribute Name v2 B-tree Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Attribute Creation Order v2 B-tree Address<sup>O</sup> <em>(optional)</em><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) 
+ </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Attribute Info Message + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This document describes + version 0.</p> + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This is the attribute index information flag with the + following definition: + + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, creation order for attributes is tracked. + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>If set, creation order for attributes is indexed. + </td> + </tr> + <tr> + <td align="center"><code>2-7</code></td> + <td>Reserved</td> + </tr> + </table></p> + + </td> + </tr> + + <tr> + <td><p>Maximum Creation Index</p></td> + <td><p>This is the maximum creation order index value for the + attributes on the object.</p> + <p>This field is present if bit 0 of <em>Flags</em> is set.</p> + </td> + </tr> + + <tr> + <td><p>Fractal Heap Address</p></td> + <td><p>This is the address of the fractal heap to store dense + attributes. + Each attribute stored in the fractal heap is described by + the <a href="#AttributeMessage">Attribute Message.</a> + </p> + </td> + </tr> + + <tr> + <td><p>Attribute Name v2 B-tree Address</p></td> + <td><p>This is the address of the version 2 B-tree to index the + names of densely stored attributes.</p> + </td> + </tr> + + <tr> + <td><p>Attribute Creation Order v2 B-tree Address</p></td> + <td><p>This is the address of the version 2 B-tree to index the + creation order of densely stored attributes.</p> + <p>This field is present if bit 1 of <em>Flags</em> is set.</p> + </td> + </tr> + + </table> +</div> + +<h4><a name="RefCountMessage">IV.A.2.w. 
The Object Reference + Count Message</a></h4> + +<!-- start msgdesc table --> +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> Object Reference + Count</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0016</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td><b>Description:</b></td> + <td>This message stores the number of hard links (in groups or + objects) pointing to an object: in other words, its + <em>reference count</em>.</td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<!-- end msgdesc table --> + +<div align="center"> + <table class="format"> + <caption> + Layout: Object Reference Count + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Reference Count</td> + </tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Object Reference Count + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for this message. This document describes + version 0.</p> + </td> + </tr> + + <tr> + <td><p>Reference Count</p></td> + <td><p>The unsigned 32-bit integer is the reference count for the + object. This message is only present in “version 2” + (or later) object headers, and if not present in those object + header versions, the reference count for the object is assumed + to be 1.</p> + </td> + </tr> + + </table> +</div> + +<br /> + +<h4><a name="FsinfoMessage">IV.A.2.x. 
The File Space Info + Message</a></h4> + +<center> + <table class="msgdesc"> + <tr><td colspan="2"><b>Header Message Name:</b> File Space + Info</td></tr> + <tr><td colspan="2"><b>Header Message Type:</b> 0x0017</td></tr> + <tr><td colspan="2"><b>Length:</b> Fixed</td></tr> + <tr><td colspan="2"><b>Status:</b> Optional; may not be + repeated.</td></tr> + <tr><td> + <b>Description:</b></td> + <td>This message stores the file space management information + that the library uses in handling file space + requests for the file. Version 0 of the message is used for release 1.10.0 only. + Version 1 of the message is used for release 1.10.1+. + There is no File Space Info message before release 1.10 as the library does + not track file space across multiple file opens. + <p> + Note that version 0 is deprecated starting with release 1.10.1. + That means when the 1.10.1+ library opens an HDF5 file with a version 0 message, + the library will decode and map the message to version 1. + On file close, it will encode the message as a version 1 message. + <p> + The library uses the following three mechanisms to manage file space in an HDF5 file: + <ul> + <li> Free-space managers + <br> They track free-space sections of various sizes in the file that are not currently + allocated. Each free-space manager corresponds to a file space type. + There are two main groups of file space types: metadata and raw data. + Metadata is further divided into five types: superblock, B-tree, global heap, + local heap, and object header. + See the description of <a href="#FreeSpaceManager">Free-space + Manager</a> as well as the description of file space allocation types in + <a href="#AppendixB">Appendix B</a>. + </li> + <li> Aggregators + <br> The library manages two aggregators, one for metadata and one for raw data. + An aggregator is a contiguous block of free-space in the file. 
+ The size of each aggregator is tunable via public routines + <code>H5Pset_meta_block_size</code> and <code>H5Pset_small_data_block_size</code> respectively. + </li> + <li> Virtual file drivers + <br> The library's virtual file driver interface dispatches requests for additional + space to the allocation routine of the file driver associated with the file. + For example, if the sec2 file driver is being used, its allocation routine will + increase the size of the file to service the requests. + </li> + </ul> + <p> + For release 1.10.0, the library derives the following four file space strategies + based on the mechanisms: + <ul> + <li>H5F_FILE_SPACE_ALL + <ul> + <li>Mechanisms used: free-space managers, aggregators, and virtual file drivers</li> + <li>Does not persist free-space across file opens</li> + <li>This strategy is the library default</li> + </ul> + </li> + <li>H5F_FILE_SPACE_ALL_PERSIST</li> + <ul> + <li>Mechanisms used: free-space managers, aggregators, and virtual file drivers</li> + <li>Persist free-space across file opens</li> + </ul> + <li>H5F_FILE_SPACE_AGGR_VFD</li> + <ul> + <li>Mechanisms used: aggregators and virtual file drivers</li> + <li>Does not persist free-space across file opens</li> + </ul> + <li>H5F_FILE_SPACE_VFD</li> + <ul> + <li>Mechanisms used: virtual file drivers</li> + <li>Does not persist free-space across file opens</li> + </ul> + </ul> + For release 1.10.1+, the free-space manager mechanism is modified to handle paged aggregation + which aggregates small metadata and raw data allocations into constant-sized well-aligned pages + to allow efficient I/O accesses. 
+ With the support of this feature, the library derives the following four file space strategies: + <ul> + <li>H5F_FSPACE_STRATEGY_FSM_AGGR </li> + <ul> + <li>Mechanisms used: free-space managers, aggregators, and virtual file drivers</li> + <li>This strategy is the library default</li> + </ul> + <li>H5F_FSPACE_STRATEGY_PAGE</li> + <ul> + <li>Mechanisms used: free-space managers with embedded paged aggregation and virtual file drivers</li> + </ul> + <li>H5F_FSPACE_STRATEGY_AGGR</li> + <ul> + <li>Mechanisms used: aggregators and virtual file drivers</li> + </ul> + <li>H5F_FSPACE_STRATEGY_NONE</li> + <ul> + <li>Mechanisms used: virtual file drivers</li> + </ul> + </ul> + By default, free-space is not persisted across file opens for the above four strategies. + Users can call the public routine <code>H5Pset_file_space_strategy</code> to request + that free-space be persisted. + </td></tr> + <tr><td colspan="2"><b>Format of Data:</b> See the tables + below.</td></tr> +</table></center> +<p> + <div align="center"> + <table class="format"> + <caption> + Layout: File Space Info - Version 0 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Strategy</td> + <td colspan="2">Threshold<sup>L</sup></td> + </tr> + <tr> + <td colspan="4"><br />Free-space manager address<sup>O</sup> for H5FD_MEM_SUPER<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Free-space manager address<sup>O</sup> for H5FD_MEM_BTREE<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Free-space manager address<sup>O</sup> for H5FD_MEM_DRAW<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Free-space manager address<sup>O</sup> for H5FD_MEM_GHEAP<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Free-space manager address<sup>O</sup> for H5FD_MEM_LHEAP<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Free-space manager address<sup>O</sup> for 
H5FD_MEM_OHDR<br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: File Space Info + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>This is version 0 of this message.</p> + </td> + </tr> + + <tr> + <td><p>Strategy</p></td> + <td><p>This is the file space strategy used to manage file space. + There are four types: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>H5F_FILE_SPACE_ALL_PERSIST</td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>H5F_FILE_SPACE_ALL</td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>H5F_FILE_SPACE_AGGR_VFD</td> + </tr> + <tr> + <td align="center"><code>4</code></td> + <td>H5F_FILE_SPACE_VFD</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Threshold</p></td> + <td><p>This is the smallest free-space section size that the + free-space manager will track. 
+ </td> + </tr> + <tr> + <td><p>Free-space manager addresses</p></td> + <td><p>These are the six free-space manager addresses for the + six file space allocation types: + <ul> + <li>H5FD_MEM_SUPER</li> + <li>H5FD_MEM_BTREE</li> + <li>H5FD_MEM_DRAW</li> + <li>H5FD_MEM_GHEAP</li> + <li>H5FD_MEM_LHEAP</li> + <li>H5FD_MEM_OHDR</li> + </ul> + Note that these six fields exist only if the value for the field + “<em>Strategy</em>” is H5F_FILE_SPACE_ALL_PERSIST. + </p> + </td> + </tr> + + </table> + </div> + <br /> + + <div align="center"> + <table class="format"> + <caption> + Layout: File Space Info - Version 1 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Version</td> + <td>Strategy</td> + <td>Persisting free-space</td> + <td colspan="1" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4">Free-space Section Threshold<sup>L</sup></td> + </tr> + + <tr> + <td colspan="4">File Space Page Size</td> + </tr> + + <tr> + <td colspan="2">Page-end Metadata Threshold</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />EOA<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup> of small-sized free-space manager for H5FD_MEM_SUPER<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Address<sup>O</sup> of small-sized free-space manager for H5FD_MEM_BTREE<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Address<sup>O</sup> of small-sized free-space manager for H5FD_MEM_DRAW<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Address<sup>O</sup> of small-sized free-space manager for H5FD_MEM_GHEAP<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Address<sup>O</sup> of small-sized free-space manager for H5FD_MEM_LHEAP<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br 
/>Address<sup>O</sup> of small-sized free-space manager for H5FD_MEM_OHDR<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Address<sup>O</sup> of large-sized free-space manager for H5FD_MEM_SUPER<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Address<sup>O</sup> of large-sized free-space manager for H5FD_MEM_BTREE<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Address<sup>O</sup> of large-sized free-space manager for H5FD_MEM_DRAW<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Address<sup>O</sup> of large-sized free-space manager for H5FD_MEM_GHEAP<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Address<sup>O</sup> of large-sized free-space manager for H5FD_MEM_LHEAP<br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Address<sup>O</sup> of large-sized free-space manager for H5FD_MEM_OHDR<br /><br /></td> + </tr> + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: File Space Info + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>This is version 1 of this message.</p> + </td> + </tr> + + <tr> + <td><p>Strategy</p></td> + <td><p>This is the file space strategy used to manage file space. 
+ There are four types: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>H5F_FSPACE_STRATEGY_FSM_AGGR</td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>H5F_FSPACE_STRATEGY_PAGE</td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>H5F_FSPACE_STRATEGY_AGGR</td> + </tr> + <tr> + <td align="center"><code>3</code></td> + <td>H5F_FSPACE_STRATEGY_NONE</td> + </tr> + </table></p> + </td> + </tr> + + <tr> + <td><p>Persisting free-space</p></td> + <td><p>True or false in persisting free-space. + </td> + </tr> + + <tr> + <td><p>Free-space Section Threshold</p></td> + <td><p>This is the smallest free-space section size that the + free-space manager will track. + </td> + </tr> + + <tr> + <td><p>File space page size</p></td> + <td><p>This is the file space page size, which is used when the paged aggregation feature + is enabled. + </td> + </tr> + + <tr> + <td><p>Page-end metadata threshold</p></td> + <td><p>This is the smallest free-space section size at the end of a page that + the free-space manager will track. This is used when the paged aggregation feature + is enabled. + </td> + </tr> + + <tr> + <td><p>EOA</p></td> + <td><p>The EOA before the allocation of free-space manager header and section info for the + self-referential free-space managers when persisting free-space. + <br> + Note that self-referential free-space managers are managers that involve file space + allocation for the managers' free-space header and section info. 
+ </td> + </tr> + + <tr> + <td><p>Addresses of small-sized free-space managers</p></td> + <td><p>These are the addresses of the six small-sized free-space managers for + the six file space allocation types: + </p> + <ul> + <li>H5FD_MEM_SUPER</li> + <li>H5FD_MEM_BTREE</li> + <li>H5FD_MEM_DRAW</li> + <li>H5FD_MEM_GHEAP</li> + <li>H5FD_MEM_LHEAP</li> + <li>H5FD_MEM_OHDR</li> + </ul> + Note that these six fields exist only if the value for the field + “<em>Persisting free-space</em>” is true. +</td> +</tr> + +<tr> + <td><p>Addresses of large-sized free-space managers</p></td> + <td><p>These are the addresses of the six large-sized free-space managers for + the six file space allocation types: + </p> + <ul> + <li>H5FD_MEM_SUPER</li> + <li>H5FD_MEM_BTREE</li> + <li>H5FD_MEM_DRAW</li> + <li>H5FD_MEM_GHEAP</li> + <li>H5FD_MEM_LHEAP</li> + <li>H5FD_MEM_OHDR</li> + </ul> + Note that these six fields exist only if the value for the field + “<em>Persisting free-space</em>” is true. +</td> +</tr> + +</table> +</div> + +<h3><a name="DataStorage"> + IV.B. Disk Format: Level 2B - Data Object Data Storage</a></h3> + +<p>The data for an object is stored separately from its header + information in the file and may not actually be located in the HDF5 file + itself if the header indicates that the data is stored externally. The + information for each record in the object is stored according to the + dimensionality of the object (indicated in the dataspace header message). + Multi-dimensional array data is stored in C order; in other words, the + “last” dimension changes fastest.</p> + +<p>Data whose elements are composed of atomic datatypes are stored in IEEE + format, unless they are specifically defined as being stored in a different + machine format with the architecture-type information from the datatype + header message. 
This means that each architecture will need to [potentially] + byte-swap data values into the internal representation for that particular + machine.</p> + +<p> Data with a variable-length datatype is stored in the global heap + of the HDF5 file. Global heap identifiers are stored in the + data object storage.</p> + +<p>Data whose elements are composed of reference datatypes are stored in + several different ways depending on the particular reference type involved. + Object pointers are just stored as the offset of the object header being + pointed to with the size of the pointer being the same number of bytes as + offsets in the file.</p> + +<p>Dataset region references are stored as a heap-ID which points to + the following information within the file-heap: an offset of the object + pointed to, number-type information (same format as header message), + dimensionality information (same format as header message), sub-set start + and end information (in other words, a coordinate location for each), + and field start and end names (in other words, a [pointer to the] string + indicating the first field included and a [pointer to the] string name + for the last field). </p> + +<p>Data of a compound datatype is stored as a contiguous stream of the items + in the structure, with each item formatted according to its datatype. +<p> + Description of datatypes for variable-length, references and compound classes can be found + in <a href="#DatatypeMessage">Datatype Message</a>. +<p> + Information about global heap and heap ID can be found in <a href="#GlobalHeap">Global Heap</a>. +<p> + For reference datatype, + see also the encoding description for <a href="#ReferenceEncodeRV">Reference Encoding (Revised) </a> and + <a href="#ReferenceEncodeDP">Reference Encoding (Backward Compatibility)</a> in Appendix D. +</p> + +<h2><a name="AppendixA"> + V. 
Appendix A: Definitions</a></h2> + +<p>Definitions of various terms used in this document are included in + this section.</p> + +<div align="center"> + <table class="glossary"> + <tr> + <th width="20%">Term</th> + <th>Definition</th> + </tr> + + <tr> + <td>Undefined Address</td> + <td>The <a name="UndefinedAddress">undefined + address</a> for a file is a file address with all bits + set: in other words, <code>0xffff...ff</code>.</td> + </tr> + + <tr> + <td>Unlimited Size</td> + <td>The <a name="UnlimitedDim">unlimited size</a> + for a size is a value with all bits set: in other words, + <code>0xffff...ff</code>.</td> + </tr> + + </table> +</div> + + +<h2><a name="AppendixB"> + VI. Appendix B: File Space Allocation Types</a></h2> + +<p>There are six basic types of file space allocation as follows: +</p> +<div align="center"> + <table class="desc"> + <tr> + <th width="30%">Basic Allocation Type</th> + <th>Description</th> + </tr> + + <tr> + <td>H5FD_MEM_SUPER</td> + <td>File space allocated for <em>Superblock.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_BTREE</td> + <td>File space allocated for <em>B-tree.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_DRAW</td> + <td>File space allocated for <em>raw data</em>.</td> + </tr> + + <tr> + <td>H5FD_MEM_GHEAP</td> + <td>File space allocated for <em>Global Heap.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_LHEAP</td> + <td>File space allocated for <em>Local Heap.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_OHDR</td> + <td>File space allocated for <em>Object Header.</em></td> + </tr> + </table> +</div> + +<br /> +<p>There are other file space allocation types that are mapped to the + above six basic types because they are similar in nature. 
+ The mapping and the corresponding descriptions are listed in the following two tables: +</p> + +<div align="center"> + <table class="desc"> + <tr> + <th width="30%">Basic Allocation Type</th> + <th>Mapping of Allocation Types to Basic Allocation Types</th> + </tr> + + <tr> + <td>H5FD_MEM_SUPER</td> + <td><em>none</em></td> + </tr> + + <tr> + <td>H5FD_MEM_BTREE</td> + <td>H5FD_MEM_SOHM_INDEX</td> + </tr> + + <tr> + <td>H5FD_MEM_DRAW</td> + <td>H5FD_MEM_FHEAP_HUGE_OBJ</td> + </tr> + + <tr> + <td>H5FD_MEM_GHEAP</td> + <td><em>none</em></td> + </tr> + + <tr> + <td>H5FD_MEM_LHEAP</td> + <td>H5FD_MEM_FHEAP_DBLOCK, H5FD_MEM_FSPACE_SINFO</td> + </tr> + + <tr> + <td>H5FD_MEM_OHDR</td> + <td>H5FD_MEM_FHEAP_HDR, H5FD_MEM_FHEAP_IBLOCK, H5FD_MEM_FSPACE_HDR, H5FD_MEM_SOHM_TABLE</td> + </tr> + </table> +</div> + +<br /> + +<div align="center"> + <table class="desc"> + <tr> + <th width="30%">Allocation Type</th> + <th>Description</th> + </tr> + + <tr> + <td>H5FD_MEM_FHEAP_HDR</td> + <td>File space allocated for <em>Fractal Heap Header.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_FHEAP_DBLOCK</td> + <td>File space allocated for <em>Fractal Heap Direct Blocks.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_FHEAP_IBLOCK</td> + <td>File space allocated for <em>Fractal Heap Indirect Blocks.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_FHEAP_HUGE_OBJ</td> + <td>File space allocated for huge objects in the fractal heap.</td> + </tr> + + <tr> + <td>H5FD_MEM_FSPACE_HDR</td> + <td>File space allocated for <em>Free-space Manager Header.</em></td> + </tr> + + <tr> + <td>H5FD_MEM_FSPACE_SINFO</td> + <td>File space allocated for <em>Free-space Section List</em> of the free-space manager.</td> + </tr> + <tr> + <td>H5FD_MEM_SOHM_TABLE</td> + <td>File space allocated for <em>Shared Object Header Message Table.</em></td> + </tr> + <tr> + <td>H5FD_MEM_SOHM_INDEX</td> + <td>File space allocated for <em>Shared Message Record List.</em></td> + </tr> + </table> +</div> + +<h2><a name="AppendixC"> VII. 
Appendix C: + Types of Indexes for Dataset Chunks</a></h2> + +<p>For an HDF5 file without the latest format enabled, the library + uses the <a href="#V1Btrees">Version 1 B-tree</a> to index dataset + chunks.</p> + +<p>For an HDF5 file with the latest format enabled, the library uses + one of the following five indexing types depending on a chunked + dataset’s dimension specification and the way it is extended. +</p> + +<a name="SingleChunk"> + <h3>VII.A. The Single Chunk Index</h3></a> + +<p>The <i>Single Chunk</i> index can be used when the dataset fulfills + the following condition:</p> + +<ul> + <li>the current, maximum, and chunk dimension sizes are all the same</li> +</ul> + +<p>The dataset has only one chunk, and the address of the single + chunk is stored in the version 4 <i>Data Layout</i> message. + See the <a href="#ChunkedStorage">Chunked Storage Property + Description</a> layout and field description tables.</p> + +<a name="Implicit"> + <h3>VII.B. The Implicit Index</h3></a> + +<p>The <i>Implicit</i> index can be used when the dataset fulfills + the following conditions:</p> + +<ul> + <li>fixed maximum dimension sizes</li> + <li>no filter applied to the dataset</li> + <li>the timing for the space allocation of the dataset chunks is + <code>H5D_ALLOC_TIME_EARLY</code></li> +</ul> + +<p>Since the dataset’s dimension sizes are known and storage space + is to be allocated early, an array of dataset chunks is allocated + based on the maximum dimension sizes when the dataset is created. + The base address of the array is stored in the version 4 + <i>Data Layout</i> message. See the + <a href="#ChunkedStorage">Chunked Storage Property + Description</a> layout and field description tables.
+</p> + +<p>When accessing a dataset chunk with a specified offset, the + address of the chunk in the array is computed as below:</p> + +<dir><p><code>base address + (size of a chunk in bytes * chunk index + associated with the offset)</code></p></dir> + +<p>A chunk index starts at 0 and increases according to the + fastest changing dimension, then the next fastest, and so on. + <a name="ChunkIndex"></a> + The chunk index for a dataset chunk offset is computed as below: + <ol> + <li>Calculate the scaled offset for each dimension in + <code>scaled_offset</code>: + <br /> + <pre> + scaled_offset = chunk_offset/chunk_dims + </pre></li> + <li>Calculate the # of chunks for each dimension in + <code>nchunks</code>: + <br /> + <pre> + nchunks = (curr_dims + chunk_dims - 1)/chunk_dims + </pre></li> + + <li>Calculate the down chunks for each dimension in + <code>down_chunks</code>: + <br /> + <pre> + /* n is the # of dimensions */ + for(i = (int)(n-1), acc = 1; i >= 0; i--) { + down_chunks[i] = acc; + acc *= nchunks[i]; + } + </pre> + </li> + + <li>Calculate the chunk index in <code>chunk_index</code>: + <br /> + <pre> + /* n is the # of dimensions */ + for(u = 0, chunk_index = 0; u < n; u++) + chunk_index += down_chunks[u] * scaled_offset[u]; + </pre> + </li> + </ol> +<p> + For example, for a 2-dimensional dataset with + <code>curr_dims[4,5]</code> and <code>chunk_dims[3,2]</code>, + there will be a total of 6 chunks, with 3 chunks in the fastest + changing dimension and 2 chunks in the slowest changing dimension. + See the figure below. 
+ The chunk index for the chunk offset <code>[3,4]</code> + is computed as below: + <ol> + <li><code>scaled_offset[0] = 1, scaled_offset[1] = 2</code></li> + <li><code>nchunks[0] = 2, nchunks[1] = 3</code></li> + <li><code>down_chunks[0] = 3, down_chunks[1] = 1</code></li> + <li><code>chunk_index = 5</code></li> + </ol> + + + <table align="center" width="400" border="0"> + <tr valign="center" align="center"> + <td> + <hr size="2"/> + <img height="250" src="FileFormatSpecChunkDiagram.jpg" + alt="Chunk Diagram"></td> + </tr> + <tr valign="top" align="center"> + <td> + <hr size="1" /> + <b>Figure 3. Implicit index chunk diagram </b> + <hr size="2"/></td> + </tr> + </table> + + + + + + <a name="FixedArray"> + <h3>VII.C. The Fixed Array Index</h3></a> + +<p>The <i>Fixed Array</i> index can be used when the dataset fulfills + the following condition:</p> +<ul> + <li>fixed maximum dimension sizes</li> +</ul> + +<p>Since the maximum number of chunks is known, an array of + in-file-on-disk addresses based on the maximum number of chunks is + allocated when data is written to the dataset. To access a dataset + chunk with a specified offset, the + <a href="#ChunkIndex">chunk index</a> associated with the offset +is calculated. The index is mapped into the array to locate the +disk address for the chunk.</p> + +<p>The Fixed Array (FA) index structure provides space and speed + improvements in locating chunks over index structures that handle + more dynamic data accesses like a + <a href="#AppendV2Btrees">Version 2 B-tree</a> index. + The entry into the Fixed Array is the Fixed Array header which + contains metadata about the entries stored in the array. The + header contains a pointer to a data block which stores the array + of entries that describe the dataset chunks. For greater efficiency, + the array will be divided into multiple pages if the number of + entries exceeds a threshold value.
The space for the data block + and possibly data block pages are allocated as a single contiguous + block of space.</p> + +<p>The content of the data block depends on whether paging is + activated or not. When paging is not used, elements that describe + the chunks are stored in the data block. If paging is turned on, + the data block contains a bitmap indicating which pages are + initialized. Then subsequent data block pages will contain the + entries that describe the chunks.</p> + +<p>An entry describes either a filtered or non-filtered dataset + chunk. The formats for both element types are described below. +</p> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fixed Array Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td>Client ID</td> + <td>Entry Size</td> + <td>Page Bits</td> + </tr> + + <tr> + <td colspan="4"><br />Max Num + Entries<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Data Block + Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) 
+ </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fixed Array Header + </caption> + <tr> + <th width="40%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>FAHD</code>” + is used to indicate the beginning of a Fixed Array header. + This gives file consistency checking utilities a better + chance of reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Client ID</p></td> + <td> + <p>The ID for identifying the client of the + Fixed Array: + + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Non-filtered dataset chunks + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Filtered dataset chunks + </td> + </tr> + <tr> + <td align="center"><code>2+</code></td> + <td>Reserved + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Entry Size</p></td> + <td> + <p>The size in bytes of an entry in the Fixed Array. + </p> + </td> + </tr> + + <tr> + <td><p>Page Bits</p></td> + <td> + <p>The number of bits needed to store the maximum + number of entries in a + <a href="#FADataBlockPage">data block page.</a></p> + </td> + </tr> + + <tr> + <td><p>Max Num Entries</p></td> + <td> + <p>The maximum number of entries in the Fixed + Array.</p> + </td> + </tr> + + <tr> + <td><p>Data Block Address</p></td> + <td> + <p>The address of the data block in the Fixed Array. 
+ </p> + </td> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>The checksum for the header.</p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Fixed Array Data Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td>Client ID</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Page Bitmap <em>(variable size and + optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Elements <em>(variable size and + optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fixed Array Data Block + </caption> + <tr> + <th width="40%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>FADB</code>” + is used to indicate the beginning of a Fixed Array data + block. This gives file consistency checking utilities a + better chance of reconstructing a damaged file. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Client ID</p></td> + <td> + <p>The ID for identifying the client of the + Fixed Array: + + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Non-filtered dataset chunks + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Filtered dataset chunks + </td> + </tr> + <tr> + <td align="center"><code>2+</code></td> + <td>Reserved. + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Header Address</p></td> + <td> + <p>The address of the Fixed Array header. Principally used + for file integrity checking. + </p> + </td> + </tr> + + <tr> + <td><p>Page Bitmap</p></td> + <td><p>A bitmap indicating which data block pages are initialized.</p> + <p>Exists only if the data block is paged.</p></td> + </tr> + + <tr> + <td><p>Elements</p></td> + <td> + <p>Contains the elements stored in the data block + and exists only if the data block is not paged. 
+ There are two element types: + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td><a href="#FaNonFilterChunk">Non-filtered + dataset chunks</i></a> +</td> +</tr> +<tr> + <td align="center"><code>1</code></td> + <td><a href="#FaFilterChunk">Filtered dataset + chunks</i></a> +</td> +</tr> +</table> +</p> +</td> +</tr> + +<tr> + <td><p>Checksum</p></td> + <td> + <p>The checksum for the Fixed Array data block.</p> + </td> +</tr> + +</table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption><a name="FADataBlockPage"> + Layout: Fixed Array Data Block Page</a> + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Elements <em>(variable + size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Fixed Array Data Block Page + </caption> + <tr> + <th width="40%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Elements</p></td> + <td> + <p>Contains the elements stored in the data block page. 
+ There are two element types: + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td><a href="#FaNonFilterChunk">Non-filtered dataset chunks</i></a> +</td> +</tr> +<tr> + <td align="center"><code>1</code></td> + <td><a href="#FaFilterChunk">Filtered dataset chunks</i></a> +</td> +</tr> +</table> +</p> +</td> +</tr> + +<tr> + <td><p>Checksum</p></td> + <td> + <p>The checksum for a Fixed Array data block page.</p> + </td> +</tr> + +</table> +</div> + +<br /> +<br /> +<br /> +<a name="FaNonFilterChunk"></a> +<div align="center"> + <table class="format"> + <caption> + Layout: Data Block Element for Non-filtered Dataset Chunk + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Data Block Element for Non-filtered Dataset Chunk + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>The address of the dataset chunk in the file. 
+ </p> + </td> + </tr> + + </table> +</div> +<!-- </p> --> + +<br /> +<br /> +<br /> +<a name="FaFilterChunk"></a> +<div align="center"> + <table class="format"> + <caption> + Layout: Data Block Element for Filtered Dataset Chunk + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Chunk Size <em>(variable size; at most + 8 bytes)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Filter Mask</td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Data Block Element for Filtered Dataset Chunk + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>The address of the dataset chunk in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Chunk Size</p></td> + <td><p>The size of the dataset chunk in bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Filter Mask</p></td> + <td><p>Indicates the filter to skip for the dataset chunk. Each + filter has an index number in the pipeline; if that filter is + skipped, the bit corresponding to its index is set. + </p> + </td> + </tr> + + </table> +</div> + +<a name="ExtensibleArray"> + <h3>VII.D. The Extensible Array Index</h3></a> + +<p>The <i>Extensible Array</i> index can be used when the dataset + fulfills the following condition:</p> + +<ul> + <li>only one dimension of unlimited extent</li> +</ul> + +<p>The Extensible Array (EA) is a data structure that is used as a + chunk index in datasets where the dataspace has a single + unlimited dimension. 
In other words, one dimension is set to + <code>H5S_UNLIMITED</code>, and the other dimensions are any number + of fixed-size dimensions. The idea behind the extensible array is + that a particular data object can be located via a lightweight + indexing structure of fixed depth for a given address space. This + indexing structure requires only a few (2-3) file operations per + element lookup and gives good cache performance. Unlike the B-tree + structure, the extensible array is optimized for appends. Where a + B-tree would always add at the rightmost node under these + circumstances, either creating a deep tree (version 1) or requiring + expensive rebalances to correct (version 2), the extensible array + has already mapped out a pre-balanced internal structure. This + optimized internal structure is instantiated as needed when chunk + records are inserted into the structure.</p> + + + +<!-- + + <p>A description of the rationale that leads to the present + implementation of the extensible array can be found at + <a href="https://svn.hdfgroup.org/hdf5doc/trunk/projects/1_10_alpha/ReviseChunks/skip_lists"> + https://svn.hdfgroup.org/hdf5doc/trunk/projects/1_10_alpha/ReviseChunks/skip_lists</a>. + </p> + +<p>The current implementation differs from the data structure + described in that reference in some ways, but the basic idea is the + same.</p> + +--> + + + +<p>An Extensible Array consists of a header, an index block, + secondary blocks, data blocks, and (optional) data block pages. The + general scheme is that the index block is used to reference a + secondary block, which is, in turn, used to reference the data block + page where the chunk information is stored. The data blocks will + be paged for efficiency when their size passes a threshold value. + These pages are laid out contiguously on the disk after the data + block, are initialized as needed, and are tracked via bitmaps + stored in the secondary block. 
The number of secondary and data + blocks/pages in a chunk index varies as they are allocated as + needed and the first few are (conceptually) stored in parent + elements as an optimization.</p> + +<div align="center"> + <table class="format"> + <caption> + Layout: Extensible Array Header + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td>Client ID</td> + <td>Element Size</td> + <td>Max Nelmts Bits</td> + </tr> + + <tr> + <td>Index Blk Elmts</td> + <td>Data Blk Min Elmts</td> + <td>Secondary Blk Min Data Ptrs</td> + <td>Max Data Blk Page Nelmts Bits</td> + </tr> + + <tr> + <td colspan="4"><br />Num Secondary Blks<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Secondary Blk Size<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Num Data Blks<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Data Blk Size<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Max Index Set<sup>L</sup><br /><br /></td> + </tr> + <tr> + <td colspan="4"><br />Num Elements<sup>L</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Index Block Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘L’ in the above table are + of the size specified in the <a href="#SizeOfLengthsV0">Size + of Lengths</a> field in the superblock.) + </td></tr> + <tr> + <td> </td> + <td> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) 
+ </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Extensible Array Header + </caption> + <tr> + <th width="40%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>EAHD</code>” + is used to indicate the beginning of an Extensible Array + header. This gives file consistency checking utilities a + better chance of reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Client ID</p></td> + <td> + <p>The ID for identifying the client of the + Extensible Array: + + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Non-filtered dataset chunks + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Filtered dataset chunks + </td> + </tr> + <tr> + <td align="center"><code>2+</code></td> + <td>Reserved. + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Element Size</p></td> + <td> + <p>The size in bytes of an element in the Extensible Array. + </p> + </td> + </tr> + + <tr> + <td><p>Max Nelmts Bits</p></td> + <td> + <p>The number of bits needed to store the + maximum number of elements in the Extensible Array.</p> + </td> + </tr> + + <tr> + <td><p>Index Blk Elmts</p></td> + <td> + <p>The number of elements to store in the index block. + </p> + </td> + </tr> + + <tr> + <td><p>Data Blk Min Elmts</p></td> + <td> + <p>The minimum number of elements per data block. + </p> + </td> + </tr> + + <tr> + <td><p>Secondary Blk Min Data Ptrs</p></td> + <td> + <p>The minimum number of data block pointers for a + secondary block.
+ </p> + </td> + </tr> + + <tr> + <td><p>Max Data Blk Page Nelmts Bits</p></td> + <td> + <p>The number of bits needed to store the maximum number + of elements in a data block page. + </p> + </td> + </tr> + + <tr> + <td><p>Num Secondary Blks</p></td> + <td> + <p>The number of secondary blocks created. + </p> + </td> + </tr> + + <tr> + <td><p>Secondary Blk Size</p></td> + <td> + <p>The size of the secondary blocks created. + </p> + </td> + </tr> + + <tr> + <td><p>Num Data Blks</p></td> + <td> + <p>The number of data blocks created. + </p> + </td> + </tr> + + <tr> + <td><p>Data Blk Size</p></td> + <td> + <p>The size of the data blocks created. + </p> + </td> + </tr> + + <tr> + <td><p>Max Index Set</p></td> + <td> + <p>The maximum index set. + </p> + </td> + </tr> + + <tr> + <td><p>Num Elements</p></td> + <td> + <p>The number of elements realized. + </p> + </td> + </tr> + + <tr> + <td><p>Index Block Address</p></td> + <td> + <p>The address of the index block. + </p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>The checksum for the header.</p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Extensible Array Index Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td>Client ID</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Elements <em>(variable size and + optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Data Block Addresses <em>(variable + size and optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Secondary Block Addresses <em>(variable + size and optional)</em><br /><br /></td> + </tr> + + <tr> + <td
colspan="4">Checksum</td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Extensible Array Index Block + </caption> + <tr> + <th width="40%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>EAIB</code>” + is used to indicate the beginning of an Extensible Array + Index Block. This gives file consistency checking utilities + a better chance of reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Client ID</p></td> + <td> + <p>The client ID for identifying the user of the + Extensible Array: + + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Non-filtered dataset chunks + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Filtered dataset chunks + </td> + </tr> + <tr> + <td align="center"><code>2+</code></td> + <td>Reserved. + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Header Address</p></td> + <td> + <p>The address of the Extensible Array header. Principally + used for file integrity checking.</p> + </td> + </tr> + + <tr> + <td><p>Elements</p></td> + <td> + <p>Contains the elements that are stored directly in + the index block. An optimization to avoid unnecessary + secondary blocks. 
+ <br /> + <br /> + There are two element types: + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td><a href="#EaNonFilterChunk">Non-filtered dataset chunks</i></a> +</td> +</tr> +<tr> + <td align="center"><code>1</code></td> + <td><a href="#EaFilterChunk">Filtered dataset chunks</i></a> +</td> +</tr> +</table> +</p> +</td> +</tr> + +<tr> + <td><p>Data Block Addresses</p></td> + <td> + <p>Contains the addresses of the data blocks + that are stored directly in the Index Block. An + optimization to avoid unnecessary secondary blocks.</p> + </td> +</tr> + +<tr> + <td><p>Secondary Block Addresses</p></td> + <td> + <p>Contains the addresses of the secondary + blocks.</p> + </td> +</tr> + +<tr> + <td><p>Checksum</p></td> + <td> + <p>The checksum for the Extensible Array Index Block.</p> + </td> +</tr> + +</table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Extensible Array Secondary Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td>Client ID</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Block Offset <em>(variable + size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Page Bitmap <em>(variable size and + optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Data Block Addresses <em>(variable + size and optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the 
size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Extensible Array Secondary Block + </caption> + <tr> + <th width="40%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>EASB</code>” + is used to indicate the beginning of an Extensible Array + Secondary Block. This gives file consistency checking utilities + a better chance of reconstructing a damaged file. + </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Client ID</p></td> + <td> + <p>The ID for identifying the client of the + Extensible Array: + + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Non-filtered dataset chunks + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Filtered dataset chunks + </td> + </tr> + <tr> + <td align="center"><code>2+</code></td> + <td>Reserved. + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Header Address</p></td> + <td> + <p>The address of the Extensible Array header. Principally + used for file integrity checking.</p> + </td> + </tr> + + <tr> + <td><p>Block Offset</p></td> + <td> + <p>Stores the offset of the block in the array. + </p> + </td> + </tr> + + <tr> + <td><p>Page Bitmap</p></td> + <td> + <p>A bitmap indicating which + data block pages are initialized.</p> + <p> + Exists only if the data block is paged.</p>
+ </td> + </tr> + + <tr> + <td><p>Data Block Addresses</p></td> + <td> + <p>Contains the addresses of the data blocks + referenced by this secondary block.</p> + </td> + </tr> + + <tr> + <td><p>Checksum</p></td> + <td> + <p>The checksum for the Extensible Array + Secondary Block.</p> + </td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Extensible Array Data Block + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4">Signature</td> + </tr> + + <tr> + <td>Version</td> + <td>Client ID</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Header Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Block Offset <em>(variable + size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Elements <em>(variable size and + optional)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Extensible Array Data Block + </caption> + <tr> + <th width="40%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Signature</p></td> + <td> + <p>The ASCII character string “<code>EADB</code>” + is used to indicate the beginning of an Extensible Array + data block. This gives file consistency checking utilities + a better chance of reconstructing a damaged file. 
+ </p> + </td> + </tr> + + <tr> + <td><p>Version</p></td> + <td> + <p>This document describes version 0.</p> + </td> + </tr> + + <tr> + <td><p>Client ID</p></td> + <td> + <p>The ID for identifying the client of the + Extensible Array: + + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>Non-filtered dataset chunks + </td> + </tr> + <tr> + <td align="center"><code>1</code></td> + <td>Filtered dataset chunks + </td> + </tr> + <tr> + <td align="center"><code>2+</code></td> + <td>Reserved. + </td> + </tr> + </table> + </p> + </td> + </tr> + + <tr> + <td><p>Header Address</p></td> + <td> + <p>The address of the Extensible Array header. Principally + used for file integrity checking. + </p> + </td> + </tr> + + <tr> + <td><p>Block Offset</p></td> + <td> + <p>The offset of the block in the array.</p> + </td> + </tr> + + <tr> + <td><p>Elements</p></td> + <td> + <p>Contains the elements stored in the data block and + exists only if the data block is not paged.
+ <br /> + <br /> + There are two element types: + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td><a href="#EaNonFilterChunk">Non-filtered dataset chunks</a> +</td> +</tr> +<tr> + <td align="center"><code>1</code></td> + <td><a href="#EaFilterChunk">Filtered dataset chunks</a> +</td> +</tr> +</table> +</p> +</td> +</tr> + +<tr> + <td><p>Checksum</p></td> + <td> + <p>The checksum for the Extensible Array data block.</p> + </td> +</tr> + +</table> +</div> + +<br /> +<br /> +<br /> +<div align="center"> + <table class="format"> + <caption> + Layout: Extensible Array Data Block Page + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td colspan="4"><br />Elements <em>(variable + size)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Checksum</td> + </tr> + + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Extensible Array Data Block Page + </caption> + <tr> + <th width="40%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Elements</p></td> + <td> + <p>Contains the elements stored in the data block + page.</p> + <p> + There are two element types: + <table class="list"> + <tr> + <th width="20%" align="center">ID</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td><a href="#EaNonFilterChunk">Non-filtered dataset chunks</a> +</td> +</tr> +<tr> + <td align="center"><code>1</code></td> + <td><a href="#EaFilterChunk">Filtered dataset chunks</a> +</td> +</tr> +</table> +</p> +</td> +</tr> + +<tr> + <td><p>Checksum</p></td> + <td> + <p>The checksum for an Extensible Array data block + page.</p> + </td> +</tr> + +</table> +</div> + +<br /> +<br /> +<br /> +<a name="EaNonFilterChunk"></a> +<div align="center"> + <table class="format"> + <caption> 
+ Layout: Data Block Element for Non-filtered Dataset Chunk + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr><td> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Data Block Element for Non-filtered Dataset Chunk + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>The address of the dataset chunk in the file. + </p> + </td> + </tr> + + </table> +</div> +</p> + +<br /> +<br /> +<br /> +<a name="EaFilterChunk"></a> +<div align="center"> + <table class="format"> + <caption> + Layout: Data Block Element for Filtered Dataset Chunk + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Address<sup>O</sup><br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br />Chunk Size<em> (variable size; at + most 8 bytes)</em><br /><br /></td> + </tr> + + <tr> + <td colspan="4">Filter Mask</td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) 
+ </td></tr> + </table> +</div> + +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Data Block Element for Filtered Dataset Chunk + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Address</p></td> + <td><p>The address of the dataset chunk in the file. + </p> + </td> + </tr> + + <tr> + <td><p>Chunk Size</p></td> + <td><p>The size of the dataset chunk in bytes. + </p> + </td> + </tr> + + <tr> + <td><p>Filter Mask</p></td> + <td><p>Indicates the filter to skip for the dataset chunk. + Each filter has an index number in the pipeline; if that + filter is skipped, the bit corresponding to its index is set. + </p> + </td> + </tr> + + </table> +</div> + +<a name="AppendV2Btrees"> + <h3>VII.E. The Version 2 B-trees Index</h3></a> + +<p>The <i>Version 2 B-trees</i> index can be used when the dataset + fulfills the following condition:</p> + +<ul> + <li>more than one dimension of unlimited extent</li> +</ul> + +<p>Version 2 B-trees can be used to index various objects in the + library. See <a href="#V2Btrees">“Version 2 B-trees”</a> + for more information. The B-tree types <a href="#V2BtType10">10</a> + and <a href="#V2BtreesType11">11</a> record layouts are for + indexing dataset chunks.</p> + +<h2><a name="AppendixD"> VIII. Appendix D: + Encoding for dataspace and reference</a></h2> + +<a name="DataspaceEncode"> + <h3>VIII.A. Dataspace Encoding </h3></a> +<i>H5Sencode</i> is a public routine that encodes a dataspace description into a buffer while +<i>H5Sdecode</i> is the corresponding routine that decodes the description encoded in the buffer. +<p> + See the reference manual description for these two public routines. 
+ + <br /> + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Dataspace Description for H5Sencode/H5Sdecode + </caption> + + <tr> + <th>byte</th> + <th>byte</th> + <th>byte</th> + <th>byte</th> + </tr> + + <tr> + <td>Dataspace ID</td> + <td>Encode Version</td> + <td>Size of Size</td> + <td colspan="1" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> + </tr> + + <tr> + <td colspan="4"><br />Size of Extent + <br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br /><br />Dataspace Message + <em>(variable size)</em> + <br /><br /></td> + </tr> + + <tr> + <td colspan="4"><br /><br />Dataspace Selection + <em>(variable size)</em> + <br /><br /></td> + </tr> + + </table> + + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Dataspace Description for H5Sencode/H5Sdecode + </caption> + <tr> + <th width="40%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Dataspace ID</p></td> + <td> + <p>The dataspace message ID, which is 1.</p> + </td> + </tr> + + <tr> + <td><p>Encode Version</p></td> + <td> + <p>H5S_ENCODE_VERSION which is 0. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Size</p></td> + <td> + <p>The number of bytes used to store the size of an object. + </p> + </td> + </tr> + + <tr> + <td><p>Size of Extent</p></td> + <td> + <p>Size of the dataspace message. + </p> + </td> + </tr> + + <tr> + <td><p>Dataspace Message</p></td> + <td> + <p>The dataspace message information. See + <a href="#DataspaceMessage">Dataspace Message.</a></p> +</td> +</tr> + +<tr> + <td><p>Dataspace Selection</p></td> + <td> + <p>The dataspace selection information. 
See + <a href="#DataspaceSEL">Dataspace Selection.</a></p> + </td> +</tr> + +</table> +</div> + + +<br /> +<br /> +<br /> +<a name="DataspaceSEL"></a> + <div align="center"> + <table class="format"> + <caption> + Layout: Dataspace Selection + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Selection Type</td> + </tr> + <tr> + <td colspan="4"><br />Selection Info (<em>variable + size</em>)<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Dataspace Selection + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Selection Type</p></td> + <td> + <p>There are 4 types of selection: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>H5S_SEL_NONE: Nothing selected + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>H5S_SEL_POINTS: Sequence of points selected + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>H5S_SEL_HYPER: Hyperslab selected + </td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>H5S_SEL_ALL: Entire extent selected + </td> + </tr> + </table> + </td> + + </tr> + + <tr> + <td><p>Selection Info</p></td> + <td> + <p>There are 4 types of selection info: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center"><code>0</code></td> + <td>Selection info for <a href="#SelNONE">H5S_SEL_NONE</a> + </td> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>Selection info for <a href="#SelPOINTS">H5S_SEL_POINTS</a> + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>Selection info for <a 
href="#SelHYPER">H5S_SEL_HYPER</a> + </td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>Selection info for <a href="#SelALL">H5S_SEL_ALL</a> + </td> + </tr> + </table> + </td> + + </tr> + </table> + </div> + + + <br /> + <br /> + <br /> +<a name="SelNONE"></a> + <div align="center"> + <table class="format"> + <caption> + Layout: Selection Info for H5S_SEL_NONE + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Version</td> + </tr> + <tr> + <td colspan="4"><br />Reserved <em>(zero, 8 bytes)</em><br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Selection Info for H5S_SEL_NONE + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for the H5S_SEL_NONE Selection Info. + The value is 1.</p></td> + </tr> + </table> + </div> + + + <br /> + <br /> + <br /> + <a name="SelPOINTS"></a> + <div align="center"> + <table class="format"> + <caption> + Layout: Selection Info for H5S_SEL_POINTS + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Version</td> + </tr> + <tr> + <td colspan="4"><br /><br />Points Selection Info <em>(variable size)</em> + <br /><br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Selection Info for H5S_SEL_POINTS + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for the H5S_SEL_POINTS Selection Info. 
+ The value is either 1 or 2.</p></td> + </tr> + + <tr> + <td><p>Points Selection Info</p></td> + <td><p>Depending on <em>version</em>: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>See <a href="#SelPOINTSV1">Version 1 Points Selection Info</a> + </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>See <a href="#SelPOINTSV2">Version 2 Points Selection Info</a> + </td> + </tr> + + </table> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <br /> + <a name="SelPOINTSV1"></a> + <div align="center"> + <table class="format"> + <caption> + Layout: Version 1 Points Selection Info + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Reserved <em>(zero)</em></td> + </tr> + + <tr> + <td colspan="4">Length</td> + </tr> + <tr> + <td colspan="4">Rank</td> + </tr> + <tr> + <td colspan="4">Num Points</td> + </tr> + <tr> + <td colspan="4">Point #1: coordinate #1</td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Point #1: coordinate #u</td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Point #n: coordinate #1</td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Point #n: coordinate #u</td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Version 1 Points Selection Info + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Length</p></td> + <td><p>The size in bytes from <em>Length</em> to the end of the + selection info.</td> + </tr> + + <tr> + <td><p>Rank</p></td> + <td><p>The number of dimensions.</p></td> + </tr> + <tr> + <td><p>Num 
Points</p></td> + <td><p>The number of points in the selection.</p></td> + </tr> + <tr> + <td><p>Point #n: coordinate #u</p></td> + <td><p>The array of points in the selection. + <p>The points selected are #1 to #n where n is <em>Num Points</em>. + <p>The list of coordinates for each point are #1 to #u where u is + <em>Rank</em>.</p></td> + </tr> + </table> + </div> + + + <br /> + <br /> + <br /> + <a name="SelPOINTSV2"></a> + <div align="center"> + <table class="format"> + <caption> + Layout: Version 2 Points Selection Info + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="1">Encode Size</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted only to align table nicely</em> + </td> + </tr> + + <tr> + <td colspan="4">Rank</td> + </tr> + <tr> + <td colspan="4">Num Points<p>(2, 4 or 8 bytes)<br /></td> + </tr> + <tr> + <td colspan="4">Point #1: coordinate #1<p>(2, 4 or 8 bytes)<br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Point #1: coordinate #u<p>(2, 4 or 8 bytes)<br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Point #n: coordinate #1 <p>(2, 4 or 8 bytes)<br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Point #n: coordinate #u<p>(2, 4 or 8 bytes)<br /></td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 Points Selection Info + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Encode Size</td> + <td><p>The size for encoding the points selection info which can be 2, 4 or 8 bytes. 
+ </td> + </tr> + + <tr> + <td><p>Rank</p></td> + <td><p>The number of dimensions.</p></td> + </tr> + <tr> + <td><p>Num Points</p></td> + <td><p>The number of points in the selection. + <p>The field <em>Encode Size</em> indicates the size of this field</p></td> + </tr> + <tr> + <td><p>Point #n: coordinate #u</p></td> + <td><p>The array of points in the selection. + <p>The points selected are #1 to #n where n is <em>Num Points</em>. + <p>The list of coordinates for each point are #1 to #u where u is + <em>Rank</em>. + <p>The field <em>Encode Size</em> indicates the size of this field</p></td> + </tr> + </table> + </div> + + + <br /> + <br /> + <br /> + <a name="SelHYPER"></a> + <div align="center"> + <table class="format"> + <caption> + Layout: Selection Info for H5S_SEL_HYPER + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Version</td> + </tr> + <tr> + <td colspan="4"><br />Hyperslab Selection Info + (<em>variable size</em>)<br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Selection Info for H5S_SEL_HYPER + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for the H5S_SEL_HYPER selection info. + The value is 1, 2 or 3.</p></td> + </tr> + + <tr> + <td><p>Hyperslab Selection Info</p></td> + <td><p>Depending on <em>version</em>: + <table class="list"> + <tr> + <th width="20%" align="center">Version</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>1</code></td> + <td>See <a href="#SelHYPERV1">Version 1 Hyperslab Selection Info</a>. 
+ </td> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>See <a href="#SelHYPERV2">Version 2 Hyperslab Selection Info</a> + </td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>See <a href="#SelHYPERV3">Version 3 Hyperslab Selection Info</a> + </td> + </tr> + </table> + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <br /> + <a name="SelHYPERV1"></a> + <div align="center"> + <table class="format"> + <caption> + Layout: Version 1 Hyperslab Selection Info + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Reserved</td> + </tr> + <tr> + <td colspan="4">Length</td> + </tr> + <tr> + <td colspan="4">Rank</td> + </tr> + <tr> + <td colspan="4">Num Blocks</td> + </tr> + <tr> + <td colspan="4">Starting Offset #1 for Block #1</td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Starting Offset #n for Block #1</td> + </tr> + + <tr> + <td colspan="4">Ending Offset #1 for Block #1</td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Ending Offset #n for Block #1</td> + </tr> + + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + + <tr> + <td colspan="4">Starting Offset #1 for Block #u</td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Starting Offset #n for Block #u</td> + </tr> + + <tr> + <td colspan="4">Ending Offset #1 for Block #u</td> +</tr> +<tr> + <td colspan="4">.<br />.<br />.<br /></td> +</tr> +<tr> + <td colspan="4">Ending Offset #n for Block #u</td> +</tr> + +</table> +</div> + +<br /> +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Version 1 Hyperslab Selection Info + </caption> + <tr> + <th width="30%">Field 
Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Length</p></td> + <td><p>The size in bytes from the field <em>Rank</em> to the + end of the Selection Info.</td> + </tr> + + <tr> + <td><p>Rank</p></td> + <td><p>The number of dimensions in the dataspace.</p></td> + </tr> + + <tr> + <td><p>Num Blocks</p></td> + <td><p>The number of blocks in the selection.</p></td> + </tr> + + <tr> + <td><p>Starting Offset #n for Block #u</p></td> + <td><p>The offset #n of the starting element in block #u. + <p>#n is from 1 to <em>Rank</em>. + <p>#u is from 1 to <em>Num Blocks</em> moving from the fastest + changing dimension to the slowest changing dimension. + </p></td> + </tr> + + <tr> + <td><p>Ending Offset #n for Block #u</p></td> + <td><p>The offset #n of the ending element in block #u. + <p>#n is from 1 to <em>Rank</em>. + <p>#u is from 1 to <em>Num Blocks</em> moving from the fastest + changing dimension to the slowest changing dimension. + </p></td> + </tr> + + </table> +</div> + +<br /> +<br /> +<br /> +<a name="SelHYPERV2"></a> + <div align="center"> + <table class="format"> + <caption> + Layout: Version 2 Hyperslab Selection Info + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Flags</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Length</td> + </tr> + <tr> + <td colspan="4">Rank</td> + </tr> + <tr> + <td colspan="4">Start #1 <em>(8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">Stride #1 <em>(8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">Count #1 <em>(8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">Block #1 <em>(8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Start #n <em>(8 bytes)</em><p></td> + </tr> + + <tr> + <td colspan="4">Stride #n <em>(8 bytes)</em><p></td> + </tr> + <tr> 
+ <td colspan="4">Count #n <em>(8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">Block #n <em>(8 bytes)</em><p></td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Version 2 Hyperslab Selection Info + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This is a bit field with the following definition. + Currently, this is always set to 0x1. + <p> + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center"><code>0</code></td> + <td>If set, it is a regular hyperslab, otherwise, irregular. + </td> + </tr> + + </table> + </td> + + </tr> + + <tr> + <td><p>Length</p></td> + <td><p>The size in bytes from the field <em>Rank</em> to the + end of the Selection Info.</td> + </tr> + + <tr> + <td><p>Rank</p></td> + <td><p>The number of dimensions in the dataspace.</td> + </tr> + + <tr> + <td><p>Start #n</p></td> + <td><p>The offset of the starting element in the block. + <p>#n is from 1 to <em>Rank</em>. + </p></td> + </tr> + + <tr> + <td><p>Stride #n</p></td> + <td><p>The number of elements to move in each dimension. + <p>#n is from 1 to <em>Rank</em>. + </p></td> + </tr> + + <tr> + <td><p>Count #n</p></td> + <td><p>The number of blocks to select in each dimension. + <p>#n is from 1 to <em>Rank</em>. + </p></td> + </tr> + + <tr> + <td><p>Block #n</p></td> + <td><p>The size (in elements) of each block in each dimension. + <p>#n is from 1 to <em>Rank</em>. 
+ </p></td> + </tr> + </table> + </div> + + + + + <br /> + <br /> + <br /> + <a name="SelHYPERV3"></a> + <div align="center"> + <table class="format"> + <caption> + Layout: Version 3 Hyperslab Selection Info + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Flags</td> + <td>Encode Size</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> + </tr> + <tr> + <td colspan="4">Rank</td> + </tr> + <tr> + <td colspan="4"><br />Regular/Irregular Hyperslab Selection Info + <p><em>(variable size)</em><br /><br/></td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Version 3 Hyperslab Selection Info + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This is a bit field with the following definition: + <p> + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + <tr> + <td align="center"><code>0</code></td> + <td>If set, it is a regular hyperslab, otherwise, irregular. + </td> + </tr> + + </table> + </td> + </tr> + + <tr> + <td><p>Encode Size</p></td> + <td><p>The size for encoding hyperslab selection info, which can be 2, 4 or 8 bytes.</td> + </tr> + + <tr> + <td><p>Rank</p></td> + <td><p>The number of dimensions in the dataspace.</td> + </tr> + + <tr> + <td><p>Regular/Irregular Hyperslab Selection Info</p></td> + <td><p>This is the selection info for version 3 hyperslab which can be regular or irregular. 
+ <p>If bit 0 of the field <em>Flags</em> is set, + See <a href="#SelHYPERV3REG">Version 3 Regular Hyperslab Selection Info</a> + <p>Otherwise, see <a href="#SelHYPERV3IRREG">Version 3 Irregular Hyperslab Selection Info</a> + </td> + + </tr> + + </table> + </div> + + + <br /> + <br /> + <br /> + <a name="SelHYPERV3REG"></a> + <div align="center"> + <table class="format"> + <caption> + Layout: Version 3 Regular Hyperslab Selection Info + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Start #1 <p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">Stride #1 <p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">Count #1 <p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">Block #1 <p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Start #n <p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + + <tr> + <td colspan="4">Stride #n <p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">Count #n <p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">Block #n <p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Version 3 Regular Hyperslab Selection Info + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Start #n</p></td> + <td><p>The offset of the starting element in the block. + <p>#n is from 1 to <em>Rank</em>. + <p>The field <em>Encode Size</em> indicates the size of this field. + </p></td> + </tr> + + <tr> + <td><p>Stride #n</p></td> + <td><p>The number of elements to move in each dimension. + <p>#n is from 1 to <em>Rank</em>. + <p>The field <em>Encode Size</em> indicates the size of this field. 
+ </p></td> + </tr> + + <tr> + <td><p>Count #n</p></td> + <td><p>The number of blocks to select in each dimension. + <p>#n is from 1 to <em>Rank</em>. + <p>The field <em>Encode Size</em> indicates the size of this field. + </p></td> + </tr> + + <tr> + <td><p>Block #n</p></td> + <td><p>The size (in elements) of each block in each dimension. + <p>#n is from 1 to <em>Rank</em>. + <p>The field <em>Encode Size</em> indicates the size of this field. + </p></td> + </tr> + </table> + </div> + + <br /> + <br /> + <br /> + <a name="SelHYPERV3IRREG"></a> + <div align="center"> + <table class="format"> + <caption> + Layout: Version 3 Irregular Hyperslab Selection Info + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Num Blocks<p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">Starting Offset #1 for Block #1<p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Starting Offset #n for Block #1<p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + + <tr> + <td colspan="4">Ending Offset #1 for Block #1<p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Ending Offset #n for Block #1<p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + + <tr> + <td colspan="4">Starting Offset #1 for Block #u<p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">.<br />.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Starting Offset #n for Block #u<p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + + <tr> + <td colspan="4">Ending Offset #1 for Block #u<p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + <tr> + <td colspan="4">.<br 
/>.<br />.<br /></td> + </tr> + <tr> + <td colspan="4">Ending Offset #n for Block #u<p><em>(2, 4 or 8 bytes)</em><p></td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Version 3 Irregular Hyperslab Selection Info + </caption> + + <tr> + <td><p>Num Blocks</p></td> + <td><p>The number of blocks in the selection. + <p>The field <em>Encode Size</em> indicates the size of this field</p></td> + </tr> + + <tr> + <td><p>Starting Offset #n for Block #u</p></td> + <td><p>The offset #n of the starting element in block #u. + <p>#n is from 1 to <em>Rank</em>. + <p>#u is from 1 to <em>Num Blocks</em> moving from the fastest + changing dimension to the slowest changing dimension. + <p>The field <em>Encode Size</em> indicates the size of this field + </p></td> + </tr> + + <tr> + <td><p>Ending Offset #n for Block #u</p></td> + <td><p>The offset #n of the ending element in block #u. + <p>#n is from 1 to <em>Rank</em>. + <p>#u is from 1 to <em>Num Blocks</em> moving from the fastest + changing dimension to the slowest changing dimension. 
+ <p>The field <em>Encode Size</em> indicates the size of this field + </p></td> + </tr> + + </table> + </div> + + + <br /> + <br /> + <br /> + <a name="SelALL"></a> + <div align="center"> + <table class="format"> + <caption> + Layout: Selection Info for H5S_SEL_ALL + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4">Version</td> + </tr> + <tr> + <td colspan="4"><br />Reserved <em>(zero, + 8 bytes)</em><br /><br /></td> + </tr> + + </table> + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Selection Info for H5S_SEL_ALL + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Version</p></td> + <td><p>The version number for the H5S_SEL_ALL Selection Info; + the value is 1.</p></td> + </tr> + </table> + </div> + + <a name="ReferenceEncodeRV"> + <h3>VIII.B. Reference Encoding (Revised)</h3></a> + <p> + <br /> + For the following reference type, + the Reference Header and Reference Block are stored together as the dataset's raw data: + <ul> + <li>Object Reference (H5R_OBJECT2) (without reference to an external file)</li> + </ul> + <p> + For the following reference types, + the Reference Header plus the <a href="#GlobalHeapID">Global Heap ID</a> are stored + as the dataset's raw data in the file. 
+ The global heap ID is used to locate the Reference Block stored in the global heap: + <ul> + <li>Object Reference (H5R_OBJECT2) (with reference to an external file)</li> + <li>Dataset Region Reference (H5R_DATASET_REGION2) (with/without reference to an external file)</li> + <li>Attribute Reference (H5R_ATTR) (with/without reference to an external file)</li> + </ul> + <br /> + <br /> + + <div align="center"> + <table class="format"> + <caption> + Layout: Reference Header + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Reference Type</td> + <td>Flags</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> + </tr> + + </table> + + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Reference Header + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Reference Type</p></td> + <td> + <p>There are 3 types of references: + <table class="list"> + <tr> + <th width="20%" align="center">Value</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>2</code></td> + <td>H5R_OBJECT2: Object Reference + </td> + </tr> + + <tr> + <td align="center"><code>3</code></td> + <td>H5R_DATASET_REGION2: Dataset Region Reference + </td> + </tr> + + <tr> + <td align="center"><code>4</code></td> + <td>H5R_ATTR: Attribute Reference + </td> + </tr> + + </table> + + </td> + </tr> + + <tr> + <td><p>Flags</p></td> + <td><p>This field describes the reference: + <table class="list"> + <tr> + <th width="20%" align="center">Bit</th> + <th width="80%" align="left">Description</th> + </tr> + + <tr> + <td align="center"><code>0</code></td> + <td>If set, the reference is to an external file. 
+ </td> + </tr> + <tr> + <td align="center"><code>1-7</code></td> + <td>Reserved</td> + </tr> + </table></p> + + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <br /> + + <div align="center"> + <table class="format"> + <caption> + Layout: Reference Block + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td>Token Size</td> + <td colspan="3" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> + </tr> + <tr> + <td colspan=4><br /><br />Token + <em>(variable size)</em><br /> <br /><br /></td> + </tr> + <tr> + <td colspan=2>Length of External File Name</td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> + </tr> + <tr> + <td colspan=4><br /><br />External File Name + <em>(variable size)</em><br /><br /><br /></td> + </tr> + <tr> + <td colspan=4>Size of Dataspace Selection</td> + </tr> + <tr> + <td colspan=4>Rank of Dataspace Selection</td> + </tr> + <tr> + <td colspan=4><br /><br />Dataspace Selection Information + <em>(variable size)</em><br /><br /> <br /></td> +</td> +</tr> +<tr> + <td colspan=2>Length of Attribute Name </td> + <td colspan="2" bgcolor="#DDDDDD"><em>This space inserted + only to align table nicely</em></td> +</tr> +<tr> + <td colspan=4><br /><br />Attribute Name + <em>(variable size)</em><br /><br /><br /></td> +</tr> + +</table> + +</div> + +<br /> +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Reference Block + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Token size</p></td> + <td><p>This is the size of the token for the object. + </td> + </tr> + + <tr> + <td><p>Token</p></td> + <td> + <p> + This is the token for the object. + </p> + </td> + </tr> + + <tr> + <td><p>Length of External File Name</p></td> + <td><p>This is the length of the external file name.
+ <p>This field exists if bit 0 of <em>flags</em> is set.</p> + </p> + </td> + </tr> + + <tr> + <td><p>External File Name</p></td> + <td><p>This is the name of the external file being referenced.</p> +</p> +<p>This field exists if bit 0 of <em>flags</em> is set.</p> +</td> +</tr> + +<tr> + <td><p>Dataspace Selection Information</p></td> + <td><p>See <a href="#DataspaceSEL">Dataspace Selection.</a></p> +</p> +<p>This field exists if the <em>Reference Type</em> is H5R_DATASET_REGION2.</p> +</td> +</tr> + +<tr> + <td><p>Length of Attribute Name</p></td> + <td><p>This is the length of the attribute name. + <p>This field exists if the <em>Reference Type</em> is H5R_ATTR.</p> + </td> +</tr> + +<tr> + <td><p>Attribute Name</p></td> + <td><p>This is the name of the attribute being referenced. + <p>This field exists if the <em>Reference Type</em> is H5R_ATTR.</p> + </td> +</tr> + +</table> +</div> + +<br /> +<br /> +<br /> + + +<a name="ReferenceEncodeDP"> + <h3>VIII.C. Reference Encoding (Backward Compatibility)</h3></a> +<p> + <br /> + The two references described below are maintained to preserve compatibility with previous versions of the library. +<p> + For the following reference type, + the reference encoding is stored as the dataset's raw data in the file: + <ul> + <li>Object Reference (H5R_OBJECT1)</li> + </ul> +<p> + For the following reference type, + the <a href="#GlobalHeapID">Global Heap ID</a> is stored as the dataset's raw data in the file.
+ The global heap ID is used to locate the reference encoding + stored in the global heap: + <ul> + <li>Dataset Region Reference (H5R_DATASET_REGION1)</li> + </ul> + + <br /> + <br /> + <div align="center"> + <table class="format"> + <caption> + Layout: Reference for H5R_OBJECT1 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Object Address<sup>O</sup><br /><br /></td> + </tr> + + </table> + + <table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) + </td></tr> + </table> + + </div> + + <br /> + <br /> + <div align="center"> + <table class="desc"> + <caption> + Fields: Reference for H5R_OBJECT1 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Object Address</p></td> + <td> + <p>Address of the object being referenced + </td> + </tr> + + </table> + </div> + + <br /> + <br /> + <br /> + + <div align="center"> + <table class="format"> + <caption> + Layout: Reference for H5R_DATASET_REGION1 + </caption> + + <tr> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + <th width="25%">byte</th> + </tr> + + <tr> + <td colspan="4"><br />Object Address<sup>O</sup><br /><br /></td> + </tr> + <tr> + <td colspan=4><br /><br />Dataspace Selection Information + <em>(variable size)</em><br /><br /> <br /></td> +</td> +</tr> + +</table> + +<table class="note"> + <tr> + <td width="60%"> </td> + <td width="40%"> + (Items marked with an ‘O’ in the above table are + of the size specified in the <a href="#SizeOfOffsetsV0">Size + of Offsets</a> field in the superblock.) 
+ </td></tr> +</table> + +</div> + +<br /> +<br /> +<div align="center"> + <table class="desc"> + <caption> + Fields: Reference for H5R_DATASET_REGION1 + </caption> + <tr> + <th width="30%">Field Name</th> + <th>Description</th> + </tr> + + <tr> + <td><p>Object Address</p></td> + <td><p>This is the address of the object being referenced. + </td> + </tr> + + <tr> + <td><p>Dataspace Selection Information</p></td> + <td><p>This is the dataspace selection for the object being referenced. + See <a href="#DataspaceSEL">Dataspace Selection.</a></p> +</p> +</td> +</tr> + +</table> +</div> + +<br /> +<br /> +<br /> + + +</body> +</html> diff --git a/doxygen/examples/H5A_examples.c b/doxygen/examples/H5A_examples.c new file mode 100644 index 0000000..f332efa --- /dev/null +++ b/doxygen/examples/H5A_examples.c @@ -0,0 +1,145 @@ +/* -*- c-file-style: "stroustrup" -*- */ + +#include "hdf5.h" + +#include <stdio.h> +#include <stdlib.h> + +int +main(void) +{ + int ret_val = EXIT_SUCCESS; + + //! <!-- [create] --> + { + __label__ fail_acpl, fail_attr, fail_file; + hid_t file, acpl, fspace, attr; + + unsigned mode = H5F_ACC_TRUNC; + char file_name[] = "f1.h5"; + // attribute names can be arbitrary Unicode strings + char attr_name[] = "Χαρακτηριστικό"; + + if ((file = H5Fcreate(file_name, mode, H5P_DEFAULT, H5P_DEFAULT)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_file; + } + if ((acpl = H5Pcreate(H5P_ATTRIBUTE_CREATE)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_acpl; + } + // use UTF-8 encoding for the attribute name + if (H5Pset_char_encoding(acpl, H5T_CSET_UTF8) < 0) { + ret_val = EXIT_FAILURE; + goto fail_fspace; + } + // create a scalar (singleton) attribute + if ((fspace = H5Screate(H5S_SCALAR)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_fspace; + } + // create an attribute on the root group + if ((attr = H5Acreate2(file, attr_name, H5T_STD_I32LE, fspace, acpl, H5P_DEFAULT)) == + H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + 
goto fail_attr; + } + + H5Aclose(attr); +fail_attr: + H5Sclose(fspace); +fail_fspace: + H5Pclose(acpl); +fail_acpl: + H5Fclose(file); +fail_file:; + } + //! <!-- [create] --> + + //! <!-- [read] --> + { + __label__ fail_attr, fail_file; + hid_t file, attr; + + unsigned mode = H5F_ACC_RDONLY; + char file_name[] = "f1.h5"; + char attr_name[] = "Χαρακτηριστικό"; + int value; + + if ((file = H5Fopen(file_name, mode, H5P_DEFAULT)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_file; + } + if ((attr = H5Aopen(file, attr_name, H5P_DEFAULT)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_attr; + } + // read the attribute value + if (H5Aread(attr, H5T_NATIVE_INT, &value) < 0) + ret_val = EXIT_FAILURE; + + // do something w/ the attribute value + + H5Aclose(attr); +fail_attr: + H5Fclose(file); +fail_file:; + } + //! <!-- [read] --> + + //! <!-- [update] --> + { + __label__ fail_attr, fail_file; + hid_t file, attr; + + unsigned mode = H5F_ACC_RDWR; + char file_name[] = "f1.h5"; + char attr_name[] = "Χαρακτηριστικό"; + int value = 1234; + + if ((file = H5Fopen(file_name, mode, H5P_DEFAULT)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_file; + } + if ((attr = H5Aopen(file, attr_name, H5P_DEFAULT)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_attr; + } + // update the attribute value + if (H5Awrite(attr, H5T_NATIVE_INT, &value) < 0) + ret_val = EXIT_FAILURE; + + H5Aclose(attr); +fail_attr: + H5Fclose(file); +fail_file:; + } + //! <!-- [update] --> + + //! <!-- [delete] --> + { + __label__ fail_attr, fail_file; + hid_t file; + + unsigned mode = H5F_ACC_RDWR; + char file_name[] = "f1.h5"; + char attr_name[] = "Χαρακτηριστικό"; + + if ((file = H5Fopen(file_name, mode, H5P_DEFAULT)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_file; + } + // delete the attribute + if (H5Adelete(file, attr_name) < 0) { + ret_val = EXIT_FAILURE; + goto fail_attr; + } + +fail_attr: + H5Fclose(file); +fail_file:; + } + //! 
<!-- [delete] --> + + return ret_val; +} diff --git a/doxygen/examples/H5D_examples.c b/doxygen/examples/H5D_examples.c new file mode 100644 index 0000000..aad057d --- /dev/null +++ b/doxygen/examples/H5D_examples.c @@ -0,0 +1,173 @@ +/* -*- c-file-style: "stroustrup" -*- */ + +#include "hdf5.h" + +#include <stdio.h> +#include <stdlib.h> + +int +main(void) +{ + int ret_val = EXIT_SUCCESS; + + //! <!-- [create] --> + { + __label__ fail_lcpl, fail_dset, fail_file; + hid_t file, lcpl, fspace, dset; + + unsigned mode = H5F_ACC_TRUNC; + char file_name[] = "d1.h5"; + // link names can be arbitrary Unicode strings + char dset_name[] = "σύνολο/δεδομένων"; + + if ((file = H5Fcreate(file_name, mode, H5P_DEFAULT, H5P_DEFAULT)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_file; + } + if ((lcpl = H5Pcreate(H5P_LINK_CREATE)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_lcpl; + } + // use UTF-8 encoding for link names + if (H5Pset_char_encoding(lcpl, H5T_CSET_UTF8) < 0) { + ret_val = EXIT_FAILURE; + goto fail_fspace; + } + // create intermediate groups as needed + if (H5Pset_create_intermediate_group(lcpl, 1) < 0) { + ret_val = EXIT_FAILURE; + goto fail_fspace; + } + // create a 1D dataspace + if ((fspace = H5Screate_simple(1, (hsize_t[]){10}, NULL)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_fspace; + } + // create a 32-bit integer dataset + if ((dset = H5Dcreate2(file, dset_name, H5T_STD_I32LE, fspace, lcpl, H5P_DEFAULT, H5P_DEFAULT)) == + H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_dset; + } + + H5Dclose(dset); +fail_dset: + H5Sclose(fspace); +fail_fspace: + H5Pclose(lcpl); +fail_lcpl: + H5Fclose(file); +fail_file:; + } + //! <!-- [create] --> + + //! 
<!-- [read] --> + { + __label__ fail_dset, fail_file; + hid_t file, dset; + + unsigned mode = H5F_ACC_RDONLY; + char file_name[] = "d1.h5"; + // assume a priori knowledge of dataset name and size + char dset_name[] = "σύνολο/δεδομένων"; + int elts[10]; + + if ((file = H5Fopen(file_name, mode, H5P_DEFAULT)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_file; + } + if ((dset = H5Dopen2(file, dset_name, H5P_DEFAULT)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_dset; + } + // read all dataset elements + if (H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, elts) < 0) + ret_val = EXIT_FAILURE; + + // do something w/ the dataset elements + + H5Dclose(dset); +fail_dset: + H5Fclose(file); +fail_file:; + } + //! <!-- [read] --> + + //! <!-- [update] --> + { + __label__ fail_update, fail_fspace, fail_dset, fail_file; + hid_t file, dset, fspace; + + unsigned mode = H5F_ACC_RDWR; + char file_name[] = "d1.h5"; + char dset_name[] = "σύνολο/δεδομένων"; + int new_elts[6][2] = {{-1, 1}, {-2, 2}, {-3, 3}, {-4, 4}, {-5, 5}, {-6, 6}}; + + if ((file = H5Fopen(file_name, mode, H5P_DEFAULT)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_file; + } + if ((dset = H5Dopen2(file, dset_name, H5P_DEFAULT)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_dset; + } + // get the dataset's dataspace + if ((fspace = H5Dget_space(dset)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_fspace; + } + // select the first 5 elements in odd positions + if (H5Sselect_hyperslab(fspace, H5S_SELECT_SET, (hsize_t[]){1}, (hsize_t[]){2}, (hsize_t[]){5}, + NULL) < 0) { + ret_val = EXIT_FAILURE; + goto fail_update; + } + + // (implicitly) select and write the first 5 elements of the second column of NEW_ELTS + if (H5Dwrite(dset, H5T_NATIVE_INT, H5S_ALL, fspace, H5P_DEFAULT, new_elts) < 0) + ret_val = EXIT_FAILURE; + +fail_update: + H5Sclose(fspace); +fail_fspace: + H5Dclose(dset); +fail_dset: + H5Fclose(file); +fail_file:; + } + //! 
<!-- [update] --> + + //! <!-- [delete] --> + { + __label__ fail_delete, fail_file; + hid_t file; + + unsigned mode = H5F_ACC_RDWR; + char file_name[] = "d1.h5"; + char group_name[] = "σύνολο"; + char dset_name[] = "σύνολο/δεδομένων"; + + if ((file = H5Fopen(file_name, mode, H5P_DEFAULT)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_file; + } + // delete (unlink) the dataset + if (H5Ldelete(file, dset_name, H5P_DEFAULT) < 0) { + ret_val = EXIT_FAILURE; + goto fail_delete; + } + // the previous call deletes (unlinks) only the dataset + if (H5Ldelete(file, group_name, H5P_DEFAULT) < 0) { + ret_val = EXIT_FAILURE; + goto fail_delete; + } + +fail_delete: + H5Fclose(file); +fail_file:; + } + + //! <!-- [delete] --> + + return ret_val; +} diff --git a/doxygen/examples/H5F_examples.c b/doxygen/examples/H5F_examples.c new file mode 100644 index 0000000..a7ce6fb --- /dev/null +++ b/doxygen/examples/H5F_examples.c @@ -0,0 +1,187 @@ +/* -*- c-file-style: "stroustrup" -*- */ + +#include "hdf5.h" + +#include <stdio.h> +#include <stdlib.h> + +int +main(void) +{ + int ret_val = EXIT_SUCCESS; + + //! <!-- [life_cycle] --> + { + __label__ fail_fapl, fail_fcpl, fail_file; + hid_t fcpl, fapl, file; + + if ((fcpl = H5Pcreate(H5P_FILE_CREATE)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_fcpl; + } + else { + // adjust the file creation properties + } + + if ((fapl = H5Pcreate(H5P_FILE_ACCESS)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_fapl; + } + else { + // adjust the file access properties + } + + unsigned mode = H5F_ACC_EXCL; + char name[] = "f1.h5"; + + if ((file = H5Fcreate(name, mode, fcpl, fapl)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_file; + } + + // do something useful with FILE + + H5Fclose(file); +fail_file: + H5Pclose(fapl); +fail_fapl: + H5Pclose(fcpl); +fail_fcpl:; + } + //! <!-- [life_cycle] --> + + //! 
<!-- [life_cycle_w_open] --> + { + __label__ fail_fapl, fail_file; + hid_t fapl, file; + + if ((fapl = H5Pcreate(H5P_FILE_ACCESS)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_fapl; + } + else { + // adjust the file access properties + } + + unsigned mode = H5F_ACC_RDWR; + char name[] = "f1.h5"; + + if ((file = H5Fopen(name, mode, fapl)) == H5I_INVALID_HID) { + ret_val = EXIT_FAILURE; + goto fail_file; + } + + // do something useful with FILE + + H5Fclose(file); +fail_file: + H5Pclose(fapl); +fail_fapl:; + } + //! <!-- [life_cycle_w_open] --> + + //! <!-- [minimal] --> + { + unsigned mode = H5F_ACC_TRUNC; + char name[] = "f11.h5"; + + hid_t file = H5Fcreate(name, mode, H5P_DEFAULT, H5P_DEFAULT); + if (file != H5I_INVALID_HID) + H5Fclose(file); + else + ret_val = EXIT_FAILURE; + } + //! <!-- [minimal] --> + + //! <!-- [open] --> + { + unsigned mode = H5F_ACC_RDONLY; + char name[] = "f11.h5"; + + hid_t file = H5Fopen(name, mode, H5P_DEFAULT); + if (file != H5I_INVALID_HID) + H5Fclose(file); + else + ret_val = EXIT_FAILURE; + } + //! <!-- [open] --> + + //! <!-- [flush] --> + { + unsigned mode = H5F_ACC_RDWR; + char name[] = "f11.h5"; + + hid_t file = H5Fopen(name, mode, H5P_DEFAULT); + if (file != H5I_INVALID_HID) { + int step; + for (step = 0; step < 1000; ++step) { + + // do important work & flush every 20 steps + + if (step % 20 == 0) { + if (H5Fflush(file, H5F_SCOPE_LOCAL) < 0) { + perror("H5Fflush failed."); + ret_val = EXIT_FAILURE; + break; + } + } + } + + if (H5Fclose(file) < 0) + perror("H5Fclose failed."); + } + else + ret_val = EXIT_FAILURE; + } + //! <!-- [flush] --> + + //! 
<!-- [libver_bounds] --> + { + unsigned mode = H5F_ACC_RDWR; + char name[] = "f11.h5"; + + hid_t file = H5Fopen(name, mode, H5P_DEFAULT); + if (file != H5I_INVALID_HID) { + if (H5Fset_libver_bounds(file, H5F_LIBVER_EARLIEST, H5F_LIBVER_V18) >= 0) { + + // object creation will not exceed HDF5 version 1.8.x + } + else + perror("H5Fset_libver_bounds failed."); + + if (H5Fclose(file) < 0) + perror("H5Fclose failed."); + } + else + ret_val = EXIT_FAILURE; + } + //! <!-- [libver_bounds] --> + + //! <!-- [mount] --> + { + hid_t file = H5Fopen("f11.h5", H5F_ACC_RDWR, H5P_DEFAULT); + if (file != H5I_INVALID_HID) { + hid_t group, child; + if ((group = H5Gcreate1(file, "mount_point", H5P_DEFAULT)) != H5I_INVALID_HID) { + if ((child = H5Fopen("f1.h5", H5F_ACC_RDONLY, H5P_DEFAULT)) != H5I_INVALID_HID) { + if (H5Fmount(group, ".", child, H5P_DEFAULT) >= 0) { + + // do something useful w/ the mounted file + } + else { + ret_val = EXIT_FAILURE; + perror("H5Fmount failed."); + } + H5Fclose(child); + } + H5Gclose(group); + } + H5Fclose(file); + } + else + ret_val = EXIT_FAILURE; + } + //! <!-- [mount] --> + + return ret_val; +} diff --git a/doxygen/examples/H5Pget_metadata_read_attempts.1.c b/doxygen/examples/H5Pget_metadata_read_attempts.1.c new file mode 100644 index 0000000..da325c0 --- /dev/null +++ b/doxygen/examples/H5Pget_metadata_read_attempts.1.c @@ -0,0 +1,22 @@ +/* Get a copy of file access property list */ +fapl = H5Pcreate(H5P_FILE_ACCESS); + +/* Retrieve the # of read attempts from the file access property list */ +H5Pget_metadata_read_attempts(fapl, &attempts); + +/* + * The value returned in "attempts" will be 1 (default for non-SWMR access). + */ + +/* Set the # of read attempts to 20 */ +H5Pset_metadata_read_attempts(fapl, 20); + +/* Retrieve the # of read attempts from the file access property list */ +H5Pget_metadata_read_attempts(fapl, &attempts); + +/* + * The value returned in "attempts" will be 20 as set. 
+ */ + +/* Close the property list */ +H5Pclose(fapl); diff --git a/doxygen/examples/H5Pget_metadata_read_attempts.2.c b/doxygen/examples/H5Pget_metadata_read_attempts.2.c new file mode 100644 index 0000000..2cd12db --- /dev/null +++ b/doxygen/examples/H5Pget_metadata_read_attempts.2.c @@ -0,0 +1,44 @@ +/* Open the file with SWMR access and default file access property list */ +fid = H5Fopen(FILE, (H5F_ACC_RDONLY | H5F_ACC_SWMR_READ), H5P_DEFAULT); + +/* Get the file's file access property list */ +file_fapl = H5Fget_access_plist(fid); + +/* Retrieve the # of read attempts from the file's file access property list */ +H5Pget_metadata_read_attempts(file_fapl, &attempts); + +/* + * The value returned in "attempts" will be 100 (default for SWMR access). + */ + +/* Close the property list */ +H5Pclose(file_fapl); + +/* Close the file */ +H5Fclose(fid); + +/* Create a copy of file access property list */ +fapl = H5Pcreate(H5P_FILE_ACCESS); + +/* Set the # of read attempts */ +H5Pset_metadata_read_attempts(fapl, 20); + +/* Open the file with SWMR access and the non-default file access property list */ +fid = H5Fopen(FILE, (H5F_ACC_RDONLY | H5F_ACC_SWMR_READ), fapl); + +/* Get the file's file access property list */ +file_fapl = H5Fget_access_plist(fid); + +/* Retrieve the # of read attempts from the file's file access property list */ +H5Pget_metadata_read_attempts(file_fapl, &attempts); + +/* + * The value returned in "attempts" will be 20.
+ */ + +/* Close the property lists */ +H5Pclose(file_fapl); +H5Pclose(fapl); + +/* Close the file */ +H5Fclose(fid); diff --git a/doxygen/examples/H5Pget_metadata_read_attempts.3.c b/doxygen/examples/H5Pget_metadata_read_attempts.3.c new file mode 100644 index 0000000..4b5ea3a --- /dev/null +++ b/doxygen/examples/H5Pget_metadata_read_attempts.3.c @@ -0,0 +1,44 @@ +/* Open the file with non-SWMR access and default file access property list */ +fid = H5Fopen(FILE, H5F_ACC_RDONLY, H5P_DEFAULT); + +/* Get the file's file access property list */ +file_fapl = H5Fget_access_plist(fid); + +/* Retrieve the # of read attempts from the file's file access property list */ +H5Pget_metadata_read_attempts(file_fapl, &attempts); + +/* + * The value returned in "attempts" will be 1 (default for non-SWMR access). + */ + +/* Close the property list */ +H5Pclose(file_fapl); + +/* Close the file */ +H5Fclose(fid); + +/* Create a copy of file access property list */ +fapl = H5Pcreate(H5P_FILE_ACCESS); + +/* Set the # of read attempts */ +H5Pset_metadata_read_attempts(fapl, 20); + +/* Open the file with non-SWMR access and the non-default file access property list */ +fid = H5Fopen(FILE, H5F_ACC_RDONLY, fapl); + +/* Get the file's file access property list */ +file_fapl = H5Fget_access_plist(fid); + +/* Retrieve the # of read attempts from the file's file access property list */ +H5Pget_metadata_read_attempts(file_fapl, &attempts); + +/* + * The value returned in "attempts" will be 1 (default for non-SWMR access).
+ */ + +/* Close the property lists */ +H5Pclose(file_fapl); +H5Pclose(fapl); + +/* Close the file */ +H5Fclose(fid); diff --git a/doxygen/examples/H5Pget_object_flush_cb.c b/doxygen/examples/H5Pget_object_flush_cb.c new file mode 100644 index 0000000..d18f3df --- /dev/null +++ b/doxygen/examples/H5Pget_object_flush_cb.c @@ -0,0 +1,41 @@ +hid_t fapl_id; +unsigned counter; +H5F_object_flush_t *ret_cb; +unsigned * ret_counter; + +/* Create a copy of the file access property list */ +fapl_id = H5Pcreate(H5P_FILE_ACCESS); + +/* Set up the object flush property values */ +/* flush_cb: callback function to invoke when an object flushes (see below) */ +/* counter: user data to pass along to the callback function */ +H5Pset_object_flush_cb(fapl_id, flush_cb, &counter); + +/* Open the file */ +file_id = H5Fopen(FILE, H5F_ACC_RDWR, H5P_DEFAULT); + +/* Get the file access property list for the file */ +fapl = H5Fget_access_plist(file_id); + +/* Retrieve the object flush property values for the file */ +H5Pget_object_flush_cb(fapl, &ret_cb, &ret_counter); +/* ret_cb will point to flush_cb() */ +/* ret_counter will point to counter */ + +/* +. +. +. +. +. +. +*/ + +/* The callback function for the object flush property */ +static herr_t +flush_cb(hid_t obj_id, void *_udata) +{ + unsigned *flush_ct = (unsigned *)_udata; + ++(*flush_ct); + return 0; +} diff --git a/doxygen/examples/H5Pset_metadata_read_attempts.c b/doxygen/examples/H5Pset_metadata_read_attempts.c new file mode 100644 index 0000000..7c2f65d --- /dev/null +++ b/doxygen/examples/H5Pset_metadata_read_attempts.c @@ -0,0 +1,59 @@ +//! 
[SWMR Access] +/* Create a copy of file access property list */ +fapl = H5Pcreate(H5P_FILE_ACCESS); + +/* Set the # of read attempts */ +H5Pset_metadata_read_attempts(fapl, 20); + +/* Open the file with SWMR access and the non-default file access property list */ +fid = H5Fopen(FILE, (H5F_ACC_RDONLY | H5F_ACC_SWMR_READ), fapl); + +/* Get the file's file access property list */ +file_fapl = H5Fget_access_plist(fid); + +/* Retrieve the # of read attempts from the file's file access property list */ +H5Pget_metadata_read_attempts(file_fapl, &attempts); + +/* + * The value returned in "attempts" will be 20. + * The library will use 20 as the number of read attempts + * when reading checksummed metadata in the file + */ + +/* Close the property lists */ +H5Pclose(fapl); +H5Pclose(file_fapl); + +/* Close the file */ +H5Fclose(fid); +//! [SWMR Access] + +//! [non-SWMR Access] +/* Create a copy of file access property list */ +fapl = H5Pcreate(H5P_FILE_ACCESS); + +/* Set the # of read attempts */ +H5Pset_metadata_read_attempts(fapl, 20); + +/* Open the file with non-SWMR access and the non-default file access property list */ +fid = H5Fopen(FILE, H5F_ACC_RDONLY, fapl); + +/* Get the file's file access property list */ +file_fapl = H5Fget_access_plist(fid); + +/* Retrieve the # of read attempts from the file's file access property list */ +H5Pget_metadata_read_attempts(file_fapl, &attempts); + +/* + * The value returned in "attempts" will be 1 (default for non-SWMR access). + * The library will use 1 as the number of read attempts + * when reading checksummed metadata in the file + */ + +/* Close the property lists */ +H5Pclose(fapl); +H5Pclose(file_fapl); + +/* Close the file */ +H5Fclose(fid); +//!
[non-SWMR Access] diff --git a/doxygen/examples/H5Pset_object_flush_cb.c b/doxygen/examples/H5Pset_object_flush_cb.c new file mode 100644 index 0000000..1dfa90d --- /dev/null +++ b/doxygen/examples/H5Pset_object_flush_cb.c @@ -0,0 +1,41 @@ +hid_t file_id, fapl_id; +hid_t dataset_id, dapl_id; +unsigned counter; + +/* Create a copy of the file access property list */ +fapl_id = H5Pcreate(H5P_FILE_ACCESS); + +/* Set up the object flush property values */ +/* flush_cb: callback function to invoke when an object flushes (see below) */ +/* counter: user data to pass along to the callback function */ +H5Pset_object_flush_cb(fapl_id, flush_cb, &counter); + +/* Open the file */ +file_id = H5Fopen(FILE, H5F_ACC_RDWR, H5P_DEFAULT); + +/* Create a group */ +gid = H5Gcreate2(fid, “group”, H5P_DEFAULT, H5P_DEFAULT_H5P_DEFAULT); + +/* Open a dataset */ +dataset_id = H5Dopen2(file_id, DATASET, H5P_DEFAULT); + +/* The flush will invoke flush_cb() with counter */ +H5Dflush(dataset_id); +/* counter will be equal to 1 */ + +/* ... */ + +/* The flush will invoke flush_cb() with counter */ +H5Gflush(gid); +/* counter will be equal to 2 */ + +/* ... */ + +/* The callback function for object flush property */ +static herr_t +flush_cb(hid_t obj_id, void *_udata) +{ + unsigned *flush_ct = (unsigned *)_udata; + ++(*flush_ct); + return 0; +} diff --git a/doxygen/examples/ImageSpec.html b/doxygen/examples/ImageSpec.html new file mode 100644 index 0000000..1b700ff --- /dev/null +++ b/doxygen/examples/ImageSpec.html @@ -0,0 +1,1203 @@ +<!doctype html public "-//w3c//dtd html 4.0 transitional//en"> +<html> +<head> + <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> + <meta name="GENERATOR" content="Mozilla/4.72 [en] (WinNT; U) [Netscape]"> + <title>Image Specification</title> + +The HDF5 specification defines the standard objects and storage for the +standard HDF5 objects. (For information about the HDF5 library, model and +specification, see the HDF documentation.) 
This document is an additional +specification to define a standard profile for how to store image data +in HDF5. Image data in HDF5 is stored as HDF5 datasets with standard attributes +to define the properties of the image. +<p>This specification is primarily concerned with two dimensional raster +data similar to HDF4 Raster Images. Specifications for storing other +types of imagery will be covered in other documents. +<p>This specification defines: +<ul> +<li> +Standard storage and attributes for an Image dataset (<a href="#Sect1">Section +1</a>)</li> + +<li> +Standard storage and attributes for Palettes (<a href="#sect2">Section +2</a>)</li> + +<li> +Standard for associating Palettes with Images. (<a href="#Sect3">Section +3</a>)</li> +</ul> + +<h2> +<a NAME="Sect1"></a>1. HDF5 Image Specification</h2> + +<h3> +1.1 Overview</h3> +Image data is stored as an HDF5 dataset with values of HDF5 class Integer +or Float. A common example would be a two dimensional dataset, with +elements of class Integer, e.g., a two dimensional array of unsigned 8 +bit integers. However, this specification does not limit the dimensions +or number type that may be used for an Image. +<p>The dataset for an image is distinguished from other datasets by giving +it an attribute "CLASS=IMAGE". In addition, the Image dataset may +have an optional attribute "PALETTE" that is an array of object references +for zero or more palettes. The Image dataset may have additional attributes +to describe the image data, as defined in <a href="#Sect1.2">Section 1.2</a>. +<p>A Palette is an HDF5 dataset which contains color map information. +A Palette dataset has an attribute "CLASS=PALETTE" and other attributes +indicating the type and size of the palette, as defined in <a href="#sect2">Section +2.1</a>. A Palette is an independent object, which can be shared +among several Image datasets. +<h3> +<a NAME="Sect1.2"></a>1.2 Image Attributes</h3> +The attributes for the Image are scalars unless otherwise noted.
+The length of String valued attributes should be at least the number of +characters. Optionally, String valued attributes may be stored in a String +longer than the minimum, in which case it must be zero terminated or null +padded. "Required" attributes must always be used. "Optional" attributes +must be used when required. +<br> +<h4> +Attributes</h4> + +<dl> +<dt> +Attribute name="<b>CLASS</b>" (Required)</dt> + +<dd> +This attribute is type H5T_C_S1, with size 5.</dd> + +<dd> +For all Images, the value of this attribute is "IMAGE".</dd> + +<dd> +</dd> + +<dd> +This attribute identifies this data set as intended to be interpreted as +an image that conforms to the specifications on this page.</dd> +</dl> + +<dt> +Attribute name="<b>PALETTE</b>"</dt> + +<dl> +<dd> +A Image dataset within an HDF5 file may optionally specify an array of +palettes to be viewed with. The dataset will have an attribute field called +"<b>PALETTE</b>" which contains a one-dimensional array of object reference +pointers (HDF5 datatype H5T_STD_REF_OBJ) which refer to palettes in the +file. The palette datasets must conform to the Palette specification in +<a href="#sect2">section +2 below</a>. The first palette in this array will be the default palette +that the data may be viewed with.</dd> +</dl> + +<dl> +<dt> +</dt> + +<dt> +Attribute name="<b>IMAGE_SUBCLASS</b>"</dt> + +<dd> +If present, the value of this attribute indicates the type of Palette that +should be used with the Image. This attribute is a scalar of type +H5T_C_S1, with size according to the string plus one. 
The values +are:</dd> + +<dl> +<dt> +"IMAGE_GRAYSCALE" (length 15)</dt> + +<dd> +A grayscale image</dd> + +<dt> +"IMAGE_BITMAP" (length 12)</dt> + +<dd> +A bit map image</dd> + +<dt> +"IMAGE_TRUECOLOR" (length 15)</dt> + +<dd> +A truecolor image</dd> + +<dt> +"IMAGE_INDEXED" (length 13)</dt> + +<dd> +An indexed image</dd> + +<dd> +</dd> +</dl> + +<dt> +Attribute name="<b>INTERLACE_MODE</b>"</dt> + +<dd> +For images with more than one component for each pixel, this optional attribute +specifies the layout of the data. The values are type H5T_C_S1 of length +15. See <a href="#Section1.3">section 1.3</a> for information about the +storage layout for data.</dd> + +<dd> +"INTERLACE_PIXEL" (default): the component value for a pixel are contiguous.</dd> + +<dd> +"INTERLACE_PLANE": each component is stored as a plane.</dd> + +<dt> +</dt> + +<dt> +Attribute name="<b>DISPLAY_ORIGIN</b>"</dt> + +<dd> +This optional attribute indicates the intended orientation of the data +on a two-dimensional raster display. The value indicates which corner +the pixel at (0, 0) should be viewed. The values are type H5T_C_S1 +of length 2. If DISPLAY_ORIGIN is not set, the orientation is undefined.</dd> + +<dd> +"UL": (0,0) is at the upper left.</dd> + +<dd> +"LL": (0,0) is at the lower left.</dd> + +<dd> +"UR": (0,0) is at the upper right.</dd> + +<dd> +"LR": (0,0) is at the lower right.</dd> +</dl> + +<dt> +Attribute name="<b>IMAGE_WHITE_IS_ZERO</b>"</dt> + +<dl> +<dd> +This attribute is of type H5T_NATIVE_UCHAR. 0 = false, 1 = true . +This is used for images with IMAGE_SUBCLASS="IMAGE_GRAYSCALE" or "IMAGE_BITMAP".</dd> +</dl> + +<dl> +<dt> +Attribute name="<b>IMAGE_MINMAXRANGE</b>"</dt> + +<dd> +If present, this attribute is an array of two numbers, of the same HDF5 +datatype as the data. The first element is the minimum value of the +data, and the second is the maximum. 
This is used for images with +IMAGE_SUBCLASS="IMAGE_GRAYSCALE", "IMAGE_BITMAP" or "IMAGE_INDEXED".</dd> +</dl> + +<dt> +Attribute name="<b>IMAGE_BACKGROUNDINDEX</b>"</dt> + +<dl> +<dd> +If set, this attribute indicates the index value that should be interpreted +as the "background color". This attribute is HDF5 type H5T_NATIVE_UINT.</dd> +</dl> + +<dt> +Attribute name="<b>IMAGE_TRANSPARENCY</b>"</dt> + +<dl> +<dd> +If set, this attribute indicates the index value that should be interpreted +as the "transparent color". This attribute is HDF5 type H5T_NATIVE_UINT. +This attribute may not be used for IMAGE_SUBCLASS="IMAGE_TRUECOLOR".</dd> +</dl> + +<dt> +Attribute name="<b>IMAGE_ASPECTRATIO</b>"</dt> + +<dl> +<dd> +If set, this attribute indicates the aspect ratio.</dd> +</dl> + +<dt> +Attribute name="<b>IMAGE_COLORMODEL</b>"</dt> + +<dl> +<dd> +If set, this attribute indicates the color model of Palette that should +be used with the Image. This attribute is of type H5T_C_S1, with +size 3, 4, or 5. The value is one of the color models described in +the Palette specification in <a href="#sect2.2">section 2.2 below</a>. +This attribute may be used only for IMAGE_SUBCLASS="IMAGE_TRUECOLOR" or +"IMAGE_INDEXED".</dd> +</dl> + +<dt> +Attribute name="<b>IMAGE_GAMMACORRECTION</b>"</dt> + +<dl> +<dd> +If set, this attribute gives the Gamma correction. The attribute +is type H5T_NATIVE_FLOAT. This attribute may be used only for IMAGE_SUBCLASS="IMAGE_TRUECOLOR" +or "IMAGE_INDEXED".</dd> +</dl> +Attribute name="<b>IMAGE_VERSION</b>" (Required) +<dl> +<dd> +This attribute is of type H5T_C_S1, with size corresponding to the length +of the version string. This attribute identifies the version number +of this specification to which it conforms. The current version number +is "1.2".</dd> + +<br> +<p> +<br> +<br> +<center><table BORDER=2 BGCOLOR="#FFFFFF" > +<caption><b>Table 1. 
Attributes of an Image Dataset</b></caption> + +<tr> +<td><b>Attribute Name</b></td> + +<td><b>(R = Required</b> +<br><b>O= Optional)</b></td> + +<td><b>Type</b></td> + +<td><b>String Size</b></td> + +<td><b>Value</b></td> +</tr> + +<tr> +<td>CLASS</td> + +<td>R</td> + +<td>String</td> + +<td>5</td> + +<td>"IMAGE"</td> +</tr> + +<tr> +<td>PALETTE</td> + +<td>O</td> + +<td>Array Object References</td> + +<td></td> + +<td><references to Palette datasets><sup>1</sup></td> +</tr> + +<tr> +<td>IMAGE_SUBCLASS</td> + +<td>O<sup>2</sup></td> + +<td>String</td> + +<td>15, +<br>12, +<br>15, +<br>13</td> + +<td> +<dt> +"IMAGE_GRAYSCALE",</dt> + +<dt> +"IMAGE_BITMAP",</dt> + +<dt> +"IMAGE_TRUECOLOR",</dt> + +<dt> +"IMAGE_INDEXED"</dt> +</td> +</tr> + +<tr> +<td>INTERLACE_MODE</td> + +<td>O<sup>3,6</sup></td> + +<td>String</td> + +<td>15</td> + +<td>The layout of components if more than one component per pixel.</td> +</tr> + +<tr> +<td>DISPLAY_ORIGIN</td> + +<td>O</td> + +<td>String</td> + +<td>2</td> + +<td>If set, indicates the intended location of the pixel (0,0).</td> +</tr> + +<tr> +<td>IMAGE_WHITE_IS_ZERO</td> + +<td>O<sup>3,4</sup></td> + +<td>Unsigned Integer</td> + +<td></td> + +<td>0 = false, 1 = true</td> +</tr> + +<tr> +<td>IMAGE_MINMAXRANGE</td> + +<td>O<sup>3,5</sup></td> + +<td>Array [2] <same datatype as data values></td> + +<td></td> + +<td>The (<minimum>, <maximum>) value of the data.</td> +</tr> + +<tr> +<td>IMAGE_BACKGROUNDINDEX</td> + +<td>O<sup>3</sup></td> + +<td>Unsigned Integer</td> + +<td></td> + +<td>The index of the background color.</td> +</tr> + +<tr> +<td>IMAGE_TRANSPARENCY</td> + +<td>O<sup>3,5</sup></td> + +<td>Unsigned Integer</td> + +<td></td> + +<td>The index of the transparent color.</td> +</tr> + +<tr> +<td>IMAGE_ASPECTRATIO</td> + +<td>O<sup>3,4</sup></td> + +<td>Unsigned Integer</td> + +<td></td> + +<td>The aspect ratio.</td> +</tr> + +<tr> +<td>IMAGE_COLORMODEL</td> + +<td>O<sup>3,6</sup></td> + +<td>String</td> + +<td>3, 4, or 5</td> + 
+<td>The color model, as defined below in the Palette specification for +attribute <b>PAL_COLORMODEL</b>.</td> +</tr> + +<tr> +<td>IMAGE_GAMMACORRECTION</td> + +<td>O<sup>3,6</sup></td> + +<td>Float</td> + +<td></td> + +<td>The gamma correction.</td> +</tr> + +<tr> +<td>IMAGE_VERSION</td> + +<td>R</td> + +<td>String</td> + +<td>3</td> + +<td>"1.2"</td> +</tr> +</table></center> + +<dl><font size=-1>1. The first element of the array is the default +Palette.</font> +<br><font size=-1>2. This attribute is <b>required</b> for images +that use one of the standard color map types listed.</font> +<br><font size=-1>3. This attribute is <b>required</b> if set for the source +image, in the case that the image is translated from another file into +HDF5.</font> +<br><font size=-1>4. This applies to: IMAGE_SUBCLASS="IMAGE_GRAYSCALE" +or "IMAGE_BITMAP".</font> +<br><font size=-1>5. This applies to: IMAGE_SUBCLASS="IMAGE_GRAYSCALE", +"IMAGE_BITMAP", or "IMAGE_INDEXED".</font> +<br><font size=-1>6. This applies to: IMAGE_SUBCLASS="IMAGE_TRUECOLOR", +or "IMAGE_INDEXED".</font></dl> +</dl> +Table 2 summarizes the standard attributes for Image datasets using +the common sub-classes. R means that the attribute listed on the leftmost +column is Required for the image subclass on the first row, O means that +the attribute is Optional for that subclass and N that the attribute cannot +be applied to that subclass. The first two rows show the only required +attributes +for all subclasses. +<br> +<table BORDER WIDTH="100%" > +<caption><b>Table 2a. 
Applicability of Attributes to IMAGE sub-classes</b></caption> + +<tr> +<td WIDTH="20%"><b>IMAGE_SUBCLASS</b><sup>1</sup></td> + +<td WIDTH="20%"><b>IMAGE_GRAYSCALE</b></td> + +<td WIDTH="20%"><b>IMAGE_BITMAP</b></td> +</tr> + +<tr> +<td WIDTH="20%">CLASS</td> + +<td WIDTH="20%">R</td> + +<td WIDTH="20%">R</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_VERSION</td> + +<td WIDTH="20%">R</td> + +<td WIDTH="20%">R</td> +</tr> + +<tr> +<td>INTERLACE_MODE</td> + +<td>N</td> + +<td>N</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_WHITE_IS_ZERO</td> + +<td WIDTH="20%">R</td> + +<td WIDTH="20%">R</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_MINMAXRANGE</td> + +<td WIDTH="20%">O</td> + +<td WIDTH="20%">O</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_BACKGROUNDINDEX</td> + +<td WIDTH="20%">O</td> + +<td WIDTH="20%">O</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_TRANSPARENCY</td> + +<td WIDTH="20%">O</td> + +<td WIDTH="20%">O</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_ASPECTRATIO</td> + +<td WIDTH="20%">O</td> + +<td WIDTH="20%">O</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_COLORMODEL</td> + +<td WIDTH="20%">N</td> + +<td WIDTH="20%">N</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_GAMMACORRECTION</td> + +<td WIDTH="20%">N</td> + +<td WIDTH="20%">N</td> +</tr> + +<tr> +<td WIDTH="20%">PALETTE</td> + +<td WIDTH="20%">O</td> + +<td WIDTH="20%">O</td> +</tr> + +<tr> +<td>DISPLAY_ORIGIN</td> + +<td>O</td> + +<td>O</td> +</tr> +</table> + +<blockquote> </blockquote> + +<table BORDER WIDTH="100%" > +<caption><b>Table 2b. 
Applicability of Attributes to IMAGE sub-classes</b></caption> + +<tr> +<td WIDTH="20%"><b>IMAGE_SUBCLASS</b></td> + +<td WIDTH="20%"><b>IMAGE_TRUECOLOR</b></td> + +<td><b>IMAGE_INDEXED</b></td> +</tr> + +<tr> +<td WIDTH="20%">CLASS</td> + +<td WIDTH="20%">R</td> + +<td>R</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_VERSION</td> + +<td WIDTH="20%">R</td> + +<td>R</td> +</tr> + +<tr> +<td>INTERLACE_MODE</td> + +<td>R</td> + +<td>N</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_WHITE_IS_ZERO</td> + +<td WIDTH="20%">N</td> + +<td>N</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_MINMAXRANGE</td> + +<td WIDTH="20%">N</td> + +<td>O</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_BACKGROUNDINDEX</td> + +<td WIDTH="20%">N</td> + +<td>O</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_TRANSPARENCY</td> + +<td WIDTH="20%">N</td> + +<td>O</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_ASPECTRATIO</td> + +<td WIDTH="20%">O</td> + +<td>O</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_COLORMODEL</td> + +<td WIDTH="20%">O</td> + +<td>O</td> +</tr> + +<tr> +<td WIDTH="20%">IMAGE_GAMMACORRECTION</td> + +<td WIDTH="20%">O</td> + +<td>O</td> +</tr> + +<tr> +<td WIDTH="20%">PALETTE</td> + +<td WIDTH="20%">O</td> + +<td>O</td> +</tr> + +<tr> +<td>DISPLAY_ORIGIN</td> + +<td>O</td> + +<td>O</td> +</tr> +</table> + +<h3> +<a NAME="Section1.3"></a>1.3 Storage Layout and Properties for Images</h3> +In the case of an image with more than one component per pixel (e.g., Red, +Green, and Blue), the data may be arranged in one of two ways. Following +HDF4 terminology, the data may be interlaced by pixel or by plane, which +should be indicated by the INTERLACE_MODE attribute. In both +cases, the dataset will have a dataspace with three dimensions, height, +width, and components. The interlace modes specify different orders +for the dimensions. +<br> +<table BORDER COLS=2 WIDTH="100%" > +<caption><b>Table 3. 
Storage of multiple component image data.</b></caption> + +<tr> +<td><b>Interlace Mode</b></td> + +<td><b>Dimensions in the Dataspace</b></td> +</tr> + +<tr> +<td>INTERLACE_PIXEL</td> + +<td>[height][width][pixel components]</td> +</tr> + +<tr> +<td>INTERLACE_PLANE</td> + +<td>[pixel components][height][width]</td> +</tr> +</table> + +<p>For example, consider a 5 (rows) by 10 (column) image, with Red, Green, +and Blue components. Each component is an unsigned byte. In HDF5, +the datatype would be declared as an unsigned 8 bit integer. For +pixel interlace, the dataspace would be a three dimensional array, with +dimensions: [10][5][3]. For plane interleave, the dataspace would +be three dimensions: [3][10][5]. +<p>In the case of images with only one component, the dataspace may be +either a two dimensional array, or a three dimensional array with the third +dimension of size 1. For example, a 5 by 10 image with 8 bit color +indexes would be an HDF5 dataset with type unsigned 8 bit integer. +The dataspace could be either a two dimensional array, with dimensions +[10][5], or three dimensions, with dimensions either [10][5][1] or [1][10][5]. +<p>Image datasets may be stored with any chunking or compression properties +supported by HDF5. +<p><b>A note concerning compatibility with HDF5 GR interface: </b>An Image +dataset is stored as an HDF5 dataset. It is important to note that +the order of the dimensions is the same as for any other HDF5 dataset. +For a two dimensional image that is to be stored as a series of horizontal +scan lines, with the scan lines contiguous (i.e., the fastest changing +dimension is 'width'), the image will have a dataspace with <i>dim[0] = +height</i> and <i>dim[1]</i> = <i>width</i>. This is completely consistent +with all other HDF5 datasets. +<p>Users familiar with HDF4 should be cautioned that <i>this is not the +same as HDF4</i>, and specifically is not consistent with what the HDF4 +GR interface does. +<br> +<h2> +<a NAME="sect2"></a>2. 
HDF5 Palette Specification</h2> + +<h3> +2.1 Overview</h3> +A palette is the means by which color is applied to an image and is also +referred to as a color lookup table. It is a table in which every row contains +the numerical representation of a particular color. In the example of an +8 bit standard RGB color model palette, this numerical representation of +a color is presented as a triplet specifying the intensity of red, green, +and blue components that make up each color. +<center> +<p><img SRC="Palettes.fm.anc.gif" ></center> + +<p>In this example, the color component numeric type is an 8 bit unsigned +integer. While this is most common and recommended for general use, other +component color numeric datatypes, such as a 16 bit unsigned integer , +may be used. This type is specified as the type attribute of the palette +dataset. (see H5Tget_type(), H5Tset_type()) +<p>The minimum and maximum values of the component color numeric are specified +as attribute of the palette dataset. See below (attribute PAL_MINMAXNUMERIC). +If these attributes do not exist, it is assumed that the range of values +will fill the space of the color numeric type. i.e. with an 8 bit unsigned +integer, the valid range would be 0 to 255 for each color component. +<p>The HDF5 palette specification additionally allows for color models +beyond RGB. YUV, HSV, CMY, CMYK, YCbCr color models are supported, and +may be specified as a color model attribute of the palette dataset. <i>(see +"Palette Attributes" for details)</i>. +<p>In HDF 4 and earlier, palettes were limited to 256 colors. The HDF5 +palette specification allows for palettes of varying length. The length +is specified as the number of rows of the palette dataset. +<br> +<br> +<table BORDER COLS=1 WIDTH="100%" BGCOLOR="#666666" > +<tr> +<td><font color="#FFFFFF">Important Note: The specification of the Indexed +Palette will change substantially in the next version. 
The Palette +described here is <i>denigrated</i> and is not supported.</font></td> +</tr> +</table> + +<br> +<table BORDER COLS=1 WIDTH="100%" BGCOLOR="#CCCCCC" > +<tr> +<td><i>Denigrated</i> +<p>In a standard palette, the color entries are indexed directly. HDF5 +supports the notion of a range index table. Such a table defines an ascending +ordered list of ranges that map dataset values to the palette. If a range +index table exists for the palette, the PAL_TYPE attribute will be set +to "RANGEINDEX", and the PAL_RANGEINDEX attribute will contain an object +reference to a range index table array. If not, the PAL_TYPE attribute +either does not exist, or will be set to "STANDARD". +<p>The range index table array consists of a one dimensional array with +the same length as the palette dataset - 1. Ideally, the range index would +be of the same type as the dataset it refers to, however this is not a +requirement. +<p><b>Example 2: A range index array of type floating point</b> +<center> +<p><img SRC="PaletteExample1.gif" ></center> + +<p>The range index array attribute defines the "<i>to</i>" of the range. +Notice that the range index array attribute is one less entry in size than +the palette. The first entry of 0.1259, specifies that all values below +and up to 0.1259 inclusive, will map to the first palette entry. The second +entry signifies that all values greater than 0.1259 up to 0.3278 inclusive, +will map to the second palette entry, etc. All value greater than the last +range index array attribute (100000) map to the last entry in the palette.</td> +</tr> +</table> + +<h3> +<a NAME="sect2.2"></a>2.2. Palette Attributes</h3> +A palette exists in an HDF file as an independent data set with accompanying +attributes. The Palette attributes are scalars except where noted +otherwise. String values should have size the length of the string +value plus one. "Required" attributes must be used. "Optional" +attributes must be used when required. 
+<p>These attributes are defined as follows: +<dl> +<dt> +Attribute name="<b>CLASS</b>" (Required)</dt> + +<dd> +This attribute is of type H5T_C_S1, with size 7.</dd> + +<dd> +For all palettes, the value of this attribute is "PALETTE". This attribute +identifies this palette data set as a palette that conforms to the specifications +on this page.</dd> + +<dt> +Attribute name="<b>PAL_COLORMODEL</b>" (Required)</dt> + +<dd> +This attribute is of type H5T_C_S1, with size 3, 4, or 5.</dd> + +<dd> +Possible values for this are "RGB", "YUV", "CMY", "CMYK", "YCbCr", "HSV".</dd> + +<dd> +This defines the color model that the entries in the palette data set represent.</dd> + +<dl> +<dt> +"RGB"</dt> + +<dd> +Each color index contains a triplet where the first value defines the +red component, second defines the green component, and the third the blue +component.</dd> + +<dt> +"CMY"</dt> + +<dd> +Each color index contains a triplet where the first value defines the +cyan component, second defines the magenta component, and the third the +yellow component.</dd> + +<dt> +"CMYK"</dt> + +<dd> +Each color index contains a quadruplet where the first value defines +the cyan component, second defines the magenta component, the third the +yellow component, and the fourth the black component.</dd> + +<dt> +"YCbCr"</dt> + +<dd> +Class Y encoding model. Each color index contains a triplet where the +first value defines the luminance, second defines the Cb Chrominance, and +the third the Cr Chrominance.</dd> + +<dt> +"YUV"</dt> + +<dd> +Composite encoding color model. Each color index contains a triplet where +the first value defines the luminance component, second defines the +chrominance component, and the third the value component.</dd> + +<dt> +"HSV"</dt> + +<dd> +Each color index contains a triplet where the first value defines the +hue component, second defines the saturation component, and the third the +value component. 
The hue component defines the hue spectrum with a low +value representing magenta/red progressing to a high value which would +represent blue/magenta, passing through yellow, green, cyan. A low value +for the saturation component means less color saturation than a high value. +A low value for <i>value</i> will be darker than a high value.</dd> + +<dd> +</dd> +</dl> + +<dt> +Attribute name="<b>PAL_TYPE</b>" (Required)</dt> + +<dd> +This attribute is of type H5T_C_S1, with size 9 or 10.</dd> + +<dd> +The current supported values for this attribute are : "STANDARD8" or "RANGEINDEX"</dd> + +<dd> +A PAL_TYPE of "STANDARD8" defines a palette dataset such that the first +entry defines index 0, the second entry defines index 1, etc. up until +the length of the palette - 1. This assumes an image dataset with direct +indexes into the palette.</dd> +</dl> + +<dl> +<table BORDER COLS=1 WIDTH="100%" BGCOLOR="#CCCCCC" > +<tr> +<td><i>Denigrated</i> +<p>If the PAL_TYPE is set to "RANGEINDEX", there will be an additional +attribute with a name of "<b>PAL_RANGEINDEX</b>", (See example 2 +for more details)</td> +</tr> +</table> + +<table BORDER COLS=1 WIDTH="100%" BGCOLOR="#CCCCCC" > +<tr> +<td> +<dt> +Attribute name="<b>PAL_RANGEINDEX</b>" <i>(Denigrated)</i></dt> + +<dl> +<dd> +The <b>PAL_RANGEINDEX</b> attribute contains an HDF object reference (HDF5 +datatype H5T_STD_REF_OBJ) pointer which specifies a range index array in +the file to be used for color lookups for the palette. (Only for +PAL_TYPE="RANGEINDEX")</dd> +</dl> +</td> +</tr> +</table> + +<dt> +Attribute name="<b>PAL_MINMAXNUMERIC</b>"</dt> + +<dl> +<dt> +If present, this attribute is an array of two numbers, of the same HDF5 +datatype as the palette elements or color numerics.</dt> + +<br>They specify the minimum and maximum values of the color numeric components. +For example, if the palette was an RGB of type Float, the color numeric +range for Red, Green, and Blue could be set to be between 0.0 and 1.0. 
+The intensity of the color guns would then be scaled accordingly to be +between this minimum and maximum attribute.</dl> +Attribute name="<b>PAL_VERSION</b>" (Required) +<dl>This attribute is of type H5T_C_S1, with size corresponding to the +length of the version string. This attribute identifies the version +number of this specification to which it conforms. The current version +is "1.2".</dl> + +<center><table BORDER=2 BGCOLOR="#FFFFFF" > +<caption><b>Table 4. Attributes of a Palette Dataset</b></caption> + +<tr> +<td><b>Attribute Name</b></td> + +<td><b>(R = Required,</b> +<br><b>O = Optional)</b></td> + +<td><b>Type</b></td> + +<td><b>String Size</b></td> + +<td><b>Value</b></td> +</tr> + +<tr> +<td>CLASS</td> + +<td>R</td> + +<td>String</td> + +<td> +<center>7</center> +</td> + +<td>"PALETTE"</td> +</tr> + +<tr> +<td>PAL_COLORMODEL</td> + +<td>R</td> + +<td>String</td> + +<td> +<center>3, 4, or 5</center> +</td> + +<td>Color Model: "RGB", "YUV", "CMY", "CMYK", "YCbCr", or "HSV"</td> +</tr> + +<tr> +<td>PAL_TYPE</td> + +<td>R</td> + +<td>String</td> + +<td> +<center>9</center> + +<p><br> +<table BORDER COLS=1 WIDTH="100%" BGCOLOR="#CCCCCC" > +<tr> +<td>or 10</td> +</tr> +</table> +</td> + +<td>"STANDARD8" +<table BORDER COLS=1 WIDTH="100%" BGCOLOR="#CCCCCC" > +<tr> +<td>or "RANGEINDEX" <i>(Denigrated)</i></td> +</tr> +</table> +</td> +</tr> + +<tr> +<td> +<table BORDER COLS=1 WIDTH="100%" BGCOLOR="#CCCCCC" > +<tr> +<td><i>Denigrated</i> +<br>RANGE_INDEX</td> +</tr> +</table> +</td> + +<td></td> + +<td> +<table BORDER COLS=1 WIDTH="100%" BGCOLOR="#CCCCCC" > +<tr> +<td>Object Reference </td> +</tr> +</table> +</td> + +<td></td> + +<td> +<table BORDER COLS=1 WIDTH="100%" BGCOLOR="#CCCCCC" > +<tr> +<td>&lt;Object Reference to Dataset of range index values&gt;</td> +</tr> +</table> +</td> +</tr> + +<tr> +<td>PAL_MINMAXNUMERIC</td> + +<td>O</td> + +<td>Array[2] of &lt;same datatype as palette&gt;</td> + +<td></td> + +<td>The first value is the &lt;Minimum value for color values&gt;, 
the second +value is &lt;Maximum value for color values&gt;<sup>2</sup></td> +</tr> + +<tr> +<td>PAL_VERSION</td> + +<td>R</td> + +<td>String</td> + +<td>4</td> + +<td>"1.2"</td> +</tr> +</table></center> + +<dl> +<table BORDER COLS=1 WIDTH="100%" BGCOLOR="#CCCCCC" > +<tr> +<td><font size=-1>1. The RANGE_INDEX attribute is required if the +PAL_TYPE is "RANGEINDEX". Otherwise, the RANGE_INDEX attribute should +be omitted. (Range index is denigrated.)</font></td> +</tr> +</table> +<font size=-1>2. The minimum and maximum are optional. If not +set, the range is assumed to be the maximum range of the number type. +If one of these attributes is set, then both should be set. The value +of the minimum must be less than or equal to the value of the maximum.</font></dl> +</dl> +Table 5 summarizes the uses of the standard attributes for a palette dataset. +R means that the attribute listed on the leftmost column is Required for +the palette type on the first row, O means that the attribute is Optional +for that type and N that the attribute cannot be applied to that type. +The four first rows show the attributes that are always required +for the two palette types. +<br> +<br> +<table BORDER WIDTH="100%" > +<caption><b>Table 5. Applicability of Attributes</b></caption> + +<tr> +<td WIDTH="33%"><b>PAL_TYPE</b></td> + +<td WIDTH="33%"><b>STANDARD8</b></td> + +<td WIDTH="34%"><b>RANGEINDEX</b></td> +</tr> + +<tr> +<td WIDTH="33%">CLASS</td> + +<td WIDTH="33%">R</td> + +<td WIDTH="34%">R</td> +</tr> + +<tr> +<td WIDTH="33%">PAL_VERSION</td> + +<td WIDTH="33%">R</td> + +<td WIDTH="34%">R</td> +</tr> + +<tr> +<td WIDTH="33%">PAL_COLORMODEL</td> + +<td WIDTH="33%">R</td> + +<td WIDTH="34%">R</td> +</tr> + +<tr> +<td WIDTH="33%">RANGE_INDEX</td> + +<td WIDTH="33%">N</td> + +<td WIDTH="34%">R</td> +</tr> + +<tr> +<td WIDTH="33%">PAL_MINMAXNUMERIC</td> + +<td WIDTH="33%">O</td> + +<td WIDTH="34%">O</td> +</tr> +</table> + +<h3> +2.3. 
Storage Layout for Palettes</h3> +The values of the Palette are stored as a dataset. The datatype can +be any HDF 5 atomic numeric type. The dataset will have dimensions +(<tt>nentries</tt> by <tt>ncomponents</tt>), where '<tt>nentries</tt>' +is the number of colors (usually 256) and '<tt>ncomponents'</tt> is the +number of values per color (3 for <b>RGB</b>, 4 for <b>CMYK</b>, etc.) +<br> +<h2> +<a NAME="Sect3"></a>3. Consistency and Correlation of Image and Palette +Attributes</h2> +The objects in this specification are an extension to the base HDF5 specification +and library. They are accessible with the standard HDF5 library, +but the semantics of the objects are not enforced by the base library. +For example, it is perfectly possible to add an attribute called <b>IMAGE</b> +to <i>any</i> dataset, or to include an object reference to <i>any</i> +HDF5 dataset in a <b>PALETTE</b> attribute. This would be a valid +HDF5 file, but not conformant to this specification. The rules defined +in this specification must be implemented with appropriate software, and +applications must use conforming software to assure correctness. +<p>The Image and Palette specifications include several redundant standard +attributes, such as the <b>IMAGE_COLORMODEL</b> and the <b>PAL_COLORMODEL</b>. +These attributes are informative not normative, in that it is acceptable +to attach a Palette to an Image dataset even if their attributes do not +match. Software is not required to enforce consistency, and files +may contain mismatched associations of Images and Palettes. In all +cases, it is up to applications to determine what kinds of images and color +models can be supported. +<p>For example, an Image that was created from a file with an "RGB" may +have a "YUV" Palette in its <b>PALETTE</b> attribute array. 
This +would be a legal HDF5 file and also conforms to this specification, although +it may or may not be correct for a given application.</p> + +</body> +</html> diff --git a/doxygen/examples/PaletteExample1.gif b/doxygen/examples/PaletteExample1.gif Binary files differnew file mode 100644 index 0000000..8694d9d --- /dev/null +++ b/doxygen/examples/PaletteExample1.gif diff --git a/doxygen/examples/Palettes.fm.anc.gif b/doxygen/examples/Palettes.fm.anc.gif Binary files differnew file mode 100644 index 0000000..d344c03 --- /dev/null +++ b/doxygen/examples/Palettes.fm.anc.gif diff --git a/doxygen/examples/TableSpec.html b/doxygen/examples/TableSpec.html new file mode 100644 index 0000000..474176e --- /dev/null +++ b/doxygen/examples/TableSpec.html @@ -0,0 +1,193 @@ +<html> +<head> + <title>HDF5 Table Specification</title> +</head> + +The HDF5 specification defines the standard objects and storage for the +standard HDF5 objects. (For information about the HDF5 library, model and +specification, see the HDF documentation.) This document is an additional +specification to define a standard profile for how to store tables in HDF5. +Table data in HDF5 is stored as HDF5 datasets with standard attributes to define +the properties of the tables. + +<h2> +1. Overview</h2> +A generic table is a sequence of records, each record has a name and a type. +Table data is stored as an HDF5 one dimensional compound dataset. A table +is defined as a collection of records whose values are stored in fixed-length +fields. All records have the same structure and all values in each field have +the same data type. +<p>The dataset for a table is distinguished from other datasets by giving +it an attribute "CLASS=TABLE". +Optional attributes allow the storage of a title for the Table and for +each column, and a fill value for each column. +<h2> +2. Table Attributes</h2> +The attributes for the Table are strings. 
They are written with the <a href="RM_H5LT.html#H5LTset_attribute_string"><code>H5LTset_attribute_string</code></a> +Lite API function. "Required" attributes must always be used. "Optional" attributes +must be used when required. +<br> +<h4> +Attributes</h4> + +<dl> +<dt> +Attribute name="<b>CLASS</b>" (Required)</dt> + +<dd> +This attribute is type H5T_C_S1, with size 5.</dd> + +<dd> +For all Tables, the value of this attribute is "TABLE".</dd> + +<dd> +This attribute identifies this data set as intended to be interpreted as Table that conforms to the specifications on this page.</dd> +</dl> + +<dl> +Attribute name="<b>VERSION</b>" (Required) + +<dd> +This attribute is of type H5T_C_S1, with size corresponding to the length +of the version string. This attribute identifies the version number +of this specification to which it conforms. The current version number +is "0.2".</dd> + +</dl> + +<dl> +<dt> +Attribute name="<b>TITLE</b>" (Optional)</dt> + +<dd> +The <b>TITLE</b> is an optional String that is to be used as the +informative title of the whole table. +The <b>TITLE</b> is set with the parameter <code> table_title</code> of the function +<a href="RM_H5TB.html#H5TBmake_table"> <code> H5TBmake_table</code></a>. </dd> +</dl> + +<dl> +<dt> +Attribute name="<b>FIELD_(n)_NAME</b>" (Required)</dt> + +<dd> +The <b>FIELD_(n)_NAME</b> is an optional String that is to be used as the +informative title of column <b>n</b> of the table. +For each of the fields the word FIELD_ is concatenated with + the zero based field (n) index together with the name of the field.</dd> + +</dl> +<dl> +<dt> +Attribute name="<b>FIELD_(n)_FILL</b>" (Optional)</dt> + +<dd> +The <b>FIELD_(n)_FILL</b> is an optional String that is the fill value for +column <b>n</b> of the table. +For each of the fields the word FIELD_ is concatenated with + the zero based field (n) index together with the fill value, if present. 
+This value is written only when a fill value is defined for the table.</dd> + +</dl> + +<dl> + +<br> +<center><table BORDER=2 BGCOLOR="#FFFFFF" > +<caption><b>Table 1. Attributes of a Table Dataset</b></caption> + +<tr> +<td><b>Attribute Name</b></td> + +<td><b>(R = Required</b> +<br><b>O= Optional)</b></td> + +<td><b>Type</b></td> + +<td><b>String Size</b></td> + +<td><b>Value</b></td> +</tr> + +<tr> +<td>CLASS</td> + +<td>R</td> + +<td>String</td> + +<td>5</td> + +<td>"TABLE"</td> +</tr> + +<tr> +<td>VERSION</td> + +<td>R</td> + +<td>String</td> + +<td>3</td> + +<td>"0.2"</td> +</tr> + +<tr> +<td>TITLE</td> + +<td>O</td> + +<td>String</td> + +<td> </td> + +<td> + +<tr> +<td>FIELD_(n)_NAME</td> + +<td>R</td> + +<td>String</td> + +<td> </td> + +<td> + + +<tr> +<td>FIELD_(n)_FILL</td> + +<td>O*</td> + +<td>String</td> + +<td> </td> + +<td> + +</table> +</center> + + </dl> +<p> +<center> + +</center> +<i>* </i>The attribute FIELD_(n)_FILL is written to the table if a fill value is +specified on the creation of the Table. Otherwise, it is not.<p>The following +section of code shows the calls necessary to the creation of a table. + +<p><code>/* Create a new HDF5 file using default properties. */<br> + file_id = H5Fcreate( "my_table.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT );</code> </p> + +<p><code>/* Call the make table function */<br> +</code> <code>H5TBmake_table( "Table Title", file_id, "Table1", NFIELDS, NRECORDS, dst_size, <br> + field_names, dst_offset, field_type, <br> + chunk_size, fill_data, compress, p_data ) </code> </p> + +<p><code> /* Close the file. 
*/<br> + status = H5Fclose( file_id );</code> </p> + +</body> diff --git a/doxygen/examples/ThreadSafeLibrary.html b/doxygen/examples/ThreadSafeLibrary.html new file mode 100644 index 0000000..8daf386 --- /dev/null +++ b/doxygen/examples/ThreadSafeLibrary.html @@ -0,0 +1,787 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" + "http://www.w3.org/TR/REC-html40/loose.dtd"> +<html lang="en-US"> +<head> + <title>Thread Safe Library</title> +</head> + +<h1>1. Library header files and conditional compilation</h1> + +<p> +The following code is placed at the beginning of H5private.h: +</p> + +<blockquote> + <pre> + #ifdef H5_HAVE_THREADSAFE + #include &lt;pthread.h&gt; + #endif + </pre> +</blockquote> + +<p> +<code>H5_HAVE_THREADSAFE</code> is defined when the HDF-5 library is +compiled with the --enable-threadsafe configuration option. In general, +code for the non-threadsafe version of the HDF-5 library is placed within +the <code>#else</code> part of the conditional compilation. The exceptions +to this rule are the changes to the <code>FUNC_ENTER</code> (in +H5private.h), <code>HRETURN</code> and <code>HRETURN_ERROR</code> (in +H5Eprivate.h) macros (see section 3.2). +</p> + + +<h1>2. 
Global variables/structures</h1> + +<h2>2.1 Global library initialization variable</h2> + +<p> +In the threadsafe implementation, the global library initialization +variable <code>H5_libinit_g</code> is changed to a global structure +consisting of the variable with its associated lock (locks are explained +in section 4.1): +</p> + +<blockquote> + <pre> + hbool_t H5_libinit_g = FALSE; + </pre> +</blockquote> + +<p> +becomes +</p> + +<blockquote> + <pre> + H5_api_t H5_g; + </pre> +</blockquote> + +<p> +where <code>H5_api_t</code> is +</p> + +<blockquote> + <pre> + typedef struct H5_api_struct { + H5_mutex_t init_lock; /* API entrance mutex */ + hbool_t H5_libinit_g; + } H5_api_t; + </pre> +</blockquote> + +<p> +All former references to <code>H5_libinit_g</code> in the library are now +made using the macro <code>H5_INIT_GLOBAL</code>. If the threadsafe +library is to be used, the macro is set to <code>H5_g.H5_libinit_g</code> +instead. +</p> + +<h2>2.2 Global serialization variable</h2> + +<p> +A new global boolean variable <code>H5_allow_concurrent_g</code> is used +to determine if multiple threads are allowed to enter an API call +simultaneously. This is set to <code>FALSE</code>. +</p> + +<p> +All APIs that are allowed to do so have their own local variable that +shadows the global variable and is set to <code>TRUE</code>. In phase 1, +no such APIs exist. +</p> + +<p> +It is defined in <code>H5.c</code> as follows: +</p> + +<blockquote> + <pre> + hbool_t H5_allow_concurrent_g = FALSE; + </pre> +</blockquote> + +<h2>2.3 Global thread initialization variable</h2> + +<p> +The global variable <code>H5_first_init_g</code> of type +<code>pthread_once_t</code> is used to allow only the first thread in the +application process to call an initialization function using +<code>pthread_once</code>. All subsequent calls to +<code>pthread_once</code> by any thread are disregarded. 
+</p> + +<p> +The call sets up the mutex in the global structure <code>H5_g</code> (see +section 3.1) via an initialization function +<code>H5_first_thread_init</code>. The first thread initialization +function is described in section 4.2. +</p> + +<p> +<code>H5_first_init_g</code> is defined in <code>H5.c</code> as follows: +</p> + +<blockquote> + <pre> + pthread_once_t H5_first_init_g = PTHREAD_ONCE_INIT; + </pre> +</blockquote> + +<h2>2.4 Global key for per-thread error stacks</h2> + +<p> +A global pthread-managed key <code>H5_errstk_key_g</code> is used to +allow pthreads to maintain a separate error stack (of type +<code>H5E_t</code>) for each thread. This is defined in <code>H5.c</code> +as: +</p> + +<blockquote> + <pre> + pthread_key_t H5_errstk_key_g; + </pre> +</blockquote> + +<p> +Error stack management is described in section 4.3. +</p> + +<h2>2.5 Global structure and key for thread cancellation prevention</h2> + +<p> +We need to preserve the thread cancellation status of each thread +individually by using a key <code>H5_cancel_key_g</code>. The status is +preserved using a structure (of type <code>H5_cancel_t</code>) which +maintains the cancellability state of the thread before it entered the +library and a count (which works very much like the recursive lock +counter) which keeps track of the number of API calls the thread makes +within the library. +</p> + +<p> +The structure is defined in <code>H5private.h</code> as: +</p> + +<blockquote> + <pre> + /* cancelability structure */ + typedef struct H5_cancel_struct { + int previous_state; + unsigned int cancel_count; + } H5_cancel_t; + </pre> +</blockquote> + +<p> +Thread cancellation is described in section 4.4. +</p> + + +<h1>3. 
Changes to Macro expansions</h1> + +<h2>3.1 Changes to FUNC_ENTER</h2> + +<p> +The <code>FUNC_ENTER</code> macro is now extended to include macro calls +to initialize first threads, disable cancellability and wraps a lock +operation around the checking of the global initialization flag. It +should be noted that the cancellability should be disabled before +acquiring the lock on the library. Doing so otherwise would allow the +possibility that the thread be cancelled just after it has acquired the +lock on the library and in that scenario, if the cleanup routines are not +properly set, the library would be permanently locked out. +</p> + +<p> +The additional macro code and new macro definitions can be found in +Appendix E.1 to E.5. The changes are made in <code>H5private.h</code>. +</p> + +<h2>3.2 Changes to HRETURN and HRETURN_ERROR</h2> + +<p> +The <code>HRETURN</code> and <code>HRETURN_ERROR</code> macros are the +counterparts to the <code>FUNC_ENTER</code> macro described in section +3.1. <code>FUNC_LEAVE</code> makes a macro call to <code>HRETURN</code>, +so it is also covered here. +</p> + +<p> +The basic changes to these two macros involve adding macro calls to call +an unlock operation and re-enable cancellability if necessary. It should +be noted that the cancellability should be re-enabled only after the +thread has released the lock to the library. The consequence of doing +otherwise would be similar to that described in section 3.1. +</p> + +<p> +The additional macro code and new macro definitions can be found in +Appendix E.9 to E.9. The changes are made in <code>H5Eprivate.h</code>. +</p> + +<h1>4. Implementation of threadsafe functionality</h1> + +<h2>4.1 Recursive Locks</h2> + +<p> +A recursive mutex lock m allows a thread t1 to successfully lock m more +than once without blocking t1. Another thread t2 will block if t2 tries +to lock m while t1 holds the lock to m. 
If t1 makes k lock calls on m, +then it also needs to make k unlock calls on m before it releases the +lock. +</p> + +<p> +Our implementation of recursive locks is built on top of a pthread mutex +lock (which is not recursive). It makes use of a pthread condition +variable to have unsuccessful threads wait on the mutex. Waiting threads +are awaken by a signal from the final unlock call made by the thread +holding the lock. +</p> + +<p> +Recursive locks are defined to be the following type +(<code>H5private.h</code>): +</p> + +<blockquote> + <pre> + typedef struct H5_mutex_struct { + pthread_t owner_thread; /* current lock owner */ + pthread_mutex_t atomic_lock; /* lock for atomicity of new mechanism */ + pthread_cond_t cond_var; /* condition variable */ + unsigned int lock_count; + } H5_mutex_t; + </pre> +</blockquote> + +<p> +Detailed implementation code can be found in Appendix A. The +implementation changes are made in <code>H5TS.c</code>. +</p> + +<h2>4.2 First thread initialization</h2> + +<p> +Because the mutex lock associated with a recursive lock cannot be +statically initialized, a mechanism is required to initialize the +recursive lock associated with <code>H5_g</code> so that it can be used +for the first time. +</p> + +<p> +The pthreads library allows this through the pthread_once call which as +described in section 3.3 allows only the first thread accessing the +library in an application to initialize <code>H5_g</code>. +</p> + +<p> +In addition to initializing <code>H5_g</code>, it also initializes the +key (see section 3.4) for use with per-thread error stacks (see section +4.3). +</p> + +<p> +The first thread initialization mechanism is implemented as the function +call <code>H5_first_thread_init()</code> in <code>H5TS.c</code>. This is +described in appendix B. +</p> + +<h2>4.3 Per-thread error stack management</h2> + +<p> +Pthreads allows individual threads to access dynamic and persistent +per-thread data through the use of keys. 
Each key is associated with +a table that maps threads to data items. Keys can be initialized by +<code>pthread_key_create()</code> in pthreads (see sections 3.4 and 4.2). +Per-thread data items are accessed using a key through the +<code>pthread_getspecific()</code> and <code>pthread_setspecific()</code> +calls to read and write to the association table respectively. +</p> + +<p> +Per-thread error stacks are accessed through the key +<code>H5_errstk_key_g</code> which is initialized by the first thread +initialization call (see section 4.2). +</p> + +<p> +In the non-threadsafe version of the library, there is a global stack +variable <code>H5E_stack_g[1]</code> which is no longer defined in the +threadsafe version. At the same time, the macro call to gain access to +the error stack <code>H5E_get_my_stack</code> is changed from: +</p> + +<blockquote> + <pre> + #define H5E_get_my_stack() (H5E_stack_g+0) + </pre> +</blockquote> + +<p> +to: +</p> + +<blockquote> + <pre> + #define H5E_get_my_stack() H5E_get_stack() + </pre> +</blockquote> + +<p> +where <code>H5E_get_stack()</code> is a surrogate function that does the +following operations: +</p> + +<ol> + <li>if a thread is attempting to get an error stack for the first + time, the error stack is dynamically allocated for the thread and + associated with <code>H5_errstk_key_g</code> using + <code>pthread_setspecific()</code>. The way we detect if it is the + first time is through <code>pthread_getspecific()</code> which + returns <code>NULL</code> if no previous value is associated with + the thread using the key.</li> + + <li>if <code>pthread_getspecific()</code> returns a non-null value, + then that is the pointer to the error stack associated with the + thread and the stack can be used as usual.</li> +</ol> + +<p> +A final change to the error reporting routines is as follows; the current +implementation reports errors to always be detected at thread 0. 
In the +threadsafe implementation, this is changed to report the number returned +by a call to <code>pthread_self()</code>. +</p> + +<p> +The change in code (reflected in <code>H5Eprint</code> of file +<code>H5E.c</code>) is as follows: +</p> + +<blockquote> + <pre> + #ifdef H5_HAVE_THREADSAFE + fprintf (stream, "HDF5-DIAG: Error detected in thread %d." + ,pthread_self()); + #else + fprintf (stream, "HDF5-DIAG: Error detected in thread 0."); + #endif + </pre> +</blockquote> + +<p> +Code for <code>H5E_get_stack()</code> can be found in Appendix C. All the +above changes were made in <code>H5E.c</code>. +</p> + +<h2>4.4 Thread Cancellation safety</h2> + +<p> +To prevent thread cancellations from killing a thread while it is in the +library, we maintain per-thread information about the cancellability +status of the thread before it entered the library so that we can restore +that same status when the thread leaves the library. +</p> + +<p> +By <i>enter</i> and <i>leave</i> the library, we mean the points when a +thread makes an API call from a user application and the time that API +call returns. Other API or callback function calls made from within that +API call are considered <i>within</i> the library. +</p> + +<p> +Because other API calls may be made from within the first API call, we +need to maintain a counter to determine which was the first and +correspondingly the last return. +</p> + +<p> +When a thread makes an API call, the macro <code>H5_API_SET_CANCEL</code> +calls the worker function <code>H5_cancel_count_inc()</code> which does +the following: +</p> + +<ol> + <li>if this is the first time the thread has entered the library, + a new cancellability structure needs to be assigned to it.</li> + <li>if the thread is already within the library when the API call is + made, then cancel_count is simply incremented. 
Otherwise, we set + the cancellability state to <code>PTHREAD_CANCEL_DISABLE</code> + while storing the previous state into the cancellability structure. + <code>cancel_count</code> is also incremented in this case.</li> +</ol> + +<p> +When a thread leaves an API call, the macro +<code>H5_API_UNSET_CANCEL</code> calls the worker function +<code>H5_cancel_count_dec()</code> which does the following: +</p> + +<ol> + <li>if <code>cancel_count</code> is greater than 1, indicating that the + thread is not yet about to leave the library, then + <code>cancel_count</code> is simply decremented.</li> + <li>otherwise, we reset the cancellability state back to its original + state before it entered the library and decrement the count (back + to zero).</li> +</ol> + +<p> +<code>H5_cancel_count_inc</code> and <code>H5_cancel_count_dec</code> are +described in Appendix D and may be found in <code>H5TS.c</code>. +</p> + +<h1>5. Test programs</h1> + +<p> +Except where stated, all tests involve 16 simultaneous threads that make +use of HDF-5 API calls without any explicit synchronization typically +required in a non-threadsafe environment. +</p> + +<h2>5.1 Data set create and write</h2> + +<p> +The test program sets up 16 threads to simultaneously create 16 +different datasets named from <i>zero</i> to <i>fifteen</i> for a single +file and then writing an integer value into that dataset equal to the +dataset's named value. +</p> + +<p> +The main thread would join with all 16 threads and attempt to match the +resulting HDF-5 file with expected results - that each dataset contains +the correct value (0 for <i>zero</i>, 1 for <i>one</i> etc ...) and all +datasets were correctly created. +</p> + +<p> +The test is implemented in the file <code>ttsafe_dcreate.c</code>. +</p> + +<h2>5.2 Test on error stack</h2> + +<p> +The error stack test is one in which 16 threads simultaneously try to +create datasets with the same name. 
The result, when properly serialized, +should be equivalent to 16 attempts to create the dataset with the same +name. +</p> + +<p> +The error stack implementation runs correctly if it reports 15 instances +of the dataset name conflict error and finally generates a correct HDF-5 +containing that single dataset. Each thread should report its own stack +of errors with a thread number associated with it. +</p> + +<p> +The test is implemented in the file <code>ttsafe_error.c</code>. +</p> + +<h2>5.3 Test on cancellation safety</h2> + +<p> +The main idea in thread cancellation safety is as follows; a child thread +is spawned to create and write to a dataset. Following that, it makes a +<code>H5Diterate</code> call on that dataset which activates a callback +function. +</p> + +<p> +A deliberate barrier is invoked at the callback function which waits for +both the main and child thread to arrive at that point. After that +happens, the main thread proceeds to make a thread cancel call on the +child thread while the latter sleeps for 3 seconds before proceeding to +write a new value to the dataset. +</p> + +<p> +After the iterate call, the child thread logically proceeds to wait +another 3 seconds before writing another newer value to the dataset. +</p> + +<p> +The test is correct if the main thread manages to read the second value +at the end of the test. This means that cancellation did not take place +until the end of the iteration call despite of the 3 second wait within +the iteration callback and the extra dataset write operation. +Furthermore, the cancellation should occur before the child can proceed +to write the last value into the dataset. +</p> + +<h2>5.4 Test on attribute creation</h2> + +<p> +A main thread makes 16 threaded calls to <code>H5Acreate</code> with a +generated name for each attribute. Sixteen attributes should be created +for the single dataset in random (chronological) order and receive values +depending on its generated attribute name (e.g. 
<i>attrib010</i> would +receive the value 10). +</p> + +<p> +After joining with all child threads, the main thread proceeds to read +each attribute by generated name to see if the value tallies. Failure is +detected if the attribute name does not exist (meaning they were never +created) or if the wrong values were read back. +</p> + +<h1>A. Recursive Lock implementation code</h1> + +<blockquote> + <pre> + void H5_mutex_init(H5_mutex_t *H5_mutex) + { + H5_mutex->owner_thread = NULL; + pthread_mutex_init(&H5_mutex->atomic_lock, NULL); + pthread_cond_init(&H5_mutex->cond_var, NULL); + H5_mutex->lock_count = 0; + } + + void H5_mutex_lock(H5_mutex_t *H5_mutex) + { + pthread_mutex_lock(&H5_mutex->atomic_lock); + + if (pthread_equal(pthread_self(), H5_mutex->owner_thread)) { + /* already owned by self - increment count */ + H5_mutex->lock_count++; + } else { + if (H5_mutex->owner_thread == NULL) { + /* no one else has locked it - set owner and grab lock */ + H5_mutex->owner_thread = pthread_self(); + H5_mutex->lock_count = 1; + } else { + /* if already locked by someone else */ + while (1) { + pthread_cond_wait(&H5_mutex->cond_var, &H5_mutex->atomic_lock); + + if (H5_mutex->owner_thread == NULL) { + H5_mutex->owner_thread = pthread_self(); + H5_mutex->lock_count = 1; + break; + } /* else do nothing and loop back to wait on condition*/ + } + } + } + + pthread_mutex_unlock(&H5_mutex->atomic_lock); + } + + void H5_mutex_unlock(H5_mutex_t *H5_mutex) + { + pthread_mutex_lock(&H5_mutex->atomic_lock); + H5_mutex->lock_count--; + + if (H5_mutex->lock_count == 0) { + H5_mutex->owner_thread = NULL; + pthread_cond_signal(&H5_mutex->cond_var); + } + pthread_mutex_unlock(&H5_mutex->atomic_lock); + } + </pre> +</blockquote> + +<h1>B. 
First thread initialization</h1> + +<blockquote> + <pre> + void H5_first_thread_init(void) + { + /* initialize global API mutex lock */ + H5_g.H5_libinit_g = FALSE; + H5_g.init_lock.owner_thread = NULL; + pthread_mutex_init(&H5_g.init_lock.atomic_lock, NULL); + pthread_cond_init(&H5_g.init_lock.cond_var, NULL); + H5_g.init_lock.lock_count = 0; + + /* initialize key for thread-specific error stacks */ + pthread_key_create(&H5_errstk_key_g, NULL); + + /* initialize key for thread cancellability mechanism */ + pthread_key_create(&H5_cancel_key_g, NULL); + } + </pre> +</blockquote> + + +<h1>C. Per-thread error stack acquisition</h1> + +<blockquote> + <pre> + H5E_t *H5E_get_stack(void) + { + H5E_t *estack; + + if (estack = pthread_getspecific(H5_errstk_key_g)) { + return estack; + } else { + /* no associated value with current thread - create one */ + estack = (H5E_t *)malloc(sizeof(H5E_t)); + pthread_setspecific(H5_errstk_key_g, (void *)estack); + return estack; + } + } + </pre> +</blockquote> + +<h1>D. 
Thread cancellation mechanisms</h1> + +<blockquote> + <pre> + void H5_cancel_count_inc(void) + { + H5_cancel_t *cancel_counter; + + if (cancel_counter = pthread_getspecific(H5_cancel_key_g)) { + /* do nothing here */ + } else { + /* + * first time thread calls library - create new counter and + * associate with key + */ + cancel_counter = (H5_cancel_t *)malloc(sizeof(H5_cancel_t)); + cancel_counter->cancel_count = 0; + pthread_setspecific(H5_cancel_key_g, (void *)cancel_counter); + } + + if (cancel_counter->cancel_count == 0) { + /* thread entering library */ + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, + &(cancel_counter->previous_state)); + } + + cancel_counter->cancel_count++; + } + + void H5_cancel_count_dec(void) + { + H5_cancel_t *cancel_counter = pthread_getspecific(H5_cancel_key_g); + + if (cancel_counter->cancel_count == 1) + pthread_setcancelstate(cancel_counter->previous_state, NULL); + + cancel_counter->cancel_count--; + } + </pre> +</blockquote> + +<h1>E. Macro expansion codes</h1> + +<h2>E.1 <code>FUNC_ENTER</code></h2> + +<blockquote> + <pre> + /* Initialize the library */ \ + H5_FIRST_THREAD_INIT \ + H5_API_UNSET_CANCEL \ + H5_API_LOCK_BEGIN \ + if (!(H5_INIT_GLOBAL)) { \ + H5_INIT_GLOBAL = TRUE; \ + if (H5_init_library() < 0) { \ + HRETURN_ERROR (H5E_FUNC, H5E_CANTINIT, err, \ + "library initialization failed"); \ + } \ + } \ + H5_API_LOCK_END \ + : + : + : + </pre> +</blockquote> + +<h2>E.2 <code>H5_FIRST_THREAD_INIT</code></h2> + +<blockquote> + <pre> + /* Macro for first thread initialization */ + #define H5_FIRST_THREAD_INIT \ + pthread_once(&H5_first_init_g, H5_first_thread_init); + </pre> +</blockquote> + + +<h2>E.3 <code>H5_API_UNSET_CANCEL</code></h2> + +<blockquote> + <pre> + #define H5_API_UNSET_CANCEL \ + if (H5_IS_API(FUNC)) { \ + H5_cancel_count_inc(); \ + } + </pre> +</blockquote> + + +<h2>E.4 <code>H5_API_LOCK_BEGIN</code></h2> + +<blockquote> + <pre> + #define H5_API_LOCK_BEGIN \ + if (H5_IS_API(FUNC)) { \ + 
H5_mutex_lock(&H5_g.init_lock); + </pre> +</blockquote> + + +<h2>E.5 <code>H5_API_LOCK_END</code></h2> + +<blockquote> + <pre> + #define H5_API_LOCK_END } + </pre> +</blockquote> + + +<h2>E.6 <code>HRETURN</code> and <code>HRETURN_ERROR</code></h2> + +<blockquote> + <pre> + : + : + H5_API_UNLOCK_BEGIN \ + H5_API_UNLOCK_END \ + H5_API_SET_CANCEL \ + return ret_val; \ + } + </pre> +</blockquote> + +<h2>E.7 <code>H5_API_UNLOCK_BEGIN</code></h2> + +<blockquote> + <pre> + #define H5_API_UNLOCK_BEGIN \ + if (H5_IS_API(FUNC)) { \ + H5_mutex_unlock(&H5_g.init_lock); + </pre> +</blockquote> + +<h2>E.8 <code>H5_API_UNLOCK_END</code></h2> + +<blockquote> + <pre> + #define H5_API_UNLOCK_END } + </pre> +</blockquote> + + +<h2>E.9 <code>H5_API_SET_CANCEL</code></h2> + +<blockquote> + <pre> + #define H5_API_SET_CANCEL \ + if (H5_IS_API(FUNC)) { \ + H5_cancel_count_dec(); \ + } + </pre> +</blockquote> + +<h2>By Chee Wai Lee</h2> +<h4>By Bill Wendling</h4> + +</body> +</html> diff --git a/doxygen/examples/VFL.html b/doxygen/examples/VFL.html new file mode 100644 index 0000000..9776f96 --- /dev/null +++ b/doxygen/examples/VFL.html @@ -0,0 +1,1601 @@ +<HTML> +<HEAD> +<!-- This HTML file has been created by texi2html 1.51 + from VFL.texi on 18 November 1999 --> + +<TITLE>HDF5 Virtual File Layer</TITLE> +</HEAD> + + +<!-- + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. 
It can also be found at * + * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have * + * access to either file, you may request a copy from help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + --> + + +<BODY> + +<strong>Revision History</strong> +<p>Initial document, 18 November 1999.</p> + +<p>Updated on 10/24/00, Quincey Koziol</p> + +<p>Added the section “Programming Note for C++ Developers Using C +Functions,” 08/23/2012, Mark Evans + + + +<P> +<P><HR><P> +<H1>Table of Contents</H1> +<UL> +<LI><A NAME="TOC1" HREF="#SEC1">Introduction</A> +<LI><A NAME="TOC2" HREF="#SEC2">Using a File Driver</A> +<UL> +<LI><A NAME="TOC3" HREF="#SEC3">Driver Header Files</A> +<LI><A NAME="TOC4" HREF="#SEC4">Creating and Opening Files</A> +<LI><A NAME="TOC5" HREF="#SEC5">Performing I/O</A> +<LI><A NAME="TOC6" HREF="#SEC6">File Driver Interchangeability</A> +</UL> +<LI><A NAME="TOC7" HREF="#SEC7">Implementation of a Driver</A> +<UL> +<LI><A NAME="TOC8" HREF="#SEC8">Mode Functions</A> +<LI><A NAME="TOC9" HREF="#SEC9">File Functions</A> +<UL> +<LI><A NAME="TOC10" HREF="#SEC10">Opening Files</A> +<LI><A NAME="TOC11" HREF="#SEC11">Closing Files</A> +<LI><A NAME="TOC12" HREF="#SEC12">File Keys</A> +<LI><A NAME="TOC13" HREF="#SEC13">Saving Modes Across Opens</A> +</UL> +<LI><A NAME="TOC14" HREF="#SEC14">Address Space Functions</A> +<UL> +<LI><A NAME="TOC15" HREF="#SEC15">Userblock and Superblock</A> +<LI><A NAME="TOC16" HREF="#SEC16">Allocation of Format Regions</A> +<LI><A NAME="TOC17" HREF="#SEC17">Freeing Format Regions</A> +<LI><A NAME="TOC18" HREF="#SEC18">Querying Address Range</A> +</UL> +<LI><A NAME="TOC19" HREF="#SEC19">Data Functions</A> +<UL> +<LI><A NAME="TOC20" HREF="#SEC20">Contiguous I/O Functions</A> +<LI><A NAME="TOC21" HREF="#SEC21">Flushing Cached Data</A> +</UL> +<LI><A NAME="TOC22" HREF="#SEC22">Optimization Functions</A> +<LI><A NAME="TOC23" HREF="#SEC23">Registration of a Driver</A> + <ul> + <li><a 
name="TOCProgNote" href="#SECProgNote"> + Programming Note for C++ Developers Using C Functions</a> + </li> + </ul> +<LI><A NAME="TOC24" HREF="#SEC24">Querying Driver Information</A> +</UL> +<LI><A NAME="TOC25" HREF="#SEC25">Miscellaneous</A> +</UL> +<P><HR><P> + + +<H1><A NAME="SEC1" HREF="#TOC1">Introduction</A></H1> + +<P> +The HDF5 file format describes how HDF5 data structures and dataset raw +data are mapped to a linear <STRONG>format address space</STRONG> and the HDF5 +library implements that bidirectional mapping in terms of an +API. However, the HDF5 format specifications do <EM>not</EM> indicate how +the format address space is mapped onto storage and HDF (version 5 and +earlier) simply mapped the format address space directly onto a single +file by convention. + +</P> +<P> +Since early versions of HDF5 it became apparent that users want the ability to +map the format address space onto different types of storage (a single file, +multiple files, local memory, global memory, network distributed global +memory, a network protocol, <I>etc</I>.) with various types of maps. For +instance, some users want to be able to handle very large format address +spaces on operating systems that support only 2GB files by partitioning the +format address space into equal-sized parts each served by a separate +file. Other users want the same multi-file storage capability but want to +partition the address space according to purpose (raw data in one file, object +headers in another, global heap in a third, <I>etc.</I>) in order to improve I/O +speeds. + +</P> +<P> +In fact, the number of storage variations is probably larger than the +number of methods that the HDF5 team is capable of implementing and +supporting. 
Therefore, a <STRONG>Virtual File Layer</STRONG> API is being +implemented which will allow application teams or departments to design +and implement their own mapping between the HDF5 format address space +and storage, with each mapping being a separate <STRONG>file driver</STRONG> +(possibly written in terms of other file drivers). The HDF5 team will +provide a small set of useful file drivers which will also serve as +examples for those who which to write their own: + +</P> +<DL COMPACT> + +<DT><CODE>H5FD_SEC2</CODE> +<DD> +This is the default driver which uses Posix file-system functions like +<CODE>read</CODE> and <CODE>write</CODE> to perform I/O to a single file. All I/O +requests are unbuffered although the driver does optimize file seeking +operations to some extent. + +<DT><CODE>H5FD_STDIO</CODE> +<DD> +This driver uses functions from <TT>`stdio.h'</TT> to perform buffered I/O +to a single file. + +<DT><CODE>H5FD_CORE</CODE> +<DD> +This driver performs I/O directly to memory and can be used to create small +temporary files that never exist on permanent storage. This type of storage is +generally very fast since the I/O consists only of memory-to-memory copy +operations. + +<DT><CODE>H5FD_MPIIO</CODE> +<DD> +This is the driver of choice for accessing files in parallel using MPI and +MPI-IO. It is only predefined if the library is compiled with parallel I/O +support. + +<DT><CODE>H5FD_FAMILY</CODE> +<DD> +Large format address spaces are partitioned into more manageable pieces and +sent to separate storage locations using an underlying driver of the user's +choice. The <CODE>h5repart</CODE> tool can be used to change the sizes of the +family members when stored as files or to convert a family of files to a +single file or vice versa. + +<DT><CODE>H5FD_SPLIT</CODE> +<DD> +The format address space is split into meta data and raw data and each is +mapped onto separate storage using underlying drivers of the user's +choice. 
The meta data storage can be read by itself (for limited +functionality) or both files can be accessed together. +</DL> + + + +<H1><A NAME="SEC2" HREF="#TOC2">Using a File Driver</A></H1> + +<P> +Most application writers will use a driver defined by the HDF5 library or +contributed by another programming team. This chapter describes how existing +drivers are used. + +</P> + + + +<H2><A NAME="SEC3" HREF="#TOC3">Driver Header Files</A></H2> + +<P> +Each file driver is defined in its own public header file which should +be included by any application which plans to use that driver. The +predefined drivers are in header files whose names begin with +<SAMP>`H5FD'</SAMP> followed by the driver name and <SAMP>`.h'</SAMP>. The <TT>`hdf5.h'</TT> +header file includes all the predefined driver header files. + +</P> +<P> +Once the appropriate header file is included a symbol of the form +<SAMP>`H5FD_'</SAMP> followed by the upper-case driver name will be the driver +identification number.<A NAME="DOCF1" HREF="#FOOT1">(1)</A> However, the +value may change if the library is closed (<I>e.g.</I>, by calling +<CODE>H5close</CODE>) and the symbol is referenced again. + +</P> + + +<H2><A NAME="SEC4" HREF="#TOC4">Creating and Opening Files</A></H2> + +<P> +In order to create or open a file one must define the method by which the +storage is accessed<A NAME="DOCF2" HREF="#FOOT2">(2)</A> and does so by creating a file access property list<A NAME="DOCF3" HREF="#FOOT3">(3)</A> which is passed to the <CODE>H5Fcreate</CODE> or +<CODE>H5Fopen</CODE> function. 
A default file access property list is created by +calling <CODE>H5Pcreate</CODE> and then the file driver information is inserted by +calling a driver initialization function such as <CODE>H5Pset_fapl_family</CODE>: + +</P> + +<PRE> +hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); +size_t member_size = 100*1024*1024; /*100MB*/ +H5Pset_fapl_family(fapl, member_size, H5P_DEFAULT); +hid_t file = H5Fcreate("foo%05d.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl); +H5Pclose(fapl); +</PRE> + +<P> +Each file driver will have its own initialization function +whose name is <CODE>H5Pset_fapl_</CODE> followed by the driver name and which +takes a file access property list as the first argument followed by +additional driver-dependent arguments. + +</P> +<P> +An alternative to using the driver initialization function is to set the +driver directly using the <CODE>H5Pset_driver</CODE> function.<A NAME="DOCF4" HREF="#FOOT4">(4)</A> Its second argument is the file driver identifier, which may +have a different numeric value from run to run depending on the order in which +the file drivers are registered with the library. The third argument +encapsulates the additional arguments of the driver initialization +function. This method only works if the file driver writer has made the +driver-specific property list structure a public datatype, which is +often not the case. 
+ +</P> + +<PRE> +hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); +static H5FD_family_fapl_t fa = {100*1024*1024, H5P_DEFAULT}; +H5Pset_driver(fapl, H5FD_FAMILY, &fa); +hid_t file = H5Fcreate("foo.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl); +H5Pclose(fapl); +</PRE> + +<P> +It is also possible to query the file driver information from a file access +property list by calling <CODE>H5Pget_driver</CODE> to determine the driver and then +calling a driver-defined query function to obtain the driver information: + +</P> + +<PRE> +hid_t driver = H5Pget_driver(fapl); +if (H5FD_SEC2==driver) { + /*nothing further to get*/ +} else if (H5FD_FAMILY==driver) { + hid_t member_fapl; + haddr_t member_size; + H5Pget_fapl_family(fapl, &member_size, &member_fapl); +} else if (....) { + .... +} +</PRE> + + + +<H2><A NAME="SEC5" HREF="#TOC5">Performing I/O</A></H2> + +<P> +The <CODE>H5Dread</CODE> and <CODE>H5Dwrite</CODE> functions transfer data between +application memory and the file. They both take an optional data transfer +property list which has some general driver-independent properties and +optional driver-defined properties. An application will typically perform I/O +in one of three styles via the <CODE>H5Dread</CODE> or <CODE>H5Dwrite</CODE> function: + +</P> +<P> +Like file access properties in the previous section, data transfer properties +can be set using a driver initialization function or a general purpose +function. 
For example, to set the MPI-IO driver to use independent access for +I/O operations one would say: + +</P> + +<PRE> +hid_t dxpl = H5Pcreate(H5P_DATA_XFER); +H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_INDEPENDENT); +H5Dread(dataset, type, mspace, fspace, buffer, dxpl); +H5Pclose(dxpl); +</PRE> + +<P> +The alternative is to initialize a driver defined C <CODE>struct</CODE> and pass it +to the <CODE>H5Pset_driver</CODE> function: + +</P> + +<PRE> +hid_t dxpl = H5Pcreate(H5P_DATA_XFER); +static H5FD_mpio_dxpl_t dx = {H5FD_MPIO_INDEPENDENT}; +H5Pset_driver(dxpl, H5FD_MPIO, &dx); +H5Dread(dataset, type, mspace, fspace, buffer, dxpl); +</PRE> + +<P> +The transfer propery list can be queried in a manner similar to the file +access property list: the driver provides a function (or functions) to return +various information about the transfer property list: + +</P> + +<PRE> +hid_t driver = H5Pget_driver(dxpl); +if (H5FD_MPIO==driver) { + H5FD_mpio_xfer_t xfer_mode; + H5Pget_dxpl_mpio(dxpl, &xfer_mode); +} else { + .... +} +</PRE> + + + +<H2><A NAME="SEC6" HREF="#TOC6">File Driver Interchangeability</A></H2> + +<P> +The HDF5 specifications describe two things: the mapping of data onto a linear +<STRONG>format address space</STRONG> and the C API which performs the mapping. +However, the mapping of the format address space onto storage intentionally +falls outside the scope of the HDF5 specs. This is a direct result of the fact +that it is not generally possible to store information about how to access +storage inside the storage itself. For instance, given only the file name +<TT>`/arborea/1225/work/f%03d'</TT> the HDF5 library is unable to tell whether the +name refers to a file on the local file system, a family of files on the local +file system, a file on host <SAMP>`arborea'</SAMP> port 1225, a family of files on a +remote system, <I>etc</I>. 
+ +</P> +<P> +Two ways which library could figure out where the storage is located are: +storage access information can be provided by the user, or the library can try +all known file access methods. This implementation uses the former method. + +</P> +<P> +In general, if a file was created with one driver then it isn't possible to +open it with another driver. There are of course exceptions: a file created +with MPIO could probably be opened with the sec2 driver, any file created +by the sec2 driver could be opened as a family of files with one member, +<I>etc</I>. In fact, sometimes a file must not only be opened with the same +driver but also with the same driver properties. The predefined drivers are +written in such a way that specifying the correct driver is sufficient for +opening a file. + +</P> + + +<H1><A NAME="SEC7" HREF="#TOC7">Implementation of a Driver</A></H1> + +<P> +A driver is simply a collection of functions and data structures which are +registered with the HDF5 library at runtime. The functions fall into these +categories: + +</P> + +<UL> +<LI>Functions which operate on modes + +<LI>Functions which operate on files + +<LI>Functions which operate on the address space + +<LI>Functions which operate on data + +<LI>Functions for driver initialization + +<LI>Optimization functions + +</UL> + + + +<H2><A NAME="SEC8" HREF="#TOC8">Mode Functions</A></H2> + +<P> +Some drivers need information about file access and data transfers which are +very specific to the driver. The information is usually implemented as a pair +of pointers to C structs which are allocated and initialized as part of an +HDF5 property list and passed down to various driver functions. There are two +classes of settings: file access modes that describe how to access the file +through the driver, and data transfer modes which are settings that control +I/O operations. 
Each file opened by a particular driver may have a different +access mode; each dataset I/O request for a particular file may have a +different data transfer mode. + +</P> +<P> +Since each driver has its own particular requirements for various settings, +each driver is responsible for defining the mode structures that it +needs. Higher layers of the library treat the structures as opaque but must be +able to copy and free them. Thus, the driver provides either the size of the +structure or a pair of function pointers for each of the mode types. + +</P> +<P> +<STRONG>Example:</STRONG> The family driver needs to know how the format address +space is partitioned and the file access property list to use for the +family members. + +</P> + +<PRE> +/* Driver-specific file access properties */ +typedef struct H5FD_family_fapl_t { + hsize_t memb_size; /*size of each member */ + hid_t memb_fapl_id; /*file access property list of each memb*/ +} H5FD_family_fapl_t; + +/* Driver specific data transfer properties */ +typedef struct H5FD_family_dxpl_t { + hid_t memb_dxpl_id; /*data xfer property list of each memb */ +} H5FD_family_dxpl_t; +</PRE> + +<P> +In order to copy or free one of these structures the member file access +or data transfer properties must also be copied or freed. 
This is done +by providing a copy and close function for each structure: + +</P> +<P> +<STRONG>Example:</STRONG> The file access property list copy and close functions +for the family driver: + +</P> + +<PRE> +static void * +H5FD_family_fapl_copy(const void *_old_fa) +{ + const H5FD_family_fapl_t *old_fa = (const H5FD_family_fapl_t*)_old_fa; + H5FD_family_fapl_t *new_fa = malloc(sizeof(H5FD_family_fapl_t)); + assert(new_fa); + + memcpy(new_fa, old_fa, sizeof(H5FD_family_fapl_t)); + new_fa->memb_fapl_id = H5Pcopy(old_fa->memb_fapl_id); + return new_fa; +} + +static herr_t +H5FD_family_fapl_free(void *_fa) +{ + H5FD_family_fapl_t *fa = (H5FD_family_fapl_t*)_fa; + H5Pclose(fa->memb_fapl_id); + free(fa); + return 0; +} +</PRE> + +<P> +Generally when a file is created or opened the file access properties +for the driver are copied into the file pointer which is returned and +they may be modified from their original value (for instance, the file +family driver modifies the member size property when opening an existing +family). In order to support the <CODE>H5Fget_access_plist</CODE> function the +driver must provide a <CODE>fapl_get</CODE> callback which creates a copy of +the driver-specific properties based on a particular file. + +</P> +<P> +<STRONG>Example:</STRONG> The file family driver copies the member size and member file +access property list into the return value: + +</P> + +<PRE> +static void * +H5FD_family_fapl_get(H5FD_t *_file) +{ + H5FD_family_t *file = (H5FD_family_t*)_file; + H5FD_family_fapl_t *fa = calloc(1, sizeof(H5FD_family_fapl_t)); + + fa->memb_size = file->memb_size; + fa->memb_fapl_id = H5Pcopy(file->memb_fapl_id); + return fa; +} +</PRE> + + + +<H2><A NAME="SEC9" HREF="#TOC9">File Functions</A></H2> + +<P> +The higher layers of the library expect files to have a name and allow the +file to be accessed in various modes. The driver must be able to create a new +file, replace an existing file, or open an existing file.
Opening or creating +a file should return a handle, a pointer to a specialization of the +<CODE>H5FD_t</CODE> struct, which allows read-only or read-write access and which +will be passed to the other driver functions as they are +called.<A NAME="DOCF5" HREF="#FOOT5">(5)</A> + +</P> + +<PRE> +typedef struct { + /* Public fields */ + H5FD_class_t *cls; /*class data defined below*/ + + /* Private fields -- driver-defined */ + +} H5FD_t; +</PRE> + +<P> +<STRONG>Example:</STRONG> The family driver requires handles to the underlying +storage, the size of the members for this particular file (which might be +different than the member size specified in the file access property list if +an existing file family is being opened), the name used to open the file in +case additional members must be created, and the flags to use for creating +those additional members. The <CODE>eoa</CODE> member caches the size of the format +address space so the family members don't have to be queried in order to find +it. + +</P> + +<PRE> +/* The description of a file belonging to this driver. */ +typedef struct H5FD_family_t { + H5FD_t pub; /*public stuff, must be first */ + hid_t memb_fapl_id; /*file access property list for members */ + hsize_t memb_size; /*maximum size of each member file */ + int nmembs; /*number of family members */ + int amembs; /*number of member slots allocated */ + H5FD_t **memb; /*dynamic array of member pointers */ + haddr_t eoa; /*end of allocated addresses */ + char *name; /*name generator printf format */ + unsigned flags; /*flags for opening additional members */ +} H5FD_family_t; +</PRE> + +<P> +<STRONG>Example:</STRONG> The sec2 driver needs to keep track of the underlying Unix +file descriptor and also the end of format address space and current Unix file +size. It also keeps track of the current file position and last operation +(read, write, or unknown) in order to optimize calls to <CODE>lseek</CODE>. 
The +<CODE>device</CODE> and <CODE>inode</CODE> fields are defined on Unix in order to uniquely +identify the file and will be discussed below. + +</P> + +<PRE> +typedef struct H5FD_sec2_t { + H5FD_t pub; /*public stuff, must be first */ + int fd; /*the unix file */ + haddr_t eoa; /*end of allocated region */ + haddr_t eof; /*end of file; current file size*/ + haddr_t pos; /*current file I/O position */ + int op; /*last operation */ + dev_t device; /*file device number */ + ino_t inode; /*file i-node number */ +} H5FD_sec2_t; +</PRE> + + + +<H3><A NAME="SEC10" HREF="#TOC10">Opening Files</A></H3> + +<P> +All drivers must define a function for opening/creating a file. This +function should have a prototype which is: + +</P> +<P> +<DL> +<DT><U>Function:</U> static H5FD_t * <B>open</B> <I>(const char *<VAR>name</VAR>, unsigned <VAR>flags</VAR>, hid_t <VAR>fapl</VAR>, haddr_t <VAR>maxaddr</VAR>)</I> +<DD><A NAME="IDX1"></A> + +</P> +<P> +The file name <VAR>name</VAR> and file access property list <VAR>fapl</VAR> are +the same as were specified in the <CODE>H5Fcreate</CODE> or <CODE>H5Fopen</CODE> +call. The <VAR>flags</VAR> are the same as in those calls also except the +flag <CODE>H5F_ACC_CREAT</CODE> is also present if the call was to +<CODE>H5Fcreate</CODE> and they are documented in the <TT>`H5Fpublic.h'</TT> +file. The <VAR>maxaddr</VAR> argument is the maximum format address that the +driver should be prepared to handle (the minimum address is always +zero). +</DL> + +</P> +<P> +<STRONG>Example:</STRONG> The sec2 driver opens a Unix file with the requested name +and saves information which uniquely identifies the file (the Unix device +number and inode).
+ +</P> + +<PRE> +static H5FD_t * +H5FD_sec2_open(const char *name, unsigned flags, hid_t fapl_id/*unused*/, + haddr_t maxaddr) +{ + unsigned o_flags; + int fd; + struct stat sb; + H5FD_sec2_t *file=NULL; + + /* Check arguments */ + if (!name || !*name) return NULL; + if (0==maxaddr || HADDR_UNDEF==maxaddr) return NULL; + if (ADDR_OVERFLOW(maxaddr)) return NULL; + + /* Build the open flags */ + o_flags = (H5F_ACC_RDWR & flags) ? O_RDWR : O_RDONLY; + if (H5F_ACC_TRUNC & flags) o_flags |= O_TRUNC; + if (H5F_ACC_CREAT & flags) o_flags |= O_CREAT; + if (H5F_ACC_EXCL & flags) o_flags |= O_EXCL; + + /* Open the file */ + if ((fd=open(name, o_flags, 0666))<0) return NULL; + if (fstat(fd, &sb)<0) { + close(fd); + return NULL; + } + + /* Create the new file struct */ + file = calloc(1, sizeof(H5FD_sec2_t)); + file->fd = fd; + file->eof = sb.st_size; + file->pos = HADDR_UNDEF; + file->op = OP_UNKNOWN; + file->device = sb.st_dev; + file->inode = sb.st_ino; + + return (H5FD_t*)file; +} +</PRE> + + + +<H3><A NAME="SEC11" HREF="#TOC11">Closing Files</A></H3> + +<P> +Closing a file simply means that all cached data should be flushed to the next +lower layer, the file should be closed at the next lower layer, and all +file-related data structures should be freed. All information needed by the +close function is already present in the file handle. + +</P> +<P> +<DL> +<DT><U>Function:</U> static herr_t <B>close</B> <I>(H5FD_t *<VAR>file</VAR>)</I> +<DD><A NAME="IDX2"></A> + +</P> +<P> +The <VAR>file</VAR> argument is the handle which was returned by the <CODE>open</CODE> +function, and the <CODE>close</CODE> should free only memory associated with the +driver-specific part of the handle (the public parts will have already been released by HDF5's virtual file layer). 
+</DL> + +</P> +<P> +<STRONG>Example:</STRONG> The sec2 driver just closes the underlying Unix file, +making sure that the actual file size is the same as that known to the +library by writing a zero to the last file position if it hasn't been +written by some previous operation (which happens in the same code which +flushes the file contents and is shown below). + +</P> + +<PRE> +static herr_t +H5FD_sec2_close(H5FD_t *_file) +{ + H5FD_sec2_t *file = (H5FD_sec2_t*)_file; + + if (H5FD_sec2_flush(_file)<0) return -1; + if (close(file->fd)<0) return -1; + free(file); + return 0; +} +</PRE> + + + +<H3><A NAME="SEC12" HREF="#TOC12">File Keys</A></H3> + +<P> +Occasionally an application will attempt to open a single file more than one +time in order to obtain multiple handles to the file. HDF5 allows the files to +share information<A NAME="DOCF6" HREF="#FOOT6">(6)</A> but in order to +accomplish this HDF5 must be able to tell when two names refer to the same +file. It does this by associating a driver-defined key with each file opened +by a driver and comparing the key for an open request with the keys for all +other files currently open by the same driver. + +</P> +<P> +<DL> +<DT><U>Function:</U> static int <B>cmp</B> <I>(const H5FD_t *<VAR>f1</VAR>, const H5FD_t *<VAR>f2</VAR>)</I> +<DD><A NAME="IDX3"></A> + +</P> +<P> +The driver may provide a function which compares two files <VAR>f1</VAR> and +<VAR>f2</VAR> belonging to the same driver and returns a negative, positive, or +zero value <I>a la</I> the <CODE>strcmp</CODE> function.<A NAME="DOCF7" HREF="#FOOT7">(7)</A> If this +function is not provided then HDF5 assumes that all calls to the <CODE>open</CODE> +callback return unique files regardless of the arguments and it is up to the +application to avoid doing this if that assumption is incorrect.
+</DL> + +</P> +<P> +Each time a file is opened the library calls the <CODE>cmp</CODE> function to +compare that file with all other files currently open by the same driver and +if one of them matches (at most one can match) then the file which was just +opened is closed and the previously opened file is used instead. + +</P> +<P> +Opening a file twice with incompatible flags will result in failure. For +instance, opening a file with the truncate flag is a two step process which +first opens the file without truncation so keys can be compared, and if no +matching file is found already open then the file is closed and immediately +reopened with the truncation flag set (if a matching file is already open then +the truncating open will fail). + +</P> +<P> +<STRONG>Example:</STRONG> The sec2 driver uses the Unix device and i-node as the +key. They were initialized when the file was opened. + +</P> + +<PRE> +static int +H5FD_sec2_cmp(const H5FD_t *_f1, const H5FD_t *_f2) +{ + const H5FD_sec2_t *f1 = (const H5FD_sec2_t*)_f1; + const H5FD_sec2_t *f2 = (const H5FD_sec2_t*)_f2; + + if (f1->device < f2->device) return -1; + if (f1->device > f2->device) return 1; + + if (f1->inode < f2->inode) return -1; + if (f1->inode > f2->inode) return 1; + + return 0; +} +</PRE> + + + +<H3><A NAME="SEC13" HREF="#TOC13">Saving Modes Across Opens</A></H3> + +<P> +Some drivers may also need to store certain information in the file superblock +in order to be able to reliably open the file at a later date. This is done by +three functions: one to determine how much space will be necessary to store +the information in the superblock, one to encode the information, and one to +decode the information. These functions are optional, but if any one is +defined then the other two must also be defined. 
+ +</P> +<P> +<DL> +<DT><U>Function:</U> static hsize_t <B>sb_size</B> <I>(H5FD_t *<VAR>file</VAR>)</I> +<DD><A NAME="IDX4"></A> +<DT><U>Function:</U> static herr_t <B>sb_encode</B> <I>(H5FD_t *<VAR>file</VAR>, char *<VAR>name</VAR>, unsigned char *<VAR>buf</VAR>)</I> +<DD><A NAME="IDX5"></A> +<DT><U>Function:</U> static herr_t <B>sb_decode</B> <I>(H5FD_t *<VAR>file</VAR>, const char *<VAR>name</VAR>, const unsigned char *<VAR>buf</VAR>)</I> +<DD><A NAME="IDX6"></A> + +</P> +<P> +The <CODE>sb_size</CODE> function returns the number of bytes necessary to encode +information needed later if the file is reopened. The <CODE>sb_encode</CODE> +function encodes information from the file into buffer <VAR>buf</VAR> +allocated by the caller. It also writes an 8-character string (plus null +termination) into the <CODE>name</CODE> argument, which should be a unique +identification for the driver. The <CODE>sb_decode</CODE> function looks at +the <VAR>name</VAR> and +decodes +data from the buffer <VAR>buf</VAR> and updates the <VAR>file</VAR> argument with the new information, +advancing <VAR>*p</VAR> in the process. +</DL> + +</P> +<P> +The part of this which is somewhat tricky is that the file must be readable +before the superblock information is decoded. File access modes fall outside +the scope of the HDF5 file format, but they are placed inside the boot block +for convenience.<A NAME="DOCF8" HREF="#FOOT8">(8)</A> + +</P> +<P> +<STRONG>Example:</STRONG> <EM>To be written later.</EM> + +</P> + + +<H2><A NAME="SEC14" HREF="#TOC14">Address Space Functions</A></H2> + +<P> +HDF5 does not assume that a file is a linear address space of bytes. Instead, +the library will call functions to allocate and free portions of the HDF5 +format address space, which in turn map onto functions in the file driver to +allocate and free portions of file address space.
The library tells the file +driver how much format address space it wants to allocate and the driver +decides what format address to use and how that format address is mapped onto +the file address space. Usually the format address is chosen so that the file +address can be calculated in constant time for data I/O operations (which are +always specified by format addresses). + +</P> + + + +<H3><A NAME="SEC15" HREF="#TOC15">Userblock and Superblock</A></H3> + +<P> +The HDF5 format allows an optional userblock to appear before the actual HDF5 +data in such a way that if the userblock is <STRONG>sucked out</STRONG> of the file and +everything remaining is shifted downward in the file address space, then the +file is still a valid HDF5 file. The userblock size can be zero or any +power of two greater than or equal to 512 and the file superblock begins +immediately after the userblock. + +</P> +<P> +HDF5 allocates space for the userblock and superblock by calling an +allocation function defined below, which must return a chunk of memory at +format address zero on the first call. + +</P> + + +<H3><A NAME="SEC16" HREF="#TOC16">Allocation of Format Regions</A></H3> + +<P> +The library makes many types of allocation requests: + +</P> +<DL COMPACT> + +<DT><CODE>H5FD_MEM_SUPER</CODE> +<DD> +An allocation request for the userblock and/or superblock. +<DT><CODE>H5FD_MEM_BTREE</CODE> +<DD> +An allocation request for a node of a B-tree. +<DT><CODE>H5FD_MEM_DRAW</CODE> +<DD> +An allocation request for the raw data of a dataset. +<DT><CODE>H5FD_MEM_META</CODE> +<DD> +An allocation request for the raw data of a dataset which +the user has indicated will be relatively small. +<DT><CODE>H5FD_MEM_GROUP</CODE> +<DD> +An allocation request for a group leaf node (internal nodes of the group tree +are allocated as <CODE>H5FD_MEM_BTREE</CODE>). +<DT><CODE>H5FD_MEM_GHEAP</CODE> +<DD> +An allocation request for a global heap collection.
Global heaps are used to +store certain types of references such as dataset region references. The set +of all global heap collections can become quite large. +<DT><CODE>H5FD_MEM_LHEAP</CODE> +<DD> +An allocation request for a local heap. Local heaps are used to store the +names which are members of a group. The combined size of all local heaps is a +function of the number of object names in the file. +<DT><CODE>H5FD_MEM_OHDR</CODE> +<DD> +An allocation request for (part of) an object header. Object headers are +relatively small and include meta information about objects (like the data +space and type of a dataset) and attributes. +</DL> + +<P> +When a chunk of memory is freed the library adds it to a free list and +allocation requests are satisfied from the free list before requesting memory +from the file driver. Each type of allocation request enumerated above has its +own free list, but the file driver can specify that certain object types can +share a free list. It does so by providing an array which maps a request type +to a free list. If any value of the map is <CODE>H5MF_DEFAULT</CODE> (zero) then the +object's own free list is used. The special value <CODE>H5MF_NOLIST</CODE> indicates +that the library should not attempt to maintain a free list for that +particular object type, instead calling the file driver each time an object of +that type is freed. + +</P> +<P> +Mappings predefined in the <TT>`H5FDpublic.h'</TT> file are: +<DL COMPACT> + +<DT><CODE>H5FD_FLMAP_SINGLE</CODE> +<DD> +All memory usage types are mapped to a single free list. +<DT><CODE>H5FD_FLMAP_DICHOTOMY</CODE> +<DD> +Memory usage is segregated into meta data and raw data for the purposes of +memory management. +<DT><CODE>H5FD_FLMAP_DEFAULT</CODE> +<DD> +Each memory usage type has its own free list. 
+</DL> + +<P> +<STRONG>Example:</STRONG> To make a map that manages object headers on one free list +and everything else on another free list one might initialize the map with the +following code: (the use of <CODE>H5FD_MEM_SUPER</CODE> is arbitrary) + +</P> + +<PRE> +H5FD_mem_t mt, map[H5FD_MEM_NTYPES]; + +for (mt=0; mt&lt;H5FD_MEM_NTYPES; mt++) { + map[mt] = (H5FD_MEM_OHDR==mt) ? mt : H5FD_MEM_SUPER; +} +</PRE> + +<P> +If an allocation request cannot be satisfied from the free list then one of +two things happens. If the driver defines an allocation callback then it is +used to allocate space; otherwise new memory is allocated from the end of the +format address space by incrementing the end-of-address marker. + +</P> +<P> +<DL> +<DT><U>Function:</U> static haddr_t <B>alloc</B> <I>(H5FD_t *<VAR>file</VAR>, H5MF_type_t <VAR>type</VAR>, hsize_t <VAR>size</VAR>)</I> +<DD><A NAME="IDX7"></A> + +</P> +<P> +The <VAR>file</VAR> argument is the file from which space is to be allocated, +<VAR>type</VAR> is the type of memory being requested (from the list above) without +being mapped according to the freelist map and <VAR>size</VAR> is the number of +bytes being requested. The library is allowed to allocate large chunks of +storage and manage them in a layer above the file driver (although the current +library doesn't do that). The allocation function should return a format +address for the first byte allocated. The allocated region extends from that +address for <VAR>size</VAR> bytes. If the request cannot be honored then the +undefined address value is returned (<CODE>HADDR_UNDEF</CODE>). The first call to +this function for a file which has never had memory allocated <EM>must</EM> +return a format address of zero or <CODE>HADDR_UNDEF</CODE> since this is how the +library allocates space for the userblock and/or superblock.
+</DL> + +</P> + +<P> +<STRONG>Example:</STRONG> <EM>To be written later.</EM> + +</P> + + +<H3><A NAME="SEC17" HREF="#TOC17">Freeing Format Regions</A></H3> + +<P> +When the library is finished using a certain region of the format address +space it will return the space to the free list according to the type of +memory being freed and the free list map described above. If the free list has +been disabled for a particular memory usage type (according to the free list +map) and the driver defines a <CODE>free</CODE> callback then it will be +invoked. The <CODE>free</CODE> callback is also invoked for all entries on the free +list when the file is closed. + +</P> +<P> +<DL> +<DT><U>Function:</U> static herr_t <B>free</B> <I>(H5FD_t *<VAR>file</VAR>, H5MF_type_t <VAR>type</VAR>, haddr_t <VAR>addr</VAR>, hsize_t <VAR>size</VAR>)</I> +<DD><A NAME="IDX8"></A> + +</P> +<P> +The <VAR>file</VAR> argument is the file for which space is being freed; <VAR>type</VAR> +is the type of object being freed (from the list above) without being mapped +according to the freelist map; <VAR>addr</VAR> is the first format address to free; +and <VAR>size</VAR> is the size in bytes of the region being freed. The region +being freed may refer to just part of the region originally allocated and/or +may cross allocation boundaries provided all regions being freed have the same +usage type. However, the library will never attempt to free regions which have +already been freed or which have never been allocated. +</DL> + +</P> +<P> +A driver may choose to not define the <CODE>free</CODE> function, in which case +format addresses will be leaked. This isn't normally a huge problem since the +library contains a simple free list of its own and freeing parts of the format +address space is not a common occurrence. 
+ +</P> +<P> +<STRONG>Example:</STRONG> <EM>To be written later.</EM> + +</P> + + +<H3><A NAME="SEC18" HREF="#TOC18">Querying Address Range</A></H3> + +<P> +Each file driver must have some mechanism for setting and querying the end of +address, or <STRONG>EOA</STRONG>, marker. The EOA marker is the first format address +after the last format address ever allocated. If the last part of the +allocated address range is freed then the driver may optionally decrease the +eoa marker. + +</P> +<P> +<DL> +<DT><U>Function:</U> static haddr_t <B>get_eoa</B> <I>(H5FD_t *<VAR>file</VAR>)</I> +<DD><A NAME="IDX9"></A> + +</P> +<P> +This function returns the current value of the EOA marker for the specified +file. +</DL> + +</P> +<P> +<STRONG>Example:</STRONG> The sec2 driver just returns the current eoa marker value +which is cached in the file structure: + +</P> + +<PRE> +static haddr_t +H5FD_sec2_get_eoa(H5FD_t *_file) +{ + H5FD_sec2_t *file = (H5FD_sec2_t*)_file; + return file->eoa; +} +</PRE> + +<P> +The eoa marker is initially zero when a file is opened and the library may set +it to some other value shortly after the file is opened (after the superblock +is read and the saved eoa marker is determined) or when allocating additional +memory in the absence of an <CODE>alloc</CODE> callback (described above). + +</P> +<P> +<STRONG>Example:</STRONG> The sec2 driver simply caches the eoa marker in the file +structure and does not extend the underlying Unix file. When the file is +flushed or closed then the Unix file size is extended to match the eoa marker. + +</P> + +<PRE> +static herr_t +H5FD_sec2_set_eoa(H5FD_t *_file, haddr_t addr) +{ + H5FD_sec2_t *file = (H5FD_sec2_t*)_file; + file->eoa = addr; + return 0; +} +</PRE> + + + +<H2><A NAME="SEC19" HREF="#TOC19">Data Functions</A></H2> + +<P> +These functions operate on data, transferring a region of the format address +space between memory and files. 
+ +</P> + + + +<H3><A NAME="SEC20" HREF="#TOC20">Contiguous I/O Functions</A></H3> + +<P> +A driver must specify two functions to transfer data from the library to the +file and vice versa. + +</P> +<P> +<DL> +<DT><U>Function:</U> static herr_t <B>read</B> <I>(H5FD_t *<VAR>file</VAR>, H5FD_mem_t <VAR>type</VAR>, hid_t <VAR>dxpl</VAR>, haddr_t <VAR>addr</VAR>, hsize_t <VAR>size</VAR>, void *<VAR>buf</VAR>)</I> +<DD><A NAME="IDX10"></A> +<DT><U>Function:</U> static herr_t <B>write</B> <I>(H5FD_t *<VAR>file</VAR>, H5FD_mem_t <VAR>type</VAR>, hid_t <VAR>dxpl</VAR>, haddr_t <VAR>addr</VAR>, hsize_t <VAR>size</VAR>, const void *<VAR>buf</VAR>)</I> +<DD><A NAME="IDX11"></A> + +</P> +<P> +The <CODE>read</CODE> function reads data from file <VAR>file</VAR> beginning at address +<VAR>addr</VAR> and continuing for <VAR>size</VAR> bytes into the buffer <VAR>buf</VAR> +supplied by the caller. The <CODE>write</CODE> function transfers data in the +opposite direction. Both functions take a data transfer property list +<VAR>dxpl</VAR> which indicates the fine points of how the data is to be +transferred and which comes directly from the <CODE>H5Dread</CODE> or +<CODE>H5Dwrite</CODE> function. Both functions receive the <VAR>type</VAR> of +data being transferred, which may allow a driver to tune its behavior for +different kinds of data. +</DL> + +</P> +<P> +Both functions should return a negative value if they fail to transfer the +requested data, or non-negative if they succeed. The library will never +attempt to read from unallocated regions of the format address space. + +</P> +<P> +<STRONG>Example:</STRONG> The sec2 driver just makes system calls. It tries not to +call <CODE>lseek</CODE> if the current operation is the same as the previous +operation and the file position is correct. It also fills the output buffer +with zeros when reading between the current EOF and EOA markers and restarts +system calls which were interrupted.
+ +</P> + +<PRE> +static herr_t +H5FD_sec2_read(H5FD_t *_file, H5FD_mem_t type/*unused*/, hid_t dxpl_id/*unused*/, + haddr_t addr, hsize_t size, void *buf/*out*/) +{ + H5FD_sec2_t *file = (H5FD_sec2_t*)_file; + ssize_t nbytes; + + assert(file && file->pub.cls); + assert(buf); + + /* Check for overflow conditions */ + if (REGION_OVERFLOW(addr, size)) return -1; + if (addr+size>file->eoa) return -1; + + /* Seek to the correct location */ + if ((addr!=file->pos || OP_READ!=file->op) && + file_seek(file->fd, (file_offset_t)addr, SEEK_SET)<0) { + file->pos = HADDR_UNDEF; + file->op = OP_UNKNOWN; + return -1; + } + + /* + * Read data, being careful of interrupted system calls, partial results, + * and the end of the file. + */ + while (size>0) { + do nbytes = read(file->fd, buf, size); + while (-1==nbytes && EINTR==errno); + if (-1==nbytes) { + /* error */ + file->pos = HADDR_UNDEF; + file->op = OP_UNKNOWN; + return -1; + } + if (0==nbytes) { + /* end of file but not end of format address space */ + memset(buf, 0, size); + size = 0; + } + assert(nbytes>=0); + assert((hsize_t)nbytes<=size); + size -= (hsize_t)nbytes; + addr += (haddr_t)nbytes; + buf = (char*)buf + nbytes; + } + + /* Update current position */ + file->pos = addr; + file->op = OP_READ; + return 0; +} +</PRE> + +<P> +<STRONG>Example:</STRONG> The sec2 <CODE>write</CODE> callback is similar except it updates +the file EOF marker when extending the file. + +</P> + + +<H3><A NAME="SEC21" HREF="#TOC21">Flushing Cached Data</A></H3> + +<P> +Some drivers may desire to cache data in memory in order to make larger I/O +requests to the underlying file and thus improving bandwidth. Such drivers +should register a cache flushing function so that the library can insure that +data has been flushed out of the drivers in response to the application +calling <CODE>H5Fflush</CODE>. 
+ +</P> +<P> +<DL> +<DT><U>Function:</U> static herr_t <B>flush</B> <I>(H5FD_t *<VAR>file</VAR>)</I> +<DD><A NAME="IDX12"></A> + +</P> +<P> +Flush all data for file <VAR>file</VAR> to storage. +</DL> + +</P> +<P> +<STRONG>Example:</STRONG> The sec2 driver doesn't cache any data but it also doesn't +extend the Unix file as aggressively as it should. Therefore, when finalizing a +file it should write a zero to the last byte of the allocated region so that +when reopening the file later the EOF marker will be at least as large as the +EOA marker saved in the superblock (otherwise HDF5 will refuse to open the +file, claiming that the data appears to be truncated). + +</P> + +<PRE> +static herr_t +H5FD_sec2_flush(H5FD_t *_file) +{ + H5FD_sec2_t *file = (H5FD_sec2_t*)_file; + + if (file->eoa>file->eof) { + if (-1==file_seek(file->fd, file->eoa-1, SEEK_SET)) return -1; + if (write(file->fd, "", 1)!=1) return -1; + file->eof = file->eoa; + file->pos = file->eoa; + file->op = OP_WRITE; + } + + return 0; +} +</PRE> + + + +<H2><A NAME="SEC22" HREF="#TOC22">Optimization Functions</A></H2> + +<P> +The library is capable of performing several generic optimizations on I/O, but +these types of optimizations may not be appropriate for a given VFL driver. +</P> + +<P> +Each driver may provide a query function to allow the library to query whether +to enable these optimizations. If a driver lacks a query function, the library +will disable all types of optimizations which can be queried. +</P> + +<P> +<DL> +<DT><U>Function:</U> static herr_t <B>query</B> <I>(const H5FD_t *<VAR>file</VAR>, unsigned long *<VAR>flags</VAR>)</I> +<DD><A NAME="IDX17"></A> +</P> +<P> +This function is called by the library to query which optimizations to enable +for I/O to this driver.
These are the flags which are currently defined: + +<UL> +<DL> +<DT>H5FD_FEAT_AGGREGATE_METADATA (0x00000001) +<DD>Defining the H5FD_FEAT_AGGREGATE_METADATA for a VFL driver means that +the library will attempt to allocate a larger block for metadata and +then sub-allocate each metadata request from that larger block. +<DT>H5FD_FEAT_ACCUMULATE_METADATA (0x00000002) +<DD>Defining the H5FD_FEAT_ACCUMULATE_METADATA for a VFL driver means that +the library will attempt to cache metadata as it is written to the file +and build up a larger block of metadata to eventually pass to the VFL +'write' routine. +<DT>H5FD_FEAT_DATA_SIEVE (0x00000004) +<DD>Defining the H5FD_FEAT_DATA_SIEVE for a VFL driver means that +the library will attempt to cache raw data as it is read from/written to +a file in a "data sieve" buffer. See Rajeev Thakur's papers: + <UL> + <DL> + <DT>http://www.mcs.anl.gov/~thakur/papers/romio-coll.ps.gz + <DT>http://www.mcs.anl.gov/~thakur/papers/mpio-high-perf.ps.gz + </DL> + </UL> +</DL> +</UL> +</P> + +</DL> +</P> + +<H2><A NAME="SEC23" HREF="#TOC23">Registration of a Driver</A></H2> + +<P> +Before a driver can be used the HDF5 library needs to be told of its +existence. This is done by registering the driver, which results in a driver +identification number. Instead of passing many arguments to the registration +function, the driver information is entered into a structure and the address +of the structure is passed to the registration function where it is +copied. This allows the HDF5 API to be extended while providing backward +compatibility at the source level. + +</P> +<P> +<DL> +<DT><U>Function:</U> hid_t <B>H5FDregister</B> <I>(H5FD_class_t *<VAR>cls</VAR>)</I> +<DD><A NAME="IDX13"></A> + +</P> +<P> +The driver described by struct <VAR>cls</VAR> is registered with the library and an +ID number for the driver is returned. 
+</DL> + +</P> +<P> +The <CODE>H5FD_class_t</CODE> type is a struct with the following fields: + +</P> +<DL COMPACT> + +<DT><CODE>const char *name</CODE> +<DD> +A pointer to a constant, null-terminated driver name to be used for debugging +purposes. +<DT><CODE>size_t fapl_size</CODE> +<DD> +The size in bytes of the file access mode structure or zero if the driver +supplies a copy function or doesn't define the structure. +<DT><CODE>void *(*fapl_copy)(const void *fapl)</CODE> +<DD> +An optional function which copies a driver-defined file access mode structure. +This field takes precedence over <CODE>fapl_size</CODE> when both are defined. +<DT><CODE>void (*fapl_free)(void *fapl)</CODE> +<DD> +An optional function to free the driver-defined file access mode structure. If +null, then the library calls the C <CODE>free</CODE> function to free the +structure. +<DT><CODE>size_t dxpl_size</CODE> +<DD> +The size in bytes of the data transfer mode structure or zero if the driver +supplies a copy function or doesn't define the structure. +<DT><CODE>void *(*dxpl_copy)(const void *dxpl)</CODE> +<DD> +An optional function which copies a driver-defined data transfer mode +structure. This field takes precedence over <CODE>dxpl_size</CODE> when both are +defined. +<DT><CODE>void (*dxpl_free)(void *dxpl)</CODE> +<DD> +An optional function to free the driver-defined data transfer mode +structure. If null, then the library calls the C <CODE>free</CODE> function to +free the structure. +<DT><CODE>H5FD_t *(*open)(const char *name, unsigned flags, hid_t fapl, haddr_t maxaddr)</CODE> +<DD> +The function which opens or creates a new file. +<DT><CODE>herr_t (*close)(H5FD_t *file)</CODE> +<DD> +The function which ends access to a file. +<DT><CODE>int (*cmp)(const H5FD_t *f1, const H5FD_t *f2)</CODE> +<DD> +An optional function to determine whether two open files have the same key. If +this function is not present then the library assumes that two files will +never be the same.
+<DT><CODE>int (*query)(const H5FD_t *f, unsigned long *flags)</CODE> +<DD> +An optional function to determine which library optimizations a driver can +support. +<DT><CODE>haddr_t (*alloc)(H5FD_t *file, H5FD_mem_t type, hsize_t size)</CODE> +<DD> +An optional function to allocate space in the file. +<DT><CODE>herr_t (*free)(H5FD_t *file, H5FD_mem_t type, haddr_t addr, hsize_t size)</CODE> +<DD> +An optional function to free space in the file. +<DT><CODE>haddr_t (*get_eoa)(H5FD_t *file)</CODE> +<DD> +A function to query how much of the format address space has been allocated. +<DT><CODE>herr_t (*set_eoa)(H5FD_t *file, haddr_t)</CODE> +<DD> +A function to set the end of address space. +<DT><CODE>haddr_t (*get_eof)(H5FD_t *file)</CODE> +<DD> +A function to return the current end-of-file marker value. +<DT><CODE>herr_t (*read)(H5FD_t *file, H5FD_mem_t type, hid_t dxpl, haddr_t addr, hsize_t size, void *buffer)</CODE> +<DD> +A function to read data from a file. +<DT><CODE>herr_t (*write)(H5FD_t *file, H5FD_mem_t type, hid_t dxpl, haddr_t addr, hsize_t size, const void *buffer)</CODE> +<DD> +A function to write data to a file. +<DT><CODE>herr_t (*flush)(H5FD_t *file)</CODE> +<DD> +A function which flushes cached data to the file. +<DT><CODE>H5FD_mem_t fl_map[H5FD_MEM_NTYPES]</CODE> +<DD> +An array which maps a file allocation request type to a free list. 
+</DL> + +<P> +<STRONG>Example:</STRONG> The sec2 driver would be registered as: + +</P> + +<PRE> +static const H5FD_class_t H5FD_sec2_g = { + "sec2", /*name */ + MAXADDR, /*maxaddr */ + NULL, /*sb_size */ + NULL, /*sb_encode */ + NULL, /*sb_decode */ + 0, /*fapl_size */ + NULL, /*fapl_get */ + NULL, /*fapl_copy */ + NULL, /*fapl_free */ + 0, /*dxpl_size */ + NULL, /*dxpl_copy */ + NULL, /*dxpl_free */ + H5FD_sec2_open, /*open */ + H5FD_sec2_close, /*close */ + H5FD_sec2_cmp, /*cmp */ + H5FD_sec2_query, /*query */ + NULL, /*alloc */ + NULL, /*free */ + H5FD_sec2_get_eoa, /*get_eoa */ + H5FD_sec2_set_eoa, /*set_eoa */ + H5FD_sec2_get_eof, /*get_eof */ + H5FD_sec2_read, /*read */ + H5FD_sec2_write, /*write */ + H5FD_sec2_flush, /*flush */ + H5FD_FLMAP_SINGLE, /*fl_map */ +}; + +hid_t +H5FD_sec2_init(void) +{ + if (!H5FD_SEC2_g) { + H5FD_SEC2_g = H5FDregister(&H5FD_sec2_g); + } + return H5FD_SEC2_g; +} +</PRE> + +<P> +A driver can be removed from the library by unregistering it: + +</P> +<P> +<DL> +<DT><U>Function:</U> herr_t <B>H5FDunregister</B> <I>(hid_t <VAR>driver</VAR>)</I> +<DD><A NAME="IDX14"></A> +Where <VAR>driver</VAR> is the ID number returned when the driver was registered. +</DL> + +</P> +<P> +Unregistering a driver makes it unusable for creating new file access or data +transfer property lists but doesn't affect any property lists or files that +already use that driver. + +</P> + + + + +<H3><A NAME="SECProgNote" HREF="#TOCProgNote">Programming Note +for C++ Developers Using C Functions</A></H3> + +<p>If a C routine that takes a function pointer as an argument is +called from within C++ code, the C routine should be returned from +normally. </p> + +<p>Examples of this kind of routine include callbacks such as +<code>H5Pset_elink_cb</code> and <code>H5Pset_type_conv_cb</code> +and functions such as <code>H5Tconvert</code> and +<code>H5Ewalk2</code>.</p> + +<p>Exiting the routine in its normal fashion allows the HDF5 C +Library to clean up its work properly.
In other words, if the C++ +application jumps out of the routine back to the C++ +“catch” statement, the library is not given the +opportunity to close any temporary data structures that were set +up when the routine was called. The C++ application should save +some state as the routine is started so that any problem that +occurs might be diagnosed.</p> + + + + + + + +<H2><A NAME="SEC24" HREF="#TOC24">Querying Driver Information</A></H2> + +<P> +<DL> +<DT><U>Function:</U> void * <B>H5Pget_driver_data</B> <I>(hid_t <VAR>fapl</VAR>)</I> +<DD><A NAME="IDX15"></A> +<DT><U>Function:</U> void * <B>H5Pget_driver_data</B> <I>(hid_t <VAR>fxpl</VAR>)</I> +<DD><A NAME="IDX16"></A> + +</P> +<P> +This function is intended to be used by driver functions, not applications. +It returns a pointer directly into the file access property list +<CODE><VAR>fapl</VAR></CODE> which is a copy of the driver's file access mode originally +provided to the <CODE>H5Pset_driver</CODE> function. If its argument is a data +transfer property list <CODE>fxpl</CODE> then it returns a pointer to the +driver-specific data transfer information instead. +</DL> + +</P> + + + +<H1><A NAME="SEC25" HREF="#TOC25">Miscellaneous</A></H1> + +<P> +The various private <CODE>H5F_low_*</CODE> functions will be replaced by public +<CODE>H5FD*</CODE> functions so they can be called from drivers. + +</P> +<P> +All private functions <CODE>H5F_addr_*</CODE> which operate on addresses will be +renamed as public functions by removing the first underscore so they can be +called by drivers. + +</P> +<P> +The <CODE>haddr_t</CODE> address data type will be passed by value throughout the +library. The original intent was that this type would eventually be a union of +file address types for the various drivers and may become quite large, but +that was back when drivers were part of HDF5. It will become an alias for an +unsigned integer type (32 or 64 bits depending on how the library was +configured). 
+ +</P> +<P> +The various <CODE>H5F*.c</CODE> driver files will be renamed <CODE>H5FD*.c</CODE> and each +will have a corresponding header file. All driver functions except the +initializer and API will be declared static. + +</P> +<P> +This documentation didn't cover optimization functions which would be useful +to drivers like MPI-IO. Some drivers may be able to perform data pipeline +operations more efficiently than HDF5 and need to be given a chance to +override those parts of the pipeline. The pipeline would be designed to call +various H5FD optimization functions at various points which return one of +three values: the operation is not implemented by the driver, the operation is +implemented but failed in a non-recoverable manner, or the operation is +implemented and succeeded. + +</P> +<P> +Various parts of HDF5 check only the top-level file driver and do +something special if it is the MPI-IO driver. However, we might want to be +able to put the MPI-IO driver under other drivers such as the raw part of a +split driver or under a debug driver whose sole purpose is to accumulate +statistics as it passes all requests through to the MPI-IO driver. Therefore +we will probably need a function which takes a format address and/or object +type and returns the driver which would have been used at the lowest level to +process the request. + +</P> + +<P><HR><P> +<H1>Footnotes</H1> +<H3><A NAME="FOOT1" HREF="#DOCF1">(1)</A></H3> +<P>The driver name is by convention and might +not apply to drivers which are not distributed with HDF5. +<H3><A NAME="FOOT2" HREF="#DOCF2">(2)</A></H3> +<P>The access method also indicates how to translate +the storage name to a storage server such as a file, network protocol, or +memory. +<H3><A NAME="FOOT3" HREF="#DOCF3">(3)</A></H3> +<P>The term +"<EM>file</EM> access property list" is a misnomer since storage isn't +required to be a file.
+<H3><A NAME="FOOT4" HREF="#DOCF4">(4)</A></H3> +<P>This +function is overloaded to operate on data transfer property lists also, as +described below. +<H3><A NAME="FOOT5" HREF="#DOCF5">(5)</A></H3> +<P>Read-only access is only appropriate when opening an existing +file. +<H3><A NAME="FOOT6" HREF="#DOCF6">(6)</A></H3> +<P>For instance, writing data to one handle will cause +the data to be immediately visible on the other handle. +<H3><A NAME="FOOT7" HREF="#DOCF7">(7)</A></H3> +<P>The ordering is +arbitrary as long as it's consistent within a particular file driver. +<H3><A NAME="FOOT8" HREF="#DOCF8">(8)</A></H3> +<P>File access modes do not describe data, but rather +describe how the HDF5 format address space is mapped to the underlying +file(s). Thus, in general the mapping must be known before the file superblock +can be read. However, the user usually knows enough about the mapping for the +superblock to be readable and once the superblock is read the library can fill +in the missing parts of the mapping. +<P><HR><P> + +<?php include("../ed_libs/Footer2.htm"); ?> + +</BODY> +</HTML> diff --git a/doxygen/hdf5_footer.html b/doxygen/hdf5_footer.html new file mode 100644 index 0000000..520f3f5 --- /dev/null +++ b/doxygen/hdf5_footer.html @@ -0,0 +1,21 @@ +<!-- start footer part --> +<!--BEGIN GENERATE_TREEVIEW--> +<div id="nav-path" class="navpath"><!-- id is needed for treeview function! 
--> + <ul> + $navpath + <li class="footer">$generatedby + <a href="http://www.doxygen.org/index.html"> + <img class="footer" src="$relpath^doxygen.png" alt="doxygen"/></a> $doxygenversion </li> + </ul> +</div> +<!--END GENERATE_TREEVIEW--> +<!--BEGIN !GENERATE_TREEVIEW--> +<hr class="footer"/><address class="footer"><small> +$generatedby  <a href="http://www.doxygen.org/index.html"> +<img class="footer" src="$relpath^doxygen.png" alt="doxygen"/> +</a> $doxygenversion +</small></address> +<!--END !GENERATE_TREEVIEW--> + +</body> +</html> diff --git a/doxygen/hdf5_header.html b/doxygen/hdf5_header.html new file mode 100644 index 0000000..4a575d6 --- /dev/null +++ b/doxygen/hdf5_header.html @@ -0,0 +1,61 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> +<meta http-equiv="X-UA-Compatible" content="IE=9"/> +<meta name="generator" content="Doxygen $doxygenversion"/> +<meta name="viewport" content="width=device-width, initial-scale=1"/> +<!--BEGIN PROJECT_NAME--><title>$projectname: $title</title><!--END PROJECT_NAME--> +<!--BEGIN !PROJECT_NAME--><title>$title</title><!--END !PROJECT_NAME--> +<link href="$relpath^tabs.css" rel="stylesheet" type="text/css"/> +<script type="text/javascript" src="$relpath^jquery.js"></script> +<script type="text/javascript" src="$relpath^dynsections.js"></script> +$treeview +$search +$mathjax +<link href="$relpath^$stylesheet" rel="stylesheet" type="text/css" /> +<link href="$relpath$hdf5doxy.css" rel="stylesheet" type="text/css"> +<!-- $extrastylesheet --> +<script type="text/javascript" src="$relpath$hdf5_navtree_hacks.js"></script> + +</head> +<body> + +<div style="background:#FFDDDD;font-size:120%;text-align:center;margin:0;padding:5px">Please, help us to better know about our user community by answering the following short survey: <a 
href="https://www.hdfgroup.org/">https://www.hdfgroup.org/</a></div> + +<div id="top"><!-- do not remove this div, it is closed by doxygen! --> + +<!--BEGIN TITLEAREA--> +<div id="titlearea"> +<table cellspacing="0" cellpadding="0"> + <tbody> + <tr style="height: 56px;"> + <!--BEGIN PROJECT_LOGO--> + <td id="projectlogo"><img alt="Logo" src="$relpath^$projectlogo"/></td> + <!--END PROJECT_LOGO--> + <!--BEGIN PROJECT_NAME--> + <td id="projectalign" style="padding-left: 0.5em;"> + <div id="projectname"><a href="https://www.hdfgroup.org">$projectname</a> + <!--BEGIN PROJECT_NUMBER--> <span id="projectnumber">$projectnumber</span><!--END PROJECT_NUMBER--> + </div> + <!--BEGIN PROJECT_BRIEF--><div id="projectbrief">$projectbrief</div><!--END PROJECT_BRIEF--> + </td> + <!--END PROJECT_NAME--> + <!--BEGIN !PROJECT_NAME--> + <!--BEGIN PROJECT_BRIEF--> + <td id="projectalign" style="padding-left: 0.5em;"> + <div id="projectbrief">$projectbrief</div> + </td> + <!--END PROJECT_BRIEF--> + <!--END !PROJECT_NAME--> + <!--BEGIN DISABLE_INDEX--> + <!--BEGIN SEARCHENGINE--> + <td>$searchbox</td> + <!--END SEARCHENGINE--> + <!--END DISABLE_INDEX--> + </tr> + </tbody> +</table> +</div> +<!--END TITLEAREA--> +<!-- end header part --> diff --git a/doxygen/hdf5_navtree_hacks.js b/doxygen/hdf5_navtree_hacks.js new file mode 100644 index 0000000..942970c --- /dev/null +++ b/doxygen/hdf5_navtree_hacks.js @@ -0,0 +1,246 @@ + +// generate a table of contents in the side-nav based on the h1/h2 tags of the current page. 
+function generate_autotoc() { + var headers = $("h1, h2"); + if(headers.length > 1) { + var toc = $("#side-nav").append('<div id="nav-toc" class="toc"><h3>Table of contents</h3></div>'); + toc = $("#nav-toc"); + var footer = $("#nav-path"); + var footerHeight = footer.height(); + toc = toc.append('<ul></ul>'); + toc = toc.find('ul'); + var indices = new Array(); + indices[0] = 0; + indices[1] = 0; + + var h1counts = $("h1").length; + headers.each(function(i) { + var current = $(this); + var levelTag = current[0].tagName.charAt(1); + if(h1counts==0) + levelTag--; + var cur_id = current.attr("id"); + + indices[levelTag-1]+=1; + var prefix = indices[0]; + if (levelTag >1) { + prefix+="."+indices[1]; + } + + // Uncomment to add number prefixes + // current.html(prefix + " " + current.html()); + for(var l = levelTag; l < 2; ++l){ + indices[l] = 0; + } + + if(cur_id == undefined) { + current.attr('id', 'title' + i); + current.addClass('anchor'); + toc.append("<li class='level" + levelTag + "'><a id='link" + i + "' href='#title" + + i + "' title='" + current.prop("tagName") + "'>" + current.text() + "</a></li>"); + } else { + toc.append("<li class='level" + levelTag + "'><a id='" + cur_id + "' href='#title" + + i + "' title='" + current.prop("tagName") + "'>" + current.text() + "</a></li>"); + } + }); + resizeHeight(); + } +} + + +var global_navtree_object; + +// Overloaded to remove links to sections/subsections +function getNode(o, po) +{ + po.childrenVisited = true; + var l = po.childrenData.length-1; + for (var i in po.childrenData) { + var nodeData = po.childrenData[i]; + if((!nodeData[1]) || (nodeData[1].indexOf('#')==-1)) // <- we added this line + po.children[i] = newNode(o, po, nodeData[0], nodeData[1], nodeData[2], i==l); + } +} + +// Overloaded to adjust the size of the navtree wrt the toc +function resizeHeight() +{ + var header = $("#top"); + var sidenav = $("#side-nav"); + var content = $("#doc-content"); + var navtree = $("#nav-tree"); + var footer = 
$("#nav-path"); + var toc = $("#nav-toc"); + + var headerHeight = header.outerHeight(); + var footerHeight = footer.outerHeight(); + var tocHeight = toc.height(); + var windowHeight = $(window).height() - headerHeight - footerHeight; + content.css({height:windowHeight + "px"}); + navtree.css({height:(windowHeight-tocHeight) + "px"}); + sidenav.css({height:windowHeight + "px"}); +} + +// Overloaded to save the root node into global_navtree_object +function initNavTree(toroot,relpath) +{ + var o = new Object(); + global_navtree_object = o; // <- we added this line + o.toroot = toroot; + o.node = new Object(); + o.node.li = document.getElementById("nav-tree-contents"); + o.node.childrenData = NAVTREE; + o.node.children = new Array(); + o.node.childrenUL = document.createElement("ul"); + o.node.getChildrenUL = function() { return o.node.childrenUL; }; + o.node.li.appendChild(o.node.childrenUL); + o.node.depth = 0; + o.node.relpath = relpath; + o.node.expanded = false; + o.node.isLast = true; + o.node.plus_img = document.createElement("img"); + o.node.plus_img.src = relpath+"ftv2pnode.png"; + o.node.plus_img.width = 16; + o.node.plus_img.height = 22; + + if (localStorageSupported()) { + var navSync = $('#nav-sync'); + if (cachedLink()) { + showSyncOff(navSync,relpath); + navSync.removeClass('sync'); + } else { + showSyncOn(navSync,relpath); + } + navSync.click(function(){ toggleSyncButton(relpath); }); + } + + navTo(o,toroot,window.location.hash,relpath); + + $(window).bind('hashchange', function(){ + if (window.location.hash && window.location.hash.length>1){ + var a; + if ($(location).attr('hash')){ + var clslink=stripPath($(location).attr('pathname'))+':'+ + $(location).attr('hash').substring(1); + a=$('.item a[class$="'+clslink+'"]'); + } + if (a==null || !$(a).parent().parent().hasClass('selected')){ + $('.item').removeClass('selected'); + $('.item').removeAttr('id'); + } + var link=stripPath2($(location).attr('pathname')); + 
navTo(o,link,$(location).attr('hash'),relpath); + } else if (!animationInProgress) { + $('#doc-content').scrollTop(0); + $('.item').removeClass('selected'); + $('.item').removeAttr('id'); + navTo(o,toroot,window.location.hash,relpath); + } + }) + + $(window).on("load", showRoot); +} + +// return false if the the node has no children at all, or has only section/subsection children +function checkChildrenData(node) { + if (!(typeof(node.childrenData)==='string')) { + for (var i in node.childrenData) { + var url = node.childrenData[i][1]; + if(url.indexOf("#")==-1) + return true; + } + return false; + } + return (node.childrenData); +} + +// Modified to: +// 1 - remove the root node +// 2 - remove the section/subsection children +function createIndent(o,domNode,node,level) +{ + var level=-2; // <- we replaced level=-1 by level=-2 + var n = node; + while (n.parentNode) { level++; n=n.parentNode; } + if (checkChildrenData(node)) { // <- we modified this line to use checkChildrenData(node) instead of node.childrenData + var imgNode = document.createElement("span"); + imgNode.className = 'arrow'; + imgNode.style.paddingLeft=(16*level).toString()+'px'; + imgNode.innerHTML=arrowRight; + node.plus_img = imgNode; + node.expandToggle = document.createElement("a"); + node.expandToggle.href = "javascript:void(0)"; + node.expandToggle.onclick = function() { + if (node.expanded) { + $(node.getChildrenUL()).slideUp("fast"); + node.plus_img.innerHTML=arrowRight; + node.expanded = false; + } else { + expandNode(o, node, false, false); + } + } + node.expandToggle.appendChild(imgNode); + domNode.appendChild(node.expandToggle); + } else { + var span = document.createElement("span"); + span.className = 'arrow'; + span.style.width = 16*(level+1)+'px'; + span.innerHTML = ' '; + domNode.appendChild(span); + } +} + +// Overloaded to automatically expand the selected node +function selectAndHighlight(hash,n) +{ + var a; + if (hash) { + var 
link=stripPath($(location).attr('pathname'))+':'+hash.substring(1); + a=$('.item a[class$="'+link+'"]'); + } + if (a && a.length) { + a.parent().parent().addClass('selected'); + a.parent().parent().attr('id','selected'); + highlightAnchor(); + } else if (n) { + $(n.itemDiv).addClass('selected'); + $(n.itemDiv).attr('id','selected'); + } + if ($('#nav-tree-contents .item:first').hasClass('selected')) { + $('#nav-sync').css('top','30px'); + } else { + $('#nav-sync').css('top','5px'); + } + expandNode(global_navtree_object, n, true, true); // <- we added this line + showRoot(); +} + + +$(document).ready(function() { + + generate_autotoc(); + + (function (){ // wait until the first "selected" element has been created + try { + + // this line will triger an exception if there is no #selected element, i.e., before the tree structure is complete. + document.getElementById("selected").className = "item selected"; + + // ok, the default tree has been created, we can keep going... + + // expand the "Chapters" node + if(window.location.href.indexOf('unsupported')==-1) + expandNode(global_navtree_object, global_navtree_object.node.children[0].children[2], true, true); + else + expandNode(global_navtree_object, global_navtree_object.node.children[0].children[1], true, true); + + // Hide the root node "HDF5" + $(document.getElementsByClassName('index.html')[0]).parent().parent().css({display:"none"}); + + } catch (err) { + setTimeout(arguments.callee, 10); + } + })(); + + $(window).on("load", resizeHeight); +}); diff --git a/doxygen/hdf5doxy.css b/doxygen/hdf5doxy.css new file mode 100644 index 0000000..8c03860 --- /dev/null +++ b/doxygen/hdf5doxy.css @@ -0,0 +1,251 @@ + +/******** HDF5 specific CSS code ************/ + +/**** Styles removing elements ****/ + +/* remove the "modules|classes" link for module pages (they are already in the TOC) */ +div.summary { + display:none; +} + +/* remove */ +div.contents hr { + display:none; +} + +/**** ****/ + +p, dl.warning, dl.attention, 
dl.note +{ + max-width:60em; + text-align:justify; +} + +li { + max-width:55em; + text-align:justify; +} + +img { + border: 0; +} + +div.fragment { + display:table; /* this allows the element to be larger than its parent */ + padding: 0pt; +} +pre.fragment { + border: 1px solid #cccccc; + + margin: 2px 0px 2px 0px; + padding: 3px 5px 3px 5px; +} + +/* Common style for all HDF5's tables */ + +table.example, table.manual, table.manual-vl, table.manual-hl { + max-width:100%; + border-collapse: collapse; + border-style: solid; + border-width: 1px; + border-color: #cccccc; + font-size: 1em; + + box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + -moz-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); +} + +table.example th, table.manual th, table.manual-vl th, table.manual-hl th { + padding: 0.5em 0.5em 0.5em 0.5em; + text-align: left; + padding-right: 1em; + color: #555555; + background-color: #F4F4E5; + + background-image: -webkit-gradient(linear,center top,center bottom,from(#FFFFFF), color-stop(0.3,#FFFFFF), color-stop(0.30,#FFFFFF), color-stop(0.98,#F4F4E5), to(#ECECDE)); + background-image: -moz-linear-gradient(center top, #FFFFFF 0%, #FFFFFF 30%, #F4F4E5 98%, #ECECDE); + filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#FFFFFF', endColorstr='#F4F4E5'); +} + +table.example td, table.manual td, table.manual-vl td, table.manual-hl td { + vertical-align:top; + border-width: 1px; + border-color: #cccccc; +} + +/* header of headers */ +table th.meta { + text-align:center; + font-size: 1.2em; + background-color:#FFFFFF; +} + +/* intermediate header */ +table th.inter { + text-align:left; + background-color:#FFFFFF; + background-image:none; + border-style:solid solid solid solid; + border-width: 1px; + border-color: #cccccc; +} + +/** class for example / output tables **/ + +table.example { +} + +table.example th { +} + +table.example td { + padding: 0.5em 0.5em 0.5em 0.5em; + vertical-align:top; +} + +/* 
standard class for the manual */ + +table.manual, table.manual-vl, table.manual-hl { + padding: 0.2em 0em 0.5em 0em; +} + +table.manual th, table.manual-vl th, table.manual-hl th { + margin: 0em 0em 0.3em 0em; +} + +table.manual td, table.manual-vl td, table.manual-hl td { + padding: 0.3em 0.5em 0.3em 0.5em; + vertical-align:top; + border-width: 1px; +} + +table.manual td.alt, table.manual tr.alt, table.manual-vl td.alt, table.manual-vl tr.alt { + background-color: #F4F4E5; +} + +table.manual-vl th, table.manual-vl td, table.manual-vl td.alt { + border-color: #cccccc; + border-width: 1px; + border-style: none solid none solid; +} + +table.manual-vl th.inter { + border-style: solid solid solid solid; +} + +table.manual-hl td { + border-color: #cccccc; + border-width: 1px; + border-style: solid none solid none; +} + +table td.code { + font-family: monospace; +} + +h2 { + margin-top:2em; + border-style: none none solid none; + border-width: 1px; + border-color: #cccccc; +} + +/**** Table of content in the side-nav ****/ + + +div.toc { + margin:0; + padding: 0.3em 0 0 0; + width:100%; + float:none; + position:absolute; + bottom:0; + border-radius:0px; + border-style: solid none none none; + max-height:50%; + overflow-y: scroll; +} + +div.toc h3 { + margin-left: 0.5em; + margin-bottom: 0.2em; +} + +div.toc ul { + margin: 0.2em 0 0.4em 0.5em; +} + +span.cpp11,span.cpp14,span.cpp17 { + color: #119911; + font-weight: bold; +} + +.newin3x { + color: #a37c1a; + font-weight: bold; +} + +div.warningbox { + max-width:60em; + border-style: solid solid solid solid; + border-color: red; + border-width: 3px; +} + +/**** old HDF5's styles ****/ + + +table.tutorial_code td { + border-color: transparent; /* required for Firefox */ + padding: 3pt 5pt 3pt 5pt; + vertical-align: top; +} + + +/* Whenever doxygen meets a '\n' or a '<BR/>', it will put + * the text containing the character into a <p class="starttd">. 
+ * This little hack together with table.tutorial_code td.note + * aims at fixing this issue. */ +table.tutorial_code td.note p.starttd { + margin: 0px; + border: none; + padding: 0px; +} + +div.eimainmenu { + text-align: center; +} + +/* center version number on main page */ +h3.version { + text-align: center; +} + + +td.width20em p.endtd { + width: 20em; +} + +/* needed for huge screens */ +.ui-resizable-e { + background-repeat: repeat-y; +} + +/* Style external links -- nav-tree is different */ + +#nav-tree .label a { + padding:2px 16px 2px 2px; +} + +a { + outline: none; + text-decoration: none; + padding: 2px 1px 0; +} + +a[href*="http"] { + background: url('https://mdn.mozillademos.org/files/12982/external-link-52.png') no-repeat 100% 0; + background-size: 12px 12px; + padding-right: 16px; +} diff --git a/doxygen/hdf5doxy_layout.xml b/doxygen/hdf5doxy_layout.xml new file mode 100644 index 0000000..7f71c24 --- /dev/null +++ b/doxygen/hdf5doxy_layout.xml @@ -0,0 +1,182 @@ +<?xml version="1.0"?> +<doxygenlayout version="1.0"> + <!-- Navigation index tabs for HTML output --> + <navindex> + <tab type="user" url="index.html" title="Overview" /> + <tab type="user" url="https://portal.hdfgroup.org/display/HDF5/Learning+HDF5" title="Getting started" /> + <tab type="user" url="@ref Cookbook" title="Cookbook" /> + <tab type="user" url="https://portal.hdfgroup.org/display/HDF5/HDF5+User+Guides" title="User Guides" /> + <tab type="user" url="https://portal.hdfgroup.org/display/HDF5/HDF5+Application+Developer%27s+Guide" title="Application Developer's Guide" /> + <tab type="user" url="https://portal.hdfgroup.org/display/HDF5/HDF5+Glossary" title="Glossary" /> + <tab type="user" url="@ref RM" title="Reference Manual" /> + <tab type="user" url="@ref TN" title="Technical Notes" /> + <tab type="user" url="@ref SPEC" title="Specifications" /> + <tab type="user" url="@ref About" title="About" /> + </navindex> + + <!-- Layout definition for a class page --> + <class> + 
<briefdescription visible="no"/> + <includes visible="$SHOW_INCLUDE_FILES"/> + <detaileddescription title=""/> + <inheritancegraph visible="$CLASS_GRAPH"/> + <collaborationgraph visible="$COLLABORATION_GRAPH"/> + <allmemberslink visible="yes"/> + <memberdecl> + <nestedclasses visible="yes" title=""/> + <publictypes title=""/> + <publicslots title=""/> + <signals title=""/> + <publicmethods title=""/> + <publicstaticmethods title=""/> + <publicattributes title=""/> + <publicstaticattributes title=""/> + <protectedtypes title=""/> + <protectedslots title=""/> + <protectedmethods title=""/> + <protectedstaticmethods title=""/> + <protectedattributes title=""/> + <protectedstaticattributes title=""/> + <packagetypes title=""/> + <packagemethods title=""/> + <packagestaticmethods title=""/> + <packageattributes title=""/> + <packagestaticattributes title=""/> + <properties title=""/> + <events title=""/> + <privatetypes title=""/> + <privateslots title=""/> + <privatemethods title=""/> + <privatestaticmethods title=""/> + <privateattributes title=""/> + <privatestaticattributes title=""/> + <friends title=""/> + <related title="" subtitle=""/> + <membergroups visible="yes"/> + </memberdecl> + + <memberdef> + <inlineclasses title=""/> + <typedefs title=""/> + <enums title=""/> + <constructors title=""/> + <functions title=""/> + <related title=""/> + <variables title=""/> + <properties title=""/> + <events title=""/> + </memberdef> + <usedfiles visible="$SHOW_USED_FILES"/> + <authorsection visible="yes"/> + </class> + + <!-- Layout definition for a namespace page --> + <namespace> + <briefdescription visible="yes"/> + <memberdecl> + <nestednamespaces visible="yes" title=""/> + <classes visible="yes" title=""/> + <typedefs title=""/> + <enums title=""/> + <functions title=""/> + <variables title=""/> + <membergroups visible="yes"/> + </memberdecl> + <detaileddescription title=""/> + <memberdef> + <inlineclasses title=""/> + <typedefs title=""/> + <enums title=""/> + 
<functions title=""/> + <variables title=""/> + </memberdef> + <authorsection visible="yes"/> + </namespace> + + <!-- Layout definition for a file page --> + <file> + <briefdescription visible="yes"/> + <includes visible="$SHOW_INCLUDE_FILES"/> + <includegraph visible="$INCLUDE_GRAPH"/> + <includedbygraph visible="$INCLUDED_BY_GRAPH"/> + <sourcelink visible="yes"/> + <memberdecl> + <classes visible="yes" title=""/> + <namespaces visible="yes" title=""/> + <defines title=""/> + <typedefs title=""/> + <enums title=""/> + <functions title=""/> + <variables title=""/> + <membergroups visible="yes"/> + </memberdecl> + <detaileddescription title=""/> + <memberdef> + <inlineclasses title=""/> + <defines title=""/> + <typedefs title=""/> + <enums title=""/> + <functions title=""/> + <variables title=""/> + </memberdef> + <authorsection/> + </file> + + <!-- Layout definition for a group page --> + <group> + <briefdescription visible="no"/> + <detaileddescription title=""/> + <groupgraph visible="$GROUP_GRAPHS"/> + <memberdecl> + <nestedgroups visible="yes" title=""/> + <dirs visible="yes" title=""/> + <files visible="yes" title=""/> + <namespaces visible="yes" title=""/> + <classes visible="yes" title=""/> + <defines title=""/> + <typedefs title=""/> + <enums title=""/> + <enumvalues title=""/> + <functions title=""/> + <variables title=""/> + <signals title=""/> + <publicslots title=""/> + <protectedslots title=""/> + <privateslots title=""/> + <events title=""/> + <properties title=""/> + <friends title=""/> + <membergroups visible="yes"/> + </memberdecl> + + <memberdef> + <pagedocs/> + <inlineclasses title=""/> + <defines title=""/> + <typedefs title=""/> + <enums title=""/> + <enumvalues title=""/> + <functions title=""/> + <variables title=""/> + <signals title=""/> + <publicslots title=""/> + <protectedslots title=""/> + <privateslots title=""/> + <events title=""/> + <properties title=""/> + <friends title=""/> + </memberdef> + <authorsection visible="yes"/> + 
</group> + + <!-- Layout definition for a directory page --> + <directory> + <briefdescription visible="yes"/> + <directorygraph visible="yes"/> + <memberdecl> + <dirs visible="yes"/> + <files visible="yes"/> + </memberdecl> + <detaileddescription title=""/> + </directory> +</doxygenlayout> diff --git a/doxygen/img/FF-IH_FileGroup.gif b/doxygen/img/FF-IH_FileGroup.gif Binary files differnew file mode 100644 index 0000000..b0d76f5 --- /dev/null +++ b/doxygen/img/FF-IH_FileGroup.gif diff --git a/doxygen/img/FF-IH_FileObject.gif b/doxygen/img/FF-IH_FileObject.gif Binary files differnew file mode 100644 index 0000000..8eba623 --- /dev/null +++ b/doxygen/img/FF-IH_FileObject.gif diff --git a/doxygen/img/FileFormatSpecChunkDiagram.jpg b/doxygen/img/FileFormatSpecChunkDiagram.jpg Binary files differnew file mode 100644 index 0000000..03fd90a --- /dev/null +++ b/doxygen/img/FileFormatSpecChunkDiagram.jpg diff --git a/doxygen/img/HDFG-logo.png b/doxygen/img/HDFG-logo.png Binary files differindex a2d52a9..38300ff 100644 --- a/doxygen/img/HDFG-logo.png +++ b/doxygen/img/HDFG-logo.png diff --git a/doxygen/img/PaletteExample1.gif b/doxygen/img/PaletteExample1.gif Binary files differnew file mode 100644 index 0000000..8694d9d --- /dev/null +++ b/doxygen/img/PaletteExample1.gif diff --git a/doxygen/img/Palettes.fm.anc.gif b/doxygen/img/Palettes.fm.anc.gif Binary files differnew file mode 100644 index 0000000..d344c03 --- /dev/null +++ b/doxygen/img/Palettes.fm.anc.gif diff --git a/doxygen/img/ftv2node.png b/doxygen/img/ftv2node.png Binary files differnew file mode 100644 index 0000000..63c605b --- /dev/null +++ b/doxygen/img/ftv2node.png diff --git a/doxygen/img/ftv2pnode.png b/doxygen/img/ftv2pnode.png Binary files differnew file mode 100644 index 0000000..c6ee22f --- /dev/null +++ b/doxygen/img/ftv2pnode.png |