summaryrefslogtreecommitdiffstats
path: root/doxygen/dox
diff options
context:
space:
mode:
Diffstat (limited to 'doxygen/dox')
-rw-r--r--doxygen/dox/About.dox11
-rw-r--r--doxygen/dox/Cookbook.dox5
-rw-r--r--doxygen/dox/DDLBNF110.dox650
-rw-r--r--doxygen/dox/DDLBNF112.dox653
-rw-r--r--doxygen/dox/FileFormatSpec.dox23
-rw-r--r--doxygen/dox/GettingStarted.dox3
-rw-r--r--doxygen/dox/H5Fget_info.dox7
-rw-r--r--doxygen/dox/H5Lget_info.dox5
-rw-r--r--doxygen/dox/H5Lget_info_by_idx.dox5
-rw-r--r--doxygen/dox/H5Literate.dox4
-rw-r--r--doxygen/dox/H5Literate_by_name.dox4
-rw-r--r--doxygen/dox/H5Lvisit.dox4
-rw-r--r--doxygen/dox/H5Lvisit_by_name.dox4
-rw-r--r--doxygen/dox/MetadataCachingInHDF5.dox1020
-rw-r--r--doxygen/dox/OtherSpecs.dox11
-rw-r--r--doxygen/dox/Overview.dox32
-rw-r--r--doxygen/dox/ReferenceManual.dox43
-rw-r--r--doxygen/dox/Specifications.dox22
-rw-r--r--doxygen/dox/TechnicalNotes.dox20
-rw-r--r--doxygen/dox/api-compat-macros.dox1
-rw-r--r--doxygen/dox/mainpage.dox44
-rw-r--r--doxygen/dox/maybe_metadata_reads.dox82
22 files changed, 2585 insertions, 68 deletions
diff --git a/doxygen/dox/About.dox b/doxygen/dox/About.dox
new file mode 100644
index 0000000..3be9202
--- /dev/null
+++ b/doxygen/dox/About.dox
@@ -0,0 +1,11 @@
+/** \page About About
+
+The implementation of this documentation set is based on the fantastic work of the
+<a href="https://eigen.tuxfamily.org/index.php?title=Main_Page">Eigen project</a>.
+Please refer to their <a href="https://gitlab.com/libeigen/eigen">GitLab repository</a>
+and the online version of their
+<a href="http://eigen.tuxfamily.org/dox/">Doxygen-based documentation</a>.
+Not only does Eigen set a standard as a piece of software, but also as an example
+of <em>documentation done right</em>.
+
+*/ \ No newline at end of file
diff --git a/doxygen/dox/Cookbook.dox b/doxygen/dox/Cookbook.dox
new file mode 100644
index 0000000..4abc896
--- /dev/null
+++ b/doxygen/dox/Cookbook.dox
@@ -0,0 +1,5 @@
+/** \page Cookbook Cookbook
+
+ Healthy, everyday recipes for every taste and budget...
+
+ */ \ No newline at end of file
diff --git a/doxygen/dox/DDLBNF110.dox b/doxygen/dox/DDLBNF110.dox
new file mode 100644
index 0000000..f7e4267
--- /dev/null
+++ b/doxygen/dox/DDLBNF110.dox
@@ -0,0 +1,650 @@
+/** \page DDLBNF110 DDL in BNF through HDF5 1.10
+
+\todo Revise this & break it up!
+
+\section intro110 Introduction
+
+This document contains the data description language (DDL) for an HDF5 file. The
+description is in Backus-Naur Form (BNF).
+
+\section expo110 Explanation of Symbols
+
+This section contains a brief explanation of the symbols used in the DDL.
+
+\code{.unparsed}
+::= defined as
+ <tname> a token with the name tname
+ <a> | <b> one of <a> or <b>
+ <a>opt zero or one occurrence of <a>
+ <a>* zero or more occurrences of <a>
+ <a>+ one or more occurrences of <a>
+ [0-9] an element in the range between 0 and 9
+ '[' the token within the quotes (used for special characters)
+ TBD To Be Decided
+\endcode
+
+\section ddl110 The DDL
+
+\code{.unparsed}
+<file> ::= HDF5 <file_name> { <file_super_block>opt <root_group> }
+
+<file_name> ::= <identifier>
+
+<file_super_block> ::= SUPER_BLOCK {
+ SUPERBLOCK_VERSION <int_value>
+ FREELIST_VERSION <int_value>
+ SYMBOLTABLE_VERSION <int_value>
+ OBJECTHEADER_VERSION <int_value>
+ OFFSET_SIZE <int_value>
+ LENGTH_SIZE <int_value>
+ BTREE_RANK <int_value>
+ BTREE_LEAF <int_value>
+ ISTORE_K <int_value>
+ <super_block_filespace>
+ USER_BLOCK {
+ USERBLOCK_SIZE <int_value>
+ }
+ }
+
+<super_block_filespace> ::= FILE_SPACE_STRATEGY <super_block_strategy>
+ FREE_SPACE_PERSIST <boolean_value>
+ FREE_SPACE_SECTION_THRESHOLD <int_value>
+ FILE_SPACE_PAGE_SIZE <int_value>
+
+<super_block_strategy> ::= H5F_FSPACE_STRATEGY_FSM_AGGR | H5F_FSPACE_STRATEGY_PAGE |
+ H5F_FSPACE_STRATEGY_AGGR | H5F_FSPACE_STRATEGY_NONE |
+ Unknown strategy
+
+<root_group> ::= GROUP "/" {
+ <anon_named_datatype>*
+ <object_id>opt
+ <group_comment>opt
+ <group_attribute>*
+ <group_member>*
+ }
+
+<datatype> ::= <atomic_type> | <compound_type> | <variable_length_type> | <array_type>
+
+<anon_named_datatype> ::= DATATYPE <anon_named_type_name> {
+ <datatype>
+ }
+
+<anon_named_type_name> ::= the assigned name for anonymous named type is
+ in the form of #oid, where oid is the object id
+ of the type
+
+<atomic_type> ::= <integer> | <float> | <time> | <string> |
+ <bitfield> | <opaque> | <reference> | <enum>
+
+<boolean_value> ::= FALSE | TRUE
+
+<integer> ::= H5T_STD_I8BE | H5T_STD_I8LE |
+ H5T_STD_I16BE | H5T_STD_I16LE |
+ H5T_STD_I32BE | H5T_STD_I32LE |
+ H5T_STD_I64BE | H5T_STD_I64LE |
+ H5T_STD_U8BE | H5T_STD_U8LE |
+ H5T_STD_U16BE | H5T_STD_U16LE |
+ H5T_STD_U32BE | H5T_STD_U32LE |
+ H5T_STD_U64BE | H5T_STD_U64LE |
+ H5T_NATIVE_CHAR | H5T_NATIVE_UCHAR |
+ H5T_NATIVE_SHORT | H5T_NATIVE_USHORT |
+ H5T_NATIVE_INT | H5T_NATIVE_UINT |
+ H5T_NATIVE_LONG | H5T_NATIVE_ULONG |
+ H5T_NATIVE_LLONG | H5T_NATIVE_ULLONG
+
+<float> ::= H5T_IEEE_F32BE | H5T_IEEE_F32LE |
+ H5T_IEEE_F64BE | H5T_IEEE_F64LE |
+ H5T_NATIVE_FLOAT | H5T_NATIVE_DOUBLE |
+ H5T_NATIVE_LDOUBLE
+
+<time> ::= H5T_TIME: not yet implemented
+
+<string> ::= H5T_STRING {
+ STRSIZE <strsize>;
+ STRPAD <strpad>;
+ CSET <cset>;
+ CTYPE <ctype>;
+ }
+
+<strsize> ::= <int_value>
+
+<strpad> ::= H5T_STR_NULLTERM | H5T_STR_NULLPAD | H5T_STR_SPACEPAD
+
+<cset> ::= H5T_CSET_ASCII | H5T_CSET_UTF8
+
+<ctype> ::= H5T_C_S1 | H5T_FORTRAN_S1
+
+<bitfield> ::= H5T_STD_B8BE | H5T_STD_B8LE |
+ H5T_STD_B16BE | H5T_STD_B16LE |
+ H5T_STD_B32BE | H5T_STD_B32LE |
+ H5T_STD_B64BE | H5T_STD_B64LE
+
+<opaque> ::= H5T_OPAQUE {
+ OPAQUE_TAG <identifier>;
+ OPAQUE_SIZE <int_value>;opt
+ }
+
+<reference> ::= H5T_REFERENCE { <ref_type> }
+
+<ref_type> ::= H5T_STD_REF_OBJECT | H5T_STD_REF_DSETREG | H5T_STD_REF | UNDEFINED
+
+<compound_type> ::= H5T_COMPOUND {
+ <member_type_def>+
+ }
+
+<member_type_def> ::= <datatype> <field_name>;
+
+<field_name> ::= <identifier>
+
+<variable_length_type> ::= H5T_VLEN { <datatype> }
+
+<array_type> ::= H5T_ARRAY { <dim_sizes> <datatype> }
+
+<dim_sizes> ::= '['<dimsize>']' | '['<dimsize>']'<dim_sizes>
+
+<dimsize> ::= <int_value>
+
+<attribute> ::= ATTRIBUTE <attr_name> {
+ <dataset_type>
+ <dataset_space>
+ <data>opt
+ }
+
+<attr_name> ::= <identifier>
+
+<dataset_type> ::= DATATYPE <path_name> | <datatype>
+
+<enum> ::= H5T_ENUM {
+ <enum_base_type> <enum_def>+
+ }
+
+<enum_base_type> ::= <integer>
+// Currently enums can only hold integer type data, but they may be expanded
+// in the future to hold any datatype
+
+<enum_def> ::= <enum_symbol> <enum_val>;
+
+<enum_symbol> ::= <identifier>
+
+<enum_val> ::= <int_value>
+
+<path_name> ::= <path_part>+
+
+<path_part> ::= /<identifier>
+
+<dataspace> ::= <scalar_space> | <simple_space> | <complex_space> | <null_space>
+
+<null_space> ::= NULL
+
+<scalar_space> ::= SCALAR
+
+<simple_space> ::= SIMPLE { <current_dims> / <max_dims> }
+
+<complex_space> ::= COMPLEX { <complex_space_definition> }
+
+<dataset_space> ::= DATASPACE <path_name> | <dataspace>
+
+<current_dims> ::= <dims>
+
+<max_dims> ::= '(' <max_dim_list> ')'
+
+<max_dim_list> ::= <max_dim> | <max_dim>, <max_dim_list>
+
+<max_dim> ::= <int_value> | H5S_UNLIMITED
+
+<data> ::= <subset> | <data_values>
+
+<data_values> ::= DATA {
+ <scalar_space_data> | <simple_space_data>
+ }
+
+<scalar_space_data> ::= <any_element>
+
+<any_element> ::= <atomic_element> | <compound_element> |
+ <variable_length_element> | <array_element>
+
+<any_data_seq> ::= <any_element> | <any_element>, <any_data_seq>
+
+<atomic_element> ::= <integer_data> | <float_data> | <time_data> |
+ <string_data> | <bitfield_data> | <opaque_data> |
+ <enum_data> | <reference_data>
+
+<subset> ::= SUBSET {
+ <start>;
+ <stride>;
+ <count>;
+ <block>;
+ DATA {
+ <simple_space_data>
+ }
+ }
+
+<start> ::= START (<coor_list>)
+
+<stride> ::= STRIDE (<pos_list>)
+
+<count> ::= COUNT (<max_dim_list>)
+
+<block> ::= BLOCK (<max_dim_list>)
+
+<coor_list> ::= <coor_data>, <coor_list> | <coor_data>
+
+<coor_data> ::= <integer_data> | H5S_UNLIMITED
+
+<integer_data> ::= <int_value>
+
+<float_data> ::= a floating point number
+
+<time_data> ::= DATA{ not yet implemented.}
+
+<string_data> ::= a string
+// A string is enclosed in double quotes.
+// If a string is displayed on more than one line, the string concatenation
+// operator '//' is used.
+
+<bitfield_data> ::= <hex_value>
+
+<opaque_data> ::= <hex_value>:<hex_value> | <hex_value>
+
+<enum_data> ::= <enum_symbol>
+
+<reference_data> ::= <object_ref_data> | <data_region_data> | <attribute_data> | NULL
+
+<object_ref_data> ::= <object_type> <object_num>
+
+<object_type> ::= DATASET | GROUP | DATATYPE
+
+<object_id> ::= OBJECTID { <object_num> }
+
+<object_num> ::= <int_value>:<int_value> | <int_value>
+
+<attribute_data> ::= ATTRIBUTE <attr_name>
+
+<data_region_data> ::= DATASET <dataset_name> {
+ <data_region_type>opt <data_region_data_list>
+ <dataset_type>opt <dataset_space>opt
+ <data>opt
+ }
+
+<data_region_type> ::= REGION_TYPE <data_region_data_type>
+
+<data_region_data_type> ::= POINT | BLOCK
+
+<data_region_data_list> ::= <data_region_data_info>, <data_region_data_list> |
+ <data_region_data_info>
+
+<data_region_data_info> ::= <region_info> | <point_info>
+
+<region_info> ::= (<lower_region_vals>)-(<upper_region_vals>)
+
+<lower_region_vals> ::= <lower_bound>, <lower_region_vals> | <lower_bound>
+
+<upper_region_vals> ::= <upper_bound>, <upper_region_vals> | <upper_bound>
+
+<lower_bound> ::= <int_value>
+
+<upper_bound> ::= <int_value>
+
+<point_info> ::= (<point_vals>)
+
+<point_vals> ::= <int_value> | <int_value>, <point_vals>
+
+<compound_element> ::= { <any_data_seq> }
+
+<atomic_simple_data> ::= <atomic_element>, <atomic_simple_data> |
+ <atomic_element>
+
+<simple_space_data> ::= <any_data_seq>
+
+<variable_length_element> ::= ( <any_data_seq> )
+
+<array_element> ::= '[' <any_data_seq> ']'
+
+<named_datatype> ::= DATATYPE <type_name> { <datatype> }
+
+<type_name> ::= <identifier>
+
+<hardlink> ::= HARDLINK <path_name>
+
+<group> ::= GROUP <group_name> { <hardlink> | <group_info> }
+
+<group_comment> ::= COMMENT <string_data>
+
+<group_name> ::= <identifier>
+
+<group_info> ::= <object_id>opt <group_comment>opt <group_attribute>*
+ <group_member>*
+
+<group_attribute> ::= <attribute>
+
+<group_member> ::= <named_datatype> | <group> | <dataset> |
+ <softlink> | <external_link>
+
+<dataset> ::= DATASET <dataset_name> { <hardlink> | <dataset_info> }
+
+<dataset_info> ::= <dataset_type>
+ <dataset_space>
+ <dcpl_info>opt
+ <dataset_attribute>* <object_id>opt
+ <data>opt
+// Tokens above can be in any order as long as <data> is
+// after <dataset_type> and <dataset_space>.
+
+<dcpl_info> ::= <storagelayout>
+ <compression_filters>
+ <fillvalue>
+ <allocationtime>
+
+<dataset_name> ::= <identifier>
+
+<storagelayout> ::= STORAGE_LAYOUT {
+ <contiguous_layout> | <chunked_layout> |
+ <compact_layout> | <virtual_layout>
+ }
+
+<contiguous_layout> ::= CONTIGUOUS
+ <internal_layout> | <external_layout>
+
+<chunked_layout> ::= CHUNKED <dims>
+ <filter_ratio>opt
+
+<compact_layout> ::= COMPACT
+ <size>
+
+<internal_layout> ::= <size>
+ <offset>
+
+<external_layout> ::= EXTERNAL {
+ <external_file>+
+ }
+
+<virtual_layout> ::= <vmaps>*opt
+
+<vmaps> ::= MAPPING <int_value> {
+ <virtual_map>
+ <source_map>
+ }
+
+<virtual_map> ::= VIRTUAL {
+ <vmaps_selection>
+ }
+
+<source_map> ::= SOURCE {
+ FILE <file_name>
+ DATASET <dataset_name>
+ <vmaps_selection>
+ }
+
+<vmaps_selection> ::= <regular_hyperslab> | <irregular_hyperslab> |
+ <select_points> | <select_none> | <select_all>
+
+<regular_hyperslab> ::= SELECTION REGULAR_HYPERSLAB {
+ <start>
+ <stride>
+ <count>
+ <block>
+ }
+
+<irregular_hyperslab> ::= SELECTION IRREGULAR_HYPERSLAB {
+ <region_info>+
+ }
+
+<select_points> ::= SELECTION POINT {
+ (<coor_list>)+
+ }
+
+<select_none> ::= SELECTION NONE
+
+<select_all> ::= SELECTION ALL
+
+<dims> ::= (<dims_values>)
+
+<dims_values> ::= <int_value> | <int_value>, <dims_values>
+
+<external_file> ::= FILENAME <file_name> <size> <offset>
+
+<offset> ::= OFFSET <int_value>
+
+<size> ::= SIZE <int_value>
+
+<filter_ratio> ::= <size> | <compressionratio>
+
+<compressionratio> ::= <size> (<float_data>:1 COMPRESSION)
+
+<compression_filters> ::= FILTERS {
+ <filter_type>+ | NONE
+ }
+
+<filter_type> ::= <filter_deflate> | <filter_shuffle> |
+ <filter_fletcher> | <filter_szip> |
+ <filter_nbit> | <filter_scaleoffset> |
+ <filter_default>
+
+<filter_default> ::= <filter_user> {
+ FILTER_ID <int_value>
+ <filter_comment>opt
+ <filter_params>opt
+ }
+
+<filter_user> ::= USER_DEFINED_FILTER
+
+<filter_deflate> ::= COMPRESSION DEFLATE { LEVEL <int_value> }
+
+<filter_shuffle> ::= PREPROCESSING SHUFFLE
+
+<filter_fletcher> ::= CHECKSUM FLETCHER32
+
+<filter_szip> ::= COMPRESSION SZIP {
+ PIXELS_PER_BLOCK <int_value>
+ <filter_szip_mode>opt
+ <filter_szip_coding>opt
+ <filter_szip_order>opt
+ <filter_szip_header>opt
+ }
+
+<filter_szip_mode> ::= MODE HARDWARE | K13
+
+<filter_szip_coding> ::= CODING ENTROPY | NEAREST NEIGHBOUR
+
+<filter_szip_order> ::= BYTE_ORDER LSB | MSB
+
+<filter_szip_header> ::= HEADER RAW
+
+<filter_nbit> ::= CHECKSUM NBIT
+
+<filter_scaleoffset> ::= COMPRESSION SCALEOFFSET { MIN BITS <int_value> }
+
+<filter_comment> ::= COMMENT <identifier>
+
+<filter_params> ::= PARAMS { <int_value>* }
+
+<fillvalue> ::= FILLVALUE {
+ FILL_TIME H5D_FILL_TIME_ALLOC | H5D_FILL_TIME_NEVER | H5D_FILL_TIME_IFSET
+ VALUE H5D_FILL_VALUE_UNDEFINED | H5D_FILL_VALUE_DEFAULT | <any_element>
+ }
+
+<allocationtime> ::= ALLOCATION_TIME {
+ H5D_ALLOC_TIME_EARLY | H5D_ALLOC_TIME_INCR |
+ H5D_ALLOC_TIME_LATE
+ }
+
+<dataset_attribute> ::= <attribute>
+
+<softlink> ::= SOFTLINK <softlink_name> {
+ LINKTARGET <target>
+ }
+
+<softlink_name> ::= <identifier>
+
+<target> ::= <identifier>
+
+<external_link> ::= EXTERNAL_LINK <external_link_name> {
+ TARGETFILE <targetfile>
+ TARGETPATH <targetpath> <targetobj>opt
+ }
+
+<external_link_name> ::= <identifier>
+
+<user_defined_link> ::= USERDEFINED_LINK <external_link_name> {
+ LINKCLASS <user_link_type>
+ }
+
+<user_link_type> ::= <int_value>
+
+<targetfile> ::= <file_name>
+
+<targetpath> ::= <identifier>
+
+<targetobj> ::= <named_datatype> | <group> | <dataset>
+
+<identifier> ::= "a string"
+// character '/' should be used with care.
+
+<pos_list> ::= <pos_int>, <pos_list> | <pos_int>
+
+<int_value> ::= 0 | <pos_int>
+
+<pos_int> ::= [1-9][0-9]*
+
+<hex_value> ::= 0x[0-F][0-F]+ | [0-F][0-F]+
+\endcode
+
+\section example110 An Example of an HDF5 File in DDL
+
+\code{.unparsed}
+HDF5 "example.h5" {
+GROUP "/" {
+ ATTRIBUTE "attr1" {
+ DATATYPE H5T_STRING {
+ STRSIZE 17;
+ STRPAD H5T_STR_NULLTERM;
+ CSET H5T_CSET_ASCII;
+ CTYPE H5T_C_S1;
+ }
+ DATASPACE SCALAR
+ DATA {
+ "string attribute"
+ }
+ }
+ DATASET "dset1" {
+ DATATYPE H5T_STD_I32BE
+ DATASPACE SIMPLE { ( 10, 10 ) / ( 10, 10 ) }
+ DATA {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
+ }
+ }
+ DATASET "dset2" {
+ DATATYPE H5T_COMPOUND {
+ H5T_STD_I32BE "a";
+ H5T_IEEE_F32BE "b";
+ H5T_IEEE_F64BE "c";
+ }
+ DATASPACE SIMPLE { ( 5 ) / ( 5 ) }
+ DATA {
+ {
+ 1,
+ 0.1,
+ 0.01
+ },
+ {
+ 2,
+ 0.2,
+ 0.02
+ },
+ {
+ 3,
+ 0.3,
+ 0.03
+ },
+ {
+ 4,
+ 0.4,
+ 0.04
+ },
+ {
+ 5,
+ 0.5,
+ 0.05
+ }
+ }
+ }
+ GROUP "group1" {
+ COMMENT "This is a comment for group1";
+ DATASET "dset3" {
+ DATATYPE "/type1"
+ DATASPACE SIMPLE { ( 5 ) / ( 5 ) }
+ DATA {
+ {
+ [ 0, 1, 2, 3 ],
+ [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
+ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
+ 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
+ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
+ },
+ {
+ [ 0, 1, 2, 3 ],
+ [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
+ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
+ 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
+ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
+ },
+ {
+ [ 0, 1, 2, 3 ],
+ [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
+ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
+ 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
+ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
+ },
+ {
+ [ 0, 1, 2, 3 ],
+ [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
+ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
+ 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
+ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
+ },
+ {
+ [ 0, 1, 2, 3 ],
+ [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
+ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
+ 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
+ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
+ }
+ }
+ }
+ }
+ DATASET "dset3" {
+ DATATYPE H5T_VLEN { H5T_STD_I32LE }
+ DATASPACE SIMPLE { ( 4 ) / ( 4 ) }
+ DATA {
+ (0), (10, 11), (20, 21, 22), (30, 31, 32, 33)
+ }
+ }
+ GROUP "group2" {
+ HARDLINK "/group1"
+ }
+ SOFTLINK "slink1" {
+ LINKTARGET "somevalue"
+ }
+ DATATYPE "type1" H5T_COMPOUND {
+ H5T_ARRAY { [4] H5T_STD_I32BE } "a";
+ H5T_ARRAY { [5][6] H5T_IEEE_F32BE } "b";
+ }
+}
+}
+\endcode
+
+ */ \ No newline at end of file
diff --git a/doxygen/dox/DDLBNF112.dox b/doxygen/dox/DDLBNF112.dox
new file mode 100644
index 0000000..6809a06
--- /dev/null
+++ b/doxygen/dox/DDLBNF112.dox
@@ -0,0 +1,653 @@
+/** \page DDLBNF112 DDL in BNF for HDF5 1.12 and above
+
+\todo Revise this & break it up!
+
+\section intro112 Introduction
+
+This document contains the data description language (DDL) for an HDF5 file. The
+description is in Backus-Naur Form (BNF).
+
+\section expo112 Explanation of Symbols
+
+This section contains a brief explanation of the symbols used in the DDL.
+
+\code{.unparsed}
+::= defined as
+ <tname> a token with the name tname
+ <a> | <b> one of <a> or <b>
+ <a>opt zero or one occurrence of <a>
+ <a>* zero or more occurrences of <a>
+ <a>+ one or more occurrences of <a>
+ [0-9] an element in the range between 0 and 9
+ '[' the token within the quotes (used for special characters)
+ TBD To Be Decided
+\endcode
+
+\section ddl112 The DDL
+
+\code{.unparsed}
+<file> ::= HDF5 <file_name> { <file_super_block>opt <root_group> }
+
+<file_name> ::= <identifier>
+
+<file_super_block> ::= SUPER_BLOCK {
+ SUPERBLOCK_VERSION <int_value>
+ FREELIST_VERSION <int_value>
+ SYMBOLTABLE_VERSION <int_value>
+ OBJECTHEADER_VERSION <int_value>
+ OFFSET_SIZE <int_value>
+ LENGTH_SIZE <int_value>
+ BTREE_RANK <int_value>
+ BTREE_LEAF <int_value>
+ ISTORE_K <int_value>
+ <super_block_filespace>
+ USER_BLOCK {
+ USERBLOCK_SIZE <int_value>
+ }
+ }
+
+<super_block_filespace> ::= FILE_SPACE_STRATEGY <super_block_strategy>
+ FREE_SPACE_PERSIST <boolean_value>
+ FREE_SPACE_SECTION_THRESHOLD <int_value>
+ FILE_SPACE_PAGE_SIZE <int_value>
+
+<super_block_strategy> ::= H5F_FSPACE_STRATEGY_FSM_AGGR | H5F_FSPACE_STRATEGY_PAGE |
+ H5F_FSPACE_STRATEGY_AGGR | H5F_FSPACE_STRATEGY_NONE |
+ Unknown strategy
+
+<root_group> ::= GROUP "/" {
+ <anon_named_datatype>*
+ <object_id>opt
+ <group_comment>opt
+ <group_attribute>*
+ <group_member>*
+ }
+
+<datatype> ::= <atomic_type> | <compound_type> | <variable_length_type> | <array_type>
+
+<anon_named_datatype> ::= DATATYPE <anon_named_type_name> {
+ <datatype>
+ }
+
+<anon_named_type_name> ::= the assigned name for anonymous named type is
+ in the form of #oid, where oid is the object id
+ of the type
+
+<atomic_type> ::= <integer> | <float> | <time> | <string> |
+ <bitfield> | <opaque> | <reference> | <enum>
+
+<boolean_value> ::= FALSE | TRUE
+
+<integer> ::= H5T_STD_I8BE | H5T_STD_I8LE |
+ H5T_STD_I16BE | H5T_STD_I16LE |
+ H5T_STD_I32BE | H5T_STD_I32LE |
+ H5T_STD_I64BE | H5T_STD_I64LE |
+ H5T_STD_U8BE | H5T_STD_U8LE |
+ H5T_STD_U16BE | H5T_STD_U16LE |
+ H5T_STD_U32BE | H5T_STD_U32LE |
+ H5T_STD_U64BE | H5T_STD_U64LE |
+ H5T_NATIVE_CHAR | H5T_NATIVE_UCHAR |
+ H5T_NATIVE_SHORT | H5T_NATIVE_USHORT |
+ H5T_NATIVE_INT | H5T_NATIVE_UINT |
+ H5T_NATIVE_LONG | H5T_NATIVE_ULONG |
+ H5T_NATIVE_LLONG | H5T_NATIVE_ULLONG
+
+<float> ::= H5T_IEEE_F32BE | H5T_IEEE_F32LE |
+ H5T_IEEE_F64BE | H5T_IEEE_F64LE |
+ H5T_NATIVE_FLOAT | H5T_NATIVE_DOUBLE |
+ H5T_NATIVE_LDOUBLE
+
+<time> ::= H5T_TIME: not yet implemented
+
+<string> ::= H5T_STRING {
+ STRSIZE <strsize>;
+ STRPAD <strpad>;
+ CSET <cset>;
+ CTYPE <ctype>;
+ }
+
+<strsize> ::= <int_value>
+
+<strpad> ::= H5T_STR_NULLTERM | H5T_STR_NULLPAD | H5T_STR_SPACEPAD
+
+<cset> ::= H5T_CSET_ASCII | H5T_CSET_UTF8
+
+<ctype> ::= H5T_C_S1 | H5T_FORTRAN_S1
+
+<bitfield> ::= H5T_STD_B8BE | H5T_STD_B8LE |
+ H5T_STD_B16BE | H5T_STD_B16LE |
+ H5T_STD_B32BE | H5T_STD_B32LE |
+ H5T_STD_B64BE | H5T_STD_B64LE
+
+<opaque> ::= H5T_OPAQUE {
+ OPAQUE_TAG <identifier>;
+ OPAQUE_SIZE <int_value>;opt
+ }
+
+<reference> ::= H5T_REFERENCE { <ref_type> }
+
+<ref_type> ::= H5T_STD_REF_OBJECT | H5T_STD_REF_DSETREG | H5T_STD_REF | UNDEFINED
+
+<compound_type> ::= H5T_COMPOUND {
+ <member_type_def>+
+ }
+
+<member_type_def> ::= <datatype> <field_name>;
+
+<field_name> ::= <identifier>
+
+<variable_length_type> ::= H5T_VLEN { <datatype> }
+
+<array_type> ::= H5T_ARRAY { <dim_sizes> <datatype> }
+
+<dim_sizes> ::= '['<dimsize>']' | '['<dimsize>']'<dim_sizes>
+
+<dimsize> ::= <int_value>
+
+<attribute> ::= ATTRIBUTE <attr_name> {
+ <dataset_type>
+ <dataset_space>
+ <data>opt
+ }
+
+<attr_name> ::= <identifier>
+
+<dataset_type> ::= DATATYPE <path_name> | <datatype>
+
+<enum> ::= H5T_ENUM {
+ <enum_base_type> <enum_def>+
+ }
+
+<enum_base_type> ::= <integer>
+// Currently enums can only hold integer type data, but they may be expanded
+// in the future to hold any datatype
+
+<enum_def> ::= <enum_symbol> <enum_val>;
+
+<enum_symbol> ::= <identifier>
+
+<enum_val> ::= <int_value>
+
+<path_name> ::= <path_part>+
+
+<path_part> ::= /<identifier>
+
+<dataspace> ::= <scalar_space> | <simple_space> | <complex_space> | <null_space>
+
+<null_space> ::= NULL
+
+<scalar_space> ::= SCALAR
+
+<simple_space> ::= SIMPLE { <current_dims> / <max_dims> }
+
+<complex_space> ::= COMPLEX { <complex_space_definition> }
+
+<dataset_space> ::= DATASPACE <path_name> | <dataspace>
+
+<current_dims> ::= <dims>
+
+<max_dims> ::= '(' <max_dim_list> ')'
+
+<max_dim_list> ::= <max_dim> | <max_dim>, <max_dim_list>
+
+<max_dim> ::= <int_value> | H5S_UNLIMITED
+
+<data> ::= <subset> | <data_values>
+
+<data_values> ::= DATA {
+ <scalar_space_data> | <simple_space_data>
+ }
+
+<scalar_space_data> ::= <any_element>
+
+<any_element> ::= <atomic_element> | <compound_element> |
+ <variable_length_element> | <array_element>
+
+<any_data_seq> ::= <any_element> | <any_element>, <any_data_seq>
+
+<atomic_element> ::= <integer_data> | <float_data> | <time_data> |
+ <string_data> | <bitfield_data> | <opaque_data> |
+ <enum_data> | <reference_data>
+
+<subset> ::= SUBSET {
+ <start>;
+ <stride>;
+ <count>;
+ <block>;
+ DATA {
+ <simple_space_data>
+ }
+ }
+
+<start> ::= START (<coor_list>)
+
+<stride> ::= STRIDE (<pos_list>)
+
+<count> ::= COUNT (<max_dim_list>)
+
+<block> ::= BLOCK (<max_dim_list>)
+
+<coor_list> ::= <coor_data>, <coor_list> | <coor_data>
+
+<coor_data> ::= <integer_data> | H5S_UNLIMITED
+
+<integer_data> ::= <int_value>
+
+<float_data> ::= a floating point number
+
+<time_data> ::= DATA{ not yet implemented.}
+
+<string_data> ::= a string
+// A string is enclosed in double quotes.
+// If a string is displayed on more than one line, the string concatenation
+// operator '//' is used.
+
+<bitfield_data> ::= <hex_value>
+
+<opaque_data> ::= <hex_value>:<hex_value> | <hex_value>
+
+<enum_data> ::= <enum_symbol>
+
+<reference_data> ::= <object_ref_data> | <data_region_data> | <attribute_data> | NULL
+
+<object_ref_data> ::= <object_type> <object_ref>
+
+<object_type> ::= ATTRIBUTE | DATASET | GROUP | DATATYPE
+
+<object_ref> ::= <object_id>
+
+<object_id> ::= <path_name> | OBJECTID { <object_num> }
+
+<object_num> ::= <int_value>:<int_value> | <int_value>
+
+<attribute_data> ::= ATTRIBUTE <attr_name>opt
+ <data>opt
+
+<data_region_data> ::= DATASET <dataset_name> {
+ <data_region_type>opt <data_region_data_list>
+ <dataset_type>opt <dataset_space>opt
+ <data>opt
+ }
+
+<data_region_type> ::= REGION_TYPE <data_region_data_type>
+
+<data_region_data_type> ::= POINT | BLOCK
+
+<data_region_data_list> ::= <data_region_data_info>, <data_region_data_list> |
+ <data_region_data_info>
+
+<data_region_data_info> ::= <region_info> | <point_info>
+
+<region_info> ::= (<lower_region_vals>)-(<upper_region_vals>)
+
+<lower_region_vals> ::= <lower_bound>, <lower_region_vals> | <lower_bound>
+
+<upper_region_vals> ::= <upper_bound>, <upper_region_vals> | <upper_bound>
+
+<lower_bound> ::= <int_value>
+
+<upper_bound> ::= <int_value>
+
+<point_info> ::= (<point_vals>)
+
+<point_vals> ::= <int_value> | <int_value>, <point_vals>
+
+<compound_element> ::= { <any_data_seq> }
+
+<atomic_simple_data> ::= <atomic_element>, <atomic_simple_data> |
+ <atomic_element>
+
+<simple_space_data> ::= <any_data_seq>
+
+<variable_length_element> ::= ( <any_data_seq> )
+
+<array_element> ::= '[' <any_data_seq> ']'
+
+<named_datatype> ::= DATATYPE <type_name> { <datatype> }
+
+<type_name> ::= <identifier>
+
+<hardlink> ::= HARDLINK <path_name>
+
+<group> ::= GROUP <group_name> { <hardlink> | <group_info> }
+
+<group_comment> ::= COMMENT <string_data>
+
+<group_name> ::= <identifier>
+
+<group_info> ::= <object_id>opt <group_comment>opt <group_attribute>*
+ <group_member>*
+
+<group_attribute> ::= <attribute>
+
+<group_member> ::= <named_datatype> | <group> | <dataset> |
+ <softlink> | <external_link>
+
+<dataset> ::= DATASET <dataset_name> { <hardlink> | <dataset_info> }
+
+<dataset_info> ::= <dataset_type>
+ <dataset_space>
+ <dcpl_info>opt
+ <dataset_attribute>* <object_id>opt
+ <data>opt
+// Tokens above can be in any order as long as <data> is
+// after <dataset_type> and <dataset_space>.
+
+<dcpl_info> ::= <storagelayout>
+ <compression_filters>
+ <fillvalue>
+ <allocationtime>
+
+<dataset_name> ::= <identifier>
+
+<storagelayout> ::= STORAGE_LAYOUT {
+ <contiguous_layout> | <chunked_layout> |
+ <compact_layout> | <virtual_layout>
+ }
+
+<contiguous_layout> ::= CONTIGUOUS
+ <internal_layout> | <external_layout>
+
+<chunked_layout> ::= CHUNKED <dims>
+ <filter_ratio>opt
+
+<compact_layout> ::= COMPACT
+ <size>
+
+<internal_layout> ::= <size>
+ <offset>
+
+<external_layout> ::= EXTERNAL {
+ <external_file>+
+ }
+
+<virtual_layout> ::= <vmaps>*opt
+
+<vmaps> ::= MAPPING <int_value> {
+ <virtual_map>
+ <source_map>
+ }
+
+<virtual_map> ::= VIRTUAL {
+ <vmaps_selection>
+ }
+
+<source_map> ::= SOURCE {
+ FILE <file_name>
+ DATASET <dataset_name>
+ <vmaps_selection>
+ }
+
+<vmaps_selection> ::= <regular_hyperslab> | <irregular_hyperslab> |
+ <select_points> | <select_none> | <select_all>
+
+<regular_hyperslab> ::= SELECTION REGULAR_HYPERSLAB {
+ <start>
+ <stride>
+ <count>
+ <block>
+ }
+
+<irregular_hyperslab> ::= SELECTION IRREGULAR_HYPERSLAB {
+ <region_info>+
+ }
+
+<select_points> ::= SELECTION POINT {
+ (<coor_list>)+
+ }
+
+<select_none> ::= SELECTION NONE
+
+<select_all> ::= SELECTION ALL
+
+<dims> ::= (<dims_values>)
+
+<dims_values> ::= <int_value> | <int_value>, <dims_values>
+
+<external_file> ::= FILENAME <file_name> <size> <offset>
+
+<offset> ::= OFFSET <int_value>
+
+<size> ::= SIZE <int_value>
+
+<filter_ratio> ::= <size> | <compressionratio>
+
+<compressionratio> ::= <size> (<float_data>:1 COMPRESSION)
+
+<compression_filters> ::= FILTERS {
+ <filter_type>+ | NONE
+ }
+
+<filter_type> ::= <filter_deflate> | <filter_shuffle> |
+ <filter_fletcher> | <filter_szip> |
+ <filter_nbit> | <filter_scaleoffset> |
+ <filter_default>
+
+<filter_default> ::= <filter_user> {
+ FILTER_ID <int_value>
+ <filter_comment>opt
+ <filter_params>opt
+ }
+
+<filter_user> ::= USER_DEFINED_FILTER
+
+<filter_deflate> ::= COMPRESSION DEFLATE { LEVEL <int_value> }
+
+<filter_shuffle> ::= PREPROCESSING SHUFFLE
+
+<filter_fletcher> ::= CHECKSUM FLETCHER32
+
+<filter_szip> ::= COMPRESSION SZIP {
+ PIXELS_PER_BLOCK <int_value>
+ <filter_szip_mode>opt
+ <filter_szip_coding>opt
+ <filter_szip_order>opt
+ <filter_szip_header>opt
+ }
+
+<filter_szip_mode> ::= MODE HARDWARE | K13
+
+<filter_szip_coding> ::= CODING ENTROPY | NEAREST NEIGHBOUR
+
+<filter_szip_order> ::= BYTE_ORDER LSB | MSB
+
+<filter_szip_header> ::= HEADER RAW
+
+<filter_nbit> ::= CHECKSUM NBIT
+
+<filter_scaleoffset> ::= COMPRESSION SCALEOFFSET { MIN BITS <int_value> }
+
+<filter_comment> ::= COMMENT <identifier>
+
+<filter_params> ::= PARAMS { <int_value>* }
+
+<fillvalue> ::= FILLVALUE {
+ FILL_TIME H5D_FILL_TIME_ALLOC | H5D_FILL_TIME_NEVER | H5D_FILL_TIME_IFSET
+ VALUE H5D_FILL_VALUE_UNDEFINED | H5D_FILL_VALUE_DEFAULT | <any_element>
+ }
+
+<allocationtime> ::= ALLOCATION_TIME {
+ H5D_ALLOC_TIME_EARLY | H5D_ALLOC_TIME_INCR |
+ H5D_ALLOC_TIME_LATE
+ }
+
+<dataset_attribute> ::= <attribute>
+
+<softlink> ::= SOFTLINK <softlink_name> {
+ LINKTARGET <target>
+ }
+
+<softlink_name> ::= <identifier>
+
+<target> ::= <identifier>
+
+<external_link> ::= EXTERNAL_LINK <external_link_name> {
+ TARGETFILE <targetfile>
+ TARGETPATH <targetpath> <targetobj>opt
+ }
+
+<external_link_name> ::= <identifier>
+
+<user_defined_link> ::= USERDEFINED_LINK <external_link_name> {
+ LINKCLASS <user_link_type>
+ }
+
+<user_link_type> ::= <int_value>
+
+<targetfile> ::= <file_name>
+
+<targetpath> ::= <identifier>
+
+<targetobj> ::= <named_datatype> | <group> | <dataset>
+
+<identifier> ::= "a string"
+// character '/' should be used with care.
+
+<pos_list> ::= <pos_int>, <pos_list> | <pos_int>
+
+<int_value> ::= 0 | <pos_int>
+
+<pos_int> ::= [1-9][0-9]*
+
+<hex_value> ::= 0x[0-F][0-F]+ | [0-F][0-F]+
+\endcode
+
+\section example112 An Example of an HDF5 File in DDL
+
+\code{.unparsed}
+HDF5 "example.h5" {
+GROUP "/" {
+ ATTRIBUTE "attr1" {
+ DATATYPE H5T_STRING {
+ STRSIZE 17;
+ STRPAD H5T_STR_NULLTERM;
+ CSET H5T_CSET_ASCII;
+ CTYPE H5T_C_S1;
+ }
+ DATASPACE SCALAR
+ DATA {
+ "string attribute"
+ }
+ }
+ DATASET "dset1" {
+ DATATYPE H5T_STD_I32BE
+ DATASPACE SIMPLE { ( 10, 10 ) / ( 10, 10 ) }
+ DATA {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
+ }
+ }
+ DATASET "dset2" {
+ DATATYPE H5T_COMPOUND {
+ H5T_STD_I32BE "a";
+ H5T_IEEE_F32BE "b";
+ H5T_IEEE_F64BE "c";
+ }
+ DATASPACE SIMPLE { ( 5 ) / ( 5 ) }
+ DATA {
+ {
+ 1,
+ 0.1,
+ 0.01
+ },
+ {
+ 2,
+ 0.2,
+ 0.02
+ },
+ {
+ 3,
+ 0.3,
+ 0.03
+ },
+ {
+ 4,
+ 0.4,
+ 0.04
+ },
+ {
+ 5,
+ 0.5,
+ 0.05
+ }
+ }
+ }
+ GROUP "group1" {
+ COMMENT "This is a comment for group1";
+ DATASET "dset3" {
+ DATATYPE "/type1"
+ DATASPACE SIMPLE { ( 5 ) / ( 5 ) }
+ DATA {
+ {
+ [ 0, 1, 2, 3 ],
+ [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
+ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
+ 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
+ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
+ },
+ {
+ [ 0, 1, 2, 3 ],
+ [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
+ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
+ 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
+ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
+ },
+ {
+ [ 0, 1, 2, 3 ],
+ [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
+ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
+ 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
+ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
+ },
+ {
+ [ 0, 1, 2, 3 ],
+ [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
+ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
+ 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
+ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
+ },
+ {
+ [ 0, 1, 2, 3 ],
+ [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
+ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
+ 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
+ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
+ }
+ }
+ }
+ }
+ DATASET "dset3" {
+ DATATYPE H5T_VLEN { H5T_STD_I32LE }
+ DATASPACE SIMPLE { ( 4 ) / ( 4 ) }
+ DATA {
+ (0), (10, 11), (20, 21, 22), (30, 31, 32, 33)
+ }
+ }
+ GROUP "group2" {
+ HARDLINK "/group1"
+ }
+ SOFTLINK "slink1" {
+ LINKTARGET "somevalue"
+ }
+ DATATYPE "type1" H5T_COMPOUND {
+ H5T_ARRAY { [4] H5T_STD_I32BE } "a";
+ H5T_ARRAY { [5][6] H5T_IEEE_F32BE } "b";
+ }
+}
+}
+\endcode
+
+*/ \ No newline at end of file
diff --git a/doxygen/dox/FileFormatSpec.dox b/doxygen/dox/FileFormatSpec.dox
new file mode 100644
index 0000000..fc10574
--- /dev/null
+++ b/doxygen/dox/FileFormatSpec.dox
@@ -0,0 +1,23 @@
+/** \page FMT3 HDF5 File Format Specification Version 3.0
+
+\htmlinclude H5.format.html
+
+*/
+
+/** \page FMT2 HDF5 File Format Specification Version 2.0
+
+\htmlinclude H5.format.2.0.html
+
+*/
+
+/** \page FMT11 HDF5 File Format Specification Version 1.1
+
+\htmlinclude H5.format.1.1.html
+
+*/
+
+/** \page FMT1 HDF5 File Format Specification Version 1.0
+
+\htmlinclude H5.format.1.0.html
+
+*/ \ No newline at end of file
diff --git a/doxygen/dox/GettingStarted.dox b/doxygen/dox/GettingStarted.dox
new file mode 100644
index 0000000..880491d
--- /dev/null
+++ b/doxygen/dox/GettingStarted.dox
@@ -0,0 +1,3 @@
+/** \page GettingStarted \Code{Hello, HDF5!}
+
+ */ \ No newline at end of file
diff --git a/doxygen/dox/H5Fget_info.dox b/doxygen/dox/H5Fget_info.dox
index b2eeb6c..9b02752 100644
--- a/doxygen/dox/H5Fget_info.dox
+++ b/doxygen/dox/H5Fget_info.dox
@@ -6,9 +6,8 @@
* Similarly, the macro for the \ref H5F_info_t struct is mapped to either
* H5F_info1_t or H5F_info2_t.
*
- * Such macros are provided to facilitate application
- * compatibility. Their use and mappings are fully described in "API Compatibility
- * Macros in HDF5".
+ * Such macros are provided to facilitate application compatibility.
+ * Their use and mappings are fully described in \ref api-compat-macros.
*
* When both the HDF5 library and the application are built and installed with
* no specific compatibility flags, H5Fget_info() is mapped to the most recent
@@ -37,8 +36,6 @@
* \li \Code{H5F_info_t_vers=2}: H5F_info2_t
* \li \Code{H5F_info_t_vers=1}: H5F_info1_t
*
- * \todo Fix the reference.
- *
* \version 1.10.0 The C function H5Fget_info() and H5F_info_t renamed to
* H5Fget_info1() and H5F_info1_t, respectively, and deprecated
* in this release. The C macro #H5Fget_info, the C function
diff --git a/doxygen/dox/H5Lget_info.dox b/doxygen/dox/H5Lget_info.dox
index 686dfe4..2c0971e 100644
--- a/doxygen/dox/H5Lget_info.dox
+++ b/doxygen/dox/H5Lget_info.dox
@@ -3,8 +3,8 @@
* \def H5Lget_info()
* H5Lget_info() is a macro that is mapped to either H5Lget_info1()
* or H5Lget_info2(). Such macros are provided to facilitate application
- * compatibility. Their use and mappings are fully described in Compatibility
- * Macros in HDF5. If the library and/or application is compiled for Release
+ * compatibility. Their use and mappings are fully described in \ref api-compat-macros.
+ * If the library and/or application is compiled for Release
* 1.12 emulation, H5Lget_info() will be mapped to H5Lget_info2() and
* H5Lget_info1() is deprecated. With earlier versions, H5Lget_info() is mapped to
* H5Lget_info1(). Specific compile-time compatibility flags and the resulting
@@ -14,5 +14,4 @@
* \li Emulate Release 1.12: H5Lget_info2()
* \li Emulate Release 1.8 or 1.10 interface: H5Lget_info1()
*
- * \todo Fix the reference.
*/
diff --git a/doxygen/dox/H5Lget_info_by_idx.dox b/doxygen/dox/H5Lget_info_by_idx.dox
index 8f1511e..bf76822 100644
--- a/doxygen/dox/H5Lget_info_by_idx.dox
+++ b/doxygen/dox/H5Lget_info_by_idx.dox
@@ -3,8 +3,8 @@
* \def H5Lget_info_by_idx()
* H5Lget_info_by_idx() is a macro that is mapped to either H5Lget_info_by_idx1()
* or H5Lget_info_by_idx2(). Such macros are provided to facilitate application
- * compatibility. Their use and mappings are fully described in Compatibility
- * Macros in HDF5. If the library and/or application is compiled for Release
+ * compatibility. Their use and mappings are fully described in \ref api-compat-macros.
+ * If the library and/or application is compiled for Release
* 1.12 emulation, H5Lget_info_by_idx() will be mapped to H5Lget_info_by_idx2() and
* H5Lget_info_by_idx1() is deprecated. With earlier versions, H5Lget_info_by_idx() is mapped to
* H5Lget_info_by_idx1(). Specific compile-time compatibility flags and the resulting
@@ -14,5 +14,4 @@
* \li Emulate Release 1.12: H5Lget_info_by_idx2()
* \li Emulate Release 1.8 or 1.10 interface: H5Lget_info_by_idx1()
*
- * \todo Fix the reference.
*/
diff --git a/doxygen/dox/H5Literate.dox b/doxygen/dox/H5Literate.dox
index 4d3a8dc..eaaf2fe 100644
--- a/doxygen/dox/H5Literate.dox
+++ b/doxygen/dox/H5Literate.dox
@@ -4,7 +4,7 @@
* H5Literate() is a macro that is mapped to either H5Literate1() or
* H5Literate2(). Such macros are provided to facilitate application
* compatibility. Their use and mappings are fully described in
- * Compatibility Macros in HDF5. If the library and/or application is
+ * \ref api-compat-macros. If the library and/or application is
* compiled for Release 1.12 emulation, H5Literate() will be mapped to
* H5Literate2() and H5Literate1() is deprecated. With earlier versions,
* H5Literate() is mapped to H5Literate1(). Specific compile-time compatibility
@@ -14,8 +14,6 @@
* \li Emulate Release 1.12: H5Literate2()
* \li Emulate Release 1.8 or 1.10 interface: H5Literate1()
*
- * \todo Fix the reference.
- *
* \version 1.12.0 The function H5Literate() was renamed to H5Literate1() and
* deprecated in this release. The macro H5Literate() and the
* function H5Literate2() were introduced in this release.
diff --git a/doxygen/dox/H5Literate_by_name.dox b/doxygen/dox/H5Literate_by_name.dox
index 174157a..5ffd7c6 100644
--- a/doxygen/dox/H5Literate_by_name.dox
+++ b/doxygen/dox/H5Literate_by_name.dox
@@ -4,7 +4,7 @@
* H5Literate_by_name() is a macro that is mapped to either
* H5Literate_by_name1() or H5Literate_by_name2(). Such macros are provided to
* facilitate application compatibility. Their use and mappings are fully
- * described in Compatibility Macros in HDF5. If the library and/or application is
+ * described in \ref api-compat-macros. If the library and/or application is
* compiled for Release 1.12 emulation, H5Literate_by_name() will be mapped to
* H5Literate_by_name2() and H5Literate_by_name1() is deprecated. With earlier
* versions, H5Literate_by_name() is mapped to H5Literate_by_name1().
@@ -15,8 +15,6 @@
* \li Emulate Release 1.12: H5Literate_by_name2()
* \li Emulate Release 1.8 or 1.10 interface: H5Literate_by_name1()
*
- * \todo Fix the reference.
- *
* \version 1.12.0 The function H5Literate_by_name() was renamed to H5Literate_by_name1() and
* deprecated in this release. The macro H5Literate_by_name() and the
* function H5Literate_by_name2() were introduced in this release.
diff --git a/doxygen/dox/H5Lvisit.dox b/doxygen/dox/H5Lvisit.dox
index e356576..2dc547f 100644
--- a/doxygen/dox/H5Lvisit.dox
+++ b/doxygen/dox/H5Lvisit.dox
@@ -4,7 +4,7 @@
* H5Lvisit() is a macro that is mapped to either H5Lvisit1() or
* H5Lvisit2(). Such macros are provided to facilitate application
* compatibility. Their use and mappings are fully described in
- * Compatibility Macros in HDF5. If the library and/or application is
+ * \ref api-compat-macros. If the library and/or application is
* compiled for Release 1.12 emulation, H5Lvisit() will be mapped to
* H5Lvisit2() and H5Lvisit1() is deprecated. With earlier versions,
* H5Lvisit() is mapped to H5Lvisit1(). Specific compile-time compatibility
@@ -14,8 +14,6 @@
* \li Emulate Release 1.12: H5Lvisit2()
* \li Emulate Release 1.8 or 1.10 interface: H5Lvisit1()
*
- * \todo Fix the reference.
- *
* \version 1.12.0 The function H5Lvisit() was renamed to H5Lvisit1() and
* deprecated in this release. The macro H5Lvisit() and the
* function H5Lvisit2() were introduced in this release.
diff --git a/doxygen/dox/H5Lvisit_by_name.dox b/doxygen/dox/H5Lvisit_by_name.dox
index 0bb482e..691787f 100644
--- a/doxygen/dox/H5Lvisit_by_name.dox
+++ b/doxygen/dox/H5Lvisit_by_name.dox
@@ -4,7 +4,7 @@
* H5Lvisit_by_name() is a macro that is mapped to either H5Lvisit_by_name1() or
* H5Lvisit_by_name2(). Such macros are provided to facilitate application
* compatibility. Their use and mappings are fully described in
- * Compatibility Macros in HDF5. If the library and/or application is
+ * \ref api-compat-macros. If the library and/or application is
* compiled for Release 1.12 emulation, H5Lvisit_by_name() will be mapped to
* H5Lvisit_by_name2() and H5Lvisit_by_name1() is deprecated. With earlier versions,
* H5Lvisit_by_name() is mapped to H5Lvisit_by_name1(). Specific compile-time
@@ -14,8 +14,6 @@
* \li Emulate Release 1.12: H5Lvisit_by_name2()
* \li Emulate Release 1.8 or 1.10 interface: H5Lvisit_by_name1()
*
- * \todo Fix the reference.
- *
* \version 1.12.0 The function H5Lvisit_by_name() was renamed to H5Lvisit_by_name1() and
* deprecated in this release. The macro H5Lvisit_by_name() and the
* function H5Lvisit_by_name2() were introduced in this release.
diff --git a/doxygen/dox/MetadataCachingInHDF5.dox b/doxygen/dox/MetadataCachingInHDF5.dox
new file mode 100644
index 0000000..9ba0fab
--- /dev/null
+++ b/doxygen/dox/MetadataCachingInHDF5.dox
@@ -0,0 +1,1020 @@
+/** \page TNMDC Metadata Caching in HDF5
+
+\todo Revise this!
+
+\section intro Introduction
+
+In the 1.6.4 release, we introduced a re-implementation of the metadata
+cache. That release contained an incomplete version of the cache which could not
+be controlled via the API. The version in the 1.8 release is more mature and
+includes new API calls that allow the user program to configure the metadata
+cache both on file open and at run time.
+
+From the user perspective, the most striking effect of the new cache should be a
+large reduction in the cache memory requirements when working with complex HDF5
+files.
+
+Those working with such files may also notice a reduction in file close time.
+
+Those working with HDF5 files with a simple structure shouldn't notice any
+particular changes in most cases. In rare cases, there may be a significant
+improvement in performance.
+
+The remainder of this document contains an architectural overview of the old and
+new metadata caches, a discussion of algorithms used to automatically adjust
+cache size to circumstances, and a high-level discussion of the cache
+configuration controls. It can be safely skipped by anyone who works only with
+HDF5 files with relatively simple structure (i.e. no huge groups, no datasets
+with large numbers of chunks, and no objects with large numbers of attributes.)
+
+On the other hand, it is mandatory reading if you want to use something other
+than the default metadata cache configuration. The documentation on the metadata
+cache-related API calls will not make much sense without this background.
+
+\section oldnew Old and New Metadata Cache
+
+\subsection old The Old Metadata Cache
+
+The old metadata cache indexed the cache with a hash table with no provision for
+collisions. Instead, collisions were handled by evicting the existing entry to
+make room for the new entry. Aside from flushes, there was no other mechanism
+for evicting entries, so the replacement policy could best be described as
+"Evict on Collision".
+
+As a result, if two frequently used entries hashed to the same location, they
+would evict each other regularly. To decrease the likelihood of this situation,
+the default hash table size was set fairly large -- slightly more than
+10,000. This worked well, but since the size of metadata entries is not bounded,
+and since entries were only evicted on collision, the large hash table size
+allowed the cache size to explode when working with HDF5 files with complex
+structure.
+
+The "Evict on Collision" replacement policy also caused problems with the
+parallel version of the HDF5 library, as a collision with a dirty entry could
+force a write in response to a metadata read. Since all metadata writes must be
+collective in the parallel case while reads need not be, this could cause the
+library to hang if only some of the processes participated in a metadata read
+that forced a write. Prior to the implementation of the new metadata cache, we
+dealt with this issue by maintaining a shadow cache for dirty entries evicted by
+a read.
+
+\subsection new The New Metadata Cache
+
+The new metadata cache was designed to address the above issues. After
+implementation, it became evident that the working set size for HDF5 files
+varies widely depending on both structure and access patterns. Thus it was
+necessary to add support for cache size adjustment under either automatic or
+user program control (see section 2.3 for details).
+
+When the cache is operating under direct user program control, it is also
+possible to temporarily disable evictions from the metadata cache so as to
+maximize raw data throughput at the expense of allowing the cache to grow
+without bound until evictions are enabled again.
+
+Structurally, the new metadata cache can be thought of as a heavily modified
+version of the UNIX buffer cache as described in chapter three of M. J. Bach's
+"The Design of the UNIX Operating System". In essence, the UNIX buffer cache uses
+a hash table with chaining to index a pool of fixed-size buffers. It uses the
+LRU replacement policy to select candidates for eviction.
+
+Since HDF5 metadata entries are not of fixed size and may grow arbitrarily
+large, the size of the new metadata cache cannot be controlled by setting a
+maximum number of entries. Instead, the new cache keeps a running sum of the
+sizes of all entries and attempts to evict entries as necessary to stay within a
+user-specified maximum size. (Note the use of the word "attempts" here -- as
+will be seen, it is possible for the cache to exceed its currently specified
+maximum size.) At present, the LRU replacement policy is the only option for
+selecting candidates for eviction.
+
+Per the standard Unix buffer cache, dirty entries are given two passes through
+the LRU list before being evicted. The first time they reach the end of the LRU
+list, they are flushed, marked as clean, and moved to the head of the LRU
+list. When a clean entry reaches the end of the LRU list, it is simply evicted
+if space is needed.
+
+The cache cannot evict entries that are locked, and thus it will temporarily
+grow beyond its maximum size if there are insufficient unlocked entries
+available for eviction.
+
+In the parallel version of the library, only the cache running under process 0
+of the file communicator is allowed to write metadata to file. All the other
+caches must retain dirty metadata until the process 0 cache tells them that the
+metadata is clean.
+
+Since all operations modifying metadata must be collective, all caches see the
+same stream of dirty metadata. This fact is used to allow them to synchronize
+every n bytes of dirty metadata, where n is a user-configurable value that
+defaults to 256 KB.
+
+To avoid sending the other caches messages from the future, process 0 must not
+write any dirty entries until it reaches a synchronization point. When it
+reaches a synchronization point, it writes entries as needed, and then
+broadcasts the list of flushed entries to the other caches. The caches on the
+other processes use this list to mark entries clean before they leave the
+synchronization point, allowing them to evict those entries as needed.
+
+The caches will also synchronize on a user-initiated flush.
+
+To minimize overhead when running in parallel, the cache maintains a "clean" LRU
+list in addition to the regular LRU list. This list contains only clean entries
+and is used as a source of candidates for eviction when flushing dirty entries
+is not allowed.
+
+Since flushing entries is forbidden most of the time when running in parallel,
+the caches can be forced to exceed their maximum sizes if they run out of clean
+entries to evict.
+
+To decrease the likelihood of this event, the new cache allows the user to
+specify a minimum clean size -- which is a minimum total size of all the entries
+on the clean LRU plus all unused space in the cache.
+
+While the clean LRU list is only maintained in the parallel version of the HDF5
+library, the notion of a minimum clean size still applies in the serial
+case. Here it is used to force a mix of clean and dirty entries in the cache
+even in the write-only case.
+
+This, in turn, reduces the number of redundant flushes by avoiding the case in
+which the cache fills with dirty metadata and all entries must be flushed before
+a clean entry can be evicted to make room for a new entry.
+
+Observe that in both the serial and parallel cases, the maintenance of a minimum
+clean size modifies the replacement policy, as dirty entries may be flushed
+earlier than would otherwise be the case so as to maintain the desired amount of
+clean and/or empty space in the cache.
+
+While the new metadata cache only supports the LRU replacement policy at
+present, that may change. Support for multiple replacement policies was very
+much in mind when the cache was designed, as was the ability to switch
+replacement policies at run time. The situation has been complicated by the
+later addition of the adaptive cache resizing requirement, as two of the
+resizing algorithms piggyback on the LRU list. However, if there is a need for
+additional replacement policies, it shouldn't be too hard to implement them.
+
+\section adapt Adaptive Cache Resizing in the New Metadata Cache
+
+As mentioned earlier, the metadata working set size for an HDF5 file varies
+wildly depending on the structure of the file and the access pattern. For
+example, a 2MB limit on metadata cache size is excessive for an H5repack of
+almost all HDF5 files we have tested. However, I have a file submitted by one of
+our users that will run a 13% hit rate with this cache size and will lock up one
+of our Linux boxes using the old metadata cache. Increase the new metadata cache
+size to 4 MB, and the hit rate exceeds 99%.
+
+In this case, the main culprit is a root group with more than 20,000 entries in
+it. As a result, the root group heap exceeds 1 MB, which tends to crowd out the
+rest of the metadata in a 2 MB cache.
+
+This case and a number of synthetic tests convinced us that we needed to modify
+the new metadata cache to expand and contract according to need within
+user-specified bounds.
+
+I was unable to find any previous work on this problem, so I invented solutions
+as I went along. If you are aware of prior work, please send me references. The
+closest I was able to come was a group of embedded CPU designers who were
+turning off sections of their cache to conserve power.
+
+\subsection increasing Increasing the Cache Size
+
+In the context of the HDF5 library, the problem of increasing the cache size as
+necessary to contain the current working set turns out to involve two rather
+different issues.
+
+The first of these, which was recognized immediately, is the problem of
+recognizing long term changes in working set size, and increasing the cache size
+accordingly, while not reacting to transients.
+
+The second, which I recognized the hard way, is to adjust the cache size for
+sudden, dramatic increases in working set size caused by requests for large
+pieces of metadata which may be larger than the current metadata cache size.
+
+The algorithms for handling these situations are discussed below. These problems
+are largely orthogonal to each other, so both algorithms may be used
+simultaneously.
+
+\subsubsection hrtcsi Hit Rate Threshold Cache Size Increment
+
+Perhaps the most obvious heuristic for identifying cases in which the cache is
+too small involves monitoring the hit rate. If the hit rate is low for a while,
+and the cache is at its current maximum size, the current maximum cache size is
+probably too small.
+
+The hit rate threshold algorithm for increasing cache size applies this
+intuition directly.
+
+Hit rate statistics are collected over a user-specified number of cache
+accesses. This period is known as an epoch.
+
+At the end of each epoch, the hit rate is computed, and the counters are
+reset. If the hit rate is below a user-specified threshold and the cache is at
+its current maximum size, the maximum size of the cache is increased by a
+user-specified multiple. If required, the new cache maximum size is clipped to
+stay within the user-specified upper bound on the maximum cache size, and
+optionally, within a user-specified maximum increment.
+
+My tests indicate that this algorithm works well in most cases. However, in a
+synthetic test in which hit rate increased slowly with cache size, and load
+remained steady for many epochs, I observed a case in which cache size increased
+until the hit rate just exceeded the specified minimum and then stalled. This is
+a problem, as to avoid volatility, it is necessary to set the minimum hit rate
+threshold well below the desired hit rate. Thus we may find ourselves with a
+cache running with a 91% hit rate when we really want it to increase its size
+until the hit rate is about 99%.
+
+If this case occurs frequently in actual use, I will have to come up with an
+improved algorithm. Please let me know if you see this behavior. However, I had
+to work rather hard to create it in my synthetic tests, so I would expect it to
+be uncommon.
+
+\subsubsection fcsi Flash Cache Size Increment
+
+A fundamental problem with the above algorithm is that it contains the hidden
+assumption that cache entries are relatively small in comparison to the cache
+itself. While I knew this assumption was not generally true when I developed the
+algorithm, I thought that cases where it failed would be so rare as to not be
+worth considering, as even if they did occur, the above algorithm would rectify
+the situation within an epoch or two.
+
+While it is true that such occurrences are rare, and it is true that the hit
+rate threshold cache size increment algorithm will rectify the situation
+eventually, the performance degradation experienced by users while waiting for
+the epoch to end was so extreme that some way of accelerating response to such
+situations was essential.
+
+To understand the problem, consider the following use case:
+
+Suppose we create a group, and then repeatedly create a new data set in the
+group, write some data to it and then close it.
+
+In some versions of the HDF5 file format, the names of the datasets will be
+stored in a local heap associated with the group, and the space for that heap
+will be allocated in a single, contiguous chunk. When this local heap is full,
+we allocate a new chunk twice the size of the old, copy the data from the old
+local heap into the new, and discard the old local heap.
+
+By default, the minimum metadata cache size is set to 2 MB. Thus in this use
+case, our hit rate will be fine as long as the local heap is no larger than a
+little less than 2 MB, as the group related metadata is accessed frequently and
+never evicted, and the data set related metadata is never accessed once the data
+set is closed, and thus is evicted smoothly to make room for new data sets.
+
+All this changes abruptly when the local heap finally doubles in size to a value
+above the slightly less than 2 MB limit. All of a sudden, the local heap is the
+size of the metadata cache, and the cache must constantly swap it in to access
+it, and then swap it out to make room for other metadata.
+
+The hit rate threshold-based algorithm for increasing the cache size will fix
+this problem eventually, but performance will be very bad until it does, as the
+metadata cache will be largely ineffective until its size is increased.
+
+An obvious heuristic for addressing this "big rock in a small pond" issue is to
+watch for large "incoming rocks", and increase the size of the "pond" if the
+rock is so big that it will force most of the "water" out of the "pond".
+
+The add space flash cache size increment algorithm applies this intuition
+directly:
+
+Let x be either the size of a newly inserted entry, a newly loaded entry, or the
+number of bytes by which the size of an existing entry has been increased
+(i.e. the size of the "rock").
+
+If x is greater than some user-specified fraction of the current maximum cache
+size, increase the current maximum cache size by x times some user-specified
+multiple, less any free space that was in the cache to begin with. Further, to
+avoid confusing the other cache size increment/decrement code, start a new
+epoch.
+
+At present, this algorithm pays no attention to any user-specified limit on the
+maximum size of any single cache size increase, but it DOES stay within the
+user-specified upper bound on the maximum cache size.
+
+While it should be easy to see how this algorithm could be fooled into
+inactivity by a large number of entries that were not quite large enough to
+cross the threshold, in practice it seems to work reasonably well.
+
+Needless to say, I will revisit the issue should this cease to be the case.
+
+\subsection decreasing Decreasing the Cache Size
+
+Identifying cases in which the maximum cache size is larger than necessary
+turned out to be more difficult.
+
+\subsubsection hrtcsr Hit Rate Threshold Cache Size Reduction
+
+One obvious heuristic is to monitor the hit rate and guess that we can safely
+decrease cache size if the hit rate exceeds some user-supplied threshold (say
+.99995). The hit rate threshold size decrement algorithm implemented in the new
+metadata cache implements this intuition as follows:
+
+At the end of each epoch (this is the same epoch that is used in the cache size
+increment algorithm), the hit rate is compared with the user-specified
+threshold. If the hit rate exceeds that threshold, the current maximum cache
+size is decreased by a user-specified factor. If required, the size of the
+reduction is clipped to stay within a user-specified lower bound on the maximum
+cache size, and optionally, within a user-specified maximum decrement.
+
+In my synthetic tests, this algorithm works poorly. Even with a very high
+threshold and a small maximum reduction, it results in cache size
+oscillations. The size increment code typically increments the maximum cache
+size above the working set size. This results in a high hit rate, which causes
+the threshold size decrement code to reduce the maximum cache size below the
+working set size, which causes the hit rate to crash causing the cycle to
+repeat. The resulting average hit rate is poor.
+
+It remains to be seen if this behavior will be seen in the field. The algorithm
+is available for use, but it wouldn't be my first choice. If you use it, please
+report back.
+
+\subsubsection acsr Ageout Cache Size Reduction
+
+Another heuristic for dealing with oversized cache conditions is to look for
+entries that haven't been accessed for a long time, evict them, and reduce the
+cache size accordingly.
+
+The age out cache size reduction applies this intuition as follows: At the end
+of each epoch (again the same epoch as used in the cache size increment
+algorithm), all entries that haven't been accessed for a user-configurable
+number of epochs (1 - 10 at present) are evicted. The maximum cache size is then
+reduced to equal the sum of the sizes of the remaining entries. The size of the
+reduction is clipped to stay within a user-specified lower bound on maximum
+cache size, and optionally, within a user-specified maximum decrement.
+
+In addition, the user may specify a minimum fraction of the cache which must be
+empty before the cache size is reduced. Thus if an empty reserve of 0.1 was
+specified on a 10 MB cache, there would be no cache size reduction unless the
+eviction of aged out entries resulted in more than 1 MB of empty space. Further,
+even after the reduction, the cache would be one-tenth empty.
+
+In my synthetic tests, the age out algorithm works rather well, although it is
+somewhat sensitive to the epoch length and age out period selection.
+
+\subsubsection awhrtcsr Ageout With Hit Rate Threshold Cache Size Reduction
+
+To address these issues, I combined the hit rate threshold and age out
+heuristics.
+
+Age out with threshold works just like age out, except that the algorithm is not
+run unless the hit rate exceeded a user-specified threshold in the previous
+epoch.
+
+In my synthetic tests, age out with threshold seems to work nicely, with no
+observed oscillation. Thus I have selected it as the default cache size
+reduction algorithm.
+
+For those interested in such things, the age out algorithm is implemented by
+inserting a marker entry at the head of the LRU list at the beginning of each
+epoch. Entries that haven't been accessed for at least n epochs are simply
+entries that appear in the LRU list after the n-th marker at the end of an
+epoch.
+
+\section configuring Configuring the New Metadata Cache
+
+Due to a lack of resources, the design work on the automatic cache size
+adjustment algorithms was done hastily, using primarily synthetic tests. I don't
+think I spent more than a couple weeks writing and running performance tests --
+most time went into coding and functional testing.
+
+As a result, while I think the algorithms provided for adaptive cache resizing
+will work well in actual use, I don't really know (although preliminary results
+from the field are promising). Fortunately, the issue shouldn't arise for the
+vast majority of HDF5 users, and those for whom it may arise should be savvy
+enough to recognize problems and deal with them.
+
+For this latter class of users, I have implemented a number of new API calls
+allowing the user to select and configure the cache resize algorithms, or to
+turn them off and control cache size directly from the user program. There are
+also API calls that allow the user program to monitor hit rate and cache size.
+
+From the user perspective, all the cache configuration data for a given file is
+contained in an instance of the \ref H5AC_cache_config_t structure -- the definition
+of which is given below:
+
+\snippet H5ACpublic.h H5AC_cache_config_t_snip
+
+This structure is defined in \c H5ACpublic.h. Each field is discussed below and in
+the associated header comment.
+
+The C API allows you to get and set this structure directly. Unfortunately, the
+Fortran API has to do this with individual parameters for each of the fields
+(with the exception of version).
+
+While the API calls are discussed individually in the reference manual, the
+following high-level discussion of what fields to change for different purposes
+should be useful.
+
+\subsection gconfig General Configuration
+
+The \c version field is intended to allow \THG to change the \c
+H5AC_cache_config_t structure without breaking old code. For now, this field
+should always be set to \c H5AC__CURR_CACHE_CONFIG_VERSION, even when you are
+getting the current configuration data from the cache. The library needs the
+version number to know where fields are located with reference to the supplied
+base address.
+
+The \ref H5AC_cache_config_t.rpt_fcn_enabled "rpt_fcn_enabled" field is a
+boolean flag that allows you to turn on and off the resize reporting function
+that reports the activities of the adaptive cache resize code at the end of each
+epoch -- assuming that it is enabled.
+
+The report function is unsupported, so you are on your own if you use it. Since
+it dumps status data to stdout, you should not attempt to use it with Windows
+unless you modify the source. You may find it useful if you want to experiment
+with different adaptive resize configurations. It is also a convenient way of
+diagnosing poor cache configuration. Finally, if you do lots of runs with
+identical behavior, you can use it to determine the metadata cache size needed
+in each phase of your program so you can set the required cache sizes manually.
+
+The trace file fields are also unsupported. They allow one to open and close a
+trace file in which all calls to the metadata cache are logged in a
+user-specified file for later analysis. The feature is intended primarily for
+\THG use in debugging or optimizing the metadata cache in cases where users in
+the field observe obscure failures or poor performance that we cannot re-create
+in the lab. The trace file will allow us to re-create the exact sequence of
+cache operations that are triggering the problem.
+
+At present we do not have a playback utility for trace files, although I imagine
+that we will write one quickly when and if we need it.
+
+To enable the trace file, you load the full path of the desired trace file into
+\ref H5AC_cache_config_t.trace_file_name "trace_file_name", and set \ref
+H5AC_cache_config_t.open_trace_file "open_trace_file" to \c TRUE. In the
+parallel case, an ASCII representation of the rank of each process is appended
+to the supplied trace file name to create a unique trace file name for that
+process.
+
+To close an open trace file, set \ref H5AC_cache_config_t.close_trace_file
+"close_trace_file" to \c TRUE.
+
+It must be emphasized that you are on your own if you play with the trace file
+feature absent a request from \THG. Needless to say, the trace file feature is
+disabled by default. If you enable it, you will take a large performance hit and
+generate huge trace files.
+
+The \ref H5AC_cache_config_t.evictions_enabled "evictions_enabled" field is a
+boolean flag allowing the user to disable the eviction of entries from the
+metadata cache. Under normal operation conditions, this field will always be set
+to \c TRUE.
+
+In rare circumstances, the raw data throughput requirements may be so high that
+the user wishes to postpone metadata writes so as to reserve I/O throughput for
+raw data. The \ref H5AC_cache_config_t.evictions_enabled "evictions_enabled"
+field exists to allow this -- although the user is to be warned that the
+metadata cache will grow without bound while evictions are disabled. Thus
+evictions should be re-enabled as soon as possible, and it may be wise to
+monitor cache size and statistics (to see how to enable statistics, see the
+debugging facilities section below).
+
+Evictions may only be disabled when the automatic cache resize code is disabled
+as well. Thus to disable evictions, not only must the user set the \ref
+H5AC_cache_config_t.evictions_enabled "evictions_enabled" field to \c FALSE, but
+he must also set \ref H5AC_cache_config_t.incr_mode "incr_mode" to
+#H5C_incr__off, set \ref H5AC_cache_config_t.flash_incr_mode "flash_incr_mode"
+to #H5C_flash_incr__off, and set \ref H5AC_cache_config_t.decr_mode "decr_mode"
+to #H5C_decr__off.
+
+To re-enable evictions, just set \ref H5AC_cache_config_t.evictions_enabled
+"evictions_enabled" back to \c TRUE.
+
+Before passing on to other subjects, it is worth re-iterating that disabling
+evictions is an extreme step. Before attempting it, you might consider setting a
+large cache size manually, and flushing the cache just before high raw data
+throughput is required. This may yield the desired results without the risks
+inherent in disabling evictions.
+
+The \ref H5AC_cache_config_t.set_initial_size "set_initial_size" and \ref
+H5AC_cache_config_t.initial_size "initial_size" fields allow you to specify an
+initial maximum cache size. If \ref H5AC_cache_config_t.set_initial_size
+"set_initial_size" is \c TRUE, \ref H5AC_cache_config_t.initial_size
+"initial_size" must lie in the interval [\ref H5AC_cache_config_t.min_size
+"min_size", \ref H5AC_cache_config_t.max_size "max_size"] (see below for a
+discussion of the \ref H5AC_cache_config_t.min_size "min_size" and \ref
+H5AC_cache_config_t.max_size "max_size" fields).
+
+If you disable the adaptive cache resizing code (done by setting \ref
+H5AC_cache_config_t.incr_mode "incr_mode" to #H5C_incr__off, \ref
+H5AC_cache_config_t.flash_incr_mode "flash_incr_mode" to #H5C_flash_incr__off,
+and \ref H5AC_cache_config_t.decr_mode "decr_mode" to #H5C_decr__off), you can
+use these fields to control maximum cache size manually, as the maximum cache
+size will remain at the initial size.
+
+Note that the maximum cache size is only modified when \ref
+H5AC_cache_config_t.set_initial_size "set_initial_size" is \c TRUE. This allows
+the use of configurations specified at compile time to change resize
+configuration without altering the current maximum size of the cache. Without
+this feature, an additional call would be required to get the current maximum
+cache size so as to set the \ref H5AC_cache_config_t.initial_size "initial_size"
+to the current maximum cache size, and thereby avoid changing it.
+
+The \ref H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" sets the
+current minimum clean size as a fraction of the current max cache size. While
+this field was originally used only in the parallel version of the library, it
+now applies to the serial version as well. Its value must lie in the range
+\Code{[0.0, 1.0]}. 0.01 is reasonable in the serial case, and 0.3 in the
+parallel.
+
+A potential interaction, discovered at release 1.8.3, between the enforcement of
+the \ref H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" and the
+adaptive cache resize code can severely degrade performance. While this
+interaction is easily dealt with in the serial case by setting \ref
+H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" to 0.01, the problem
+is more difficult in the parallel case. Please see the Interactions section
+below for further details.
+
+The \ref H5AC_cache_config_t.max_size "max_size" and \ref
+H5AC_cache_config_t.min_size "min_size" fields specify the range of maximum
+sizes that may be set for the cache by the automatic resize code. \ref
+H5AC_cache_config_t.min_size "min_size" must be less than or equal to
+\ref H5AC_cache_config_t.max_size "max_size", and both must lie in the range
+\Code{[H5C__MIN_MAX_CACHE_SIZE, H5C__MAX_MAX_CACHE_SIZE]} -- currently [1 KB,
+128 MB]. If you routinely run a cache size in the top half of this range, you
+should increase the hash table size. To do this, modify the \c
+H5C__HASH_TABLE_LEN \Code{\#define} in \c H5Cpkg.h and re-compile. At present,
+\c H5C__HASH_TABLE_LEN must be a power of two.
+
+The \c epoch_length is the number of cache accesses between runs of the adaptive
+cache size control algorithms. It is ignored if these algorithms are turned
+off. It must lie in the range \Code{[H5C__MIN_AR_EPOCH_LENGTH,
+H5C__MAX_AR_EPOCH_LENGTH]} -- currently [100, 1000000]. The above constants are
+defined in \c H5Cprivate.h. 50000 is a reasonable value.
+
+\subsection increment Increment Configuration
+
+The \ref H5AC_cache_config_t.incr_mode "incr_mode" field specifies the cache
+size increment algorithm used. Its value must be a member of the \ref
+H5C_cache_incr_mode enum type -- currently either #H5C_incr__off or
+#H5C_incr__threshold (note the double underscores after \c "incr"). This type is
+defined in H5Cpublic.h.
+
+If \ref H5AC_cache_config_t.incr_mode "incr_mode" is set to #H5C_incr__off,
+regular automatic cache size increases are disabled, and the \ref
+H5AC_cache_config_t.lower_hr_threshold "lower_hr_threshold", \ref
+H5AC_cache_config_t.increment "increment", \ref
+H5AC_cache_config_t.apply_max_increment "apply_max_increment", and \ref
+H5AC_cache_config_t.max_increment "max_increment" fields are ignored.
+
+The \ref H5AC_cache_config_t.flash_incr_mode "flash_incr_mode" field specifies
+the flash cache size increment algorithm used. Its value must be a member of the
+\ref H5C_cache_flash_incr_mode enum type -- currently either
+#H5C_flash_incr__off or #H5C_flash_incr__add_space (note the double underscores
+after \c "incr"). This type is defined in H5Cpublic.h.
+
+If \ref H5AC_cache_config_t.flash_incr_mode "flash_incr_mode" is set to
+#H5C_flash_incr__off, flash cache size increases are disabled, and the \ref
+H5AC_cache_config_t.flash_multiple "flash_multiple" and \ref
+H5AC_cache_config_t.flash_threshold "flash_threshold" fields are ignored.
+
+\subsubsection hrtcsic Hit Rate Threshold Cache Size Increase Configuration
+
+If \ref H5AC_cache_config_t.incr_mode "incr_mode" is #H5C_incr__threshold, the
+cache size is increased via the hit rate threshold algorithm. The remaining
+fields in the section are then used as follows:
+
+\ref H5AC_cache_config_t.lower_hr_threshold "lower_hr_threshold" is the
+threshold below which the hit rate must fall to trigger an increase. The value
+must lie in the range \Code{[0.0, 1.0]}. In my tests, a relatively high value
+seems to work best -- 0.9 for example.
+
+\ref H5AC_cache_config_t.increment "increment" is the factor by which the old
+maximum cache size is multiplied to obtain an initial new maximum cache size
+when an increment is needed. The actual change in size may be smaller as
+required by \ref H5AC_cache_config_t.max_size "max_size" (above) and \c
+max_increment (discussed below). \c increment must be greater than or equal to
+1.0. If you set it to 1.0, you will effectively turn off the increment code. 2.0
+is a reasonable value.
+
+\ref H5AC_cache_config_t.apply_max_increment "apply_max_increment" and \ref
+H5AC_cache_config_t.max_increment "max_increment" allow the user to specify a
+maximum increment. If \ref H5AC_cache_config_t.apply_max_increment
+"apply_max_increment" is \c TRUE, the cache size will never be increased by more
+than the number of bytes specified in \ref H5AC_cache_config_t.max_increment
+"max_increment" in any single increase.
+
+\subsubsection fcsic Flash Cache Size Increase Configuration
+
+If \ref H5AC_cache_config_t.flash_incr_mode "flash_incr_mode" is set to
+#H5C_flash_incr__add_space, flash cache size increases are enabled. The size of
+the cache will be increased under the following circumstances:
+
+Let \c t be the current maximum cache size times the value of the \ref
+H5AC_cache_config_t.flash_threshold "flash_threshold" field.
+
+Let \c x be either the size of the newly inserted entry, the size of the newly
+loaded entry, or the number of bytes added to the size of the entry under
+consideration for triggering a flash cache size increase.
+
+If \Code{t < x}, the basic condition for a flash cache size increase is met, and
+we proceed as follows:
+
+Let \c space_needed equal \c x less the amount of free space in the cache.
+
+Further, let \ref H5AC_cache_config_t.increment "increment" equal \c
+space_needed times the value of the \ref H5AC_cache_config_t.flash_multiple
+"flash_multiple" field. If \ref H5AC_cache_config_t.increment "increment" plus
+the current cache size is greater than \ref H5AC_cache_config_t.max_size
+"max_size" (discussed above), reduce \ref H5AC_cache_config_t.increment
+"increment" so that \ref H5AC_cache_config_t.increment "increment" plus the
+current cache size is equal to \ref H5AC_cache_config_t.max_size "max_size".
+
+If the increment is greater than zero, increase the current cache size by \ref
+H5AC_cache_config_t.increment "increment". To avoid confusing the other cache
+size increment or decrement algorithms, start a new epoch. Note, however, that
+we do not cycle the epoch markers if some variant of the age out algorithm is in
+use.
+
+The use of the \ref H5AC_cache_config_t.flash_threshold "flash_threshold" field
+is discussed above. It must be a floating-point value in the range of
+\Code{[0.1, 1.0]}. 0.25 is a reasonable value.
+
+The use of the \ref H5AC_cache_config_t.flash_multiple "flash_multiple" field is
+also discussed above. It must be a floating-point value in the range of
+\Code{[0.1, 10.0]}. 1.4 is a reasonable value.
+
+\subsection decrement Decrement Configuration
+
+The \ref H5AC_cache_config_t.decr_mode "decr_mode" field specifies the cache
+size decrement algorithm used. Its value must be a member of the \ref
+H5C_cache_decr_mode enum type -- currently either #H5C_decr__off,
+#H5C_decr__threshold, #H5C_decr__age_out, or #H5C_decr__age_out_with_threshold
+(note the double underscores after \c "decr"). This type is defined in
+H5Cpublic.h.
+
+If \ref H5AC_cache_config_t.decr_mode "decr_mode" is set to #H5C_decr__off,
+automatic cache size decreases are disabled, and the remaining fields in the
+cache size decrease control section are ignored.
+
+\subsubsection hrtcsdc Hit Rate Threshold Cache Size Decrease Configuration
+
+If \ref H5AC_cache_config_t.decr_mode "decr_mode" is #H5C_decr__threshold, the
+cache size is decreased by the threshold algorithm, and the remaining fields of
+the decrement section are used as follows:
+
+\ref H5AC_cache_config_t.upper_hr_threshold "upper_hr_threshold" is the
+threshold above which the hit rate must rise to trigger cache size reduction. It
+must be in the range \Code{[0.0, 1.0]}. In my synthetic tests, very high values
+like .9995 or .99995 seemed to work best.
+
+\ref H5AC_cache_config_t.decrement "decrement" is the factor by which the
+current maximum cache size is multiplied to obtain a tentative new maximum cache
+size. It must lie in the range \Code{[0.0, 1.0]}. Relatively large values like
+.9 seem to work best in my synthetic tests. Note that the actual size reduction
+may be smaller as required by \ref H5AC_cache_config_t.min_size "min_size" and
+\ref H5AC_cache_config_t.max_decrement "max_decrement" (discussed below). \ref
+H5AC_cache_config_t.apply_max_decrement "apply_max_decrement" and \ref
+H5AC_cache_config_t.max_decrement "max_decrement" allow the user to specify a
+maximum decrement. If \ref H5AC_cache_config_t.apply_max_decrement
+"apply_max_decrement" is \c TRUE, the cache size will never be reduced by more
+than \ref H5AC_cache_config_t.max_decrement "max_decrement" bytes in any single
+reduction.
+
+With the hit rate threshold cache size decrement algorithm, the remaining fields
+in the section are ignored.
+
+\subsubsection acsr Ageout Cache Size Reduction
+
+If \ref H5AC_cache_config_t.decr_mode "decr_mode" is #H5C_decr__age_out, the
+cache size is decreased by the ageout algorithm, and the remaining fields of the
+decrement section are used as follows:
+
+\ref H5AC_cache_config_t.epochs_before_eviction "epochs_before_eviction" is the
+number of epochs an entry must reside unaccessed in the cache before it is
+evicted. This value must lie in the range \Code{[1, H5C__MAX_EPOCH_MARKERS]}. \c
+H5C__MAX_EPOCH_MARKERS is defined in H5Cprivate.h, and is currently set to 10.
+
+\ref H5AC_cache_config_t.apply_max_decrement "apply_max_decrement" and \ref
+H5AC_cache_config_t.max_decrement "max_decrement" are used as in
+\ref hrtcsdc.
+
+\ref H5AC_cache_config_t.apply_empty_reserve "apply_empty_reserve" and \ref
+H5AC_cache_config_t.empty_reserve "empty_reserve" allow the user to specify a
+minimum empty reserve as discussed in section 2.3.2.2. An empty reserve of 0.05
+or 0.1 seems to work well.
+
+The \ref H5AC_cache_config_t.decrement "decrement" and \ref
+H5AC_cache_config_t.upper_hr_threshold "upper_hr_threshold" fields are ignored
+in this case.
+
+\subsubsection awhrtcsr Ageout With Hit Rate Threshold Cache Size Reduction
+
+If \ref H5AC_cache_config_t.decr_mode "decr_mode" is
+#H5C_decr__age_out_with_threshold, the cache size is decreased by the ageout
+with hit rate threshold algorithm, and the fields of the decrement section are
+used as per the Ageout algorithm (see \ref acsr) with the exception of \ref
+H5AC_cache_config_t.upper_hr_threshold "upper_hr_threshold".
+
+Here, \ref H5AC_cache_config_t.upper_hr_threshold "upper_hr_threshold" is the
+threshold above which the hit rate must rise to trigger cache size reduction. It
+must be in the range \Code{[0.0, 1.0]}. In my synthetic tests, high values like
+.999 seemed to work well.
+
+\subsection parallel Parallel Configuration
+
+This section is a catch-all for parallel specific configuration data. At
+present, it has only one field --
+\ref H5AC_cache_config_t.dirty_bytes_threshold "dirty_bytes_threshold".
+
+In PHDF5, all operations that modify metadata must be executed collectively. We
+used to think that this was enough to ensure consistency across the metadata
+caches, but since we allow processes to read metadata individually, the order of
+dirty entries in the LRU list can vary across processes. This, in turn, can
+change the order in which dirty metadata cache entries reach the bottom of the
+LRU and are flushed to disk -- opening the door to "messages from the past" and
+"messages from the future" bugs.
+
+To prevent this, only the metadata cache on process 0 of the file communicator
+is allowed to write to file, and then only after entering a sync point with the
+other caches. After it writes entries to file, it sends the base addresses of
+the now clean entries to the other caches, so they can mark these entries clean
+as well, and then leaves the sync point. The other caches mark the specified
+entries as clean before they leave the sync point as well. (Observe that since
+all caches see the same stream of dirty metadata, they will all have the same
+set of dirty entries upon sync point entry and exit.)
+
+The different caches know when to synchronize by counting the number of bytes of
+dirty metadata created by the collective operations modifying metadata. Whenever
+this count exceeds the value specified in the \ref
+H5AC_cache_config_t.dirty_bytes_threshold "dirty_bytes_threshold", they all
+enter the sync point, and process 0 flushes down to its minimum clean size and
+sends the list of newly cleaned entries to the other caches.
+
+Needless to say, the value of the \ref H5AC_cache_config_t.dirty_bytes_threshold
+"dirty_bytes_threshold" field must be consistent across all the caches operating
+on a given file.
+
+All dirty metadata can also be flushed under programmatic control via the
+H5Fflush() call. This call must be collective and will reset the dirty data
+counts on each metadata cache.
+
+Absent calls to H5Fflush(), dirty metadata will only be flushed when the \ref
+H5AC_cache_config_t.dirty_bytes_threshold "dirty_bytes_threshold" is exceeded,
+and then only down to the \ref H5AC_cache_config_t.min_clean_fraction
+"min_clean_fraction". Thus, if a program does all its metadata modifications in
+one phase, and then doesn't modify metadata thereafter, a residue of dirty
+metadata will be frozen in the metadata caches for the remainder of the
+computation -- effectively reducing the sizes of the caches.
+
+In the default configuration, the caches will eventually resize themselves to
+maintain an acceptable hit rate. However, this will take time, and it will
+increase the application's footprint in memory.
+
+If your application behaves in this manner, you can avoid this by a collective
+call to H5Fflush() immediately after the metadata modification phase.
+
+\subsection interactions Interactions
+
+Evictions may not be disabled unless the automatic cache resize code is disabled
+as well (by setting \ref H5AC_cache_config_t.decr_mode "decr_mode" to
+#H5C_decr__off, \c flash_incr_mode to #H5C_flash_incr__off, and \ref
+H5AC_cache_config_t.incr_mode "incr_mode" to #H5C_incr__off) -- thus placing the
+cache size under the direct control of the user program.
+
+There is no logical necessity for this restriction. It is imposed because it
+simplifies testing greatly and because I can't see any reason why one would want
+to disable evictions while the automatic cache size adjustment code was
+enabled. This restriction can be relaxed if anyone can come up with a good
+reason to do so.
+
+At present, there are two interactions between the increment and decrement
+sections of the configuration.
+
+If \ref H5AC_cache_config_t.incr_mode "incr_mode" is #H5C_incr__threshold, and
+\ref H5AC_cache_config_t.decr_mode "decr_mode" is either #H5C_decr__threshold or
+#H5C_decr__age_out_with_threshold, then \ref
+H5AC_cache_config_t.lower_hr_threshold "lower_hr_threshold" must be strictly
+less than \ref H5AC_cache_config_t.upper_hr_threshold "upper_hr_threshold".
+
+Also, if the flash cache size increment code is enabled and is triggered, it
+will restart the current epoch without calling any other cache size increment or
+decrement code.
+
+In both the serial and parallel cases, there is the potential for an interaction
+between the \ref H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" and
+the cache size increment code that can severely degrade
+performance. Specifically, if the \ref H5AC_cache_config_t.min_clean_fraction
+"min_clean_fraction" is large enough, it is possible that keeping the specified
+fraction of the cache clean may generate enough flushes to seriously degrade
+performance even though the hit rate is excellent.
+
+In the serial case, this is easily dealt with by selecting a very small \ref
+H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" -- 0.01 for example
+-- as this still avoids the "metadata blizzard" phenomenon that appears when the
+cache fills with dirty metadata and must then flush all of it before evicting an
+entry to make space for a new entry.
+
+The problem is more difficult in the parallel case, as the \ref
+H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" is used to ensure
+that the cache contains clean entries that can be evicted to make space for new
+entries when metadata writes are forbidden -- i.e. between sync points.
+
+This issue was discovered shortly before release 1.8.3 and an automated solution
+has not been implemented. Should it become an issue for an application, try
+manually setting the cache size to ~1.5 times the maximum working set size for
+the application, and leave \ref H5AC_cache_config_t.min_clean_fraction
+"min_clean_fraction" set to 0.3.
+
+You can approximate the working set size of your application via repeated calls
+to H5Fget_mdc_size() and H5Fget_mdc_hit_rate() while running your program with
+the cache resize code enabled. The maximum value returned by H5Fget_mdc_size()
+should be a reasonable approximation -- particularly if the associated hit rate
+is good. In the parallel case, there is also an interaction between \c
+min_clean_fraction and \ref H5AC_cache_config_t.dirty_bytes_threshold
+"dirty_bytes_threshold". Absent calls to H5Fflush() (discussed above), the upper
+bound on the amount of dirty data in the metadata caches will oscillate between
+(1 - \ref H5AC_cache_config_t.min_clean_fraction "min_clean_fraction") times
+current maximum cache size, and that value plus the \ref
+H5AC_cache_config_t.dirty_bytes_threshold "dirty_bytes_threshold". Needless to
+say, it will be best if the \ref H5AC_cache_config_t.min_size "min_size", \ref
+H5AC_cache_config_t.min_clean_fraction "min_clean_fraction", and the \ref
+H5AC_cache_config_t.dirty_bytes_threshold "dirty_bytes_threshold"
+are chosen so that the cache can't fill with dirty data.
+
+\subsection defaults Default Metadata Cache Configuration
+
+Starting with release 1.8.3, HDF5 provides different default metadata cache
+configurations depending on whether the library is compiled for serial or
+parallel.
+
+The default configuration for the serial case is as follows:
+
+\code{.c}
+{
+ /* int version = */ H5C__CURR_AUTO_SIZE_CTL_VER,
+ /* hbool_t rpt_fcn_enabled = */ FALSE,
+ /* hbool_t open_trace_file = */ FALSE,
+ /* hbool_t close_trace_file = */ FALSE,
+ /* char trace_file_name[] = */ "",
+ /* hbool_t evictions_enabled = */ TRUE,
+ /* hbool_t set_initial_size = */ TRUE,
+ /* size_t initial_size = */ ( 2 * 1024 * 1024),
+ /* double min_clean_fraction = */ 0.01,
+ /* size_t max_size = */ (32 * 1024 * 1024),
+ /* size_t min_size = */ ( 1 * 1024 * 1024),
+ /* long int epoch_length = */ 50000,
+ /* enum H5C_cache_incr_mode incr_mode = */ H5C_incr__threshold,
+ /* double lower_hr_threshold = */ 0.9,
+ /* double increment = */ 2.0,
+ /* hbool_t apply_max_increment = */ TRUE,
+ /* size_t max_increment = */ (4 * 1024 * 1024),
+ /* enum H5C_cache_flash_incr_mode */
+ /* flash_incr_mode = */ H5C_flash_incr__add_space,
+ /* double flash_multiple = */ 1.4,
+ /* double flash_threshold = */ 0.25,
+ /* enum H5C_cache_decr_mode decr_mode = */ H5C_decr__age_out_with_threshold,
+ /* double upper_hr_threshold = */ 0.999,
+ /* double decrement = */ 0.9,
+ /* hbool_t apply_max_decrement = */ TRUE,
+ /* size_t max_decrement = */ (1 * 1024 * 1024),
+ /* int epochs_before_eviction = */ 3,
+ /* hbool_t apply_empty_reserve = */ TRUE,
+ /* double empty_reserve = */ 0.1,
+ /* int dirty_bytes_threshold = */ (256 * 1024)
+}
+\endcode
+
+The default configuration for the parallel case is as follows:
+
+\code{.c}
+{
+ /* int version = */ H5C__CURR_AUTO_SIZE_CTL_VER,
+ /* hbool_t rpt_fcn_enabled = */ FALSE,
+ /* hbool_t open_trace_file = */ FALSE,
+ /* hbool_t close_trace_file = */ FALSE,
+ /* char trace_file_name[] = */ "",
+ /* hbool_t evictions_enabled = */ TRUE,
+ /* hbool_t set_initial_size = */ TRUE,
+ /* size_t initial_size = */ ( 2 * 1024 * 1024),
+ /* double min_clean_fraction = */ 0.3,
+ /* size_t max_size = */ (32 * 1024 * 1024),
+ /* size_t min_size = */ ( 1 * 1024 * 1024),
+ /* long int epoch_length = */ 50000,
+ /* enum H5C_cache_incr_mode incr_mode = */ H5C_incr__threshold,
+ /* double lower_hr_threshold = */ 0.9,
+ /* double increment = */ 2.0,
+ /* hbool_t apply_max_increment = */ TRUE,
+ /* size_t max_increment = */ (4 * 1024 * 1024),
+ /* enum H5C_cache_flash_incr_mode */
+ /* flash_incr_mode = */ H5C_flash_incr__add_space,
+ /* double flash_multiple = */ 1.0,
+ /* double flash_threshold = */ 0.25,
+ /* enum H5C_cache_decr_mode decr_mode = */ H5C_decr__age_out_with_threshold,
+ /* double upper_hr_threshold = */ 0.999,
+ /* double decrement = */ 0.9,
+ /* hbool_t apply_max_decrement = */ TRUE,
+ /* size_t max_decrement = */ (1 * 1024 * 1024),
+ /* int epochs_before_eviction = */ 3,
+ /* hbool_t apply_empty_reserve = */ TRUE,
+ /* double empty_reserve = */ 0.1,
+ /* int dirty_bytes_threshold = */ (256 * 1024)
+}
+\endcode
+
+The default serial configuration should be adequate for most serial HDF5 users.
+
+The same may not be true for the default parallel configuration due to the
+interaction between the \ref H5AC_cache_config_t.min_clean_fraction "min_clean_fraction" and the cache size increase code. See
+the Interactions section for further details.
+
+Should you need to change the default configuration, it can be found in
+H5ACprivate.h. Look for the definition of H5AC__DEFAULT_RESIZE_CONFIG.
+
+\section controlling Controlling the New Metadata Cache Size From Your Program
+
+You have already seen how \ref H5AC_cache_config_t has facilities that allow you
+to control the metadata cache size directly. Use H5Fget_mdc_config() and
+H5Fset_mdc_config() to get and set the metadata cache configuration on an open
+file. Use H5Pget_mdc_config() and H5Pset_mdc_config() to get and set the initial
+metadata cache configuration in a file access property list. Recall that this
+list contains configuration data used when opening a file.
+
+Use H5Fget_mdc_hit_rate() to get the average hit rate since the last time the
+hit rate stats were reset. This happens automatically at the beginning of each
+epoch if the adaptive cache resize code is enabled. You can also do it manually
+with H5Freset_mdc_hit_rate_stats(). Be careful about doing this if the adaptive
+cache resize code is enabled, as you may confuse it.
+
+Use H5Fget_mdc_size() to get metadata cache size data on an open file.
+
+Finally, note that cache size and cache footprint are two different things -- in
+my tests, the cache footprint (as inferred from the UNIX \c top command) is
+typically about three times the maximum cache size. I haven't tracked it down
+yet, but I would guess that most of this is due to the very small typical cache
+entry size combined with the rather large size of the cache entry header
+structure. This should be investigated further, but there are other matters of
+higher priority.
+
+\section news New Metadata Cache Debugging Facilities
+
+The new metadata cache has a variety of debugging facilities that may be of
+use. I doubt that any other than the report function and the trace file will
+ever be accessible via the API, but they are relatively easy to turn on in the
+source code.
+
+Note that none of this should be viewed as supported -- it is described here on
+the off chance that you want to use it, but you are on your own if you do. Also,
+there are no promises as to consistency between versions.
+
+As mentioned above, you can use the \ref H5AC_cache_config_t.rpt_fcn_enabled "rpt_fcn_enabled" field of the
+configuration structure to enable the default reporting function
+(H5C_def_auto_resize_rpt_fcn() in H5C.c). If this function doesn't work for you,
+you will have to write your own. In particular, remember that it uses \c stdout,
+so it will probably be unhappy under Windows.
+
+Again, remember that this facility is not supported. Further, it is likely to
+change every time I do any serious work on the cache.
+
+There is also extensive statistics collection code. Use
+H5C_COLLECT_CACHE_STATS and H5C_COLLECT_CACHE_ENTRY_STATS in H5Cprivate.h to
+turn this on. If you also turn on H5AC_DUMP_STATS_ON_CLOSE in H5ACprivate.h,
+stats will be dumped when you close a file. Alternatively you can call
+H5C_stats() and H5C_stats__reset() within the library to dump and reset
+stats. Both of these functions are defined in H5C.c.
+
+Finally, the cache also contains extensive sanity checking code. Much of this
+is turned on when you compile in debug mode, but to enable the full suite, turn
+on H5C_DO_SANITY_CHECKS in H5Cprivate.h.
+
+\section trouble Troubleshooting
+
+Absent major bugs in the cache, the only troubleshooting you should have to do
+is diagnosing and fixing problems with your cache configuration.
+
+Assuming it runs on your platform (I've only used it under Linux), the reporting
+function is probably the most convenient diagnosis tool. However, since it is
+unsupported code, I will not discuss it further beyond directing you to the
+source (H5C_def_auto_resize_rpt_fcn() in H5C.c).
+
+Absent the reporting function, regular calls to H5Fget_mdc_hit_rate() should
+give you a good idea of the hit rate over time. Remember that the hit rate stats
+are reset at the end of each epoch (when adaptive cache resizing is enabled), so
+you should expect some jitter.
+
+Similar calls to H5Fget_mdc_size() should allow you to monitor cache size and
+the fraction of the current maximum cache size that is actually in use.
+
+If the hit rate is consistently low, and the cache is at its current maximum
+size, increasing the maximum size is an obvious fix.
+
+If you see hit rate and cache size oscillations, try disabling adaptive cache
+resizing and setting a fixed cache size a bit greater than the high end of the
+cache size oscillations you observed.
+
+If the hit rate oscillations don't go away, you are probably looking at a
+feature of your application that can't be helped without major changes to the
+cache. Please send along a description of the situation.
+
+If the oscillations do go away, you may be able to come up with a configuration
+that deals with the situation. If that fails, control the cache size manually,
+and write to me, so I can try to develop an adaptive resize algorithm that works
+in your case.
+
+Needless to say, you should give the cache a few epochs to adapt to
+circumstances. If that is too slow for you, try manual cache size control.
+
+If you find it necessary to disable evictions, you may find it useful to enable
+the internal statistics collection code mentioned above in the section on
+debugging facilities.
+
+Amongst many other things, the stats code will report the maximum cache size,
+and the average successful and unsuccessful search depths in the hash table. If
+these latter figures are significantly above 1, you should increase the size of
+the hash table.
+
+ */ \ No newline at end of file
diff --git a/doxygen/dox/OtherSpecs.dox b/doxygen/dox/OtherSpecs.dox
new file mode 100644
index 0000000..e53f26e
--- /dev/null
+++ b/doxygen/dox/OtherSpecs.dox
@@ -0,0 +1,11 @@
+/** \page IMG HDF5 Image and Palette Specification Version 1.2
+
+\htmlinclude ImageSpec.html
+
+*/
+
+/** \page TBL HDF5 Table Specification Version 1.0
+
+\htmlinclude TableSpec.html
+
+*/
diff --git a/doxygen/dox/Overview.dox b/doxygen/dox/Overview.dox
new file mode 100644
index 0000000..754722e
--- /dev/null
+++ b/doxygen/dox/Overview.dox
@@ -0,0 +1,32 @@
+
+/** \mainpage notitle
+
+This is the documentation set for HDF5. You can
+<a href="hdf5-doc.tgz">download</a> it as a tgz archive for offline reading.
+
+This is the documentation set for HDF5 in terms of specifications and software
+developed and maintained by <a href="https://www.hdfgroup.org/">The HDF
+Group</a>. It is impractical to document the entire HDF5 ecosystem in one place,
+and you should also consult the documentation sets of the many outstanding
+community projects.
+
+If you are new to HDF5, the best place to start is the \link
+GettingStarted getting started \endlink page, which shows you how to write and
+compile your first program with HDF5.
+
+The \b main \b documentation is organized by documentation flavor. Most
+technical documentation consists to varying degrees of information related to
+<em>tasks</em>, <em>concepts</em>, or <em>reference</em> material. As its title
+suggests, the \link RM Reference Manual \endlink is 100% reference material,
+while the \link Cookbook \endlink is focused on tasks. The different guide-type
+documents cover a mix of tasks, concepts, and reference, to help a certain
+<em>audience</em> succeed.
+
+Finally, do not miss the search engine (top right-hand corner)! If you are
+looking for a specific function, it'll take you there directly. If unsure, it'll
+give you an idea of what's on offer and a few promising leads.
+
+\par ToDo List
+ There is plenty of <a href="./todo.html">unfinished business</a>.
+
+*/
diff --git a/doxygen/dox/ReferenceManual.dox b/doxygen/dox/ReferenceManual.dox
new file mode 100644
index 0000000..596a224
--- /dev/null
+++ b/doxygen/dox/ReferenceManual.dox
@@ -0,0 +1,43 @@
+/** \page RM Reference Manual
+
+The functions provided by the HDF5 C-API are grouped into the following
+\Emph{modules}:
+
+\li \ref H5A "Attributes" — Management of HDF5 attributes (\ref H5A)
+\li \ref H5D "Datasets" — Management of HDF5 datasets (\ref H5D)
+\li \ref H5S "Dataspaces" — Management of HDF5 dataspaces which describe the shape of datasets and attributes (\ref H5S)
+\li \ref H5T "Datatypes" — Management of datatypes which describe elements of datasets and attributes (\ref H5T)
+\li \ref H5E "Error Handling" — Functions for handling HDF5 errors (\ref H5E)
+\li \ref H5ES "Event Sets" — Functions for handling HDF5 event sets (\ref H5ES)
+\li \ref H5F "Files" — Management of HDF5 files (\ref H5F)
+\li \ref H5Z "Filters" — Configuration of filters that process data during I/O operations (\ref H5Z)
+\li \ref H5G "Groups" — Management of groups in HDF5 files (\ref H5G)
+\li \ref H5I "Identifiers" — Management of object identifiers and object names (\ref H5I)
+\li \ref H5 "Library" — General purpose library functions (\ref H5)
+\li \ref H5L "Links" — Management of links in HDF5 groups (\ref H5L)
+\li \ref H5M "Maps" — Management of HDF5 maps (\ref H5M)
+\li \ref H5O "Objects" — Management of objects in HDF5 files (\ref H5O)
+\li \ref H5PL "Plugins" — Programmatic control over dynamically loaded plugins (\ref H5PL)
+\li \ref H5P "Property Lists" — Management of property lists to control HDF5 library behavior (\ref H5P)
+\li \ref H5R "References" — Management of references to specific objects and data regions in an HDF5 file (\ref H5R)
+\li \ref H5VL "Virtual Object Layer" — Management of the Virtual Object Layer (\ref H5VL)
+
+\par Asynchronous Functions
+ A subset of functions has \ref ASYNC "asynchronous variants".
+
+\par API Versioning
+ See \ref api-compat-macros
+
+\par Deprecated Functions and Types
+ A list of deprecated functions and types can be found
+ <a href="./deprecated.html">here</a>.
+
+\par Etiquette
+ Here are a few simple rules to follow:
+ \li \Bold{Handle discipline:} If you acquire a handle (by creation or copy), \Emph{you own it!} (..., i.e., you have to close it.)
+ \li \Bold{Dynamic memory allocation:} ...
+ \li \Bold{Use of locations:} Identifier + name combo
+
+\cpp_c_api_note
+
+*/ \ No newline at end of file
diff --git a/doxygen/dox/Specifications.dox b/doxygen/dox/Specifications.dox
new file mode 100644
index 0000000..4ae48d0
--- /dev/null
+++ b/doxygen/dox/Specifications.dox
@@ -0,0 +1,22 @@
+/** \page SPEC Specifications
+
+\section sec_spec_ddl DDL
+
+\li \ref DDLBNF110 "DDL in BNF through HDF5 1.10"
+\li \ref DDLBNF112 "DDL in BNF for HDF5 1.12 and above"
+
+\section sec_spec_file_format File Format
+
+\li \ref FMT1 "HDF5 File Format Specification Version 1.0"
+\li \ref FMT11 "HDF5 File Format Specification Version 1.1"
+\li \ref FMT2 "HDF5 File Format Specification Version 2.0"
+\li \ref FMT3 "HDF5 File Format Specification Version 3.0"
+
+\section sec_spec_other Other
+
+\li \ref IMG "HDF5 Image and Palette Specification Version 1.2"
+\li \ref TBL "HDF5 Table Specification Version 1.0"
+\li <a href="https://support.hdfgroup.org/HDF5/doc/HL/H5DS_Spec.pdf">
+ HDF5 Dimension Scale Specification</a>
+
+*/ \ No newline at end of file
diff --git a/doxygen/dox/TechnicalNotes.dox b/doxygen/dox/TechnicalNotes.dox
new file mode 100644
index 0000000..2bda175
--- /dev/null
+++ b/doxygen/dox/TechnicalNotes.dox
@@ -0,0 +1,20 @@
+/** \page TN Technical Notes
+
+\li \link api-compat-macros API Compatibility Macros \endlink
+\li \ref TNMDC "Metadata Caching in HDF5"
+\li \ref MT "Thread Safe library"
+\li \ref VFL "Virtual File Layer"
+
+ */
+
+/** \page MT HDF5 Thread Safe library
+
+\htmlinclude ThreadSafeLibrary.html
+
+*/
+
+/** \page VFL HDF5 Virtual File Layer
+
+\htmlinclude VFL.html
+
+*/
diff --git a/doxygen/dox/api-compat-macros.dox b/doxygen/dox/api-compat-macros.dox
index 6b85ccb..4a1578d 100644
--- a/doxygen/dox/api-compat-macros.dox
+++ b/doxygen/dox/api-compat-macros.dox
@@ -1,5 +1,4 @@
/** \page api-compat-macros API Compatibility Macros
- \tableofcontents
\section audience Audience
The target audience for this document has existing applications that use the
diff --git a/doxygen/dox/mainpage.dox b/doxygen/dox/mainpage.dox
deleted file mode 100644
index eda967b..0000000
--- a/doxygen/dox/mainpage.dox
+++ /dev/null
@@ -1,44 +0,0 @@
-/*! \mainpage HDF5 C-API Reference
- *
- * The HDF5 C-API provides applications with fine-grained control over all
- * aspects HDF5 functionality. This functionality is grouped into the following
- * \Emph{modules}:
- * \li \ref H5A "Attributes" — Management of HDF5 attributes (\ref H5A)
- * \li \ref H5D "Datasets" — Management of HDF5 datasets (\ref H5D)
- * \li \ref H5S "Dataspaces" — Management of HDF5 dataspaces which describe the shape of datasets and attributes (\ref H5S)
- * \li \ref H5T "Datatypes" — Management of datatypes which describe elements of datasets and attributes (\ref H5T)
- * \li \ref H5E "Error Handling" — Functions for handling errors that occur within HDF5 (\ref H5E)
- * \li \ref H5F "Files" — Management of HDF5 files (\ref H5F)
- * \li \ref H5Z "Filters" — Configuration of filters that process data during I/O operation (\ref H5Z)
- * \li \ref H5G "Groups" — Management of groups in HDF5 files (\ref H5G)
- * \li \ref H5I "Identifiers" — Management of object identifiers and object names (\ref H5I)
- * \li \ref H5 "Library" — General purpose library functions (\ref H5)
- * \li \ref H5L "Links" — Management of links in HDF5 groups (\ref H5L)
- * \li \ref H5O "Objects" — Management of objects in HDF5 files (\ref H5O)
- * \li \ref H5PL "Plugins" — Programmatic control over dynamically loaded plugins (\ref H5PL)
- * \li \ref H5P "Property Lists" — Management of property lists to control HDF5 library behavior (\ref H5P)
- * \li \ref H5R "References" — Management of references to specific objects and data regions in an HDF5 file (\ref H5R)
- * \li \ref H5VL "Virtual Object Layer" — Management of the Virtual Object Layer (\ref H5VL)
- *
- * Here are a few simple rules to follow:
- *
- * \li \Bold{Handle discipline:} If you acquire a handle (by creation or coopy), \Emph{you own it!} (..., i.e., you have to close it.)
- * \li \Bold{Dynamic memory allocation:} ...
- * \li \Bold{Use of locations:} Identifier + name combo
- *
- * \attention \Bold{C++ Developers using HDF5 C-API functions beware:}\n
- * If a C routine that takes a function pointer as an argument is called from
- * within C++ code, the C routine should be returned from normally.
- * Examples of this kind of routine include callbacks such as H5Pset_elink_cb()
- * and H5Pset_type_conv_cb() and functions such as H5Tconvert() and H5Ewalk2().\n
- * Exiting the routine in its normal fashion allows the HDF5 C library to clean
- * up its work properly. In other words, if the C++ application jumps out of
- * the routine back to the C++ \c catch statement, the library is not given the
- * opportunity to close any temporary data structures that were set up when the
- * routine was called. The C++ application should save some state as the
- * routine is started so that any problem that occurs might be diagnosed.
- *
- * \todo Fix the search form for server deployments.
- * \todo Make it mobile-friendly
- *
- */ \ No newline at end of file
diff --git a/doxygen/dox/maybe_metadata_reads.dox b/doxygen/dox/maybe_metadata_reads.dox
new file mode 100644
index 0000000..25c905f
--- /dev/null
+++ b/doxygen/dox/maybe_metadata_reads.dox
@@ -0,0 +1,82 @@
+/**
+ * \page maybe_metadata_reads Functions with No Access Property List Parameter that May Generate Metadata Reads
+ *
+ * \ingroup GACPL
+ *
+ * Currently there are several operations in HDF5 that can issue metadata reads
+ * from the metadata cache, but that take no property list. It is therefore not
+ * possible to set a collective requirement individually for those operations. The
+ * only solution with the HDF5 1.10.0 release is to set the collective requirement
+ * globally on H5Fopen() or H5Fcreate() for all metadata operations to be
+ * collective.
+ *
+ * The following is a list of those functions in the HDF5 library. This list is
+ * integral to the discussion in the H5Pset_all_coll_metadata_ops() entry:
+ *
+ * <pre>
+ *
+ * H5Awrite()
+ * H5Aread()
+ * H5Arename()
+ * H5Aiterate2()
+ * H5Adelete()
+ * H5Aexists()
+ *
+ * H5Dget_space_status()
+ * H5Dget_storage_size()
+ * H5Dset_extent()
+ * H5Ddebug()
+ * H5Dclose()
+ * H5Dget_create_plist()
+ * H5Dget_space() (when dataset is a virtual dataset)
+ *
+ * H5Gget_create_plist()
+ * H5Gget_info()
+ * H5Gclose()
+ *
+ * H5Literate()
+ * H5Lvisit()
+ *
+ * H5Rcreate()
+ * H5Rdereference2() (when reference is an object reference)
+ * H5Rget_region()
+ * H5Rget_obj_type2()
+ * H5Rget_name()
+ *
+ * H5Ocopy()
+ * H5Oopen_by_addr()
+ * H5Oincr_refcount()
+ * H5Odecr_refcount()
+ * H5Oget_info()
+ * H5Oset_comment()
+ * H5Ovisit()
+ *
+ * H5Fis_hdf5()
+ * H5Fflush()
+ * H5Fclose()
+ * H5Fget_file_image()
+ * H5Freopen()
+ * H5Fget_freespace()
+ * H5Fget_info2()
+ * H5Fget_free_sections()
+ * H5Fmount()
+ * H5Funmount()
+ *
+ * H5Iget_name()
+ *
+ * H5Tget_create_plist()
+ * H5Tclose()
+ *
+ * H5Zunregister()
+ * </pre>
+ *
+ * In addition, \b most deprecated functions fall into this category.
+ *
+ * The HDF Group may address the above limitation in a future major release, but
+ * no decision has been made at this time. Such a change might, for example,
 * include adding new versions of some or all of the above functions with an extra
+ * property list parameter to allow an individual setting for the collective
+ * calling requirement.
+ *
+ * \sa_metadata_ops
+ */