diff options
author | Allen Byrne <50328838+byrnHDF@users.noreply.github.com> | 2022-09-14 20:44:24 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-14 20:44:24 (GMT) |
commit | 45178c87a3099a9fef8bae6f7249ca306cf89629 (patch) | |
tree | cb404581365434d641e4d6303921613ef3432bd0 /src | |
parent | dcf3b54b6ef3ffe2093cfae81fe80cdb2bb53047 (diff) | |
download | hdf5-45178c87a3099a9fef8bae6f7249ca306cf89629.zip hdf5-45178c87a3099a9fef8bae6f7249ca306cf89629.tar.gz hdf5-45178c87a3099a9fef8bae6f7249ca306cf89629.tar.bz2 |
develop Merge doxygen from 1.12 branch (#2095)
Diffstat (limited to 'src')
-rw-r--r-- | src/H5ACpublic.h | 2 | ||||
-rw-r--r-- | src/H5Amodule.h | 346 | ||||
-rw-r--r-- | src/H5Dmodule.h | 2956 | ||||
-rw-r--r-- | src/H5Dpublic.h | 2 | ||||
-rw-r--r-- | src/H5ESmodule.h | 88 | ||||
-rw-r--r-- | src/H5Emodule.h | 514 | ||||
-rw-r--r-- | src/H5Epublic.h | 8 | ||||
-rw-r--r-- | src/H5Fmodule.h | 1443 | ||||
-rw-r--r-- | src/H5Gmodule.h | 924 | ||||
-rw-r--r-- | src/H5Gpublic.h | 10 | ||||
-rw-r--r-- | src/H5Imodule.h | 7 | ||||
-rw-r--r-- | src/H5Lmodule.h | 7 | ||||
-rw-r--r-- | src/H5Mmodule.h | 16 | ||||
-rw-r--r-- | src/H5Omodule.h | 7 | ||||
-rw-r--r-- | src/H5Opublic.h | 6 | ||||
-rw-r--r-- | src/H5PLmodule.h | 9 | ||||
-rw-r--r-- | src/H5Pmodule.h | 960 | ||||
-rw-r--r-- | src/H5Ppublic.h | 80 | ||||
-rw-r--r-- | src/H5Rmodule.h | 27 | ||||
-rw-r--r-- | src/H5Smodule.h | 1492 | ||||
-rw-r--r-- | src/H5Tmodule.h | 3832 | ||||
-rw-r--r-- | src/H5VLmodule.h | 92 | ||||
-rw-r--r-- | src/H5Zmodule.h | 7 | ||||
-rw-r--r-- | src/H5module.h | 1404 |
24 files changed, 14053 insertions, 186 deletions
diff --git a/src/H5ACpublic.h b/src/H5ACpublic.h index 0e03574..f9da6f6 100644 --- a/src/H5ACpublic.h +++ b/src/H5ACpublic.h @@ -563,7 +563,7 @@ typedef struct H5AC_cache_config_t { * The value must lie in the interval [0.0, 1.0]. 0.01 is a good place to * start in the serial case. In the parallel case, a larger value is needed * -- see the overview of the metadata cache in the - * “Metadata Caching in HDF5” section of the -- <em>HDF5 User’s Guide</em> + * “Metadata Caching in HDF5” section of the -- <em>\ref UG</em> * for details. */ size_t max_size; diff --git a/src/H5Amodule.h b/src/H5Amodule.h index 3586414..7f88a22 100644 --- a/src/H5Amodule.h +++ b/src/H5Amodule.h @@ -28,30 +28,92 @@ #define H5_MY_PKG H5A #define H5_MY_PKG_ERR H5E_ATTR -/**\defgroup H5A H5A +/** \page H5A_UG HDF5 Attributes * - * Use the functions in this module to manage HDF5 attributes. + * \section sec_attribute HDF5 Attributes * - * Like HDF5 datasets, HDF5 attributes are array variables which have an element - * datatype and a shape (dataspace). However, they perform a different function: - * Attributes decorate other HDF5 objects, and are typically used to - * represent application metadata. Unlike datasets, the HDF5 library does not - * support partial I/O operations for attributes and they cannot be compressed - * or extended. + * An HDF5 attribute is a small metadata object describing the nature and/or intended usage of a primary data + * object. A primary data object may be a dataset, group, or committed datatype. * + * \subsection subsec_attribute_intro Introduction + * + * Attributes are assumed to be very small as data objects go, so storing them as standard HDF5 datasets would + * be quite inefficient. HDF5 attributes are therefore managed through a special attributes interface, + * \ref H5A, which is designed to easily attach attributes to primary data objects as small datasets + * containing metadata information and to minimize storage requirements. 
+ * + * Consider, as examples of the simplest case, a set of laboratory readings taken under known temperature and + * pressure conditions of 18.0 degrees Celsius and 0.5 atmospheres, respectively. The temperature and pressure + * stored as attributes of the dataset could be described as the following name/value pairs: + * \li temp=18.0 + * \li pressure=0.5 + * + * While HDF5 attributes are not standard HDF5 datasets, they have much in common: + * \li An attribute has a user-defined dataspace and the included metadata has a user-assigned datatype + * \li Metadata can be of any valid HDF5 datatype + * \li Attributes are addressed by name + * + * But there are some very important differences: + * \li There is no provision for special storage such as compression or chunking + * \li There is no partial I/O or sub-setting capability for attribute data + * \li Attributes cannot be shared + * \li Attributes cannot have attributes + * \li Being small, an attribute is stored in the object header of the object it describes and is thus + * attached directly to that object + * + * \subsection subsec_error_H5A Attribute Function Summaries + * @see H5A reference manual + * + * \subsection subsec_attribute_program Programming Model for Attributes + * + * The figure below shows the UML model for an HDF5 attribute and its associated dataspace and datatype. * <table> - * <tr><th>Create</th><th>Read</th></tr> + * <tr> + * <td> + * \image html UML_Attribute.jpg "The UML model for an HDF5 attribute" + * </td> + * </tr> + * </table> + * + * Creating an attribute is similar to creating a dataset. To create an attribute, the application must + * specify the object to which the attribute is attached, the datatype and dataspace of the attribute + * data, and the attribute creation property list. 
+ * + * The following steps are required to create and write an HDF5 attribute: + * \li Obtain the object identifier for the attribute’s primary data object + * \li Define the characteristics of the attribute and specify the attribute creation property list + * <ul> <li> Define the datatype</li> + * <li> Define the dataspace</li> + * <li> Specify the attribute creation property list</li></ul> + * \li Create the attribute + * \li Write the attribute data (optional) + * \li Close the attribute (and datatype, dataspace, and attribute creation property list, if necessary) + * \li Close the primary data object (if appropriate) + * + * The following steps are required to open and read/write an existing attribute. Since HDF5 attributes + * allow no partial I/O, you need specify only the attribute and the attribute’s memory datatype to read it: + * \li Obtain the object identifier for the attribute’s primary data object + * \li Obtain the attribute’s name or index + * \li Open the attribute + * \li Get attribute dataspace and datatype (optional) + * \li Specify the attribute’s memory type + * \li Read and/or write the attribute data + * \li Close the attribute + * \li Close the primary data object (if appropriate) + * + * <table> + * <tr><th>Create</th><th>Update</th></tr> * <tr valign="top"> * <td> * \snippet{lineno} H5A_examples.c create * </td> * <td> - * \snippet{lineno} H5A_examples.c read + * \snippet{lineno} H5A_examples.c update * </td> - * <tr><th>Update</th><th>Delete</th></tr> + * <tr><th>Read</th><th>Delete</th></tr> * <tr valign="top"> * <td> - * \snippet{lineno} H5A_examples.c update + * \snippet{lineno} H5A_examples.c read * </td> * <td> * \snippet{lineno} H5A_examples.c delete @@ -59,6 +121,266 @@ * </tr> * </table> * + * \subsection subsec_attribute_work Working with Attributes + * + * \subsubsection subsubsec_attribute_work_struct The Structure of an Attribute + * + * An attribute has two parts: name and value(s). 
+ * + * HDF5 attributes are sometimes discussed as name/value pairs in the form name=value. + * + * An attribute’s name is a null-terminated ASCII or UTF-8 character string. Each attribute attached to an + * object has a unique name. + * + * The value portion of the attribute contains one or more data elements of the same datatype. + * + * HDF5 attributes have all the characteristics of HDF5 datasets except that there is no partial I/O + * capability. In other words, attributes can be written and read only in full with no sub-setting. + * + * \subsubsection subsubsec_attribute_work_create Creating, Writing, and Reading Attributes + * + * If attributes are used in an HDF5 file, these functions will be employed: \ref H5Acreate, \ref H5Awrite, + * and \ref H5Aread. \ref H5Acreate and \ref H5Awrite are used together to place the attribute in the file. If + * an attribute is to be used and is not currently in memory, \ref H5Aread generally comes into play + * usually in concert with one each of the H5Aget_* and H5Aopen_* functions. + * + * To create an attribute, call H5Acreate: + * \code + * hid_t H5Acreate (hid_t loc_id, const char *name, + * hid_t type_id, hid_t space_id, hid_t create_plist, + * hid_t access_plist) + * \endcode + * loc_id identifies the object (dataset, group, or committed datatype) to which the attribute is to be + * attached. name, type_id, space_id, and create_plist convey, respectively, the attribute’s name, datatype, + * dataspace, and attribute creation property list. The attribute’s name must be locally unique: it must be + * unique within the context of the object to which it is attached. + * + * \ref H5Acreate creates the attribute in memory. The attribute does not exist in the file until + * \ref H5Awrite writes it there. 
+ * + * To write or read an attribute, call H5Awrite or H5Aread, respectively: + * \code + * herr_t H5Awrite (hid_t attr_id, hid_t mem_type_id, const void *buf) + * herr_t H5Aread (hid_t attr_id, hid_t mem_type_id, void *buf) + * \endcode + * attr_id identifies the attribute while mem_type_id identifies the in-memory datatype of the attribute data. + * + * \ref H5Awrite writes the attribute data from the buffer buf to the file. \ref H5Aread reads attribute data + * from the file into buf. + * + * The HDF5 Library converts the metadata between the in-memory datatype, mem_type_id, and the in-file + * datatype, defined when the attribute was created, without user intervention. + * + * \subsubsection subsubsec_attribute_work_access Accessing Attributes by Name or Index + * + * Attributes can be accessed by name or index value. The use of an index value makes it possible to iterate + * through all of the attributes associated with a given object. + * + * To access an attribute by its name, use the \ref H5Aopen_by_name function. \ref H5Aopen_by_name returns an + * attribute identifier that can then be used by any function that must access an attribute such as \ref + * H5Aread. Use the function \ref H5Aget_name to determine an attribute’s name. + * + * To access an attribute by its index value, use the \ref H5Aopen_by_idx function. To determine an attribute + * index value when it is not already known, use the H5Oget_info function. \ref H5Aopen_by_idx is generally + * used in the course of opening several attributes for later access. Use \ref H5Aiterate if the intent is to + * perform the same operation on every attribute attached to an object. 
+ * + * \subsubsection subsubsec_attribute_work_info Obtaining Information Regarding an Object’s Attributes + * + * In the course of working with HDF5 attributes, one may need to obtain any of several pieces of information: + * \li An attribute name + * \li The dataspace of an attribute + * \li The datatype of an attribute + * \li The number of attributes attached to an object + * + * To obtain an attribute’s name, call H5Aget_name with an attribute identifier, attr_id: + * \code + * ssize_t H5Aget_name (hid_t attr_id, size_t buf_size, char *buf) + * \endcode + * As with other attribute functions, attr_id identifies the attribute; buf_size defines the size of the + * buffer; and buf is the buffer to which the attribute’s name will be read. + * + * If the length of the attribute name, and hence the value required for buf_size, is unknown, a first call + * to \ref H5Aget_name will return that size. If the value of buf_size used in that first call is too small, + * the name will simply be truncated in buf. A second \ref H5Aget_name call can then be used to retrieve the + * name in an appropriately-sized buffer. + * + * To determine the dataspace or datatype of an attribute, call \ref H5Aget_space or \ref H5Aget_type, + * respectively: \code hid_t H5Aget_space (hid_t attr_id) hid_t H5Aget_type (hid_t attr_id) \endcode \ref + * H5Aget_space returns the dataspace identifier for the attribute attr_id. \ref H5Aget_type returns the + * datatype identifier for the attribute attr_id. + * + * To determine the number of attributes attached to an object, use the \ref H5Oget_info function. The + * function signature is below. \code herr_t H5Oget_info( hid_t object_id, H5O_info_t *object_info ) \endcode + * The number of attributes will be returned in the object_info buffer. This is generally the preferred first + * step in determining attribute index values. If the call returns N, the attributes attached to the object + * object_id have index values of 0 through N-1. 
+ * + * \subsubsection subsubsec_attribute_work_iterate Iterating across an Object’s Attributes + * + * It is sometimes useful to be able to perform the identical operation across all of the attributes attached + * to an object. At the simplest level, you might just want to open each attribute. At a higher level, you + * might wish to perform a rather complex operation on each attribute as you iterate across the set. + * + * To iterate an operation across the attributes attached to an object, one must make a series of calls to + * \ref H5Aiterate + * \code + * herr_t H5Aiterate (hid_t obj_id, H5_index_t index_type, + * H5_iter_order_t order, hsize_t *n, H5A_operator2_t op, + * void *op_data) + * \endcode + * \ref H5Aiterate successively marches across all of the attributes attached to the object specified in + * obj_id, performing the operation(s) specified in op with the data specified in op_data on each + * attribute. + * + * When \ref H5Aiterate is called, n points to the index of the attribute to be accessed in this call. When + * \ref H5Aiterate returns, n will point to the index of the next attribute. If the returned index is the + * null pointer, then all attributes have been processed, and the iterative process is complete. + * + * op is a user-defined operation that adheres to the \ref H5A_operator2_t prototype. This prototype and + * certain requirements imposed on the operator’s behavior are described in the \ref H5Aiterate entry in the + * \ref RM. + * + * op_data is also user-defined to meet the requirements of op. Beyond providing a parameter with which + * to pass this data, HDF5 provides no tools for its management and imposes no restrictions. + * + * \subsubsection subsubsec_attribute_work_delete Deleting an Attribute + * + * Once an attribute has outlived its usefulness or is no longer appropriate, it may become necessary to + * delete it. 
+ * + * To delete an attribute, call \ref H5Adelete + * \code + * herr_t H5Adelete (hid_t loc_id, const char *name) + * \endcode + * \ref H5Adelete removes the attribute name from the group, dataset, or committed datatype specified in + * loc_id. + * + * \ref H5Adelete must not be called if there are any open attribute identifiers on the object loc_id. Such a + * call can cause the internal attribute indexes to change; future writes to an open attribute would then + * produce unintended results. + * + * \subsubsection subsubsec_attribute_work_close Closing an Attribute + * + * As is the case with all HDF5 objects, once access to an attribute is no longer needed, that attribute + * must be closed. It is best practice to close it as soon as practicable; it is mandatory that it be closed + * prior to the H5close call closing the HDF5 Library. + * + * To close an attribute, call \ref H5Aclose + * \code + * herr_t H5Aclose (hid_t attr_id) + * \endcode + * \ref H5Aclose closes the specified attribute by terminating access to its identifier, attr_id. + * + * \subsection subsec_attribute_special Special Issues + * + * Some special issues for attributes are discussed below. + * + * <h4>Large Numbers of Attributes Stored in Dense Attribute Storage</h4> + * + * The dense attribute storage scheme was added in version 1.8 so that datasets, groups, and committed + * datatypes that have large numbers of attributes could be processed more quickly. + * + * Attributes start out being stored in an object's header. This is known as compact storage. For more + * information, see "Storage Strategies." + * + * As the number of attributes grows, attribute-related performance slows. To improve performance, dense + * attribute storage can be initiated with the H5Pset_attr_phase_change function. See the HDF5 Reference + * Manual for more information. + * + * When dense attribute storage is enabled, a threshold is defined for the number of attributes kept in + * compact storage. 
When the number is exceeded, the library moves all of the attributes into dense storage + * at another location. The library handles the movement of attributes and the pointers between the locations + * automatically. If some of the attributes are deleted so that the number falls below the threshold, then + * the attributes are moved back to compact storage by the library. + * + * The improvements in performance from using dense attribute storage are the result of holding attributes + * in a heap and indexing the heap with a B-tree. + * + * Note that there are some disadvantages to using dense attribute storage. One is that this is a new feature. + * Datasets, groups, and committed datatypes that use dense storage cannot be read by applications built with + * earlier versions of the library. Another disadvantage is that attributes in dense storage cannot be + * compressed. + * + * <h4>Large Attributes Stored in Dense Attribute Storage</h4> + * + * We generally consider the maximum size of an attribute to be 64K bytes. The library has two ways of storing + * attributes larger than 64K bytes: in dense attribute storage or in a separate dataset. Using dense + * attribute storage is described in this section, and storing in a separate dataset is described in the next + * section. + * + * To use dense attribute storage to store large attributes, set the number of attributes that will be stored + * in compact storage to 0 with the H5Pset_attr_phase_change function. This will force all attributes to be + * put into dense attribute storage and will avoid the 64KB size limitation for a single attribute in compact + * attribute storage. + * + * The example code below illustrates how to create a large attribute that will be kept in dense storage. 
+ * + * <table> + * <tr><th>Create</th></tr> + * <tr valign="top"> + * <td> + * \snippet{lineno} H5A_examples.c create + * </td> + * </tr> + * </table> + * + * <h4>Large Attributes Stored in a Separate Dataset</h4> + * + * In addition to dense attribute storage (see above), a large attribute can be stored in a separate dataset. + * In the figure below, DatasetA holds an attribute that is too large for the object header in Dataset1. By + * putting a pointer to DatasetA as an attribute in Dataset1, the attribute becomes available to those + * working with Dataset1. + * This way of handling large attributes can be used in situations where backward compatibility is important + * and where compression is important. Applications built with versions before 1.8.x can read large + * attributes stored in separate datasets. Datasets can be compressed while attributes cannot. + * <table> + * <tr> + * <td> + * \image html Shared_Attribute.jpg "A large or shared HDF5 attribute and its associated dataset(s)" + * </td> + * </tr> + * </table> + * Note: In the figure above, DatasetA is an attribute of Dataset1 that is too large to store in Dataset1's + * header. DatasetA is associated with Dataset1 by means of an object reference pointer attached as an + * attribute to Dataset1. The attribute in DatasetA can be shared among multiple datasets by means of + * additional object reference pointers attached to additional datasets. + * + * <h4>Shared Attributes</h4> + * + * Attributes written and managed through the \ref H5A interface cannot be shared. If shared attributes are + * required, they must be handled in the manner described above for large attributes and illustrated in + * the figure above. 
+ * + * <h4>Attribute Names</h4> + * + * While any ASCII or UTF-8 character may be used in the name given to an attribute, it is usually wise + * to avoid the following kinds of characters: + * \li Commonly used separators or delimiters such as slash, backslash, colon, and semi-colon (\, /, :, ;) + * \li Escape characters + * \li Wild cards such as asterisk and question mark (*, ?) + * NULL can be used within a name, but HDF5 names are terminated with a NULL: whatever comes after the NULL + * will be ignored by HDF5. + * + * The use of ASCII or UTF-8 characters is determined by the character encoding property. See + * #H5Pset_char_encoding in the \ref RM. + * + * <h4>No Special I/O or Storage</h4> + * + * HDF5 attributes have all the characteristics of HDF5 datasets except the following: + * \li Attributes are written and read only in full: there is no provision for partial I/O or sub-setting + * \li No special storage capability is provided for attributes: there is no compression or chunking, and + * attributes are not extendable + * + * Previous Chapter \ref sec_dataspace - Next Chapter \ref sec_error + * + * \defgroup H5A Attributes (H5A) + * + * An HDF5 attribute is a small metadata object describing the nature and/or intended usage of a primary data + * object. A primary data object may be a dataset, group, or committed datatype. 
+ * + * @see sec_attribute + * */ #endif /* H5Amodule_H */ diff --git a/src/H5Dmodule.h b/src/H5Dmodule.h index a05d717..474efd9 100644 --- a/src/H5Dmodule.h +++ b/src/H5Dmodule.h @@ -28,7 +28,2961 @@ #define H5_MY_PKG H5D #define H5_MY_PKG_ERR H5E_DATASET -/**\defgroup H5D H5D +/** \page H5D_UG HDF5 Datasets + * + * \section sec_dataset HDF5 Datasets + * + * \subsection subsec_dataset_intro Introduction + * + * An HDF5 dataset is an object composed of a collection of data elements, or raw data, and + * metadata that stores a description of the data elements, data layout, and all other information + * necessary to write, read, and interpret the stored data. From the viewpoint of the application the + * raw data is stored as a one-dimensional or multi-dimensional array of elements (the raw data), + * those elements can be any of several numerical or character types, small arrays, or even + * compound types similar to C structs. The dataset object may have attribute objects. See the + * figure below. + * + * <table> + * <tr> + * <td> + * \image html Dsets_fig1.gif "Application view of a dataset" + * </td> + * </tr> + * </table> + * + * A dataset object is stored in a file in two parts: a header and a data array. The header contains + * information that is needed to interpret the array portion of the dataset, as well as metadata (or + * pointers to metadata) that describes or annotates the dataset. Header information includes the + * name of the object, its dimensionality, its number-type, information about how the data itself is + * stored on disk (the storage layout), and other information used by the library to speed up access + * to the dataset or maintain the file’s integrity. + * + * The HDF5 dataset interface, comprising the @ref H5D functions, provides a mechanism for managing + * HDF5 datasets including the transfer of data between memory and disk and the description of + * dataset properties. 
+ * + * A dataset is used by other HDF5 APIs, either by name or by an identifier. For more information, + * \see \ref api-compat-macros. + * + * \subsubsection subsubsec_dataset_intro_link Link/Unlink + * A dataset can be added to a group with one of the H5Lcreate calls, and deleted from a group with + * #H5Ldelete. The link and unlink operations use the name of an object, which may be a dataset. + * The dataset does not have to be open to be linked or unlinked. + * + * \subsubsection subsubsec_dataset_intro_obj Object Reference + * A dataset may be the target of an object reference. The object reference is created by + * #H5Rcreate with the name of an object which may be a dataset and the reference type + * #H5R_OBJECT. The dataset does not have to be open to create a reference to it. + * + * An object reference may also refer to a region (selection) of a dataset. The reference is created + * with #H5Rcreate and a reference type of #H5R_DATASET_REGION. + * + * An object reference can be accessed by a call to #H5Rdereference. When the reference is to a + * dataset or dataset region, the #H5Rdereference call returns an identifier to the dataset just as if + * #H5Dopen had been called. + * + * \subsubsection subsubsec_dataset_intro_attr Adding Attributes + * A dataset may have user-defined attributes which are created with #H5Acreate and accessed + * through the @ref H5A API. To create an attribute for a dataset, the dataset must be open, and the + * identifier is passed to #H5Acreate. The attributes of a dataset are discovered and opened using + * #H5Aopen_name, #H5Aopen_idx, or #H5Aiterate; these functions use the identifier of the dataset. + * An attribute can be deleted with #H5Adelete which also uses the identifier of the dataset. + * + * \subsection subsec_dataset_function Dataset Function Summaries + * Functions that can be used with datasets (@ref H5D functions) and property list functions that can + * be used with datasets (@ref H5P functions) are listed below. 
+ * + * <table> + * <caption>Dataset functions</caption> + * <tr> + * <th>Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Dcreate</td> + * <td>Creates a dataset at the specified location. The + * C function is a macro: \see \ref api-compat-macros.</td> + * </tr> + * <tr> + * <td>#H5Dcreate_anon</td> + * <td>Creates a dataset in a file without linking it into the file structure.</td> + * </tr> + * <tr> + * <td>#H5Dopen</td> + * <td>Opens an existing dataset. The C function is a macro: \see \ref api-compat-macros.</td> + * </tr> + * <tr> + * <td>#H5Dclose</td> + * <td>Closes the specified dataset.</td> + * </tr> + * <tr> + * <td>#H5Dget_space</td> + * <td>Returns an identifier for a copy of the dataspace for a dataset.</td> + * </tr> + * <tr> + * <td>#H5Dget_space_status</td> + * <td>Determines whether space has been allocated for a dataset.</td> + * </tr> + * <tr> + * <td>#H5Dget_type</td> + * <td>Returns an identifier for a copy of the datatype for a dataset.</td> + * </tr> + * <tr> + * <td>#H5Dget_create_plist</td> + * <td>Returns an identifier for a copy of the dataset creation property list for a dataset.</td> + * </tr> + * <tr> + * <td>#H5Dget_access_plist</td> + * <td>Returns the dataset access property list associated with a dataset.</td> + * </tr> + * <tr> + * <td>#H5Dget_offset</td> + * <td>Returns the dataset address in a file.</td> + * </tr> + * <tr> + * <td>#H5Dget_storage_size</td> + * <td>Returns the amount of storage required for a dataset.</td> + * </tr> + * <tr> + * <td>#H5Dvlen_get_buf_size</td> + * <td>Determines the number of bytes required to store variable-length (VL) data.</td> + * </tr> + * <tr> + * <td>#H5Dvlen_reclaim</td> + * <td>Reclaims VL datatype memory buffers.</td> + * </tr> + * <tr> + * <td>#H5Dread</td> + * <td>Reads raw data from a dataset into a buffer.</td> + * </tr> + * <tr> + * <td>#H5Dwrite</td> + * <td>Writes raw data from a buffer to a dataset.</td> + * </tr> + * <tr> + * <td>#H5Diterate</td> + * 
<td>Iterates over all selected elements in a dataspace.</td> + * </tr> + * <tr> + * <td>#H5Dgather</td> + * <td>Gathers data from a selection within a memory buffer.</td> + * </tr> + * <tr> + * <td>#H5Dscatter</td> + * <td>Scatters data into a selection within a memory buffer.</td> + * </tr> + * <tr> + * <td>#H5Dfill</td> + * <td>Fills dataspace elements with a fill value in a memory buffer.</td> + * </tr> + * <tr> + * <td>#H5Dset_extent</td> + * <td>Changes the sizes of a dataset’s dimensions.</td> + * </tr> + * </table> + * + * <table> + * <caption>Dataset creation property list functions (H5P)</caption> + * <tr> + * <th>Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Pset_layout</td> + * <td>Sets the type of storage used to store the raw data for a dataset.</td> + * </tr> + * <tr> + * <td>#H5Pget_layout</td> + * <td>Returns the layout of the raw data for a dataset.</td> + * </tr> + * <tr> + * <td>#H5Pset_chunk</td> + * <td>Sets the size of the chunks used to store a chunked layout dataset.</td> + * </tr> + * <tr> + * <td>#H5Pget_chunk</td> + * <td>Retrieves the size of chunks for the raw data of a chunked layout dataset.</td> + * </tr> + * <tr> + * <td>#H5Pset_deflate</td> + * <td>Sets compression method and compression level.</td> + * </tr> + * <tr> + * <td>#H5Pset_fill_value</td> + * <td>Sets the fill value for a dataset.</td> + * </tr> + * <tr> + * <td>#H5Pget_fill_value</td> + * <td>Retrieves a dataset fill value.</td> + * </tr> + * <tr> + * <td>#H5Pfill_value_defined</td> + * <td>Determines whether the fill value is defined.</td> + * </tr> + * <tr> + * <td>#H5Pset_fill_time</td> + * <td>Sets the time when fill values are written to a dataset.</td> + * </tr> + * <tr> + * <td>#H5Pget_fill_time</td> + * <td>Retrieves the time when fill values are written to a dataset.</td> + * </tr> + * <tr> + * <td>#H5Pset_alloc_time</td> + * <td>Sets the timing for storage space allocation.</td> + * </tr> + * <tr> + * <td>#H5Pget_alloc_time</td> + * 
<td>Retrieves the timing for storage space allocation.</td> + * </tr> + * <tr> + * <td>#H5Pset_filter</td> + * <td>Adds a filter to the filter pipeline.</td> + * </tr> + * <tr> + * <td>#H5Pall_filters_avail</td> + * <td>Verifies that all required filters are available.</td> + * </tr> + * <tr> + * <td>#H5Pget_nfilters</td> + * <td>Returns the number of filters in the pipeline.</td> + * </tr> + * <tr> + * <td>#H5Pget_filter</td> + * <td>Returns information about a filter in a pipeline. + * The C function is a macro: \see \ref api-compat-macros.</td> + * </tr> + * <tr> + * <td>#H5Pget_filter_by_id</td> + * <td>Returns information about the specified filter. + * The C function is a macro: \see \ref api-compat-macros.</td> + * </tr> + * <tr> + * <td>#H5Pmodify_filter</td> + * <td>Modifies a filter in the filter pipeline.</td> + * </tr> + * <tr> + * <td>#H5Premove_filter</td> + * <td>Deletes one or more filters in the filter pipeline.</td> + * </tr> + * <tr> + * <td>#H5Pset_fletcher32</td> + * <td>Sets up use of the Fletcher32 checksum filter.</td> + * </tr> + * <tr> + * <td>#H5Pset_nbit</td> + * <td>Sets up use of the n-bit filter.</td> + * </tr> + * <tr> + * <td>#H5Pset_scaleoffset</td> + * <td>Sets up use of the scale-offset filter.</td> + * </tr> + * <tr> + * <td>#H5Pset_shuffle</td> + * <td>Sets up use of the shuffle filter.</td> + * </tr> + * <tr> + * <td>#H5Pset_szip</td> + * <td>Sets up use of the Szip compression filter.</td> + * </tr> + * <tr> + * <td>#H5Pset_external</td> + * <td>Adds an external file to the list of external files.</td> + * </tr> + * <tr> + * <td>#H5Pget_external_count</td> + * <td>Returns the number of external files for a dataset.</td> + * </tr> + * <tr> + * <td>#H5Pget_external</td> + * <td>Returns information about an external file.</td> + * </tr> + * <tr> + * <td>#H5Pset_char_encoding</td> + * <td>Sets the character encoding used to encode a string. 
Use to set ASCII or UTF-8 character + * encoding for object names.</td> + * </tr> + * <tr> + * <td>#H5Pget_char_encoding</td> + * <td>Retrieves the character encoding used to create a string.</td> + * </tr> + * </table> + * + * <table> + * <caption>Dataset access property list functions (H5P)</caption> + * <tr> + * <th>Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Pset_buffer</td> + * <td>Sets type conversion and background buffers.</td> + * </tr> + * <tr> + * <td>#H5Pget_buffer</td> + * <td>Reads buffer settings.</td> + * </tr> + * <tr> + * <td>#H5Pset_chunk_cache</td> + * <td>Sets the raw data chunk cache parameters.</td> + * </tr> + * <tr> + * <td>#H5Pget_chunk_cache</td> + * <td>Retrieves the raw data chunk cache parameters.</td> + * </tr> + * <tr> + * <td>#H5Pset_edc_check</td> + * <td>Sets whether to enable error-detection when reading a dataset.</td> + * </tr> + * <tr> + * <td>#H5Pget_edc_check</td> + * <td>Determines whether error-detection is enabled for dataset reads.</td> + * </tr> + * <tr> + * <td>#H5Pset_filter_callback</td> + * <td>Sets user-defined filter callback function.</td> + * </tr> + * <tr> + * <td>#H5Pset_data_transform</td> + * <td>Sets a data transform expression.</td> + * </tr> + * <tr> + * <td>#H5Pget_data_transform</td> + * <td>Retrieves a data transform expression.</td> + * </tr> + * <tr> + * <td>#H5Pset_type_conv_cb</td> + * <td>Sets user-defined datatype conversion callback function.</td> + * </tr> + * <tr> + * <td>#H5Pget_type_conv_cb</td> + * <td>Gets user-defined datatype conversion callback function.</td> + * </tr> + * <tr> + * <td>#H5Pset_hyper_vector_size</td> + * <td>Sets number of I/O vectors to be read/written in hyperslab I/O.</td> + * </tr> + * <tr> + * <td>#H5Pget_hyper_vector_size</td> + * <td>Retrieves number of I/O vectors to be read/written in hyperslab I/O.</td> + * </tr> + * <tr> + * <td>#H5Pset_btree_ratios</td> + * <td>Sets B-tree split ratios for a dataset transfer property list.</td> + * </tr> 
+ * <tr> + * <td>#H5Pget_btree_ratios</td> + * <td>Gets B-tree split ratios for a dataset transfer property list.</td> + * </tr> + * <tr> + * <td>#H5Pset_vlen_mem_manager</td> + * <td>Sets the memory manager for variable-length datatype allocation in #H5Dread and + * #H5Dvlen_reclaim.</td> + * </tr> + * <tr> + * <td>#H5Pget_vlen_mem_manager</td> + * <td>Gets the memory manager for variable-length datatype allocation in #H5Dread and + * #H5Dvlen_reclaim.</td> + * </tr> + * <tr> + * <td>#H5Pset_dxpl_mpio</td> + * <td>Sets data transfer mode.</td> + * </tr> + * <tr> + * <td>#H5Pget_dxpl_mpio</td> + * <td>Returns the data transfer mode.</td> + * </tr> + * <tr> + * <td>#H5Pset_dxpl_mpio_chunk_opt</td> + * <td>Sets a flag specifying linked-chunk I/O or multi-chunk I/O.</td> + * </tr> + * <tr> + * <td>#H5Pset_dxpl_mpio_chunk_opt_num</td> + * <td>Sets a numeric threshold for linked-chunk I/O.</td> + * </tr> + * <tr> + * <td>#H5Pset_dxpl_mpio_chunk_opt_ratio</td> + * <td>Sets a ratio threshold for collective I/O.</td> + * </tr> + * <tr> + * <td>#H5Pset_dxpl_mpio_collective_opt</td> + * <td>Sets a flag governing the use of independent versus collective I/O.</td> + * </tr> + * <tr> + * <td>#H5Pset_multi_type</td> + * <td>Sets the type of data property for the MULTI driver.</td> + * </tr> + * <tr> + * <td>#H5Pget_multi_type</td> + * <td>Retrieves the type of data property for the MULTI driver.</td> + * </tr> + * <tr> + * <td>#H5Pset_small_data_block_size</td> + * <td>Sets the size of a contiguous block reserved for small data.</td> + * </tr> + * <tr> + * <td>#H5Pget_small_data_block_size</td> + * <td>Retrieves the current small data block size setting.</td> + * </tr> + * </table> + * + * \subsection subsec_dataset_program Programming Model for Datasets + * This section explains the programming model for datasets. 
+ * + * \subsubsection subsubsec_dataset_program_general General Model + * + * The programming model for using a dataset has three main phases: + * \li Obtain access to the dataset + * \li Operate on the dataset using the dataset identifier returned at access + * \li Release the dataset + * + * These three phases or steps are described in more detail below the figure. + * + * A dataset may be opened several times and operations performed with several different + * identifiers to the same dataset. All the operations affect the dataset although the calling program + * must synchronize if necessary to serialize accesses. + * + * Note that the dataset remains open until every identifier is closed. The figure below shows the + * basic sequence of operations. + * + * <table> + * <tr> + * <td> + * \image html Dsets_fig2.gif "Dataset programming sequence" + * </td> + * </tr> + * </table> + * + * Creation and data access operations may have optional parameters which are set with property + * lists. The general programming model is: + * \li Create property list of appropriate class (dataset create, dataset transfer) + * \li Set properties as needed; each type of property has its own format and datatype + * \li Pass the property list as a parameter of the API call + * + * The steps below describe the programming phases or steps for using a dataset. + * <h4>Step 1. Obtain Access</h4> + * A new dataset is created by a call to #H5Dcreate. If successful, the call returns an identifier for the + * newly created dataset. + * + * Access to an existing dataset is obtained by a call to #H5Dopen. This call returns an identifier for + * the existing dataset. + * + * An object reference may be dereferenced to obtain an identifier to the dataset it points to. + * + * In each of these cases, the successful call returns an identifier to the dataset. The identifier is + * used in subsequent operations until the dataset is closed. + * + * <h4>Step 2. 
Operate on the Dataset</h4> + * The dataset identifier can be used to write and read data to the dataset, to query and set + * properties, and to perform other operations such as adding attributes, linking in groups, and + * creating references. + * + * The dataset identifier can be used for any number of operations until the dataset is closed. + * + * <h4>Step 3. Close the Dataset</h4> + * When all operations are completed, the dataset identifier should be closed with a call to + * #H5Dclose. This releases the dataset. + * + * After the identifier is closed, it cannot be used for further operations. + * + * \subsubsection subsubsec_dataset_program_create Create Dataset + * + * A dataset is created and initialized with a call to #H5Dcreate. The dataset create operation sets + * permanent properties of the dataset: + * \li Name + * \li Dataspace + * \li Datatype + * \li Storage properties + * + * These properties cannot be changed for the life of the dataset, although the dataspace may be + * expanded up to its maximum dimensions. + * + * <h4>Name</h4> + * A dataset name is a sequence of alphanumeric ASCII characters. The full name would include a + * tracing of the group hierarchy from the root group of the file. An example is + * /rootGroup/groupA/subgroup23/dataset1. The local name or relative name within the lowest- + * level group containing the dataset would include none of the group hierarchy. An example is + * Dataset1. + * + * <h4>Dataspace</h4> + * The dataspace of a dataset defines the number of dimensions and the size of each dimension. The + * dataspace defines the number of dimensions, and the maximum dimension sizes and current size + * of each dimension. 
The maximum dimension size can be a fixed value or the constant + * #H5S_UNLIMITED, in which case the actual dimension size can be changed with calls to + * #H5Dset_extent, up to the maximum set with the maxdims parameter in the #H5Screate_simple + * call that established the dataset’s original dimensions. The maximum dimension size is set when + * the dataset is created and cannot be changed. + * + * <h4>Datatype</h4> + * Raw data has a datatype which describes the layout of the raw data stored in the file. The + * datatype is set when the dataset is created and can never be changed. When data is transferred to + * and from the dataset, the HDF5 library will assure that the data is transformed to and from the + * stored format. + * + * <h4>Storage Properties</h4> + * Storage properties of the dataset are set when it is created. The required inputs table below shows + * the categories of storage properties. The storage properties cannot be changed after the dataset is + * created. + * + * <h4>Filters</h4> + * When a dataset is created, optional filters are specified. The filters are added to the data transfer + * pipeline when data is read or written. The standard library includes filters to implement + * compression, data shuffling, and error detection code. Additional user-defined filters may also be + * used. + * + * The required filters are stored as part of the dataset, and the list may not be changed after the + * dataset is created. The HDF5 library automatically applies the filters whenever data is + * transferred. + * + * <h4>Summary</h4> + * + * A newly created dataset has no attributes and no data values. The dimensions, datatype, storage + * properties, and selected filters are set. The table below lists the required inputs, and the second + * table below lists the optional inputs. 
+ * + * <table> + * <caption>Required inputs</caption> + * <tr> + * <th>Required Inputs</th> + * <th>Description</th> + * </tr> + * <tr> + * <td>Dataspace</td> + * <td>The shape of the array.</td> + * </tr> + * <tr> + * <td>Datatype</td> + * <td>The layout of the stored elements.</td> + * </tr> + * <tr> + * <td>Name</td> + * <td>The name of the dataset in the group.</td> + * </tr> + * </table> + * + * <table> + * <caption>Optional inputs</caption> + * <tr> + * <th>Optional Inputs</th> + * <th>Description</th> + * </tr> + * <tr> + * <td>Storage Layout</td> + * <td>How the data is organized in the file including chunking.</td> + * </tr> + * <tr> + * <td>Fill Value</td> + * <td>The behavior and value for uninitialized data.</td> + * </tr> + * <tr> + * <td>External Storage</td> + * <td>Option to store the raw data in an external file.</td> + * </tr> + * <tr> + * <td>Filters</td> + * <td>Select optional filters to be applied. One of the filters that might be applied is compression.</td> + * </tr> + * </table> + * + * <h4>Example</h4> + * To create a new dataset, go through the following general steps: + * \li Set dataset characteristics (optional where default settings are acceptable) + * \li Datatype + * \li Dataspace + * \li Dataset creation property list + * \li Create the dataset + * \li Close the datatype, dataspace, and property list (as necessary) + * \li Close the dataset + * + * Example 1 below shows example code to create an empty dataset. The dataspace is 7 x 8, and the + * datatype is a little-endian integer. The dataset is created with the name “dset” and is a member of + * the root group, “/”. + * + * <em> Example 1. Create an empty dataset</em> + * \code + * hid_t dataset, datatype, dataspace; + * hsize_t dimsf[2]; + * herr_t status; + * + * // Create dataspace: Describe the size of the array and create the dataspace for fixed-size dataset. + * dimsf[0] = 7; + * dimsf[1] = 8; + * dataspace = H5Screate_simple(2, dimsf, NULL); + * + * // Define datatype for the data in the file.
+ * // For this example, store little-endian integer numbers. + * datatype = H5Tcopy(H5T_NATIVE_INT); + * status = H5Tset_order(datatype, H5T_ORDER_LE); + * + * // Create a new dataset within the file using defined + * // dataspace and datatype. No properties are set. + * dataset = H5Dcreate(file, "/dset", datatype, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * H5Dclose(dataset); + * H5Sclose(dataspace); + * H5Tclose(datatype); + * \endcode + * + * Example 2, below, shows example code to create a similar dataset with a fill value of ‘-1’. This + * code has the same steps as in the example above, but uses a non-default property list. A dataset + * creation property list is created, and then the fill value is set to the desired value. Then the + * property list is passed to the #H5Dcreate call. + * + * <em> Example 2. Create a dataset with fill value set</em> + * \code + * hid_t plist; // property list + * hid_t dataset, datatype, dataspace; + * int fillval = -1; + * + * dimsf[0] = 7; + * dimsf[1] = 8; + * dataspace = H5Screate_simple(2, dimsf, NULL); + * datatype = H5Tcopy(H5T_NATIVE_INT); + * status = H5Tset_order(datatype, H5T_ORDER_LE); + * + * // Example of Dataset Creation property list: set fill value to '-1' + * plist = H5Pcreate(H5P_DATASET_CREATE); + * status = H5Pset_fill_value(plist, datatype, &fillval); + * + * // Same as above, but use the property list + * dataset = H5Dcreate(file, "/dset", datatype, dataspace, H5P_DEFAULT, plist, H5P_DEFAULT); + * H5Dclose(dataset); + * H5Sclose(dataspace); + * H5Tclose(datatype); + * H5Pclose(plist); + * \endcode + * + * After this code is executed, the dataset has been created and written to the file. The data array is + * uninitialized. Depending on the storage strategy and fill value options that have been selected, + * some or all of the space may be allocated in the file, and fill values may be written in the file.
+ * + * \subsubsection subsubsec_dataset_program_transfer Data Transfer Operations on a Dataset + * Data is transferred between memory and the raw data array of the dataset through #H5Dwrite and + * #H5Dread operations. A data transfer has the following basic steps: + * \li 1. Allocate and initialize memory space as needed + * \li 2. Define the datatype of the memory elements + * \li 3. Define the elements to be transferred (a selection, or all the elements) + * \li 4. Set data transfer properties (including parameters for filters or file drivers) as needed + * \li 5. Call the @ref H5D API + * + * Note that the location of the data in the file, the datatype of the data in the file, the storage + * properties, and the filters do not need to be specified because these are stored as a permanent part + * of the dataset. A selection of elements from the dataspace is specified; the selected elements may + * be the whole dataspace. + * + * The following figure shows a diagram of a write operation which + * transfers a data array from memory to a dataset in the file (usually on disk). A read operation has + * similar parameters with the data flowing the other direction. + * + * <table> + * <tr> + * <td> + * \image html Dsets_fig3.gif "A write operation" + * </td> + * </tr> + * </table> + * + * <h4>Memory Space</h4> + * The calling program must allocate sufficient memory to store the data elements to be transferred. + * For a write (from memory to the file), the memory must be initialized with the data to be written + * to the file. For a read, the memory must be large enough to store the elements that will be read. + * The amount of storage needed can be computed from the memory datatype (which defines the + * size of each data element) and the number of elements in the selection. + * + * <h4>Memory Datatype</h4> + * The memory layout of a single data element is specified by the memory datatype. 
This specifies + * the size, alignment, and byte order of the element as well as the datatype class. Note that the + * memory datatype must be the same datatype class as the file datatype, but may have different byte order + * and other properties. The HDF5 Library automatically transforms data elements between the + * source and destination layouts. For more information, see \ref sec_datatype. + * + * For a write, the memory datatype defines the layout of the data to be written; an example is IEEE + * floating-point numbers in native byte order. If the file datatype (defined when the dataset is + * created) is different but compatible, the HDF5 Library will transform each data element when it + * is written. For example, if the file byte order is different from the native byte order, the HDF5 + * library will swap the bytes. + * + * For a read, the memory datatype defines the desired layout of the data to be read. This must be + * compatible with the file datatype, but should generally use native formats such as the native byte order. + * The HDF5 library will transform each data element as it is read. + * + * <h4>Selection</h4> + * The data transfer will transfer some or all of the elements of the dataset depending on the + * dataspace selection. The selection has two dataspace objects: one for the source, and one for the + * destination. These objects describe which elements of the dataspace are to be transferred. Some + * (partial I/O) or all of the data may be transferred. Partial I/O is specified by defining hyperslabs or + * lists of elements in a dataspace object. + * + * The dataspace selection for the source defines the indices of the elements to be read or written. + * The two selections must define the same number of points, but the order and layout may be + * different. The HDF5 Library automatically selects and distributes the elements according to the + * selections. It might, for example, perform a scatter-gather or sub-set of the data.
+ * + * <h4>Data Transfer Properties</h4> + * For some data transfers, additional parameters should be set using the transfer property list. The + * table below lists the categories of transfer properties. These properties set parameters for the + * HDF5 Library and may be used to pass parameters for optional filters and file drivers. For + * example, transfer properties are used to select independent or collective operation when using + * MPI-I/O. + * + * <table> + * <caption>Categories of transfer properties</caption> + * <tr> + * <th>Properties</th> + * <th>Description</th> + * </tr> + * <tr> + * <td>Library parameters</td> + * <td>Internal caches, buffers, B-Trees, etc.</td> + * </tr> + * <tr> + * <td>Memory management</td> + * <td>Variable-length memory management, data overwrite</td> + * </tr> + * <tr> + * <td>File driver management</td> + * <td>Parameters for file drivers</td> + * </tr> + * <tr> + * <td>Filter management</td> + * <td>Parameters for filters</td> + * </tr> + * </table> + * + * <h4>Data Transfer Operation (Read or Write)</h4> + * The data transfer is done by calling #H5Dread or #H5Dwrite with the parameters described above. + * The HDF5 Library constructs the required pipeline, which will scatter-gather, transform + * datatypes, apply the requested filters, and use the correct file driver. + * + * During the data transfer, the transformations and filters are applied to each element of the data in + * the required order until all the data is transferred. + * + * <h4>Summary</h4> + * To perform a data transfer, it is necessary to allocate and initialize memory, describe the source + * and destination, set required and optional transfer properties, and call the \ref H5D API. + * + * <h4>Examples</h4> + * The basic procedure to write to a dataset is the following: + * \li Open the dataset. + * \li Set the dataset dataspace for the write (optional if dataspace is #H5S_ALL). + * \li Write data. 
+ * \li Close the datatype, dataspace, and property list (as necessary). + * \li Close the dataset. + * + * Example 3 below shows example code to write a 4 x 6 array of integers. In the example, the data + * is initialized in the memory array dset_data. The dataset has already been created in the file, so it + * is opened with H5Dopen. + * + * The data is written with #H5Dwrite. The arguments are the dataset identifier, the memory + * datatype (#H5T_NATIVE_INT), the memory and file selections (#H5S_ALL in this case: the + * whole array), and the default (empty) property list. The last argument is the data to be + * transferred. + * + * <em> Example 3. Write an array of integers</em> + * \code + * hid_t file_id, dataset_id; // identifiers + * herr_t status; + * int i, j, dset_data[4][6]; + * + * // Initialize the dataset. + * for (i = 0; i < 4; i++) + * for (j = 0; j < 6; j++) + * dset_data[i][j] = i * 6 + j + 1; + * + * // Open an existing file. + * file_id = H5Fopen("dset.h5", H5F_ACC_RDWR, H5P_DEFAULT); + * + * // Open an existing dataset. + * dataset_id = H5Dopen(file_id, "/dset", H5P_DEFAULT); + * + * // Write the entire dataset, using 'dset_data': memory type is 'native int' + * // write the entire dataspace to the entire dataspace, no transfer properties + * status = H5Dwrite(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dset_data); + * + * status = H5Dclose(dataset_id); + * \endcode + * + * Example 4 below shows a similar write except for setting a non-default value for the transfer + * buffer. The code is the same as Example 3, but a transfer property list is created, and the desired + * buffer size is set. The #H5Dwrite function has the same arguments, but uses the property list to set + * the buffer. + * + * <em> Example 4. 
Write an array using a property list</em> + * \code + * hid_t file_id, dataset_id; + * hid_t xferplist; + * herr_t status; + * int i, j, dset_data[4][6]; + * + * file_id = H5Fopen("dset.h5", H5F_ACC_RDWR, H5P_DEFAULT); + * dataset_id = H5Dopen(file_id, "/dset", H5P_DEFAULT); + * + * // Example: set type conversion buffer to 64MB + * xferplist = H5Pcreate(H5P_DATASET_XFER); + * status = H5Pset_buffer(xferplist, 64 * 1024 * 1024, NULL, NULL); + * + * // Write the entire dataset, using 'dset_data': memory type is 'native int' + * // write the entire dataspace to the entire dataspace, set the buffer size with the property list + * status = H5Dwrite(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, xferplist, dset_data); + * + * status = H5Dclose(dataset_id); + * \endcode + * + * The basic procedure to read from a dataset is the following: + * \li Define the memory dataspace of the read (optional if dataspace is #H5S_ALL). + * \li Open the dataset. + * \li Get the dataset dataspace (if using #H5S_ALL above); otherwise, define the dataset dataspace of the read. + * \li Define the memory datatype (optional). + * \li Define the memory buffer. + * \li Read data. + * \li Close the datatype, dataspace, and property list (as necessary). + * \li Close the dataset. + * + * The example below shows code that reads a 4 x 6 array of integers from a dataset called “dset”. + * First, the dataset is opened. The #H5Dread call has parameters: + * \li The dataset identifier (from #H5Dopen) + * \li The memory datatype (#H5T_NATIVE_INT) + * \li The memory and file dataspace (#H5S_ALL, the whole array) + * \li A default (empty) property list + * \li The memory to be filled + * + * <em> Example 5. Read an array from a dataset</em> + * \code + * hid_t file_id, dataset_id; + * herr_t status; + * int i, j, dset_data[4][6]; + * + * // Open an existing file. + * file_id = H5Fopen("dset.h5", H5F_ACC_RDWR, H5P_DEFAULT); + * + * // Open an existing dataset.
+ * dataset_id = H5Dopen(file_id, "/dset", H5P_DEFAULT); + * + * // read the entire dataset, into 'dset_data': memory type is 'native int' + * // read the entire dataspace to the entire dataspace, no transfer properties, + * status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dset_data); + * + * status = H5Dclose(dataset_id); + * \endcode + * + * \subsubsection subsubsec_dataset_program_read Retrieve the Properties of a Dataset + * The functions listed below allow the user to retrieve information regarding a dataset including + * the datatype, the dataspace, the dataset creation property list, and the total stored size of the data. + * + * <table> + * <caption>Retrieve dataset information</caption> + * <tr> + * <th>Query Function</th> + * <th>Description</th> + * </tr> + * <tr> + * <td>H5Dget_space</td> + * <td>Retrieve the dataspace of the dataset as stored in the file.</td> + * </tr> + * <tr> + * <td>H5Dget_type</td> + * <td>Retrieve the datatype of the dataset as stored in the file.</td> + * </tr> + * <tr> + * <td>H5Dget_create_plist</td> + * <td>Retrieve the dataset creation properties.</td> + * </tr> + * <tr> + * <td>H5Dget_storage_size</td> + * <td>Retrieve the total bytes for all the data of the dataset.</td> + * </tr> + * <tr> + * <td>H5Dvlen_get_buf_size</td> + * <td>Retrieve the total bytes for all the variable-length data of the dataset.</td> + * </tr> + * </table> + * + * The example below illustrates how to retrieve dataset information. + * + * <em> Example 6. Retrieve dataset</em> + * \code + * hid_t file_id, dataset_id; + * hid_t dspace_id, dtype_id, plist_id; + * herr_t status; + * + * // Open an existing file. + * file_id = H5Fopen("dset.h5", H5F_ACC_RDWR, H5P_DEFAULT); + * + * // Open an existing dataset. 
+ * dataset_id = H5Dopen(file_id, "/dset", H5P_DEFAULT); + * dspace_id = H5Dget_space(dataset_id); + * dtype_id = H5Dget_type(dataset_id); + * plist_id = H5Dget_create_plist(dataset_id); + * + * // use the objects to discover the properties of the dataset + * status = H5Dclose(dataset_id); + * \endcode + * + * \subsection subsec_dataset_transfer Data Transfer + * The HDF5 library implements data transfers through a pipeline which implements data + * transformations (according to the datatype and selections), chunking (as requested), and I/O + * operations using different mechanisms (file drivers). The pipeline is automatically configured by + * the HDF5 library. Metadata is stored in the file so that the correct pipeline can be constructed to + * retrieve the data. In addition, optional filters such as compression may be added to the standard + * pipeline. + * + * The figure below illustrates data layouts for different layers of an application using HDF5. The + * application data is organized as a multidimensional array of elements. The HDF5 format + * specification defines the stored layout of the data and metadata. The storage layout properties + * define the organization of the abstract data. This data is written to and read from some storage + * medium. + * + * <table> + * <tr> + * <td> + * \image html Dsets_fig4.gif "Data layouts in an application" + * </td> + * </tr> + * </table> + * + * The last stage of a write (and first stage of a read) is managed by an HDF5 file driver module. + * The virtual file layer of the HDF5 Library implements a standard interface to alternative I/O + * methods, including memory (AKA “core”) files, single serial file I/O, multiple file I/O, and + * parallel I/O. The file driver maps a simple abstract HDF5 file to the specific access methods. + * + * The raw data of an HDF5 dataset is conceived to be a multidimensional array of data elements. 
+ * This array may be stored in the file according to several storage strategies: + * \li Contiguous + * \li Chunked + * \li Compact + * + * The storage strategy does not affect data access methods except that certain operations may be + * more or less efficient depending on the storage strategy and the access patterns. + * + * Overall, the data transfer operations (#H5Dread and #H5Dwrite) work identically for any storage + * method, for any file driver, and for any filters and transformations. The HDF5 library + * automatically manages the data transfer process. In some cases, transfer properties should or + * must be used to pass additional parameters such as MPI/IO directives when using the parallel file + * driver. + * + * \subsubsection subsubsec_dataset_transfer_pipe The Data Pipeline + * When data is written or read to or from an HDF5 file, the HDF5 library passes the data through a + * sequence of processing steps which are known as the HDF5 data pipeline. This data pipeline + * performs operations on the data in memory such as byte swapping, alignment, scatter-gather, and + * hyperslab selections. The HDF5 library automatically determines which operations are needed + * and manages the organization of memory operations such as extracting selected elements from a + * data block. The data pipeline modules operate on data buffers: each module processes a buffer + * and passes the transformed buffer to the next stage. + * + * The table below lists the stages of the data pipeline. The figure below the table shows the order + * of processing during a read or write. 
+ * + * <table> + * <caption>Stages of the data pipeline</caption> + * <tr> + * <th>Layers</th> + * <th>Description</th> + * </tr> + * <tr> + * <td>I/O initiation</td> + * <td>Initiation of HDF5 I/O activities (#H5Dwrite and #H5Dread) in a user’s application program.</td> + * </tr> + * <tr> + * <td>Memory hyperslab operation</td> + * <td>Data is scattered to (for read), or gathered from (for write) the application’s memory buffer + * (bypassed if no datatype conversion is needed).</td> + * </tr> + * <tr> + * <td>Datatype conversion</td> + * <td>Datatype is converted if it is different between memory and storage (bypassed if no datatype + * conversion is needed).</td> + * </tr> + * <tr> + * <td>File hyperslab operation</td> + * <td>Data is gathered from (for read), or scattered to (for write) to file space in memory (bypassed + * if no datatype conversion is needed).</td> + * </tr> + * <tr> + * <td>Filter pipeline</td> + * <td>Data is processed by filters when it passes. Data can be modified and restored here (bypassed + * if no datatype conversion is needed, no filter is enabled, or dataset is not chunked).</td> + * </tr> + * <tr> + * <td>Virtual File Layer</td> + * <td>Facilitate easy plug-in file drivers such as MPIO or POSIX I/O.</td> + * </tr> + * <tr> + * <td>Actual I/O</td> + * <td>Actual file driver used by the library such as MPIO or STDIO.</td> + * </tr> + * </table> + * + * <table> + * <tr> + * <td> + * \image html Dsets_fig5.gif "The processing order in the data pipeline" + * </td> + * </tr> + * </table> + * + * The HDF5 library automatically applies the stages as needed. + * + * When the memory dataspace selection is other than the whole dataspace, the memory hyperslab + * stage scatters/gathers the data elements between the application memory (described by the + * selection) and a contiguous memory buffer for the pipeline. On a write, this is a gather operation; + * on a read, this is a scatter operation. 
+ * + * When the memory datatype is different from the file datatype, the datatype conversion stage + * transforms each data element. For example, if data is written from 32-bit big-endian memory, + * and the file datatype is 32-bit little-endian, the datatype conversion stage will swap the bytes of + * every element. Similarly, when data is read from the file to native memory, byte swapping will + * be applied automatically when needed. + * + * The file hyperslab stage is similar to the memory hyperslab stage, but manages the + * arrangement of the elements according to the dataspace selection. When data is read, data + * elements are gathered from the data blocks of the file to fill the contiguous buffers which are + * then processed by the pipeline. When data is written, the elements from a buffer are scattered to the + * data blocks of the file. + * + * \subsubsection subsubsec_dataset_transfer_filter Data Pipeline Filters + * In addition to the standard pipeline, optional stages, called filters, can be inserted in the pipeline. + * The standard distribution includes optional filters to implement compression and error checking. + * User applications may add custom filters as well. + * + * The HDF5 library distribution includes or employs several optional filters. These are listed in the + * table below. The filters are applied in the pipeline between the virtual file layer and the file + * hyperslab operation. See the figure above. The application can use any number of filters in any + * order. + * + * <table> + * <caption>Data pipeline filters</caption> + * <tr> + * <th>Filter</th> + * <th>Description</th> + * </tr> + * <tr> + * <td>gzip compression</td> + * <td>Data compression using zlib.</td> + * </tr> + * <tr> + * <td>Szip compression</td> + * <td>Data compression using the Szip library.
See The HDF Group website for more information + * regarding the Szip filter.</td> + * </tr> + * <tr> + * <td>N-bit compression</td> + * <td>Data compression using an algorithm specialized for n-bit datatypes.</td> + * </tr> + * <tr> + * <td>Scale-offset compression</td> + * <td>Data compression using a “scale and offset” algorithm.</td> + * </tr> + * <tr> + * <td>Shuffling</td> + * <td>To improve compression performance, data is regrouped by its byte position in the data + * unit. In other words, the 1st, 2nd, 3rd, and 4th bytes of integers are stored together + * respectively.</td> + * </tr> + * <tr> + * <td>Fletcher32</td> + * <td>Fletcher32 checksum for error-detection.</td> + * </tr> + * </table> + * + * Filters may be used only for chunked data and are applied to chunks of data between the file + * hyperslab stage and the virtual file layer. At this stage in the pipeline, the data is organized as + * fixed-size blocks of elements, and the filter stage processes each chunk separately. + * + * Filters are selected by dataset creation properties, and some behavior may be controlled by data + * transfer properties. The library determines what filters must be applied and applies them in the + * order in which they were set by the application. That is, if an application calls + * #H5Pset_shuffle and then #H5Pset_deflate when creating a dataset’s creation property list, the + * library will apply the shuffle filter first and then the deflate filter. + * + * For more information, + * \li @see @ref subsubsec_dataset_filters_nbit + * \li @see @ref subsubsec_dataset_filters_scale + * + * \subsubsection subsubsec_dataset_transfer_drive File Drivers + * I/O is performed by the HDF5 virtual file layer. The file driver interface writes and reads blocks + * of data; each driver module implements the interface using different I/O mechanisms. The table + * below lists the file drivers currently supported. 
Note that the I/O mechanisms are separated from + * the pipeline processing: the pipeline and filter operations are identical no matter what data access + * mechanism is used. + * + * <table> + * <caption>I/O file drivers</caption> + * <tr> + * <th>File Driver</th> + * <th>Description</th> + * </tr> + * <tr> + * <td>#H5FD_CORE</td> + * <td>Store in memory (optional backing store to disk file).</td> + * </tr> + * <tr> + * <td>#H5FD_FAMILY</td> + * <td>Store in a set of files.</td> + * </tr> + * <tr> + * <td>#H5FD_LOG</td> + * <td>Store in logging file.</td> + * </tr> + * <tr> + * <td>#H5FD_MPIO</td> + * <td>Store using MPI/IO.</td> + * </tr> + * <tr> + * <td>#H5FD_MULTI</td> + * <td>Store in multiple files. There are several options to control layout.</td> + * </tr> + * <tr> + * <td>#H5FD_SEC2</td> + * <td>Serial I/O to file using Unix “section 2” functions.</td> + * </tr> + * <tr> + * <td>#H5FD_STDIO</td> + * <td>Serial I/O to file using Unix “stdio” functions.</td> + * </tr> + * </table> + * + * Each file driver writes/reads contiguous blocks of bytes from a logically contiguous address + * space. The file driver is responsible for managing the details of the different physical storage + * methods. + * + * In serial environments, everything above the virtual file layer tends to work identically no matter + * what storage method is used. + * + * Some options may have substantially different performance depending on the file driver that is + * used. In particular, multi-file and parallel I/O may perform considerably differently from serial + * drivers depending on chunking and other settings. + * + * \subsubsection subsubsec_dataset_transfer_props Data Transfer Properties to Manage the Pipeline + * Data transfer properties set optional parameters that control parts of the data pipeline. The + * function listing below shows transfer properties that control the behavior of the library. 
+ * + * <table> + * <caption>Data transfer property list functions</caption> + * <tr> + * <th>C Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Pset_buffer</td> + * <td>Maximum size for the type conversion buffer and the background buffer. May also supply + * pointers to application-allocated buffers.</td> + * </tr> + * <tr> + * <td>#H5Pset_hyper_vector_size</td> + * <td>set the number of "I/O vectors" (offset and length pairs) which are to be + * accumulated in memory before being issued to the lower levels + * of the library for reading or writing the actual data.</td> + * </tr> + * <tr> + * <td>#H5Pset_btree_ratios</td> + * <td>Set the B-tree split ratios for a dataset transfer property list. The split ratios determine + * what percent of children go in the first node when a node splits.</td> + * </tr> + * </table> + * + * Some filters and file drivers require or use additional parameters from the application program. + * These can be passed in the data transfer property list. The table below shows file driver property + * list functions. + * + * <table> + * <caption>File driver property list functions</caption> + * <tr> + * <th>C Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Pset_dxpl_mpio</td> + * <td>Control the MPI I/O transfer mode (independent or collective) during data I/O operations.</td> + * </tr> + * <tr> + * <td>#H5Pset_small_data_block_size</td> + * <td>Reserves blocks of size bytes for the contiguous storage of the raw data portion of small + * datasets. The HDF5 Library then writes the raw data from small datasets to this reserved space + * which reduces unnecessary discontinuities within blocks of metadata and improves + * I/O performance.</td> + * </tr> + * <tr> + * <td>#H5Pset_edc_check</td> + * <td>Disable/enable EDC checking for read. 
When selected, EDC is always written.</td> + * </tr> + * </table> + * + * The transfer properties are set in a property list which is passed as a parameter of the #H5Dread or + * #H5Dwrite call. The transfer properties are passed to each pipeline stage. Each stage may use or + * ignore any property in the list. In short, there is one property list that contains all the properties. + * + * \subsubsection subsubsec_dataset_transfer_store Storage Strategies + * The raw data is conceptually a multi-dimensional array of elements that is stored as a contiguous + * array of bytes. The data may be physically stored in the file in several ways. The table below lists + * the storage strategies for a dataset. + * + * <table> + * <caption> Dataset storage strategies</caption> + * <tr> + * <th>Storage Strategy</th> + * <th>Description</th> + * </tr> + * <tr> + * <td>Contiguous</td> + * <td>The dataset is stored as one continuous array of bytes.</td> + * </tr> + * <tr> + * <td>Chunked </td> + * <td>The dataset is stored as fixed-size chunks.</td> + * </tr> + * <tr> + * <td>Compact</td> + * <td>A small dataset is stored in the metadata header.</td> + * </tr> + * </table> + * + * The different storage strategies do not affect the data transfer operations of the dataset: reads and + * writes work the same for any storage strategy. + * + * These strategies are described in the following sections. + * + * <h4>Contiguous</h4> + * A contiguous dataset is stored in the file as a header and a single continuous array of bytes. See + * the figure below. In the case of a multi-dimensional array, the data is serialized in row major order. By + * default, data is stored contiguously. + * + * <table> + * <tr> + * <td> + * \image html Dsets_fig6.gif "Contiguous data storage" + * </td> + * </tr> + * </table> + * + * Contiguous storage is the simplest model. It has several limitations. 
First, the dataset must be + * fixed-size: it is not possible to extend the limit of the dataset or to have unlimited dimensions. In + * other words, if the size of any dimension of the array might change over time, then chunked + * storage must be used instead of contiguous. Second, because data is passed through the pipeline + * as fixed-size blocks, compression and other filters cannot be used with contiguous data. + * + * <h4>Chunked</h4> + * The data of a dataset may be stored as fixed-size chunks. A chunk is a + * hyper-rectangle of any shape. When a dataset is chunked, each chunk is read or written as a single I/O + * operation, and individually passed from stage to stage of the data pipeline. + * + * <table> + * <tr> + * <td> + * \image html Dsets_fig7.gif "Chunked data storage" + * </td> + * </tr> + * </table> + * + * Chunks may be any size and shape that fits in the dataspace of the dataset. For example, a three + * dimensional dataspace can be chunked as 3-D cubes, 2-D planes, or 1-D lines. The chunks may + * extend beyond the size of the dataspace. For example, a 3 x 3 dataset might be chunked in 2 x 2 + * chunks. Sufficient chunks will be allocated to store the array, and any extra space will not be + * accessible. So, to store the 3 x 3 array, four 2 x 2 chunks would be allocated with 7 unused + * elements stored. + * + * Chunked datasets can be unlimited in any direction and can be compressed or filtered. + * + * Since the data is read or written by chunks, chunking can have a dramatic effect on performance + * by optimizing what is read and written. Note, too, that for specific access patterns such as + * parallel I/O, decomposition into chunks can have a large impact on performance.
+ * + * Two restrictions have been placed on chunk shape and size: + * <ul><li> The rank of a chunk must be less than or equal to the rank of the dataset</li> + * <li> Chunk size cannot exceed the size of a fixed-size dataset; for example, a dataset consisting of + * a 5 x 4 fixed-size array cannot be defined with 10 x 10 chunks</li></ul> + * + * <h4>Compact</h4> + * For contiguous and chunked storage, the dataset header information and data are stored in two + * (or more) blocks. Therefore, at least two I/O operations are required to access the data: one to + * access the header, and one (or more) to access data. For a small dataset, this is considerable + * overhead. + * + * A small dataset may be stored in a continuous array of bytes in the header block using the + * compact storage option. This dataset can be read entirely in one operation which retrieves the + * header and data. The dataset must fit in the header. This may vary depending on the metadata + * that is stored. In general, a compact dataset should be approximately 30 KB or less total size. + * + * <table> + * <tr> + * <td> + * \image html Dsets_fig8.gif "Compact data storage" + * </td> + * </tr> + * </table> + * + * \subsubsection subsubsec_dataset_transfer_partial Partial I/O Sub‐setting and Hyperslabs + * Data transfers can write or read some of the data elements of the dataset. This is controlled by + * specifying two selections: one for the source and one for the destination. Selections are specified + * by creating a dataspace with selections. + * + * Selections may be a union of hyperslabs or a list of points. A hyperslab is a contiguous hyper- + * rectangle from the dataspace. Selected fields of a compound datatype may be read or written. In + * this case, the selection is controlled by the memory and file datatypes. + * + * Summary of procedure: + * \li 1. Open the dataset + * \li 2. Define the memory datatype + * \li 3. 
Define the memory dataspace selection and file dataspace selection + * \li 4. Transfer data (#H5Dread or #H5Dwrite) + * + * For more information, + * @see @ref sec_dataspace + * + * \subsection subsec_dataset_allocation Allocation of Space in the File + * When a dataset is created, space is allocated in the file for its header and initial data. The amount +of space allocated when the dataset is created depends on the storage properties. When the +dataset is modified (data is written, attributes added, or other changes), additional storage may be +allocated if necessary. + * + * <table> + * <caption>Initial dataset size</caption> + * <tr> + * <th>Object</th> + * <th>Size</th> + * </tr> + * <tr> + * <td>Header</td> + * <td>Variable, but typically around 256 bytes at the creation of a simple dataset with a simple + * datatype.</td> + * </tr> + * <tr> + * <td>Data</td> + * <td>Size of the data array (number of elements x size of element). Space allocated in + * the file depends on the storage strategy and the allocation strategy.</td> + * </tr> + * </table> + * + * <h4>Header</h4> + * A dataset header consists of one or more header messages containing persistent metadata + * describing various aspects of the dataset. These records are defined in the HDF5 File Format + * Specification. The amount of storage required for the metadata depends on the metadata to be + * stored. The table below summarizes the metadata. + * + * <table> + * <caption>Metadata storage sizes</caption> + * <tr> + * <th>Header Information</th> + * <th>Approximate Storage Size</th> + * </tr> + * <tr> + * <td>Datatype (required)</td> + * <td>Bytes or more. Depends on type.</td> + * </tr> + * <tr> + * <td>Dataspace (required)</td> + * <td>Bytes or more. Depends on number of dimensions and hsize_t.</td> + * </tr> + * <tr> + * <td>Layout (required)</td> + * <td>Points to the stored data. Bytes or more. 
Depends on hsize_t and number of dimensions.</td> + * </tr> + * <tr> + * <td>Filters</td> + * <td>Depends on the number of filters. The size of the filter message depends on the name and + * data that will be passed.</td> + * </tr> + * </table> + * + * The header blocks also store the name and values of attributes, so the total storage depends on + * the number and size of the attributes. + * + * In addition, the dataset must have at least one link, including a name, which is stored in the file + * and in the group it is linked from. + * + * The different storage strategies determine when and how much space is allocated for the data + * array. See the discussion of fill values below for a detailed explanation of the storage allocation. + * + * <h4>Contiguous Storage</h4> + * For the contiguous storage option, the data is stored in a single, contiguous block in the file. The + * data is nominally a fixed size (number of elements x size of element). The figure below shows + * an example of a two dimensional array stored as a contiguous dataset. + * + * Depending on the fill value properties, the space may be allocated when the dataset is created or + * when first written (default), and filled with fill values if specified. For parallel I/O, by default the + * space is allocated when the dataset is created. + * + * <table> + * <tr> + * <td> + * \image html Dsets_fig9.gif "A two dimensional array stored as a contiguous dataset" + * </td> + * </tr> + * </table> + * + * <h4>Chunked Storage</h4> + * For chunked storage, the data is stored in one or more chunks. Each chunk is a contiguous block + * in the file, but chunks are not necessarily stored contiguously. Each chunk has the same size. The + * data array has the same nominal size as a contiguous array (number of elements x size of + * element), but the storage is allocated in chunks, so the total size in the file can be larger than the + * nominal size of the array. See the figure below.
+ * + * If a fill value is defined, each chunk will be filled with the fill value. Chunks must be allocated + * when data is written, but they may be allocated when the file is created, as the file expands, or + * when data is written. + * + * For serial I/O, by default chunks are allocated incrementally, as data is written to the chunk. For + * a sparse dataset, chunks are allocated only for the parts of the dataset that are written. In this + * case, if the dataset is extended, no storage is allocated. + * + * For parallel I/O, by default chunks are allocated when the dataset is created or extended with fill + * values written to the chunk. + * + * In either case, the default can be changed using fill value properties. For example, using serial + * I/O, the properties can select to allocate chunks when the dataset is created. + * + * <table> + * <tr> + * <td> + * \image html Dsets_fig10.gif "A two dimensional array stored in chunks" + * </td> + * </tr> + * </table> + * + * <h4>Changing Dataset Dimensions</h4> + * #H5Dset_extent is used to change the current dimensions of the dataset within the limits of the + * dataspace. Each dimension can be extended up to its maximum or unlimited. Extending the + * dataspace may or may not allocate space in the file and may or may not write fill values, if they + * are defined. See the example code below. + * + * The dimensions of the dataset can also be reduced. If the sizes specified are smaller than the + * dataset’s current dimension sizes, #H5Dset_extent will reduce the dataset’s dimension sizes to the + * specified values. It is the user’s responsibility to ensure that valuable data is not lost; + * #H5Dset_extent does not check. + * + * <em>Using #H5Dset_extent to increase the size of a dataset</em> + * \code + * hid_t file_id, dataset_id; + * herr_t status; + * hsize_t newdims[2]; + * + * // Open an existing file. + * file_id = H5Fopen("dset.h5", H5F_ACC_RDWR, H5P_DEFAULT); + * + * // Open an existing dataset.
+ * dataset_id = H5Dopen(file_id, "/dset", H5P_DEFAULT); + * + * // Example: dataset is 2 x 3, each dimension is UNLIMITED + * // extend to 2 x 7 + * newdims[0] = 2; + * newdims[1] = 7; + * status = H5Dset_extent(dataset_id, newdims); + * + * // dataset is now 2 x 7 + * + * status = H5Dclose(dataset_id); + * status = H5Fclose(file_id); + * \endcode + * + * \subsubsection subsubsec_dataset_allocation_store Storage Allocation in the File: Early, Incremental, Late + * The HDF5 Library implements several strategies for when storage is allocated and if and when it is + * filled with fill values for elements not yet written by the user. Different strategies are + * recommended for different storage layouts and file drivers. In particular, a parallel program + * needs storage allocated during a collective call (for example, create or extend), while serial + * programs may benefit from delaying the allocation until the data is written. + * + * Three dataset creation properties control when to allocate space, when to write the fill value, and the + * actual fill value to write. + * + * <h4>When to Allocate Space</h4> + * The table below shows the options for when data is allocated in the file. Early allocation is done + * during the dataset create call. Certain file drivers (especially MPI-I/O and MPI-POSIX) require + * space to be allocated when a dataset is created, so all processors will have the correct view of the + * data.
+ * + * <table> + * <caption>File storage allocation options</caption> + * <tr> + * <th>Strategy</th> + * <th>Description</th> + * </tr> + * <tr> + * <td>Early</td> + * <td>Allocate storage for the dataset immediately when the dataset is created.</td> + * </tr> + * <tr> + * <td>Late</td> + * <td>Defer allocating space for storing the dataset until the dataset is written.</td> + * </tr> + * <tr> + * <td>Incremental</td> + * <td>Defer allocating space for storing each chunk until the chunk is written.</td> + * </tr> + * <tr> + * <td>Default</td> + * <td>Use the strategy (Early, Late, or Incremental) for the storage method and + * access method. This is the recommended strategy.</td> + * </tr> + * </table> + * + * Late allocation is done at the time of the first write to dataset. Space for the whole dataset is + * allocated at the first write. + * + * Incremental allocation (chunks only) is done at the time of the first write to the chunk. Chunks + * that have never been written are not allocated in the file. In a sparsely populated dataset, this + * option allocates chunks only where data is actually written. + * + * The “Default” property selects the option recommended as appropriate for the storage method + * and access method. The defaults are shown in the table below. Note that Early allocation is + * recommended for all Parallel I/O, while other options are recommended as the default for serial + * I/O cases. + * + * <table> + * <caption>Default storage options</caption> + * <tr> + * <th>Storage Type</th> + * <th>Serial I/O</th> + * <th>Parallel I/O</th> + * </tr> + * <tr> + * <td>Contiguous</td> + * <td>Late</td> + * <td>Early</td> + * </tr> + * <tr> + * <td>Chunked</td> + * <td>Incremental</td> + * <td>Early</td> + * </tr> + * <tr> + * <td>Compact</td> + * <td>Early</td> + * <td>Early</td> + * </tr> + * </table> + * + * <h4>When to Write the Fill Value</h4> + * The second property is when to write the fill value. 
The possible values are “Never” and + * “Allocation”. The table below shows these options. + * + * <table> + * <caption>When to write fill values</caption> + * <tr> + * <th>When</th> + * <th>Description</th> + * </tr> + * <tr> + * <td>Never</td> + * <td>Fill value will never be written.</td> + * </tr> + * <tr> + * <td>Allocation</td> + * <td>Fill value is written when space is allocated. (Default for chunked and contiguous + * data storage.)</td> + * </tr> + * </table> + * + * <h4>What Fill Value to Write</h4> + * The third property is the fill value to write. The table below shows the values. By default, the + * data is filled with zeros. The application may choose no fill value (Undefined). In this case, + * uninitialized data may have random values. The application may define a fill value of an + * appropriate type. For more information, @see @ref subsec_datatype_fill. + * + * <table> + * <caption>Fill values to write</caption> + * <tr> + * <th>What to Write</th> + * <th>Description</th> + * </tr> + * <tr> + * <td>Default</td> + * <td>By default, the library fills allocated space with zeros.</td> + * </tr> + * <tr> + * <td>Undefined</td> + * <td>Allocated space is filled with random values.</td> + * </tr> + * <tr> + * <td>User-defined</td> + * <td>The application specifies the fill value.</td> + * </tr> + * </table> + * + * Together these three properties control the library’s behavior. The table below summarizes the + * possibilities during the dataset create-write-close cycle. + * + * <table> + * <caption>Storage allocation and fill summary</caption> + * <tr> + * <th>When to allocate space</th> + * <th>When to write fill value</th> + * <th>What fill value to write</th> + * <th>Library create-write-close behavior</th> + * </tr> + * <tr> + * <td>Early</td> + * <td>Never</td> + * <td>-</td> + * <td>Library allocates space when dataset is created, but never writes a fill value to dataset. 
A read + * of unwritten data returns undefined values.</td> + * </tr> + * <tr> + * <td>Late</td> + * <td>Never</td> + * <td>-</td> + * <td>Library allocates space when dataset is written to, but never writes a fill value to the dataset. A + * read of unwritten data returns undefined values.</td> + * </tr> + * <tr> + * <td>Incremental</td> + * <td>Never</td> + * <td>-</td> + * <td>Library allocates space when a dataset or chunk (whichever is the smallest unit of space) + * is written to, but it never writes a fill value to a dataset or a chunk. A read of unwritten data + * returns undefined values.</td> + * </tr> + * <tr> + * <td>-</td> + * <td>Allocation</td> + * <td>Undefined</td> + * <td>Error on creating the dataset. The dataset is not created.</td> + * </tr> + * <tr> + * <td>Early</td> + * <td>Allocation</td> + * <td>Default or User-defined</td> + * <td>Allocate space for the dataset when the dataset is created. Write the fill value (default or + * user-defined) to the entire dataset when the dataset is created.</td> + * </tr> + * <tr> + * <td>Late</td> + * <td>Allocation</td> + * <td>Default or User-defined</td> + * <td>Allocate space for the dataset when the application first writes data values to the dataset. + * Write the fill value to the entire dataset before writing application data values.</td> + * </tr> + * <tr> + * <td>Incremental</td> + * <td>Allocation</td> + * <td>Default or User-defined</td> + * <td>Allocate space for the dataset when the application first writes data values to the dataset or + * chunk (whichever is the smallest unit of space). Write the fill value to the entire dataset + * or chunk before writing application data values.</td> + * </tr> + * </table> + * + * During the #H5Dread function call, the library behavior depends on whether space has been + * allocated, whether the fill value has been written to storage, how the fill value is defined, and + * when to write the fill value. The table below summarizes the different behaviors.
+ * + * <table> + * <caption>H5Dread summary</caption> + * <tr> + * <th>Is space allocated in the file?</th> + * <th>What is the fill value?</th> + * <th>When to write the fill value?</th> + * <th>Library read behavior</th> + * </tr> + * <tr> + * <td>No</td> + * <td>Undefined</td> + * <td>anytime</td> + * <td>Error. Cannot create this dataset.</td> + * </tr> + * <tr> + * <td>No</td> + * <td>Default or User-defined</td> + * <td>anytime</td> + * <td>Fill the memory buffer with the fill value.</td> + * </tr> + * <tr> + * <td>Yes</td> + * <td>Undefined</td> + * <td>anytime</td> + * <td>Return data from storage (dataset). Trash is possible if the application has not written data + * to the portion of the dataset being read.</td> + * </tr> + * <tr> + * <td>Yes</td> + * <td>Default or User-defined</td> + * <td>Never</td> + * <td>Return data from storage (dataset). Trash is possible if the application has not written data + * to the portion of the dataset being read.</td> + * </tr> + * <tr> + * <td>Yes</td> + * <td>Default or User-defined</td> + * <td>Allocation</td> + * <td>Return data from storage (dataset).</td> + * </tr> + * </table> + * + * There are two cases to consider depending on whether the space in the file has been allocated + * before the read or not. When space has not yet been allocated and if a fill value is defined, the + * memory buffer will be filled with the fill values and returned. In other words, no data has been + * read from the disk. If space has been allocated, the values are returned from the stored data. The + * unwritten elements will be filled according to the fill value. + * + * \subsubsection subsubsec_dataset_allocation_delete Deleting a Dataset from a File and Reclaiming Space + * HDF5 does not at this time provide an easy mechanism to remove a dataset from a file or to + * reclaim the storage space occupied by a deleted object.
+ * + * Removing a dataset and reclaiming the space it used can be done with the #H5Ldelete function + * and the h5repack utility program. With the H5Ldelete function, links to a dataset can be removed + * from the file structure. After all the links have been removed, the dataset becomes inaccessible to + * any application and is effectively removed from the file. The way to recover the space occupied + * by an unlinked dataset is to write all of the objects of the file into a new file. Any unlinked object + * is inaccessible to the application and will not be included in the new file. Writing objects to a + * new file can be done with a custom program or with the h5repack utility program. + * + * For more information, @see @ref sec_group + * + * \subsubsection subsubsec_dataset_allocation_release Releasing Memory Resources + * The system resources required for HDF5 objects such as datasets, datatypes, and dataspaces + * should be released once access to the object is no longer needed. This is accomplished via the + * appropriate close function. This is not unique to datasets but a general requirement when + * working with the HDF5 Library; failure to close objects will result in resource leaks. + * + * In the case where a dataset is created or data has been transferred, there are several objects that + * must be closed. These objects include datasets, datatypes, dataspaces, and property lists. + * + * The application program must free any memory variables and buffers it allocates. When + * accessing data from the file, the amount of memory required can be determined by calculating + * the size of the memory datatype and the number of elements in the memory selection. + * + * Variable-length data are organized in two or more areas of memory. For more information, + * \see \ref h4_vlen_datatype "Variable-length Datatypes". + * + * When writing data, the application creates an array of + * vl_info_t which contains pointers to the elements. 
The elements might be, for example, strings. + * In the file, the variable-length data is stored in two parts: a heap with the variable-length values + * of the data elements and an array of vl_info_t elements. When the data is read, the amount of + * memory required for the heap can be determined with the #H5Dvlen_get_buf_size call. + * + * The data transfer property may be used to set a custom memory manager for allocating + * variable-length data for a #H5Dread. This is set with the #H5Pset_vlen_mem_manager call. + * To free the memory for variable-length data, it is necessary to visit each element, free the + * variable-length data, and reset the element. The application must free the memory it has + * allocated. For memory allocated by the HDF5 Library during a read, the #H5Dvlen_reclaim + * function can be used to perform this operation. + * + * \subsubsection subsubsec_dataset_allocation_ext External Storage Properties + * The external storage format allows data to be stored across a set of non-HDF5 files. A set of + * segments (offsets and sizes) in one or more files is defined as an external file list, or EFL, and + * the contiguous logical addresses of the data storage are mapped onto these segments. Currently, + * only the #H5D_CONTIGUOUS storage format allows external storage. External storage is + * enabled by a dataset creation property. The table below shows the API. + * + * <table> + * <caption>External storage API</caption> + * <tr> + * <th>Function</th> + * <th>Description</th> + * </tr> + * <tr> + * <td>#H5Pset_external</td> + * <td>This function adds a new segment to the end of the external file list of the specified dataset + * creation property list. The segment begins at byte offset <em>offset</em> of file <em>name</em> and continues for <em>size</em> + * bytes. The space represented by this segment is adjacent to the space already represented by + * the external file list.
The last segment in a file list may have the size #H5F_UNLIMITED, in + * which case the external file may be of unlimited size and no more files can be added to the + * external files list.</td> + * </tr> + * <tr> + * <td>#H5Pget_external_count</td> + * <td>Calling this function returns the number of segments in an external file list. If the dataset + * creation property list has no external data, then zero is returned.</td> + * </tr> + * <tr> + * <td>#H5Pget_external</td> + * <td>This is the counterpart for the #H5Pset_external function. Given a dataset creation + * property list and a zero-based index into that list, the file name, byte offset, and segment + * size are returned through non-null arguments. At most name_size characters are copied into + * the name argument which is not null terminated if the file name is longer than the + * supplied name buffer (this is similar to strncpy()).</td> + * </tr> + * </table> + * + * The figure below shows an example of how a contiguous, one-dimensional dataset is partitioned + * into three parts and each of those parts is stored in a segment of an external file. The top + * rectangle represents the logical address space of the dataset while the bottom rectangle represents + * an external file. + * + * <table> + * <tr> + * <td> + * \image html Dsets_fig11.gif "External file storage" + * </td> + * </tr> + * </table> + * + * The example below shows code that defines the external storage for the example. Note that the + * segments are defined in order of the logical addresses they represent, not their order within the + * external file. It would also have been possible to put the segments in separate files. Care should + * be taken when setting up segments in a single file since the library does not automatically check + * for segments that overlap. 
+ * + * <em>External storage</em> + * \code + * plist = H5Pcreate (H5P_DATASET_CREATE); + * H5Pset_external (plist, "velocity.data", 3000, 1000); + * H5Pset_external (plist, "velocity.data", 0, 2500); + * H5Pset_external (plist, "velocity.data", 4500, 1500); + * \endcode + * + * The figure below shows an example of how a contiguous, two-dimensional dataset is partitioned + * into three parts and each of those parts is stored in a separate external file. The top rectangle + * represents the logical address space of the dataset while the bottom rectangles represent external + * files. + * + * <table> + * <tr> + * <td> + * \image html Dsets_fig12.gif "Partitioning a 2-D dataset for external storage" + * </td> + * </tr> + * </table> + * + * The example below shows code for the partitioning described above. In this example, the library + * maps the multi-dimensional array onto a linear address space as defined by the HDF5 format + * specification, and then maps that address space into the segments defined in the external file list. + * + * <em>Partitioning a 2-D dataset for external storage</em> + * \code + * plist = H5Pcreate (H5P_DATASET_CREATE); + * H5Pset_external (plist, "scan1.data", 0, 24); + * H5Pset_external (plist, "scan2.data", 0, 24); + * H5Pset_external (plist, "scan3.data", 0, 16); + * \endcode + * + * The segments of an external file can exist beyond the end of the (external) file. The library reads + * that part of a segment as zeros. When writing to a segment that exists beyond the end of a file, + * the external file is automatically extended. Using this feature, one can create a segment (or set of + * segments) which is larger than the current size of the dataset. This allows the dataset to be + * extended at a future time (provided the dataspace also allows the extension). + * + * All referenced external data files must exist before performing raw data I/O on the dataset. 
This + * is normally not a problem since those files are being managed directly by the application or + * indirectly through some other library. However, if the file is transferred from its original context, + * care must be taken to ensure that all the external files are accessible in the new location. + * + * \subsection subsec_dataset_filters Using HDF5 Filters + * This section describes in detail how to use the n-bit, scale-offset, and szip filters. + * + * \subsubsection subsubsec_dataset_filters_nbit Using the N‐bit Filter + * N-bit data has n significant bits, where n may not correspond to a precise number of bytes. On + * the other hand, computing systems and applications universally, or nearly so, run most efficiently + * when manipulating data as whole bytes or multiple bytes. + * + * Consider the case of 12-bit integer data. In memory, that data will be handled in at least 2 bytes, + * or 16 bits, and on some platforms in 4 or even 8 bytes. The size of such a dataset can be + * significantly reduced when written to disk if the unused bits are stripped out. + * + * The n-bit filter is provided for this purpose, packing n-bit data on output by stripping off all + * unused bits and unpacking on input, restoring the extra bits required by the computational + * processor. + * + * <h4>N-bit Datatype</h4> + * An n-bit datatype is a datatype of n significant bits. Unless it is packed, an n-bit datatype is + * presented as an n-bit bitfield within a larger-sized value. For example, a 12-bit datatype might be + * presented as a 12-bit field in a 16-bit, or 2-byte, value. + * + * Currently, the datatype classes of n-bit datatype or n-bit field of a compound datatype or an array + * datatype are limited to integer or floating-point. + * + * The HDF5 user can create an n-bit datatype through a series of function calls.
For example, the + * following calls create a 16-bit datatype that is stored in a 32-bit value with a 4-bit offset: + * \code + * hid_t nbit_datatype = H5Tcopy(H5T_STD_I32LE); + * H5Tset_precision(nbit_datatype, 16); + * H5Tset_offset(nbit_datatype, 4); + * \endcode + * + * In memory, one value of the above example n-bit datatype would be stored on a little-endian + * machine as follows: + * <table> + * <tr> + * <th>byte 3</th> + * <th>byte 2</th> + * <th>byte 1</th> + * <th>byte 0</th> + * </tr> + * <tr> + * <td>????????</td> + * <td>????SPPP</td> + * <td>PPPPPPPP</td> + * <td>PPPP????</td> + * </tr> + * <tr> + * <td colspan="4"> + * <em>Note: Key: S - sign bit, P - significant bit, ? - padding bit. Sign bit is + * included in signed integer datatype precision.</em> + * </td> + * </tr> + * </table> + * + * <h4>N-bit Filter</h4> + * When data of an n-bit datatype is stored on disk using the n-bit filter, the filter packs the data by + * stripping off the padding bits; only the significant bits are retained and stored. The values on disk + * will appear as follows: + * <table> + * <tr> + * <th>1st value</th> + * <th>2nd value</th> + * <th>nth value</th> + * </tr> + * <tr> + * <td>SPPPPPPP PPPPPPPP</td> + * <td>SPPPPPPP PPPPPPPP</td> + * <td>...</td> + * </tr> + * <tr> + * <td colspan="3"> + * <em>Note: Key: S - sign bit, P - significant bit, ? - padding bit. Sign bit is + * included in signed integer datatype precision.</em> + * </td> + * </tr> + * </table> + * + * <h4>How Does the N-bit Filter Work?</h4> + * The n-bit filter always compresses and decompresses according to dataset properties supplied by + * the HDF5 library in the datatype, dataspace, or dataset creation property list. + * + * The dataset datatype refers to how data is stored in an HDF5 file while the memory datatype + * refers to how data is stored in memory.
The HDF5 library will do datatype conversion when + * writing data in memory to the dataset or reading data from the dataset to memory if the memory + * datatype differs from the dataset datatype. Datatype conversion is performed by HDF5 library + * before n-bit compression and after n-bit decompression. + * + * The following sub-sections examine the common cases: + * \li N-bit integer conversions + * \li N-bit floating-point conversions + * + * <h4>N-bit Integer Conversions</h4> + * Integer data with a dataset of integer datatype of less than full precision and a memory datatype + * of #H5T_NATIVE_INT, provides the simplest application of the n-bit filter. + * + * The precision of #H5T_NATIVE_INT is 8 multiplied by sizeof(int). This value, the size of an + * int in bytes, differs from platform to platform; we assume a value of 4 for the following + * illustration. We further assume the memory byte order to be little-endian. + * + * In memory, therefore, the precision of #H5T_NATIVE_INT is 32 and the offset is 0. One value of + * #H5T_NATIVE_INT is laid out in memory as follows: + * <table> + * <tr> + * <td> + * \image html Dsets_NbitInteger1.gif "H5T_NATIVE_INT in memory"<br /> + * <em>Note: Key: S - sign bit, E - exponent bit, M - mantissa bit, ? - padding bit. Sign bit is + * included in signed integer datatype precision.</em> + * </td> + * </tr> + * </table> + * + * Suppose the dataset datatype has a precision of 16 and an offset of 4. After HDF5 converts + * values from the memory datatype to the dataset datatype, it passes something like the following + * to the n-bit filter for compression: + * <table> + * <tr> + * <td> + * \image html Dsets_NbitInteger2.gif "Passed to the n-bit filter"<br /> + * <em>Note: Key: S - sign bit, E - exponent bit, M - mantissa bit, ? - padding bit. 
Sign bit is + * included in signed integer datatype precision.</em> + * </td> + * </tr> + * </table> + * + * Notice that only the specified 16 bits (15 significant bits and the sign bit) are retained in the + * conversion. All other significant bits of the memory datatype are discarded because the dataset + * datatype calls for only 16 bits of precision. After n-bit compression, none of these discarded bits, + * known as padding bits will be stored on disk. + * + * <h4>N-bit Floating-point Conversions</h4> + * Things get more complicated in the case of a floating-point dataset datatype class. This sub- + * section provides an example that illustrates the conversion from a memory datatype of + * #H5T_NATIVE_FLOAT to a dataset datatype of class floating-point. + * + * As before, let the #H5T_NATIVE_FLOAT be 4 bytes long, and let the memory byte order be + * little-endian. Per the IEEE standard, one value of #H5T_NATIVE_FLOAT is laid out in memory + * as follows: + * <table> + * <tr> + * <td> + * \image html Dsets_NbitFloating1.gif "H5T_NATIVE_FLOAT in memory"<br /> + * <em>Note: Key: S - sign bit, E - exponent bit, M - mantissa bit, ? - padding bit. Sign bit is + * included in floating-point datatype precision.</em> + * </td> + * </tr> + * </table> + * + * Suppose the dataset datatype has a precision of 20, offset of 7, mantissa size of 13, mantissa + * position of 7, exponent size of 6, exponent position of 20, and sign position of 26. For more + * information, @see @ref subsubsec_datatype_program_define. + * + * After HDF5 converts values from the memory datatype to the dataset datatype, it passes + * something like the following to the n-bit filter for compression: + * <table> + * <tr> + * <td> + * \image html Dsets_NbitFloating2.gif "Passed to the n-bit filter"<br /> + * <em>Note: Key: S - sign bit, E - exponent bit, M - mantissa bit, ? - padding bit. 
Sign bit is + * included in floating-point datatype precision.</em> + * </td> + * </tr> + * </table> + * + * The sign bit and truncated mantissa bits are not changed during datatype conversion by the + * HDF5 library. On the other hand, the conversion of the 8-bit exponent to a 6-bit exponent is a + * little tricky: + * + * The bias for the new exponent in the n-bit datatype is: + * <code> + * 2<sup>(n-1)</sup>-1 + * </code> + * + * The following formula is used for this exponent conversion:<br /> + * <code> + * exp8 - (2<sup>(8-1)</sup> -1) = exp6 - (2<sup>(6-1)</sup>-1) = actual exponent value + * </code><br /> + * where exp8 is the stored decimal value as represented by the 8-bit exponent, and exp6 is the + * stored decimal value as represented by the 6-bit exponent. + * + * In this example, caution must be taken to ensure that, after conversion, the actual exponent value + * is within the range that can be represented by a 6-bit exponent. For example, an 8-bit exponent + * can represent values from -127 to 128 while a 6-bit exponent can represent values only from -31 + * to 32. + * + * <h4>N-bit Filter Behavior</h4> + * The n-bit filter was designed to treat the incoming data byte by byte at the lowest level. The + * purpose was to make the n-bit filter as generic as possible so that no pointer cast related to the + * datatype is needed. + * + * Bitwise operations are employed for packing and unpacking at the byte level. + * + * Recursive function calls are used to treat compound and array datatypes. + * + * <h4>N-bit Compression</h4> + * The main idea of n-bit compression is to use a loop to compress each data element in a chunk. + * Depending on the datatype of each element, the n-bit filter will call one of four functions. 
Each + * of these functions performs one of the following tasks: + * \li Compress a data element of a no-op datatype + * \li Compress a data element of an atomic datatype + * \li Compress a data element of a compound datatype + * \li Compress a data element of an array datatype + * + * No-op datatypes: The n-bit filter does not actually compress no-op datatypes. Rather, it copies + * the data buffer of the no-op datatype from the non-compressed buffer to the proper location in + * the compressed buffer; the compressed buffer has no holes. The term “compress” is used here + * simply to distinguish this function from the function that performs the reverse operation during + * decompression. + * + * Atomic datatypes: The n-bit filter will find the bytes where significant bits are located and try to + * compress these bytes, one byte at a time, using a loop. At this level, the filter needs the following + * information: + * <ul><li>The byte offset of the beginning of the current data element with respect to the + * beginning of the input data buffer</li> + * <li>Datatype size, precision, offset, and byte order</li></ul> + * + * The n-bit filter compresses from the most significant byte containing significant bits to the least + * significant byte. For big-endian data, therefore, the loop index progresses from smaller to larger + * while for little-endian, the loop index progresses from larger to smaller. + * + * In the extreme case of when the n-bit datatype has full precision, this function copies the content + * of the entire non-compressed datatype to the compressed output buffer. + * + * Compound datatypes: The n-bit filter will compress each data member of the compound + * datatype. If the member datatype is of an integer or floating-point datatype, the n-bit filter will + * call the function described above. If the member datatype is of a no-op datatype, the filter will + * call the function described above. 
If the member datatype is of a compound datatype, the filter + * will make a recursive call to itself. If the member datatype is of an array datatype, the filter will + * call the function described below. + * + * Array datatypes: The n-bit filter will use a loop to compress each array element in the array. If + * the base datatype of array element is of an integer or floating-point datatype, the n-bit filter will + * call the function described above. If the base datatype is of a no-op datatype, the filter will call + * the function described above. If the base datatype is of a compound datatype, the filter will call + * the function described above. If the base datatype is of an array datatype, the filter will make + * a recursive call to itself. + * + * <h4>N-bit Decompression</h4> + * The n-bit decompression algorithm is very similar to n-bit compression. The only difference is + * that at the byte level, compression packs out all padding bits and stores only significant bits into + * a continuous buffer (unsigned char) while decompression unpacks significant bits and inserts + * padding bits (zeros) at the proper positions to recover the data bytes as they existed before + * compression. + * + * <h4>Storing N-bit Parameters to Array cd_values[]</h4> + * All of the information, or parameters, required by the n-bit filter are gathered and stored in the + * array cd_values[] by the private function H5Z__set_local_nbit and are passed to another private + * function, H5Z__filter_nbit, by the HDF5 Library. + * These parameters are as follows: + * \li Parameters related to the datatype + * \li The number of elements within the chunk + * \li A flag indicating whether compression is needed + * + * The first and second parameters can be obtained using the HDF5 dataspace and datatype + * interface calls. + * + * A compound datatype can have members of array or compound datatype. An array datatype’s + * base datatype can be a complex compound datatype.
Recursive calls are required to set + * parameters for these complex situations. + * + * Before setting the parameters, the number of parameters should be calculated to dynamically + * allocate the array cd_values[], which will be passed to the HDF5 Library. This also requires + * recursive calls. + * + * For an atomic datatype (integer or floating-point), parameters that will be stored include the + * datatype’s size, endianness, precision, and offset. + * + * For a no-op datatype, only the size is required. + * + * For a compound datatype, parameters that will be stored include the datatype’s total size and + * number of members. For each member, its member offset needs to be stored. Other parameters + * for members will depend on the respective datatype class. + * + * For an array datatype, the total size parameter should be stored. Other parameters for the array’s + * base type depend on the base type’s datatype class. + * + * Further, to correctly retrieve the parameter for use of n-bit compression or decompression later, + * parameters for distinguishing between datatype classes should be stored. + * + * <h4>Implementation</h4> + * Three filter callback functions were written for the n-bit filter: + * \li H5Z__can_apply_nbit + * \li H5Z__set_local_nbit + * \li H5Z__filter_nbit + * + * These functions are called internally by the HDF5 library. A number of utility functions were + * written for the function H5Z__set_local_nbit. Compression and decompression functions were + * written and are called by function H5Z__filter_nbit. All these functions are included in the file + * H5Znbit.c. + * + * The public function #H5Pset_nbit is called by the application to set up the use of the n-bit filter. + * This function is included in the file H5Pdcpl.c. The application does not need to supply any + * parameters. 
+ * + * <h4>How N-bit Parameters are Stored</h4> + * A scheme of storing parameters required by the n-bit filter in the array cd_values[] was + * developed utilizing recursive function calls. + * + * Four private utility functions were written for storing the parameters associated with atomic + * (integer or floating-point), no-op, array, and compound datatypes: + * \li H5Z__set_parms_atomic + * \li H5Z__set_parms_array + * \li H5Z__set_parms_nooptype + * \li H5Z__set_parms_compound + * + * The scheme is briefly described below. + * + * First, assign a numeric code for datatype class atomic (integer or float), no-op, array, and + * compound datatype. The code is stored before other datatype related parameters are stored. + * + * The first three parameters of cd_values[] are reserved for: + * \li 1. The number of valid entries in the array cd_values[] + * \li 2. A flag indicating whether compression is needed + * \li 3. The number of elements in the chunk + * + * Throughout the balance of this explanation, i represents the index of cd_values[]. + * In the function H5Z__set_local_nbit: + * <ul><li>1. i = 2</li> + * <li>2. Get the number of elements in the chunk and store in cd_values[i]; increment i</li> + * <li>3. Get the class of the datatype: + * <ul><li>For an integer or floating-point datatype, call H5Z__set_parms_atomic</li> + * <li>For an array datatype, call H5Z__set_parms_array</li> + * <li>For a compound datatype, call H5Z__set_parms_compound</li> + * <li>For none of the above, call H5Z__set_parms_nooptype</li></ul></li> + * <li>4. Store i in cd_values[0] and flag in cd_values[1]</li></ul> + * + * In the function H5Z__set_parms_atomic: + * \li 1. Store the assigned numeric code for the atomic datatype in cd_values[i]; increment i + * \li 2. Get the size of the atomic datatype and store in cd_values[i]; increment i + * \li 3. Get the order of the atomic datatype and store in cd_values[i]; increment i + * \li 4.
Get the precision of the atomic datatype and store in cd_values[i]; increment i + * \li 5. Get the offset of the atomic datatype and store in cd_values[i]; increment i + * \li 6. Determine the need to do compression at this point + * + * In the function H5Z__set_parms_nooptype: + * \li 1. Store the assigned numeric code for the no-op datatype in cd_values[i]; increment i + * \li 2. Get the size of the no-op datatype and store in cd_values[i]; increment i + * + * In the function H5Z__set_parms_array: + * <ul><li>1. Store the assigned numeric code for the array datatype in cd_values[i]; increment i</li> + * <li>2. Get the size of the array datatype and store in cd_values[i]; increment i</li> + * <li>3. Get the class of the array’s base datatype. + * <ul><li>For an integer or floating-point datatype, call H5Z__set_parms_atomic</li> + * <li>For an array datatype, call H5Z__set_parms_array</li> + * <li>For a compound datatype, call H5Z__set_parms_compound</li> + * <li>If none of the above, call H5Z__set_parms_nooptype</li></ul></li></ul> + * + * In the function H5Z__set_parms_compound: + * <ul><li>1. Store the assigned numeric code for the compound datatype in cd_values[i]; increment i</li> + * <li>2. Get the size of the compound datatype and store in cd_values[i]; increment i</li> + * <li>3. Get the number of members and store in cd_values[i]; increment i</li> + * <li>4.
For each member + * <ul><li>Get the member offset and store in cd_values[i]; increment i</li> + * <li>Get the class of the member datatype</li> + * <li>For an integer or floating-point datatype, call H5Z__set_parms_atomic</li> + * <li>For an array datatype, call H5Z__set_parms_array</li> + * <li>For a compound datatype, call H5Z__set_parms_compound</li> + * <li>If none of the above, call H5Z__set_parms_nooptype</li></ul></li></ul> + * + * <h4>N-bit Compression and Decompression Functions</h4> + * The n-bit compression and decompression functions above are called by the private HDF5 + * function H5Z__filter_nbit. The compress and decompress functions retrieve the n-bit parameters + * from cd_values[] as it was passed by H5Z__filter_nbit. Parameters are retrieved in exactly the + * same order in which they are stored and lower-level compression and decompression functions + * for different datatype classes are called. + * + * N-bit compression is not implemented in place. Due to the difficulty of calculating actual output + * buffer size after compression, the same space as that of the input buffer is allocated for the output + * buffer as passed to the compression function. However, the size of the output buffer passed by + * reference to the compression function will be changed (smaller) after the compression is + * complete. + * + * <h4>Usage Examples</h4> + * + * The following code example illustrates the use of the n-bit filter for writing and reading n-bit + * integer data.
+ * + * <em>N-bit compression for integer data</em> + * \code + * #include "hdf5.h" + * #include "stdlib.h" + * #include "math.h" + * + * #define H5FILE_NAME "nbit_test_int.h5" + * #define DATASET_NAME "nbit_int" + * #define NX 200 + * #define NY 300 + * #define CH_NX 10 + * #define CH_NY 15 + * + * int main(void) + * { + * hid_t file, dataspace, dataset, datatype, mem_datatype, dset_create_props; + * hsize_t dims[2], chunk_size[2]; + * int orig_data[NX][NY]; + * int new_data[NX][NY]; + * int i, j; + * size_t precision, offset; + * + * // Define dataset datatype (integer), and set precision, offset + * datatype = H5Tcopy(H5T_NATIVE_INT); + * precision = 17; // precision includes sign bit + * if(H5Tset_precision(datatype,precision) < 0) { + * printf("Error: fail to set precision\n"); + * return -1; + * } + * offset = 4; + * if(H5Tset_offset(datatype,offset) < 0) { + * printf("Error: fail to set offset\n"); + * return -1; + * } + * + * // Copy to memory datatype + * mem_datatype = H5Tcopy(datatype); + * + * // Set order of dataset datatype + * if(H5Tset_order(datatype, H5T_ORDER_BE) < 0) { + * printf("Error: fail to set endianness\n"); + * return -1; + * } + * + * // Initialize data buffer with random data within correct + * // range corresponding to the memory datatype's precision + * // and offset. + * for (i = 0; i < NX; i++) + * for (j = 0; j < NY; j++) + * orig_data[i][j] = rand() % (int)pow(2, precision-1) << offset; + * + * // Describe the size of the array. + * dims[0] = NX; + * dims[1] = NY; + * if((dataspace = H5Screate_simple (2, dims, NULL)) < 0) { + * printf("Error: fail to create dataspace\n"); + * return -1; + * } + * + * // Create a new file using read/write access, default file + * // creation properties, and default file access properties. 
+ * if((file = H5Fcreate (H5FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0) { + * printf("Error: fail to create file\n"); + * return -1; + * } + * + * // Set the dataset creation property list to specify that + * // the raw data is to be partitioned into 10 x 15 element + * // chunks and that each chunk is to be compressed. + * chunk_size[0] = CH_NX; + * chunk_size[1] = CH_NY; + * if((dset_create_props = H5Pcreate (H5P_DATASET_CREATE)) < 0) { + * printf("Error: fail to create dataset property\n"); + * return -1; + * } + * if(H5Pset_chunk (dset_create_props, 2, chunk_size) < 0) { + * printf("Error: fail to set chunk\n"); + * return -1; + * } + * + * // Set parameters for n-bit compression; check the description + * // of the H5Pset_nbit function in the HDF5 Reference Manual + * // for more information. + * if(H5Pset_nbit (dset_create_props) < 0) { + * printf("Error: fail to set nbit filter\n"); + * return -1; + * } + * + * // Create a new dataset within the file. The datatype + * // and dataspace describe the data on disk, which may + * // be different from the format used in the application's + * // memory. + * if((dataset = H5Dcreate(file, DATASET_NAME, datatype, dataspace, + * H5P_DEFAULT, dset_create_props, H5P_DEFAULT)) < 0) { + * printf("Error: fail to create dataset\n"); + * return -1; + * } + * + * // Write the array to the file. The datatype and dataspace + * // describe the format of the data in the 'orig_data' buffer. + * // The raw data is translated to the format required on disk, + * // as defined above. We use default raw data transfer + * // properties. + * if(H5Dwrite (dataset, mem_datatype, H5S_ALL, H5S_ALL, H5P_DEFAULT, orig_data) < 0) { + * printf("Error: fail to write to dataset\n"); + * return -1; + * } + * H5Dclose (dataset); + * + * if((dataset = H5Dopen(file, DATASET_NAME, H5P_DEFAULT)) < 0) { + * printf("Error: fail to open dataset\n"); + * return -1; + * } + * + * // Read the array. 
This is similar to writing data, + * // except the data flows in the opposite direction. + * // Note: Decompression is automatic. + * if(H5Dread (dataset, mem_datatype, H5S_ALL, H5S_ALL, H5P_DEFAULT, new_data) < 0) { + * printf("Error: fail to read from dataset\n"); + * return -1; + * } + * + * H5Tclose (datatype); + * H5Tclose (mem_datatype); + * H5Dclose (dataset); + * H5Sclose (dataspace); + * H5Pclose (dset_create_props); + * H5Fclose (file); + * + * return 0; + * } + * \endcode + * + * The following code example illustrates the use of the n-bit filter for writing and reading n-bit + * floating-point data. + * + * <em>N-bit compression for floating-point data</em> + * \code + * #include "hdf5.h" + * + * #define H5FILE_NAME "nbit_test_float.h5" + * #define DATASET_NAME "nbit_float" + * #define NX 2 + * #define NY 5 + * #define CH_NX 2 + * #define CH_NY 5 + * + * int main(void) + * { + * hid_t file, dataspace, dataset, datatype, dset_create_props; + * hsize_t dims[2], chunk_size[2]; + * + * // orig_data[] are initialized to be within the range that + * // can be represented by dataset datatype (no precision + * // loss during datatype conversion) + * // + * float orig_data[NX][NY] = {{188384.00, 19.103516,-1.0831790e9, -84.242188, 5.2045898}, + * {-49140.000, 2350.2500, -3.2110596e-1, 6.4998865e-5, -0.0000000}}; + * float new_data[NX][NY]; + * size_t precision, offset; + * + * // Define single-precision floating-point type for dataset + * //--------------------------------------------------------------- + * // size=4 byte, precision=20 bits, offset=7 bits, + * // mantissa size=13 bits, mantissa position=7, + * // exponent size=6 bits, exponent position=20, + * // exponent bias=31. + * // It can be illustrated in little-endian order as: + * // (S - sign bit, E - exponent bit, M - mantissa bit, ? - padding bit) + * // + * // 3 2 1 0 + * // ?????SEE EEEEMMMM MMMMMMMM M??????? 
+ * // + * // To create a new floating-point type, the following + * // properties must be set in the order of + * // set fields -> set offset -> set precision -> set size. + * // All these properties must be set before the type can + * // function. Other properties can be set anytime. Derived + * // type size cannot be expanded bigger than original size + * // but can be decreased. There should be no holes + * // among the significant bits. Exponent bias usually + * // is set 2^(n-1)-1, where n is the exponent size. + * //--------------------------------------------------------------- + * datatype = H5Tcopy(H5T_IEEE_F32BE); + * if(H5Tset_fields(datatype, 26, 20, 6, 7, 13) < 0) { + * printf("Error: fail to set fields\n"); + * return -1; + * } + * offset = 7; + * if(H5Tset_offset(datatype,offset) < 0) { + * printf("Error: fail to set offset\n"); + * return -1; + * } + * precision = 20; + * if(H5Tset_precision(datatype,precision) < 0) { + * printf("Error: fail to set precision\n"); + * return -1; + * } + * if(H5Tset_size(datatype, 4) < 0) { + * printf("Error: fail to set size\n"); + * return -1; + * } + * if(H5Tset_ebias(datatype, 31) < 0) { + * printf("Error: fail to set exponent bias\n"); + * return -1; + * } + * + * // Describe the size of the array. + * dims[0] = NX; + * dims[1] = NY; + * if((dataspace = H5Screate_simple (2, dims, NULL)) < 0) { + * printf("Error: fail to create dataspace\n"); + * return -1; + * } + * + * // Create a new file using read/write access, default file + * // creation properties, and default file access properties. + * if((file = H5Fcreate (H5FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0) { + * printf("Error: fail to create file\n"); + * return -1; + * } + * + * // Set the dataset creation property list to specify that + * // the raw data is to be partitioned into 2 x 5 element + * // chunks and that each chunk is to be compressed. 
+ * chunk_size[0] = CH_NX; + * chunk_size[1] = CH_NY; + * if((dset_create_props = H5Pcreate (H5P_DATASET_CREATE)) < 0) { + * printf("Error: fail to create dataset property\n"); + * return -1; + * } + * if(H5Pset_chunk (dset_create_props, 2, chunk_size) < 0) { + * printf("Error: fail to set chunk\n"); + * return -1; + * } + * + * // Set parameters for n-bit compression; check the description + * // of the H5Pset_nbit function in the HDF5 Reference Manual + * // for more information. + * if(H5Pset_nbit (dset_create_props) < 0) { + * printf("Error: fail to set nbit filter\n"); + * return -1; + * } + * + * // Create a new dataset within the file. The datatype + * // and dataspace describe the data on disk, which may + * // be different from the format used in the application's memory. + * if((dataset = H5Dcreate(file, DATASET_NAME, datatype, dataspace, H5P_DEFAULT, + * dset_create_props, H5P_DEFAULT)) < 0) { + * printf("Error: fail to create dataset\n"); + * return -1; + * } + * + * // Write the array to the file. The datatype and dataspace + * // describe the format of the data in the 'orig_data' buffer. + * // The raw data is translated to the format required on disk, + * // as defined above. We use default raw data transfer properties. + * if(H5Dwrite (dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, orig_data) < 0) { + * printf("Error: fail to write to dataset\n"); + * return -1; + * } + * H5Dclose (dataset); + * if((dataset = H5Dopen(file, DATASET_NAME, H5P_DEFAULT))<0) { + * printf("Error: fail to open dataset\n"); + * return -1; + * } + * + * // Read the array. This is similar to writing data, + * // except the data flows in the opposite direction. + * // Note: Decompression is automatic.
+ * if(H5Dread (dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, new_data) < 0) { + * printf("Error: fail to read from dataset\n"); + * return -1; + * } + * H5Tclose (datatype); + * H5Dclose (dataset); + * H5Sclose (dataspace); + * H5Pclose (dset_create_props); + * H5Fclose (file); + * + * return 0; + * } + * \endcode + * + * <h4>Limitations</h4> + * Because the array cd_values[] has to fit into an object header message of 64K, the n-bit filter has + * an upper limit on the number of n-bit parameters that can be stored in it. To be conservative, a + * maximum of 4K is allowed for the number of parameters. + * + * The n-bit filter currently only compresses n-bit datatypes or fields derived from integer or + * floating-point datatypes. The n-bit filter assumes padding bits of zero. This may not be true since + * the HDF5 user can set padding bit to be zero, one, or leave the background alone. However, it is + * expected the n-bit filter will be modified to adjust to such situations. + * + * The n-bit filter does not have a way to handle the situation where the fill value of a dataset is + * defined and the fill value is not of an n-bit datatype although the dataset datatype is. + * + * \subsubsection subsubsec_dataset_filters_scale Using the Scale‐offset Filter + * Generally speaking, scale-offset compression performs a scale and/or offset operation on each + * data value and truncates the resulting value to a minimum number of bits (minimum-bits) before + * storing it. + * + * The current scale-offset filter supports integer and floating-point datatypes only. For the floating- + * point datatype, float and double are supported, but long double is not supported. + * + * Integer data compression uses a straight-forward algorithm. Floating-point data compression + * adopts the GRiB data packing mechanism which offers two alternate methods: a fixed minimum- + * bits method, and a variable minimum-bits method.
Currently, only the variable minimum-bits + * method is implemented. + * + * Like other I/O filters supported by the HDF5 library, applications using the scale-offset filter + * must store data with chunked storage. + * + * Integer type: The minimum-bits of integer data can be determined by the filter. For example, if + * the maximum value of data to be compressed is 7065 and the minimum value is 2970, then the + * “span” of dataset values is equal to (max-min+1), which is 4096. If no fill value is defined for the + * dataset, the minimum-bits is: ceiling(log2(span)) = 12. With fill value set, the minimum-bits is: + * ceiling(log2(span+1)) = 13. + * + * HDF5 users can also set the minimum-bits. However, if the user gives a minimum-bits that is + * less than that calculated by the filter, the compression will be lossy. + * + * Floating-point type: The basic idea of the scale-offset filter for the floating-point type is to + * transform the data by some kind of scaling to integer data, and then to follow the procedure of + * the scale-offset filter for the integer type to do the data compression. Due to the data + * transformation from floating-point to integer, the scale-offset filter is lossy in nature. + * + * Two methods of scaling the floating-point data are used: the so-called D-scaling and E-scaling. + * D-scaling is more straightforward and easy to understand. For HDF5 1.8 release, only the + * D-scaling method had been implemented. + * + * <h4>Design</h4> + * Before the filter does any real work, it needs to gather some information from the HDF5 Library + * through API calls. The parameters the filter needs are: + * \li The minimum-bits of the data value + * \li The number of data elements in the chunk + * \li The datatype class, size, sign (only for integer type), byte order, and fill value if defined + * + * Size and sign are needed to determine what kind of pointer cast to use when retrieving values + * from the data buffer.
+ * + * The pipeline of the filter can be divided into four parts: (1)pre-compression; (2)compression; + * (3)decompression; (4)post-decompression. + * + * Depending on whether a fill value is defined or not, the filter will handle pre-compression and + * post-decompression differently. + * + * The scale-offset filter only needs the memory byte order, size of datatype, and minimum-bits for + * compression and decompression. + * + * Since decompression has no access to the original data, the minimum-bits and the minimum + * value need to be stored with the compressed data for decompression and post-decompression. + * + * <h4>Integer Type</h4> + * Pre-compression: During pre-compression minimum-bits is calculated if it is not set by the user. + * For more information on how minimum-bits are calculated, @see @ref subsubsec_dataset_filters_nbit. + * + * If the fill value is defined, finding the maximum and minimum values should ignore the data + * element whose value is equal to the fill value. + * + * If no fill value is defined, the value of each data element is subtracted by the minimum value + * during this stage. + * + * If the fill value is defined, the fill value is assigned to the maximum value. In this way minimum- + * bits can represent a data element whose value is equal to the fill value and subtracts the + * minimum value from a data element whose value is not equal to the fill value. + * + * The fill value (if defined), the number of elements in a chunk, the class of the datatype, the size + * of the datatype, the memory order of the datatype, and other similar elements will be stored in + * the HDF5 object header for the post-decompression usage. + * + * After pre-compression, all values are non-negative and are within the range that can be stored by + * minimum-bits. + * + * Compression: All modified data values after pre-compression are packed together into the + * compressed data buffer. 
The number of bits for each data value decreases from the number of + * bits of integer (32 for most platforms) to minimum-bits. The value of minimum-bits and the + * minimum value are added to the data buffer and the whole buffer is sent back to the library. In + * this way, the number of bits for each modified value is no more than the size of minimum-bits. + * + * Decompression: In this stage, the number of bits for each data value is resumed from minimum- + * bits to the number of bits of integer. + * + * Post-decompression: For the post-decompression stage, the filter does the opposite of what it + * does during pre-compression except that it does not calculate the minimum-bits or the minimum + * value. These values were saved during compression and can be retrieved through the resumed + * data buffer. If no fill value is defined, the filter adds the minimum value back to each data + * element. + * + * If the fill value is defined, the filter assigns the fill value to the data element whose value is equal + * to the maximum value that minimum-bits can represent and adds the minimum value back to + * each data element whose value is not equal to the maximum value that minimum-bits can + * represent. + * + * @anchor h4_float_datatype <h4>Floating-point Type</h4> + * The filter will do data transformation from floating-point type to integer type and then handle the + * data by using the procedure for handling the integer data inside the filter. Insignificant bits of + * floating-point data will be cut off during data transformation, so this filter is a lossy compression + * method. + * + * There are two scaling methods: D-scaling and E-scaling. The HDF5 1.8 release only supports D- + * scaling. D-scaling is short for decimal scaling. E-scaling should be similar conceptually. In order + * to transform data from floating-point to integer, a scale factor is introduced. The minimum value + * will be calculated. Each data element value will subtract the minimum value. 
The modified data + * will be multiplied by 10 (Decimal) to the power of scale_factor, and only the integer part will be + * kept and manipulated through the routines for the integer type of the filter during pre- + * compression and compression. Integer data will be divided by 10 to the power of scale_factor to + * transform back to floating-point data during decompression and post-decompression. Each data + * element value will then add the minimum value, and the floating-point data are resumed. + * However, the resumed data will lose some insignificant bits compared with the original value. + * + * For example, the following floating-point data are manipulated by the filter, and the D-scaling + * factor is 2. + * <em>{104.561, 99.459, 100.545, 105.644}</em> + * + * The minimum value is 99.459, each data element subtracts 99.459, the modified data is + * <em>{5.102, 0, 1.086, 6.185}</em> + * + * Since the D-scaling factor is 2, all floating-point data will be multiplied by 10^2 with this result: + * <em>{510.2, 0, 108.6, 618.5}</em> + * + * The digit after decimal point will be rounded off, and then the set looks like: + * <em>{510, 0, 109, 619}</em> + * + * After decompression, each value will be divided by 10^2 and will be added to the offset 99.459. + * The floating-point data becomes + * <em>{104.559, 99.459, 100.549, 105.649}</em> + * + * The absolute error for each value should be no more than 5 * 10^-(D-scaling factor + 1), which is + * 0.005 in this example. + * D-scaling sometimes is also referred to as a variable minimum-bits method since for different datasets + * the minimum-bits to represent the same decimal precision will vary. The data value is modified + * to 2 to power of scale_factor for E-scaling. E-scaling is also called the fixed-bits method since for + * different datasets the minimum-bits will always be fixed to the scale factor of E-scaling. + * Currently, HDF5 ONLY supports the D-scaling (variable minimum-bits) method. 
+ * + * <h4>Implementation</h4> + * The scale-offset filter implementation was written and included in the file H5Zscaleoffset.c. + * Function #H5Pset_scaleoffset was written and included in the file “H5Pdcpl.c”. The HDF5 user + * can supply minimum-bits by calling function #H5Pset_scaleoffset. + * + * The scale-offset filter was implemented based on the design outlined in this section. However, + * the following factors need to be considered: + * <ol><li> + * The filter needs the appropriate cast pointer whenever it needs to retrieve data values. + * </li> + * <li> + * The HDF5 Library passes to the filter the to-be-compressed data in the format of the dataset + * datatype, and the filter passes back the decompressed data in the same format. If a fill value is + * defined, it is also in dataset datatype format. For example, if the byte order of the dataset data- + * type is different from that of the memory datatype of the platform, compression or decompression performs + * an endianness conversion of the data buffer. Moreover, the filter should be aware that + * memory byte order can be different during compression and decompression. + * </li> + * <li> + * The difference of endianness and datatype between file and memory should be considered + * when saving and retrieving minimum-bits, minimum value, and fill value.</li> + * <li> + * If the user sets the minimum-bits to full precision of the datatype, no operation is needed at + * the filter side. If the full precision is a result of calculation by the filter, then the minimum-bits + * needs to be saved for decompression but no compression or decompression is needed (only a + * copy of the input buffer is needed).</li> + * <li> + * If by calculation of the filter, the minimum-bits is equal to zero, special handling is needed. + * Since it means all values are the same, no compression or decompression is needed. 
But the + * minimum-bits and minimum value still need to be saved during compression.</li> + * <li> + * For floating-point data, the minimum value of the dataset should be calculated at first. Each + * data element value will then subtract the minimum value to obtain the “offset” data. The offset + * data will then follow the steps outlined above in the discussion of floating-point types to do data + * transformation to integer and rounding. For more information, @see @ref h4_float_datatype. + * </li></ol> + * + * <h4>Usage Examples</h4> + * The following code example illustrates the use of the scale-offset filter for writing and reading + * integer data. + * + * <em>Scale-offset compression integer data</em> + * \code + * #include "hdf5.h" + * #include "stdlib.h" + * + * #define H5FILE_NAME "scaleoffset_test_int.h5" + * #define DATASET_NAME "scaleoffset_int" + * #define NX 200 + * #define NY 300 + * #define CH_NX 10 + * #define CH_NY 15 + * int main(void) + * { + * hid_t file, dataspace, dataset, datatype, dset_create_props; + * hsize_t dims[2], chunk_size[2]; + * int orig_data[NX][NY]; + * int new_data[NX][NY]; + * int i, j, fill_val; + * + * // Define dataset datatype + * datatype = H5Tcopy(H5T_NATIVE_INT); + * + * // Initialize data buffer + * for (i=0; i < NX; i++) + * for (j=0; j < NY; j++) + * orig_data[i][j] = rand() % 10000; + * + * // Describe the size of the array. + * dims[0] = NX; + * dims[1] = NY; + * if((dataspace = H5Screate_simple (2, dims, NULL)) < 0) { + * printf("Error: fail to create dataspace\n"); + * return -1; + * } + * + * // Create a new file using read/write access, default file + * // creation properties, and default file access properties. 
+ * if((file = H5Fcreate (H5FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0) { + * printf("Error: fail to create file\n"); + * return -1; + * } + * + * // Set the dataset creation property list to specify that + * // the raw data is to be partitioned into 10 x 15 element + * // chunks and that each chunk is to be compressed. + * chunk_size[0] = CH_NX; + * chunk_size[1] = CH_NY; + * if((dset_create_props = H5Pcreate (H5P_DATASET_CREATE)) < 0) { + * printf("Error: fail to create dataset property\n"); + * return -1; + * } + * if(H5Pset_chunk (dset_create_props, 2, chunk_size) < 0) { + * printf("Error: fail to set chunk\n"); + * return -1; + * } + * + * // Set the fill value of dataset + * fill_val = 10000; + * if (H5Pset_fill_value(dset_create_props, H5T_NATIVE_INT, &fill_val)<0) { + * printf("Error: can not set fill value for dataset\n"); + * return -1; + * } + * + * // Set parameters for scale-offset compression. Check the + * // description of the H5Pset_scaleoffset function in the + * // HDF5 Reference Manual for more information. + * if(H5Pset_scaleoffset (dset_create_props, H5Z_SO_INT, H5Z_SO_INT_MINIMUMBITS_DEFAULT) < 0) { + * printf("Error: fail to set scaleoffset filter\n"); + * return -1; + * } + * + * // Create a new dataset within the file. The datatype + * // and dataspace describe the data on disk, which may + * // or may not be different from the format used in the + * // application's memory. The link creation and + * // dataset access property list parameters are passed + * // with default values. + * if((dataset = H5Dcreate (file, DATASET_NAME, datatype, dataspace, H5P_DEFAULT, + * dset_create_props, H5P_DEFAULT)) < 0) { + * printf("Error: fail to create dataset\n"); + * return -1; + * } + * + * // Write the array to the file. The datatype and dataspace + * // describe the format of the data in the 'orig_data' buffer. + * // We use default raw data transfer properties. 
+ * if(H5Dwrite (dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, orig_data) < 0) { + * printf("Error: fail to write to dataset\n"); + * return -1; + * } + * + * H5Dclose (dataset); + * + * if((dataset = H5Dopen(file, DATASET_NAME, H5P_DEFAULT)) < 0) { + * printf("Error: fail to open dataset\n"); + * return -1; + * } + * + * // Read the array. This is similar to writing data, + * // except the data flows in the opposite direction. + * // Note: Decompression is automatic. + * if(H5Dread (dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, new_data) < 0) { + * printf("Error: fail to read from dataset\n"); + * return -1; + * } + * + * H5Tclose (datatype); + * H5Dclose (dataset); + * H5Sclose (dataspace); + * H5Pclose (dset_create_props); + * H5Fclose (file); + * + * return 0; + * } + * \endcode + * + * The following code example illustrates the use of the scale-offset filter (set for variable + * minimum-bits method) for writing and reading floating-point data. + * + * <em>Scale-offset compression floating-point data</em> + * \code + * #include "hdf5.h" + * #include "stdlib.h" + * + * #define H5FILE_NAME "scaleoffset_test_float_Dscale.h5" + * #define DATASET_NAME "scaleoffset_float_Dscale" + * #define NX 200 + * #define NY 300 + * #define CH_NX 10 + * #define CH_NY 15 + * + * int main(void) + * { + * hid_t file, dataspace, dataset, datatype, dset_create_props; + * hsize_t dims[2], chunk_size[2]; + * float orig_data[NX][NY]; + * float new_data[NX][NY]; + * float fill_val; + * int i, j; + * + * // Define dataset datatype + * datatype = H5Tcopy(H5T_NATIVE_FLOAT); + * + * // Initialize data buffer + * for (i=0; i < NX; i++) + * for (j=0; j < NY; j++) + * orig_data[i][j] = (rand() % 10000) / 1000.0; + * + * // Describe the size of the array. 
+ * dims[0] = NX; + * dims[1] = NY; + * if((dataspace = H5Screate_simple (2, dims, NULL)) < 0) { + * printf("Error: fail to create dataspace\n"); + * return -1; + * } + * + * // Create a new file using read/write access, default file + * // creation properties, and default file access properties. + * if((file = H5Fcreate (H5FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0) { + * printf("Error: fail to create file\n"); + * return -1; + * } + * + * // Set the dataset creation property list to specify that + * // the raw data is to be partitioned into 10 x 15 element + * // chunks and that each chunk is to be compressed. + * chunk_size[0] = CH_NX; + * chunk_size[1] = CH_NY; + * if((dset_create_props = H5Pcreate (H5P_DATASET_CREATE)) < 0) { + * printf("Error: fail to create dataset property\n"); + * return -1; + * } + * if(H5Pset_chunk (dset_create_props, 2, chunk_size) < 0) { + * printf("Error: fail to set chunk\n"); + * return -1; + * } + * + * // Set the fill value of dataset + * fill_val = 10000.0; + * if (H5Pset_fill_value(dset_create_props, H5T_NATIVE_FLOAT, &fill_val) < 0) { + * printf("Error: can not set fill value for dataset\n"); + * return -1; + * } + * + * // Set parameters for scale-offset compression; use variable + * // minimum-bits method, set decimal scale factor to 3. Check + * // the description of the H5Pset_scaleoffset function in the + * // HDF5 Reference Manual for more information. + * if(H5Pset_scaleoffset (dset_create_props, H5Z_SO_FLOAT_DSCALE, 3) < 0) { + * printf("Error: fail to set scaleoffset filter\n"); + * return -1; + * } + * + * // Create a new dataset within the file. The datatype + * // and dataspace describe the data on disk, which may + * // or may not be different from the format used in the + * // application's memory. 
+ * if((dataset = H5Dcreate (file, DATASET_NAME, datatype, dataspace, H5P_DEFAULT, + * dset_create_props, H5P_DEFAULT)) < 0) { + * printf("Error: fail to create dataset\n"); + * return -1; + * } + * + * // Write the array to the file. The datatype and dataspace + * // describe the format of the data in the 'orig_data' buffer. + * // We use default raw data transfer properties. + * if(H5Dwrite (dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, orig_data) < 0) { + * printf("Error: fail to write to dataset\n"); + * return -1; + * } + * + * H5Dclose (dataset); + * + * if((dataset = H5Dopen(file, DATASET_NAME, H5P_DEFAULT)) < 0) { + * printf("Error: fail to open dataset\n"); + * return -1; + * } + * + * // Read the array. This is similar to writing data, + * // except the data flows in the opposite direction. + * // Note: Decompression is automatic. + * if(H5Dread (dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, new_data) < 0) { + * printf("Error: fail to read from dataset\n"); + * return -1; + * } + * + * H5Tclose (datatype); + * H5Dclose (dataset); + * H5Sclose (dataspace); + * H5Pclose (dset_create_props); + * H5Fclose (file); + * + * return 0; + * } + * \endcode + * + * <h4>Limitations</h4> + * For floating-point data handling, there are some algorithmic limitations to the GRiB data packing + * mechanism: + * <ol><li> + * Both the E-scaling and D-scaling methods are lossy compression + * </li> + * <li> + * For the D-scaling method, since data values have been rounded to integer values (positive) + * before truncating to the minimum-bits, their range is limited by the maximum value that can be + * represented by the corresponding unsigned integer type (the same size as that of the floating- + * point type) + * </li></ol> + * + * <h4>Suggestions</h4> + * The following are some suggestions for using the filter for floating-point data: + * <ol><li> + * It is better to convert the units of data so that the units are within certain common range (for + * 
example, 1200m to 1.2km) + * </li> + * <li> + * If data values to be compressed are very near to zero, it is strongly recommended that the + * user sets the fill value away from zero (for example, a large positive number); if the user does + * nothing, the HDF5 library will set the fill value to zero, and this may cause undesirable + * compression results + * </li> + * <li> + * Users are not encouraged to use a very large decimal scale factor (for example, 100) for the + * D-scaling method; this can cause the filter not to ignore the fill value when finding maximum + * and minimum values, and they will get a much larger minimum-bits (poor compression) + * </li></ol> + * + * \subsubsection subsubsec_dataset_filters_szip Using the Szip Filter + * See The HDF Group website for further information regarding the Szip filter. + * + * Previous Chapter \ref sec_group - Next Chapter \ref sec_datatype + * + */ + +/** + * \defgroup H5D Datasets (H5D) * * Use the functions in this module to manage HDF5 datasets, including the * transfer of data between memory and disk and the description of dataset diff --git a/src/H5Dpublic.h b/src/H5Dpublic.h index 8126aff..6fad138 100644 --- a/src/H5Dpublic.h +++ b/src/H5Dpublic.h @@ -666,7 +666,7 @@ H5_DLL herr_t H5Dget_chunk_info_by_coord(hid_t dset_id, const hsize_t *offset, u * \brief Iterate over all chunks of a chunked dataset * * \dset_id - * \param[in] dxpl_id Identifier of a transfer property list + * \param[in] dxpl_id Identifier of a transfer property list * \param[in] cb User callback function, called for every chunk. 
* \param[in] op_data User-defined pointer to data required by op * diff --git a/src/H5ESmodule.h b/src/H5ESmodule.h index 205089a..b05b7f4 100644 --- a/src/H5ESmodule.h +++ b/src/H5ESmodule.h @@ -28,7 +28,93 @@ #define H5_MY_PKG H5ES #define H5_MY_PKG_ERR H5E_EVENTSET -/**\defgroup H5ES H5ES +/** \page H5ES_UG The HDF5 Event Set + * @todo Under Construction + * + * \section sec_async The HDF5 Event Set Interface + * + * \subsection subsec_async_intro Introduction + * HDF5 provides asynchronous APIs for the HDF5 VOL connectors that support asynchronous HDF5 + * operations using the HDF5 Event Set (H5ES) API. This allows I/O to proceed in the background + * while the application is performing other tasks. + * + * To support AIO capabilities for the HDF5 VOL connectors, the AIO versions for the functions + * listed in the table below were added to HDF5 library version 1.13.0 and later. The async version + * of the function has an “_async” suffix added to the function name. For example, the async version + * for H5Fcreate is H5Fcreate_async. 
+ * + * <table> + * <tr> + * <th>Interface</th> + * <th>Functions</th> + * </tr> + * <tr> + * <th>H5F</th> + * <td>#H5Fcreate, #H5Fflush, #H5Fis_accessible, #H5Fopen, #H5Fclose + * </td> + * </tr> + * <tr> + * <th>H5G</th> + * <td>#H5Gcreate, #H5Gget_info, #H5Gget_info_by_idx, #H5Gget_info_by_name, #H5Gclose + * </td> + * </tr> + * <tr> + * <th>H5D</th> + * <td>#H5Dcreate, #H5Dopen, #H5Dset_extent, #H5Dwrite, #H5Dread, #H5Dget_space, #H5Dclose + * </td> + * </tr> + * <tr> + * <th>H5A</th> + * <td>#H5Acreate, #H5Acreate_by_name, #H5Aopen, #H5Aopen_by_name, #H5Aexists, #H5Awrite, #H5Aread, + * #H5Aclose, #H5Aopen_by_idx, #H5Arename, #H5Arename_by_name + * </td> + * </tr> + * <tr> + * <th>H5L</th> + * <td>#H5Lcreate_hard, #H5Lcreate_soft, #H5Ldelete, #H5Ldelete_by_idx, #H5Lexists + * </td> + * </tr> + * <tr> + * <th>H5O</th> + * <td>#H5Ocopy, #H5Orefresh, #H5Oflush, #H5Oclose, #H5Oopen, #H5Oopen_by_idx + * </td> + * </tr> + * <tr> + * <th>H5R</th> + * <td>#H5Ropen_attr, #H5Ropen_object, #H5Ropen_region, #H5Rdereference + * </td> + * </tr> + * <tr> + * <th>H5M</th> + * <td>#H5Mcreate, #H5Mopen, #H5Mput, #H5Mget, #H5Mclose + * </td> + * </tr> + * <tr> + * <th>H5T</th> + * <td>#H5Tcommit, #H5Topen, #H5Tcopy, #H5Tclose + * </td> + * </tr> + * </table> + * + * Async versions of the functions have an extra parameter called the event set parameter or es_id. + * For example, compare the signatures of #H5Dclose and #H5Dclose_async: + * \code + * herr_t H5Dclose(hid_t dset_id); + * herr_t H5Dclose_async(hid_t dset_id, hid_t es_id); + * \endcode + * + * An event set is an in-memory object that is created by an application and used to track many + * asynchronous operations with a single object. Event sets function like a "bag" -- holding request + * tokens from one or more asynchronous operations and providing a simple interface for inspecting + * the status of the entire set of operations. + * + * See the \ref H5ES APIs that were added to the HDF5 library to manage event sets. 
+ * + * Previous Chapter \ref sec_vol - Next Chapter \ref sec_map + * + */ + +/**\defgroup H5ES Event Set Interface (H5ES) * * \todo Add the event set life cycle. * diff --git a/src/H5Emodule.h b/src/H5Emodule.h index a2d59f3..0e4655c 100644 --- a/src/H5Emodule.h +++ b/src/H5Emodule.h @@ -28,30 +28,502 @@ #define H5_MY_PKG H5E #define H5_MY_PKG_ERR H5E_ERROR -/**\defgroup H5E H5E +/** \page H5E_UG HDF5 Error Handling * - * Use the functions in this module to manage HDF5 error stacks and error - * messages. + * \section sec_error HDF5 Error Handling + * + * The HDF5 library provides an error reporting mechanism for both the library itself and for user + * application programs. It can trace errors through function stack and error information like file + * name, function name, line number, and error description. + * + * \subsection subsec_error_intro Introduction + * The HDF5 Library provides an error reporting mechanism for both the library itself and for user application + * programs. It can trace errors through function stack and error information like file name, function name, + * line number, and error description. + * + * \ref subsec_error_ops discusses the basic error concepts such as error stack, error record, and error + * message and describes the related API functions. These concepts and functions are sufficient for + * application programs to trace errors inside the HDF5 Library. + * + * \ref subsec_error_adv talks about the advanced concepts of error + * class and error stack handle and talks about the related functions. With these concepts and functions, an + * application library or program using the HDF5 Library can have its own error report blended with HDF5’s + * error report. + * + * Starting with Release 1.8, we have a new set of Error Handling API functions. 
For the purpose of backward + * compatibility with version 1.6 and before, we still keep the old API functions, \ref H5Epush1, + * \ref H5Eprint1, \ref H5Ewalk1, \ref H5Eclear1, \ref H5Eget_auto1, \ref H5Eset_auto1. These functions do + * not have the error stack as a parameter. The library allows them to operate on the default error stack. + * (The H5E compatibility macros will choose the correct function based on the parameters.) + * + * The old API is similar to functionality discussed in \ref subsec_error_ops. The functionality discussed in + * \ref subsec_error_adv, the ability to allow applications to add their own error records, is the new + * design for the Error Handling API. + * + * \subsection subsec_error_H5E Error Handling Function Summaries + * @see \ref H5E reference manual + * + * \subsection subsec_error_program Programming Model for Error Handling + * This section is under construction. + * + * \subsection subsec_error_ops Basic Error Handling Operations + * Let us first try to understand the error stack. An error stack is a collection of error records. Error + * records can be pushed onto or popped off the error stack. By default, when an error occurs deep within + * the HDF5 Library, an error record is pushed onto an error stack and that function returns a failure + * indication. + * Its caller detects the failure, pushes another record onto the stack, and returns a failure indication. + * This continues until the API function called by the application returns a failure indication. The next + * API function being called will reset the error stack. All HDF5 Library error records belong to the same + * error class. For more information, see \ref subsec_error_adv. + * + * \subsubsection subsubsec_error_ops_stack Error Stack and Error Message + * In normal circumstances, an error causes the stack to be printed on the standard error stream + * automatically. + * This automatic error stack is the library’s default stack. 
For all the functions in this section, whenever + * an error stack ID is needed as a parameter, \ref H5E_DEFAULT can be used to indicate the library’s default + * stack. The first error record of the error stack, number #000, is produced by the API function itself and + * is usually sufficient to indicate to the application what went wrong. + * <table> + * <caption align=top>Example: An Error Message</caption> + * <tr> + * <td> + * <p>If an application calls \ref H5Tclose on a + * predefined datatype then the following message is + * printed on the standard error stream. This is a + * simple error that has only one component, the API + * function; other errors may have many components. + * <p><code><pre> + * HDF5-DIAG: Error detected in HDF5 (1.10.9) thread 0. + * #000: H5T.c line ### in H5Tclose(): predefined datatype + * major: Function argument + * minor: Bad value + * </pre></code> + * </td> + * </tr> + * </table> + * In the example above, we can see that an error record has a major message and a minor message. A major + * message generally indicates where the error happens. The location can be a dataset or a dataspace, for + * example. A minor message explains further details of the error. An example is “unable to open file”. + * Another specific detail about the error can be found at the end of the first line of each error record. + * This error description is usually added by the library designer to tell what exactly goes wrong. In the + * example above, the “predefined datatype” is an error description. + * + * \subsubsection subsubsec_error_ops_print Print and Clear an Error Stack + * Besides the automatic error report, the error stack can also be printed and cleared by the functions + * \ref H5Eprint2 and \ref H5Eclear2. If an application wishes to make explicit + * calls to \ref H5Eprint2 to print the error stack, the automatic printing should be turned off + * to prevent error messages from being displayed twice (see \ref H5Eset_auto2). 
+ * + * <em>To print an error stack:</em> + * \code + * herr_t H5Eprint2(hid_t error_stack, FILE * stream) + * \endcode + * This function prints the error stack specified by error_stack on the specified stream, stream. If the + * error stack is empty, a one‐line message will be printed. The following is an example of such a message. + * This message would be generated if the error was in the HDF5 Library. + * \code + * HDF5-DIAG: Error detected in HDF5 Library version: 1.10.9 thread 0. + * \endcode + * + * <em>To clear an error stack:</em> + * \code + * herr_t H5Eclear2(hid_t error_stack) + * \endcode + * The \ref H5Eclear2 function shown above clears the error stack specified by error_stack. + * \ref H5E_DEFAULT can be passed in to clear the current error stack. The current stack is also cleared + * whenever an API function is called; there are certain exceptions to this rule such as \ref H5Eprint2. + * + * \subsubsection subsubsec_error_ops_mute Mute Error Stack + * Sometimes an application calls a function for the sake of its return value, fully expecting the function + * to fail; sometimes the application wants to call \ref H5Eprint2 explicitly. In these situations, + * it would be misleading if an error message were still automatically printed. Using the + * \ref H5Eset_auto2 function can control the automatic printing of error messages. + * + * <em>To enable or disable automatic printing of errors:</em> + * \code + * herr_t H5Eset_auto2(hid_t error_stack, H5E_auto_t func, void *client_data) + * \endcode + * The \ref H5Eset_auto2 function can be used to turn on or off the automatic printing of errors + * for the error stack specified by error_stack. When turned on (non‐null func pointer), any API function + * which returns an error indication will first call func, passing it client_data as an argument. When the + * library is first initialized the auto printing function is set to \ref H5Eprint2 and client_data + * is the standard error stream pointer, stderr. 
+ * + * <em>To see the current settings:</em> + * \code + * herr_t H5Eget_auto2(hid_t error_stack, H5E_auto_t * func, void **client_data) + * \endcode + * The function above returns the current settings for the automatic error stack traversal function, func, and + * its data, client_data. If either argument is null, the corresponding value is not returned. + * + * An application can temporarily turn off error messages while “probing” a function. See the + * example below. + * + * <em>Example: Turn off error messages while probing a function</em> + * \code + * *** Save old error handler *** + * H5E_auto2_t old_func; + * void *old_client_data; + * H5Eget_auto2(error_stack, &old_func, &old_client_data); + * *** Turn off error handling *** + * H5Eset_auto2(error_stack, NULL, NULL); + * *** Probe. Likely to fail, but that’s okay *** + * status = H5Fopen (......); + * *** Restore previous error handler *** + * H5Eset_auto2(error_stack, old_func, old_client_data); + * \endcode + * + * Or automatic printing can be disabled altogether and error messages can be explicitly printed. + * + * <em>Example: Disable automatic printing and explicitly print error messages</em> + * \code + * *** Turn off error handling permanently *** + * H5Eset_auto2(error_stack, NULL, NULL); + * *** If failure, print error message *** + * if (H5Fopen (....)<0) { + * H5Eprint2(H5E_DEFAULT, stderr); + * exit (1); + * } + * \endcode + * + * \subsubsection subsubsec_error_ops_custom_print Customized Printing of an Error Stack + * Applications are allowed to define an automatic error traversal function other than the default + * \ref H5Eprint(). For instance, one can define a function that prints a simple, one‐line error message to + * the standard error stream and then exits. The first example below defines such a function. The second + * example below installs the function as the error handler. 
+ * + * <em>Example: Defining a function to print a simple error message</em> + * \code + * herr_t + * my_hdf5_error_handler(void *unused) + * { + * fprintf (stderr, “An HDF5 error was detected. Bye.\\n”); + * exit (1); + * } + * \endcode + * + * <em>Example: The user‐defined error handler</em> + * \code + * H5Eset_auto2(H5E_DEFAULT, my_hdf5_error_handler, NULL); + * \endcode + * + * \subsubsection subsubsec_error_ops_walk Walk through the Error Stack + * The \ref H5Eprint2 function is actually just a wrapper around the more complex \ref H5Ewalk function + * which traverses an error stack and calls a user‐defined function for each member of the stack. The example + * below shows how \ref H5Ewalk is used. + * \code + * herr_t H5Ewalk(hid_t err_stack, H5E_direction_t direction, + * H5E_walk_t func, void *client_data) + * \endcode + * The error stack err_stack is traversed and func is called for each member of the stack. Its arguments + * are an integer sequence number beginning at zero (regardless of direction) and the client_data + * pointer. If direction is \ref H5E_WALK_UPWARD, then traversal begins at the inner‐most function that + * detected the error and concludes with the API function. Use \ref H5E_WALK_DOWNWARD for the opposite + * order. + * + * \subsubsection subsubsec_error_ops_travers Traverse an Error Stack with a Callback Function + * An error stack traversal callback function takes three arguments: n is a sequence number beginning at + * zero for each traversal, eptr is a pointer to an error stack member, and client_data is the same pointer + * used in the example above passed to \ref H5Ewalk. See the example below. + * \code + * typedef herr_t (*H5E_walk_t)(unsigned n, H5E_error2_t *eptr, void *client_data) + * \endcode + * The H5E_error2_t structure is shown below. 
+ * \code + * typedef struct { + * hid_t cls_id; + * hid_t maj_num; + * hid_t min_num; + * unsigned line; + * const char *func_name; + * const char *file_name; + * const char *desc; + * } H5E_error2_t; + * \endcode + * The maj_num and min_num are major and minor error IDs, func_name is the name of the function where + * the error was detected, file_name and line locate the error within the HDF5 Library source code, and + * desc points to a description of the error. + * + * The following example shows a user‐defined callback function. + * + * <em>Example: A user‐defined callback function</em> + * \code + * \#define MSG_SIZE 64 + * herr_t + * custom_print_cb(unsigned n, const H5E_error2_t *err_desc, void *client_data) + * { + * FILE *stream = (FILE *)client_data; + * char maj[MSG_SIZE]; + * char min[MSG_SIZE]; + * char cls[MSG_SIZE]; + * const int indent = 4; + * + * *** Get descriptions for the major and minor error numbers *** + * if(H5Eget_class_name(err_desc->cls_id, cls, MSG_SIZE) < 0) + * TEST_ERROR; + * if(H5Eget_msg(err_desc->maj_num, NULL, maj, MSG_SIZE) < 0) + * TEST_ERROR; + * if(H5Eget_msg(err_desc->min_num, NULL, min, MSG_SIZE) < 0) + * TEST_ERROR; + * fprintf (stream, “%*serror #%03d: %s in %s(): + * line %u\\n”, + * indent, “”, n, err_desc->file_name, + * err_desc->func_name, err_desc->line); + * fprintf (stream, “%*sclass: %s\\n”, indent*2, “”, cls); + * fprintf (stream, “%*smajor: %s\\n”, indent*2, “”, maj); + * fprintf (stream, “%*sminor: %s\\n”, indent*2, “”, min); + * return 0; + * error: + * return -1; + * } + * \endcode + * + * <h4>Programming Note for C++ Developers Using C Functions</h4> + * If a C routine that takes a function pointer as an argument is called from within C++ code, the C routine + * should be returned from normally. + * + * Examples of this kind of routine include callbacks such as \ref H5Pset_elink_cb and + * \ref H5Pset_type_conv_cb and + * functions such as \ref H5Tconvert and \ref H5Ewalk2. 
+ * + * Exiting the routine in its normal fashion allows the HDF5 C Library to clean up its work properly. In other + * words, if the C++ application jumps out of the routine back to the C++ “catch” statement, the library is + * not given the opportunity to close any temporary data structures that were set up when the routine was + * called. The C++ application should save some state as the routine is started so that any problem that + * occurs might be diagnosed. + * + * \subsection subsec_error_adv Advanced Error Handling Operations + * The section above, see \ref subsec_error_ops, discusses the basic error + * handling operations of the library. In that section, all the error records on the error stack are from the + * library itself. In this section, we are going to introduce the operations that allow an application program + * to push its own error records onto the error stack once it declares an error class of its own through the + * HDF5 Error API. * * <table> - * <tr><th>Create</th><th>Read</th></tr> - * <tr valign="top"> - * <td> - * \snippet{lineno} H5E_examples.c create - * </td> - * <td> - * \snippet{lineno} H5E_examples.c read - * </td> - * <tr><th>Update</th><th>Delete</th></tr> - * <tr valign="top"> - * <td> - * \snippet{lineno} H5E_examples.c update - * </td> - * <td> - * \snippet{lineno} H5E_examples.c delete - * </td> - * </tr> + * <caption align=top>Example: An Error Report</caption> + * <tr> + * <td> + * <p>An error report shows both the library’s error record and the application’s error records. + * See the example below. 
+ * <p><code><pre> + * Error Test-DIAG: Error detected in Error Program (1.0) + * thread 8192: + * #000: ../../hdf5/test/error_test.c line ### in main(): + * Error test failed + * major: Error in test + * minor: Error in subroutine + * #001: ../../hdf5/test/error_test.c line ### in + * test_error(): H5Dwrite failed as supposed to + * major: Error in IO + * minor: Error in H5Dwrite + * HDF5-DIAG: Error detected in HDF5 (1.10.9) thread #####: + * #002: ../../hdf5/src/H5Dio.c line ### in H5Dwrite(): + * not a dataset + * major: Invalid arguments to routine + * minor: Inappropriate type + * </pre></code> + * </td> + * </tr> * </table> + * In the line above error record #002 in the example above, the starting phrase is HDF5. This is the error + * class name of the HDF5 Library. All of the library’s error messages (major and minor) are in this default + * error class. The Error Test at the beginning of the line above error record #000 is the name of the + * application’s error class. The first two error records, #000 and #001, are from the application’s error class. + * By definition, an error class is a group of major and minor error messages for a library (the HDF5 Library + * or an application library built on top of the HDF5 Library) or an application program. The error class can + * be registered for a library or program through the HDF5 Error API. Major and minor messages can be defined + * in an error class. An application will have object handles for the error class and for major and minor + * messages for further operation. See the example below.
+ * + * <em>Example: The user‐defined error handler</em> + * \code + * \#define MSG_SIZE 64 + * herr_t + * custom_print_cb(unsigned n, const H5E_error2_t *err_desc, + * void* client_data) + * { + * FILE *stream = (FILE *)client_data; + * char maj[MSG_SIZE]; + * char min[MSG_SIZE]; + * char cls[MSG_SIZE]; + * const int indent = 4; + * + * *** Get descriptions for the major and minor error numbers *** + * if(H5Eget_class_name(err_desc->cls_id, cls, MSG_SIZE) < 0) + * TEST_ERROR; + * if(H5Eget_msg(err_desc->maj_num, NULL, maj, MSG_SIZE) < 0) + * TEST_ERROR; + * if(H5Eget_msg(err_desc->min_num, NULL, min, MSG_SIZE) < 0) + * TEST_ERROR; + * fprintf (stream, “%*serror #%03d: %s in %s(): + * line %u\\n”, + * indent, “”, n, err_desc->file_name, + * err_desc->func_name, err_desc->line); + * fprintf (stream, “%*sclass: %s\\n”, indent*2, “”, cls); + * fprintf (stream, “%*smajor: %s\\n”, indent*2, “”, maj); + * fprintf (stream, “%*sminor: %s\\n”, indent*2, “”, min); + * return 0; + * error: + * return -1; + * } + * \endcode + * + * \subsubsection subsubsec_error_adv_more More Error API Functions + * The Error API has functions that can be used to register or unregister an error class, to create or close + * error messages, and to query an error class or error message. These functions are illustrated below. + * + * <em>To register an error class:</em> + * \code + * hid_t H5Eregister_class(const char* cls_name, const char* lib_name, const char* version) + * \endcode + * This function registers an error class with the HDF5 Library so that the application library or program + * can report errors together with the HDF5 Library. + * + * <em>To add an error message to an error class:</em> + * \code + * hid_t H5Ecreate_msg(hid_t class, H5E_type_t msg_type, const char* mesg) + * \endcode + * This function adds an error message to an error class defined by an application library or program. The + * error message can be either major or minor which is indicated by parameter msg_type. 
+ * + * <em>To get the name of an error class:</em> + * \code + * ssize_t H5Eget_class_name(hid_t class_id, char* name, size_t size) + * \endcode + * This function retrieves the name of the error class specified by the class ID. + * + * <em>To retrieve an error message:</em> + * \code + * ssize_t H5Eget_msg(hid_t mesg_id, H5E_type_t* mesg_type, char* mesg, size_t size) + * \endcode + * This function retrieves the error message including its length and type. + * + * <em>To close an error message:</em> + * \code + * herr_t H5Eclose_msg(hid_t mesg_id) + * \endcode + * This function closes an error message. + * + * <em>To remove an error class:</em> + * \code + * herr_t H5Eunregister_class(hid_t class_id) + * \endcode + * This function removes an error class from the Error API. + * + * The example below shows how an application creates an error class and error messages. + * + * <em>Example: Create an error class and error messages</em> + * \code + * *** Create an error class *** + * class_id = H5Eregister_class(ERR_CLS_NAME, PROG_NAME, PROG_VERS); + * *** Retrieve class name *** + * H5Eget_class_name(class_id, cls_name, cls_size); + * *** Create a major error message in the class *** + * maj_id = H5Ecreate_msg(class_id, H5E_MAJOR, “... ...”); + * *** Create a minor error message in the class *** + * min_id = H5Ecreate_msg(class_id, H5E_MINOR, “... ...”); + * \endcode + * + * The example below shows how an application closes error messages and unregisters the error class. + * + * <em>Example: Closing error messages and unregistering the error class</em> + * \code + * H5Eclose_msg(maj_id); + * H5Eclose_msg(min_id); + * H5Eunregister_class(class_id); + * \endcode + * + * \subsubsection subsubsec_error_adv_app Pushing an Application Error Message onto Error Stack + * An application can push error records onto or pop error records off of the error stack just as the library + * does internally. 
An error stack can be registered, and an object handle can be returned to the application + * so that the application can manipulate a registered error stack. + * + * <em>To register the current stack:</em> + * \code + * hid_t H5Eget_current_stack(void) + * \endcode + * This function registers the current error stack, returns an object handle, and clears the current error + * stack. + * An empty error stack will also be assigned an ID. + * + * <em>To replace the current error stack with another:</em> + * \code + * herr_t H5Eset_current_stack(hid_t error_stack) + * \endcode + * This function replaces the current error stack with another error stack specified by error_stack and + * clears the current error stack. The object handle error_stack is closed after this function call. + * + * <em>To push a new error record to the error stack:</em> + * \code + * herr_t H5Epush(hid_t error_stack, const char* file, const char* func, + * unsigned line, hid_t cls_id, hid_t major_id, hid_t minor_id, + * const char* desc, ... ) + * \endcode + * This function pushes a new error record onto the error stack for the current thread. + * + * <em>To delete some error messages:</em> + * \code + * herr_t H5Epop(hid_t error_stack, size_t count) + * \endcode + * This function deletes some error messages from the error stack. + * + * <em>To retrieve the number of error records:</em> + * \code + * int H5Eget_num(hid_t error_stack) + * \endcode + * This function retrieves the number of error records from an error stack. + * + * <em>To clear the error stack:</em> + * \code + * herr_t H5Eclear_stack(hid_t error_stack) + * \endcode + * This function clears the error stack. + * + * <em>To close the object handle for an error stack:</em> + * \code + * herr_t H5Eclose_stack(hid_t error_stack) + * \endcode + * This function closes the object handle for an error stack and releases its resources. + * + * The example below shows how an application pushes an error record onto the default error stack. 
+ * + * <em>Example: Pushing an error message to an error stack</em> + * \code + * *** Make call to HDF5 I/O routine *** + * if((dset_id=H5Dopen(file_id, dset_name, access_plist)) < 0) + * { + * *** Push client error onto error stack *** + * H5Epush(H5E_DEFAULT,__FILE__,FUNC,__LINE__,cls_id, + * CLIENT_ERR_MAJ_IO,CLIENT_ERR_MINOR_OPEN, "H5Dopen failed"); + * } + * *** Indicate error occurred in function *** + * return 0; + * \endcode + * + * The example below shows how an application registers the current error stack and + * creates an object handle to prevent another HDF5 function from clearing the error stack. + * + * <em>Example: Registering the error stack</em> + * \code + * if (H5Dwrite(dset_id, mem_type_id, mem_space_id, file_space_id, dset_xfer_plist_id, buf) < 0) + * { + * *** Push client error onto error stack *** + * H5Epush2(H5E_DEFAULT,__FILE__,FUNC,__LINE__,cls_id, + * CLIENT_ERR_MAJ_IO,CLIENT_ERR_MINOR_HDF5, + * "H5Dwrite failed"); + * *** Preserve the error stack by assigning an object handle to it *** + * error_stack = H5Eget_current_stack(); + * *** Close dataset *** + * H5Dclose(dset_id); + * *** Replace the current error stack with the preserved one *** + * H5Eset_current_stack(error_stack); + * } + * return 0; + * \endcode + * + * Previous Chapter \ref sec_attribute - Next Chapter \ref sec_plist + * + * \defgroup H5E Error Handling (H5E) * * \internal The \c FUNC_ENTER macro clears the error stack whenever an * interface function is entered. When an error is detected, an entry @@ -76,6 +548,8 @@ * error stack. The error stack is statically allocated to reduce the * complexity of handling errors within the \ref H5E package.
* + * @see sec_error + * */ #endif /* H5Emodule_H */ diff --git a/src/H5Epublic.h b/src/H5Epublic.h index 0254c37..6e47d28 100644 --- a/src/H5Epublic.h +++ b/src/H5Epublic.h @@ -899,8 +899,8 @@ H5_DLL herr_t H5Ewalk1(H5E_direction_t direction, H5E_walk1_t func, void *client * * \deprecated 1.8.0 Function deprecated in this release. * - * \details Given a major error number, H5Eget_major() returns a constant - * character string that describes the error. + * \details H5Eget_major() returns a constant + * character string that describes the error, given a major error number. * * \attention This function returns a dynamically allocated string (\c char * array). An application calling this function must free the memory @@ -920,8 +920,8 @@ H5_DLL char *H5Eget_major(H5E_major_t maj); * * \deprecated 1.8.0 Function deprecated and return type changed in this release. * - * \details Given a minor error number, H5Eget_minor() returns a constant - * character string that describes the error. + * \details H5Eget_minor() returns a constant + * character string that describes the error, given a minor error number. * * \attention In the Release 1.8.x series, H5Eget_minor() returns a string of * dynamic allocated \c char array. An application calling this diff --git a/src/H5Fmodule.h b/src/H5Fmodule.h index 6047693..867ef0e 100644 --- a/src/H5Fmodule.h +++ b/src/H5Fmodule.h @@ -28,7 +28,1448 @@ #define H5_MY_PKG H5F #define H5_MY_PKG_ERR H5E_FILE -/**\defgroup H5F H5F +/** \page H5F_UG The HDF5 File + * + * \section sec_file The HDF5 File + * \subsection subsec_file_intro Introduction + * The purpose of this chapter is to describe how to work with HDF5 data files. + * + * If HDF5 data is to be written to or read from a file, the file must first be explicitly created or + * opened with the appropriate file driver and access privileges. Once all work with the file is + * complete, the file must be explicitly closed. 
+ * + * This chapter discusses the following: + * \li File access modes + * \li Creating, opening, and closing files + * \li The use of file creation property lists + * \li The use of file access property lists + * \li The use of low-level file drivers + * + * This chapter assumes an understanding of the material presented in the data model chapter. For + * more information, @see @ref sec_data_model. + * + * \subsection subsec_file_access_modes File Access Modes + * There are two issues regarding file access: + * <ul><li>What should happen when a new file is created but a file of the same name already + * exists? Should the create action fail, or should the existing file be overwritten?</li> + * <li>Is a file to be opened with read-only or read-write access?</li></ul> + * + * Four access modes address these concerns. Two of these modes can be used with #H5Fcreate, and + * two modes can be used with #H5Fopen. + * \li #H5Fcreate accepts #H5F_ACC_EXCL or #H5F_ACC_TRUNC + * \li #H5Fopen accepts #H5F_ACC_RDONLY or #H5F_ACC_RDWR + * + * The access modes are described in the table below. + * + * <table> + * <caption>Access flags and modes</caption> + * <tr> + * <th>Access Flag</th> + * <th>Resulting Access Mode</th> + * </tr> + * <tr> + * <td>#H5F_ACC_EXCL</td> + * <td>If the file already exists, #H5Fcreate fails. If the file does not exist, + * it is created and opened with read-write access. (Default)</td> + * </tr> + * <tr> + * <td>#H5F_ACC_TRUNC</td> + * <td>If the file already exists, the file is opened with read-write access, + * and new data will overwrite any existing data. If the file does not exist, + * it is created and opened with read-write access.</td> + * </tr> + * <tr> + * <td>#H5F_ACC_RDONLY</td> + * <td>An existing file is opened with read-only access. If the file does not + * exist, #H5Fopen fails. (Default)</td> + * </tr> + * <tr> + * <td>#H5F_ACC_RDWR</td> + * <td>An existing file is opened with read-write access. 
If the file does not + * exist, #H5Fopen fails.</td> + * </tr> + * </table> + * + * By default, #H5Fopen opens a file for read-only access; passing #H5F_ACC_RDWR allows + * read-write access to the file. + * + * By default, #H5Fcreate fails if the file already exists; only passing #H5F_ACC_TRUNC allows + * the truncating of an existing file. + * + * \subsection subsec_file_creation_access File Creation and File Access Properties + * File creation and file access property lists control the more complex aspects of creating and + * accessing files. + * + * File creation property lists control the characteristics of a file such as the size of the userblock, + * a user-definable data block; the size of data address parameters; properties of the B-trees that are + * used to manage the data in the file; and certain HDF5 Library versioning information. + * + * For more information, @see @ref subsubsec_file_property_lists_props. + * + * This section has a more detailed discussion of file creation properties. If you have no special + * requirements for these file characteristics, you can simply specify #H5P_DEFAULT for the default + * file creation property list when a file creation property list is called for. + * + * File access property lists control properties and means of accessing a file such as data alignment + * characteristics, metadata block and cache sizes, data sieve buffer size, garbage collection + * settings, and parallel I/O. Data alignment, metadata block and cache sizes, and data sieve buffer + * size are factors in improving I/O performance. + * + * For more information, @see @ref subsubsec_file_property_lists_access. + * + * This section has a more detailed discussion of file access properties. If you have no special + * requirements for these file access characteristics, you can simply specify #H5P_DEFAULT for the + * default file access property list when a file access property list is called for. 
+ * + * <table> + * <caption>Figure 10 - More sample file structures</caption> + * <tr> + * <td> + * \image html UML_FileAndProps.gif "UML model for an HDF5 file and its property lists" + * </td> + * </tr> + * </table> + * + * \subsection subsec_file_drivers Low-level File Drivers + * The concept of an HDF5 file is actually rather abstract: the address space for what is normally + * thought of as an HDF5 file might correspond to any of the following at the storage level: + * \li Single file on a standard file system + * \li Multiple files on a standard file system + * \li Multiple files on a parallel file system + * \li Block of memory within an application’s memory space + * \li More abstract situations such as virtual files + * + * This HDF5 address space is generally referred to as an HDF5 file regardless of its organization at + * the storage level. + * + * HDF5 accesses a file (the address space) through various types of low-level file drivers. The + * default HDF5 file storage layout is as an unbuffered permanent file which is a single, contiguous + * file on local disk. Alternative layouts are designed to suit the needs of a variety of systems, + * environments, and applications. + * + * \subsection subsec_file_program_model Programming Model for Files + * Programming models for creating, opening, and closing HDF5 files are described in the + * sub-sections below. + * + * \subsubsection subsubsec_file_program_model_create Creating a New File + * The programming model for creating a new HDF5 file can be summarized as follows: + * \li Define the file creation property list + * \li Define the file access property list + * \li Create the file + * + * First, consider the simple case where we use the default values for the property lists. See the + * example below. 
+ * + * <em>Creating an HDF5 file using property list defaults</em> + * \code + * file_id = H5Fcreate ("SampleFile.h5", H5F_ACC_EXCL, H5P_DEFAULT, H5P_DEFAULT) + * \endcode + * + * Note: The example above specifies that #H5Fcreate should fail if SampleFile.h5 already exists. + * + * A more complex case is shown in the example below. In this example, we define file creation + * and access property lists (though we do not assign any properties), specify that #H5Fcreate + * should fail if SampleFile.h5 already exists, and create a new file named SampleFile.h5. The example + * does not specify a driver, so the default driver, #H5FD_SEC2, will be used. + * + * <em>Creating an HDF5 file using property lists</em> + * \code + * fcplist_id = H5Pcreate (H5P_FILE_CREATE) + * <...set desired file creation properties...> + * faplist_id = H5Pcreate (H5P_FILE_ACCESS) + * <...set desired file access properties...> + * file_id = H5Fcreate ("SampleFile.h5", H5F_ACC_EXCL, fcplist_id, faplist_id) + * \endcode + * Notes: + * 1. A root group is automatically created in a file when the file is first created. + * + * 2. File property lists, once defined, can be reused when another file is created within the same + * application. + * + * \subsubsection subsubsec_file_program_model_open Opening an Existing File + * The programming model for opening an existing HDF5 file can be summarized as follows: + * <ul><li>Define or modify the file access property list including a low-level file driver (optional)</li> + * <li>Open the file</li></ul> + * + * The code in the example below shows how to open an existing file with read-only access. 
+ * + * <em>Opening an HDF5 file</em> + * \code + * faplist_id = H5Pcreate (H5P_FILE_ACCESS) + * status = H5Pset_fapl_stdio (faplist_id) + * file_id = H5Fopen ("SampleFile.h5", H5F_ACC_RDONLY, faplist_id) + * \endcode + * + * \subsubsection subsubsec_file_program_model_close Closing a File + * The programming model for closing an HDF5 file is very simple: + * \li Close file + * + * We close SampleFile.h5 with the code in the example below. + * + * <em>Closing an HDF5 file</em> + * \code + * status = H5Fclose (file_id) + * \endcode + * Note that #H5Fclose flushes all unwritten data to storage and that file_id is the identifier returned + * for SampleFile.h5 by #H5Fopen. + * + * More comprehensive discussions regarding all of these steps are provided below. + * + * \subsection subsec_file_h5dump Using h5dump to View a File + * h5dump is a command-line utility that is included in the HDF5 distribution. This program + * provides a straightforward means of inspecting the contents of an HDF5 file. You can use + * h5dump to verify that a program is generating the intended HDF5 file. h5dump displays ASCII + * output formatted according to the HDF5 DDL grammar. + * + * The following h5dump command will display the contents of SampleFile.h5: + * \code + * h5dump SampleFile.h5 + * \endcode + * + * If no datasets or groups have been created in the file and no data has been written to it, the output + * will look something like the following: + * \code + * HDF5 "SampleFile.h5" { + * GROUP "/" { + * } + * } + * \endcode + * + * Note that the root group, indicated above by /, was automatically created when the file was created. + * + * h5dump is described on the + * <a href="https://portal.hdfgroup.org/display/HDF5/h5dump">Tools</a> + * page under + * <a href="https://portal.hdfgroup.org/display/HDF5/Libraries+and+Tools+Reference"> + * Libraries and Tools Reference</a>. + * The HDF5 DDL grammar is described in the document \ref DDLBNF110.
+ * + * \subsection subsec_file_summary File Function Summaries + * General library (\ref H5 functions and macros), (\ref H5F functions), file related + * (\ref H5P functions), and file driver (\ref H5P functions) are listed below. + * + * <table> + * <caption>General library functions and macros</caption> + * <tr> + * <th>Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5check_version</td> + * <td>Verifies that HDF5 library versions are consistent.</td> + * </tr> + * <tr> + * <td>#H5close</td> + * <td>Flushes all data to disk, closes all open identifiers, and cleans up memory.</td> + * </tr> + * <tr> + * <td>#H5dont_atexit</td> + * <td>Instructs the library not to install the atexit cleanup routine.</td> + * </tr> + * <tr> + * <td>#H5garbage_collect</td> + * <td>Garbage collects on all free-lists of all types.</td> + * </tr> + * <tr> + * <td>#H5get_libversion</td> + * <td>Returns the HDF library release number.</td> + * </tr> + * <tr> + * <td>#H5open</td> + * <td>Initializes the HDF5 library.</td> + * </tr> + * <tr> + * <td>#H5set_free_list_limits</td> + * <td>Sets free-list size limits.</td> + * </tr> + * <tr> + * <td>#H5_VERSION_GE</td> + * <td>Determines whether the version of the library being used is greater than or equal + * to the specified version.</td> + * </tr> + * <tr> + * <td>#H5_VERSION_LE</td> + * <td>Determines whether the version of the library being used is less than or equal + * to the specified version.</td> + * </tr> + * </table> + * + * <table> + * <caption>File functions </caption> + * <tr> + * <th>Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Fclear_elink_file_cache</td> + * <td>Clears the external link open file cache for a file.</td> + * </tr> + * <tr> + * <td>#H5Fclose</td> + * <td>Closes HDF5 file.</td> + * </tr> + * <tr> + * <td>#H5Fcreate</td> + * <td>Creates new HDF5 file.</td> + * </tr> + * <tr> + * <td>#H5Fflush</td> + * <td>Flushes data to HDF5 file on storage medium.</td> + * </tr> + * <tr> + * 
<td>#H5Fget_access_plist</td> + * <td>Returns a file access property list identifier.</td> + * </tr> + * <tr> + * <td>#H5Fget_create_plist</td> + * <td>Returns a file creation property list identifier.</td> + * </tr> + * <tr> + * <td>#H5Fget_file_image</td> + * <td>Retrieves a copy of the image of an existing, open file.</td> + * </tr> + * <tr> + * <td>#H5Fget_filesize</td> + * <td>Returns the size of an HDF5 file.</td> + * </tr> + * <tr> + * <td>#H5Fget_freespace</td> + * <td>Returns the amount of free space in a file.</td> + * </tr> + * <tr> + * <td>#H5Fget_info</td> + * <td>Returns global information for a file.</td> + * </tr> + * <tr> + * <td>#H5Fget_intent</td> + * <td>Determines the read/write or read-only status of a file.</td> + * </tr> + * <tr> + * <td>#H5Fget_mdc_config</td> + * <td>Obtain current metadata cache configuration for target file.</td> + * </tr> + * <tr> + * <td>#H5Fget_mdc_hit_rate</td> + * <td>Obtain target file’s metadata cache hit rate.</td> + * </tr> + * <tr> + * <td>#H5Fget_mdc_size</td> + * <td>Obtain current metadata cache size data for specified file.</td> + * </tr> + * <tr> + * <td>#H5Fget_mpi_atomicity</td> + * <td>Retrieves the atomicity mode in use.</td> + * </tr> + * <tr> + * <td>#H5Fget_name</td> + * <td>Retrieves the name of the file to which the object belongs.</td> + * </tr> + * <tr> + * <td>#H5Fget_obj_count</td> + * <td>Returns the number of open object identifiers for an open file.</td> + * </tr> + * <tr> + * <td>#H5Fget_obj_ids</td> + * <td>Returns a list of open object identifiers.</td> + * </tr> + * <tr> + * <td>#H5Fget_vfd_handle</td> + * <td>Returns pointer to the file handle from the virtual file driver.</td> + * </tr> + * <tr> + * <td>#H5Fis_hdf5</td> + * <td>Determines whether a file is in the HDF5 format.</td> + * </tr> + * <tr> + * <td>#H5Fmount</td> + * <td>Mounts a file.</td> + * </tr> + * <tr> + * <td>#H5Fopen</td> + * <td>Opens an existing HDF5 file.</td> + * </tr> + * <tr> + * <td>#H5Freopen</td> + * 
<td>Returns a new identifier for a previously-opened HDF5 file.</td> + * </tr> + * <tr> + * <td>#H5Freset_mdc_hit_rate_stats</td> + * <td>Reset hit rate statistics counters for the target file.</td> + * </tr> + * <tr> + * <td>#H5Fset_mdc_config</td> + * <td>Use to configure metadata cache of target file.</td> + * </tr> + * <tr> + * <td>#H5Fset_mpi_atomicity</td> + * <td>Use to set the MPI atomicity mode.</td> + * </tr> + * <tr> + * <td>#H5Funmount</td> + * <td>Unmounts a file.</td> + * </tr> + * </table> + * + * <table> + * <caption>File creation property list functions </caption> + * <tr> + * <th>Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Pset_userblock/#H5Pget_userblock</td> + * <td>Sets/retrieves size of userblock.</td> + * </tr> + * <tr> + * <td>#H5Pset_sizes/#H5Pget_sizes</td> + * <td>Sets/retrieves byte size of offsets and lengths used to address objects in HDF5 file.</td> + * </tr> + * <tr> + * <td>#H5Pset_sym_k/#H5Pget_sym_k</td> + * <td>Sets/retrieves size of parameters used to control symbol table nodes.</td> + * </tr> + * <tr> + * <td>#H5Pset_istore_k/#H5Pget_istore_k</td> + * <td>Sets/retrieves size of parameter used to control B-trees for indexing chunked datasets.</td> + * </tr> + * <tr> + * <td>#H5Pset_file_image</td> + * <td>Sets an initial file image in a memory buffer.</td> + * </tr> + * <tr> + * <td>#H5Pget_file_image</td> + * <td>Retrieves a copy of the file image designated as the initial content and structure of a file.</td> + * </tr> + * <tr> + * <td>#H5Pset_shared_mesg_nindexes/#H5Pget_shared_mesg_nindexes</td> + * <td>Sets or retrieves number of shared object header message indexes in file + * creation property list.</td> + * </tr> + * <tr> + * <td>#H5Pset_shared_mesg_index</td> + * <td>Configures the specified shared object header message index.</td> + * </tr> + * <tr> + * <td>#H5Pget_shared_mesg_index</td> + * <td>Retrieves the configuration settings for a shared message index.</td> + * </tr> + * <tr> + * 
<td>#H5Pset_shared_mesg_phase_change/#H5Pget_shared_mesg_phase_change</td> + * <td>Sets or retrieves shared object header message storage phase change thresholds.</td> + * </tr> + * <tr> + * <td>#H5Pget_version</td> + * <td>Retrieves the version information of various objects for a file creation property list.</td> + * </tr> + * </table> + * + * <table> + * <caption>File access property list functions </caption> + * <tr> + * <th>Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Pset_alignment/#H5Pget_alignment</td> + * <td>Sets/retrieves alignment properties.</td> + * </tr> + * <tr> + * <td>#H5Pset_cache/#H5Pget_cache</td> + * <td>Sets/retrieves metadata cache and raw data chunk cache parameters.</td> + * </tr> + * <tr> + * <td>#H5Pset_elink_file_cache_size/#H5Pget_elink_file_cache_size</td> + * <td>Sets/retrieves the size of the external link open file cache from the specified + * file access property list.</td> + * </tr> + * <tr> + * <td>#H5Pset_gc_references/#H5Pget_gc_references</td> + * <td>Sets/retrieves garbage collecting references flag.</td> + * </tr> + * <tr> + * <td>#H5Pset_family_offset</td> + * <td>Sets offset property for low-level access to a file in a family of files.</td> + * </tr> + * <tr> + * <td>#H5Pget_family_offset</td> + * <td>Retrieves a data offset from the file access property list.</td> + * </tr> + * <tr> + * <td>#H5Pset_meta_block_size/#H5Pget_meta_block_size</td> + * <td>Sets the minimum metadata block size or retrieves the current metadata block size setting.</td> + * </tr> + * <tr> + * <td>#H5Pset_mdc_config</td> + * <td>Set the initial metadata cache configuration in the indicated File Access Property List + * to the supplied value.</td> + * </tr> + * <tr> + * <td>#H5Pget_mdc_config</td> + * <td>Get the current initial metadata cache configuration from the indicated File Access + * Property List.</td> + * </tr> + * <tr> + * <td>#H5Pset_sieve_buf_size/#H5Pget_sieve_buf_size</td> + * <td>Sets/retrieves maximum size of data sieve buffer.</td> + * </tr> + * <tr> + * <td>#H5Pset_libver_bounds</td> + *
<td>Sets bounds on library versions, and indirectly format versions, to be used + * when creating objects.</td> + * </tr> + * <tr> + * <td>#H5Pget_libver_bounds</td> + * <td>Retrieves library version bounds settings that indirectly control the format + * versions used when creating objects.</td> + * </tr> + * <tr> + * <td>#H5Pset_small_data_block_size</td> + * <td>Sets the size of a contiguous block reserved for small data.</td> + * </tr> + * <tr> + * <td>#H5Pget_small_data_block_size</td> + * <td>Retrieves the current small data block size setting.</td> + * </tr> + * </table> + * + * <table> + * <caption>File driver functions </caption> + * <tr> + * <th>Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Pset_driver</td> + * <td>Sets a file driver.</td> + * </tr> + * <tr> + * <td>#H5Pget_driver</td> + * <td>Returns the identifier for the driver used to create a file.</td> + * </tr> + * <tr> + * <td>#H5Pget_driver_info</td> + * <td>Returns a pointer to file driver information.</td> + * </tr> + * <tr> + * <td>#H5Pset_fapl_core/#H5Pget_fapl_core</td> + * <td>Sets the driver for buffered memory files (in RAM) or retrieves information regarding + * the driver.</td> + * </tr> + * <tr> + * <td>#H5Pset_fapl_direct/#H5Pget_fapl_direct</td> + * <td>Sets up use of the direct I/O driver or retrieves the direct I/O driver settings.</td> + * </tr> + * <tr> + * <td>#H5Pset_fapl_family/#H5Pget_fapl_family</td> + * <td>Sets driver for file families, designed for systems that do not support files + * larger than 2 gigabytes, or retrieves information regarding driver.</td> + * </tr> + * <tr> + * <td>#H5Pset_fapl_log</td> + * <td>Sets logging driver.</td> + * </tr> + * <tr> + * <td>#H5Pset_fapl_mpio/#H5Pget_fapl_mpio</td> + * <td>Sets driver for files on parallel file systems (MPI I/O) or retrieves information + * regarding the driver.</td> + * </tr> + * <tr> + * <td>H5Pset_fapl_mpiposix/H5Pget_fapl_mpiposix</td> + * <td>No longer available.</td> + * </tr> + * <tr> + * 
<td>#H5Pset_fapl_multi/#H5Pget_fapl_multi</td> + * <td>Sets driver for multiple files, separating categories of metadata and raw data, + * or retrieves information regarding driver.</td> + * </tr> + * <tr> + * <td>#H5Pset_fapl_sec2</td> + * <td>Sets driver for unbuffered permanent files or retrieves information regarding driver.</td> + * </tr> + * <tr> + * <td>#H5Pset_fapl_split</td> + * <td>Sets driver for split files, a limited case of multiple files with one metadata file + * and one raw data file.</td> + * </tr> + * <tr> + * <td>#H5Pset_fapl_stdio</td> + * <td>Sets driver for buffered permanent files.</td> + * </tr> + * <tr> + * <td>#H5Pset_fapl_windows</td> + * <td>Sets the Windows I/O driver.</td> + * </tr> + * <tr> + * <td>#H5Pset_multi_type</td> + * <td>Specifies type of data to be accessed via the MULTI driver enabling more direct access.</td> + * </tr> + * <tr> + * <td>#H5Pget_multi_type</td> + * <td>Retrieves type of data property for MULTI driver.</td> + * </tr> + * </table> + * + * \subsection subsec_file_create Creating or Opening an HDF5 File + * This section describes in more detail how to create and how to open files. + * + * New HDF5 files are created and opened with #H5Fcreate; existing files are opened with + * #H5Fopen. Both functions return an object identifier which must eventually be released by calling + * #H5Fclose. + * + * To create a new file, call #H5Fcreate: + * \code + * hid_t H5Fcreate (const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id) + * \endcode + * + * #H5Fcreate creates a new file named name in the current directory. The file is opened with read + * and write access; if the #H5F_ACC_TRUNC flag is set, any pre-existing file of the same name in + * the same directory is truncated. If #H5F_ACC_TRUNC is not set or #H5F_ACC_EXCL is set and + * if a file of the same name exists, #H5Fcreate will fail. + * + * The new file is created with the properties specified in the property lists fcpl_id and fapl_id. 
+ * fcpl is short for file creation property list. fapl is short for file access property list. Specifying + * #H5P_DEFAULT for either the creation or access property list will use the library’s default + * creation or access properties. + * + * If #H5Fcreate successfully creates the file, it returns a file identifier for the new file. This + * identifier will be used by the application any time an object identifier, an OID, for the file is + * required. Once the application has finished working with a file, the identifier should be released + * and the file closed with #H5Fclose. + * + * To open an existing file, call #H5Fopen: + * \code + * hid_t H5Fopen (const char *name, unsigned flags, hid_t fapl_id) + * \endcode + * + * #H5Fopen opens an existing file with read-write access if #H5F_ACC_RDWR is set and read-only + * access if #H5F_ACC_RDONLY is set. + * + * fapl_id is the file access property list identifier. Alternatively, #H5P_DEFAULT indicates that the + * application relies on the default I/O access parameters. Creating and changing access property + * lists is documented further below. + * + * A file can be opened more than once via multiple #H5Fopen calls. Each such call returns a unique + * file identifier and the file can be accessed through any of these file identifiers as long as they + * remain valid. Each of these file identifiers must be released by calling #H5Fclose when it is no + * longer needed. + * + * For more information, @see @ref subsubsec_file_property_lists_access. + * For more information, @see @ref subsec_file_property_lists. + * + * \subsection subsec_file_closes Closing an HDF5 File + * #H5Fclose both closes a file and releases the file identifier returned by #H5Fopen or #H5Fcreate. 
+ * #H5Fclose must be called when an application is done working with a file; while the HDF5 + * Library makes every effort to maintain file integrity, failure to call #H5Fclose may result in the + * file being abandoned in an incomplete or corrupted state. + * + * To close a file, call #H5Fclose: + * \code + * herr_t H5Fclose (hid_t file_id) + * \endcode + * This function releases resources associated with an open file. After closing a file, the file + * identifier, file_id, cannot be used again as it will be undefined. + * + * #H5Fclose fulfills three purposes: to ensure that the file is left in an uncorrupted state, to ensure + * that all data has been written to the file, and to release resources. Use #H5Fflush if you wish to + * ensure that all data has been written to the file but it is premature to close it. + * + * Note regarding serial mode behavior: When #H5Fclose is called in serial mode, it closes the file + * and terminates new access to it, but it does not terminate access to objects that remain + * individually open within the file. That is, if #H5Fclose is called for a file but one or more objects + * within the file remain open, those objects will remain accessible until they are individually + * closed. To illustrate, assume that a file, fileA, contains a dataset, data_setA, and that both are + * open when #H5Fclose is called for fileA. data_setA will remain open and accessible, including + * writable, until it is explicitly closed. The file will be automatically and finally closed once all + * objects within it have been closed. + * + * Note regarding parallel mode behavior: Once #H5Fclose has been called in parallel mode, access + * is no longer available to any object within the file. + * + * \subsection subsec_file_property_lists File Property Lists + * Additional information regarding file structure and access are passed to #H5Fcreate and + * #H5Fopen through property list objects. 
Property lists provide a portable and extensible method of + * modifying file properties via simple API functions. There are two kinds of file-related property + * lists: + * \li File creation property lists + * \li File access property lists + * + * In the following sub-sections, we discuss only one file creation property, userblock size, in detail + * as a model for the user. Other file creation and file access properties are mentioned and defined + * briefly, but the model is not expanded for each; complete syntax, parameter, and usage + * information for every property list function is provided in the \ref H5P + * section of the HDF5 Reference Manual. + * + * For more information, @see @ref sec_plist. + * + * \subsubsection subsubsec_file_property_lists_create Creating a Property List + * If you do not wish to rely on the default file creation and access properties, you must first create + * a property list with #H5Pcreate. + * \code + * hid_t H5Pcreate (hid_t cls_id) + * \endcode + * cls_id is the type of property list being created. In this case, the appropriate values are + * #H5P_FILE_CREATE for a file creation property list and #H5P_FILE_ACCESS for a file access + * property list. + * + * Thus, the following calls create a file creation property list and a file access property list with + * identifiers fcpl_id and fapl_id, respectively: + * \code + * fcpl_id = H5Pcreate (H5P_FILE_CREATE) + * fapl_id = H5Pcreate (H5P_FILE_ACCESS) + * \endcode + * + * Once the property lists have been created, the properties themselves can be modified via the + * functions described in the following sub-sections. + * + * \subsubsection subsubsec_file_property_lists_props File Creation Properties + * File creation property lists control the file metadata, which is maintained in the superblock of the + * file. These properties are used only when a file is first created. 
+ * + * <h4>Userblock Size</h4> + * \code + * herr_t H5Pset_userblock (hid_t plist, hsize_t size) + * herr_t H5Pget_userblock (hid_t plist, hsize_t *size) + * \endcode + * + * The userblock is a fixed-length block of data located at the beginning of the file and is ignored + * by the HDF5 library. This block is specifically set aside for any data or information that + * developers determine to be useful to their applications but that will not be used by the HDF5 + * library. The size of the userblock is defined in bytes and may be set to any power of two with a + * minimum size of 512 bytes. In other words, userblocks might be 512, 1024, or 2048 bytes in + * size. + * + * This property is set with #H5Pset_userblock and queried via #H5Pget_userblock. For example, if + * an application needed a 4K userblock, then the following function call could be used: + * \code + * status = H5Pset_userblock(fcpl_id, 4096) + * \endcode + * + * The property list could later be queried with: + * \code + * status = H5Pget_userblock(fcpl_id, size) + * \endcode + * and the value 4096 would be returned in the parameter size. + * + * Other properties, described below, are set and queried in exactly the same manner. Syntax and + * usage are detailed in the @ref H5P section of the HDF5 Reference Manual. + * + * <h4>Offset and Length Sizes</h4> + * This property specifies the number of bytes used to store the offset and length of objects in the + * HDF5 file. Values of 2, 4, and 8 bytes are currently supported to accommodate 16-bit, 32-bit, + * and 64-bit file address spaces. + * + * These properties are set and queried via #H5Pset_sizes and #H5Pget_sizes. + * + * <h4>Symbol Table Parameters</h4> + * The size of symbol table B-trees can be controlled by setting the 1/2-rank and 1/2-node size + * parameters of the B-tree. 
+ * + * These properties are set and queried via #H5Pset_sym_k and #H5Pget_sym_k. + * + * <h4>Indexed Storage Parameters</h4> + * The size of indexed storage B-trees can be controlled by setting the 1/2-rank and 1/2-node size + * parameters of the B-tree. + * + * These properties are set and queried via #H5Pset_istore_k and #H5Pget_istore_k. + * + * <h4>Version Information</h4> + * Various objects in an HDF5 file may over time appear in different versions. The HDF5 Library + * keeps track of the version of each object in the file. + * + * Version information is retrieved via #H5Pget_version. + * + * \subsubsection subsubsec_file_property_lists_access File Access Properties + * This section discusses file access properties that are not related to the low-level file drivers. File + * drivers are discussed separately later in this chapter. + * For more information, @see @ref subsec_file_alternate_drivers. + * + * File access property lists control various aspects of file I/O and structure. + * + * <h4>Data Alignment</h4> + * Sometimes file access is faster if certain data elements are aligned in a specific manner. This can + * be controlled by setting alignment properties via the #H5Pset_alignment function. There are two + * values involved: + * \li A threshold value + * \li An alignment interval + * + * Any allocation request at least as large as the threshold will be aligned on an address that is a + * multiple of the alignment interval. + * + * <h4>Metadata Block Allocation Size</h4> + * Metadata typically exists as very small chunks of data; storing metadata elements in a file + * without blocking them can result in hundreds or thousands of very small data elements in the + * file. This can result in a highly fragmented file and seriously impede I/O. By blocking metadata + * elements, these small elements can be grouped in larger sets, thus alleviating both problems. + * + * #H5Pset_meta_block_size sets the minimum size in bytes of metadata block allocations.
+ * #H5Pget_meta_block_size retrieves the current minimum metadata block allocation size. + * + * <h4>Metadata Cache</h4> + * Metadata and raw data I/O speed are often governed by the size and frequency of disk reads and + * writes. In many cases, the speed can be substantially improved by the use of an appropriate + * cache. + * + * #H5Pset_cache sets the minimum cache size for both metadata and raw data and a preemption + * value for raw data chunks. #H5Pget_cache retrieves the current values. + * + * <h4>Data Sieve Buffer Size</h4> + * Data sieve buffering is used by certain file drivers to speed data I/O and is most commonly used when + * working with dataset hyperslabs. For example, using a buffer large enough to hold several pieces + * of a dataset as it is read in for hyperslab selections will boost performance noticeably. + * + * #H5Pset_sieve_buf_size sets the maximum size in bytes of the data sieve buffer. + * #H5Pget_sieve_buf_size retrieves the current maximum size of the data sieve buffer. + * + * <h4>Garbage Collection References</h4> + * Dataset region references and other reference types use space in an HDF5 file’s global heap. If + * garbage collection is on (1) and the user passes in an uninitialized value in a reference structure, + * the heap might become corrupted. When garbage collection is off (0), however, and the user reuses + * a reference, the previous heap block will be orphaned and not returned to the free heap + * space. When garbage collection is on, the user must initialize the reference structures to 0 or risk + * heap corruption. + * + * #H5Pset_gc_references sets the garbage collecting references flag.
+ * + * \subsection subsec_file_alternate_drivers Alternate File Storage Layouts and Low-level File Drivers + * The concept of an HDF5 file is actually rather abstract: the address space for what is normally + * thought of as an HDF5 file might correspond to any of the following: + * \li Single file on standard file system + * \li Multiple files on standard file system + * \li Multiple files on parallel file system + * \li Block of memory within application’s memory space + * \li More abstract situations such as virtual files + * + * This HDF5 address space is generally referred to as an HDF5 file regardless of its organization at + * the storage level. + * + * HDF5 employs an extremely flexible mechanism called the virtual file layer, or VFL, for file + * I/O. A full understanding of the VFL is only necessary if you plan to write your own drivers + * @see \ref VFL in the HDF5 Technical Notes. + * + * For our + * purposes here, it is sufficient to know that the low-level drivers used for file I/O reside in the + * VFL, as illustrated in the following figure. Note that H5FD_STREAM is not available with 1.8.x + * and later versions of the library. + * + * <table> + * <tr> + * <td> + * \image html VFL_Drivers.gif "I/O path from application to VFL and low-level drivers to storage" + * </td> + * </tr> + * </table> + * + * As mentioned above, HDF5 applications access HDF5 files through various low-level file + * drivers. The default driver for that layout is the POSIX driver (also known as the SEC2 driver), + * #H5FD_SEC2. Alternative layouts and drivers are designed to suit the needs of a variety of + * systems, environments, and applications. The drivers are listed in the table below. 
+ * + * <table> + * <caption id="table_file_drivers">Supported file drivers</caption> + * <tr> + * <th>Driver Name</th> + * <th>Driver Identifier</th> + * <th>Description</th> + * <th>Related API</th> + * </tr> + * <tr> + * <td>POSIX</td> + * <td>#H5FD_SEC2</td> + * <td>This driver uses POSIX file-system functions like read and write to perform I/O to a single, + * permanent file on local disk with no system buffering. This driver is POSIX-compliant and is + * the default file driver for all systems.</td> + * <td>#H5Pset_fapl_sec2</td> + * </tr> + * <tr> + * <td>Direct</td> + * <td>#H5FD_DIRECT</td> + * <td>This is the #H5FD_SEC2 driver except data is written to or read from the file + * synchronously without being cached by the system.</td> + * <td>#H5Pset_fapl_direct</td> + * </tr> + * <tr> + * <td>Log</td> + * <td>#H5FD_LOG</td> + * <td>This is the #H5FD_SEC2 driver with logging capabilities.</td> + * <td>#H5Pset_fapl_log</td> + * </tr> + * <tr> + * <td>Windows</td> + * <td>#H5FD_WINDOWS</td> + * <td>This driver was modified in HDF5-1.8.8 to be a wrapper of the POSIX driver, + * #H5FD_SEC2. This change should not affect user applications.</td> + * <td>#H5Pset_fapl_windows</td> + * </tr> + * <tr> + * <td>STDIO</td> + * <td>#H5FD_STDIO</td> + * <td>This driver uses functions from the standard C stdio.h to perform I/O + * to a single, permanent file on local disk with additional system buffering.</td> + * <td>#H5Pset_fapl_stdio</td> + * </tr> + * <tr> + * <td>Memory</td> + * <td>#H5FD_CORE</td> + * <td>With this driver, an application can work with a file in memory for faster reads and + * writes. File contents are kept in memory until the file is closed. 
At closing, the memory + * version of the file can be written back to disk or abandoned.</td> + * <td>#H5Pset_fapl_core</td> + * </tr> + * <tr> + * <td>Family</td> + * <td>#H5FD_FAMILY</td> + * <td>With this driver, the HDF5 file’s address space is partitioned into pieces and sent to + * separate storage files using an underlying driver of the user’s choice. This driver is for + * systems that do not support files larger than 2 gigabytes.</td> + * <td>#H5Pset_fapl_family</td> + * </tr> + * <tr> + * <td>Multi</td> + * <td>#H5FD_MULTI</td> + * <td>With this driver, data can be stored in multiple files according to the type of the data. + * I/O might work better if data is stored in separate files based on the type of data. The Split + * driver is a special case of this driver.</td> + * <td>#H5Pset_fapl_multi</td> + * </tr> + * <tr> + * <td>Split</td> + * <td>H5FD_SPLIT</td> + * <td>This file driver splits a file into two parts. One part stores metadata, and the other part + * stores raw data. This splitting a file into two parts is a limited case of the Multi driver.</td> + * <td>#H5Pset_fapl_split</td> + * </tr> + * <tr> + * <td>Parallel</td> + * <td>#H5FD_MPIO</td> + * <td>This is the standard HDF5 file driver for parallel file systems. This driver uses the MPI + * standard for both communication and file I/O.</td> + * <td>#H5Pset_fapl_mpio</td> + * </tr> + * <tr> + * <td>Parallel POSIX</td> + * <td>H5FD_MPIPOSIX</td> + * <td>This driver is no longer available</td> + * <td></td> + * </tr> + * <tr> + * <td>Stream</td> + * <td>H5FD_STREAM</td> + * <td>This driver is no longer available.</td> + * <td></td> + * </tr> + * </table> + * + * For more information, see the HDF5 Reference Manual entries for the function calls shown in + * the column on the right in the table above. + * + * Note that the low-level file drivers manage alternative file storage layouts. 
Dataset storage + * layouts (chunking, compression, and external dataset storage) are managed independently of file + * storage layouts. + * + * If an application requires a special-purpose low-level driver, the VFL provides a public API for + * creating one. For more information on how to create a driver, + * @see @ref VFL in the HDF5 Technical Notes. + * + * \subsubsection subsubsec_file_alternate_drivers_id Identifying the Previously‐used File Driver + * When creating a new HDF5 file, no history exists, so the file driver must be specified if it is to be + * other than the default. + * + * When opening existing files, however, the application may need to determine which low-level + * driver was used to create the file. The function #H5Pget_driver is used for this purpose. See the + * example below. + * + * <em>Identifying a driver</em> + * \code + * hid_t H5Pget_driver (hid_t fapl_id) + * \endcode + * + * #H5Pget_driver returns a constant identifying the low-level driver for the access property list + * fapl_id. For example, if the file was created with the POSIX (aka SEC2) driver, + * #H5Pget_driver returns #H5FD_SEC2. + * + * If the application opens an HDF5 file without both determining the driver used to create the file + * and setting up the use of that driver, the HDF5 Library will examine the superblock and the + * driver definition block to identify the driver. + * See the <a href="https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html">HDF5 File Format Specification</a> + * for detailed descriptions of the superblock and the driver definition block. + * + * \subsubsection subsubsec_file_alternate_drivers_sec2 The POSIX (aka SEC2) Driver + * The POSIX driver, #H5FD_SEC2, uses functions from section 2 of the POSIX manual to access + * unbuffered files stored on a local file system. This driver is also known as the SEC2 driver. 
The + * HDF5 Library buffers metadata regardless of the low-level driver, but using this driver prevents + * data from being buffered again by the lowest layers of the library. + * + * The function #H5Pset_fapl_sec2 sets the file access properties to use the POSIX driver. See the + * example below. + * + * <em>Using the POSIX, aka SEC2, driver</em> + * \code + * herr_t H5Pset_fapl_sec2 (hid_t fapl_id) + * \endcode + * + * Any previously-defined driver properties are erased from the property list. + * + * Additional parameters may be added to this function in the future. Since there are no additional + * variable settings associated with the POSIX driver, there is no H5Pget_fapl_sec2 function. + * + * \subsubsection subsubsec_file_alternate_drivers_direct The Direct Driver + * The Direct driver, #H5FD_DIRECT, functions like the POSIX driver except that data is written to + * or read from the file synchronously without being cached by the system. + * + * The functions #H5Pset_fapl_direct and #H5Pget_fapl_direct are used to manage file access properties. + * See the example below. + * + * <em>Using the Direct driver</em> + * \code + * herr_t H5Pset_fapl_direct(hid_t fapl_id, size_t alignment, size_t block_size, size_t cbuf_size) + * herr_t H5Pget_fapl_direct(hid_t fapl_id, size_t *alignment, size_t *block_size, size_t *cbuf_size) + * \endcode + * + * #H5Pset_fapl_direct sets the file access properties to use the Direct driver; any previously defined + * driver properties are erased from the property list. #H5Pget_fapl_direct retrieves the file access + * properties used with the Direct driver. fapl_id is the file access property list identifier. + * alignment is the memory alignment boundary. block_size is the file system block size. + * cbuf_size is the copy buffer size. + * + * Additional parameters may be added to this function in the future. 
+ * + * \subsubsection subsubsec_file_alternate_drivers_log The Log Driver + * The Log driver, #H5FD_LOG, is designed for situations where it is necessary to log file access + * activity. + * + * The function #H5Pset_fapl_log is used to manage logging properties. See the example below. + * + * <em>Logging file access</em> + * \code + * herr_t H5Pset_fapl_log (hid_t fapl_id, const char *logfile, unsigned int flags, size_t buf_size) + * \endcode + * + * #H5Pset_fapl_log sets the file access property list to use the Log driver. File access characteristics + * are identical to access via the POSIX driver. Any previously defined driver properties are erased + * from the property list. + * + * Log records are written to the file logfile. + * + * The logging levels set with the verbosity parameter are shown in the table below. + * + * <table> + * <caption>Logging levels</caption> + * <tr> + * <th>Level</th> + * <th>Comments</th> + * </tr> + * <tr> + * <td>0</td> + * <td>Performs no logging.</td> + * </tr> + * <tr> + * <td>1</td> + * <td>Records where writes and reads occur in the file.</td> + * </tr> + * <tr> + * <td>2</td> + * <td>Records where writes and reads occur in the file and what kind of data is written + * at each location. This includes raw data or any of several types of metadata + * (object headers, superblock, B-tree data, local headers, or global headers).</td> + * </tr> + * </table> + * + * There is no H5Pget_fapl_log function. + * + * Additional parameters may be added to this function in the future. + * + * \subsubsection subsubsec_file_alternate_drivers_win The Windows Driver + * The Windows driver, #H5FD_WINDOWS, was modified in HDF5-1.8.8 to be a wrapper of the + * POSIX driver, #H5FD_SEC2. In other words, if the Windows driver is used, any file I/O will + * instead use the functionality of the POSIX driver. This change should be transparent to all user + * applications. The Windows driver used to be the default driver for Windows systems.
The + * POSIX driver is now the default. + * + * The function #H5Pset_fapl_windows sets the file access properties to use the Windows driver. + * See the example below. + * + * <em>Using the Windows driver</em> + * \code + * herr_t H5Pset_fapl_windows (hid_t fapl_id) + * \endcode + * + * Any previously-defined driver properties are erased from the property list. + * + * Additional parameters may be added to this function in the future. Since there are no additional + * variable settings associated with the Windows driver, there is no H5Pget_fapl_windows function. + * + * \subsubsection subsubsec_file_alternate_drivers_stdio The STDIO Driver + * The STDIO driver, #H5FD_STDIO, accesses permanent files in a local file system like the + * POSIX driver does. The STDIO driver also has an additional layer of buffering beneath the + * HDF5 Library. + * + * The function #H5Pset_fapl_stdio sets the file access properties to use the STDIO driver. See the + * example below. + * + * <em>Using the STDIO driver</em> + * \code + * herr_t H5Pset_fapl_stdio (hid_t fapl_id) + * \endcode + * + * Any previously defined driver properties are erased from the property list. + * + * Additional parameters may be added to this function in the future. Since there are no additional + * variable settings associated with the STDIO driver, there is no H5Pget_fapl_stdio function. + * + * \subsubsection subsubsec_file_alternate_drivers_mem The Memory (aka Core) Driver + * There are several situations in which it is reasonable, sometimes even required, to maintain a file + * entirely in system memory.
You might want to do so if, for example, either of the following + * conditions applies: + * <ul><li>Performance requirements are so stringent that disk latency is a limiting factor</li> + * <li>You are working with small, temporary files that will not be retained and, thus, + * need not be written to storage media</li></ul> + * + * The Memory driver, #H5FD_CORE, provides a mechanism for creating and managing such in-memory files. + * The functions #H5Pset_fapl_core and #H5Pget_fapl_core manage file access + * properties. See the example below. + * + * <em>Managing file access for in-memory files</em> + * \code + * herr_t H5Pset_fapl_core (hid_t access_properties, size_t block_size, hbool_t backing_store) + * herr_t H5Pget_fapl_core (hid_t access_properties, size_t *block_size, hbool_t *backing_store) + * \endcode + * + * #H5Pset_fapl_core sets the file access property list to use the Memory driver; any previously + * defined driver properties are erased from the property list. + * + * Memory for the file will always be allocated in units of the specified block_size. + * + * The backing_store Boolean flag is set when the in-memory file is created. + * backing_store indicates whether to write the file contents to disk when the file is closed. If + * backing_store is set to 1 (TRUE), the file contents are flushed to a file with the same name as the + * in-memory file when the file is closed or access to the file is terminated in memory. If + * backing_store is set to 0 (FALSE), the file is not saved. + * + * The application is allowed to open an existing file with the #H5FD_CORE driver. While using + * #H5Fopen to open an existing file, if backing_store is set to 1 and the flag for #H5Fopen is set to + * #H5F_ACC_RDWR, changes to the file contents will be saved to the file when the file is closed. + * If backing_store is set to 0 and the flag for #H5Fopen is set to #H5F_ACC_RDWR, changes to the + * file contents will be lost when the file is closed.
If the flag for #H5Fopen is set to + * #H5F_ACC_RDONLY, no change to the file will be allowed either in memory or on file. + * + * If the file access property list is set to use the Memory driver, #H5Pget_fapl_core will return + * block_size and backing_store with the relevant file access property settings. + * + * Note the following important points regarding in-memory files: + * <ul><li>Local temporary files are created and accessed directly from memory without ever + * being written to disk</li> + * <li>Total file size must not exceed the available virtual memory</li> + * <li>Only one HDF5 file identifier can be opened for the file, the identifier returned by + * #H5Fcreate or #H5Fopen</li> + * <li>The changes to the file will be discarded when access is terminated unless + * backing_store is set to 1</li></ul> + * + * Additional parameters may be added to these functions in the future. + * + * @see <a href="https://portal.hdfgroup.org/display/HDF5/HDF5+File+Image+Operations"> + * HDF5 File Image Operations</a> + * section for information on more advanced usage of the Memory file driver, and + * @see <a href="http://www.hdfgroup.org/HDF5/doc/Advanced/ModifiedRegionWrites/ModifiedRegionWrites.pdf"> + * Modified Region Writes</a> + * section for information on how to set write operations so that only modified regions are written + * to storage. + * + * \subsubsection subsubsec_file_alternate_drivers_family The Family Driver + * HDF5 files can become quite large, and this can create problems on systems that do not support + * files larger than 2 gigabytes. The HDF5 file family mechanism is designed to solve the problems + * this creates by splitting the HDF5 file address space across several smaller files. This structure + * does not affect how metadata and raw data are stored: they are mixed in the address space just as + * they would be in a single, contiguous file. + * + * HDF5 applications access a family of files via the Family driver, #H5FD_FAMILY. 
The + * functions #H5Pset_fapl_family and #H5Pget_fapl_family are used to manage file family + * properties. See the example below. + * + * <em>Managing file family properties</em> + * \code + * herr_t H5Pset_fapl_family (hid_t fapl_id, + * hsize_t memb_size, hid_t member_properties) + * herr_t H5Pget_fapl_family (hid_t fapl_id, + * hsize_t *memb_size, hid_t *member_properties) + * \endcode + * + * Each member of the family is the same logical size though the size and disk storage reported by + * file system listing tools may be substantially smaller. Examples of file system listing tools are + * \code + * ls -l + * \endcode + * on a Unix system or the detailed folder listing on an Apple or Microsoft Windows + * system. The name passed to #H5Fcreate or #H5Fopen should include a printf(3c)-style integer + * format specifier which will be replaced with the family member number. The first family + * member is numbered zero (0). + * + * #H5Pset_fapl_family sets the access properties to use the Family driver; any previously defined + * driver properties are erased from the property list. member_properties will serve as the file + * access property list for each member of the file family. memb_size specifies the logical size, in + * bytes, of each family member. memb_size is used only when creating a new file or truncating an + * existing file; otherwise the member size is determined by the size of the first member of the + * family being opened. Note: If the size of the off_t type is four bytes, the maximum family + * member size is usually 2^31-1 because the byte at offset 2,147,483,647 is generally inaccessible. + * + * #H5Pget_fapl_family is used to retrieve file family properties. If the file access property list is set + * to use the Family driver, member_properties will be returned with a pointer to a copy of the + * appropriate member access property list. If memb_size is non-null, it will contain the logical + * size, in bytes, of family members. 
+ * + * Additional parameters may be added to these functions in the future. + * + * <h4>Unix Tools and an HDF5 Utility</h4> + * It occasionally becomes necessary to repartition a file family. A command-line utility for this + * purpose, h5repart, is distributed with the HDF5 library. + * + * \code + * h5repart [-v] [-b block_size[suffix]] [-m member_size[suffix]] source destination + * \endcode + * + * h5repart repartitions an HDF5 file by copying the source file or file family to the destination file + * or file family, preserving holes in the underlying UNIX files. Families are used for the source + * and/or destination if the name includes a printf-style integer format such as %d. The -v switch + * prints input and output file names on the standard error stream for progress monitoring, -b sets + * the I/O block size (the default is 1KB), and -m sets the output member size if the destination is a + * family name (the default is 1GB). block_size and member_size may be suffixed with the letters + * g, m, or k for GB, MB, or KB respectively. + * + * The h5repart utility is described on the Tools page of the HDF5 Reference Manual. + * + * An existing HDF5 file can be split into a family of files by running the file through split(1) on a + * UNIX system and numbering the output files. However, the HDF5 Library is lazy about + * extending the size of family members, so a valid file cannot generally be created by + * concatenation of the family members. + * + * Splitting the file and rejoining the segments by concatenation (split(1) and cat(1) on UNIX + * systems) does not generate files with holes; holes are preserved only through the use of h5repart. + * + * \subsubsection subsubsec_file_alternate_drivers_multi The Multi Driver + * In some circumstances, it is useful to separate metadata from raw data and some types of + * metadata from other types of metadata. 
Situations that would benefit from use of the Multi driver + * include the following: + * <ul><li>In networked situations where the small metadata files can be kept on local disks but + * larger raw data files must be stored on remote media</li> + * <li>In cases where the raw data is extremely large</li> + * <li>In situations requiring frequent access to metadata held in RAM while the raw data + * can be efficiently held on disk</li></ul> + * + * In either case, access to the metadata is substantially easier with the smaller, and possibly more + * localized, metadata files. This often results in improved application performance. + * + * The Multi driver, #H5FD_MULTI, provides a mechanism for segregating raw data and different + * types of metadata into multiple files. The functions #H5Pset_fapl_multi and + * #H5Pget_fapl_multi are used to manage access properties for these multiple files. See the example + * below. + * + * <em>Managing access properties for multiple files</em> + * \code + * herr_t H5Pset_fapl_multi (hid_t fapl_id, const H5FD_mem_t *memb_map, const hid_t *memb_fapl, + * const char * const *memb_name, const haddr_t *memb_addr, + * hbool_t relax) + * herr_t H5Pget_fapl_multi (hid_t fapl_id, const H5FD_mem_t *memb_map, const hid_t *memb_fapl, + * const char **memb_name, const haddr_t *memb_addr, hbool_t *relax) + * \endcode + * + * #H5Pset_fapl_multi sets the file access properties to use the Multi driver; any previously defined + * driver properties are erased from the property list. With the Multi driver invoked, the application + * will provide a base name to #H5Fopen or #H5Fcreate. The files will be named by that base name as + * modified by the rule indicated in memb_name. File access will be governed by the file access + * property list memb_properties. + * + * See #H5Pset_fapl_multi and #H5Pget_fapl_multi in the HDF5 Reference Manual for descriptions + * of these functions and their usage. 
+ * + * Additional parameters may be added to these functions in the future. + * + * \subsubsection subsubsec_file_alternate_drivers_split The Split Driver + * The Split driver, H5FD_SPLIT, is a limited case of the Multi driver where only two files are + * created. One file holds metadata, and the other file holds raw data. + * The function #H5Pset_fapl_split is used to manage Split file access properties. See the example + * below. + * + * <em>Managing access properties for split files</em> + * \code + * herr_t H5Pset_fapl_split (hid_t access_properties, const char *meta_extension, + * hid_t meta_properties, const char *raw_extension, hid_t raw_properties) + * \endcode + * + * #H5Pset_fapl_split sets the file access properties to use the Split driver; any previously defined + * driver properties are erased from the property list. + * + * With the Split driver invoked, the application will provide a base file name such as file_name to + * #H5Fcreate or #H5Fopen. The metadata and raw data files in storage will then be named + * file_name.meta_extension and file_name.raw_extension, respectively. For example, if + * meta_extension is defined as .meta and raw_extension is defined as .raw, the final filenames will + * be file_name.meta and file_name.raw. + * + * Each file can have its own file access property list. This allows the creative use of other low-level + * file drivers. For instance, the metadata file can be held in RAM and accessed via the + * Memory driver while the raw data file is stored on disk and accessed via the POSIX driver. + * Metadata file access will be governed by the file access property list in meta_properties. Raw + * data file access will be governed by the file access property list in raw_properties. + * + * Additional parameters may be added to these functions in the future. Since there are no + * additional variable settings associated with the Split driver, there is no H5Pget_fapl_split + * function.
+ * + * \subsubsection subsubsec_file_alternate_drivers_par The Parallel Driver + * Parallel environments require a parallel low-level driver. HDF5’s default driver for parallel + * systems is called the Parallel driver, #H5FD_MPIO. This driver uses the MPI standard for both + * communication and file I/O. + * + * The functions #H5Pset_fapl_mpio and #H5Pget_fapl_mpio are used to manage file access + * properties for the #H5FD_MPIO driver. See the example below. + * + * <em>Managing parallel file access properties</em> + * \code + * herr_t H5Pset_fapl_mpio (hid_t fapl_id, MPI_Comm comm, MPI_Info info) + * herr_t H5Pget_fapl_mpio (hid_t fapl_id, MPI_Comm *comm, MPI_Info *info) + * \endcode + * + * The file access properties managed by #H5Pset_fapl_mpio and retrieved by + * #H5Pget_fapl_mpio are the MPI communicator, comm, and the MPI info object, info. comm and + * info are used for file open. info is an information object much like an HDF5 property list. Both + * are defined in MPI_FILE_OPEN of MPI-2. + * + * The communicator and the info object are saved in the file access property list fapl_id. + * fapl_id can then be passed to #H5Fcreate or #H5Fopen, which use the saved communicator and + * info object in MPI_FILE_OPEN to create and/or open the file. + * + * #H5Pset_fapl_mpio and #H5Pget_fapl_mpio are available only in the parallel HDF5 Library and + * are not collective functions. The Parallel driver is available only in the parallel HDF5 Library. + * + * Additional parameters may be added to these functions in the future. + * + * \subsection subsec_file_examples Code Examples for Opening and Closing Files + * \subsubsection subsubsec_file_examples_trunc Example Using the H5F_ACC_TRUNC Flag + * The following example uses the #H5F_ACC_TRUNC flag when it creates a new file. The default + * file creation and file access properties are also used. Using #H5F_ACC_TRUNC means the + * function will look for an existing file with the name specified by the function. In this case, that + * name is FILE.
If the function does not find an existing file, it will create one. If it does find an + * existing file, it will empty the file in preparation for a new set of data. The identifier for the + * "new" file will be passed back to the application program. + * For more information, @see @ref subsec_file_access_modes. + * + * <em>Creating a file with default creation and access properties</em> + * \code + * hid_t file; // identifier + * herr_t status; // return status + * + * // Create a new file using H5F_ACC_TRUNC access, default + * // file creation properties, and default file access + * // properties. + * file = H5Fcreate(FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + * + * // Close the file. + * status = H5Fclose(file); + * \endcode + * + * \subsubsection subsubsec_file_examples_props Example with the File Creation Property List + * The example below shows how to create a file with 64-bit object offsets and lengths. + * + * <em>Creating a file with 64-bit offsets</em> + * \code + * hid_t create_plist; + * hid_t file_id; + * + * create_plist = H5Pcreate(H5P_FILE_CREATE); + * H5Pset_sizes(create_plist, 8, 8); + * file_id = H5Fcreate("test.h5", H5F_ACC_TRUNC, create_plist, H5P_DEFAULT); + * . + * . + * . + * + * H5Fclose(file_id); + * \endcode + * + * \subsubsection subsubsec_file_examples_access Example with the File Access Property List + * This example shows how to open an existing file for independent dataset access by MPI parallel + * I/O: + * + * <em>Opening an existing file for parallel I/O</em> + * \code + * hid_t access_plist; + * hid_t file_id; + * + * access_plist = H5Pcreate(H5P_FILE_ACCESS); + * H5Pset_fapl_mpio(access_plist, MPI_COMM_WORLD, MPI_INFO_NULL); + * + * // H5Fopen must be called collectively + * file_id = H5Fopen("test.h5", H5F_ACC_RDWR, access_plist); + * . + * . + * .
+ * + * // H5Fclose must be called collectively + * H5Fclose(file_id); + * \endcode + * + * \subsection subsec_file_multiple Working with Multiple HDF5 Files + * Multiple HDF5 files can be associated so that the files can be worked with as though all the + * information is in a single HDF5 file. A temporary association can be set up by means of the + * #H5Fmount function. A permanent association can be set up by means of the external link + * function #H5Lcreate_external. + * + * The purpose of this section is to describe what happens when the #H5Fmount function is used to + * mount one file on another. + * + * When a file is mounted on another, the mounted file is mounted at a group, and the root group of + * the mounted file takes the place of that group until the mounted file is unmounted or until the + * files are closed. + * + * The figure below shows two files before one is mounted on the other. File1 has two groups and + * three datasets. The group that is the target of the A link has links, Z and Y, to two of the datasets. + * The group that is the target of the B link has a link, W, to the other dataset. File2 has three + * groups and three datasets. The groups in File2 are the targets of the AA, BB, and CC links. The + * datasets in File2 are the targets of the ZZ, YY, and WW links. + * + * <table> + * <tr> + * <td> + * \image html Files_fig3.gif "Two separate files" + * </td> + * </tr> + * </table> + * + * The figure below shows the two files after File2 has been mounted on File1 at the group that is the + * target of the B link. + * + * <table> + * <tr> + * <td> + * \image html Files_fig4.gif "File2 mounted on File1" + * </td> + * </tr> + * </table> + * + * Note: In the figure above, the dataset that is the target of the W link is not shown. That dataset is + * masked by the mounted file. + * + * If a file is mounted on a group that has members, those members are hidden until the mounted + * file is unmounted.
There are two ways around this if you need to work with a group member. + * One is to mount the file on an empty group. Another is to open the group member before you + * mount the file. Opening the group member will return an identifier that you can use to locate the + * group member. + * + * The example below shows how #H5Fmount might be used to mount File2 onto File1. + * + * <em>Using H5Fmount</em> + * \code + * status = H5Fmount(loc_id, "/B", child_id, plist_id) + * \endcode + * + * Note: In the code example above, loc_id is the file identifier for File1, /B is the link path to the + * group where File2 is mounted, child_id is the file identifier for File2, and plist_id is a property + * list identifier. + * For more information, @see @ref sec_group. + * + * See the entries for #H5Fmount, #H5Funmount, and #H5Lcreate_external in the HDF5 Reference Manual. + * + * Previous Chapter \ref sec_program - Next Chapter \ref sec_group + * + */ + +/** + * \defgroup H5F Files (H5F) * * Use the functions in this module to manage HDF5 files. * diff --git a/src/H5Gmodule.h b/src/H5Gmodule.h index 93e7184..defa5fa 100644 --- a/src/H5Gmodule.h +++ b/src/H5Gmodule.h @@ -28,7 +28,929 @@ #define H5_MY_PKG H5G #define H5_MY_PKG_ERR H5E_SYM -/** \defgroup H5G H5G +/** \page H5G_UG HDF5 Groups + * + * \section sec_group HDF5 Groups + * \subsection subsec_group_intro Introduction + * As suggested by the name Hierarchical Data Format, an HDF5 file is hierarchically structured. + * The HDF5 group and link objects implement this hierarchy. + * + * In the simple and most common case, the file structure is a tree structure; in the general case, the + * file structure may be a directed graph with a designated entry point. The tree structure is very + * similar to the file system structures employed on UNIX systems, directories and files, and on + * Apple and Microsoft Windows systems, folders and files. 
HDF5 groups are analogous + * to the directories and folders; HDF5 datasets are analogous to the files. + * + * The one very important difference between the HDF5 file structure and the above-mentioned file + * system analogs is that HDF5 groups are linked as a directed graph, allowing circular references; + * the file systems are strictly hierarchical, allowing no circular references. The figures below + * illustrate the range of possibilities. + * + * In the first figure below, the group structure is strictly hierarchical, identical to the file system + * analogs. + * + * In the next two figures below, the structure takes advantage of the directed graph’s allowance of + * circular references. In the second figure, GroupA is not only a member of the root group, /, but a + * member of GroupC. Since Group C is a member of Group B and Group B is a member of Group + * A, Dataset1 can be accessed by means of the circular reference /Group A/Group B/Group + * C/Group A/Dataset1. The third figure below illustrates an extreme case in which GroupB is a + * member of itself, enabling a reference to a member dataset such as /Group A/Group B/Group + * B/Group B/Dataset2. + * + * <table> + * <tr> + * <td> + * \image html Groups_fig1.gif "A file with a strictly hierarchical group structure" + * </td> + * </tr> + * </table> + * + * <table> + * <tr> + * <td> + * \image html Groups_fig2.gif "A file with a circular reference" + * </td> + * </tr> + * </table> + * + * <table> + * <tr> + * <td> + * \image html Groups_fig3.gif "A file with one group as a member of itself" + * </td> + * </tr> + * </table> + * + * As becomes apparent upon reflection, directed graph structures can become quite complex; + * caution is advised! 
+ * + * The balance of this chapter discusses the following topics: + * \li The HDF5 group object (or a group) and its structure in more detail + * \li HDF5 link objects (or links) + * \li The programming model for working with groups and links + * \li HDF5 functions provided for working with groups, group members, and links + * \li Retrieving information about objects in a group + * \li Discovery of the structure of an HDF5 file and the contained objects + * \li Examples of file structures + * + * \subsection subsec_group_descr Description of the Group Object + * \subsubsection subsubsec_group_descr_object The Group Object + * Abstractly, an HDF5 group contains zero or more objects and every object must be a member of + * at least one group. The root group, the sole exception, may not belong to any group. + * + * <table> + * <tr> + * <td> + * \image html Groups_fig4.gif "Abstract model of the HDF5 group object" + * </td> + * </tr> + * </table> + * + * Group membership is actually implemented via link objects. See the figure above. A link object + * is owned by a group and points to a named object. Each link has a name, and each link points to + * exactly one object. Each named object has at least one and possibly many links to it. + * + * There are three classes of named objects: group, dataset, and committed datatype (formerly + * called named datatype). See the figure below. Each of these objects is the member of at least one + * group, which means there is at least one link to it. + * + * <table> + * <tr> + * <td> + * \image html Groups_fig5.gif "Classes of named objects" + * </td> + * </tr> + * </table> + * + * The primary operations on a group are to add and remove members and to discover member + * objects. These abstract operations, as listed in the figure below, are implemented in the \ref H5G + * APIs. For more information, @see @ref subsec_group_function. 
+ * + * To add and delete members of a group, links from the group to existing objects in the file are + * created and deleted with the link and unlink operations. When a new named object is created, the + * HDF5 Library executes the link operation in the background immediately after creating the + * object (in other words, a new object is added as a member of the group in which it is created + * without further user intervention). + * + * Given the name of an object, the get_object_info method retrieves a description of the object, + * including the number of references to it. The iterate method iterates through the members of the + * group, returning the name and type of each object. + * + * <table> + * <tr> + * <td> + * \image html Groups_fig6.gif "The group object" + * </td> + * </tr> + * </table> + * + * Every HDF5 file has a single root group, with the name /. The root group is identical to any other + * HDF5 group, except: + * \li The root group is automatically created when the HDF5 file is created (#H5Fcreate). + * \li The root group has no parent, but by convention has a reference count of 1. + * \li The root group cannot be deleted (in other words, unlinked)! + * + * \subsubsection subsubsec_group_descr_model The Hierarchy of Data Objects + * An HDF5 file is organized as a rooted, directed graph using HDF5 group objects. The named + * data objects are the nodes of the graph, and the links are the directed arcs. Each arc of the graph + * has a name, with the special name / reserved for the root group. New objects are created and then + * inserted into the graph with a link operation that is automatically executed by the library; + * existing objects are inserted into the graph with a link operation explicitly called by the user, + * which creates a named link from a group to the object. + * + * An object can be the target of more than one link. 
+ * + * The names on the links must be unique within each group, but there may be many links with the + * same name in different groups. These are unambiguous, because some ancestor must have a + * different name, or else they are the same object. The graph is navigated with path names, + * analogous to Unix file systems. For more information, @see @ref subsubsec_group_descr_path. + * + * An object can be opened with a full path starting at the root group, or with a relative path and a + * starting point. That starting point is always a group, though it may be the current working group, + * another specified group, or the root group of the file. Note that all paths are relative to a single + * HDF5 file. In this sense, an HDF5 file is analogous to a single UNIX file system. + * + * It is important to note that, just like the UNIX file system, HDF5 objects do not have names, the + * names are associated with paths. An object has an object identifier that is unique within the file, + * but a single object may have many names because there may be many paths to the same object. + * An object can be renamed, or moved to another group, by adding and deleting links. In this case, + * the object itself never moves. For that matter, membership in a group has no implication for the + * physical location of the stored object. + * + * Deleting a link to an object does not necessarily delete the object. The object remains available + * as long as there is at least one link to it. After all links to an object are deleted, it can no longer + * be opened, and the storage may be reclaimed. + * + * It is also important to realize that the linking mechanism can be used to construct very complex + * graphs of objects. For example, it is possible for an object to be shared between several groups + * and even to have more than one name in the same group. 
It is also possible for a group to be a + * member of itself, or to create other cycles in the graph, such as in the case where a child group is + * linked to one of its ancestors. + * + * HDF5 also has soft links similar to UNIX soft links. A soft link is an object that has a name and + * a path name for the target object. The soft link can be followed to open the target of the link just + * like a regular or hard link. The differences are that the hard link cannot be created if the target + * object does not exist and it always points to the same object. A soft link can be created with any + * path name, whether or not the object exists; it may or may not, therefore, be possible to follow a + * soft link. Furthermore, a soft link’s target object may be changed. + * + * \subsubsection subsubsec_group_descr_path HDF5 Path Names + * The structure of the HDF5 file constitutes the name space for the objects in the file. A path name + * is a string of components separated by slashes (/). Each component is the name of a hard or soft + * link which points to an object in the file. The slash not only separates the components, but + * indicates their hierarchical relationship; the component indicated by the link name following a + * slash is always a member of the component indicated by the link name preceding that slash. + * + * The first component in the path name may be any of the following: + * \li The special character dot (., a period), indicating the current group + * \li The special character slash (/), indicating the root group + * \li Any member of the current group + * + * Component link names may be any string of ASCII characters not containing a slash or a dot + * (/ and ., which are reserved as noted above). However, users are advised to avoid the use of + * punctuation and non-printing characters, as they may create problems for other software. The + * figure below provides a BNF grammar for HDF5 path names.
+ * + * <em>A BNF grammar for HDF5 path names</em> + * \code + * PathName ::= AbsolutePathName | RelativePathName + * Separator ::= "/" ["/"]* + * AbsolutePathName ::= Separator [ RelativePathName ] + * RelativePathName ::= Component [ Separator RelativePathName ]* + * Component ::= "." | Characters + * Characters ::= Character+ - { "." } + * Character ::= {c: c ∈ { { legal ASCII characters } - {'/'} } } + * \endcode + * + * An object can always be addressed by either a full or an absolute path name, starting at the root + * group, or by a relative path name, starting in a known location such as the current working + * group. As noted elsewhere, a given object may have multiple full and relative path names. + * + * Consider, for example, the file illustrated in the figure below. Dataset1 can be identified by either + * of these absolute path names: + * <em>/GroupA/Dataset1</em> + * + * <em>/GroupA/GroupB/GroupC/Dataset1</em> + * + * Since an HDF5 file is a directed graph structure, and is therefore not limited to a strict tree + * structure, and since this illustrated file includes the sort of circular reference that a directed graph + * enables, Dataset1 can also be identified by this absolute path name: + * <em>/GroupA/GroupB/GroupC/GroupA/Dataset1</em> + * + * Alternatively, if the current working location is GroupB, Dataset1 can be identified by either of + * these relative path names: + * <em>GroupC/Dataset1</em> + * + * <em>GroupC/GroupA/Dataset1</em> + * + * Note that relative path names in HDF5 do not employ the ../ notation, the UNIX notation + * indicating a parent directory, to indicate a parent group. + * + * <table> + * <tr> + * <td> + * \image html Groups_fig2.gif "A file with a circular reference" + * </td> + * </tr> + * </table> + * + * \subsubsection subsubsec_group_descr_impl Group Implementations in HDF5 + * The original HDF5 group implementation provided a single indexed structure for link storage.
A + * new group implementation, as of HDF5 Release 1.8.0, enables more efficient compact storage + * for very small groups, improved link indexing for large groups, and other advanced features. + * <ul> + * <li>The original indexed format remains the default. Links are stored in a B-tree in the + * group’s local heap.</li> + * <li>Groups created in the new compact-or-indexed format, the implementation introduced + * with Release 1.8.0, can be tuned for performance, switching between the compact and + * indexed formats at thresholds set in the user application. + * <ul> + * <li>The compact format will conserve file space and processing overhead when + * working with small groups and is particularly valuable when a group contains + * no links. Links are stored as a list of messages in the group’s header.</li> + * <li>The indexed format will yield improved performance when working with large + * groups. A large group may contain thousands to millions of members. Links + * are stored in a fractal heap and indexed with an improved B-tree.</li> + * </ul></li> + * <li>The new implementation also enables the use of link names consisting of non-ASCII + * character sets (see #H5Pset_char_encoding) and is required for all link types other than + * hard or soft links; the link types other than hard or soft links are external links and + * user-defined links @see @ref H5L APIs.</li> + * </ul> + * + * The original group structure and the newer structures are not directly interoperable. By default, a + * group will be created in the original indexed format. An existing group can be changed to a + * compact-or-indexed format if the need arises; there is no capability to change back. As stated + * above, once in the compact-or-indexed format, a group can switch between compact and indexed + * as needed. 
+ * + * Groups will be initially created in the compact-or-indexed format only when one or more of the + * following conditions is met: + * <ul> + * <li>The low version bound value of the library version bounds property has been set to + * Release 1.8.0 or later in the file access property list (see #H5Pset_libver_bounds). + * Currently, that would require an #H5Pset_libver_bounds call with the low parameter + * set to #H5F_LIBVER_LATEST. + * + * When this property is set for an HDF5 file, all objects in the file will be created using + * the latest available format; no effort will be made to create a file that can be read by + * older libraries.</li> + * <li>The creation order tracking property, #H5P_CRT_ORDER_TRACKED, has been set + * in the group creation property list (see #H5Pset_link_creation_order).</li> + * </ul> + * + * An existing group, currently in the original indexed format, will be converted to the + * compact-or-indexed format upon the occurrence of any of the following events: + * <ul> + * <li>An external or user-defined link is inserted into the group.</li> + * <li>A link named with a string composed of non-ASCII characters is inserted into the + * group.</li> + * </ul> + * + * The compact-or-indexed format offers performance improvements that will be most notable at + * the extremes (for example, in groups with zero members and in groups with tens of thousands of + * members). But measurable differences may sometimes appear at a threshold as low as eight + * group members. Since these performance thresholds and criteria differ from application to + * application, tunable settings are provided to govern the switch between the compact and indexed + * formats (see #H5Pset_link_phase_change). Optimal thresholds will depend on the application and + * the operating environment.
+ * + * Future versions of HDF5 will retain the ability to create, read, write, and manipulate all groups + * stored in either the original indexed format or the compact-or-indexed format. + * + * \subsection subsec_group_h5dump Using h5dump + * You can use h5dump, the command-line utility distributed with HDF5, to examine a file for + * purposes either of determining where to create an object within an HDF5 file or to verify that + * you have created an object in the intended place. + * + * In the case of the new group created later in this chapter, the following h5dump command will + * display the contents of FileA.h5: + * \code + * h5dump FileA.h5 + * \endcode + * + * For more information, @see @ref subsubsec_group_program_create. + * + * Assuming that the discussed objects, GroupA and GroupB are the only objects that exist in + * FileA.h5, the output will look something like the following: + * \code + * HDF5 "FileA.h5" { + * GROUP "/" { + * GROUP GroupA { + * GROUP GroupB { + * } + * } + * } + * } + * \endcode + * + * h5dump is described on the “HDF5 Tools” page of the \ref RM. + * + * The HDF5 DDL grammar is described in the @ref DDLBNF110. + * + * \subsection subsec_group_function Group Function Summaries + * Functions that can be used with groups (\ref H5G functions) and property list functions that can be used + * with groups (\ref H5P functions) are listed below. A number of group functions have been + * deprecated. Most of these have become link (\ref H5L) or object (\ref H5O) functions. These replacement + * functions are also listed below. + * + * <table> + * <caption>Group functions</caption> + * <tr> + * <th>Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Gcreate</td> + * <td>Creates a new empty group and gives it a name.
The + * C function is a macro: \see \ref api-compat-macros.</td> + * </tr> + * <tr> + * <td>#H5Gcreate_anon</td> + * <td>Creates a new empty group without linking it into the file structure.</td> + * </tr> + * <tr> + * <td>#H5Gopen</td> + * <td>Opens an existing group for modification and returns a group identifier for that group. + * The C function is a macro: \see \ref api-compat-macros.</td> + * </tr> + * <tr> + * <td>#H5Gclose</td> + * <td>Closes the specified group.</td> + * </tr> + * <tr> + * <td>#H5Gget_create_plist</td> + * <td>Gets a group creation property list identifier.</td> + * </tr> + * <tr> + * <td>#H5Gget_info</td> + * <td>Retrieves information about a group. Use instead of H5Gget_num_objs.</td> + * </tr> + * <tr> + * <td>#H5Gget_info_by_idx</td> + * <td>Retrieves information about a group according to the group’s position within an index.</td> + * </tr> + * <tr> + * <td>#H5Gget_info_by_name</td> + * <td>Retrieves information about a group.</td> + * </tr> + * </table> + * + * <table> + * <caption>Link and object functions</caption> + * <tr> + * <th>Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Lcreate_hard</td> + * <td>Creates a hard link to an object. Replaces H5Glink and H5Glink2.</td> + * </tr> + * <tr> + * <td>#H5Lcreate_soft</td> + * <td>Creates a soft link to an object. Replaces H5Glink and H5Glink2.</td> + * </tr> + * <tr> + * <td>#H5Lcreate_external</td> + * <td>Creates a soft link to an object in a different file. Replaces H5Glink and H5Glink2.</td> + * </tr> + * <tr> + * <td>#H5Lcreate_ud</td> + * <td>Creates a link of a user-defined type.</td> + * </tr> + * <tr> + * <td>#H5Lget_val</td> + * <td>Returns the value of a symbolic link. Replaces H5Gget_linkval.</td> + * </tr> + * <tr> + * <td>#H5Literate</td> + * <td>Iterates through links in a group. Replaces H5Giterate.
+ * See also #H5Ovisit and #H5Lvisit.</td> + * </tr> + * <tr> + * <td>#H5Literate_by_name</td> + * <td>Iterates through links in a group.</td> + * </tr> + * <tr> + * <td>#H5Lvisit</td> + * <td>Recursively visits all links starting from a specified group.</td> + * </tr> + * <tr> + * <td>#H5Ovisit</td> + * <td>Recursively visits all objects accessible from a specified object.</td> + * </tr> + * <tr> + * <td>#H5Lget_info</td> + * <td>Returns information about a link. Replaces H5Gget_objinfo.</td> + * </tr> + * <tr> + * <td>#H5Oget_info</td> + * <td>Retrieves the metadata for an object specified by an identifier. Replaces H5Gget_objinfo.</td> + * </tr> + * <tr> + * <td>#H5Lget_name_by_idx</td> + * <td>Retrieves name of the nth link in a group, according to the order within a specified field + * or index. Replaces H5Gget_objname_by_idx.</td> + * </tr> + * <tr> + * <td>#H5Oget_info_by_idx</td> + * <td>Retrieves the metadata for an object, identifying the object by an index position. Replaces + * H5Gget_objtype_by_idx.</td> + * </tr> + * <tr> + * <td>#H5Oget_info_by_name</td> + * <td>Retrieves the metadata for an object, identifying the object by location and relative name.</td> + * </tr> + * <tr> + * <td>#H5Oset_comment</td> + * <td>Sets the comment for specified object. Replaces H5Gset_comment.</td> + * </tr> + * <tr> + * <td>#H5Oget_comment</td> + * <td>Gets the comment for specified object. Replaces H5Gget_comment.</td> + * </tr> + * <tr> + * <td>#H5Ldelete</td> + * <td>Removes a link from a group. Replaces H5Gunlink.</td> + * </tr> + * <tr> + * <td>#H5Lmove</td> + * <td>Renames a link within an HDF5 file. 
Replaces H5Gmove and H5Gmove2.</td> + * </tr> + * </table> + * + * <table> + * <caption>Group creation property list functions</caption> + * <tr> + * <th>Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Pall_filters_avail</td> + * <td>Verifies that all required filters are available.</td> + * </tr> + * <tr> + * <td>#H5Pget_filter</td> + * <td>Returns information about a filter in a pipeline. The + * C function is a macro: \see \ref api-compat-macros.</td> + * </tr> + * <tr> + * <td>#H5Pget_filter_by_id</td> + * <td>Returns information about the specified filter. The + * C function is a macro: \see \ref api-compat-macros.</td> + * </tr> + * <tr> + * <td>#H5Pget_nfilters</td> + * <td>Returns the number of filters in the pipeline.</td> + * </tr> + * <tr> + * <td>#H5Pmodify_filter</td> + * <td>Modifies a filter in the filter pipeline.</td> + * </tr> + * <tr> + * <td>#H5Premove_filter</td> + * <td>Deletes one or more filters in the filter pipeline.</td> + * </tr> + * <tr> + * <td>#H5Pset_deflate</td> + * <td>Sets the deflate (GNU gzip) compression method and compression level.</td> + * </tr> + * <tr> + * <td>#H5Pset_filter</td> + * <td>Adds a filter to the filter pipeline.</td> + * </tr> + * <tr> + * <td>#H5Pset_fletcher32</td> + * <td>Sets up use of the Fletcher32 checksum filter.</td> + * </tr> + * <tr> + * <td>#H5Pset_link_phase_change</td> + * <td>Sets the parameters for conversion between compact and dense groups.</td> + * </tr> + * <tr> + * <td>#H5Pget_link_phase_change</td> + * <td>Queries the settings for conversion between compact and dense groups.</td> + * </tr> + * <tr> + * <td>#H5Pset_est_link_info</td> + * <td>Sets estimated number of links and length of link names in a group.</td> + * </tr> + * <tr> + * <td>#H5Pget_est_link_info</td> + * <td>Queries data required to estimate required local heap or object header size.</td> + * </tr> + * <tr> + * <td>#H5Pset_nlinks</td> + * <td>Sets maximum number of soft or user-defined link traversals.</td> 
+ * </tr> + * <tr> + * <td>#H5Pget_nlinks</td> + * <td>Retrieves the maximum number of link traversals.</td> + * </tr> + * <tr> + * <td>#H5Pset_link_creation_order</td> + * <td>Sets creation order tracking and indexing for links in a group.</td> + * </tr> + * <tr> + * <td>#H5Pget_link_creation_order</td> + * <td>Queries whether link creation order is tracked and/or indexed in a group.</td> + * </tr> + * <tr> + * <td>#H5Pset_create_intermediate_group</td> + * <td>Specifies in the property list whether to create missing intermediate groups.</td> + * </tr> + * <tr> + * <td>#H5Pget_create_intermediate_group</td> + * <td>Determines whether the property is set to enable creating missing intermediate groups.</td> + * </tr> + * <tr> + * <td>#H5Pset_char_encoding</td> + * <td>Sets the character encoding used to encode a string. Use to set ASCII or UTF-8 character + * encoding for object names.</td> + * </tr> + * <tr> + * <td>#H5Pget_char_encoding</td> + * <td>Retrieves the character encoding used to create a string.</td> + * </tr> + * </table> + * + * <table> + * <caption>Other external link functions</caption> + * <tr> + * <th>Function</th> + * <th>Purpose</th> + * </tr> + * <tr> + * <td>#H5Pset_elink_file_cache_size</td> + * <td>Sets the size of the external link open file cache from the specified + * file access property list.</td> + * </tr> + * <tr> + * <td>#H5Pget_elink_file_cache_size</td> + * <td>Retrieves the size of the external link open file cache from the specified + * file access property list.</td> + * </tr> + * <tr> + * <td>#H5Fclear_elink_file_cache</td> + * <td>Clears the external link open file cache for a file.</td> + * </tr> + * </table> + * + * \subsection subsec_group_program Programming Model for Groups + * The programming model for working with groups is as follows: + * <ol><li>Create a new group or open an existing one.</li> + * <li>Perform the desired operations on the group. 
+ * <ul><li>Create new objects in the group.</li> + * <li>Insert existing objects as group members.</li> + * <li>Delete existing members.</li> + * <li>Open and close member objects.</li> + * <li>Access information regarding member objects.</li> + * <li>Iterate across group members.</li> + * <li>Manipulate links.</li></ul> + * <li>Terminate access to the group (Close the group).</li></ol> + * + * \subsubsection subsubsec_group_program_create Creating a Group + * To create a group, use #H5Gcreate, specifying the location and the path of the new group. The + * location is the identifier of the file or the group in a file with respect to which the new group is to + * be identified. The path is a string that provides either an absolute path or a relative path to the + * new group. For more information, @see @ref subsubsec_group_descr_path. + * + * A path that begins with a slash (/) is + * an absolute path indicating that it locates the new group from the root group of the HDF5 file. A + * path that begins with any other character is a relative path. When the location is a file, a relative + * path is a path from that file’s root group; when the location is a group, a relative path is a path + * from that group. + * + * The sample code in the example below creates three groups. The group Data is created in the + * root directory; two groups are then created in /Data, one with absolute path, the other with a + * relative path. + * + * <em>Creating three new groups</em> + * \code + * hid_t file; + * file = H5Fopen(....); + * + * group = H5Gcreate(file, "/Data", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * group_new1 = H5Gcreate(file, "/Data/Data_new1", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * group_new2 = H5Gcreate(group, "Data_new2", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * \endcode + * The third #H5Gcreate parameter optionally specifies how much file space to reserve to store the + * names that will appear in this group. 
If a non-positive value is supplied, a default size is chosen. + * + * \subsubsection subsubsec_group_program_open Opening a Group and Accessing an Object in that Group + * Though it is not always necessary, it is often useful to explicitly open a group when working + * with objects in that group. Using the file created in the example above, the example below + * illustrates the use of a previously-acquired file identifier and a path relative to that file to open + * the group Data. + * + * Any object in a group can also be accessed by its absolute or relative path. To open an object + * using a relative path, an application must first open the group or file on which that relative path + * is based. To open an object using an absolute path, the application can use any location identifier + * in the same file as the target object; the file identifier is commonly used, but the object identifier for + * any object in that file will work. Both of these approaches are illustrated in the example below. + * + * Using the file created in the examples above, the example below provides sample code + * illustrating the use of both relative and absolute paths to access an HDF5 data object. The first + * sequence (two function calls) uses a previously-acquired file identifier to open the group Data, + * and then uses the returned group identifier and a relative path to open the dataset CData. The + * second approach (one function call) uses the same previously-acquired file identifier and an + * absolute path to open the same dataset. + * + * <em>Open a dataset with relative and absolute paths</em> + * \code + * group = H5Gopen(file, "Data", H5P_DEFAULT); + * + * dataset1 = H5Dopen(group, "CData", H5P_DEFAULT); + * dataset2 = H5Dopen(file, "/Data/CData", H5P_DEFAULT); + * \endcode + * + * \subsubsection subsubsec_group_program_dataset Creating a Dataset in a Specific Group + * Any dataset must be created in a particular group. 
As with groups, a dataset may be created in a + * particular group by specifying its absolute path or a relative path. The example below illustrates + * both approaches to creating a dataset in the group /Data. + * + * <em> Create a dataset with absolute and relative paths</em> + * \code + * dataspace = H5Screate_simple(RANK, dims, NULL); + * dataset1 = H5Dcreate(file, "/Data/CData", H5T_NATIVE_INT, dataspace, + * H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * group = H5Gopen(file, "Data", H5P_DEFAULT); + * dataset2 = H5Dcreate(group, "Cdata2", H5T_NATIVE_INT, dataspace, + * H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * \endcode + * + * \subsubsection subsubsec_group_program_close Closing a Group + * To ensure the integrity of HDF5 objects and to release system resources, an application should + * always call the appropriate close function when it is through working with an HDF5 object. In + * the case of groups, H5Gclose ends access to the group and releases any resources the HDF5 + * library has maintained in support of that access, including the group identifier. + * + * As illustrated in the example below, all that is required for an H5Gclose call is the group + * identifier acquired when the group was opened; there are no relative versus absolute path + * considerations. + * + * <em>Close a group</em> + * \code + * herr_t status; + * + * status = H5Gclose(group); + * \endcode + * + * A non-negative return value indicates that the group was successfully closed and the resources + * released; a negative return value indicates that the attempt to close the group or release resources + * failed. + * + * \subsubsection subsubsec_group_program_links Creating Links + * As previously mentioned, every object is created in a specific group. Once created, an object can + * be made a member of additional groups by means of links created with one of the H5Lcreate_* + * functions. 
+ * + * A link is, in effect, a path by which the target object can be accessed; it therefore has a name + * which functions as a single path component. A link can be removed with an #H5Ldelete call, + * effectively removing the target object from the group that contained the link (assuming, of + * course, that the removed link was the only link to the target object in the group). + * + * <h4>Hard Links</h4> + * There are two kinds of links, hard links and symbolic links. Hard links are reference counted; + * symbolic links are not. When an object is created, a hard link is automatically created. An object + * can be deleted from the file by removing all the hard links to it. + * + * Working with the file from the previous examples, the code in the example below illustrates the + * creation of a hard link, named Data_link, in the root group, /, to the group Data. Once that link is + * created, the dataset CData can be accessed via either of two absolute paths, /Data/CData or + * /Data_link/CData. + * + * <em>Create a hard link</em> + * \code + * status = H5Lcreate_hard(Data_loc_id, "Data", DataLink_loc_id, "Data_link", H5P_DEFAULT, H5P_DEFAULT); + * + * dataset1 = H5Dopen(file, "/Data_link/CData", H5P_DEFAULT); + * dataset2 = H5Dopen(file, "/Data/CData", H5P_DEFAULT); + * \endcode + * + * The example below shows example code to delete a link, deleting the hard link Data from the + * root group. The group /Data and its members are still in the file, but they can no longer be + * accessed via a path using the component /Data. + * + * <em>Delete a link</em> + * \code + * status = H5Ldelete(Data_loc_id, "Data", H5P_DEFAULT); + * + * dataset1 = H5Dopen(file, "/Data_link/CData", H5P_DEFAULT); + * // This call should succeed; all path components still exist + * dataset2 = H5Dopen(file, "/Data/CData", H5P_DEFAULT); + * // This call will fail; the path component '/Data' has been deleted. 
+ * \endcode + * + * When the last hard link to an object is deleted, the object is no longer accessible. #H5Ldelete will + * not prevent you from deleting the last link to an object. To see if an object has only one link, use + * the #H5Oget_info function. If the value of the rc (reference count) field in the #H5O_info_t structure is greater than 1, + * then the link can be deleted without making the object inaccessible. + * + * The example below shows a call to #H5Oget_info on the group originally called Data. + * + * <em>Finding the number of links to an object</em> + * \code + * status = H5Oget_info(Data_loc_id, object_info); + * \endcode + * + * It is possible to delete the last hard link to an object and not make the object inaccessible. + * Suppose your application opens a dataset, and then deletes the last hard link to the dataset. While + * the dataset is open, your application still has a connection to the dataset. If your application + * creates a hard link to the dataset before it closes the dataset, then the dataset will still be + * accessible. + * + * <h4>Symbolic Links</h4> + * Symbolic links are objects that assign a name in a group to a path. Notably, the target object is + * determined only when the symbolic link is accessed, and may, in fact, not exist. Symbolic links + * are not reference counted, so there may be zero, one, or more symbolic links to an object. + * + * The major types of symbolic links are soft links and external links. Soft links are symbolic links + * within an HDF5 file and are created with the #H5Lcreate_soft function. Symbolic links to objects + * located in external files, in other words external links, can be created with the + * #H5Lcreate_external function. Symbolic links are removed with the #H5Ldelete function. + * + * The example below shows the creation of two soft links to the group /Data. 
+ * + * <em>Create a soft link</em> + * \code + * status = H5Lcreate_soft(path_to_target, link_loc_id, "Soft2", H5P_DEFAULT, H5P_DEFAULT); + * status = H5Lcreate_soft(path_to_target, link_loc_id, "Soft3", H5P_DEFAULT, H5P_DEFAULT); + * dataset = H5Dopen(file, "/Soft2/CData", H5P_DEFAULT); + * \endcode + * + * With the soft links defined in the example above, the dataset CData in the group /Data can now + * be opened with any of the names /Data/CData, /Soft2/CData, or /Soft3/CData. + * + * In release 1.8.7, a cache was added to hold the names of files accessed via external links. The + * size of this cache can be changed to help improve performance. For more information, see the + * entry in the \ref RM for the #H5Pset_elink_file_cache_size function call. + * + * <h4>Note Regarding Hard Links and Soft Links</h4> + * Note that an object’s existence in a file is governed by the presence of at least one hard link to + * that object. If the last hard link to an object is removed, the object is removed from the file and + * any remaining soft link becomes a dangling link, a link whose target object does not exist. + * + * <h4>Moving or Renaming Objects, and a Warning</h4> + * An object can be renamed by changing the name of a link to it with #H5Lmove. This has the same + * effect as creating a new link with the new name and deleting the link with the old name. + * + * Exercise caution in the use of #H5Lmove and #H5Ldelete as these functions each include a step + * that unlinks a pointer to an HDF5 object. If the link that is removed is on the only path leading to + * an HDF5 object, that object will become permanently inaccessible in the file. + * + * <h5>Scenario 1: Removing the Last Link</h5> + * To avoid removing the last link to an object or otherwise making an object inaccessible, use the + * #H5Oget_info function. Make sure that the value of the reference count field (rc) is greater than 1. 
+ * + * <h5>Scenario 2: Moving a Link that Isolates an Object</h5> + * Consider the following example: assume that the group group2 can only be accessed via the + * following path, where top_group is a member of the file’s root group: + * <em>/top_group/group1/group2/</em> + * + * Using #H5Lmove, top_group is renamed to be a member of group2. At this point, since + * top_group was the only route from the root group to group1, there is no longer a path by which + * one can access group1, group2, or any member datasets. And since top_group is now a member + * of group2, top_group itself and any member datasets have thereby also become inaccessible. + * + * <h4>Mounting a File</h4> + * An external link is a permanent connection between two files. A temporary connection can be set + * up with the #H5Fmount function. For more information, @see @ref sec_file. + * For more information, see the #H5Fmount function in the \ref RM. + * + * \subsubsection subsubsec_group_program_info Discovering Information about Objects + * There is often a need to retrieve information about a particular object. The #H5Lget_info and + * #H5Oget_info functions fill this niche by returning a description of the object or link in an + * #H5L_info_t or #H5O_info_t structure. + * + * \subsubsection subsubsec_group_program_objs Discovering Objects in a Group + * To examine all the objects or links in a group, use the #H5Literate or #H5Ovisit functions to + * examine the objects, and use the #H5Lvisit function to examine the links. #H5Literate is useful + * both with a single group and in an iterative process that examines an entire file or section of a + * file (such as the contents of a group or the contents of all the groups that are members of that + * group) and acts on objects as they are encountered. #H5Ovisit recursively visits all objects + * accessible from a specified object. #H5Lvisit recursively visits all the links starting from a + * specified group. 
+ * + * \subsubsection subsubsec_group_program_all Discovering All of the Objects in the File + * The structure of an HDF5 file is self-describing, meaning that an application can navigate an + * HDF5 file to discover and understand all the objects it contains. This is an iterative process + * wherein the structure is traversed as a graph, starting at one node and recursively visiting linked + * nodes. To explore the entire file, the traversal should start at the root group. + * + * \subsection subsec_group_examples Examples of File Structures + * This section presents several samples of HDF5 file structures. + * + * Figure 9 shows examples of the structure of a file with three groups and one dataset. The file in + * part a contains three groups: the root group and two member groups. In part b, the dataset + * dset1 has been created in /group1. In part c, a link named dset2 from /group2 to the dataset has + * been added. Note that there is only one copy of the dataset; there are two links to it and it can be + * accessed either as /group1/dset1 or as /group2/dset2. + * + * Part d illustrates that one of the two links to the dataset can be deleted. In this case, the link from + * <em>/group1</em> + * has been removed. The dataset itself has not been deleted; it is still in the file but can only be + * accessed as + * <em>/group2/dset2</em> + * + * <table> + * <caption>Figure 9 - Some file structures</caption> + * <tr> + * <td> + * \image html Groups_fig9_a.gif "a) The file contains three groups: the root group, /group1, and /group2." + * </td> + * <td> + * \image html Groups_fig9_b.gif "b) The dataset dset1 (or /group1/dset1) is created in /group1." + * </td> + * </tr> + * <tr> + * <td> + * \image html Groups_fig9_aa.gif "c) A link named dset2 to the same dataset is created in /group2." + * </td> + * <td> + * \image html Groups_fig9_bb.gif "d) The link from /group1 to dset1 is removed. The dataset is + * still in the file, but can be accessed only as /group2/dset2." 
+ * </td> + * </tr> + * </table> + * + * Figure 10 illustrates loops in an HDF5 file structure. The file in part a contains three groups + * and a dataset; group2 is a member of the root group and of the root group’s other member group, + * group1. group2 thus can be accessed by either of two paths: /group2 or /group1/GXX. Similarly, + * the dataset can be accessed either as /group2/dset1 or as /group1/GXX/dset1. + * + * Part b illustrates a different case: the dataset is a member of a single group but with two links, or + * names, in that group. In this case, the dataset again has two names, /group1/dset1 and + * /group1/dset2. + * + * In part c, the dataset dset1 is a member of two groups, one of which can be accessed by either of + * two names. The dataset thus has three path names: /group1/dset1, /group2/dset2, and + * /group1/GXX/dset2. + * + * And in part d, two of the groups are members of each other and the dataset is a member of both + * groups. In this case, there are an infinite number of paths to the dataset because GXX and + * GYY can be traversed any number of times on the way from the root group, /, to the dataset. This + * can yield a path name such as /group1/GXX/GYY/GXX/GYY/GXX/dset2. + * + * <table> + * <caption>Figure 10 - More sample file structures</caption> + * <tr> + * <td> + * \image html Groups_fig10_a.gif "a) dset1 has two names: /group2/dset1 and /group1/GXX/dset1." + * </td> + * <td> + * \image html Groups_fig10_b.gif "b) dset1 again has two names: /group1/dset1 and /group1/dset2." + * </td> + * </tr> + * <tr> + * <td> + * \image html Groups_fig10_c.gif "c) dset1 has three names: /group1/dset1, /group2/dset2, and + * /group1/GXX/dset2." + * </td> + * <td> + * \image html Groups_fig10_d.gif "d) dset1 has an infinite number of available path names." + * </td> + * </tr> + * </table> + * + * Figure 11 takes us into the realm of soft links. The original file, in part a, contains only three + * hard links. 
In part b, a soft link named dset2 from group2 to /group1/dset1 has been created, + * making this dataset accessible as /group2/dset2. + * + * In part c, another soft link has been created in group2. But this time the soft link, dset3, points + * to a target object that does not yet exist. That target object, dset, has been added in part d and is + * now accessible as either /group2/dset or /group2/dset3. + * + * It could be said that HDF5 extends the organizing concepts of a file system to the internal + * structure of a single file. + * + * <table> + * <caption>Figure 11 - Hard and soft links</caption> + * <tr> + * <td> + * \image html Groups_fig11_a.gif "a) The file contains only hard links." + * </td> + * <td> + * \image html Groups_fig11_b.gif "b) A soft link is added from group2 to /group1/dset1." + * </td> + * </tr> + * <tr> + * <td> + * \image html Groups_fig11_c.gif "c) A soft link named dset3 is added with a target that does not yet exist." + * </td> + * <td> + * \image html Groups_fig11_d.gif "d) The target of the soft link is created or linked." + * </td> + * </tr> + * </table> + * + * Previous Chapter \ref sec_file - Next Chapter \ref sec_dataset + * + */ + +/** + * \defgroup H5G Groups (H5G) * * Use the functions in this module to manage HDF5 groups. * diff --git a/src/H5Gpublic.h b/src/H5Gpublic.h index ce36b84..c659a83 100644 --- a/src/H5Gpublic.h +++ b/src/H5Gpublic.h @@ -175,7 +175,7 @@ H5_DLL hid_t H5Gcreate_async(const char *app_file, const char *app_func, unsigne * H5Gclose() when the group is no longer needed so that resource * leaks will not develop. * - * \see H5Olink(), H5Dcreate(), Using Identifiers + * \see H5Olink(), H5Gcreate() * * \since 1.8.0 * @@ -735,7 +735,7 @@ H5_DLL herr_t H5Glink2(hid_t cur_loc_id, const char *cur_name, H5G_link_t type, * * \attention Exercise care in moving groups as it is possible to render data in * a file inaccessible with H5Gmove(). See The Group Interface in the - * HDF5 User's Guide. + * \ref UG. 
* * \version 1.8.0 Function deprecated in this release. * @@ -766,7 +766,7 @@ H5_DLL herr_t H5Gmove(hid_t src_loc_id, const char *src_name, const char *dst_na * * \attention Exercise care in moving groups as it is possible to render data in * a file inaccessible with H5Gmove2(). See The Group Interface in the - * HDF5 User's Guide. + * \ref UG. * * \version 1.8.0 Function deprecated in this release. * @@ -803,11 +803,11 @@ H5_DLL herr_t H5Gmove2(hid_t src_loc_id, const char *src_name, hid_t dst_loc_id, * Note that space identified as freespace is available for re-use only * as long as the file remains open; once a file has been closed, the * HDF5 library loses track of freespace. See “Freespace Management” in - * the HDF5 User's Guide for further details. + * the \ref UG for further details. * * \attention Exercise care in moving groups as it is possible to render data in * a file inaccessible with H5Gunlink(). See The Group Interface in the - * HDF5 User's Guide. + * \ref UG. * * \version 1.8.0 Function deprecated in this release. * diff --git a/src/H5Imodule.h b/src/H5Imodule.h index cd1cbcd..9470cc9 100644 --- a/src/H5Imodule.h +++ b/src/H5Imodule.h @@ -28,7 +28,12 @@ #define H5_MY_PKG H5I #define H5_MY_PKG_ERR H5E_ID -/**\defgroup H5I H5I +/** \page H5I_UG The HDF5 Identifiers + * @todo Under Construction + */ + +/** + * \defgroup H5I Identifiers (H5I) * * Use the functions in this module to manage identifiers defined by the HDF5 * library. See \ref H5IUD for user-defined identifiers and identifier diff --git a/src/H5Lmodule.h b/src/H5Lmodule.h index d52690e..cbb5060 100644 --- a/src/H5Lmodule.h +++ b/src/H5Lmodule.h @@ -28,7 +28,12 @@ #define H5_MY_PKG H5L #define H5_MY_PKG_ERR H5E_LINK -/**\defgroup H5L H5L +/** \page H5L_UG The HDF5 Links + * @todo Under Construction + */ + +/** + * \defgroup H5L Links (H5L) * * Use the functions in this module to manage HDF5 links and link types. 
* diff --git a/src/H5Mmodule.h b/src/H5Mmodule.h index e8d7c89..920ec3d 100644 --- a/src/H5Mmodule.h +++ b/src/H5Mmodule.h @@ -25,10 +25,24 @@ #define H5_MY_PKG H5M #define H5_MY_PKG_ERR H5E_MAP -/**\defgroup H5M H5M +/** + * \page H5M_UG The HDF5 VOL Data Mapping + * \Bold{The HDF5 Data Mapping can only be used with the HDF5 VOL connectors that + * implement map objects.} The native HDF5 library does not support this feature. + * + * \section sec_map The HDF5 Map Object * * \todo Describe the map life cycle. * + * \todo How does MAPL fit into \ref subsubsec_plist_class. + * + * Previous Chapter \ref sec_async - Next Chapter \ref sec_addition + * + */ + +/** + * \defgroup H5M VOL Mapping (H5M) + * * \details \Bold{The interface can only be used with the HDF5 VOL connectors that * implement map objects.} The native HDF5 library does not support this * feature. diff --git a/src/H5Omodule.h b/src/H5Omodule.h index 18e329c..afb005b 100644 --- a/src/H5Omodule.h +++ b/src/H5Omodule.h @@ -28,7 +28,12 @@ #define H5_MY_PKG H5O #define H5_MY_PKG_ERR H5E_OHDR -/**\defgroup H5O H5O +/** \page H5O_UG The HDF5 Objects + * @todo Under Construction + */ + +/** + * \defgroup H5O Objects (H5O) * * Use the functions in this module to manage HDF5 objects. * diff --git a/src/H5Opublic.h b/src/H5Opublic.h index ba352c8..a6cea39 100644 --- a/src/H5Opublic.h +++ b/src/H5Opublic.h @@ -311,7 +311,7 @@ H5_DLL hid_t H5Oopen_by_token(hid_t loc_id, H5O_token_t token); * * \return \hid_tv{object} * - * \details H5Open_by_idx() opens the nth object in the group specified by \p loc_id + * \details H5Oopen_by_idx() opens the nth object in the group specified by \p loc_id * and \p group_name. * * \p loc_id specifies a location identifier. @@ -778,7 +778,7 @@ H5_DLL herr_t H5Olink(hid_t obj_id, hid_t new_loc_id, const char *new_name, hid_ * * An object’s reference count is the number of hard links in the * file that point to that object. 
See the “Programming Model” - * section of the HDF5 Groups chapter in the -- <em>HDF5 User’s Guide</em> + * section of the HDF5 Groups chapter in the -- <em>\ref UG</em> * for a more complete discussion of reference counts. * * If a user application needs to determine an object’s reference @@ -813,7 +813,7 @@ H5_DLL herr_t H5Oincr_refcount(hid_t object_id); * * An object’s reference count is the number of hard links in the * file that point to that object. See the “Programming Model” - * section of the HDF5 Groups chapter in the <em>HDF5 User’s Guide</em> + * section of the HDF5 Groups chapter in the <em>\ref UG</em> * for a more complete discussion of reference counts. * * If a user application needs to determine an object’s reference diff --git a/src/H5PLmodule.h b/src/H5PLmodule.h index 4751a48..9331c86 100644 --- a/src/H5PLmodule.h +++ b/src/H5PLmodule.h @@ -2,7 +2,7 @@ * Copyright by The HDF Group. * * All rights reserved. * * * - * This file is part of HDF5. The full HDF5 copyright notice, including * + * This file is part of HDF5. The full HDF5 copyright notice, including * * terms governing use, modification, and redistribution, is contained in * * the COPYING file, which can be found at the root of the source code * * distribution tree, or in https://www.hdfgroup.org/licenses. * @@ -26,7 +26,12 @@ #define H5_MY_PKG H5PL #define H5_MY_PKG_ERR H5E_PLUGIN -/**\defgroup H5PL H5PL +/** \page H5PL_UG The HDF5 Plugins + * @todo Under Construction + */ + +/** + * \defgroup H5PL Dynamically-loaded Plugins (H5PL) * * Use the functions in this module to manage the loading behavior of HDF5 * plugins. 
diff --git a/src/H5Pmodule.h b/src/H5Pmodule.h index d771e6e..d5ef982 100644 --- a/src/H5Pmodule.h +++ b/src/H5Pmodule.h @@ -28,7 +28,860 @@ #define H5_MY_PKG H5P #define H5_MY_PKG_ERR H5E_PLIST -/**\defgroup H5P H5P +/** \page H5P_UG Properties and Property Lists in HDF5 + * + * \section sec_plist Properties and Property Lists in HDF5 + * + * HDF5 property lists are the main vehicle to configure the + * behavior of HDF5 API functions. + * + * Typically, property lists are created by instantiating one of the built-in + * or user-defined property list classes. After adding suitable properties, + * property lists are used when opening or creating HDF5 items, or when reading + * or writing data. Property lists can be modified by adding or changing + * properties. Property lists are deleted by closing the associated handles. + * + * \subsection subsec_plist_intro Introduction + * + * HDF5 properties and property lists make it possible to shape or modify an HDF5 file, group, + * dataset, attribute, committed datatype, or even an I/O stream, in a number of ways. For example, + * you can do any of the following: + * \li Customize the storage layout of a file to suit a project or task. + * \li Create a chunked dataset. + * \li Apply compression or filters to raw data. + * \li Use either ASCII or UTF-8 character encodings. + * \li Create missing groups on the fly. + * \li Switch between serial and parallel I/O. + * \li Create consistency within a single file or across an international project. + * + * Some properties enable an HDF5 application to take advantage of the capabilities of a specific + * computing environment while others make a file more compact; some speed the reading or + * writing of data while others enable more record-keeping at a per-object level. HDF5 offers + * nearly one hundred specific properties that can be used in literally thousands of combinations to + * maximize the usability of HDF5-stored data. 
+ * + * At the most basic level, a property list is a collection of properties, represented by name/value + * pairs that can be passed to various HDF5 functions, usually modifying default settings. A + * property list inherits a set of properties and values from a property list class. But that statement + * hardly provides a complete picture; in the rest of this section and in the next section, + * \ref subsec_plist_class , we will discuss these things in much more detail. + * After reading that material, the reader should have a reasonably complete understanding of how + * properties and property lists can be used in HDF5 applications. + * + * <table> + * <tr> + * <td> + * \image html PropListEcosystem.gif "The HDF5 property environment" + * </td> + * </tr> + * </table> + * + * The remaining sections in this chapter discuss the following topics: + * \li What are properties, property lists, and property list classes? + * \li Property list programming model + * \li Generic property functions + * \li Summary listings of property list functions + * \li Additional resources + * + * The discussions and function listings in this chapter focus on general property operations, object + * and link properties, and related functions. + * + * File, group, dataset, datatype, and attribute properties are discussed in the chapters devoted to + * those features, where that information will be most convenient to users. For example, \ref sec_dataset + * discusses dataset creation property lists and functions, dataset access property lists and + * functions, and dataset transfer property lists and functions. This chapter does not duplicate those + * discussions. + * + * Generic property operations are an advanced feature and are beyond the scope of this guide. 
+ * + * This chapter assumes an understanding of the following chapters of this \ref UG + * \li \ref sec_data_model + * \li \ref sec_program + * + * \subsection subsec_plist_class Property List Classes, Property Lists, and Properties + * + * HDF5 property lists and the property list interface \ref H5P provide a mechanism for storing + * characteristics of objects in an HDF5 file and economically passing them around in an HDF5 + * application. In this capacity, property lists significantly reduce the burden of additional function + * parameters throughout the HDF5 API. Another advantage of property lists is that features can + * often be added to HDF5 by adding only property list functions to the API; this is particularly true + * when all other requirements of the feature can be accomplished internally to the library. + * + * For instance, a file creation operation needs to know several things about a file, such as the size + * of the userblock or the sizes of various file data structures. Bundling this information as a + * property list simplifies the interface by reducing the number of parameters to the function + * \ref H5Fcreate. + * + * As illustrated in the figure above ("The HDF5 property environment"), the HDF5 property + * environment is a three-level hierarchy: + * \li Property list classes + * \li Property lists + * \li Properties + * + * The following subsections discuss property list classes, property lists, and properties in more detail. + * + * \subsubsection subsubsec_plist_class Property List Classes + * + * A property list class defines the roles that property lists of that class can play. Each class includes + * all properties that are valid for that class with each property set to its default value. HDF5 offers + * a property list class for each of the following situations. 
+ * + * <table> + * <caption align=top id="table_plist">Property list classes in HDF5</caption> + * <tr><th>Property List Class</th><th></th><th>For further discussion</th></tr> + * <tr valign="top"> + * <td> + * File creation (FCPL) + * </td> + * <td> + * \ref H5P_FILE_CREATE + * </td> + * <td> + * See various sections of \ref sec_file + * </td> + * </tr> + * <tr valign="top"> + * <td> + * File access (FAPL) + * </td> + * <td> + * \ref H5P_FILE_ACCESS + * </td> + * <td> + * Used only as \ref H5P_DEFAULT. + * </td> + * </tr> + * <tr valign="top"> + * <td> + * File mount (FMPL) + * </td> + * <td> + * \ref H5P_FILE_MOUNT + * </td> + * <td> + * For more information, see \ref FileMountProps "File Mount Properties" + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Object creation (OCPL) + * </td> + * <td> + * \ref H5P_OBJECT_CREATE + * </td> + * <td> + * See \ref OCPL + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Object copy (OCPYPL) + * </td> + * <td> + * \ref H5P_OBJECT_COPY + * </td> + * <td> + * + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Group creation (GCPL) + * </td> + * <td> + * \ref H5P_GROUP_CREATE + * </td> + * <td> + * See \ref subsec_group_program + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Group access (GAPL) + * </td> + * <td> + * \ref H5P_GROUP_ACCESS + * </td> + * <td> + * + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Link creation (LCPL) + * </td> + * <td> + * \ref H5P_LINK_CREATE + * </td> + * <td> + * See examples in \ref subsec_plist_program and \ref LCPL + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Link access (LAPL) + * </td> + * <td> + * \ref H5P_LINK_ACCESS + * </td> + * <td> + * + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Dataset creation (DCPL) + * </td> + * <td> + * \ref H5P_DATASET_CREATE + * </td> + * <td> + * See \ref subsec_dataset_program + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Dataset access (DAPL) + * </td> + * <td> + * \ref H5P_DATASET_ACCESS + * </td> + * <td> + * 
+ * </td> + * </tr> + * <tr valign="top"> + * <td> + * Dataset transfer (DXPL) + * </td> + * <td> + * \ref H5P_DATASET_XFER + * </td> + * <td> + * + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Datatype creation (TCPL) + * </td> + * <td> + * \ref H5P_DATATYPE_CREATE + * </td> + * <td> + * See various sections of \ref sec_datatype + * </td> + * </tr> + * <tr valign="top"> + * <td> + * String creation (STRCPL) + * </td> + * <td> + * \ref H5P_STRING_CREATE + * </td> + * <td> + * See \ref subsec_dataset_program and \ref subsec_datatype_program + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Attribute creation (ACPL) + * </td> + * <td> + * \ref H5P_ATTRIBUTE_CREATE + * </td> + * <td> + * See \ref subsec_attribute_work. + * </td> + * </tr> + * </table> + * + * Note: In the table above, the abbreviations to the right of each property list class name in this + * table are widely used in both HDF5 programmer documentation and HDF5 source code. For + * example, \ref FCPL (FCPL) is the file creation property list, \ref OCPL (OCPL) is the object creation + * property list, \ref OCPYPL (OCPYPL) is object copy property list, and \ref STRCPL (STRCPL) is the string + * creation property list. These abbreviations may appear in either uppercase or lowercase. + * + * The “HDF5 property list class inheritance hierarchy” figure, immediately following, illustrates + * the inheritance hierarchy of HDF5’s property list classes. Properties are defined at the root of the + * HDF5 property environment (\ref PLCR in the figure below). Property list + * classes then inherit properties from that root, either directly or indirectly through a parent class. + * In every case, a property list class inherits only the properties relevant to its role. For example, + * the \ref OCPL (OCPL) inherits all properties that are relevant to the + * creation of any object while the \ref GCPL (GCPL) inherits only those + * properties that are relevant to group creation. 
+ * + * <table> + * <tr> + * <td> + * \image html PropListClassInheritance.gif "HDF5 property list class inheritance hierarchy" + * </td> + * </tr> + * </table> + * Note: In the figure above, property list classes displayed in black are directly accessible through + * the programming interface; the root of the property environment and the \ref STRCPL and \ref OCPL + * property list classes, in gray above, are not user-accessible. The red empty set symbol indicates + * that the \ref FMPL (FMPL) is an empty class; that is, it has no settable + * properties. For more information, see \ref FileMountProps "File Mount Properties". Abbreviations + * used in this figure are defined in the preceding table, \ref table_plist "Property list classes in HDF5". + * + * \subsubsection subsubsec_plist_lists Property Lists + * + * A property list is a collection of related properties that are used together in specific + * circumstances. A new property list created from a property list class inherits the properties of the + * property list class and each property’s default value. A fresh dataset creation property list, for + * example, includes all of the HDF5 properties relevant to the creation of a new dataset. + * + * Property lists are implemented as containers holding a collection of name/value pairs. Each pair + * specifies a property name and a value for the property. A property list usually contains + * information for one to many properties. + * + * HDF5’s default property values are designed to be reasonable for general use cases. Therefore, + * an application can often use a property list without modification. On the other hand, adjusting + * property list settings is a routine action and there are many reasons for an application to do so. + * + * A new property list may either be derived from a property list class or copied from an existing + * property list.
When a property list is created from a property list class, it contains all the + * properties that are relevant to the class, with each property set to its default value. A new + * property list created by copying an existing property list will contain the same properties and + * property values as the original property list. In either case, the property values can be changed as + * needed through the HDF5 API. + * + * Property lists can be freely reused to create consistency. For example, a single set of file, group, + * and dataset creation property lists might be created at the beginning of a project and used to + * create hundreds, thousands, even millions, of consistent files, file structures, and datasets over + * the project’s life. When such consistency is important to a project, this is an economical means + * of providing it. + * + * \subsubsection subsubsec_plist_props Properties + * + * A property is the basic element of the property list hierarchy. HDF5 offers nearly one hundred + * properties controlling things ranging from file access rights, to the storage layout of a dataset, + * through optimizing the use of a parallel computing environment. 
+ * + * Further examples include the following: + * <table> + * <tr><th>Purpose</th><th>Examples</th><th>Property List</th></tr> + * <tr valign="top"> + * <td> + * Specify the driver to be used to open a file + * </td> + * <td> + * A POSIX driver or an MPI IO driver + * </td> + * <td> + * \ref FAPL + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Specify filters to be applied to a dataset + * </td> + * <td> + * Gzip compression or checksum evaluation + * </td> + * <td> + * \ref DCPL + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Specify whether to record key times associated with an object + * </td> + * <td> + * Creation time and/or last-modified time + * </td> + * <td> + * \ref OCPL + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Specify the access mode for a file opened via an external link + * </td> + * <td> + * Read-only or read-write + * </td> + * <td> + * \ref LAPL + * </td> + * </tr> + * </table> + * + * Each property is initialized with a default value. For each property, there are one or more + * dedicated H5Pset_* calls that can be used to change that value. + * + * <h4>Creation, access, and transfer properties:</h4> + * + * Properties fall into one of several major categories: creation properties, access properties, and + * transfer properties. + * + * Creation properties control permanent object characteristics. These characteristics must be + * established when an object is created, cannot change through the life of the object (they are + * immutable), and the property setting usually has a permanent presence in the file. + * + * <table> + * <caption align=top>Examples of creation properties include:</caption> + * <tr> + * <td> + * <p> + * Whether a dataset is stored in a compact, contiguous, or chunked layout <br /> + * <br /> + * The default for this dataset creation property (\ref H5Pset_layout) is that a dataset is + * stored in a contiguous block.
This works well for datasets with a known size limit that + * will fit easily in system memory. <br /> + * <br /> + * A chunked layout is important if a dataset is to be compressed, to enable extending + * the dataset’s size, or to enable caching during I/O. <br /> + * <br /> + * A compact layout is suitable only for very small datasets because the raw data is + * stored in the object header. + * </p> + * </td> + * </tr> + * <tr> + * <td> + * <p> + * Creation of intermediate groups when adding an object to an HDF5 file<br /> + * <br /> + * This link creation property, \ref H5Pset_create_intermediate_group, enables an + * application to add an object in a file without having to know that the group or group + * hierarchy containing that object already exists. With this property set, HDF5 + * automatically creates missing groups. If this property is not set, an application must + * verify that each group in the path exists, and create those that do not, before creating + * the new object; if any group is missing, the create operation will fail. + * </p> + * </td> + * </tr> + * <tr> + * <td> + * <p> + * Whether an HDF5 file is a single file or a set of tightly related files that form a virtual + * HDF5 file<br /> + * <br /> + * Certain file creation properties enable the application to select one of several file + * layouts. Examples of the available layouts include a standard POSIX-compliant + * layout (\ref H5Pset_fapl_sec2), a family of files (\ref H5Pset_fapl_family), and a split file + * layout that separates raw data and metadata into separate files (\ref H5Pset_fapl_split). + * These and other file layout options are discussed in \ref subsec_file_alternate_drivers. + * </p> + * </td> + * </tr> + * <tr> + * <td> + * <p> + * To enable error detection when creating a dataset<br /> + * <br /> + * In settings where data integrity is vulnerable, it may be desirable to set + * checksumming when datasets are created (\ref H5Pset_fletcher32). 
A subsequent + * application will then have a means to verify data integrity when reading the dataset. + * </p> + * </td> + * </tr> + * </table> + * + * Access properties control transient object characteristics. These characteristics may change with + * the circumstances under which an object is accessed. + * + * <table> + * <caption align=top>Examples of access properties include:</caption> + * <tr> + * <td> + * <p> + * The driver used to open a file<br /> + * <br /> + * For example, a file might be created with the MPI I/O driver (\ref H5Pset_fapl_mpio) + * during high-speed data acquisition in a parallel computing environment. The same + * file might later be analyzed in a serial computing environment with I/O access + * handled through the serial POSIX driver (\ref H5Pset_fapl_sec2). + * </p> + * </td> + * </tr> + * <tr> + * <td> + * <p> + * Optimization settings in specialized environments<br /> + * <br /> + * Optimizations differ across computing environments and according to the needs of + * the task being performed, so are transient by nature. + * </p> + * </td> + * </tr> + * </table> + * + * Transfer properties apply only to datasets and control transient aspects of data I/O. These + * characteristics may change with the circumstances under which data is accessed. + * + * <table> + * <caption align=top>Examples of dataset transfer properties include:</caption> + * <tr> + * <td> + * <p> + * To enable error detection when reading a dataset<br /> + * <br /> + * If checksumming has been set on a dataset (with \ref H5Pset_fletcher32, in the dataset + * creation property list), an application reading that dataset can choose whether to check + * for data integrity (\ref H5Pset_edc_check). 
+ * </p> + * </td> + * </tr> + * <tr> + * <td> + * <p> + * Various properties to optimize chunked data I/O on parallel computing systems<br /> + * <br /> + * HDF5 provides several properties for tuning I/O of chunked datasets in a parallel + * computing environment (\ref H5Pset_dxpl_mpio_chunk_opt, \ref H5Pset_dxpl_mpio_chunk_opt_num, + * \ref H5Pset_dxpl_mpio_chunk_opt_ratio, and \ref H5Pget_mpio_actual_chunk_opt_mode).<br /> + * <br /> + * Optimal settings differ due to the characteristics of a computing environment and due + * to an application’s data access patterns; even when working with the same file, these + * settings might change for every application and every platform. + * </p> + * </td> + * </tr> + * </table> + * + * \subsection subsec_plist_program Programming Model for Properties and Property Lists + * + * The programming model for HDF5 property lists is actually quite simple: + * \li Create a property list. + * \li Modify the property list, if required. + * \li Use the property list. + * \li Close the property list. + * + * There are nuances, of course, but that is the basic process. + * + * In some cases, you will not have to define property lists at all. If the default property settings are + * sufficient for your application, you can tell HDF5 to use the default property list. + * + * The following sections first discuss the use of default property lists, then each step of the + * programming model, and finally a few less frequently used property list operations. + * + * \subsubsection subsubsec_plist_default Using Default Property Lists + * + * Default property lists can simplify many routine HDF5 tasks because you do not always have to + * create every property list you use. + * + * An application that would be well-served by HDF5’s default property settings can use the default + * property lists simply by substituting the value \ref H5P_DEFAULT for a property list identifier. 
+ * HDF5 will then apply the default property list for the appropriate property list class. + * + * For example, the function \ref H5Dcreate2 calls for a link creation property list, a dataset creation + * property list, and a dataset access property list. If the default properties are suitable for a dataset, + * this call can be made as + * \code + * dset_id = H5Dcreate2( loc_id, name, dtype_id, space_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT ); + * \endcode + * HDF5 will then apply the default link creation, dataset creation, and dataset access property lists + * correctly. + * + * Of course, you would not want to do this without considering where it is appropriate, as there + * may be unforeseen consequences. Consider, for example, the use of chunked datasets. Optimal + * chunking is quite dependent on the makeup of the dataset and the most common access patterns, + * both of which must be taken into account in setting up the size and shape of chunks. + * + * \subsubsection subsubsec_plist_basic Basic Steps of the Programming Model + * + * The steps of the property list programming model are described in the sub-sections below. + * + * <h4>Create a Property List</h4> + * + * A new property list can be created either as an instance of a property list class or by copying an + * existing property list. Consider the following examples. A new dataset creation property list is + * first created "from scratch" with \ref H5Pcreate. A second dataset creation property list is then + * created by copying the first one with \ref H5Pcopy. + * + * \code + * dcplA_id = H5Pcreate (H5P_DATASET_CREATE); + * \endcode + * + * The new dataset creation property list is created as an instance of the property list class + * \ref H5P_DATASET_CREATE. + * + * The new dataset creation property list’s identifier is returned in dcplA_id and the property list is + * initialized with default dataset creation property values. 
+ * + * A list of valid classes appears in the table \ref table_plist "Property list classes in HDF5". + * + * \code + * dcplB_id = H5Pcopy (dcplA_id); + * \endcode + * + * A new dataset creation property list, dcplB_id, is created as a copy of dcplA_id and is initialized + * with dataset creation property values currently in dcplA_id. + * + * At this point, dcplA_id and dcplB_id are identical; they will both contain any modified property + * values that were changed in dcplA_id before dcplB_id was created. They may, however, diverge + * as additional property values are reset in each. + * + * While we are creating property lists, let’s create a link creation property list; we will need this + * property list when the new dataset is linked into the file below: + * \code + * lcplAB_id = H5Pcreate (H5P_LINK_CREATE); + * \endcode + * + * <h4>Change Property Values</h4> + * + * This section describes how to set property values. + * + * Later in this section, the dataset creation property lists dcplA_id and dcplB_id created in the + * section above will be used respectively to create chunked and contiguous datasets. To set this up, + * we must set the layout property in each property list. The following example sets dcplA_id for + * chunked datasets and dcplB_id for contiguous datasets: + * \code + * error = H5Pset_layout (dcplA_id, H5D_CHUNKED); + * error = H5Pset_layout (dcplB_id, H5D_CONTIGUOUS); + * \endcode + * + * Since dcplA_id specifies a chunked layout, we must also set the number of dimensions and the + * size of the chunks. The example below specifies that datasets created with dcplA_id will be + * 3-dimensional and that the chunk size will be 100 in each dimension: + * \code + * error = H5Pset_chunk (dcplA_id, 3, [100,100,100]); + * \endcode + * + * These datasets will be created with UTF-8 encoded names. 
To accomplish that, the following + * example sets the character encoding property in the link creation property list to create link + * names with UTF-8 encoding: + * \code + * error = H5Pset_char_encoding (lcplAB_id, H5T_CSET_UTF8); + * \endcode + * + * dcplA_id can now be used to create chunked datasets and dcplB_id to create contiguous datasets. + * And with the use of lcplAB_id, they will be created with UTF-8 encoded names. + * + * <h4>Use the Property List</h4> + * + * Once the required property lists have been created, they can be used to control various HDF5 + * processes. For illustration, consider dataset creation. + * + * Assume that the datatype dtypeAB and the dataspaces dspaceA and dspaceB have been defined + * and that the location identifier locAB_id specifies the group AB in the current HDF5 file. We + * have already created the required link creation and dataset creation property lists. + * For the sake of illustration, we assume that the default dataset access property list meets our application + * requirements. The following calls would create the datasets dsetA and dsetB in the group AB. + * The raw data in dsetA will be chunked while dsetB raw data will be contiguous; both datasets + * will have UTF-8 encoded link names: + * + * \code + * dsetA_id = H5Dcreate2( locAB_id, dsetA, dtypeAB, dspaceA_id, + * lcplAB_id, dcplA_id, H5P_DEFAULT ); + * dsetB_id = H5Dcreate2( locAB_id, dsetB, dtypeAB, dspaceB_id, + * lcplAB_id, dcplB_id, H5P_DEFAULT ); + * \endcode + * + * <h4>Close the Property List</h4> + * + * Generally, creating or opening anything in an HDF5 file results in an HDF5 identifier. These + * identifiers are of HDF5 type hid_t and include things like file identifiers, often expressed as + * file_id; dataset identifiers, dset_id; and property list identifiers, plist_id. To reduce the risk of + * memory leaks, all of these identifiers must be closed once they are no longer needed.
+ * + * Property list identifiers are no exception to this rule, and \ref H5Pclose is used for this purpose. The + * calls immediately following would close the property lists created and used in the examples above. + * + * \code + * error = H5Pclose (dcplA_id); + * error = H5Pclose (dcplB_id); + * error = H5Pclose (lcplAB_id); + * \endcode + * + * \subsubsection subsubsec_plist_additional Additional Property List Operations + * + * A few property list operations fall outside of the programming model described above. This + * section describes those operations. + * + * <h4>Query the Class of an Existing Property List</h4> + * + * Occasionally an application will have a property list but not know the corresponding property list + * class. A call such as in the following example will retrieve the unknown class of a known property list: + * \code + * PList_Class = H5Pget_class (dcplA_id); + * \endcode + * + * Upon this function’s return, PList_Class will contain the value \ref H5P_DATASET_CREATE indicating that + * dcplA_id is a dataset creation property list. + * + * <h4>Determine Current Creation Property List Settings in an Existing Object</h4> + * + * After a file has been created, another application may work on the file without knowing how the + * creation properties for the file were set up. Retrieving these property values is often unnecessary; + * HDF5 can read the data and knows how to deal with any properties it encounters. + * + * But sometimes an application must do something that requires knowing the creation property + * settings. HDF5 makes the acquisition of this information fairly straightforward; for each + * property setting call, H5Pset_*, there is a corresponding H5Pget_* call to retrieve the property’s + * current setting.
+ * + * Consider the following examples which illustrate the determination of dataset layout and chunking settings: + * + * The application must first identify the creation property list with the appropriate get creation property + * list call. There is one such call for each kind of object. + * + * \ref H5Dget_create_plist will return a property list identifier for the creation property list that was + * used to create the dataset. Call it DCPL1_id. + * + * \ref H5Pset_layout sets a dataset’s layout to be compact, contiguous, or chunked. + * + * \ref H5Pget_layout called with DCPL1_id will return the dataset’s layout, + * either \ref H5D_COMPACT, \ref H5D_CONTIGUOUS, or \ref H5D_CHUNKED. + * + * \ref H5Pset_chunk sets the rank of a dataset, that is the number of dimensions it will have, and the + * maximum size of each dimension. + * + * \ref H5Pget_chunk, also called with DCPL1_id, will return the rank of the dataset and the maximum + * size of each dimension. + * + * If a creation property value has not been explicitly set, these H5Pget_* calls will return the + * property’s default value. + * + * <h4>Determine Access Property Settings</h4> + * + * Access property settings are quite different from creation properties. Since access property + * settings are not retained in an HDF5 file or object, there is normally no knowledge of the settings + * that were used in the past. On the other hand, since access properties do not affect characteristics + * of the file or object, this is not normally an issue. For more information, see "Access and + * Creation Property Exceptions." + * + * One circumstance under which an application might need to determine access property settings + * might be when a file or object is already open but the application does not know the property list + * settings. In that case, the application can use the appropriate get access property list + * call to retrieve a property list identifier.
For example, if the dataset dsetA + * from the earlier examples is still open, the following call would return an identifier for the dataset + * access property list in use: + * \code + * dsetA_dacpl_id = H5Dget_access_plist( dsetA_id ); + * \endcode + * + * The application could then use the returned property list identifier to analyze the property settings. + * + * \subsection subsec_plist_generic Generic Properties Interface and User-defined Properties + * + * HDF5’s generic property interface provides tools for managing the entire property hierarchy and + * for the creation and management of user-defined property lists and properties. This interface also + * makes it possible for an application or a driver to create, modify, and manage custom properties, + * property lists, and property list classes. A comprehensive list of functions for this interface + * appears under "Generic Property Operations (Advanced)" in the "H5P: Property List Interface" + * section of the \ref RM. + * + * Further discussion of HDF5’s generic property interface and user-defined properties and + * property lists is beyond the scope of this document. + * + * \subsection subsec_plist_H5P Property List Function Summaries + * + * General property functions, generic property functions and macros, property functions that are + * used with multiple types of objects, and object and link property functions are listed below. + * + * Property list functions that apply to a specific type of object are listed in the chapter that + * discusses that object. For example, the \ref sec_dataset chapter has two property list function listings: + * one for dataset creation property list functions and one for dataset access property list functions. + * As has been stated, this chapter is not intended to describe every property list function.
+ * + * \ref H5P reference manual + * + * \subsection subsec_plist_resources Additional Property List Resources + * Property lists are ubiquitous in an HDF5 environment and are therefore discussed in many places + * in HDF5 documentation. The following sections and listings in the \ref UG are of + * particular interest: + * \li In the \ref sec_data_model chapter, see \ref subsubsec_data_model_abstract_plist. + * \li In the \ref sec_file chapter, see the following sections and listings: + * <ul> <li>\ref subsec_file_creation_access</li> + * <li>\ref subsec_file_property_lists</li> + * <li>\ref subsubsec_file_examples_props</li> + * <li>\ref subsubsec_file_examples_access</li> + * <li>"File creation property list functions (H5P)"</li> + * <li>"File access property list functions (H5P)"</li> + * <li>"File driver functions (H5P)"</li></ul> + * \li In the \ref sec_attribute chapter, see "Attribute creation property list functions (H5P)". + * \li In the \ref sec_group chapter, see "Group creation property list functions (H5P)". + * \li Property lists are discussed throughout \ref sec_dataset. + * + * All property list functions are described in the \ref H5P section of the + * \ref RM. The function index at the top of the page provides a categorized listing + * grouped by property list class. Those classes are listed below: + * \li File creation properties + * \li File access properties + * \li Group creation properties + * \li Dataset creation properties + * \li Dataset access properties + * \li Dataset transfer properties + * \li Link creation properties + * \li Link access properties + * \li Object creation properties + * \li Object copy properties + * + * Additional categories not related to the class structure are as follows: + * \li General property list operations + * \li Generic property list functions + * + * The general property functions can be used with any property list; the generic property functions + * constitute an advanced feature. 
+ * + * The in-memory file image feature of HDF5 uses property lists in a manner that differs + * substantially from their use elsewhere in HDF5. Those who plan to use in-memory file images + * must study "File Image Operations" (PDF) in the Advanced Topics in HDF5 collection. + * + * \subsection subsec_plist_notes Notes + * + * \anchor FileMountProps <h4>File Mount Properties</h4> + * + * While the file mount property list class \ref H5P_FILE_MOUNT is a valid HDF5 property list class, + * no file mount properties are defined by the HDF5 Library. References to a file mount property + * list should always be expressed as \ref H5P_DEFAULT, meaning the default file mount property list. + * + * <h4>Access and Creation Property Exceptions</h4> + * + * There are a small number of exceptions to the rule that creation properties are always retained in + * a file or object and access properties are never retained. + * + * The following properties are file access properties but they are not transient; they have + * permanent and different effects on a file. They could be validly classified as file creation + * properties as they must be set at creation time to properly create the file. But they are access + * properties because they must also be set when a file is reopened to properly access the file. + * <table> + * <tr><th>Property</th><th>Related function</th></tr> + * <tr valign="top"> + * <td> + * Family file driver + * </td> + * <td> + * \ref H5Pset_fapl_family + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Split file driver + * </td> + * <td> + * \ref H5Pset_fapl_split + * </td> + * </tr> + * <tr valign="top"> + * <td> + * Core file driver + * </td> + * <td> + * \ref H5Pset_fapl_core + * </td> + * </tr> + * </table> + * + * The following is a link creation property, but it is not relevant after an object has been created + * and is not retained in the file or object.
+ * <table> + * <tr><th>Property</th><th>Related function</th></tr> + * <tr valign="top"> + * <td> + * Create missing intermediate groups + * </td> + * <td> + * \ref H5Pset_create_intermediate_group + * </td> + * </tr> + * </table> + * + * Previous Chapter \ref sec_error - Next Chapter \ref sec_vol + * + * \defgroup H5P Property Lists (H5P) * * Use the functions in this module to manage HDF5 property lists and property * list classes. HDF5 property lists are the main vehicle to configure the @@ -60,135 +913,118 @@ * </tr> * </table> * - * \defgroup ALCAPL Attribute and Link Creation Properties - * \ingroup H5P + * \defgroup STRCPL String Creation Properties * Currently, there are only two creation properties that you can use to control * the creation of HDF5 attributes and links. The first creation property, the * choice of a character encoding, applies to both attributes and links. * The second creation property applies to links only, and advises the library * to automatically create missing intermediate groups when creating new objects. + * \ingroup H5P * - * \defgroup DAPL Dataset Access Properties + * \defgroup LCPL Link Creation Properties + * The first creation property, the choice of a character encoding, applies to + * both attributes and links. + * The second creation property applies to links only, and advises the library + * to automatically create missing intermediate groups when creating new objects. + * \ingroup STRCPL + * + * @see STRCPL + * + * \defgroup ACPL Attribute Creation Properties + * The creation property, the choice of a character encoding, applies to attributes. + * \ingroup STRCPL + * + * @see STRCPL + * + * \defgroup LAPL Link Access Properties * \ingroup H5P + * + * \defgroup DAPL Dataset Access Properties * Use dataset access properties to modify the default behavior of the HDF5 * library when accessing datasets. 
The properties include adjusting the size * of the chunk cache, providing prefixes for external content and virtual * dataset file paths, and controlling flush behavior, etc. These properties * are \Emph{not} persisted with datasets, and can be adjusted at runtime before * a dataset is created or opened. + * \ingroup LAPL * * \defgroup DCPL Dataset Creation Properties - * \ingroup H5P * Use dataset creation properties to control aspects of dataset creation such * as fill time, storage layout, compression methods, etc. * Unlike dataset access and transfer properties, creation properties \Emph{are} * stored with the dataset, and cannot be changed once a dataset has been * created. + * \ingroup OCPL * * \defgroup DXPL Dataset Transfer Properties - * \ingroup H5P * Use dataset transfer properties to customize certain aspects of reading * and writing datasets such as transformations, MPI-IO I/O mode, error * detection, etc. These properties are \Emph{not} persisted with datasets, * and can be adjusted at runtime before a dataset is read or written. + * \ingroup H5P * * \defgroup FAPL File Access Properties - * \ingroup H5P * Use file access properties to modify the default behavior of the HDF5 * library when accessing files. The properties include selecting a virtual * file driver (VFD), configuring the metadata cache (MDC), control * file locking, etc. These properties are \Emph{not} persisted with files, and * can be adjusted at runtime before a file is created or opened. + * \ingroup H5P * * \defgroup FCPL File Creation Properties - * \ingroup H5P * Use file creation properties to control aspects of file creation such * as setting a file space management strategy or creating a user block. * Unlike file access properties, creation properties \Emph{are} * stored with the file, and cannot be changed once a file has been * created. 
+ * \ingroup GCPL * * \defgroup GAPL General Access Properties - * \ingroup H5P * The functions in this section can be applied to different kinds of property * lists. + * \ingroup LAPL * * \defgroup GCPL Group Creation Properties - * \ingroup H5P * Use group creation properties to control aspects of group creation such * as storage layout, compression, and link creation order tracking. * Unlike file access properties, creation properties \Emph{are} * stored with the group, and cannot be changed once a group has been * created. + * \ingroup OCPL * - * \defgroup GPLO General Property List Operations - * \ingroup H5P - * + * \defgroup PLCR Property List Class Root * Use the functions in this module to manage HDF5 property lists. - * - * <table> - * <tr><th>Create</th><th>Read</th></tr> - * <tr valign="top"> - * <td> - * \snippet{lineno} H5P_examples.c create - * </td> - * <td> - * \snippet{lineno} H5P_examples.c read - * </td> - * <tr><th>Update</th><th>Delete</th></tr> - * <tr valign="top"> - * <td> - * \snippet{lineno} H5P_examples.c update - * </td> - * <td> - * \snippet{lineno} H5P_examples.c delete - * </td> - * </tr> - * </table> - * - * \defgroup GPLOA General Property List Operations (Advanced) * \ingroup H5P * + * \defgroup PLCRA Property List Class Root (Advanced) * You can create and customize user-defined property list classes using the * functions described below. Arbitrary user-defined properties can also * be inserted into existing property lists as so-called temporary properties. 
- * - * <table> - * <tr><th>Create</th><th>Read</th></tr> - * - * <tr valign="top"> - * <td> - * \snippet{lineno} H5P_examples.c create_class - * </td> - * <td> - * \snippet{lineno} H5P_examples.c read_class - * </td> - * <tr><th>Update</th><th>Delete</th></tr> - * <tr valign="top"> - * <td> - * \snippet{lineno} H5P_examples.c update_class - * </td> - * <td> - * \snippet{lineno} H5P_examples.c delete_class - * </td> - * </tr> - * </table> - * - * \defgroup LAPL Link Access Properties * \ingroup H5P * * - * \defgroup MAPL Map Access Properties - * \ingroup H5P - * \defgroup OCPL Object Creation Properties * \ingroup H5P * + * \defgroup OCPYPL Object Copy Properties + * \ingroup H5P * - * \defgroup OCPPL Object Copy Properties + * \defgroup FMPL File Mount Properties + * Empty property class. * \ingroup H5P * * + * \defgroup TCPL Datatype Creation Properties + * TCPL isn't supported yet. + * \ingroup OCPL + * + * + * \defgroup TAPL Datatype Access Properties + * TAPL isn't supported yet. + * \ingroup LAPL + * + * + * */ #endif /* H5Pmodule_H */ diff --git a/src/H5Ppublic.h b/src/H5Ppublic.h index 8c021f2..5bf2b21 100644 --- a/src/H5Ppublic.h +++ b/src/H5Ppublic.h @@ -392,7 +392,7 @@ H5_DLLVAR hid_t H5P_CLS_LINK_ACCESS_ID_g; H5_DLLVAR hid_t H5P_CLS_VOL_INITIALIZE_ID_g; H5_DLLVAR hid_t H5P_CLS_REFERENCE_ACCESS_ID_g; -/* Default roperty list IDs */ +/* Default property list IDs */ /* (Internal to library, do not use! 
Use macros above) */ H5_DLLVAR hid_t H5P_LST_FILE_CREATE_ID_g; H5_DLLVAR hid_t H5P_LST_FILE_ACCESS_ID_g; @@ -421,7 +421,7 @@ H5_DLLVAR hid_t H5P_LST_REFERENCE_ACCESS_ID_g; /* Generic property list routines */ /** - * \ingroup GPLO + * \ingroup PLCR * * \brief Terminates access to a property list * @@ -439,7 +439,7 @@ H5_DLLVAR hid_t H5P_LST_REFERENCE_ACCESS_ID_g; */ H5_DLL herr_t H5Pclose(hid_t plist_id); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Closes an existing property list class * @@ -456,7 +456,7 @@ H5_DLL herr_t H5Pclose(hid_t plist_id); */ H5_DLL herr_t H5Pclose_class(hid_t plist_id); /** - * \ingroup GPLO + * \ingroup PLCR * * \brief Copies an existing property list to create a new property list * @@ -473,7 +473,7 @@ H5_DLL herr_t H5Pclose_class(hid_t plist_id); */ H5_DLL hid_t H5Pcopy(hid_t plist_id); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Copies a property from one list or class to another * @@ -509,7 +509,7 @@ H5_DLL hid_t H5Pcopy(hid_t plist_id); */ H5_DLL herr_t H5Pcopy_prop(hid_t dst_id, hid_t src_id, const char *name); /** - * \ingroup GPLO + * \ingroup PLCR * * \brief Creates a new property list as an instance of a property list class * @@ -633,7 +633,7 @@ H5_DLL herr_t H5Pcopy_prop(hid_t dst_id, hid_t src_id, const char *name); */ H5_DLL hid_t H5Pcreate(hid_t cls_id); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Creates a new property list class * @@ -676,7 +676,7 @@ H5_DLL hid_t H5Pcreate_class(hid_t parent, const char *name, H5P_cls_create_func H5P_cls_copy_func_t copy, void *copy_data, H5P_cls_close_func_t close, void *close_data); /** - * \ingroup GPLO + * \ingroup PLCR * * \brief Decodes property list received in a binary object buffer and * returns a new property list identifier @@ -705,7 +705,7 @@ H5_DLL hid_t H5Pcreate_class(hid_t parent, const char *name, H5P_cls_create_func */ H5_DLL hid_t H5Pdecode(const void *buf); /** - * \ingroup GPLO + * \ingroup PLCR * * \brief Encodes the property values in a 
property list into a binary * buffer @@ -759,7 +759,7 @@ H5_DLL hid_t H5Pdecode(const void *buf); */ H5_DLL herr_t H5Pencode2(hid_t plist_id, void *buf, size_t *nalloc, hid_t fapl_id); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Compares two property lists or classes for equality * @@ -779,7 +779,7 @@ H5_DLL herr_t H5Pencode2(hid_t plist_id, void *buf, size_t *nalloc, hid_t fapl_i */ H5_DLL htri_t H5Pequal(hid_t id1, hid_t id2); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Queries whether a property name exists in a property list or * class @@ -797,7 +797,7 @@ H5_DLL htri_t H5Pequal(hid_t id1, hid_t id2); */ H5_DLL htri_t H5Pexist(hid_t plist_id, const char *name); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Queries the value of a property * @@ -829,7 +829,7 @@ H5_DLL htri_t H5Pexist(hid_t plist_id, const char *name); */ H5_DLL herr_t H5Pget(hid_t plist_id, const char *name, void *value); /** - *\ingroup GPLO + * \ingroup PLCR * * \brief Returns the property list class identifier for a property list * @@ -892,7 +892,7 @@ H5_DLL herr_t H5Pget(hid_t plist_id, const char *name, void *value); */ H5_DLL hid_t H5Pget_class(hid_t plist_id); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Retrieves the name of a class * @@ -1036,7 +1036,7 @@ H5_DLL hid_t H5Pget_class(hid_t plist_id); */ H5_DLL char *H5Pget_class_name(hid_t pclass_id); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Retrieves the parent class of a property class * @@ -1052,7 +1052,7 @@ H5_DLL char *H5Pget_class_name(hid_t pclass_id); */ H5_DLL hid_t H5Pget_class_parent(hid_t pclass_id); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Queries the number of properties in a property list or class * @@ -1075,7 +1075,7 @@ H5_DLL hid_t H5Pget_class_parent(hid_t pclass_id); */ H5_DLL herr_t H5Pget_nprops(hid_t id, size_t *nprops); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Queries the size of a property value in bytes * @@ -1096,7 +1096,7 @@ H5_DLL herr_t 
H5Pget_nprops(hid_t id, size_t *nprops); */ H5_DLL herr_t H5Pget_size(hid_t id, const char *name, size_t *size); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Registers a temporary property with a property list * @@ -1346,7 +1346,7 @@ H5_DLL herr_t H5Pinsert2(hid_t plist_id, const char *name, size_t size, void *va H5P_prp_get_func_t get, H5P_prp_delete_func_t prp_del, H5P_prp_copy_func_t copy, H5P_prp_compare_func_t compare, H5P_prp_close_func_t close); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Determines whether a property list is a member of a class * @@ -1366,7 +1366,7 @@ H5_DLL herr_t H5Pinsert2(hid_t plist_id, const char *name, size_t size, void *va */ H5_DLL htri_t H5Pisa_class(hid_t plist_id, hid_t pclass_id); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Iterates over properties in a property class or list * @@ -1412,7 +1412,7 @@ H5_DLL htri_t H5Pisa_class(hid_t plist_id, hid_t pclass_id); */ H5_DLL int H5Piterate(hid_t id, int *idx, H5P_iterate_t iter_func, void *iter_data); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Registers a permanent property with a property list class * @@ -1693,7 +1693,7 @@ H5_DLL herr_t H5Pregister2(hid_t cls_id, const char *name, size_t size, void *de H5P_prp_delete_func_t prp_del, H5P_prp_copy_func_t copy, H5P_prp_compare_func_t compare, H5P_prp_close_func_t close); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Removes a property from a property list * @@ -1719,7 +1719,7 @@ H5_DLL herr_t H5Pregister2(hid_t cls_id, const char *name, size_t size, void *de */ H5_DLL herr_t H5Premove(hid_t plist_id, const char *name); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Sets a property list value * @@ -1751,7 +1751,7 @@ H5_DLL herr_t H5Premove(hid_t plist_id, const char *name); */ H5_DLL herr_t H5Pset(hid_t plist_id, const char *name, const void *value); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Removes a property from a property list class * @@ -1770,8 +1770,6 @@ H5_DLL herr_t 
H5Pset(hid_t plist_id, const char *name, const void *value); */ H5_DLL herr_t H5Punregister(hid_t pclass_id, const char *name); -/* Object creation property list (OCPL) routines */ - /** * \ingroup DCPL * @@ -1791,6 +1789,9 @@ H5_DLL herr_t H5Punregister(hid_t pclass_id, const char *name); * */ H5_DLL htri_t H5Pall_filters_avail(hid_t plist_id); + +/* Object creation property list (OCPL) routines */ + /** * \ingroup OCPL * @@ -8203,7 +8204,7 @@ H5_DLL herr_t H5Pset_dataset_io_hyperslab_selection(hid_t plist_id, unsigned ran /* Link creation property list (LCPL) routines */ /** - * \ingroup ALCAPL + * \ingroup STRCPL * * \brief Determines whether property is set to enable creating missing * intermediate groups @@ -8234,7 +8235,7 @@ H5_DLL herr_t H5Pset_dataset_io_hyperslab_selection(hid_t plist_id, unsigned ran */ H5_DLL herr_t H5Pget_create_intermediate_group(hid_t plist_id, unsigned *crt_intmd /*out*/); /** - * \ingroup ALCAPL + * \ingroup STRCPL * * \brief Specifies in property list whether to create missing * intermediate groups @@ -8618,7 +8619,7 @@ H5_DLL herr_t H5Pget_map_iterate_hints(hid_t mapl_id, size_t *key_prefetch_size /* String creation property list (STRCPL) routines */ /** - * \ingroup ALCAPL + * \ingroup STRCPL * * \brief Retrieves the character encoding used to create a link or * attribute name @@ -8647,7 +8648,7 @@ H5_DLL herr_t H5Pget_map_iterate_hints(hid_t mapl_id, size_t *key_prefetch_size */ H5_DLL herr_t H5Pget_char_encoding(hid_t plist_id, H5T_cset_t *encoding /*out*/); /** - * \ingroup ALCAPL + * \ingroup STRCPL * * \brief Sets the character encoding used to encode link and attribute * names @@ -8688,7 +8689,6 @@ H5_DLL herr_t H5Pget_char_encoding(hid_t plist_id, H5T_cset_t *encoding /*out*/) */ H5_DLL herr_t H5Pset_char_encoding(hid_t plist_id, H5T_cset_t encoding); -/* Link access property list (LAPL) routines */ /** * \ingroup LAPL * @@ -9047,7 +9047,7 @@ H5_DLL herr_t H5Pset_nlinks(hid_t plist_id, size_t nlinks); /* Object copy 
property list (OCPYPL) routines */ /** - * \ingroup OCPPL + * \ingroup OCPYPL * * \brief Adds a path to the list of paths that will be searched in the * destination file for a matching committed datatype @@ -9162,7 +9162,7 @@ H5_DLL herr_t H5Pset_nlinks(hid_t plist_id, size_t nlinks); */ H5_DLL herr_t H5Padd_merge_committed_dtype_path(hid_t plist_id, const char *path); /** - * \ingroup OCPPL + * \ingroup OCPYPL * * \brief Clears the list of paths stored in the object copy property list * @@ -9213,7 +9213,7 @@ H5_DLL herr_t H5Padd_merge_committed_dtype_path(hid_t plist_id, const char *path */ H5_DLL herr_t H5Pfree_merge_committed_dtype_paths(hid_t plist_id); /** - * \ingroup OCPPL + * \ingroup OCPYPL * * \brief Retrieves the properties to be used when an object is copied * @@ -9238,7 +9238,7 @@ H5_DLL herr_t H5Pfree_merge_committed_dtype_paths(hid_t plist_id); */ H5_DLL herr_t H5Pget_copy_object(hid_t plist_id, unsigned *copy_options /*out*/); /** - * \ingroup OCPPL + * \ingroup OCPYPL * * \brief Retrieves the callback function from the specified object copy * property list @@ -9276,7 +9276,7 @@ H5_DLL herr_t H5Pget_copy_object(hid_t plist_id, unsigned *copy_options /*out*/) */ H5_DLL herr_t H5Pget_mcdt_search_cb(hid_t plist_id, H5O_mcdt_search_cb_t *func, void **op_data); /** - * \ingroup OCPPL + * \ingroup OCPYPL * * \brief Sets properties to be used when an object is copied * @@ -9369,7 +9369,7 @@ H5_DLL herr_t H5Pget_mcdt_search_cb(hid_t plist_id, H5O_mcdt_search_cb_t *func, */ H5_DLL herr_t H5Pset_copy_object(hid_t plist_id, unsigned copy_options); /** - * \ingroup OCPPL + * \ingroup OCPYPL * * \brief Sets the callback function that H5Ocopy() will invoke before * searching the entire destination file for a matching committed @@ -9467,7 +9467,7 @@ H5_DLL herr_t H5Pset_mcdt_search_cb(hid_t plist_id, H5O_mcdt_search_cb_t func, v /* Typedefs */ /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Registers a permanent property with a property list class * @@ 
-9597,7 +9597,7 @@ H5_DLL herr_t H5Pregister1(hid_t cls_id, const char *name, size_t size, void *de H5P_prp_get_func_t prp_get, H5P_prp_delete_func_t prp_del, H5P_prp_copy_func_t prp_copy, H5P_prp_close_func_t prp_close); /** - * \ingroup GPLOA + * \ingroup PLCRA * * \brief Registers a temporary property with a property list * @@ -9709,7 +9709,7 @@ H5_DLL herr_t H5Pinsert1(hid_t plist_id, const char *name, size_t size, void *va H5P_prp_delete_func_t prp_delete, H5P_prp_copy_func_t prp_copy, H5P_prp_close_func_t prp_close); /** - * \ingroup GPLO + * \ingroup PLCRA * * \brief Encodes the property values in a property list into a binary * buffer diff --git a/src/H5Rmodule.h b/src/H5Rmodule.h index d9ab968..5e3affb 100644 --- a/src/H5Rmodule.h +++ b/src/H5Rmodule.h @@ -24,34 +24,17 @@ #define H5_MY_PKG H5R #define H5_MY_PKG_ERR H5E_REFERENCE +/** \page H5R_UG The HDF5 References + * @todo Under Construction + */ + /** - * \defgroup H5R H5R + * \defgroup H5R References (H5R) * * Use the functions in this module to manage HDF5 references. Referents can * be HDF5 objects, attributes, and selections on datasets a.k.a. dataset * regions. 
* - * - * <table> - * <tr><th>Create</th><th>Read</th></tr> - * <tr valign="top"> - * <td> - * \snippet{lineno} H5R_examples.c create - * </td> - * <td> - * \snippet{lineno} H5R_examples.c read - * </td> - * <tr><th>Update</th><th>Delete</th></tr> - * <tr valign="top"> - * <td> - * \snippet{lineno} H5R_examples.c update - * </td> - * <td> - * \snippet{lineno} H5R_examples.c delete - * </td> - * </tr> - * </table> - * */ #endif /* H5Rmodule_H */ diff --git a/src/H5Smodule.h b/src/H5Smodule.h index 72d722a..73f5953 100644 --- a/src/H5Smodule.h +++ b/src/H5Smodule.h @@ -28,7 +28,1494 @@ #define H5_MY_PKG H5S #define H5_MY_PKG_ERR H5E_DATASPACE -/**\defgroup H5S H5S +/** \page H5S_UG Dataspaces and Partial I/O + * + * + * \section sec_dataspace HDF5 Dataspaces and Partial I/O + * + * HDF5 dataspaces describe the \Emph{shape} of datasets in memory or in HDF5 + * files. Dataspaces can be empty (#H5S_NULL), a singleton (#H5S_SCALAR), or + * a multi-dimensional, regular grid (#H5S_SIMPLE). Dataspaces can be re-shaped. + * + * Subsets of dataspaces can be "book-marked" or used to restrict I/O operations + * using \Emph{selections}. Furthermore, certain set operations are supported + * for selections. + * + * \subsection subsec_dataspace_intro Introduction + * + * The HDF5 \Emph{dataspace} is a required component of an HDF5 dataset or attribute definition. The dataspace + * defines the size and shape of the dataset or attribute raw data. In other words, a dataspace defines the + * number of dimensions and the size of each dimension of the multidimensional array in which the raw data + * is represented. The dataspace must be defined when the dataset or attribute is created. + * + * The \Emph{dataspace} is also used during dataset I/O operations, defining the elements of the dataset that + * participate in the I/O operation. + * + * This chapter explains the \Emph{dataspace} object and its use in dataset and attribute creation and data + * transfer. 
It also describes selection operations on a dataspace used to implement sub‐setting, + * sub‐sampling, and scatter‐gather access to datasets. + * + * \subsection subsec_dataspace_function Dataspace Function Summaries + * @see H5S reference manual provides a reference list of dataspace functions, the H5S APIs. + * + * \subsection subsec_dataspace_program Definition of Dataspace Objects and the Dataspace Programming Model + * + * This section introduces the notion of the HDF5 dataspace object and a programming model for creating + * and working with dataspaces. + * + * \subsubsection subsubsec_dataspace_program_object Dataspace Objects + * + * An HDF5 dataspace is a required component of an HDF5 dataset or attribute. A dataspace defines the size + * and the shape of a dataset’s or an attribute’s raw data. Currently, HDF5 supports the following types of + * the dataspaces: + * \li Scalar dataspaces + * \li Simple dataspaces + * \li Null dataspaces + * + * A scalar dataspace, #H5S_SCALAR, represents just one element, a scalar. Note that the datatype of this one + * element may be very complex; example would be a compound structure with members being of any + * allowed HDF5 datatype, including multidimensional arrays, strings, and nested compound structures. By + * convention, the rank of a scalar dataspace is always 0 (zero); think of it geometrically as a single, + * dimensionless point, though that point may be complex. + * + * A simple dataspace, #H5S_SIMPLE , is a multidimensional array of elements. The dimensionality of the + * dataspace (or the rank of the array) is fixed and is defined at creation time. The size of each dimension + * can grow during the life time of the dataspace from the current size up to the maximum size. Both the + * current size and the maximum size are specified at creation time. The sizes of dimensions at any particular + * time in the life of a dataspace are called the current dimensions, or the dataspace extent. 
They can be + * queried along with the maximum sizes. + * + * A null dataspace, #H5S_NULL, contains no data elements. Note that no selections can be applied to a null + * dataspace as there is nothing to select. + * + * As shown in the UML diagram in the figure below, an HDF5 simple dataspace object has three attributes: + * the rank or number of dimensions; the current sizes, expressed as an array of length rank with each element + * of the array denoting the current size of the corresponding dimension; and the maximum sizes, + * expressed as an array of length rank with each element of the array denoting the maximum size of the + * corresponding dimension. + * + * <table> + * <tr> + * <td> + * \image html Dspace_simple.gif "A simple dataspace" + * </td> + * </tr> + * </table> + * + * \em Note: A simple dataspace is defined by its rank, the current size of each dimension, and the maximum + * size of each dimension. + * + * The size of a current dimension cannot be greater than the maximum size, which can be unlimited, specified + * as #H5S_UNLIMITED. Note that while the HDF5 file format and library impose no maximum size on an + * unlimited dimension, practically speaking its size will always be limited to the biggest integer available + * on the particular system being used. + * + * Dataspace rank is restricted to 32, the standard limit in C on the rank of an array, in the current + * implementation of the HDF5 Library. The HDF5 file format, on the other hand, allows any rank up to the + * maximum integer value on the system, so the library restriction can be raised in the future if higher + * dimensionality is required. + * + * Note that most of the time Fortran applications calling HDF5 will work with dataspaces of rank less than + * or equal to seven, since seven is the maximum number of dimensions in a Fortran array. But dataspace rank + * is not limited to seven for Fortran applications.
+ * + * The current dimensions of a dataspace, also referred to as the dataspace extent, define the bounding box + * for dataset elements that can participate in I/O operations. + * + * \subsubsection subsubsec_dataspace_program_model Dataspace Programming Model + * + * The programming model for creating and working with HDF5 dataspaces can be summarized as follows: + * \li 1. Create a dataspace + * \li 2. Use the dataspace to create a dataset in the file or to describe a data array in memory + * \li 3. Modify the dataspace to define dataset elements that will participate in I/O operations + * \li 4. Use the modified dataspace while reading/writing dataset raw data or to create a region reference + * \li 5. Close the dataspace when no longer needed + * + * The rest of this section will address steps 1, 2, and 5 of the programming model; steps 3 and 4 will be + * discussed in later sections of this chapter. + * + * <h4>Creating a Dataspace</h4> + * + * A dataspace can be created by calling the \ref H5Screate function. Since the + * definition of a simple dataspace requires the specification of dimensionality (or rank) and initial and + * maximum dimension sizes, the HDF5 Library provides a convenience API, \ref H5Screate_simple to create a + * simple dataspace in one step. + * + * The following examples illustrate the usage of these APIs. + * + * <h4>Creating a Scalar Dataspace</h4> + * + * Creating a Scalar Dataspace + * \code + * hid_t space_id; + * . . . + * space_id = H5Screate(H5S_SCALAR); + * \endcode + * As mentioned above, the dataspace will contain only one element. Scalar dataspaces are used more often + * for describing attributes that have just one value. For example, the attribute temperature with the value + * Celsius is used to indicate that the dataset with this attribute stores temperature values using the + * Celsius scale. + * + * <h4>Creating a Null Dataspace</h4> + * + * A null dataspace is created with the \ref H5Screate function. 
+ * \code + * hid_t space_id; + * . . . + * space_id = H5Screate(H5S_NULL); + * \endcode + * As mentioned above, the dataspace will contain no elements. + * + * <h4>Creating a Simple Dataspace</h4> + * + * Let’s assume that an application wants to store a two‐dimensional array of data, A(20,100). During the + * life of the application, the first dimension of the array can grow up to 30; there is no restriction on + * the size of the second dimension. The following steps are used to declare a dataspace for the dataset + * in which the array data will be stored. + * \code + * hid_t space_id; + * int rank = 2; + * hsize_t current_dims[2] = {20, 100}; + * hsize_t max_dims[2] = {30, H5S_UNLIMITED}; + * . . . + * space_id = H5Screate(H5S_SIMPLE); + * H5Sset_extent_simple(space_id, rank, current_dims, max_dims); + * \endcode + * + * Alternatively, the convenience APIs H5Screate_simple/h5screate_simple_f can replace the + * H5Screate/h5screate_f and H5Sset_extent_simple/h5sset_extent_simple_f calls. + * \code + * space_id = H5Screate_simple(rank, current_dims, max_dims); + * \endcode + * + * In this example, a dataspace with current dimensions of 20 by 100 is created. The first dimension can be + * extended only up to 30. The second dimension, however, is declared unlimited; it can be extended up to + * the largest available integer value on the system. + * + * Note that when there is a difference between the current dimensions and the maximum dimensions of an + * array, then chunking storage must be used. In other words, if the sizes of the dimensions may change over + * the life of the dataset, then chunking must be used. If the array dimensions are fixed (if the current + * dimension sizes are equal to the maximum dimension sizes when the dataset is created), then + * contiguous storage can be used. For more information, see "Data Transfer". + * + * Maximum dimensions can be the same as current dimensions. 
In such a case, the sizes of dimensions + * cannot be changed during the life of the dataspace object. In C, \c NULL can be used to indicate to the + * \ref H5Screate_simple and \ref H5Sset_extent_simple functions that the maximum sizes of all dimensions + * are the same as the current sizes. + * \code + * space_id = H5Screate_simple(rank, current_dims, NULL); + * \endcode + * The created dataspace will have current and maximum dimensions of 20 and 100 correspondingly, and the + * sizes of those dimensions cannot be changed. + * + * <h4>C versus Fortran Dataspaces</h4> + * + * Dataspace dimensions are numbered from 1 to rank. HDF5 uses C storage conventions, assuming that the + * last listed dimension is the fastest‐changing dimension and the first‐listed dimension is the slowest + * changing. The HDF5 file format storage layout specification adheres to the C convention and the HDF5 + * Library adheres to the same convention when storing dataspace dimensions in the file. This affects how + * C programs and tools interpret data written from Fortran programs and vice versa. The example below + * illustrates the issue. + * + * When a Fortran application describes a dataspace to store an array as A(20,100), it specifies the value of + * the first dimension to be 20 and the second to be 100. Since Fortran stores data by columns, the + * first‐listed dimension with the value 20 is the fastest‐changing dimension and the last‐listed dimension + * with the value 100 is the slowest‐changing. In order to adhere to the HDF5 storage convention, the HDF5 + * Fortran wrapper transposes dimensions, so the first dimension becomes the last. The dataspace dimensions + * stored in the file will be 100,20 instead of 20,100 in order to correctly describe the Fortran data that + * is stored in 100 columns, each containing 20 elements. 
+ * + * When a Fortran application reads the data back, the HDF5 Fortran wrapper transposes the dimensions + * once more, returning the first dimension to be 20 and the second to be 100, describing correctly the sizes + * of the array that should be used to read data in the Fortran array A(20,100). + * + * When a C application reads data back, the dimensions will come out as 100 and 20, correctly describing + * the size of the array to read data into, since the data was written as 100 records of 20 elements each. + * Therefore C tools such as h5dump and h5ls always display transposed dimensions and values for the data + * written by a Fortran application. + * + * Consider the following simple example of equivalent C 3 x 5 and Fortran 5 x 3 arrays. As illustrated in + * the figure below, a C application will store a 3 x 5 2‐dimensional array as three 5‐element rows. In order + * to store the same data in the same order, a Fortran application must view the array as a 5 x 3 array with + * three 5‐element columns. The dataspace of this dataset, as written from Fortran, will therefore be + * described as 5 x 3 in the application but stored and described in the file according to the C convention + * as a 3 x 5 array. This ensures that C and Fortran applications will always read the data in the order in + * which it was written. The HDF5 Fortran interface handles this transposition automatically. + * \code + * // C + * \#define NX 3 // dataset dimensions + * \#define NY 5 + * . . . + * int data[NX][NY]; // data to write + * . . . + * // Data and output buffer initialization. + * for (j = 0; j < NX; j++) + * for (i = 0; i < NY; i++) + * data[j][i] = i + 1 + j * NY; + * // + * // 1 2 3 4 5 + * // 6 7 8 9 10 + * // 11 12 13 14 15 + * // + * . . . + * dims[0] = NX; + * dims[1] = NY; + * dataspace = H5Screate_simple(RANK, dims, NULL); + * \endcode + * + * \code + * ! Fortran + * INTEGER, PARAMETER :: NX = 3 + * INTEGER, PARAMETER :: NY = 5 + * . . . 
+ * INTEGER(HSIZE_T), DIMENSION(2) :: dims = (/NY, NX/) ! Dataset dimensions + * . . . + * ! + * ! Initialize data + * ! + * do i = 1, NY + * do j = 1, NX + * data(i,j) = i + (j-1)*NY + * enddo + * enddo + * ! + * ! Data + * ! + * ! 1 6 11 + * ! 2 7 12 + * ! 3 8 13 + * ! 4 9 14 + * ! 5 10 15 + * . . . + * CALL h5screate_simple_f(rank, dims, dspace_id, error) + * \endcode + * + * <table> + * <caption align=top>Comparing C and Fortran dataspaces</caption> + * <tr> + * <td> + * A dataset stored by a C program in a 3 x 5 array: + * </td> + * </tr> + * <tr> + * <td> + * \image html Dspace_CvsF1.gif + * </td> + * </tr> + * <tr> + * <td> + * The same dataset stored by a Fortran program in a 5 x 3 array: + * </td> + * </tr> + * <tr> + * <td> + * \image html Dspace_CvsF2.gif + * </td> + * </tr> + * <tr> + * <td> + * The first dataset above as written to an HDF5 file from C or the second dataset above as written + * from Fortran: + * </td> + * </tr> + * <tr> + * <td> + * \image html Dspace_CvsF3.gif + * </td> + * </tr> + * <tr> + * <td> + * The first dataset above as written to an HDF5 file from Fortran: + * </td> + * </tr> + * <tr> + * <td> + * \image html Dspace_CvsF4.gif + * </td> + * </tr> + * </table> + * + * <em>Note: The HDF5 Library stores arrays along the fastest‐changing dimension. This approach is often + * referred to as being “in C order.” C, C++, and Java work with arrays in row‐major order. In other words, + * the row, or the last dimension, is the fastest‐changing dimension. Fortran, on the other hand, handles + * arrays in column‐major order making the column, or the first dimension, the fastest‐changing dimension. + * Therefore, the C and Fortran arrays illustrated in the top portion of this figure are stored identically + * in an HDF5 file. 
This ensures that data written by any language can be meaningfully read, interpreted, + * and manipulated by any other.</em> + * + * <h4>Finding Dataspace Characteristics</h4> + * + * The HDF5 Library provides several APIs designed to query the characteristics of a dataspace. + * + * The function \ref H5Sis_simple returns information about the type of a dataspace. + * This function is rarely used and currently supports only simple and scalar dataspaces. + * + * To find out the dimensionality, or rank, of a dataspace, use \ref H5Sget_simple_extent_ndims. + * \ref H5Sget_simple_extent_dims can also be used to find out the rank. See + * the example below. If both functions return 0 for the value of rank, then the dataspace is scalar. + * + * To query the sizes of the current and maximum dimensions, use \ref H5Sget_simple_extent_dims. + * + * The following example illustrates querying the rank and dimensions of a dataspace using these functions. + * \code + * hid_t space_id; + * int rank; + * hsize_t *current_dims; + * hsize_t *max_dims; + * . . . + * rank = H5Sget_simple_extent_ndims(space_id); + * // (or rank = H5Sget_simple_extent_dims(space_id, NULL, NULL);) + * current_dims = (hsize_t *)malloc(rank * sizeof(hsize_t)); + * max_dims = (hsize_t *)malloc(rank * sizeof(hsize_t)); + * H5Sget_simple_extent_dims(space_id, current_dims, max_dims); + * // Print values here + * \endcode + * + * \subsection subsec_dataspace_transfer Dataspaces and Data Transfer + * + * Read and write operations transfer data between an HDF5 file on disk and in memory. The shape that the + * array data takes in the file and in memory may be the same, but HDF5 also allows users the ability to + * represent data in memory in a different shape than in the file. If the shape of an array in the file and + * in memory will be the same, then the same dataspace definition can be used for both. 
If the shape of an + * array in memory needs to be different than the shape in the file, then the dataspace definition for the + * shape of the array in memory can be changed. During a read operation, the array will be read into the + * different shape in memory, and during a write operation, the array will be written to the file in the + * shape specified by the dataspace in the file. The only qualification is that the number of elements read + * or written must be the same in both the source and the destination dataspaces. + * + * Item a in the figure below shows a simple example of a read operation in which the data is stored as a 3 + * by 4 array in the file (item b) on disk, but the program wants it to be a 4 by 3 array in memory. This is + * accomplished by setting the memory dataspace to describe the desired memory layout, as in item c. The read + * operation reads the data in the file array into the memory array. + * + * <table> + * <tr> + * <td> + * \image html Dspace_read.gif "Data layout before and after a read operation" + * </td> + * </tr> + * </table> + * + * <table> + * <tr> + * <td> + * \image html Dspace_move.gif "Moving data from disk to memory" + * </td> + * </tr> + * </table> + + * Both the source and destination are stored as contiguous blocks of storage with the elements in the order + * specified by the dataspace. The figure above shows one way the elements might be organized. In item a, + * the elements are stored as 3 blocks of 4 elements. The destination is an array of 12 elements in memory + * (see item c). As the figure suggests, the transfer reads the disk blocks into a memory buffer (see item b), + * and then writes the elements to the correct locations in memory. A similar process occurs in reverse when + * data is written to disk. + * + * \subsubsection subsubsec_dataspace_transfer_select Data Selection + * + * In addition to rearranging data, the transfer may select the data elements from the source and destination. 
+ * + * Data selection is implemented by creating a dataspace object that describes the selected elements (within + * the hyper rectangle) rather than the whole array. Two dataspace objects with selections can be used in + * data transfers to read selected elements from the source and write selected elements to the destination. + * When data is transferred using the dataspace object, only the selected elements will be transferred. + * + * This can be used to implement partial I/O, including: + * \li Sub‐setting ‐ reading part of a large dataset + * \li Sampling ‐ reading selected elements (for example, every second element) of a dataset + * \li Scatter‐gather ‐ read non‐contiguous elements into contiguous locations (gather) or read contiguous + * elements into non‐contiguous locations (scatter) or both + * + * To use selections, the following steps are followed: + * \li 1. Get or define the dataspace for the source and destination + * \li 2. Specify one or more selections for source and destination dataspaces + * \li 3. Transfer data using the dataspaces with selections + * + * A selection is created by applying one or more selections to a dataspace. A selection may override any + * other selections (#H5S_SELECT_SET) or may be “Ored” with previous selections on the same dataspace + * (#H5S_SELECT_OR). In the latter case, the resulting selection is the union of the selection and all + * previously selected selections. Arbitrary sets of points from a dataspace can be selected by specifying + * an appropriate set of selections. + * + * Two selections are used in data transfer, so the source and destination must be compatible, as described + * below. + * + * There are two forms of selection, hyperslab and point. A selection must be either a point selection or a + * set of hyperslab selections. Selections cannot be mixed. 
+ * + * The definition of a selection within a dataspace, not the data in the selection, cannot be saved to the + * file unless the selection definition is saved as a region reference. For more information, + * see \ref subsec_dataspace_refer. + * + * <h4>Hyperslab Selection</h4> + * + * A hyperslab is a selection of elements from a hyper rectangle. An HDF5 hyperslab is a rectangular pattern + * defined by four arrays. The four arrays are summarized in the table below. + * + * The offset defines the origin of the hyperslab in the original dataspace. + * + * The stride is the number of elements to increment between selected elements. A stride of ‘1’ is every + * element, a stride of ‘2’ is every second element, etc. Note that there may be a different stride for + * each dimension of the dataspace. The default stride is 1. + * + * The count is the number of elements in the hyperslab selection. When the stride is 1, the selection is a + * hyper rectangle with a corner at the offset and size count[0] by count[1] by.... When stride is greater + * than one, the hyperslab is bounded by the offset and the corners defined by stride[n] * count[n]. + * + * <table> + * <caption align=top>Hyperslab elements</caption> + * <tr> + * <th> + * Parameter + * </th> + * <th> + * Description + * </th> + * </tr> + * <tr> + * <td> + * Offset + * </td> + * <td> + * The starting location for the hyperslab. + * </td> + * </tr> + * <tr> + * <td> + * Stride + * </td> + * <td> + * The number of elements to separate each element or block to be selected. + * </td> + * </tr> + * <tr> + * <td> + * Count + * </td> + * <td> + * The number of elements or blocks to select along each dimension. + * </td> + * </tr> + * <tr> + * <td> + * Block + * </td> + * <td> + * The size of the block selected from the dataspace. + * </td> + * </tr> + * </table> + * + * The block is a count on the number of repetitions of the hyperslab. The default block size is '1', which is + * one hyperslab.
A block of 2 would be two hyperslabs in that dimension, with the second starting at + * offset[n] + (count[n] * stride[n]) + 1. + * + * A hyperslab can be used to access a sub‐set of a large dataset. The figure below shows an example of a + * hyperslab that reads a rectangle from the middle of a larger two dimensional array. The destination is the + * same shape as the source. + * + * <table> + * <tr> + * <td> + * \image html Dspace_subset.gif "Access a sub‐set of data with a hyperslab" + * </td> + * </tr> + * </table> + * + * Hyperslabs can be combined to select complex regions of the source and destination. The figure below + * shows an example of a transfer from one non‐rectangular region into another non‐rectangular region. The + * source is defined as the union of two hyperslabs, and the destination is the union of three hyperslabs. + * + * <table> + * <tr> + * <td> + * \image html Dspace_complex.gif "Build complex regions with hyperslab unions" + * </td> + * </tr> + * </table> + * + * Hyperslabs may also be used to collect or scatter data from regular patterns. The figure below shows an + * example where the source is a repeating pattern of blocks, and the destination is a single, one dimensional + * array. + * + * <table> + * <tr> + * <td> + * \image html Dspace_combine.gif "Use hyperslabs to combine or disperse data" + * </td> + * </tr> + * </table> + * + * <h4>Select Points</h4> + * + * The second type of selection is an array of points such as coordinates. Essentially, this selection is a + * list of all the points to include. The figure below shows an example of a transfer of seven elements from + * a two dimensional dataspace to a three dimensional dataspace using a point selection to specify the points. 
+ * + * <table> + * <tr> + * <td> + * \image html Dspace_point.gif "Point selection" + * </td> + * </tr> + * </table> + * + * <h4>Rules for Defining Selections</h4> + * + * A selection must have the same number of dimensions (rank) as the dataspace it is applied to, although it + * may select from only a small region such as a plane from a 3D dataspace. Selections do not affect the + * extent of the dataspace, the selection may be larger than the dataspace. The boundaries of selections are + * reconciled with the extent at the time of the data transfer. + * + * <h4>Data Transfer with Selections</h4> + * + * A data transfer (read or write) with selections is the same as any read or write, except the source + * and destination dataspace have compatible selections. + * + * During the data transfer, the following steps are executed by the library: + * \li The source and destination dataspaces are checked to assure that the selections are compatible. + * <ul><li>Each selection must be within the current extent of the dataspace. A selection may be + * defined to extend outside the current extent of the dataspace, but the dataspace cannot be + * accessed if the selection is not valid at the time of the access.</li> + * <li> The total number of points selected in the source and destination must be the same. Note + * that the dimensionality of the source and destination can be different (for example, the + * source could be 2D, the destination 1D or 3D), and the shape can be different, but the number of + * elements selected must be the same.</li></ul> + * \li The data is transferred, element by element. + * + * Selections have an iteration order for the points selected, which can be any permutation of the dimensions + * involved (defaulting to 'C' array order) or a specific order for the selected points, for selections + * composed of single array elements with \ref H5Sselect_elements. + * + * The elements of the selections are transferred in row‐major, or C order. 
That is, it is assumed that the + * first dimension varies slowest, the second next slowest, and so forth. For hyperslab selections, the order + * can be any permutation of the dimensions involved (defaulting to ‘C’ array order). When multiple hyperslabs + * are combined, the hyperslabs are coalesced into contiguous reads and writes. + * + * In the case of point selections, the points are read and written in the order specified. + * + * \subsubsection subsubsec_dataspace_transfer_model Programming Model + * + * <h4>Selecting Hyperslabs</h4> + * + * Suppose we want to read a 3x4 hyperslab from a dataset in a file beginning at the element <1,2> in the + * dataset, and read it into a 7 x 7 x 3 array in memory. See the figure below. In order to do this, we must + * create a dataspace that describes the overall rank and dimensions of the dataset in the file as well as + * the position and size of the hyperslab that we are extracting from that dataset. + * + * <table> + * <tr> + * <td> + * \image html Dspace_select.gif "Selecting a hyperslab" + * </td> + * </tr> + * </table> + * + * The code in the first example below illustrates the selection of the hyperslab in the file dataspace. + * The second example below shows the definition of the destination dataspace in memory. Since the in‐memory + * dataspace has three dimensions, the hyperslab is an array with three dimensions with the last dimension + * being 1: <3,4,1>. The third example below shows the read using the source and destination dataspaces + * with selections. + * + * <em>Selecting a hyperslab</em> + * \code + * //get the file dataspace. + * dataspace = H5Dget_space(dataset); // dataspace identifier + * + * // Define hyperslab in the dataset. + * offset[0] = 1; + * offset[1] = 2; + * count[0] = 3; + * count[1] = 4; + * status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL); + * \endcode + * + * <em>Defining the destination memory</em> + * \code + * // Define memory dataspace. 
+ * dimsm[0] = 7; + * dimsm[1] = 7; + * dimsm[2] = 3; + * memspace = H5Screate_simple(3,dimsm,NULL); + * + * // Define memory hyperslab. + * offset_out[0] = 3; + * offset_out[1] = 0; + * offset_out[2] = 0; + * count_out[0] = 3; + * count_out[1] = 4; + * count_out[2] = 1; + * status = H5Sselect_hyperslab(memspace, H5S_SELECT_SET, offset_out, NULL, count_out, NULL); + * \endcode + * + * <em>A sample read specifying source and destination dataspaces</em> + * \code + * ret = H5Dread(dataset, H5T_NATIVE_INT, memspace,dataspace, H5P_DEFAULT, data); + * \endcode + * + * <h4>Example with Strides and Blocks</h4> + * + * Consider an 8 x 12 dataspace into which we want to write eight 3 x 2 blocks in a two dimensional array + * from a source dataspace in memory that is a 50‐element one dimensional array. See the figure below. + * + * <table> + * <tr> + * <td> + * \image html Dspace_write1to2.gif "Write from a one dimensional array to a two dimensional array" + * </td> + * </tr> + * </table> + * + * The example below shows code to write 48 elements from the one dimensional array to the file dataset + * starting with the second element in vector. The destination hyperslab has the following parameters: + * offset=(0,1), stride=(4,3), count=(2,4), block=(3,2). The source has the parameters: offset=(1), + * stride=(1), count=(48), block=(1). After these operations, the file dataspace will have the values + * shown in item b in the figure above. Notice that the values are inserted in the file dataset in + * row‐major order. + * + * <em>Write from a one dimensional array to a two dimensional array</em> + * \code + * // Select hyperslab for the dataset in the file, using 3 x 2 blocks, (4,3) stride (2,4) + * // count starting at the position (0,1). 
+ * offset[0] = 0; offset[1] = 1; + * stride[0] = 4; stride[1] = 3; + * count[0] = 2; count[1] = 4; + * block[0] = 3; block[1] = 2; + * ret = H5Sselect_hyperslab(fid, H5S_SELECT_SET, offset, stride, count, block); + * + * // Create dataspace for the first dataset. + * mid1 = H5Screate_simple(MSPACE1_RANK, dim1, NULL); + * + * // Select hyperslab. + * // We will use 48 elements of the vector buffer starting + * // at the second element. Selected elements are + * // 1 2 3 . . . 48 + * offset[0] = 1; + * stride[0] = 1; + * count[0] = 48; + * block[0] = 1; + * ret = H5Sselect_hyperslab(mid1, H5S_SELECT_SET, offset, stride, count, block); + * + * // Write selection from the vector buffer to the dataset in the file. + * ret = H5Dwrite(dataset, H5T_NATIVE_INT, mid1, fid, H5P_DEFAULT, vector); + * \endcode + * + * <h4>Selecting a Union of Hyperslabs</h4> + * + * The HDF5 Library allows the user to select a union of hyperslabs and write or read the selection into + * another selection. The shapes of the two selections may differ, but the number of elements must be + * equal. + * + * <table> + * <tr> + * <td> + * \image html Dspace_transfer.gif "Transferring hyperslab unions" + * </td> + * </tr> + * </table> + * + * The figure above shows the transfer of a selection that is two overlapping hyperslabs from the dataset + * into a union of hyperslabs in the memory dataset. Note that the destination dataset has a different shape + * from the source dataset. Similarly, the selection in the memory dataset could have a different shape than + * the selected union of hyperslabs in the original file. For simplicity, the selection is the same shape + * at the destination. + * + * To implement this transfer, it is necessary to: + * \li 1. Get the source dataspace + * \li 2. Define one hyperslab selection for the source + * \li 3. Define a second hyperslab selection, unioned with the first + * \li 4. Get the destination dataspace + * \li 5.
Define one hyperslab selection for the destination + * \li 6. Define a second hyperslab selection, unioned with the first + * \li 7. Execute the data transfer (H5Dread or H5Dwrite) using the source and destination dataspaces + * + * The example below shows example code to create the selections for the source dataspace (the file). The + * first hyperslab is size 3 x 4 and the left upper corner at the position (1,2). The hyperslab is a simple + * rectangle, so the stride and block are 1. The second hyperslab is 6 x 5 at the position (2,4). The second + * selection is a union with the first hyperslab (#H5S_SELECT_OR). + * + * <em> Select source hyperslabs</em> + * \code + * fid = H5Dget_space(dataset); + * + * // Select first hyperslab for the dataset in the file. + * offset[0] = 1; offset[1] = 2; + * block[0] = 1; block[1] = 1; + * stride[0] = 1; stride[1] = 1; + * count[0] = 3; count[1] = 4; + * ret = H5Sselect_hyperslab(fid, H5S_SELECT_SET, offset, stride, count, block); + * + * // Add second selected hyperslab to the selection. + * offset[0] = 2; offset[1] = 4; + * block[0] = 1; block[1] = 1; + * stride[0] = 1; stride[1] = 1; + * count[0] = 6; count[1] = 5; + * ret = H5Sselect_hyperslab(fid, H5S_SELECT_OR, offset, stride, count, block); + * \endcode + * + * The example below shows example code to create the selection for the destination in memory. The steps + * are similar. In this example, the hyperslabs are the same shape, but located in different positions in the + * dataspace. The first hyperslab is 3 x 4 and starts at (0,0), and the second is 6 x 5 and starts at (1,2). + * Finally, the H5Dread call transfers the selected data from the file dataspace to the selection in memory. + * In this example, the source and destination selections are two overlapping rectangles. In general, any + * number of rectangles can be OR’ed, and they do not have to be contiguous. 
The order of the selections + * does not matter, but the first should use #H5S_SELECT_SET ; subsequent selections are unioned using + * #H5S_SELECT_OR. + * + * It is important to emphasize that the source and destination do not have to be the same shape (or number + * of rectangles). As long as the two selections have the same number of elements, the data can be + * transferred. + * + * <em>Select destination hyperslabs</em> + * \code + * // Create memory dataspace. + * mid = H5Screate_simple(MSPACE_RANK, mdim, NULL); + * + * // Select two hyperslabs in memory. Hyperslabs has the + * // same size and shape as the selected hyperslabs for + * // the file dataspace. + * offset[0] = 0; offset[1] = 0; + * block[0] = 1; block[1] = 1; + * stride[0] = 1; stride[1] = 1; + * count[0] = 3; count[1] = 4; + * ret = H5Sselect_hyperslab(mid, H5S_SELECT_SET, offset, stride, count, block); + * + * offset[0] = 1; offset[1] = 2; + * block[0] = 1; block[1] = 1; + * stride[0] = 1; stride[1] = 1; + * count[0] = 6; count[1] = 5; + * ret = H5Sselect_hyperslab(mid, H5S_SELECT_OR, offset, stride, count, block); + * + * ret = H5Dread(dataset, H5T_NATIVE_INT, mid, fid, H5P_DEFAULT, matrix_out); + * \endcode + * + * <h4>Selecting a List of Independent Points</h4> + * + * It is also possible to specify a list of elements to read or write using the function H5Sselect_elements. + * + * The procedure is similar to hyperslab selections. + * \li 1. Get the source dataspace + * \li 2. Set the selected points + * \li 3. Get the destination dataspace + * \li 4. Set the selected points + * \li 5. Transfer the data using the source and destination dataspaces + * + * The figure below shows an example where four values are to be written to four separate points in a two + * dimensional dataspace. The source dataspace is a one dimensional array with the values 53, 59, 61, 67. + * The destination dataspace is an 8 x 12 array. The elements are to be written to the points + * (0,0), (3,3), (3,5), and (5,6). 
In this example, the source does not require a selection. The example + * below the figure shows example code to implement this transfer. + * + * A point selection lists the exact points to be transferred and the order they will be transferred. The + * source and destination are required to have the same number of elements. A point selection can be used + * with a hyperslab (for example, the source could be a point selection and the destination a hyperslab, + * or vice versa), so long as the number of elements selected is the same. + * + * <table> + * <tr> + * <td> + * \image html Dspace_separate.gif "Write data to separate points" + * </td> + * </tr> + * </table> + * + * <em>Write data to separate points</em> + * \code + * hsize_t dim2[] = {4}; + * int values[] = {53, 59, 61, 67}; + * + * // file dataspace + * hsize_t coord[4][2]; + * + * // Create dataspace for the second dataset. + * mid2 = H5Screate_simple(1, dim2, NULL); + * + * // Select sequence of NPOINTS points in the file dataspace. + * coord[0][0] = 0; coord[0][1] = 0; + * coord[1][0] = 3; coord[1][1] = 3; + * coord[2][0] = 3; coord[2][1] = 5; + * coord[3][0] = 5; coord[3][1] = 6; + * + * ret = H5Sselect_elements(fid, H5S_SELECT_SET, NPOINTS, (const hsize_t *)coord); + * + * ret = H5Dwrite(dataset, H5T_NATIVE_INT, mid2, fid, H5P_DEFAULT, values); + * \endcode + * + * <h4>Combinations of Selections</h4> + * + * Selections are a very flexible mechanism for reorganizing data during a data transfer. With different + * combinations of dataspaces and selections, it is possible to implement many kinds of data transfers + * including sub‐setting, sampling, and reorganizing the data. The table below gives some example combinations + * of source and destination, and the operations they implement.
+ * + * <table> + * <caption>Selection operations</caption> + * <tr> + * <th> + * <p>Source</p> + * </th> + * <th> + * <p>Destination</p> + * </th> + * <th> + * <p>Operation</p> + * </th> + * </tr> + * <tr> + * <td> + * <p>All</p> + * </td> + * <td> + * <p>All</p> + * </td> + * <td> + * <p>Copy whole array</p> + * </td> + * </tr> + * <tr> + * <td> + * <p>All</p> + * </td> + * <td> + * <p>All (different shape)</p> + * </td> + * <td> + * <p>Copy and reorganize array</p> + * </td> + * </tr> + * <tr> + * <td> + * <p>Hyperslab</p> + * </td> + * <td> + * <p>All</p> + * </td> + * <td> + * <p>Sub-set</p> + * </td> + * </tr> + * <tr> + * <td> + * <p>Hyperslab</p> + * </td> + * <td> + * <p>Hyperslab (same shape)</p> + * </td> + * <td> + * <p>Selection</p> + * </td> + * </tr> + * <tr> + * <td> + * <p>Hyperslab</p> + * </td> + * <td> + * <p>Hyperslab (different shape)</p> + * </td> + * <td> + * <p>Select and rearrange</p> + * </td> + * </tr> + * <tr> + * <td> + * <p>Hyperslab with stride or block</p> + * </td> + * <td> + * <p>All or hyperslab with stride 1</p> + * </td> + * <td> + * <p>Sub-sample, scatter</p> + * </td> + * </tr> + * <tr> + * <td> + * <p>Hyperslab</p> + * </td> + * <td> + * <p>Points</p> + * </td> + * <td> + * <p>Scatter</p> + * </td> + * </tr> + * <tr> + * <td> + * <p>Points</p> + * </td> + * <td> + * <p>Hyperslab or all</p> + * </td> + * <td> + * <p>Gather</p> + * </td> + * </tr> + * <tr> + * <td> + * <p>Points</p> + * </td> + * <td> + * <p>Points (same)</p> + * </td> + * <td> + * <p>Selection</p> + * </td> + * </tr> + * <tr> + * <td> + * <p>Points</p> + * </td> + * <td> + * <p>Points (different)</p> + * </td> + * <td> + * <p>Reorder points</p> + * </td> + * </tr> + * </table> + * + * \subsection subsec_dataspace_select Dataspace Selection Operations and Data Transfer + * + * This section is under construction. 
+ * + * \subsection subsec_dataspace_refer References to Dataset Regions + * + * Another use of selections is to store a reference to a region of a dataset. An HDF5 object reference + * object is a pointer to an object (dataset, group, or committed datatype) in the file. A selection can + * be used to create a pointer to a set of selected elements of a dataset, called a region reference. The + * selection can be either a point selection or a hyperslab selection. + * + * A region reference is an object maintained by the HDF5 Library. The region reference can be stored in a + * dataset or attribute, and then read. The dataset or attribute is defined to have the special datatype, + * #H5T_STD_REF_DSETREG. + * + * To discover the elements and/or read the data, the region reference can be dereferenced. The + * #H5Rdereference call returns an identifier for the dataset, and then the selected dataspace can be + * retrieved with a call to #H5Rget_region(). The selected dataspace can be used to read the selected data + * elements. + * + * For more information, see \ref subsubsec_datatype_other_refs. + * + * \subsubsection subsubsec_dataspace_refer_use Example Uses for Region References + * + * Region references are used to implement stored pointers to data within a dataset. For example, features + * in a large dataset might be indexed by a table. See the figure below. This table could be stored as an + * HDF5 dataset with a compound datatype, for example, with a field for the name of the feature and a region + * reference to point to the feature in the dataset. See the second figure below.
+ * + * <table> + * <tr> + * <td> + * \image html Dspace_features.gif " Features indexed by a table" + * </td> + * </tr> + * </table> + * + * <table> + * <tr> + * <td> + * \image html Dspace_features_cmpd.gif "Storing the table with a compound datatype" + * </td> + * </tr> + * </table> + * + * + * \subsubsection subsubsec_dataspace_refer_create Creating References to Regions + * + * To create a region reference: + * \li 1. Create or open the dataset that contains the region + * \li 2. Get the dataspace for the dataset + * \li 3. Define a selection that specifies the region + * \li 4. Create a region reference using the dataset and dataspace with selection + * \li 5. Write the region reference(s) to the desired dataset or attribute + * + * The figure below shows a diagram of a file with three datasets. Dataset D1 and D2 are two dimensional + * arrays of integers. Dataset R1 is a one dimensional array of references to regions in D1 and D2. The + * regions can be any valid selection of the dataspace of the target dataset. + * <table> + * <tr> + * <td> + * \image html Dspace_three_datasets.gif "A file with three datasets" + * </td> + * </tr> + * </table> + * <em>Note: In the figure above, R1 is a 1 D array of region pointers; each pointer refers to a selection + * in one dataset.</em> + * + * The example below shows code to create the array of region references. The references are created in an + * array of type #hdset_reg_ref_t. Each region is defined as a selection on the dataspace of the dataset, + * and a reference is created using \ref H5Rcreate(). The call to \ref H5Rcreate() specifies the file, + * dataset, and the dataspace with selection. + * + * <em>Create an array of region references</em> + * \code + * // create an array of 4 region references + * hdset_reg_ref_t ref[4]; + * + * // Create a reference to the first hyperslab in the first Dataset. 
+ * offset[0] = 1; offset[1] = 1; + * count[0] = 3; count[1] = 2; + * status = H5Sselect_hyperslab(space_id, H5S_SELECT_SET, offset, NULL, count, NULL); + * status = H5Rcreate(&ref[0], file_id, "D1", H5R_DATASET_REGION, space_id); + * + * // The second reference is to a union of hyperslabs in the first Dataset + * offset[0] = 5; offset[1] = 3; + * count[0] = 1; count[1] = 4; + * status = H5Sselect_none(space_id); + * status = H5Sselect_hyperslab(space_id, H5S_SELECT_SET, offset, NULL, count, NULL); + * offset[0] = 6; offset[1] = 5; + * count[0] = 1; count[1] = 2; + * status = H5Sselect_hyperslab(space_id, H5S_SELECT_OR, offset, NULL, count, NULL); + * status = H5Rcreate(&ref[1], file_id, "D1", H5R_DATASET_REGION, space_id); + * + * // the fourth reference is to a selection of points in the first Dataset + * status = H5Sselect_none(space_id); + * coord[0][0] = 4; coord[0][1] = 4; + * coord[1][0] = 2; coord[1][1] = 6; + * coord[2][0] = 3; coord[2][1] = 7; + * coord[3][0] = 1; coord[3][1] = 5; + * coord[4][0] = 5; coord[4][1] = 8; + * + * status = H5Sselect_elements(space_id, H5S_SELECT_SET, num_points, (const hsize_t *)coord); + * status = H5Rcreate(&ref[3], file_id, "D1", H5R_DATASET_REGION, space_id); + * + * // the third reference is to a hyperslab in the second Dataset + * offset[0] = 0; offset[1] = 0; + * count[0] = 4; count[1] = 6; + * status = H5Sselect_hyperslab(space_id2, H5S_SELECT_SET, offset, NULL, count, NULL); + * status = H5Rcreate(&ref[2], file_id, "D2", H5R_DATASET_REGION, space_id2); + * \endcode + * + * When all the references are created, the array of references is written to the dataset R1. The + * dataset is declared to have datatype #H5T_STD_REF_DSETREG. See the example below. + * + * <em>Write the array of references to a dataset</em> + * \code + * hsize_t dimsr[1]; + * dimsr[0] = 4; + * + * // Dataset with references.
+ * spacer_id = H5Screate_simple(1, dimsr, NULL); + * dsetr_id = H5Dcreate(file_id, "R1", H5T_STD_REF_DSETREG, spacer_id, H5P_DEFAULT, H5P_DEFAULT, + * H5P_DEFAULT); + * + * // Write dataset with the references. + * status = H5Dwrite(dsetr_id, H5T_STD_REF_DSETREG, H5S_ALL, H5S_ALL, H5P_DEFAULT, ref); + * + * \endcode + * + * When creating region references, the following rules are enforced. + * \li The selection must be a valid selection for the target dataset, just as when transferring data + * \li The dataset must exist in the file when the reference is created with #H5Rcreate + * \li The target dataset must be in the same file as the stored reference + * + * \subsubsection subsubsec_dataspace_refer_read Reading References to Regions + * + * To retrieve data from a region reference, the reference must be read from the file, and then the data can + * be retrieved. The steps are: + * \li 1. Open the dataset or attribute containing the reference objects + * \li 2. Read the reference object(s) + * \li 3. For each region reference, get the dataset (#H5Rdereference) and dataspace (#H5Rget_region) + * \li 4. Use the dataspace and datatype to discover what space is needed to store the data, allocate the + * correct storage and create a dataspace and datatype to define the memory data layout + * + * The example below shows code to read an array of region references from a dataset, and then read the + * data from the first selected region. Note that the region reference has information that records the + * dataset (within the file) and the selection on the dataspace of the dataset. After dereferencing the + * region reference, the datatype, number of points, and some aspects of the selection can be discovered. + * (For a union of hyperslabs, it may not be possible to determine the exact set of hyperslabs that has been + * combined.) + * The table below the code example shows the inquiry functions.
+ * + * When reading data from a region reference, the following rules are enforced: + * \li The target dataset must be present and accessible in the file + * \li The selection must be a valid selection for the dataset + * + * <em>Read an array of region references; read from the first selection</em> + * \code + * dsetr_id = H5Dopen (file_id, "R1", H5P_DEFAULT); + * status = H5Dread(dsetr_id, H5T_STD_REF_DSETREG, H5S_ALL, H5S_ALL, H5P_DEFAULT, ref_out); + * + * // Dereference the first reference. + * // 1) get the dataset (H5Rdereference) + * // 2) get the selected dataspace (H5Rget_region) + * + * dsetv_id = H5Rdereference(dsetr_id, H5R_DATASET_REGION, &ref_out[0]); + * space_id = H5Rget_region(dsetr_id, H5R_DATASET_REGION, &ref_out[0]); + * + * // Discover how many points and shape of the data + * ndims = H5Sget_simple_extent_ndims(space_id); + * H5Sget_simple_extent_dims(space_id,dimsx,NULL); + * + * // Read and display hyperslab selection from the dataset. + * dimsy[0] = H5Sget_select_npoints(space_id); + * spacex_id = H5Screate_simple(1, dimsy, NULL); + * + * status = H5Dread(dsetv_id, H5T_NATIVE_INT, H5S_ALL, space_id, H5P_DEFAULT, data_out); + * printf("Selected hyperslab: "); + * for (i = 0; i < 8; i++) { + * printf("\n"); + * for (j = 0; j < 10; j++) + * printf("%d ", data_out[i][j]); + * } + * printf("\n"); + * \endcode + * + * <table> + * <caption>The inquiry functions</caption> + * <tr> + * <th> + * <p>Function</p> + * </th> + * <th> + * <p>Information</p> + * </th> + * </tr> + * <tr> + * <td> + * @ref H5Sget_select_npoints + * </td> + * <td> + * <p>The number of elements in the selection (hyperslab or point selection).</p> + * </td> + * </tr> + * <tr> + * <td> + * @ref H5Sget_select_bounds + * </td> + * <td> + * <p>The bounding box that encloses the selected points (hyperslab or point selection).</p> + * </td> + * </tr> + * <tr> + * <td> + * @ref H5Sget_select_hyper_nblocks + * </td> + * <td> + * <p>The number of blocks in the selection.</p> + * </td> 
+ * </tr> + * <tr> + * <td> + * @ref H5Sget_select_hyper_blocklist + * </td> + * <td> + * <p>A list of the blocks in the selection.</p> + * </td> + * </tr> + * <tr> + * <td> + * @ref H5Sget_select_elem_npoints + * </td> + * <td> + * <p>The number of points in the selection.</p> + * </td> + * </tr> + * <tr> + * <td> + * @ref H5Sget_select_elem_pointlist + * </td> + * <td> + * <p>The points.</p> + * </td> + * </tr> + * </table> + * + * + * \subsection subsec_dataspace_sample Sample Programs + * + * This section contains the full programs from which several of the code examples in this chapter were + * derived. The h5dump output from the program’s output file immediately follows each program. + * + * <em>h5_write.c</em> + * \code + * #include "hdf5.h" + * + * #define H5FILE_NAME "SDS.h5" + * #define DATASETNAME "C Matrix" + * #define NX 3 + * #define NY 5 + * #define RANK 2 // dataset dimensions + * + * int + * main (void) + * { + * hid_t file, dataset; // file and dataset identifiers + * hid_t datatype, dataspace; // identifiers + * hsize_t dims[2]; // dataset dimensions + * herr_t status; + * int data[NX][NY]; // data to write + * int i, j; + * + * // + * // Data and output buffer initialization. + * for (j = 0; j < NX; j++) { + * for (i = 0; i < NY; i++) + * data[j][i] = i + 1 + j*NY; + * } + * // 1 2 3 4 5 + * // 6 7 8 9 10 + * // 11 12 13 14 15 + * + * // Create a new file using H5F_ACC_TRUNC access, + * // default file creation properties, and default file + * // access properties. + * file = H5Fcreate(H5FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + * + * // Describe the size of the array and create the data space for fixed + * // size dataset. + * dims[0] = NX; + * dims[1] = NY; + * dataspace = H5Screate_simple(RANK, dims, NULL); + * + * // Create a new dataset within the file using defined dataspace and + * // datatype and default dataset creation properties. 
+ * dataset = H5Dcreate(file, DATASETNAME, H5T_NATIVE_INT, dataspace, H5P_DEFAULT, + * H5P_DEFAULT, H5P_DEFAULT); + * + * // Write the data to the dataset using default transfer properties. + * status = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + * + * // Close/release resources. + * H5Sclose(dataspace); + * H5Dclose(dataset); + * H5Fclose(file); + * + * return 0; + * } + * + * SDS.out + * ------- + * HDF5 "SDS.h5" { + * GROUP "/" { + * DATASET "C Matrix" { + * DATATYPE H5T_STD_I32BE + * DATASPACE SIMPLE { ( 3, 5 ) / ( 3, 5 ) } + * DATA { + * 1, 2, 3, 4, 5, + * 6, 7, 8, 9, 10, + * 11, 12, 13, 14, 15 + * } + * } + * } + * } + * + * \endcode + * + * <em>h5_write.f90</em> + * \code + * ---------- + * PROGRAM DSETEXAMPLE + * + * USE HDF5 ! This module contains all necessary modules + * + * IMPLICIT NONE + * + * CHARACTER(LEN=7), PARAMETER :: filename = "SDSf.h5" ! File name + * CHARACTER(LEN=14), PARAMETER :: dsetname = "Fortran Matrix" ! Dataset name + * INTEGER, PARAMETER :: NX = 3 + * INTEGER, PARAMETER :: NY = 5 + * + * INTEGER(HID_T) :: file_id ! File identifier + * INTEGER(HID_T) :: dset_id ! Dataset identifier + * INTEGER(HID_T) :: dspace_id ! Dataspace identifier + * + * INTEGER(HSIZE_T), DIMENSION(2) :: dims = (/3,5/) ! Dataset dimensions + * INTEGER :: rank = 2 ! Dataset rank + * INTEGER :: data(NX,NY) + * INTEGER :: error ! Error flag + * INTEGER :: i, j + * + * ! + * ! Initialize data + * ! + * do i = 1, NX + * do j = 1, NY + * data(i,j) = j + (i-1)*NY + * enddo + * enddo + * ! + * ! Data + * ! + * ! 1 2 3 4 5 + * ! 6 7 8 9 10 + * ! 11 12 13 14 15 + * + * ! + * ! Initialize FORTRAN interface. + * ! + * CALL h5open_f(error) + * + * ! + * ! Create a new file using default properties. + * ! + * CALL h5fcreate_f(filename, H5F_ACC_TRUNC_F, file_id, error) + * + * ! + * ! Create the dataspace. + * ! + * CALL h5screate_simple_f(rank, dims, dspace_id, error) + * + * ! + * ! Create and write dataset using default properties. + * !
+ * CALL h5dcreate_f(file_id, dsetname, H5T_NATIVE_INTEGER, dspace_id, & + * dset_id, error, H5P_DEFAULT_F, H5P_DEFAULT_F, & + * H5P_DEFAULT_F) + * + * CALL h5dwrite_f(dset_id, H5T_NATIVE_INTEGER, data, dims, error) + * + * ! + * ! End access to the dataset and release resources used by it. + * ! + * CALL h5dclose_f(dset_id, error) + * + * ! + * ! Terminate access to the data space. + * ! + * CALL h5sclose_f(dspace_id, error) + * + * ! + * ! Close the file. + * ! + * CALL h5fclose_f(file_id, error) + * + * ! + * ! Close FORTRAN interface. + * ! + * CALL h5close_f(error) + * + * END PROGRAM DSETEXAMPLE + * + * SDSf.out + * -------- + * HDF5 "SDSf.h5" { + * GROUP "/" { + * DATASET "Fortran Matrix" { + * DATATYPE H5T_STD_I32BE + * DATASPACE SIMPLE { ( 5, 3 ) / ( 5, 3 ) } + * DATA { + * 1, 6, 11, + * 2, 7, 12, + * 3, 8, 13, + * 4, 9, 14, + * 5, 10, 15 + * } + * } + * } + * } + * + * \endcode + * + * <em>h5_write_tr.f90</em> + * \code + * PROGRAM DSETEXAMPLE + * + * USE HDF5 ! This module contains all necessary modules + * + * IMPLICIT NONE + * + * CHARACTER(LEN=10), PARAMETER :: filename = "SDSf_tr.h5" ! File name + * CHARACTER(LEN=24), PARAMETER :: dsetname = "Fortran Transpose Matrix"! Dataset name + * + * INTEGER, PARAMETER :: NX = 3 + * INTEGER, PARAMETER :: NY = 5 + * + * INTEGER(HID_T) :: file_id ! File identifier + * INTEGER(HID_T) :: dset_id ! Dataset identifier + * INTEGER(HID_T) :: dspace_id ! Dataspace identifier + * + * INTEGER(HSIZE_T), DIMENSION(2) :: dims = (/NY, NX/) ! Dataset dimensions + * INTEGER :: rank = 2 ! Dataset rank + * INTEGER :: data(NY,NX) + * + * INTEGER :: error ! Error flag + * INTEGER :: i, j + * + * ! + * ! Initialize data + * ! + * do i = 1, NY + * do j = 1, NX + * data(i,j) = i + (j-1)*NY + * enddo + * enddo + * + * ! + * ! Data + * ! + * ! 1 6 11 + * ! 2 7 12 + * ! 3 8 13 + * ! 4 9 14 + * ! 5 10 15 + * + * ! + * ! Initialize FORTRAN interface. + * ! + * CALL h5open_f(error) + * + * ! + * ! Create a new file using default properties. 
+ * ! + * CALL h5fcreate_f(filename, H5F_ACC_TRUNC_F, file_id, error) + * + * ! + * ! Create the dataspace. + * ! + * CALL h5screate_simple_f(rank, dims, dspace_id, error) + * + * ! + * ! Create and write dataset using default properties. + * ! + * CALL h5dcreate_f(file_id, dsetname, H5T_NATIVE_INTEGER, dspace_id, & + * dset_id, error, H5P_DEFAULT_F, H5P_DEFAULT_F, & + * H5P_DEFAULT_F) + * CALL h5dwrite_f(dset_id, H5T_NATIVE_INTEGER, data, dims, error) + * + * ! + * ! End access to the dataset and release resources used by it. + * ! + * CALL h5dclose_f(dset_id, error) + * + * ! + * ! Terminate access to the data space. + * ! + * CALL h5sclose_f(dspace_id, error) + * + * ! + * ! Close the file. + * ! + * CALL h5fclose_f(file_id, error) + * + * ! + * ! Close FORTRAN interface. + * ! + * CALL h5close_f(error) + * + * END PROGRAM DSETEXAMPLE + * + * SDSf_tr.out + * ----------- + * HDF5 "SDSf_tr.h5" { + * GROUP "/" { + * DATASET "Fortran Transpose Matrix" { + * DATATYPE H5T_STD_I32LE + * DATASPACE SIMPLE { ( 3, 5 ) / ( 3, 5 ) } + * DATA { + * 1, 2, 3, 4, 5, + * 6, 7, 8, 9, 10, + * 11, 12, 13, 14, 15 + * } + * } + * } + * } + * + * \endcode + * + * Previous Chapter \ref sec_datatype - Next Chapter \ref sec_attribute + * + */ + +/** + * \defgroup H5S Dataspaces (H5S) * * Use the functions in this module to manage HDF5 dataspaces \Emph{and} selections. * @@ -40,6 +1527,7 @@ * using \Emph{selections}. Furthermore, certain set operations are supported * for selections. 
* + * <!-- * <table> * <tr><th>Create</th><th>Read</th></tr> * <tr valign="top"> @@ -59,7 +1547,7 @@ * </td> * </tr> * </table> - * + * --> */ #endif /* H5Smodule_H */ diff --git a/src/H5Tmodule.h b/src/H5Tmodule.h index 8f7d04d..f631007 100644 --- a/src/H5Tmodule.h +++ b/src/H5Tmodule.h @@ -28,7 +28,3837 @@ #define H5_MY_PKG H5T #define H5_MY_PKG_ERR H5E_DATATYPE -/**\defgroup H5T H5T +/** \page H5T_UG HDF5 Datatypes + * + * \section sec_datatype HDF5 Datatypes + * HDF5 datatypes describe the element type of HDF5 datasets and attributes. + * There's a large set of predefined datatypes, but users may find it useful + * to define new datatypes through a process called \Emph{derivation}. + * + * The element type is automatically persisted as part of the HDF5 metadata of + * attributes and datasets. Additionally, datatype definitions can be persisted + * to HDF5 files and linked to groups as HDF5 datatype objects or so-called + * \Emph{committed datatypes}. + * + * \subsection subsec_datatype_intro Introduction and Definitions + * + * An HDF5 dataset is an array of data elements, arranged according to the specifications + * of the dataspace. In general, a data element is the smallest addressable unit of storage + * in the HDF5 file. (Compound datatypes are the exception to this rule.) The HDF5 datatype + * defines the storage format for a single data element. See the figure below. + * + * The model for HDF5 attributes is extremely similar to datasets: an attribute has a dataspace + * and a data type, as shown in the figure below. The information in this chapter applies to both + * datasets and attributes. + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig1.gif "Datatypes, dataspaces, and datasets" + * </td> + * </tr> + * </table> + * + * Abstractly, each data element within the dataset is a sequence of bits, interpreted as a single + * value from a set of values (for example, a number or a character). 
For a given datatype, there is a + * standard or convention for representing the values as bits, and when the bits are represented in a + * particular storage the bits are laid out in a specific storage scheme such as 8-bit bytes with a + * specific ordering and alignment of bytes within the storage array. + * + * HDF5 datatypes implement a flexible, extensible, and portable mechanism for specifying and + * discovering the storage layout of the data elements, determining how to interpret the elements + * (for example, as floating point numbers), and for transferring data between different compatible + * layouts. + * + * An HDF5 datatype describes one specific layout of bits. A dataset has a single datatype which + * applies to every data element. When a dataset is created, the storage datatype is defined. After + * the dataset or attribute is created, the datatype cannot be changed. + * \li The datatype describes the storage layout of a single data element + * \li All elements of the dataset must have the same type + * \li The datatype of a dataset is immutable + * + * When data is transferred (for example, a read or write), each end point of the transfer has a + * datatype, which describes the correct storage for the elements. The source and destination may + * have different (but compatible) layouts, in which case the data elements are automatically + * transformed during the transfer. + * + * HDF5 datatypes describe commonly used binary formats for numbers (integers + * and floating point) and characters (ASCII). A given computing architecture and programming language + * supports certain number and character representations. For example, a computer may support 8-, + * 16-, 32-, and 64-bit signed integers, stored in memory in little-endian byte order. These would + * presumably correspond to the C programming language types \Emph{char}, \Emph{short}, + * \Emph{int}, and \Emph{long}. 
+ * + * When reading and writing from memory, the HDF5 library must know the appropriate datatype + * that describes the architecture specific layout. The HDF5 library provides the platform + * independent \Emph{NATIVE} types, which are mapped to an appropriate datatype for each platform. + * So the type #H5T_NATIVE_INT is an alias for the appropriate descriptor for each platform. + * + * Data in memory has a datatype: + * \li The storage layout in memory is architecture-specific + * \li The HDF5 \Emph{NATIVE} types are predefined aliases for the architecture-specific memory layout + * \li The memory datatype need not be the same as the stored datatype of the dataset + * + * In addition to numbers and characters, an HDF5 datatype can describe more abstract classes of + * types including enumerations, strings, bit strings, and references (pointers to objects in the HDF5 + * file). HDF5 supports several classes of composite datatypes which are combinations of one or + * more other datatypes. In addition to the standard predefined datatypes, users can define new + * datatypes within the datatype classes. + * + * The HDF5 datatype model is very general and flexible: + * \li For common simple purposes, only predefined types will be needed + * \li Datatypes can be combined to create complex structured datatypes + * \li If needed, users can define custom atomic datatypes + * \li Committed datatypes can be shared by datasets or attributes + * + * \subsection subsec_datatype_model Datatype Model + * The HDF5 library implements an object-oriented model of datatypes. HDF5 datatypes are + * organized as a logical set of base types, or datatype classes. Each datatype class defines + * a format for representing logical values as a sequence of bits. For example the #H5T_INTEGER + * class is a format for representing twos complement integers of various sizes. + * + * A datatype class is defined as a set of one or more datatype properties. 
A datatype property is + * a property of the bit string. The datatype properties are defined by the logical model of the + * datatype class. For example, the integer class (twos complement integers) has properties such as + * “signed or unsigned”, “length”, and “byte-order”. The float class (IEEE floating point numbers) + * has these properties, plus “exponent bits”, “exponent sign”, etc. + * + * A datatype is derived from one datatype class: a given datatype has a specific value for the + * datatype properties defined by the class. For example, for 32-bit signed integers, stored + * big-endian, the HDF5 datatype is a sub-type of integer with the properties set to + * signed=1, size=4(bytes), and byte-order=BE. + * + * The HDF5 datatype API (H5T functions) provides methods to create datatypes of different + * datatype classes, to set the datatype properties of a new datatype, and to discover the datatype + * properties of an existing datatype. + * + * The datatype for a dataset is stored in the HDF5 file as part of the metadata for the dataset. + * A datatype can be shared by more than one dataset in the file if the datatype is saved to the + * file with a name. This shareable datatype is known as a committed datatype. In the past, + * this kind of datatype was called a named datatype. + * + * When transferring data (for example, a read or write), the data elements of the source and + * destination storage must have compatible types. As a general rule, data elements with the same + * datatype class are compatible while elements from different datatype classes are not compatible. + * When transferring data of one datatype to another compatible datatype, the HDF5 Library uses + * the datatype properties of the source and destination to automatically transform each data + * element. For example, when reading from data stored as 32-bit signed integers, big + * endian into 32-bit signed integers, little-endian, the HDF5 Library will automatically swap the + * bytes. 
+ * + * Thus, data transfer operations (\ref H5Dread, \ref H5Dwrite, \ref H5Aread, \ref H5Awrite) require + * a datatype for both the source and the destination. + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig2.gif "The datatype model" + * </td> + * </tr> + * </table> + * + * The HDF5 library defines a set of predefined datatypes, corresponding to commonly used + * storage formats, such as twos complement integers, IEEE Floating point numbers, etc., 4- + * and 8-byte sizes, big-endian and little-endian byte orders. In addition, a user can derive types with + * custom values for the properties. For example, a user program may create a datatype to describe + * a 6-bit integer, or a 600-bit floating point number. + * + * In addition to atomic datatypes, the HDF5 library supports composite datatypes. A composite + * datatype is an aggregation of one or more datatypes. Each class of composite datatypes has + * properties that describe the organization of the composite datatype. See the figure below. + * Composite datatypes include: + * \li Compound datatypes: structured records + * \li Array: a multidimensional array of a datatype + * \li Variable-length: a one-dimensional array of a datatype + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig3.gif "Composite datatypes" + * </td> + * </tr> + * </table> + * + * \subsubsection subsubsec_datatype_model_class Datatype Classes and Properties + * The figure below shows the HDF5 datatype classes. Each class is defined to have a set of + * properties which describe the layout of the data element and the interpretation of the bits. The + * table below lists the properties for the datatype classes. 
+ * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig4.gif "Datatype classes" + * </td> + * </tr> + * </table> + * + * <table> + * <caption align=top>Datatype classes and their properties</caption> + * <tr> + * <th> + * Class + * </th> + * <th> + * Description + * </th> + * <th> + * Properties + * </th> + * <th> + * Notes + * </th> + * </tr> + * <tr> + * <td> + * Integer + * </td> + * <td> + * Twos complement integers + * </td> + * <td> + * Size (bytes), precision (bits), offset (bits), pad, byte order, signed/unsigned + * </td> + * <td> + * </td> + * </tr> + * <tr> + * <td> + * Float + * </td> + * <td> + * Floating Point numbers + * </td> + * <td> + * Size (bytes), precision (bits), offset (bits), pad, byte order, sign position, + * exponent position, exponent size (bits), exponent sign, exponent bias, mantissa position, + * mantissa (size) bits, mantissa sign, mantissa normalization, internal padding + * </td> + * <td> + * See IEEE 754 for a definition of these properties. These properties describe + * non-IEEE 754 floating point formats as well. + * </td> + * </tr> + * <tr> + * <td> + * Character + * </td> + * <td> + * Array of 1-byte character encoding + * </td> + * <td> + * Size (characters), Character set, byte order, pad/no pad, pad character + * </td> + * <td> + * Currently, ASCII and UTF-8 are supported. + * </td> + * </tr> + * <tr> + * <td> + * Bitfield + * </td> + * <td> + * String of bits + * </td> + * <td> + * Size (bytes), precision (bits), offset (bits), pad, byte order + * </td> + * <td> + * A sequence of bit values packed into one or more bytes. + * </td> + * </tr> + * <tr> + * <td> + * Opaque + * </td> + * <td> + * Uninterpreted data + * </td> + * <td> + * Size (bytes), precision (bits), offset (bits), pad, byte order, tag + * </td> + * <td> + * A sequence of bytes, stored and retrieved as a block. + * The ‘tag’ is a string that can be used to label the value. 
+ * </td> + * </tr> + * <tr> + * <td> + * Enumeration + * </td> + * <td> + * A list of discrete values, with symbolic names in the form of strings. + * </td> + * <td> + * Number of elements, element names, element values + * </td> + * <td> + * Enumeration is a list of pairs (name, value). The name is a string; the + * value is an unsigned integer. + * </td> + * </tr> + * <tr> + * <td> + * Reference + * </td> + * <td> + * Reference to object or region within the HDF5 file + * </td> + * <td> + * + * </td> + * <td> + * @see H5R + * </td> + * </tr> + * <tr> + * <td> + * Array + * </td> + * <td> + * Array (1-4 dimensions) of data elements + * </td> + * <td> + * Number of dimensions, dimension sizes, base datatype + * </td> + * <td> + * The array is accessed atomically: no selection or sub-setting. + * </td> + * </tr> + * <tr> + * <td> + * Variable-length + * </td> + * <td> + * A variable-length 1-dimensional array of data elements + * </td> + * <td> + * Current size, base type + * </td> + * <td> + * + * </td> + * </tr> + * <tr> + * <td> + * Compound + * </td> + * <td> + * A Datatype of a sequence of Datatypes + * </td> + * <td> + * Number of members, member names, member types, member offset, member class, + * member size, byte order + * </td> + * <td> + * + * </td> + * </tr> + * </table> + * + * \subsubsection subsubsec_datatype_model_predefine Predefined Datatypes + * The HDF5 library predefines a modest number of commonly used datatypes. These types have + * standard symbolic names of the form H5T_arch_base where arch is an architecture name and + * base is a programming type name <b>Table 2</b>. New types can be derived from the predefined + * types by copying the predefined type \ref H5Tcopy() and then modifying the result. + * + * The base name of most types consists of a letter to indicate the class <b>Table 3</b>, a precision in + * bits, and an indication of the byte order <b>Table 4</b>. + * + * <b>Table 5</b> shows examples of predefined datatypes. 
The full list can be found in the + * \ref PDT section of the \ref RM. + * + * <table> + * <caption align=top>Table 2. Architectures used in predefined datatypes</caption> + * <tr> + * <th> + * Architecture Name + * </th> + * <th span='3'> + * Description + * </th> + * </tr> + * <tr> + * <td> + * IEEE + * </td> + * <td span='3'> + * IEEE-754 standard floating point types in various byte orders. + * </td> + * </tr> + * <tr> + * <td> + * STD + * </td> + * <td span='3'> + * This is an architecture that contains semi-standard datatypes like signed + * two’s complement integers, unsigned integers, and bitfields in various + * byte orders. + * </td> + * </tr> + * <tr> + * <td> + * C <br \> FORTRAN + * </td> + * <td span='3'> + * Types which are specific to the C or Fortran programming languages + * are defined in these architectures. For instance, #H5T_C_S1 defines a + * base string type with null termination which can be used to derive string + * types of other lengths. + * </td> + * </tr> + * <tr> + * <td> + * NATIVE + * </td> + * <td span='3'> + * This architecture contains C-like datatypes for the machine on which + * the library was compiled. The types were actually defined by running + * the H5detect program when the library was compiled. In order to be + * portable, applications should almost always use this architecture + * to describe things in memory. + * </td> + * </tr> + * <tr> + * <td> + * CRAY + * </td> + * <td span='3'> + * Cray architectures. These are word-addressable, big-endian systems + * with non-IEEE floating point. + * </td> + * </tr> + * <tr> + * <td> + * INTEL + * </td> + * <td span='3'> + * All Intel and compatible CPU’s. + * These are little-endian systems with IEEE floating-point. + * </td> + * </tr> + * <tr> + * <td> + * MIPS + * </td> + * <td span='3'> + * All MIPS CPU’s commonly used in SGI systems. These are big-endian + * systems with IEEE floating-point. 
+ * </td> + * </tr> + * <tr> + * <td> + * ALPHA + * </td> + * <td span='3'> + * All DEC Alpha CPU’s, little-endian systems with IEEE floating-point. + * </td> + * </tr> + * </table> + * + * <table> + * <caption align=top>Table 3. Base types</caption> + * <tr> + * <th> + * Base + * </th> + * <th span='3'> + * Description + * </th> + * </tr> + * <tr> + * <td> + * B + * </td> + * <td span='3'> + * Bitfield + * </td> + * </tr> + * <tr> + * <td> + * F + * </td> + * <td span='3'> + * Floating point + * </td> + * </tr> + * <tr> + * <td> + * I + * </td> + * <td span='3'> + * Signed integer + * </td> + * </tr> + * <tr> + * <td> + * R + * </td> + * <td span='3'> + * References + * </td> + * </tr> + * <tr> + * <td> + * S + * </td> + * <td span='3'> + * Character string + * </td> + * </tr> + * <tr> + * <td> + * U + * </td> + * <td span='3'> + * Unsigned integer + * </td> + * </tr> + * </table> + * + * <table> + * <caption align=top>Table 4. Byte order</caption> + * <tr> + * <th> + * Order + * </th> + * <th span='3'> + * Description + * </th> + * </tr> + * <tr> + * <td> + * BE + * </td> + * <td span='3'> + * Big-endian + * </td> + * </tr> + * <tr> + * <td> + * LE + * </td> + * <td span='3'> + * Little-endian + * </td> + * </tr> + * </table> + * + * <table> + * <caption align=top>Table 5. 
Some predefined datatypes</caption> + * <tr> + * <th> + * Example + * </th> + * <th span='3'> + * Description + * </th> + * </tr> + * <tr> + * <td> + * #H5T_IEEE_F64LE + * </td> + * <td span='3'> + * Eight-byte, little-endian, IEEE floating-point + * </td> + * </tr> + * <tr> + * <td> + * #H5T_IEEE_F32BE + * </td> + * <td span='3'> + * Four-byte, big-endian, IEEE floating-point + * </td> + * </tr> + * <tr> + * <td> + * #H5T_STD_I32LE + * </td> + * <td span='3'> + * Four-byte, little-endian, signed two’s complement integer + * </td> + * </tr> + * <tr> + * <td> + * #H5T_STD_U16BE + * </td> + * <td span='3'> + * Two-byte, big-endian, unsigned integer + * </td> + * </tr> + * <tr> + * <td> + * #H5T_C_S1 + * </td> + * <td span='3'> + * One-byte, null-terminated string of eight-bit characters + * </td> + * </tr> + * <tr> + * <td> + * #H5T_INTEL_B64 + * </td> + * <td span='3'> + * Eight-byte bit field on an Intel CPU + * </td> + * </tr> + * <tr> + * <td> + * #H5T_STD_REF_OBJ + * </td> + * <td span='3'> + * Reference to an entire object in a file + * </td> + * </tr> + * </table> + * + * The HDF5 library predefines a set of \Emph{NATIVE} datatypes which are similar to C type names. + * The native types are set to be an alias for the appropriate HDF5 datatype for each platform. For + * example, #H5T_NATIVE_INT corresponds to a C int type. On an Intel based PC, this type is the same as + * #H5T_STD_I32LE, while on a MIPS system this would be equivalent to #H5T_STD_I32BE. Table 6 shows + * examples of \Emph{NATIVE} types and corresponding C types for a common 32-bit workstation. + * + * <table> + * <caption align=top>Table 6. 
Native and 32-bit C datatypes</caption> + * <tr> + * <th> + * Example + * </th> + * <th span='3'> + * Corresponding C Type + * </th> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_CHAR + * </td> + * <td span='3'> + * char + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_SCHAR + * </td> + * <td span='3'> + * signed char + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_UCHAR + * </td> + * <td span='3'> + * unsigned char + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_SHORT + * </td> + * <td span='3'> + * short + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_USHORT + * </td> + * <td span='3'> + * unsigned short + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_INT + * </td> + * <td span='3'> + * int + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_UINT + * </td> + * <td span='3'> + * unsigned + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_LONG + * </td> + * <td span='3'> + * long + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_ULONG + * </td> + * <td span='3'> + * unsigned long + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_LLONG + * </td> + * <td span='3'> + * long long + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_ULLONG + * </td> + * <td span='3'> + * unsigned long long + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_FLOAT + * </td> + * <td span='3'> + * float + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_DOUBLE + * </td> + * <td span='3'> + * double + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_LDOUBLE + * </td> + * <td span='3'> + * long double + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_HSIZE + * </td> + * <td span='3'> + * hsize_t + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_HSSIZE + * </td> + * <td span='3'> + * hssize_t + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_HERR + * </td> + * <td span='3'> + * herr_t + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_HBOOL + * </td> + * <td span='3'> + * hbool_t + * </td> + * </tr> + * <tr> + * <td> + * 
#H5T_NATIVE_B8 + * </td> + * <td span='3'> + * 8-bit unsigned integer or 8-bit buffer in memory + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_B16 + * </td> + * <td span='3'> + * 16-bit unsigned integer or 16-bit buffer in memory + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_B32 + * </td> + * <td span='3'> + * 32-bit unsigned integer or 32-bit buffer in memory + * </td> + * </tr> + * <tr> + * <td> + * #H5T_NATIVE_B64 + * </td> + * <td span='3'> + * 64-bit unsigned integer or 64-bit buffer in memory + * </td> + * </tr> + * </table> + * + * \subsection subsec_datatype_usage How Datatypes are Used + * + * \subsubsection subsubsec_datatype_usage_object The Datatype Object and the HDF5 Datatype API + * The HDF5 library manages datatypes as objects. The HDF5 datatype API manipulates the + * datatype objects through C function calls. New datatypes can be created from scratch or + * copied from existing datatypes. When a datatype is no longer needed its resources should be released by + * calling \ref H5Tclose(). + * + * The datatype object is used in several roles in the HDF5 data model and library. Essentially, a + * datatype is used whenever the format of data elements is needed. There are four major uses of + * datatypes in the HDF5 library: at dataset creation, during data transfers, when discovering the + * contents of a file, and for specifying user-defined datatypes. See the table below. + * + * <table> + * <caption align=top>Table 7. Datatype uses</caption> + * <tr> + * <th> + * Use + * </th> + * <th span='2'> + * Description + * </th> + * </tr> + * <tr> + * <td> + * Dataset creation + * </td> + * <td span='2'> + * The datatype of the data elements must be declared when the dataset is created. + * </td> + * </tr> + * <tr> + * <td> + * Dataset transfer + * </td> + * <td span='2'> + * The datatype (format) of the data elements must be defined for both the source and destination. 
+ * </td> + * </tr> + * <tr> + * <td> + * Discovery + * </td> + * <td span='2'> + * The datatype of a dataset can be interrogated to retrieve a complete description of the storage layout. + * </td> + * </tr> + * <tr> + * <td> + * Creating user-defined datatypes + * </td> + * <td span='2'> + * Users can define their own datatypes by creating datatype objects and setting their properties. + * </td> + * </tr> + * </table> + * + * \subsubsection subsubsec_datatype_usage_create Dataset Creation + * All the data elements of a dataset have the same datatype. When a dataset is created, the datatype + * for the data elements must be specified. The datatype of a dataset can never be changed. The + * example below shows the use of a datatype to create a dataset called “/dset”. In this example, the + * dataset will be stored as 32-bit signed integers in big-endian order. + * + * <em> Using a datatype to create a dataset </em> + * \code + * hid_t dt; + * + * dt = H5Tcopy(H5T_STD_I32BE); + * dataset_id = H5Dcreate(file_id, "/dset", dt, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * \endcode + * + * \subsubsection subsubsec_datatype_usage_transfer Data Transfer (Read and Write) + * Probably the most common use of datatypes is to write or read data from a dataset or attribute. In + * these operations, each data element is transferred from the source to the destination (possibly + * rearranging the order of the elements). Since the source and destination do not need to be + * identical (in other words, one is disk and the other is memory), the transfer requires + * both the format of the source element and the destination element. Therefore, data transfers use two + * datatype objects, for the source and destination. + * + * When data is written, the source is memory and the destination is disk (file). The memory + * datatype describes the format of the data element in the machine memory, and the file datatype + * describes the desired format of the data element on disk. 
Similarly, when reading, the source + * datatype describes the format of the data element on disk, and the destination datatype describes + * the format in memory. + * + * In the most common cases, the file datatype is the datatype specified when + * the dataset was + * created, and the memory datatype should be the appropriate \Emph{NATIVE} type. + * The examples below show samples of writing data to and reading data from a dataset. The data + * in memory is declared C type ‘int’, and the datatype #H5T_NATIVE_INT corresponds to this + * type. The datatype of the dataset should be of datatype class #H5T_INTEGER. + * + * <em> Writing to a dataset </em> + * \code + * int dset_data[DATA_SIZE]; + * + * status = H5Dwrite(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dset_data); + * \endcode + * + * <em> Reading from a dataset </em> + * \code + * int dset_data[DATA_SIZE]; + * + * status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dset_data); + * \endcode + * + * \subsubsection subsubsec_datatype_usage_discover Discovery of Data Format + * The HDF5 Library enables a program to + * determine the datatype class and properties for any + * datatype. In order to discover the storage format of data in a dataset, the datatype is obtained, and + * the properties are determined by queries to the datatype object. The example below shows code + * that analyzes the datatype for an integer and prints out a description of its storage properties + * (byte order, signed, size). 
+ * + * <em> Discovering datatype properties </em> + * \code + * switch (H5Tget_class(type)) { + * case H5T_INTEGER: + * ord = H5Tget_order(type); + * sgn = H5Tget_sign(type); + * printf("Integer ByteOrder= "); + * switch (ord) { + * case H5T_ORDER_LE: + * printf("LE"); + * break; + * case H5T_ORDER_BE: + * printf("BE"); + * break; + * } + * printf(" Sign= "); + * switch (sgn) { + * case H5T_SGN_NONE: + * printf("false"); + * break; + * case H5T_SGN_2: + * printf("true"); + * break; + * } + * printf(" Size= "); + * sz = H5Tget_size(type); + * printf("%d", sz); + * printf("\n"); + * break; + * case H5T_???? + * ... + * break; + * } + * \endcode + * + * \subsubsection subsubsec_datatype_usage_user Creating and Using User-defined Datatypes + * Most programs will primarily use the predefined datatypes described above, possibly in + * composite data types such as compound or array datatypes. However, the HDF5 datatype model + * is extremely general; a user program can define a great variety of atomic datatypes (storage + * layouts). In particular, the datatype properties can define signed and unsigned integers of any + * size and byte order, and floating point numbers with different formats, size, and byte order. The + * HDF5 datatype API provides methods to set these properties. + * + * User-defined types can be used to define the layout of data in memory; examples might match + * some platform specific number format or application defined bit-field. The user-defined type can + * also describe data in the file such as an application-defined format. The user-defined types can be + * translated to and from standard types of the same class, as described above. + * + * \subsection subsec_datatype_function Datatype Function Summaries + * @see H5T reference manual for a reference list of datatype functions, the H5T APIs. + * + * \subsection subsec_datatype_program Programming Model for Datatypes + * The HDF5 Library implements an object-oriented model of datatypes. 
HDF5 datatypes are + * organized as a logical set of base types, or datatype classes. The HDF5 Library manages + * datatypes as objects. The HDF5 datatype API manipulates the datatype objects through C + * function calls. The figure below shows the abstract view of the datatype object. The table below + * shows the methods (C functions) that operate on datatype objects. New datatypes can be created + * from scratch or copied from existing datatypes. + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig5.gif "The datatype object" + * </td> + * </tr> + * </table> + * + * <table> + * <caption align=top>Table 8. General operations on datatype objects</caption> + * <tr> + * <th> + * API Function + * </th> + * <th> + * Description + * </th> + * </tr> + * <tr> + * <td> + * \ref hid_t \ref H5Tcreate (\ref H5T_class_t class, size_t size) + * </td> + * <td> + * Create a new datatype object of the given datatype class. The following datatype classes are supported + * with this function: + * \li #H5T_COMPOUND + * \li #H5T_OPAQUE + * \li #H5T_ENUM + * \li Other datatypes are created with \ref H5Tcopy(). + * </td> + * </tr> + * <tr> + * <td> + * \ref hid_t \ref H5Tcopy (\ref hid_t type) + * </td> + * <td> + * Obtain a modifiable transient datatype which is a copy of type. If type is a dataset identifier + * then the type returned is a modifiable transient copy of the datatype of the specified dataset. + * </td> + * </tr> + * <tr> + * <td> + * \ref hid_t \ref H5Topen (\ref hid_t location, const char *name, #H5P_DEFAULT) + * </td> + * <td> + * Open a committed datatype. The committed datatype returned by this function is read-only. + * </td> + * </tr> + * <tr> + * <td> + * \ref htri_t \ref H5Tequal (\ref hid_t type1, \ref hid_t type2) + * </td> + * <td> + * Determines if two types are equal. 
+ * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tclose (\ref hid_t type) + * </td> + * <td> + * Releases resources associated with a datatype obtained from \ref H5Tcopy, \ref H5Topen, or + * \ref H5Tcreate. It is illegal to close an immutable transient datatype (for example, predefined types). + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tcommit (\ref hid_t location, const char *name, hid_t type, + * #H5P_DEFAULT, #H5P_DEFAULT, #H5P_DEFAULT) + * </td> + * <td> + * Commit a transient datatype (not immutable) to a file to become a committed datatype. Committed + * datatypes can be shared. + * </td> + * </tr> + * <tr> + * <td> + * \ref htri_t \ref H5Tcommitted (\ref hid_t type) + * </td> + * <td> + * Test whether the datatype is transient or committed (named). + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tlock (\ref hid_t type) + * </td> + * <td> + * Make a transient datatype immutable (read-only and not closable). Predefined types are locked. + * </td> + * </tr> + * </table> + * + * In order to use a datatype, the object must be created (\ref H5Tcreate), or a reference obtained by + * cloning from an existing type (\ref H5Tcopy), or opened (\ref H5Topen). In addition, a reference to the + * datatype of a dataset or attribute can be obtained with \ref H5Dget_type or \ref H5Aget_type. For + * composite datatypes a reference to the datatype for members or base types can be obtained + * (\ref H5Tget_member_type, \ref H5Tget_super). When the datatype object is no longer needed, the + * reference is discarded with \ref H5Tclose. + * + * Two datatype objects can be tested to see if they are the same with \ref H5Tequal. This function + * returns true if the two datatype references refer to the same datatype object. However, if two + * datatype objects define equivalent datatypes (the same datatype class and datatype properties), + * they will not be considered ‘equal’. 
+ * + * A datatype can be written to the file as a first-class object (\ref H5Tcommit). This is a committed + * datatype and can be used in the same way as any other datatype. + * + * \subsubsection subsubsec_datatype_program_discover Discovery of Datatype Properties + * Any HDF5 datatype object can be queried to discover all of its datatype properties. For each + * datatype class, there is a set of API functions to retrieve the datatype properties for this class. + * + * <h4>Properties of Atomic Datatypes</h4> + * Table 9 lists the functions to discover the properties of atomic datatypes. Table 10 lists the + * queries relevant to specific numeric types. Table 11 gives the properties for atomic string + * datatype, and Table 12 gives the property of the opaque datatype. + * + * <table> + * <caption align=top>Table 9. Functions to discover properties of atomic datatypes</caption> + * <tr> + * <th> + * API Function + * </th> + * <th> + * Description + * </th> + * </tr> + * <tr> + * <td> + * \ref H5T_class_t \ref H5Tget_class (\ref hid_t type) + * </td> + * <td> + * The datatype class: #H5T_INTEGER, #H5T_FLOAT, #H5T_STRING, #H5T_BITFIELD, #H5T_OPAQUE, #H5T_COMPOUND, + * #H5T_REFERENCE, #H5T_ENUM, #H5T_VLEN, #H5T_ARRAY + * </td> + * </tr> + * <tr> + * <td> + * size_t \ref H5Tget_size (\ref hid_t type) + * </td> + * <td> + * The total size of the element in bytes, including padding which may appear on either side of the + * actual value. + * </td> + * </tr> + * <tr> + * <td> + * \ref H5T_order_t \ref H5Tget_order (\ref hid_t type) + * </td> + * <td> + * The byte order describes how the bytes of the datatype are laid out in memory. If the lowest memory + * address contains the least significant byte of the datum then it is said to be little-endian or + * #H5T_ORDER_LE. If the bytes are in the opposite order then they are said to be big-endian or #H5T_ORDER_BE. 
+ * </td> + * </tr> + * <tr> + * <td> + * size_t \ref H5Tget_precision (\ref hid_t type) + * </td> + * <td> + * The precision property identifies the number of significant bits of a datatype and the offset property + * (defined below) identifies its location. Some datatypes occupy more bytes than what is needed to store + * the value. For instance, a short on a Cray is 32 significant bits in an eight-byte field. + * </td> + * </tr> + * <tr> + * <td> + * int \ref H5Tget_offset (\ref hid_t type) + * </td> + * <td> + * The offset property defines the bit location of the least significant bit of a bit field whose length + * is precision. + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tget_pad (\ref hid_t type, \ref H5T_pad_t *lsb, \ref H5T_pad_t *msb) + * </td> + * <td> + * Padding is the bits of a data element which are not significant as defined by the precision and offset + * properties. Padding in the low-numbered bits is lsb padding and padding in the high-numbered bits is msb + * padding. Padding bits can be set to zero (#H5T_PAD_ZERO) or one (#H5T_PAD_ONE). + * </td> + * </tr> + * </table> + * + * <table> + * <caption align=top>Table 10. Functions to discover properties of atomic numeric datatypes</caption> + * <tr> + * <th> + * API Function + * </th> + * <th> + * Description + * </th> + * </tr> + * <tr> + * <td> + * \ref H5T_sign_t \ref H5Tget_sign (\ref hid_t type) + * </td> + * <td> + * (INTEGER)Integer data can be signed two’s complement (#H5T_SGN_2) or unsigned (#H5T_SGN_NONE). + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tget_fields (\ref hid_t type, size_t *spos, size_t *epos, size_t *esize, + * size_t *mpos, size_t *msize) + * </td> + * <td> + * (FLOAT)A floating-point data element has bit fields which are the exponent and mantissa as well as a + * mantissa sign bit. These properties define the location (bit position of least significant bit of the + * field) and size (in bits) of each field. 
The sign bit is always of length one and none of the fields + * are allowed to overlap. + * </td> + * </tr> + * <tr> + * <td> + * size_t \ref H5Tget_ebias (\ref hid_t type) + * </td> + * <td> + * (FLOAT)The exponent is stored as a non-negative value which is ebias larger than the true exponent. + * </td> + * </tr> + * <tr> + * <td> + * \ref H5T_norm_t \ref H5Tget_norm (\ref hid_t type) + * </td> + * <td> + * (FLOAT)This property describes the normalization method of the mantissa. + * <ul><li>#H5T_NORM_MSBSET: the mantissa is shifted left (if non-zero) until the first bit + * after the radix point is set and the exponent is adjusted accordingly. All bits of the + * mantissa after the radix point are stored. </li> + * <li>#H5T_NORM_IMPLIED: the mantissa is shifted left (if non-zero) until the first + * bit after the radix point is set and the exponent is adjusted accordingly. The first + * bit after the radix point is not stored since it’s always set. </li> + * <li>#H5T_NORM_NONE: the fractional part of the mantissa is stored without normalizing it.</li></ul> + * </td> + * </tr> + * <tr> + * <td> + * \ref H5T_pad_t \ref H5Tget_inpad (\ref hid_t type) + * </td> + * <td> + * (FLOAT)If any internal bits (that is, bits between the sign bit, the mantissa field, + * and the exponent field but within the precision field) are unused, then they will be + * filled according to the value of this property. The padding can be: + * #H5T_PAD_BACKGROUND, #H5T_PAD_ZERO, or #H5T_PAD_ONE. + * </td> + * </tr> + * </table> + * + * <table> + * <caption align=top>Table 11. 
Functions to discover properties of atomic string datatypes</caption> + * <tr> + * <th> + * API Function + * </th> + * <th> + * Description + * </th> + * </tr> + * <tr> + * <td> + * \ref H5T_cset_t \ref H5Tget_cset (\ref hid_t type) + * </td> + * <td> + * Two character sets are currently supported: + * ASCII (#H5T_CSET_ASCII) and UTF-8 (#H5T_CSET_UTF8). + * </td> + * </tr> + * <tr> + * <td> + * \ref H5T_str_t \ref H5Tget_strpad (\ref hid_t type) + * </td> + * <td> + * The string datatype has a fixed length, but the string may be shorter than the length. + * This property defines the storage mechanism for the left over bytes. The options are: + * \li #H5T_STR_NULLTERM + * \li #H5T_STR_NULLPAD + * \li #H5T_STR_SPACEPAD. + * </td> + * </tr> + * </table> + * + * <table> + * <caption align=top>Table 12. Functions to discover properties of atomic opaque datatypes</caption> + * <tr> + * <th> + * API Function + * </th> + * <th> + * Description + * </th> + * </tr> + * <tr> + * <td> + * char* \ref H5Tget_tag(\ref hid_t type_id) + * </td> + * <td> + * A user-defined string. + * </td> + * </tr> + * </table> + * + * <h4>Properties of Composite Datatypes</h4> + * The composite datatype classes can also be analyzed to discover their datatype properties and the + * datatypes that are members or base types of the composite datatype. The member or base type + * can, in turn, be analyzed. The table below lists the functions that can access the datatype + * properties of the different composite datatypes. + * + * <table> + * <caption align=top>Table 13. Functions to discover properties of composite datatypes</caption> + * <tr> + * <th> + * API Function + * </th> + * <th> + * Description + * </th> + * </tr> + * <tr> + * <td> + * int \ref H5Tget_nmembers(\ref hid_t type_id) + * </td> + * <td> + * (COMPOUND)The number of fields in the compound datatype. 
+ * </td> + * </tr> + * <tr> + * <td> + * \ref H5T_class_t \ref H5Tget_member_class (\ref hid_t cdtype_id, unsigned member_no) + * </td> + * <td> + * (COMPOUND)The datatype class of compound datatype member member_no. + * </td> + * </tr> + * <tr> + * <td> + * char* \ref H5Tget_member_name (\ref hid_t type_id, unsigned field_idx) + * </td> + * <td> + * (COMPOUND)The name of field field_idx of a compound datatype. + * </td> + * </tr> + * <tr> + * <td> + * size_t \ref H5Tget_member_offset (\ref hid_t type_id, unsigned memb_no) + * </td> + * <td> + * (COMPOUND)The byte offset of the beginning of a field within a compound datatype. + * </td> + * </tr> + * <tr> + * <td> + * \ref hid_t \ref H5Tget_member_type (\ref hid_t type_id, unsigned field_idx) + * </td> + * <td> + * (COMPOUND)The datatype of the specified member. + * </td> + * </tr> + * <tr> + * <td> + * int \ref H5Tget_array_ndims (\ref hid_t adtype_id) + * </td> + * <td> + * (ARRAY)The number of dimensions (rank) of the array datatype object. + * </td> + * </tr> + * <tr> + * <td> + * int \ref H5Tget_array_dims (\ref hid_t adtype_id, hsize_t *dims[]) + * </td> + * <td> + * (ARRAY)The sizes of the dimensions and the dimension permutations of the array datatype object. + * </td> + * </tr> + * <tr> + * <td> + * \ref hid_t \ref H5Tget_super(\ref hid_t type) + * </td> + * <td> + * (ARRAY, VL, ENUM)The base datatype from which the datatype type is derived. + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tenum_nameof(\ref hid_t type, const void *value, char *name, size_t size) + * </td> + * <td> + * (ENUM)The symbol name that corresponds to the specified value of the enumeration datatype. + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tenum_valueof(\ref hid_t type, const char *name, void *value) + * </td> + * <td> + * (ENUM)The value that corresponds to the specified name of the enumeration datatype. 
+ * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tget_member_value (\ref hid_t type, unsigned memb_no, void *value) + * </td> + * <td> + * (ENUM)The value of the enumeration datatype member memb_no. + * </td> + * </tr> + * </table> + * + * \subsubsection subsubsec_datatype_program_define Definition of Datatypes + * The HDF5 library enables user programs to create and modify datatypes. The essential steps are: + * <ul><li>1. Create a new datatype object of a specific composite datatype class, or copy an existing + * atomic datatype object</li> + * <li>2. Set properties of the datatype object</li> + * <li>3. Use the datatype object</li> + * <li>4. Close the datatype object</li></ul> + * + * To create a user-defined atomic datatype, the procedure is to clone a predefined datatype of the + * appropriate datatype class (\ref H5Tcopy), and then set the datatype properties appropriate to the + * datatype class. The example below shows how to create a datatype to describe a 1024-bit unsigned + * integer. + * + * <em>Create a new datatype</em> + * \code + * hid_t new_type = H5Tcopy (H5T_NATIVE_INT); + * + * H5Tset_precision(new_type, 1024); + * H5Tset_sign(new_type, H5T_SGN_NONE); + * \endcode + * + * Composite datatypes are created with a specific API call for each datatype class. The table below + * shows the creation method for each datatype class. A newly created datatype cannot be used until the + * datatype properties are set. For example, a newly created compound datatype has no members and cannot + * be used. + * + * <table> + * <caption align=top>Table 14. 
Functions to create each datatype class</caption> + * <tr> + * <th> + * Datatype Class + * </th> + * <th> + * Function to Create + * </th> + * </tr> + * <tr> + * <td> + * COMPOUND + * </td> + * <td> + * #H5Tcreate + * </td> + * </tr> + * <tr> + * <td> + * OPAQUE + * </td> + * <td> + * #H5Tcreate + * </td> + * </tr> + * <tr> + * <td> + * ENUM + * </td> + * <td> + * #H5Tenum_create + * </td> + * </tr> + * <tr> + * <td> + * ARRAY + * </td> + * <td> + * #H5Tarray_create + * </td> + * </tr> + * <tr> + * <td> + * VL + * </td> + * <td> + * #H5Tvlen_create + * </td> + * </tr> + * </table> + * + * Once the datatype is created and the datatype properties set, the datatype object can be used. + * + * Predefined datatypes are defined by the library during initialization using the same mechanisms + * as described here. Each predefined datatype is locked (\ref H5Tlock), so that it cannot be changed or + * destroyed. User-defined datatypes may also be locked using \ref H5Tlock. + * + * <h4>User-defined Atomic Datatypes</h4> + * Table 15 summarizes the API methods that set properties of atomic types. Table 16 shows + * properties specific to numeric types, Table 17 shows properties specific to the string datatype + * class. Note that offset, pad, etc. do not apply to strings. Table 18 shows the specific property of + * the OPAQUE datatype class. + * + * <table> + * <caption align=top>Table 15. API methods that set properties of atomic datatypes</caption> + * <tr> + * <th> + * Functions + * </th> + * <th> + * Description + * </th> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_size (\ref hid_t type, size_t size) + * </td> + * <td> + * Set the total size of the element in bytes. This includes padding which may appear on either + * side of the actual value. If this property is reset to a smaller value which would cause the + * significant part of the data to extend beyond the edge of the datatype, then the offset property + * is decremented a bit at a time. 
If the offset reaches zero and the significant part of the data + * still extends beyond the edge of the datatype then the precision property is decremented a bit at + * a time. Decreasing the size of a datatype may fail if the #H5T_FLOAT bit fields would extend beyond + * the significant part of the type. + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_order (\ref hid_t type, \ref H5T_order_t order) + * </td> + * <td> + * Set the byte order to little-endian (#H5T_ORDER_LE) or big-endian (#H5T_ORDER_BE). + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_precision (\ref hid_t type, size_t precision) + * </td> + * <td> + * Set the number of significant bits of a datatype. The offset property (defined below) identifies + * its location. The size property defined above represents the entire size (in bytes) of the datatype. + * If the precision is decreased then padding bits are inserted on the MSB side of the significant + * bits (this will fail for #H5T_FLOAT types if it results in the sign,mantissa, or exponent bit field + * extending beyond the edge of the significant bit field). On the other hand, if the precision is + * increased so that it “hangs over” the edge of the total size then the offset property is decremented + * a bit at a time. If the offset reaches zero and the significant bits still hang over the edge, then + * the total size is increased a byte at a time. + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_offset (\ref hid_t type, size_t offset) + * </td> + * <td> + * Set the bit location of the least significant bit of a bit field whose length is precision. The + * bits of the entire data are numbered beginning at zero at the least significant bit of the least + * significant byte (the byte at the lowest memory address for a little-endian type or the byte at + * the highest address for a big-endian type). 
The offset property defines the bit location of the + * least significant bit of a bit field whose length is precision. If the offset is increased so the + * significant bits “hang over” the edge of the datum, then the size property is automatically incremented. + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_pad (\ref hid_t type, \ref H5T_pad_t lsb, \ref H5T_pad_t msb) + * </td> + * <td> + * Set the padding to zeros (#H5T_PAD_ZERO) or ones (#H5T_PAD_ONE). Padding is the bits of a + * data element which are not significant as defined by the precision and offset properties. Padding + * in the low-numbered bits is lsb padding and padding in the high-numbered bits is msb padding. + * </td> + * </tr> + * </table> + * + * <table> + * <caption align=top>Table 16. API methods that set properties of numeric datatypes</caption> + * <tr> + * <th> + * Functions + * </th> + * <th> + * Description + * </th> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_sign (\ref hid_t type, \ref H5T_sign_t sign) + * </td> + * <td> + * (INTEGER)Integer data can be signed two’s complement (#H5T_SGN_2) or unsigned (#H5T_SGN_NONE). + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_fields (\ref hid_t type, size_t spos, size_t epos, size_t esize, + * size_t mpos, size_t msize) + * </td> + * <td> + * (FLOAT)Set the properties that define the location (bit position of least significant bit of the field) + * and size (in bits) of each field. The sign bit is always of length one and none of the fields are + * allowed to overlap. + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_ebias (\ref hid_t type, size_t ebias) + * </td> + * <td> + * (FLOAT)The exponent is stored as a non-negative value which is ebias larger than the true exponent. + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_norm (\ref hid_t type, \ref H5T_norm_t norm) + * </td> + * <td> + * (FLOAT)This property describes the normalization method of the mantissa. 
+ * <ul><li>#H5T_NORM_MSBSET: the mantissa is shifted left (if non-zero) until the first bit + * after the radix point is set and the exponent is adjusted accordingly. All bits of the + * mantissa after the radix point are stored. </li> + * <li>#H5T_NORM_IMPLIED: the mantissa is shifted left (if non-zero) until the first bit + * after the radix point is set and the exponent is adjusted accordingly. The first bit after + * the radix point is not stored since it is always set. </li> + * <li>#H5T_NORM_NONE: the fractional part of the mantissa is stored without normalizing it.</li></ul> + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_inpad (\ref hid_t type, \ref H5T_pad_t inpad) + * </td> + * <td> + * (FLOAT) + * If any internal bits (that is, bits between the sign bit, the mantissa field, + * and the exponent field but within the precision field) are unused, then they will be + * filled according to the value of this property. The padding can be: + * \li #H5T_PAD_BACKGROUND + * \li #H5T_PAD_ZERO + * \li #H5T_PAD_ONE + * </td> + * </tr> + * </table> + * + * <table> + * <caption align=top>Table 17. API methods that set properties of string datatypes</caption> + * <tr> + * <th> + * Functions + * </th> + * <th> + * Description + * </th> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_size (\ref hid_t type, size_t size) + * </td> + * <td> + * Set the length of the string, in bytes. The precision is automatically set to 8*size. + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_precision (\ref hid_t type, size_t precision) + * </td> + * <td> + * The precision must be a multiple of 8. + * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_cset (\ref hid_t type_id, \ref H5T_cset_t cset) + * </td> + * <td> + * Two character sets are currently supported: + * \li ASCII (#H5T_CSET_ASCII) + * \li UTF-8 (#H5T_CSET_UTF8). 
+ * </td> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_strpad (\ref hid_t type_id, H5T_str_t strpad) + * </td> + * <td> + * The string datatype has a fixed length, but the string may be shorter than the length. This + * property defines the storage mechanism for the left over bytes. The method used to store + * character strings differs with the programming language: + * \li C usually null terminates strings + * \li Fortran left-justifies and space-pads strings + * + * Valid string padding values, as passed in the parameter strpad, are as follows: + * \li #H5T_STR_NULLTERM: Null terminate (as C does) + * \li #H5T_STR_NULLPAD: Pad with zeros + * \li #H5T_STR_SPACEPAD: Pad with spaces (as FORTRAN does) + * </td> + * </tr> + * </table> + * + * <table> + * <caption align=top>Table 18. API methods that set properties of opaque datatypes</caption> + * <tr> + * <th> + * Functions + * </th> + * <th> + * Description + * </th> + * </tr> + * <tr> + * <td> + * \ref herr_t \ref H5Tset_tag (\ref hid_t type_id, const char *tag) + * </td> + * <td> + * Tags the opaque datatype type_id with an ASCII identifier tag. + * </td> + * </tr> + * </table> + * + * <h4>Examples</h4> + * The example below shows how to create a 128-bit little-endian signed integer type. Increasing + * the precision of a type automatically increases the total size. Note that the proper + * procedure is to begin from a type of the intended datatype class which in this case is a + * NATIVE INT. + * + * <em>Create a new 128-bit little-endian signed integer datatype</em> + * \code + * hid_t new_type = H5Tcopy (H5T_NATIVE_INT); + * H5Tset_precision (new_type, 128); + * H5Tset_order (new_type, H5T_ORDER_LE); + * \endcode + * + * The figure below shows the storage layout as the type is defined. The \ref H5Tcopy creates a + * datatype that is the same as #H5T_NATIVE_INT. In this example, suppose this is a 32-bit + * big-endian number (Figure a). 
The precision is set to 128 bits, which automatically extends + * the size to 16 bytes (Figure b). Finally, the byte order is set to little-endian (Figure c). + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig6.gif "The storage layout for a new 128-bit little-endian signed integer datatype" + * </td> + * </tr> + * </table> + * + * The significant bits of a data element can be offset from the beginning of the memory for that + * element by an amount of padding. The offset property specifies the number of bits of padding + * that appear to the “right of” the value. The table and figure below show how a 32-bit unsigned + * integer with 16 bits of precision having the value 0x1122 will be laid out in memory. + * + * <table> + * <caption align=top>Table 19. Memory Layout for a 32-bit unsigned integer</caption> + * <tr> + * <th> + * Byte Position + * </th> + * <th> + * Big-Endian<br />Offset=0 + * </th> + * <th> + * Big-Endian<br />Offset=16 + * </th> + * <th> + * Little-Endian<br />Offset=0 + * </th> + * <th> + * Little-Endian<br />Offset=16 + * </th> + * </tr> + * <tr> + * <td> + * 0: + * </td> + * <td> + * [pad] + * </td> + * <td> + * [0x11] + * </td> + * <td> + * [0x22] + * </td> + * <td> + * [pad] + * </td> + * </tr> + * <tr> + * <td> + * 1: + * </td> + * <td> + * [pad] + * </td> + * <td> + * [0x22] + * </td> + * <td> + * [0x11] + * </td> + * <td> + * [pad] + * </td> + * </tr> + * <tr> + * <td> + * 2: + * </td> + * <td> + * [0x11] + * </td> + * <td> + * [pad] + * </td> + * <td> + * [pad] + * </td> + * <td> + * [0x22] + * </td> + * </tr> + * <tr> + * <td> + * 3: + * </td> + * <td> + * [0x22] + * </td> + * <td> + * [pad] + * </td> + * <td> + * [pad] + * </td> + * <td> + * [0x11] + * </td> + * </tr> + * </table> + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig7.gif "Memory Layout for a 32-bit unsigned integer" + * </td> + * </tr> + * </table> + * + * If the offset is incremented then the total size is incremented also if necessary to prevent + * 
significant bits of the value from hanging over the edge of the datatype. + * + * The bits of the entire data are numbered beginning at zero at the least significant bit of the least + * significant byte (the byte at the lowest memory address for a little-endian type or the byte at the + * highest address for a big-endian type). The offset property defines the bit location of the least + * significant bit of a bit field whose length is precision. If the offset is increased so the significant + * bits “hang over” the edge of the datum, then the size property is automatically incremented. + * + * To illustrate the properties of the integer datatype class, the example below shows how to create + * a user-defined datatype that describes a 24-bit signed integer that starts on the third bit of a 32-bit + * word. The datatype is specialized from a 32-bit integer, the precision is set to 24 bits, and the + * offset is set to 3. + * + * <em>A user-defined datatype with a 24-bit signed integer</em> + * \code + * hid_t dt; + * + * dt = H5Tcopy(H5T_STD_I32LE); + * H5Tset_precision(dt, 24); + * H5Tset_offset(dt, 3); + * H5Tset_pad(dt, H5T_PAD_ZERO, H5T_PAD_ONE); + * \endcode + * + * The figure below shows the storage layout for a data element. Note that the unused bits in the + * offset will be set to zero and the unused bits at the end will be set to one, as specified in the + * \ref H5Tset_pad call. + * <table> + * <tr> + * <td> + * \image html Dtypes_fig8.gif "A user-defined integer datatype with a range of -1,048,583 to 1,048,584" + * </td> + * </tr> + * </table> + * + * To illustrate a user-defined floating point number, the example below shows how to create a 24-bit + * floating point number that starts 5 bits into a 4 byte word. The floating point number is defined to + * have a mantissa of 19 bits (bits 5-23), an exponent of 3 bits (25-27), and the sign bit is bit 28. 
+ * (Note that this is an illustration of what can be done and is not necessarily a floating point + * format that a user would require.) + * + * <em>A user-defined datatype with a 24-bit floating point datatype</em> + * \code + * hid_t dt; + * + * dt = H5Tcopy(H5T_STD_F32LE); + * H5Tset_precision(dt, 24); + * H5Tset_fields (dt, 28, 25, 3, 5, 19); + * H5Tset_pad(dt, H5T_PAD_ZERO, H5T_PAD_ONE); + * H5Tset_inpad(dt, H5T_PAD_ZERO); + * \endcode + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig9.gif "A user-defined floating point datatype" + * </td> + * </tr> + * </table> + * The figure above shows the storage layout of a data element for this datatype. Note that there is + * an unused bit (24) between the mantissa and the exponent. This bit is filled with the inpad value + * which in this case is 0. + * + * The sign bit is always of length one and none of the fields are allowed to overlap. When + * expanding a floating-point type one should set the precision first; when decreasing the size one + * should set the field positions and sizes first. + * + * <h4>Composite Datatypes</h4> + * All composite datatypes must be user-defined; there are no predefined composite datatypes. + * + * <h4>Compound Datatypes</h4> + * The subsections below describe how to create a compound datatype and how to write and read + * data of a compound datatype. + * + * <h4>Defining Compound Datatypes</h4> + * + * Compound datatypes are conceptually similar to a C struct or Fortran derived types. The + * compound datatype defines a contiguous sequence of bytes, which are formatted using from one up to + * 2^16 datatypes (members). A compound datatype may have any number of members, in any + * order, and the members may have any datatype, including compound. Thus, complex nested + * compound datatypes can be created. The total size of the compound datatype is greater than or + * equal to the sum of the size of its members, up to a maximum of 2^32 bytes. 
HDF5 does not + * support datatypes with distinguished records or the equivalent of C unions or Fortran + * EQUIVALENCE statements. + * + * Usually a C struct or Fortran derived type will be defined to hold a data point in memory, and the + * offsets of the members in memory will be the offsets of the struct members from the beginning + * of an instance of the struct. The HDF5 C library provides a macro #HOFFSET (s,m) to calculate + * the member’s offset. The HDF5 Fortran applications have to calculate offsets by using sizes of + * members' datatypes and by taking into consideration the order of members in the Fortran derived type. + * \code + * HOFFSET(s,m) + * \endcode + * This macro computes the offset of member m within a struct s. + * \code + * offsetof(s,m) + * \endcode + * This macro, defined in stddef.h, does exactly the same thing as the HOFFSET() macro. + * + * Note for Fortran users: Offsets of Fortran structure members correspond to the offsets within a + * packed datatype (see explanation below) stored in an HDF5 file. + * + * Each member of a compound datatype must have a descriptive name which is the key used to + * uniquely identify the member within the compound datatype. A member name in an HDF5 + * datatype does not necessarily have to be the same as the name of the member in the C struct or + * Fortran derived type, although this is often the case. Nor does one need to define all members of + * the C struct or Fortran derived type in the HDF5 compound datatype (or vice versa). + * + * Unlike atomic datatypes which are derived from other atomic datatypes, compound datatypes are + * created from scratch. First, one creates an empty compound datatype and specifies its total size. + * Then members are added to the compound datatype in any order. Each member type is inserted + * at a designated offset. Each member has a name which is the key used to uniquely identify the + * member within the compound datatype. 
+ * + * The example below shows a way of creating an HDF5 C compound datatype to describe a + * complex number. This is a structure with two components, “real” and “imaginary”, and each + * component is a double. An equivalent C struct whose type is defined by the complex_t struct is + * shown. + * + * <em>A compound datatype for complex numbers in C</em> + * \code + * typedef struct { + * double re; //real part + * double im; //imaginary part + * } complex_t; + * + * hid_t complex_id = H5Tcreate (H5T_COMPOUND, sizeof (complex_t)); + * H5Tinsert (complex_id, "real", HOFFSET(complex_t,re), + * H5T_NATIVE_DOUBLE); + * H5Tinsert (complex_id, "imaginary", HOFFSET(complex_t,im), + * H5T_NATIVE_DOUBLE); + * \endcode + * + * The example below shows a way of creating an HDF5 Fortran compound datatype to describe a + * complex number. This is a Fortran derived type with two components, “real” and “imaginary”, + * and each component is DOUBLE PRECISION. An equivalent Fortran TYPE whose type is defined + * by the TYPE complex_t is shown. + * + * <em>A compound datatype for complex numbers in Fortran</em> + * \code + * TYPE complex_t + * DOUBLE PRECISION re ! real part + * DOUBLE PRECISION im ! imaginary part + * END TYPE complex_t + * + * CALL h5tget_size_f(H5T_NATIVE_DOUBLE, re_size, error) + * CALL h5tget_size_f(H5T_NATIVE_DOUBLE, im_size, error) + * complex_t_size = re_size + im_size + * CALL h5tcreate_f(H5T_COMPOUND_F, complex_t_size, type_id, error) + * offset = 0 + * CALL h5tinsert_f(type_id, "real", offset, H5T_NATIVE_DOUBLE, error) + * offset = offset + re_size + * CALL h5tinsert_f(type_id, "imaginary", offset, H5T_NATIVE_DOUBLE, error) + * \endcode + * + * Important Note: The compound datatype is created with a size sufficient to hold all its members. + * In the C example above, the size of the C struct and the #HOFFSET macro are used as a + * convenient mechanism to determine the appropriate size and offset. 
Alternatively, the size and + * offset could be manually determined: the size can be set to 16 with “real” at offset 0 and + * “imaginary” at offset 8. However, different platforms and compilers have different sizes for + * “double” and may have alignment restrictions which require additional padding within the + * structure. It is much more portable to use the #HOFFSET macro which assures that the values will + * be correct for any platform. + * + * The figure below shows how the compound datatype would be laid out assuming that + * NATIVE_DOUBLE are 64-bit numbers and that there are no alignment requirements. The total + * size of the compound datatype will be 16 bytes, the “real” component will start at byte 0, and + * “imaginary” will start at byte 8. + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig10.gif "Layout of a compound datatype" + * </td> + * </tr> + * </table> + * + * The members of a compound datatype may be any HDF5 datatype including the compound, + * array, and variable-length (VL) types. The figure and example below show the memory layout + * and code which creates a compound datatype composed of two complex values, and each + * complex value is also a compound datatype as in the figure above. 
+ * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig11.gif "Layout of a compound datatype nested in a compound datatype" + * </td> + * </tr> + * </table> + * + * <em>Code for a compound datatype nested in a compound datatype</em> + * \code + * typedef struct { + * complex_t x; + * complex_t y; + * } surf_t; + * + * hid_t complex_id, surf_id; // hdf5 datatypes + * + * complex_id = H5Tcreate (H5T_COMPOUND, sizeof(complex_t)); + * H5Tinsert (complex_id, “re”, HOFFSET(complex_t, re), H5T_NATIVE_DOUBLE); + * H5Tinsert (complex_id, “im”, HOFFSET(complex_t, im), H5T_NATIVE_DOUBLE); + * + * surf_id = H5Tcreate (H5T_COMPOUND, sizeof(surf_t)); + * H5Tinsert (surf_id, “x”, HOFFSET(surf_t, x), complex_id); + * H5Tinsert (surf_id, “y”, HOFFSET(surf_t, y), complex_id); + * \endcode + * + * Note that a similar result could be accomplished by creating a compound datatype and inserting + * four fields. See the figure below. This results in the same layout as the figure above. The difference + * would be how the fields are addressed. In the first case, the real part of ‘y’ is called ‘y.re’; + * in the second case it is ‘y-re’. + * + * <em>Another compound datatype nested in a compound datatype</em> + * \code + * typedef struct { + * complex_t x; + * complex_t y; + * } surf_t; + * + * hid_t surf_id = H5Tcreate (H5T_COMPOUND, sizeof(surf_t)); + * H5Tinsert (surf_id, “x-re”, HOFFSET(surf_t, x.re), H5T_NATIVE_DOUBLE); + * H5Tinsert (surf_id, “x-im”, HOFFSET(surf_t, x.im), H5T_NATIVE_DOUBLE); + * H5Tinsert (surf_id, “y-re”, HOFFSET(surf_t, y.re), H5T_NATIVE_DOUBLE); + * H5Tinsert (surf_id, “y-im”, HOFFSET(surf_t, y.im), H5T_NATIVE_DOUBLE); + * \endcode + * + * The members of a compound datatype do not always fill all the bytes. The #HOFFSET macro + * assures that the members will be laid out according to the requirements of the platform and + * language. The example below shows an example of a C struct which requires extra bytes of + * padding on many platforms. 
The second element, ‘b’, is a 1-byte character followed by an 8-byte + * double, ‘c’. On many systems, the 8-byte value must be stored on a 4- or 8-byte boundary. This + * requires the struct to be larger than the sum of the sizes of its elements. + * + * In the example below, sizeof and #HOFFSET are used to assure that the members are inserted at + * the correct offset to match the memory conventions of the platform. The figure below shows how + * this data element would be stored in memory, assuming the double must start on a 4-byte + * boundary. Notice the extra bytes between ‘b’ and ‘c’. + * + * <em>A compound datatype that requires padding</em> + * \code + * typedef struct { + * int a; + * char b; + * double c; + * } s1_t; + * + * hid_t s1_tid = H5Tcreate (H5T_COMPOUND, sizeof(s1_t)); + * H5Tinsert (s1_tid, "a", HOFFSET(s1_t, a), H5T_NATIVE_INT); + * H5Tinsert (s1_tid, "b", HOFFSET(s1_t, b), H5T_NATIVE_CHAR); + * H5Tinsert (s1_tid, "c", HOFFSET(s1_t, c), H5T_NATIVE_DOUBLE); + * \endcode + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig12.gif "Memory layout of a compound datatype that requires padding" + * </td> + * </tr> + * </table> + * + * However, data stored on disk does not require alignment, so unaligned versions of compound + * data structures can be created to improve space efficiency on disk. These unaligned compound + * datatypes can be created by computing offsets by hand to eliminate inter-member padding, or the + * members can be packed by calling #H5Tpack (which modifies a datatype directly, so it is usually + * preceded by a call to #H5Tcopy). + * + * The example below shows how to create a disk version of the compound datatype from the + * figure above in order to store data on disk in as compact a form as possible. Packed compound + * datatypes should generally not be used to describe memory as they may violate alignment + * constraints for the architecture being used.
Note also that using a packed datatype for disk + * storage may involve a higher data conversion cost. + * + * <em>Create a packed compound datatype in C</em> + * \code + * hid_t s2_tid = H5Tcopy (s1_tid); + * H5Tpack (s2_tid); + * \endcode + * + * The example below shows the sequence of Fortran calls to create a packed compound datatype. + * An HDF5 Fortran compound datatype never describes a compound datatype in memory and + * compound data is ALWAYS written by fields as described in the next section. Therefore packing + * is not needed unless the offset of each consecutive member is not equal to the sum of the sizes of + * the previous members. + * + * <em>Create a packed compound datatype in Fortran</em> + * \code + * CALL h5tcopy_f(s1_id, s2_id, error) + * CALL h5tpack_f(s2_id, error) + * \endcode + * + * <h4>Creating and Writing Datasets with Compound Datatypes</h4> + * + * Creating datasets with compound datatypes is similar to creating datasets with any other HDF5 + * datatypes. But writing and reading may be different since datasets that have compound datatypes + * can be written or read by a field (member) or subsets of fields (members). The compound + * datatype is the only composite datatype that supports “sub-setting” by the elements the datatype + * is built from. + * + * The example below shows a C example of creating and writing a dataset with a compound + * datatype. + * + * + * <em>Create and write a dataset with a compound datatype in C</em> + * \code + * typedef struct s1_t { + * int a; + * float b; + * double c; + * } s1_t; + * + * s1_t data[LENGTH]; + * + * // Initialize data + * for (i = 0; i < LENGTH; i++) { + * data[i].a = i; + * data[i].b = i*i; + * data[i].c = 1./(i+1); + * } + * + * ... 
+ * + * s1_tid = H5Tcreate (H5T_COMPOUND, sizeof(s1_t)); + * H5Tinsert(s1_tid, "a_name", HOFFSET(s1_t, a), H5T_NATIVE_INT); + * H5Tinsert(s1_tid, "b_name", HOFFSET(s1_t, b), H5T_NATIVE_FLOAT); + * H5Tinsert(s1_tid, "c_name", HOFFSET(s1_t, c), H5T_NATIVE_DOUBLE); + * + * ... + * + * dataset_id = H5Dcreate(file_id, "SDScompound.h5", s1_tid, + * space_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * H5Dwrite (dataset_id, s1_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + * \endcode + * + * The example below shows the content of the file written on a little-endian machine. + * <em>Create and write a little-endian dataset with a compound datatype in C</em> + * \code + * HDF5 "SDScompound.h5" { + * GROUP "/" { + * DATASET "ArrayOfStructures" { + * DATATYPE H5T_COMPOUND { + * H5T_STD_I32LE "a_name"; + * H5T_IEEE_F32LE "b_name"; + * H5T_IEEE_F64LE "c_name"; + * } + * DATASPACE SIMPLE { ( 3 ) / ( 3 ) } + * DATA { + * (0): { + * 0, + * 0, + * 1 + * }, + * (1): { + * 1, + * 1, + * 0.5 + * }, + * (2): { + * 2, + * 4, + * 0.333333 + * } + * } + * } + * } + * } + * \endcode + * + * It is not necessary to write the whole data at once. Datasets with compound datatypes can be + * written by field or by subsets of fields. In order to do this one has to remember to set the transfer + * property of the dataset using the H5Pset_preserve call and to define the memory datatype that + * corresponds to a field. The example below shows how float and double fields are written to the + * dataset. + * + * <em>Writing floats and doubles to a dataset</em> + * \code + * typedef struct sb_t { + * float b; + * double c; + * } sb_t; + * + * typedef struct sc_t { + * float b; + * double c; + * } sc_t; + * sb_t data1[LENGTH]; + * sc_t data2[LENGTH]; + * + * // Initialize data + * for (i = 0; i < LENGTH; i++) { + * data1[i].b = i * i; + * data2[i].c = 1./(i + 1); + * } + * + * ... + * + * // Create dataset as in the previous example + * + * ...
+ * + * // Create memory datatypes corresponding to float + * // and double datatype fields + * + * sb_tid = H5Tcreate (H5T_COMPOUND, sizeof(sb_t)); + * H5Tinsert(sb_tid, “b_name”, HOFFSET(sb_t, b), H5T_NATIVE_FLOAT); + * sc_tid = H5Tcreate (H5T_COMPOUND, sizeof(sc_t)); + * H5Tinsert(sc_tid, “c_name”, HOFFSET(sc_t, c), H5T_NATIVE_DOUBLE); + * + * ... + * + * // Set transfer property + * xfer_id = H5Pcreate(H5P_DATASET_XFER); + * H5Pset_preserve(xfer_id, 1); + * H5Dwrite (dataset_id, sb_tid, H5S_ALL, H5S_ALL, xfer_id, data1); + * H5Dwrite (dataset_id, sc_tid, H5S_ALL, H5S_ALL, xfer_id, data2); + * \endcode + * + * The figure below shows the content of the file written on a little-endian machine. Only float and + * double fields are written. The default fill value is used to initialize the unwritten integer field. + * <em>Writing floats and doubles to a dataset on a little-endian system</em> + * \code + * HDF5 “SDScompound.h5” { + * GROUP “/” { + * DATASET “ArrayOfStructures” { + * DATATYPE H5T_COMPOUND { + * H5T_STD_I32LE “a_name”; + * H5T_IEEE_F32LE “b_name”; + * H5T_IEEE_F64LE “c_name”; + * } + * DATASPACE SIMPLE { ( 3 ) / ( 3 ) } + * DATA { + * (0): { + * 0, + * 0, + * 1 + * }, + * (1): { + * 0, + * 1, + * 0.5 + * }, + * (2): { + * 0, + * 4, + * 0.333333 + * } + * } + * } + * } + * } + * \endcode + * + * The example below contains a Fortran example that creates and writes a dataset with a + * compound datatype. As this example illustrates, writing and reading compound datatypes in + * Fortran is always done by fields. The content of the written file is the same as shown in the + * example above. + * <em>Create and write a dataset with a compound datatype in Fortran</em> + * \code + * ! One cannot write an array of a derived datatype in + * ! Fortran. + * TYPE s1_t + * INTEGER a + * REAL b + * DOUBLE PRECISION c + * END TYPE s1_t + * TYPE(s1_t) d(LENGTH) + * ! Therefore, the following code initializes an array + * ! 
corresponding to each field in the derived datatype + * ! and writes those arrays to the dataset + * + * INTEGER, DIMENSION(LENGTH) :: a + * REAL, DIMENSION(LENGTH) :: b + * DOUBLE PRECISION, DIMENSION(LENGTH) :: c + * + * ! Initialize data + * do i = 1, LENGTH + * a(i) = i-1 + * b(i) = (i-1) * (i-1) + * c(i) = 1./i + * enddo + * + * ... + * + * ! Set dataset transfer property to preserve partially + * ! initialized fields during write/read to/from dataset + * ! with compound datatype. + * ! + * CALL h5pcreate_f(H5P_DATASET_XFER_F, plist_id, error) + * CALL h5pset_preserve_f(plist_id, .TRUE., error) + * + * ... + * + * ! + * ! Create compound datatype. + * ! + * ! First calculate total size by calculating sizes of + * ! each member + * ! + * CALL h5tget_size_f(H5T_NATIVE_INTEGER, type_sizei, error) + * CALL h5tget_size_f(H5T_NATIVE_REAL, type_sizer, error) + * CALL h5tget_size_f(H5T_NATIVE_DOUBLE, type_sized, error) + * type_size = type_sizei + type_sizer + type_sized + * CALL h5tcreate_f(H5T_COMPOUND_F, type_size, dtype_id, error) + * ! + * ! Insert members + * ! + * ! + * ! INTEGER member + * ! + * offset = 0 + * CALL h5tinsert_f(dtype_id, "a_name", offset, H5T_NATIVE_INTEGER, error) + * ! + * ! REAL member + * ! + * offset = offset + type_sizei + * CALL h5tinsert_f(dtype_id, "b_name", offset, H5T_NATIVE_REAL, error) + * ! + * ! DOUBLE PRECISION member + * ! + * offset = offset + type_sizer + * CALL h5tinsert_f(dtype_id, "c_name", offset, H5T_NATIVE_DOUBLE, error) + * ! + * ! Create the dataset with compound datatype. + * ! + * CALL h5dcreate_f(file_id, dsetname, dtype_id, dspace_id, dset_id, error, H5P_DEFAULT_F, & + * H5P_DEFAULT_F, H5P_DEFAULT_F) + * ! + * + * ... + * + * ! Create memory types. We have to create a compound + * ! datatype for each member we want to write. + * ! + * CALL h5tcreate_f(H5T_COMPOUND_F, type_sizei, dt1_id, error) + * offset = 0 + * CALL h5tinsert_f(dt1_id, "a_name", offset, H5T_NATIVE_INTEGER, error) + * !
+ * CALL h5tcreate_f(H5T_COMPOUND_F, type_sizer, dt2_id, error) + * offset = 0 + * CALL h5tinsert_f(dt2_id, “b_name”, offset, H5T_NATIVE_REAL, error) + * ! + * CALL h5tcreate_f(H5T_COMPOUND_F, type_sized, dt3_id, error) + * offset = 0 + * CALL h5tinsert_f(dt3_id, “c_name”, offset, H5T_NATIVE_DOUBLE, error) + * ! + * ! Write data by fields in the datatype. Fields order + * ! is not important. + * ! + * CALL h5dwrite_f(dset_id, dt3_id, c, data_dims, error, xfer_prp = plist_id) + * CALL h5dwrite_f(dset_id, dt2_id, b, data_dims, error, xfer_prp = plist_id) + * CALL h5dwrite_f(dset_id, dt1_id, a, data_dims, error, xfer_prp = plist_id) + * \endcode + * + * <h4>Reading Datasets with Compound Datatypes</h4> + * + * Reading datasets with compound datatypes may be a challenge. For general applications there is + * no way to know a priori the corresponding C structure. Also, C structures cannot be allocated on + * the fly during discovery of the dataset’s datatype. For general C, C++, Fortran and Java + * application the following steps will be required to read and to interpret data from the dataset with + * compound datatype: + * \li 1. Get the identifier of the compound datatype in the file with the #H5Dget_type call + * \li 2. Find the number of the compound datatype members with the #H5Tget_nmembers call + * \li 3. Iterate through compound datatype members + * <ul><li>Get member class with the #H5Tget_member_class call</li> + * <li>Get member name with the #H5Tget_member_name call</li> + * <li>Check class type against predefined classes + * <ul><li>#H5T_INTEGER</li> + * <li>#H5T_FLOAT</li> + * <li>#H5T_STRING</li> + * <li>#H5T_BITFIELD</li> + * <li>#H5T_OPAQUE</li> + * <li>#H5T_COMPOUND</li> + * <li>#H5T_REFERENCE</li> + * <li>#H5T_ENUM</li> + * <li>#H5T_VLEN</li> + * <li>#H5T_ARRAY</li></ul> + * </li> + * <li>If class is #H5T_COMPOUND, then go to step 2 and repeat all steps under step 3. 
If + * class is not #H5T_COMPOUND, then a member is of an atomic class and can be read + * to a corresponding buffer after discovering all necessary information specific to each + * atomic type (for example, size of the integer or floats, super class for enumerated and + * array datatype, and its sizes)</li></ul> + * + * The examples below show how to read a dataset with a known compound datatype. + * + * The first example below shows the steps needed to read data of a known structure. First, build a + * memory datatype the same way it was built when the dataset was created, and then second use + * the datatype in an #H5Dread call. + * + * <em>Read a dataset using a memory datatype</em> + * \code + * typedef struct s1_t { + * int a; + * float b; + * double c; + * } s1_t; + * + * s1_t *data; + * + * ... + * + * s1_tid = H5Tcreate(H5T_COMPOUND, sizeof(s1_t)); + * H5Tinsert(s1_tid, “a_name”, HOFFSET(s1_t, a), H5T_NATIVE_INT); + * H5Tinsert(s1_tid, “b_name”, HOFFSET(s1_t, b), H5T_NATIVE_FLOAT); + * H5Tinsert(s1_tid, “c_name”, HOFFSET(s1_t, c), H5T_NATIVE_DOUBLE); + * + * ... + * + * dataset_id = H5Dopen(file_id, “SDScompound.h5”, H5P_DEFAULT); + * + * ... + * + * data = (s1_t *) malloc (sizeof(s1_t)*LENGTH); + * H5Dread(dataset_id, s1_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + * \endcode + * + * Instead of building a memory datatype, the application could use the + * #H5Tget_native_type function. See the example below. + * + * <em>Read a dataset using H5Tget_native_type</em> + * \code + * typedef struct s1_t { + * int a; + * float b; + * double c; + * } s1_t; + * + * s1_t *data; + * hid_t file_s1_t, mem_s1_t; + * + * ... + * + * dataset_id = H5Dopen(file_id, “SDScompound.h5”, H5P_DEFAULT); + * // Discover datatype in the file + * file_s1_t = H5Dget_type(dataset_id); + * // Find corresponding memory datatype + * mem_s1_t = H5Tget_native_type(file_s1_t, H5T_DIR_DEFAULT); + * + * ... 
+ * + * data = (s1_t *) malloc (sizeof(s1_t)*LENGTH); + * H5Dread (dataset_id, mem_s1_t, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + * \endcode + * + * The example below shows how to read just one float member of a compound datatype. + * + * <em>Read one floating point member of a compound datatype</em> + * \code + * typedef struct sf_t { + * float b; + * } sf_t; + * + * sf_t *data; + * + * ... + * + * sf_tid = H5Tcreate(H5T_COMPOUND, sizeof(sf_t)); + * H5Tinsert(sf_tid, "b_name", HOFFSET(sf_t, b), H5T_NATIVE_FLOAT); + * + * ... + * + * dataset_id = H5Dopen(file_id, "SDScompound.h5", H5P_DEFAULT); + * + * ... + * + * data = (sf_t *) malloc (sizeof(sf_t) * LENGTH); + * H5Dread(dataset_id, sf_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + * \endcode + * + * The example below shows how to read float and double members of a compound datatype into a + * structure that has those fields in a different order. Please notice that #H5Tinsert calls can be used + * in an order different from the order of the structure’s members. + * + * <em>Read float and double members of a compound datatype</em> + * \code + * typedef struct sdf_t { + * double c; + * float b; + * } sdf_t; + * + * sdf_t *data; + * + * ... + * + * sdf_tid = H5Tcreate(H5T_COMPOUND, sizeof(sdf_t)); + * H5Tinsert(sdf_tid, "b_name", HOFFSET(sdf_t, b), H5T_NATIVE_FLOAT); + * H5Tinsert(sdf_tid, "c_name", HOFFSET(sdf_t, c), H5T_NATIVE_DOUBLE); + * + * ... + * + * dataset_id = H5Dopen(file_id, "SDScompound.h5", H5P_DEFAULT); + * + * ... + * + * data = (sdf_t *) malloc (sizeof(sdf_t) * LENGTH); + * H5Dread(dataset_id, sdf_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + * \endcode + * + * <h4>Array</h4> + * + * Many scientific datasets have multiple measurements for each point in a space. There are several + * natural ways to represent this data, depending on the variables and how they are used in + * computation. See the table and the figure below.
+ * + * <table> + * <caption>Representing data with multiple measurements</caption> + * <tr> + * <th> + * <p>Storage Strategy</p> + * </th> + * <th> + * <p>Stored as</p> + * </th> + * <th> + * <p>Remarks</p> + * </th> + * </tr> + * <tr> + * <td>Multiple planes + * </td> + * <td> + * Several datasets with identical dataspaces + * </td> + * <td> + * This is optimal when variables are accessed individually, or when only selected + * variables are often used. + * </td> + * </tr> + * <tr> + * <td> + * Additional dimension + * </td> + * <td> + * One dataset, the last “dimension” is a vector of variables + * </td> + * <td> + * This can give good performance, although selecting only a few variables may be slow. This may + * not reflect the science. + * </td> + * </tr> + * <tr> + * <td> + * Record with multiple values + * </td> + * <td> + * One dataset with compound datatype + * </td> + * <td> + * This enables the variables to be read all together or selected. Also handles “vectors” of + * heterogeneous data. + * </td> + * </tr> + * <tr> + * <td> + * Vector or Tensor value + * </td> + * <td> + * One dataset, each data element is a small array of values. + * </td> + * <td> + * This uses the same amount of space as the previous two, and may represent the science model + * better. + * </td> + * </tr> + * </table> + * + * <table> + * <caption>Figure 13 Representing data with multiple measurements</caption> + * <tr> + * <td> + * \image html Dtypes_fig13a.gif + * </td> + * <td> + * \image html Dtypes_fig13b.gif + * </td> + * </tr> + * <tr> + * <td> + * \image html Dtypes_fig13c.gif + * </td> + * <td> + * \image html Dtypes_fig13d.gif + * </td> + * </tr> + * </table> + * + * The HDF5 #H5T_ARRAY datatype defines the data element to be a homogeneous, multi-dimensional array. + * See Figure 13 above. The elements of the array can be any HDF5 datatype + * (including compound and array), and the size of the datatype is the total size of the array.
A + * dataset of array datatype cannot be subdivided for I/O within the data element: the entire array of + * the data element must be transferred. If the data elements need to be accessed separately, for + * example, by plane, then the array datatype should not be used. The table below shows + * advantages and disadvantages of various storage methods. + * + * <table> + * <caption>Storage method advantages and disadvantages</caption> + * <tr> + * <th> + * <p>Method</p> + * </th> + * <th> + * <p>Advantages</p> + * </th> + * <th> + * <p>Disadvantages</p> + * </th> + * </tr> + * <tr> + * <td> + * Multiple Datasets + * </td> + * <td> + * Easy to access each plane, can select any plane(s) + * </td> + * <td> + * Less efficient to access a ‘column’ through the planes + * </td> + * </tr> + * <tr> + * <td> + * N+1 Dimension + * </td> + * <td> + * All access patterns supported + * </td> + * <td> + * Must be homogeneous datatype<br /> + * The added dimension may not make sense in the scientific model + * </td> + * </tr> + * <tr> + * <td> + * Compound Datatype + * </td> + * <td> + * Can be heterogeneous datatype + * </td> + * <td> + * Planes must be named, selection is by plane<br /> + * Not a natural representation for a matrix + * </td> + * </tr> + * <tr> + * <td> + * Array + * </td> + * <td> + * A natural representation for vector or tensor data + * </td> + * <td> + * Cannot access elements separately (no access by plane) + * </td> + * </tr> + * </table> + * + * An array datatype may be multi-dimensional with 1 to #H5S_MAX_RANK (the maximum rank + * of a dataset is currently 32) dimensions. The dimensions can be any size greater than 0, but + * unlimited dimensions are not supported (although the datatype can be a variable-length datatype). + * + * An array datatype is created with the #H5Tarray_create call, which specifies the number of + * dimensions, the size of each dimension, and the base type of the array.
The array datatype can + * then be used in any way that any datatype object is used. The example below shows the creation + * of a datatype that is a two-dimensional array of native integers, and this is then used to create a + * dataset. Note that the dataset can have a dataspace with any number and size of dimensions. The figure + * below shows the layout in memory assuming that the native integers are 4 bytes. Each + * data element has 6 elements, for a total of 24 bytes. + * + * <em>Create a two-dimensional array datatype</em> + * \code + * hid_t file, dataset; + * hid_t datatype, dataspace; + * hsize_t adims[] = {3, 2}; + * + * datatype = H5Tarray_create(H5T_NATIVE_INT, 2, adims); + * + * dataset = H5Dcreate(file, datasetname, datatype, + * dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * \endcode + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig14.gif "Memory layout of a two-dimensional array datatype" + * </td> + * </tr> + * </table> + * + * @anchor h4_vlen_datatype <h4>Variable-length Datatypes</h4> + * + * A variable-length (VL) datatype is a one-dimensional sequence of a datatype which is not fixed + * in length from one dataset location to another. In other words, each data element may have a + * different number of members. Variable-length datatypes cannot be divided; the entire data + * element must be transferred. + * + * VL datatypes are useful to the scientific community in many different ways, possibly including: + * <ul> + * <li>Ragged arrays: Multi-dimensional ragged arrays can be implemented with the last (fastest changing) + * dimension being ragged by using a VL datatype as the type of the element stored. + * </li> + * <li>Fractal arrays: A nested VL datatype can be used to implement ragged arrays of ragged arrays, to + * whatever nesting depth is required for the user. + * </li> + * <li>Polygon lists: A common storage requirement is to efficiently store arrays of polygons with + * different numbers of vertices.
A VL datatype can be used to efficiently and succinctly describe + * an array of polygons with different numbers of vertices. + * </li> + * <li>Character strings: Perhaps the most common use of VL datatypes will be to store C-like VL + * character strings in dataset elements or as attributes of objects. + * </li> + * <li>Indices (for example, of objects within a file): An array of VL object references could be used + * as an index to all the objects in a file which contain a particular sequence of dataset values. + * </li> + * <li>Object Tracking: An array of VL dataset region references can be used as a method of tracking + * objects or features appearing in a sequence of datasets. + * </li> + * </ul> + * + * A VL datatype is created by calling #H5Tvlen_create, which specifies the base datatype. The first + * example below shows an example of code that creates a VL datatype of unsigned integers. Each + * data element is a one-dimensional array of zero or more members and is stored in the + * hvl_t structure. See the second example below. + * + * <em>Create a variable-length datatype of unsigned integers</em> + * \code + * tid1 = H5Tvlen_create (H5T_NATIVE_UINT); + * + * dataset=H5Dcreate(fid1, "Dataset1", tid1, sid1, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * \endcode + * + * <em>Data element storage for members of the VL datatype</em> + * \code + * typedef struct + * { + * size_t len; // Length of VL data + * //(in base type units) + * void *p; // Pointer to VL data + * } hvl_t; + * \endcode + * + * The first example below shows how the VL data is written. For each of the 10 data elements, a + * length and data buffer must be allocated. Below the two examples is a figure that shows how the + * data is laid out in memory. + * + * An analogous procedure must be used to read the data. See the second example below. An + * appropriate array of hvl_t must be allocated, and the data read. It is then traversed one data + * element at a time.
The #H5Dvlen_reclaim call frees the VL data buffers. With each + * element possibly being of different sequence lengths for a dataset with a VL datatype, the + * memory for the VL datatype must be dynamically allocated. Currently there are two methods of + * managing the memory for VL datatypes: the standard C malloc/free memory allocation routines + * or a method of calling user-defined memory management routines to allocate or free memory + * (set with #H5Pset_vlen_mem_manager). Since the memory allocated when reading (or writing) + * may be complicated to release, the #H5Dvlen_reclaim function is provided to traverse a memory + * buffer and free the VL datatype information without leaking memory. + * + * <em>Write VL data</em> + * \code + * hvl_t wdata[10]; // Information to write + * + * // Allocate and initialize VL data to write + * for(i = 0; i < 10; i++) { + * wdata[i].p = malloc((i + 1) * sizeof(unsigned int)); + * wdata[i].len = i + 1; + * for(j = 0; j < (i + 1); j++) + * ((unsigned int *)wdata[i].p)[j]=i * 10 + j; + * } + * ret = H5Dwrite(dataset, tid1, H5S_ALL, H5S_ALL, H5P_DEFAULT, wdata); + * \endcode + * + * <em>Read VL data</em> + * \code + * hvl_t rdata[SPACE1_DIM1]; + * ret = H5Dread(dataset, tid1, H5S_ALL, H5S_ALL, xfer_pid, rdata); + * + * for(i = 0; i < SPACE1_DIM1; i++) { + * printf("%d: len %zu ", i, rdata[i].len); + * for(j = 0; j < rdata[i].len; j++) { + * printf(" value: %u\n",((unsigned int *)rdata[i].p)[j]); + * } + * } + * ret = H5Dvlen_reclaim(tid1, sid1, xfer_pid, rdata); + * \endcode + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig15.gif "Memory layout of a VL datatype" + * </td> + * </tr> + * </table> + * + * The user program must carefully manage these relatively complex data structures. The + * #H5Dvlen_reclaim function performs a standard traversal, freeing all the data. This function + * analyzes the datatype and dataspace objects, and visits each VL data element, recursing through + * nested types.
By default, the system free is called for the pointer in each hvl_t. Obviously, this + * call assumes that all of this memory was allocated with the system malloc. + * + * The user program may specify custom memory manager routines, one for allocating and one for + * freeing. These may be set with #H5Pset_vlen_mem_manager, and must have the following + * prototypes: + * <ul> + * <li> + * \code + * typedef void *(*H5MM_allocate_t)(size_t size, void *info); + * \endcode + * </li> + * <li> + * \code + * typedef void (*H5MM_free_t)(void *mem, void *free_info); + * \endcode + * </li> + * </ul> + * The utility function #H5Dvlen_get_buf_size checks the number of bytes required to store the VL + * data from the dataset. This function analyzes the datatype and dataspace object to visit all the VL + * data elements, to determine the number of bytes required to store the data in the + * destination storage (memory). The size value is adjusted for data conversion and alignment in the + * destination. + * + * \subsection subsec_datatype_other Other Non-numeric Datatypes + * Several datatype classes define special types of objects. + * + * \subsubsection subsubsec_datatype_other_strings Strings + * Text data is represented by arrays of characters, called strings. Many programming languages + * support different conventions for storing strings, which may be fixed or variable-length, and may + * have different rules for padding unused storage. HDF5 can represent strings in several ways. See + * the figure below. + * + * The strings to store are “Four score” and “lazy programmers.” + * <table> + * <caption>A string stored as one-character elements in a one-dimensional array</caption> + * <tr> + * <td> + * a) #H5T_NATIVE_CHAR: The dataset is a one-dimensional array with 29 elements, and each element + * is a single character.
+ * </td> + * </tr> + * <tr> + * <td> + * \image html Dtypes_fig16a.gif + * </td> + * </tr> + * <tr> + * <td> + * b) Fixed-length string: The dataset is a one-dimensional array with two elements, and each + * element is 20 characters. + * </td> + * </tr> + * <tr> + * <td> + * \image html Dtypes_fig16b.gif + * </td> + * </tr> + * <tr> + * <td> + * c) Variable-length string: The dataset is a one-dimensional array with two elements, and each + * element is a variable-length string. This is the same result when stored as a fixed-length + * string except that the first element of the array will need only 11 bytes for storage instead of 20. + * </td> + * </tr> + * <tr> + * <td> + * \image html Dtypes_fig16c.gif + * </td> + * </tr> + * <tr> + * <td> + * \image html Dtypes_fig16d.gif + * </td> + * </tr> + * </table> + * + * First, a file may have a dataset with datatype #H5T_NATIVE_CHAR with each character of + * the string as an element of the dataset. This will store an unstructured block of text data, but + * gives little indication of any structure in the text. See item a in the figure above. + * + * A second alternative is to store the data using the datatype class #H5T_STRING with each + * element a fixed length. See item b in the figure above. In this approach, each element might be a + * word or a sentence, addressed by the dataspace. The dataset reserves space for the specified + * number of characters, although some strings may be shorter. This approach is simple and usually + * is fast to access, but can waste storage space if the length of the strings varies. + * + * A third alternative is to use a variable-length datatype. See item c in the figure above. This can + * be done using the standard mechanisms described above. The program would use hvl_t structures + * to write and read the data. + * + * A fourth alternative is to use a special feature of the string datatype class to set the size of the + * datatype to #H5T_VARIABLE.
See item c in the figure above. The example below shows a + * declaration of a datatype of type #H5T_C_S1 which is set to #H5T_VARIABLE. The HDF5 + * Library automatically translates between this and the vl_t structure. Note: the #H5T_VARIABLE + * size can only be used with string datatypes. + * <em>Set the string datatype size to H5T_VARIABLE</em> + * \code + * tid1 = H5Tcopy (H5T_C_S1); + * ret = H5Tset_size (tid1, H5T_VARIABLE); + * \endcode + * + * Variable-length strings can be read into C strings (in other words, pointers to zero terminated + * arrays of char). See the example below. + * <em>Read variable-length strings into C strings</em> + * \code + * char *rdata[SPACE1_DIM1]; + * + * ret = H5Dread(dataset, tid1, H5S_ALL, H5S_ALL, xfer_pid, rdata); + * + * for(i = 0; i < SPACE1_DIM1; i++) { + * printf(“%d: len: %d, str is: %s\n”, i, strlen(rdata[i]), rdata[i]); + * } + * + * ret = H5Dvlen_reclaim(tid1, sid1, xfer_pid, rdata); + * \endcode + * + * \subsubsection subsubsec_datatype_other_refs Reference + * In HDF5, objects (groups, datasets, and committed datatypes) are usually accessed by name. + * There is another way to access stored objects - by reference. There are two reference datatypes: + * object reference and region reference. Object reference objects are created with #H5Rcreate and + * other calls (cross reference). These objects can be stored and retrieved in a dataset as elements + * with reference datatype. The first example below shows an example of code that creates + * references to four objects, and then writes the array of object references to a dataset. The second + * example below shows a dataset of datatype reference being read and one of the reference objects + * being dereferenced to obtain an object pointer. + * + * In order to store references to regions of a dataset, the datatype should be #H5T_STD_REF_DSETREG. 
+ * Note that a data element must be either an object reference or a region reference: these are + * different types and cannot be mixed within a single array. + * + * A reference datatype cannot be divided for I/O: an element is read or written completely. + * + * <em>Create object references and write to a dataset</em> + * \code + * dataset= H5Dcreate (fid1, “Dataset3”, H5T_STD_REF_OBJ, sid1, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * + * // Create reference to dataset + * ret = H5Rcreate(&wbuf[0], fid1,“/Group1/Dataset1”, H5R_OBJECT, -1); + * + * // Create reference to dataset + * ret = H5Rcreate(&wbuf[1], fid1, “/Group1/Dataset2”, H5R_OBJECT, -1); + * + * // Create reference to group + * ret = H5Rcreate(&wbuf[2], fid1, “/Group1”, H5R_OBJECT, -1); + * + * // Create reference to committed datatype + * ret = H5Rcreate(&wbuf[3], fid1, “/Group1/Datatype1”, H5R_OBJECT, -1); + * + * // Write selection to disk + * ret=H5Dwrite(dataset, H5T_STD_REF_OBJ, H5S_ALL, H5S_ALL, H5P_DEFAULT, wbuf); + * \endcode + * + * <em>Read a dataset with a reference datatype</em> + * \code + * rbuf = malloc(sizeof(hobj_ref_t)*SPACE1_DIM1); + * + * // Read selection from disk + * ret=H5Dread(dataset, H5T_STD_REF_OBJ, H5S_ALL, H5S_ALL, H5P_DEFAULT, rbuf); + * + * // Open dataset object + * dset2 = H5Rdereference(dataset, H5R_OBJECT, &rbuf[0]); + * \endcode + * + * \subsubsection subsubsec_datatype_other_enum ENUM + * The enum datatype implements a set of (name, value) pairs, similar to C/C++ enum. The values + * are currently limited to native integer datatypes. Each name can be the name of only one value, + * and each value can have only one name. + * + * The data elements of the ENUMERATION are stored according to the datatype. An example + * would be as an array of integers. The example below shows an example of how to create an + * enumeration with five elements. The elements map symbolic names to 2-byte integers. See the + * table below. 
+ * <em>Create an enumeration with five elements</em> + * \code + * hid_t hdf_en_colors; + * short val; + * + * hdf_en_colors = H5Tcreate(H5T_ENUM, sizeof(short)); + * H5Tenum_insert(hdf_en_colors, “RED”, (val=0, &val)); + * H5Tenum_insert(hdf_en_colors, “GREEN”, (val=1, &val)); + * H5Tenum_insert(hdf_en_colors, “BLUE”, (val=2, &val)); + * H5Tenum_insert(hdf_en_colors, “WHITE”, (val=3, &val)); + * H5Tenum_insert(hdf_en_colors, “BLACK”, (val=4, &val)); + * H5Dcreate(fileid, datasetname, hdf_en_colors, spaceid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * \endcode + * + * <table> + * <caption>An enumeration with five elements</caption> + * <tr> + * <th>Name</th> + * <th>Value</th> + * </tr> + * <tr> + * <td>RED</td> + * <td>0</td> + * </tr> + * <tr> + * <td>GREEN</td> + * <td>1</td> + * </tr> + * <tr> + * <td>BLUE</td> + * <td>2</td> + * </tr> + * <tr> + * <td>WHITE</td> + * <td>3</td> + * </tr> + * <tr> + * <td>BLACK</td> + * <td>4</td> + * </tr> + * </table> + * + * The figure below shows how an array of eight values might be stored. Conceptually, the array is + * an array of symbolic names [BLACK, RED, WHITE, BLUE, ...] See item a in the figure below. + * These are stored as the values and are short integers. So, the first 2 bytes are the value associated + * with “BLACK”, which is the number 4, and so on. See item b in the figure below. + * <table> + * <caption>Storing an enum array</caption> + * <tr> + * <td> + * a) Logical data to be written - eight elements + * </td> + * </tr> + * <tr> + * <td> + * \image html Dtypes_fig17a.gif + * </td> + * </tr> + * <tr> + * <td> + * b) The storage layout. Total size of the array is 16 bytes, 2 bytes per element. + * </td> + * </tr> + * <tr> + * <td> + * \image html Dtypes_fig17b.gif + * </td> + * </tr> + * </table> + * + * The order that members are inserted into an enumeration type is unimportant; the important part + * is the associations between the symbol names and the values. 
Thus, two enumeration datatypes + * will be considered equal if and only if both types have the same symbol/value associations and + * both have equal underlying integer datatypes. Type equality is tested with the #H5Tequal + * function. + * + * If a particular architecture type is required, a little-endian or big-endian datatype for example, + * use a native integer datatype as the ENUM base datatype and use #H5Tconvert on values as they + * are read from or written to a dataset. + * + * \subsubsection subsubsec_datatype_other_opaque Opaque + * In some cases, a user may have data objects that should be stored and retrieved as blobs with no + * attempt to interpret them. For example, an application might wish to store an array of encrypted + * certificates which are 100 bytes long. + * + * While an arbitrary block of data may always be stored as bytes, characters, integers, or whatever, + * this might mislead programs about the meaning of the data. The opaque datatype defines data + * elements which are uninterpreted by HDF5. The opaque data may be labeled with + * #H5Tset_tag with a string that might be used by an application. For example, the encrypted + * certificates might have a tag to indicate the encryption and the certificate standard. + * + * \subsubsection subsubsec_datatype_other_bitfield Bitfield + * Some data is represented as bits, where the number of bits is not an integral number of bytes and + * the bits are not necessarily interpreted as a standard type. Some examples might include readings from + * machine registers (for example, switch positions), a cloud mask, or data structures with several + * small integers that should be stored in a single byte. + * + * This data could be stored as integers, strings, or enumerations. However, these storage methods + * would likely result in considerable wasted space. For example, storing a cloud mask with one + * byte per value would use up to eight times the space of a packed array of bits. 
+ * + * The HDF5 bitfield datatype class defines a data element that is a contiguous sequence of bits, + * which are stored on disk in a packed array. The programming model is the same as for unsigned + * integers: the datatype object is created by copying a predefined datatype, and then the precision, + * offset, and padding are set. + * + * While the use of the bitfield datatype will reduce storage space substantially, there will still be + * wasted space if the bitfield as a whole does not match the 1-, 2-, 4-, or 8-byte unit in which it is + * written. The remaining unused space can be removed by applying the N-bit filter to the dataset + * containing the bitfield data. For more information, see "Using the N-bit Filter." + * + * \subsection subsec_datatype_fill Fill Values + * The “fill value” for a dataset is the specification of the default value assigned to data elements + * that have not yet been written. In the case of a dataset with an atomic datatype, the fill value is a + * single value of the appropriate datatype, such as ‘0’ or ‘-1.0’. In the case of a dataset with a + * composite datatype, the fill value is a single data element of the appropriate type. For example, + * for an array or compound datatype, the fill value is a single data element with values for all the + * component elements of the array or compound datatype. + * + * The fill value is set (permanently) when the dataset is created. The fill value is set in the dataset + * creation properties in the #H5Dcreate call. Note that the #H5Dcreate call must also include the + * datatype of the dataset, and the value provided for the fill value will be interpreted as a single + * element of this datatype. The example below shows code which creates a dataset of integers with + * fill value -1. Any unwritten data elements will be set to -1. 
+ * + * <em>Create a dataset with a fill value of -1</em> + * \code + * hid_t plist_id; + * int filler; + * + * filler = -1; + * plist_id = H5Pcreate(H5P_DATASET_CREATE); + * H5Pset_fill_value(plist_id, H5T_NATIVE_INT, &filler); + * + * // Create the dataset with fill value ‘-1’. + * dataset_id = H5Dcreate(file_id, “/dset”, H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, plist_id, + * H5P_DEFAULT); + * \endcode + * + * <em>Create a fill value for a compound datatype</em> + * \code + * typedef struct s1_t { + * int a; + * char b; + * double c; + * } s1_t; + * s1_t filler; + * + * s1_tid = H5Tcreate (H5T_COMPOUND, sizeof(s1_t)); + * H5Tinsert(s1_tid, “a_name”, HOFFSET(s1_t, a), H5T_NATIVE_INT); + * H5Tinsert(s1_tid, “b_name”, HOFFSET(s1_t, b), H5T_NATIVE_CHAR); + * H5Tinsert(s1_tid, “c_name”, HOFFSET(s1_t, c), H5T_NATIVE_DOUBLE); + * + * filler.a = -1; + * filler.b = ‘*’; + * filler.c = -2.0; + * plist_id = H5Pcreate(H5P_DATASET_CREATE); + * H5Pset_fill_value(plist_id, s1_tid, &filler); + * + * // Create the dataset with fill value + * // (-1, ‘*’, -2.0). + * dataset = H5Dcreate(file, datasetname, s1_tid, space, H5P_DEFAULT, plist_id, H5P_DEFAULT); + * \endcode + * + * The code above shows how to create a fill value for a compound datatype. The procedure is the + * same as the previous example except the filler must be a structure with the correct fields. Each + * field is initialized to the desired fill value. + * + * The fill value for a dataset can be retrieved by reading the dataset creation properties of the + * dataset and then by reading the fill value with #H5Pget_fill_value. The data will be read into + * memory using the storage layout specified by the datatype. This transfer will convert data in the + * same way as #H5Dread. The example below shows how to get the fill value from the dataset + * created in the example "Create a dataset with a fill value of -1". 
+ * + * <em>Retrieve a fill value</em> + * \code + * hid_t plist2; + * int filler; + * + * dataset_id = H5Dopen(file_id, "/dset", H5P_DEFAULT); + * plist2 = H5Dget_create_plist(dataset_id); + * + * H5Pget_fill_value(plist2, H5T_NATIVE_INT, &filler); + * + * // filler has the fill value, ‘-1’ + * \endcode + * + * A similar procedure is followed for any datatype. The example below shows how to read the fill + * value for the compound datatype created in an example above. Note that the program must pass + * an element large enough to hold a fill value of the datatype indicated by the argument to + * #H5Pget_fill_value. Also, the program must understand the datatype in order to interpret its + * components. This may be difficult to determine without knowledge of the application that + * created the dataset. + * + * <em>Read the fill value for a compound datatype</em> + * \code + * char *fillbuf; + * int sz; + * + * dataset = H5Dopen( file, DATASETNAME, H5P_DEFAULT); + * + * s1_tid = H5Dget_type(dataset); + * + * sz = H5Tget_size(s1_tid); + * + * fillbuf = (char *)malloc(sz); + * + * plist_id = H5Dget_create_plist(dataset); + * + * H5Pget_fill_value(plist_id, s1_tid, fillbuf); + * + * printf("filler.a: %d\n",((s1_t *) fillbuf)->a); + * printf("filler.b: %c\n",((s1_t *) fillbuf)->b); + * printf("filler.c: %f\n",((s1_t *) fillbuf)->c); + * \endcode + * + * \subsection subsec_datatype_complex Complex Combinations of Datatypes + * Several composite datatype classes define collections of other datatypes, including other + * composite datatypes. In general, a datatype can be nested to any depth, with any combination of + * datatypes. + * + * For example, a compound datatype can have members that are other compound datatypes, arrays, + * or VL datatypes. An array can be an array of array, an array of compound, or an array of VL. And a + * VL datatype can be a variable-length array of compound, array, or VL datatypes. 
+ * + * These complicated combinations of datatypes form a logical tree, with a single root datatype, and + * leaves which must be atomic datatypes (predefined or user-defined). The figure below shows an + * example of a logical tree describing a compound datatype constructed from different datatypes. + * + * Recall that the datatype is a description of the layout of storage. The complicated compound + * datatype is constructed from component datatypes, each of which describes the layout of part of + * the storage. Any datatype can be used as a component of a compound datatype, with the + * following restrictions: + * <ul><li>1. No byte can be part of more than one component datatype (in other words, the fields cannot + * overlap within the compound datatype)</li> + * <li>2. The total size of the components must be less than or equal to the total size of the compound + * datatype</li></ul> + * These restrictions are essentially the rules for C structures and similar record types familiar from + * programming languages. Multiple typing, such as a C union, is not allowed in HDF5 datatypes. + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig18.gif "A compound datatype built with different datatypes" + * </td> + * </tr> + * </table> + * + * \subsubsection subsubsec_datatype_complex_create Creating a Complicated Compound Datatype + * To construct a complicated compound datatype, each component is constructed, and then added + * to the enclosing datatype description. The example below shows how to create a compound + * datatype with four members: + * \li “T1”, a compound datatype with three members + * \li “T2”, a compound datatype with two members + * \li “T3”, a one-dimensional array of integers + * \li “T4”, a string + * + * Below the example code is a figure that shows this datatype as a logical tree. The output of the + * h5dump utility is shown in the example below the figure. + * + * Each datatype is created as a separate datatype object. 
Figure "The storage layout for the + * four member datatypes" below shows the storage layout + * for the four individual datatypes. Then the datatypes are inserted into the outer datatype at an + * appropriate offset. Figure "The storage layout of the combined four members" below shows the + * resulting storage layout. The combined record is 89 bytes long. + * + * The dataset is created using the combined compound datatype. The dataset is declared to be a 4 + * by 3 array of compound data. Each data element is an instance of the 89-byte compound + * datatype. Figure "The layout of the dataset" below shows the layout of the dataset, and expands + * one of the elements to show the relative position of the component data elements. + * + * Each data element is a compound datatype, which can be written or read as a record, or each + * field may be read or written individually. The first field (“T1”) is itself a compound datatype + * with three fields (“T1.a”, “T1.b”, and “T1.c”). “T1” can be read or written as a record, or + * individual fields can be accessed. Similarly, the second field (“T2”) is a compound datatype with two + * fields (“T2.f1”, “T2.f2”). + * + * The third field (“T3”) is an array datatype. Thus, “T3” should be accessed as an array of 10 + * integers (40 bytes). Array data can only be read or written as a single element, so all 10 integers must be + * read or written to the third field. The fourth field (“T4”) is a single string of length 25. 
+ * + * <em>Create a compound datatype with four members</em> + * \code + * typedef struct s1_t { + * int a; + * char b; + * double c; + * } s1_t; + * typedef struct s2_t { + * float f1; + * float f2; + * } s2_t; + * hid_t s1_tid, s2_tid, s3_tid, s4_tid, s5_tid; + * + * // Create a datatype for s1 + * s1_tid = H5Tcreate (H5T_COMPOUND, sizeof(s1_t)); + * H5Tinsert(s1_tid, “a_name”, HOFFSET(s1_t, a), H5T_NATIVE_INT); + * H5Tinsert(s1_tid, “b_name”, HOFFSET(s1_t, b), H5T_NATIVE_CHAR); + * H5Tinsert(s1_tid, “c_name”, HOFFSET(s1_t, c), H5T_NATIVE_DOUBLE); + * + * // Create a datatype for s2. + * s2_tid = H5Tcreate (H5T_COMPOUND, sizeof(s2_t)); + * H5Tinsert(s2_tid, “f1”, HOFFSET(s2_t, f1), H5T_NATIVE_FLOAT); + * H5Tinsert(s2_tid, “f2”, HOFFSET(s2_t, f2), H5T_NATIVE_FLOAT); + * + * // Create a datatype for an Array of integers + * s3_tid = H5Tarray_create(H5T_NATIVE_INT, RANK, dim); + * + * // Create a datatype for a String of 25 characters + * s4_tid = H5Tcopy(H5T_C_S1); + * H5Tset_size(s4_tid, 25); + * + * // Create a compound datatype composed of one of each of these types. + * // The total size is the sum of the size of each. + * sz = H5Tget_size(s1_tid) + H5Tget_size(s2_tid) + H5Tget_size(s3_tid) + H5Tget_size(s4_tid); + * s5_tid = H5Tcreate (H5T_COMPOUND, sz); + * + * // Insert the component types at the appropriate offsets. + * H5Tinsert(s5_tid, “T1”, 0, s1_tid); + * H5Tinsert(s5_tid, “T2”, sizeof(s1_t), s2_tid); + * H5Tinsert(s5_tid, “T3”, sizeof(s1_t) + sizeof(s2_t), s3_tid); + * H5Tinsert(s5_tid, “T4”, (sizeof(s1_t) + sizeof(s2_t) + H5Tget_size(s3_tid)), s4_tid); + * + * // Create the dataset with this datatype. 
+ * dataset = H5Dcreate(file, DATASETNAME, s5_tid, space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * \endcode + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig19.gif "Logical tree for the compound datatype with four members" + * </td> + * </tr> + * </table> + * + * <em> Output from h5dump for the compound datatype</em> + * \code + * DATATYPE H5T_COMPOUND { + * H5T_COMPOUND { + * H5T_STD_I32LE "a_name"; + * H5T_STD_I8LE "b_name"; + * H5T_IEEE_F64LE "c_name"; + * } "T1"; + * H5T_COMPOUND { + * H5T_IEEE_F32LE "f1"; + * H5T_IEEE_F32LE "f2"; + * } "T2"; + * H5T_ARRAY { [10] H5T_STD_I32LE } "T3"; + * H5T_STRING { + * STRSIZE 25; + * STRPAD H5T_STR_NULLTERM; + * CSET H5T_CSET_ASCII; + * CTYPE H5T_C_S1; + * } "T4"; + * } + * \endcode + * + * <table> + * <caption> The storage layout for the four member datatypes</caption> + * <tr> + * <td> + * a) Compound type ‘s1_t’, size 16 bytes. + * </td> + * </tr> + * <tr> + * <td> + * \image html Dtypes_fig20a.gif + * </td> + * </tr> + * <tr> + * <td> + * b) Compound type ‘s2_t’, size 8 bytes. + * </td> + * </tr> + * <tr> + * <td> + * \image html Dtypes_fig20b.gif + * </td> + * </tr> + * <tr> + * <td> + * c) Array type ‘s3_tid’, 10 integers, total size 40 bytes. + * </td> + * </tr> + * <tr> + * <td> + * \image html Dtypes_fig20c.gif + * </td> + * </tr> + * <tr> + * <td> + * d) String type ‘s4_tid’, size 25 bytes. 
+ * </td> + * </tr> + * <tr> + * <td> + * \image html Dtypes_fig20d.gif + * </td> + * </tr> + * </table> + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig21.gif "The storage layout of the combined four members" + * </td> + * </tr> + * </table> + * + * \li A 4 x 3 array of Compound Datatype + * \li Element [1,1] expanded + * <table> + * <tr> + * <td> + * \image html Dtypes_fig22.gif "The layout of the dataset" + * </td> + * </tr> + * </table> + * + * \subsubsection subsubsec_datatype_complex_analyze Analyzing and Navigating a Compound Datatype + * A complicated compound datatype can be analyzed piece by piece to discover the exact storage + * layout. In the example above, the outer datatype is analyzed to discover that it is a compound + * datatype with four members. Each member is analyzed in turn to construct a complete map of the + * storage layout. + * + * The example below shows code that partially analyzes a nested compound + * datatype. The name and overall offset and size of the component datatype are discovered, and then + * its type is analyzed depending on the datatype class. Through this method, the complete storage + * layout can be discovered. + * + * <em>Analyzing a compound datatype and its members</em> + * \code + * s1_tid = H5Dget_type(dataset); + * + * if (H5Tget_class(s1_tid) == H5T_COMPOUND) { + * printf("COMPOUND DATATYPE {\n"); + * sz = H5Tget_size(s1_tid); + * nmemb = H5Tget_nmembers(s1_tid); + * printf(" %d bytes\n",sz); + * printf(" %d members\n",nmemb); + * for (i =0; i < nmemb; i++) { + * s2_tid = H5Tget_member_type(s1_tid, i); + * if (H5Tget_class(s2_tid) == H5T_COMPOUND) { + * // recursively analyze the nested type. 
+ * } + * else if (H5Tget_class(s2_tid) == H5T_ARRAY) { + * sz2 = H5Tget_size(s2_tid); + * printf(" %s: NESTED ARRAY DATATYPE offset %d size %d {\n", + * H5Tget_member_name(s1_tid, i), H5Tget_member_offset(s1_tid, i), sz2); + * H5Tget_array_dims(s2_tid, dim); + * s3_tid = H5Tget_super(s2_tid); + * // Etc., analyze the base type of the array + * } + * else { + * // analyze a simple type + * printf(" %s: type code %d offset %d size %d\n", H5Tget_member_name(s1_tid, i), + * H5Tget_class(s2_tid), H5Tget_member_offset(s1_tid, i), H5Tget_size(s2_tid)); + * } + * // and so on.... + * \endcode + * + * \subsection subsec_datatype_life Life Cycle of the Datatype Object + * Application programs access HDF5 datatypes through identifiers. Identifiers are obtained by + * creating a new datatype or by copying or opening an existing datatype. The identifier can be used + * until it is closed or until the library shuts down. See items a and b in the figure below. By default, + * a datatype is transient, and it disappears when it is closed. + * + * When a dataset or attribute is created (#H5Dcreate or #H5Acreate), its datatype is stored in the + * HDF5 file as part of the dataset or attribute object. See item c in the figure below. Once an object + * is created, its datatype cannot be changed or deleted. The datatype can be accessed by calling + * #H5Dget_type, #H5Aget_type, #H5Tget_super, or #H5Tget_member_type. See item d in the figure + * below. These calls return an identifier to a transient copy of the datatype of the dataset or + * attribute unless the datatype is a committed datatype. + * Note that when an object is created, the stored datatype is a copy of the transient datatype. If two + * objects are created with the same datatype, the information is stored in each object with the same + * effect as if two different datatypes were created and used. 
+ * + * A transient datatype can be stored using #H5Tcommit in the HDF5 file as an independent, named + * object, called a committed datatype. Committed datatypes were formerly known as named + * datatypes. See item e in the figure below. Subsequently, when a committed datatype is opened + * with #H5Topen (item f), or is obtained with #H5Tget_member_type or similar call (item k), the return + * is an identifier to a transient copy of the stored datatype. The identifier can be used in the + * same way as other datatype identifiers except that the committed datatype cannot be modified. When a + * committed datatype is copied with #H5Tcopy, the return is a new, modifiable, transient datatype + * object (item f). + * + * When an object is created using a committed datatype (#H5Dcreate, #H5Acreate), the stored + * datatype is used without copying it to the object. See item j in the figure below. In this case, if + * multiple objects are created using the same committed datatype, they all share the exact same + * datatype object. This saves space and makes clear that the datatype is shared. Note that a + * committed datatype can be shared by objects within the same HDF5 file, but not by objects in + * other files. For more information on copying committed datatypes to other HDF5 files, see the + * “Copying Committed Datatypes with H5Ocopy” topic in the “Additional Resources” chapter. + * + * A committed datatype can be deleted from the file by calling #H5Ldelete which replaces + * #H5Gunlink. See item i in the figure below. If one or more objects are still using the datatype, the + * committed datatype cannot be accessed with #H5Topen, but will not be removed from the file + * until it is no longer used. #H5Tget_member_type and similar calls will return a transient copy of the + * datatype. + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig23.gif "Life cycle of a datatype" + * </td> + * </tr> + * </table> + * + * Transient datatypes are initially modifiable. 
Note that when a datatype is copied or when it is + * written to the file (when an object is created) or the datatype is used to create a composite + * datatype, a copy of the current state of the datatype is used. If the datatype is then modified, the + * changes have no effect on datasets, attributes, or datatypes that have already been created. See + * the figure below. + * + * A transient datatype can be made read-only (#H5Tlock). Note that the datatype is still transient, + * and otherwise does not change. A datatype that is immutable is read-only but cannot be closed + * except when the entire library is closed. The predefined types such as #H5T_NATIVE_INT are + * immutable transient types. + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig24.gif "Transient datatype states: modifiable, read-only, and immutable" + * </td> + * </tr> + * </table> + * + * To create two or more datasets that share a common datatype, first commit the datatype, and then + * use that datatype to create the datasets. See the example below. 
+ * <em> Create a shareable datatype</em> + * \code + * hid_t t1 = ...some transient type...; + * H5Tcommit (file, “shared_type”, t1, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * hid_t dset1 = H5Dcreate (file, “dset1”, t1, space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * hid_t dset2 = H5Dcreate (file, “dset2”, t1, space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * + * hid_t dset1 = H5Dopen (file, “dset1”, H5P_DEFAULT); + * hid_t t2 = H5Dget_type (dset1); + * hid_t dset3 = H5Dcreate (file, “dset3”, t2, space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * hid_t dset4 = H5Dcreate (file, “dset4”, t2, space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * \endcode + * + * <table> + * <caption> Datatype APIs</caption> + * <tr> + * <th>Function</th> + * <th>Description</th> + * </tr> + * <tr> + * <td> + * \code + * hid_t H5Topen (hid_t location, const char *name) + * \endcode + * </td> + * <td> + * A committed datatype can be opened by calling this function, which returns a datatype identifier. + * The identifier should eventually be released by calling #H5Tclose() to release resources. The + * committed datatype returned by this function is read-only or a negative value is returned for failure. + * The location is either a file or group identifier. + * </td> + * </tr> + * <tr> + * <td> + * \code + * herr_t H5Tcommit (hid_t location, const char *name, hid_t type, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT) + * \endcode + * </td> + * <td> + * A transient datatype (not immutable) can be written to a file and turned into a committed datatype by + * calling this function. The location is either a file or group identifier and when combined with name + * refers to a new committed datatype. + * </td> + * </tr> + * <tr> + * <td> + * \code + * htri_t H5Tcommitted (hid_t type) + * \endcode + * </td> + * <td> + * A type can be queried to determine if it is a committed type or a transient type. If this function + * returns a positive value then the type is committed. 
Datasets which return committed datatypes with + * #H5Dget_type() are able to share the datatype with other datasets in the same file. + * </td> + * </tr> + * </table> + * + * \subsection subsec_datatype_transfer Data Transfer: Datatype Conversion and Selection + * When data is transferred (write or read), the storage layout of the data elements may be different. + * For example, an integer might be stored on disk in big-endian byte order and read into memory + * with little-endian byte order. In this case, each data element will be transformed by the HDF5 + * Library during the data transfer. + * + * The conversion of data elements is controlled by specifying the datatype of the source and + * specifying the intended datatype of the destination. The storage format on disk is the datatype + * specified when the dataset is created. The datatype of memory must be specified in the library + * call. + * + * In order to be convertible, the datatype of the source and destination must have the same + * datatype class (with the exception of enumeration type). Thus, integers can be converted to other + * integers, and floats to other floats, but integers cannot (yet) be converted to floats. For each + * atomic datatype class, the possible conversions are defined. An enumeration datatype can be + * converted to an integer or a floating-point number datatype. + * + * Basically, any datatype can be converted to another datatype of the same datatype class. The + * HDF5 Library automatically converts all properties. If the destination is too small to hold the + * source value then an overflow or underflow exception occurs. If a handler is defined with the + * #H5Pset_type_conv_cb function, it will be called. Otherwise, a default action will be performed. + * The table below summarizes the default actions. 
+ * + * <table> + * <caption>Default actions for datatype conversion exceptions</caption> + * <tr> + * <th>Datatype Class</th> + * <th>Possible Exceptions</th> + * <th>Default Action</th> + * </tr> + * <tr> + * <td>Integer</td> + * <td>Size, offset, pad</td> + * <td></td> + * </tr> + * <tr> + * <td>Float</td> + * <td>Size, offset, pad, ebits</td> + * <td></td> + * </tr> + * <tr> + * <td>String</td> + * <td>Size</td> + * <td>Truncates; zero-terminates if required.</td> + * </tr> + * <tr> + * <td>Enumeration</td> + * <td>No field</td> + * <td>All bits set</td> + * </tr> + * </table> + * + * For example, when reading data from a dataset, the source datatype is the datatype set when the + * dataset was created, and the destination datatype is the description of the storage layout in + * memory. The destination datatype must be specified in the #H5Dread call. The example below + * reads a dataset of 32-bit integers. The figure below the example shows + * the data transformation that is performed. + * <em>Specify the destination datatype with H5Dread</em> + * \code + * // Stored as H5T_STD_I32BE + * // Use the native memory order in the destination + * mem_type_id = H5Tcopy(H5T_NATIVE_INT); + * status = H5Dread(dataset_id, mem_type_id, mem_space_id, file_space_id, xfer_plist_id, buf); + * \endcode + * + * <table> + * <caption>Layout of a datatype conversion</caption> + * <tr> + * <td> + * \image html Dtypes_fig25a.gif<br /> + * \image html Dtypes_fig25b.gif<br /> + * \image html Dtypes_fig25c.gif + * </td> + * </tr> + * </table> + * + * One thing to note in the example above is the use of the predefined native datatype + * #H5T_NATIVE_INT. Recall that in this example, the data was stored as 4-byte integers in big-endian + * order. The application wants to read this data into an array of integers in memory. 
Depending on + * the system, the storage layout of memory might be either big or little-endian, so the data may + * need to be transformed on some platforms and not on others. The #H5T_NATIVE_INT type is set + * by the HDF5 Library to be the correct type to describe the storage layout of the memory on the + * system. Thus, the code in the example above will work correctly on any platform, performing a + * transformation when needed. + * + * There are predefined native types for most atomic datatypes, and these can be combined in + * composite datatypes. In general, the predefined native datatypes should always be used for data + * stored in memory. + * Predefined native datatypes describe the storage properties of memory. + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig26.gif "An enum datatype conversion" + * </td> + * </tr> + * </table> + * + * <em>Create an aligned and packed compound datatype</em> + * \code + * // Stored as H5T_STD_BE32 + * // Use the native memory order in the destination + * mem_type_id = H5Tcopy(H5T_NATIVE_INT); + * status = H5Dread(dataset_id, mem_type_id, mem_space_id, file_space_id, xfer_plist_id, buf); + * \endcode + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig27.gif "Alignment of a compound datatype" + * </td> + * </tr> + * </table> + * + * <em>Transfer some fields of a compound datatype</em> + * \code + * // Stored as H5T_STD_BE32 + * // Use the native memory order in the destination + * mem_type_id = H5Tcopy(H5T_NATIVE_INT); + * status = H5Dread(dataset_id, mem_type_id, mem_space_id, file_space_id, xfer_plist_id, buf); + * \endcode + * + * <table> + * <tr> + * <td> + * \image html Dtypes_fig28.gif "Layout when an element is skipped" + * </td> + * </tr> + * </table> + * + * \subsection subsec_datatype_text Text Descriptions of Datatypes: Conversion to and from + * + * HDF5 provides a means for generating a portable and human-readable text description of a + * datatype and for generating a datatype from such a 
text description. This capability is particularly + * useful for creating complex datatypes in a single step, for creating a text description of a datatype + * for debugging purposes, and for creating a portable datatype definition that can then be used to + * recreate the datatype on many platforms or in other applications. + * + * These tasks are handled by two functions provided in the HDF5 Lite high-level library: + * \li #H5LTtext_to_dtype Creates an HDF5 datatype in a single step. + * \li #H5LTdtype_to_text Translates an HDF5 datatype into a text description. + * + * Note that this functionality requires that the HDF5 High-Level Library (H5LT) be installed. + * + * While #H5LTtext_to_dtype can be used to generate any sort of datatype, it is particularly useful + * for complex datatypes. + * + * #H5LTdtype_to_text is most likely to be used in two sorts of situations: when a datatype must be + * closely examined for debugging purposes or when creating a portable text description of the datatype + * that can then be used to recreate the datatype on other platforms or in other applications. + * + * These two functions work for all valid HDF5 datatypes except time, bitfield, and reference + * datatypes. + * + * The currently supported text format used by #H5LTtext_to_dtype and #H5LTdtype_to_text is the + * data description language (DDL) and conforms to the \ref DDLBNF110. The portion of the + * \ref DDLBNF110 that defines HDF5 datatypes appears below. 
+ * <em>The definition of HDF5 datatypes from the HDF5 DDL</em> + * \code + * <datatype> ::= <atomic_type> | <compound_type> | <variable_length_type> | <array_type> + * + * <atomic_type> ::= <integer> | <float> | <time> | <string> | + * <bitfield> | <opaque> | <reference> | <enum> + * <integer> ::= H5T_STD_I8BE | H5T_STD_I8LE | + * H5T_STD_I16BE | H5T_STD_I16LE | + * H5T_STD_I32BE | H5T_STD_I32LE | + * H5T_STD_I64BE | H5T_STD_I64LE | + * H5T_STD_U8BE | H5T_STD_U8LE | + * H5T_STD_U16BE | H5T_STD_U16LE | + * H5T_STD_U32BE | H5T_STD_U32LE | + * H5T_STD_U64BE | H5T_STD_U64LE | + * H5T_NATIVE_CHAR | H5T_NATIVE_UCHAR | + * H5T_NATIVE_SHORT | H5T_NATIVE_USHORT | + * H5T_NATIVE_INT | H5T_NATIVE_UINT | + * H5T_NATIVE_LONG | H5T_NATIVE_ULONG | + * H5T_NATIVE_LLONG | H5T_NATIVE_ULLONG + * <float> ::= H5T_IEEE_F32BE | H5T_IEEE_F32LE | + * H5T_IEEE_F64BE | H5T_IEEE_F64LE | + * H5T_NATIVE_FLOAT | H5T_NATIVE_DOUBLE | + * H5T_NATIVE_LDOUBLE + * <time> ::= H5T_TIME: not yet implemented + * <string> ::= H5T_STRING { + * STRSIZE <strsize> ; + * STRPAD <strpad> ; + * CSET <cset> ; + * CTYPE <ctype> ; + * } + * <strsize> ::= <int_value> + * <strpad> ::= H5T_STR_NULLTERM | H5T_STR_NULLPAD | H5T_STR_SPACEPAD + * <cset> ::= H5T_CSET_ASCII | H5T_CSET_UTF8 + * <ctype> ::= H5T_C_S1 | H5T_FORTRAN_S1 + * + * <bitfield> ::= H5T_STD_B8BE | H5T_STD_B8LE | + * H5T_STD_B16BE | H5T_STD_B16LE | + * H5T_STD_B32BE | H5T_STD_B32LE | + * H5T_STD_B64BE | H5T_STD_B64LE + * + * <opaque> ::= H5T_OPAQUE { + * OPAQUE_TAG <identifier>; + * OPAQUE_SIZE <int_value>;opt + * } + * + * <reference> ::= H5T_REFERENCE { <ref_type> } + * <ref_type> ::= H5T_STD_REF_OBJECT | H5T_STD_REF_DSETREG + * + * <compound_type> ::= H5T_COMPOUND { + * <member_type_def>+ + * } + * <member_type_def> ::= <datatype> <field_name>; + * <field_name> ::= <identifier> + * + * <variable_length_type> ::= H5T_VLEN { <datatype> } + * + * <array_type> ::= H5T_ARRAY { <dim_sizes> <datatype> } + * <dim_sizes> ::= '['<dimsize>']' | 
'['<dimsize>']'<dim_sizes> + * <dimsize> ::= <int_value> + * + * <enum> ::= H5T_ENUM { + * <enum_base_type> <enum_def>+ + * } + * <enum_base_type> ::= <integer> + * // Currently enums can only hold integer type data, but they may be expanded + * // in the future to hold any datatype + * <enum_def> ::= <enum_symbol> <enum_val>; + * <enum_symbol> ::= <identifier> + * <enum_val> ::= <int_value> + * \endcode + * + * <em>Old definitions of the opaque and compound datatypes</em> + * \code + * <opaque> ::= H5T_OPAQUE { <identifier> } + * <compound_type> ::= H5T_COMPOUND { <member_type_def>+ } + * <member_type_def> ::= <datatype> <field_name> ; + * <field_name> ::= <identifier> + * \endcode + * + * <h4>Examples</h4> + * The code sample below illustrates the use of #H5LTtext_to_dtype to generate a variable-length + * string datatype. + * + * <em>Creating a variable-length string datatype from a text description</em> + * \code + * hid_t dtype; + * if((dtype = H5LTtext_to_dtype( + * "H5T_STRING { + * STRSIZE H5T_VARIABLE; + * STRPAD H5T_STR_NULLPAD; + * CSET H5T_CSET_ASCII; + * CTYPE H5T_C_S1; + * }", H5LT_DDL)) < 0) + * goto out; + * \endcode + * + * The code sample below illustrates the use of #H5LTtext_to_dtype to generate a complex array + * datatype. + * + * <em>Creating a complex array datatype from a text description</em> + * \code + * hid_t dtype; + * if((dtype = H5LTtext_to_dtype( + * "H5T_ARRAY { [5][7][13] H5T_ARRAY + * { [17][19] H5T_COMPOUND + * { + * H5T_STD_I8BE \"arr_compound_1\"; + * H5T_STD_I32BE \"arr_compound_2\"; + * } + * } + * }", H5LT_DDL))<0) + * goto out; + * \endcode + * + * Previous Chapter \ref sec_dataset - Next Chapter \ref sec_dataspace + * + */ + +/** + * \defgroup H5T Datatypes (H5T) * * Use the functions in this module to manage HDF5 datatypes. 
* diff --git a/src/H5VLmodule.h b/src/H5VLmodule.h index 4ea4992..5e2e1b3 100644 --- a/src/H5VLmodule.h +++ b/src/H5VLmodule.h @@ -26,7 +26,97 @@ #define H5_MY_PKG H5VL #define H5_MY_PKG_ERR H5E_VOL -/**\defgroup H5VL H5VL +/** \page H5VL_UG The HDF5 VOL plugin + * + * \section sec_vol The HDF5 VOL plugin + * + * \subsection subsec_vol_intro Introduction + * The virtual object layer is an abstraction layer in the HDF5 library that intercepts all API calls + * that could potentially access objects in an HDF5 container and forwards those calls to a VOL connector, + * which implements the storage. The user or application gets the benefit of using the familiar and + * widely-used HDF5 data model and API, but can map the physical storage of the HDF5 file and objects + * to storage that better meets the application's data needs. + * + * \subsection subsec_vol_abstract_layer The VOL Abstraction Layer + * The VOL lies just under the public API. When a storage-oriented public API call is made, the library + * performs a few sanity checks on the input parameters and then immediately invokes a VOL callback, + * which resolves to an implementation in the VOL connector that was selected when opening or creating + * the file. The VOL connector then performs whatever operations are needed before control returns to the + * library, where any final library operations such as assigning IDs for newly created/opened datasets are + * performed before returning. This means that, for calls that utilize the VOL, all of the functionality + * is deferred to the VOL connector and the HDF5 library does very little work. An important consideration + * of this is that most of the HDF5 caching layers (metadata and chunk caches, page buffering, etc.) will + * not be available as those are implemented in the HDF5 native VOL connector and cannot be easily reused + * by external connectors. 
+ * + * <table> + * <tr> + * <td> + * \image html vol_architecture.png "The VOL Architecture" + * </td> + * </tr> + * </table> + * + * Not all public HDF5 API calls pass through the VOL. Only calls which require manipulating storage go + * through the VOL and require a VOL connector author to implement the appropriate callback. Dataspace, + * property list, error stack, etc. calls have nothing to do with storage manipulation or querying and + * do not use the VOL. This may be confusing when it comes to property list calls, since many of those + * calls set properties for storage. Property lists are just collections of key-value pairs, though, so + * a particular VOL connector is not required to set or get properties. + * + * Another thing to keep in mind is that not every VOL connector will implement the full HDF5 public API. + * In some cases, a particular feature like variable-length types may not have been developed yet or may + * not have an equivalent in the target storage system. Also, many HDF5 public API calls are specific to + * the native HDF5 file format and are unlikely to have any use in other VOL connectors. A + * feature/capabilities flag scheme is being developed to help navigate this. + * + * For more information about which calls go through the VOL and the mechanism by which this is implemented, + * see the connector author and library internals documentation. + * + * \subsection subsec_vol_connect VOL Connectors + * A VOL connector can be implemented in several ways: + * \li as a shared or static library linked to an application + * \li as a dynamically loaded plugin, implemented as a shared library + * \li and even as an internal connector, built into the HDF5 library itself + * + * This section mostly focuses on external connectors, both libraries and plugins, as those are expected + * to be much more common than internal implementations. 
+ * + * A list of VOL connectors can be found here: + * <a href="https://portal.hdfgroup.org/display/support/Registered+VOL+Connectors"> + * Registered VOL Connectors</a> + * + * This list is incomplete and only includes the VOL connectors that have been registered with + * The HDF Group. + * + * Not every connector in this collection is actively maintained by The HDF Group. It simply serves as a + * single location where important VOL connectors can be found. See the documentation in a connector's + * repository to determine its development status and the parties responsible for it. + * + * A VOL template that contains build scripts (Autotools and CMake) and an empty VOL connector "shell" + * which can be copied and used as a starting point for building new connectors is located here: + * <a href="https://github.com/HDFGroup/vol-template">VOL Connector Template</a> + * + * This template VOL connector is for use in constructing terminal VOL connectors that do not forward + * calls to an underlying connector. The external pass-through VOL connector listed on the registered + * connector page can be used as a starting point for pass-through connectors. + * + * The only current (non-test) internal VOL connector distributed with the library is the native file + * format connector (the "native VOL connector") which contains the code that handles native HDF5 (*.h5/hdf5) + * files. In other words, even the canonical HDF5 file format is implemented via the VOL, making it a core + * part of the HDF5 library and not an optional component which could be disabled. + * + * It has not been completely abstracted from the HDF5 library, though, and is treated as a special case. + * For example, it cannot be unloaded and is always present. + * + * \subsection subsec_vol_use Connector Use + * + * Previous Chapter \ref sec_plist - Next Chapter \ref sec_async + * + */ + +/** + *\defgroup H5VL VOL connector (H5VL) * * \todo Describe the VOL plugin life cycle. 
* diff --git a/src/H5Zmodule.h b/src/H5Zmodule.h index 338242e..ec21e50 100644 --- a/src/H5Zmodule.h +++ b/src/H5Zmodule.h @@ -28,7 +28,12 @@ #define H5_MY_PKG H5Z #define H5_MY_PKG_ERR H5E_PLINE -/**\defgroup H5Z H5Z +/** \page H5Z_UG The HDF5 Filters + * @todo Under Construction + */ + +/** + * \defgroup H5Z Filters (H5Z) * * Use the functions in this module to manage HDF5 filters. * diff --git a/src/H5module.h b/src/H5module.h index 642683f..35de966 100644 --- a/src/H5module.h +++ b/src/H5module.h @@ -25,7 +25,1409 @@ #define H5_MY_PKG H5 #define H5_MY_PKG_ERR H5E_LIB -/**\defgroup H5 H5 +/** \page H5DM_UG The HDF5 Data Model and File Structure + * + * \section sec_data_model The HDF5 Data Model and File Structure + * \subsection subsec_data_model_intro Introduction + * The Hierarchical Data Format (HDF) implements a model for managing and storing data. The + * model includes an abstract data model and an abstract storage model (the data format), and + * libraries to implement the abstract model and to map the storage model to different storage + * mechanisms. The HDF5 library provides a programming interface to a concrete implementation + * of the abstract models. The library also implements a model of data transfer, an efficient + * movement of data from one stored representation to another stored representation. The figure + * below illustrates the relationships between the models and implementations. This chapter + * explains these models in detail. + * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig1.gif "HDF5 models and implementations" + * </td> + * </tr> + * </table> + * + * The <em>Abstract Data Model</em> is a conceptual model of data, data types, and data organization. The + * abstract data model is independent of storage medium or programming environment. The + * <em>Storage Model</em> is a standard representation for the objects of the abstract data model. 
The + * <a href="https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html">HDF5 File Format Specification</a> + * defines the storage model. + * + * The <em>Programming Model</em> is a model of the computing environment and includes platforms from + * small single systems to large multiprocessors and clusters. The programming model manipulates + * (instantiates, populates, and retrieves) objects from the abstract data model. + * + * The <em>Library</em> is the concrete implementation of the programming model. The library exports the + * HDF5 APIs as its interface. In addition to implementing the objects of the abstract data model, + * the library manages data transfers from one stored form to another. Data transfer examples + * include reading from disk to memory and writing from memory to disk. + * + * <em>Stored Data</em> is the concrete implementation of the storage model. The <em>Storage Model</em> + * is mapped to several storage mechanisms including single disk files, multiple files (family of files), + * and memory representations. + * + * The HDF5 library is a C module that implements the programming model and abstract data + * model. The HDF5 library calls the operating system or other storage management software (for + * example, the MPI/IO Library) to store and retrieve persistent data. The HDF5 library may also + * link to other software such as filters for compression. The HDF5 library is linked to an + * application program which may be written in C, C++, Fortran, or Java. The application program + * implements problem specific algorithms and data structures and calls the HDF5 library to store + * and retrieve data. The figure below shows the dependencies of these modules. 
+ * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig2.gif "The library, the application program, and other modules" + * </td> + * </tr> + * </table> + * + * It is important to realize that each of the software components manages data using models and + * data structures that are appropriate to the component. When data is passed between layers + * (during storage or retrieval), it is transformed from one representation to another. The figure + * below suggests some of the kinds of data structures used in the different layers. + * + * The <em>Application Program</em> uses data structures that represent the problem and algorithms + * including variables, tables, arrays, and meshes among other data structures. Depending on its + * design and function, an application may have quite a few different kinds of data structures and + * different numbers and sizes of objects. + * + * The <em>HDF5 Library</em> implements the objects of the HDF5 abstract data model. Some of these + * objects include groups, datasets, and attributes. The application program maps the application + * data structures to a hierarchy of HDF5 objects. Each application will create a mapping best + * suited to its purposes. + * + * The objects of the HDF5 abstract data model are mapped to the objects of the HDF5 storage + * model, and stored in a storage medium. The stored objects include header blocks, free lists, data + * blocks, B-trees, and other objects. Each group or dataset is stored as one or more header and data + * blocks. + * @see <a href="https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html">HDF5 File Format Specification</a> + * for more information on how these objects are organized. The HDF5 library can also use other + * libraries and modules such as compression. 
+ * + * <table> + * <caption>Data structures in different layers</caption> + * <tr> + * <td> + * \image html Dmodel_fig3_a.gif + * </td> + * <td> + * \image html Dmodel_fig2.gif + * </td> + * <td> + * \image html Dmodel_fig3_c.gif + * </td> + * </tr> + * </table> + * + * The important point to note is that there is not necessarily any simple correspondence between + * the objects of the application program, the abstract data model, and those of the Format + * Specification. The organization of the application program's data, and how it is mapped to the + * HDF5 abstract data model, is up to the application developer. The application program only + * needs to deal with the library and the abstract data model. Most applications need not consider + * any details of the + * <a href="https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html">HDF5 File Format Specification</a> + * or the details of how objects of the abstract data model are translated to and from storage. + * + * \subsection subsec_data_model_abstract The Abstract Data Model + * The abstract data model (ADM) defines concepts for defining and describing complex data + * stored in files. The ADM is a very general model which is designed to conceptually cover many + * specific models. Many different kinds of data can be mapped to objects of the ADM, and + * therefore stored and retrieved using HDF5. The ADM is not, however, a model of any particular + * problem or application domain. Users need to map their data to the concepts of the ADM. 
+ * + * The key concepts include: + * <ul><li>@ref subsubsec_data_model_abstract_file - a contiguous string of bytes in a computer + * store (memory, disk, etc.), and the bytes represent zero or more objects of the model</li> + * <li>@ref subsubsec_data_model_abstract_group - a collection of objects (including groups)</li> + * <li>@ref subsubsec_data_model_abstract_dataset - a multidimensional array of data elements with + * attributes and other metadata</li> + * <li>@ref subsubsec_data_model_abstract_space - a description of the dimensions of a multidimensional + * array</li> + * <li>@ref subsubsec_data_model_abstract_type - a description of a specific class of data element + * including its storage layout as a pattern of bits</li> + * <li>@ref subsubsec_data_model_abstract_attr - a named data value associated with a group, + * dataset, or named datatype</li> + * <li>@ref subsubsec_data_model_abstract_plist - a collection of parameters (some permanent and + * some transient) controlling options in the library</li> + * <li>@ref subsubsec_data_model_abstract_link - the way objects are connected</li></ul> + * + * These key concepts are described in more detail below. + * + * \subsubsection subsubsec_data_model_abstract_file File + * Abstractly, an HDF5 file is a container for an organized collection of objects. The objects are + * groups, datasets, and other objects as defined below. The objects are organized as a rooted, + * directed graph. Every HDF5 file has at least one object, the root group. See the figure below. All + * objects are members of the root group or descendants of the root group. + * + * <table> + * <caption>The HDF5 file</caption> + * <tr> + * <td> + * \image html Dmodel_fig4_b.gif + * </td> + * </tr> + * <tr> + * <td> + * \image html Dmodel_fig4_a.gif + * </td> + * </tr> + * </table> + * + * HDF5 objects have a unique identity within a single HDF5 file and can be accessed only by their + * names within the hierarchy of the file. 
HDF5 objects in different files do not necessarily have + * unique identities, and it is not possible to access a permanent HDF5 object except through a file. + * For more information, see \ref subsec_data_model_structure. + * + * When the file is created, the file creation properties specify settings for the file. The file creation + * properties include version information and parameters of global data structures. When the file is + * opened, the file access properties specify settings for the current access to the file. File access + * properties include parameters for storage drivers and parameters for caching and garbage + * collection. The file creation properties are set permanently for the life of the file, and the file + * access properties can be changed by closing and reopening the file. + * + * An HDF5 file can be “mounted” as part of another HDF5 file. This is analogous to Unix file + * system mounts. The root of the mounted file is attached to a group in the mounting file, and all + * the contents can be accessed as if the mounted file were part of the mounting file. + * + * @see @ref sec_file. + * + * \subsubsection subsubsec_data_model_abstract_group Group + * An HDF5 group is analogous to a file system directory. Abstractly, a group contains zero or + * more objects, and every object must be a member of at least one group. The root group is a + * special case; it may not be a member of any group. + * + * Group membership is actually implemented via link objects. See the figure below. A link object + * is owned by a group and points to a named object. Each link has a name, and each link points to + * exactly one object. Each named object has at least one and possibly many links to it. + * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig5.gif "Group membership via link objects" + * </td> + * </tr> + * </table> + * + * There are three classes of named objects: group, dataset, and committed (named) datatype. See + * the figure below. 
Each of these objects is the member of at least one group, and this means there + * is at least one link to it. + * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig6.gif "Classes of named objects" + * </td> + * </tr> + * </table> + * + * @see @ref sec_group. + * + * \subsubsection subsubsec_data_model_abstract_dataset Dataset + * An HDF5 dataset is a multidimensional (rectangular) array of data elements. See the figure + * below. The shape of the array (number of dimensions, size of each dimension) is described by + * the dataspace object (described in the next section below). + * + * A data element is a single unit of data which may be a number, a character, an array of numbers + * or characters, or a record of heterogeneous data elements. A data element is a set of bits. The + * layout of the bits is described by the datatype (see below). + * + * The dataspace and datatype are set when the dataset is created, and they cannot be changed for + * the life of the dataset. The dataset creation properties are set when the dataset is created. The + * dataset creation properties include the fill value and storage properties such as chunking and + * compression. These properties cannot be changed after the dataset is created. + * + * The dataset object manages the storage and access to the data. While the data is conceptually a + * contiguous rectangular array, it is physically stored and transferred in different ways depending + * on the storage properties and the storage mechanism used. The actual storage may be a set of + * compressed chunks, and the access may be through different storage mechanisms and caches. + * The dataset maps between the conceptual array of elements and the actual stored data. + * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig7_b.gif "The dataset" + * </td> + * </tr> + * </table> + * + * @see @ref sec_dataset. 
+ * + * \subsubsection subsubsec_data_model_abstract_space Dataspace + * The HDF5 dataspace describes the layout of the elements of a multidimensional array. + * Conceptually, the array is a hyper-rectangle with one to 32 dimensions. HDF5 dataspaces can be + * extendable. Therefore, each dimension has a current size and a maximum size, and the maximum + * may be unlimited. The dataspace describes this hyper-rectangle: it is a list of dimensions with + * the current and maximum (or unlimited) sizes. See the figure below. + * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig8.gif "The dataspace" + * </td> + * </tr> + * </table> + * + * Dataspace objects are also used to describe hyperslab selections from a dataset. Any subset of the + * elements of a dataset can be selected for read or write by specifying a set of hyperslabs. A + * non-rectangular region can be selected by the union of several (rectangular) dataspaces. + * + * @see @ref sec_dataspace. + * + * \subsubsection subsubsec_data_model_abstract_type Datatype + * The HDF5 datatype object describes the layout of a single data element. A data element is a + * single element of the array; it may be a single number, a character, an array of numbers or + * characters, or other data. The datatype object describes the storage layout of this data. + * + * Data types are categorized into 11 classes of datatype. Each class is interpreted according to a set + * of rules and has a specific set of properties to describe its storage. For instance, floating point + * numbers have exponent position and sizes which are interpreted according to appropriate + * standards for number representation. Thus, the datatype class tells what the element means, and + * the datatype describes how it is stored. + * + * The figure below shows the classification of datatypes. Atomic datatypes are indivisible. Each + * may be a single object such as a number or a string. 
Composite datatypes are composed of + * multiple elements of atomic datatypes. In addition to the standard types, users can define + * additional datatypes such as a 24-bit integer or a 16-bit float. + * A dataset or attribute has a single datatype object associated with it. See Figure 7 above. The + * datatype object may be used in the definition of several objects, but by default, a copy of the + * datatype object will be private to the dataset. + * + * Optionally, a datatype object can be stored in the HDF5 file. The datatype is linked into a group, + * and therefore given a name. A committed datatype (formerly called a named datatype) can be + * opened and used in any way that a datatype object can be used. + * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig9.gif "Datatype classifications" + * </td> + * </tr> + * </table> + * + * @see @ref sec_datatype. + * + * \subsubsection subsubsec_data_model_abstract_attr Attribute + * Any HDF5 named data object (group, dataset, or named datatype) may have zero or more + * user-defined attributes. Attributes are used to document the object. The attributes of an object are + * stored with the object. + * + * An HDF5 attribute has a name and data. The data portion is similar in structure to a dataset: a + * dataspace defines the layout of an array of data elements, and a datatype defines the storage + * layout and interpretation of the elements. See the figure below. 
+ * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig10.gif "Attribute data elements" + * </td> + * </tr> + * </table> + * + * In fact, an attribute is very similar to a dataset with the following limitations: + * <ul><li>An attribute can only be accessed via the object</li> + * <li>Attribute names are significant only within the object</li> + * <li>An attribute should be a small object</li> + * <li>The data of an attribute must be read or written in a single access (partial reading or + * writing is not allowed)</li> + * <li>Attributes do not have attributes</li></ul> + * + * Note that the value of an attribute can be an object reference. A shared attribute or an attribute + * that is a large array can be implemented as a reference to a dataset. + * + * The name, dataspace, and datatype of an attribute are specified when it is created and cannot be + * changed over the life of the attribute. An attribute can be opened by name, by index, or by + * iterating through all the attributes of the object. + * + * @see @ref sec_attribute. + * + * \subsubsection subsubsec_data_model_abstract_plist Property List + * HDF5 has a generic property list object. Each list is a collection of name-value pairs. Each class + * of property list has a specific set of properties. Each property has an implicit name, a datatype, + * and a value. See the figure below. A property list object is created and used in ways similar to + * the other objects of the HDF5 library. + * + * Property Lists are attached to the object in the library, and they can be used by any part of the + * library. Some properties are permanent (for example, the chunking strategy for a dataset), others + * are transient (for example, buffer sizes for data transfer). A common use of a Property List is to + * pass parameters from the calling program to a VFL driver or a module of the pipeline. + * + * Property lists are conceptually similar to attributes. 
Property lists are information relevant to the + * behavior of the library while attributes are relevant to the user’s data and application. + * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig11_b.gif "The property list" + * </td> + * </tr> + * </table> + * + * Property lists are used to control optional behavior for file creation, file access, dataset creation, + * dataset transfer (read, write), and file mounting. Some property list classes are shown in the table + * below. Details of the different property lists are explained in the relevant sections of this + * document. + * + * <table> + * <caption>Property list classes and their usage</caption> + * <tr> + * <th>Property List Class</th> + * <th>Used</th> + * <th>Examples</th> + * </tr> + * <tr> + * <td>#H5P_FILE_CREATE</td> + * <td>Properties for file creation.</td> + * <td>Set size of user block.</td> + * </tr> + * <tr> + * <td>#H5P_FILE_ACCESS</td> + * <td>Properties for file access.</td> + * <td>Set parameters for VFL driver. An example is MPI I/O. </td> + * </tr> + * <tr> + * <td>#H5P_DATASET_CREATE</td> + * <td>Properties for dataset creation.</td> + * <td>Set chunking, compression, or fill value.</td> + * </tr> + * <tr> + * <td>#H5P_DATASET_XFER</td> + * <td>Properties for raw data transfer (read and write).</td> + * <td>Tune buffer sizes or memory management.</td> + * </tr> + * <tr> + * <td>#H5P_FILE_MOUNT</td> + * <td>Properties for file mounting.</td> + * <td></td> + * </tr> + * </table> + * + * @see @ref sec_plist. + * + * \subsubsection subsubsec_data_model_abstract_link Link + * This section is under construction. + * + * \subsection subsec_data_model_storage The HDF5 Storage Model + * \subsubsection subsubsec_data_model_storage_spec The Abstract Storage Model: the HDF5 Format Specification + * The <a href="https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html">HDF5 File Format Specification</a> + * defines how HDF5 objects and data are mapped to a linear + * address space. 
The address space is assumed to be a contiguous array of bytes stored on some + * random access medium. The format defines the standard for how the objects of the abstract data + * model are mapped to linear addresses. The stored representation is self-describing in the sense + * that the format defines all the information necessary to read and reconstruct the original objects + * of the abstract data model. + * + * The HDF5 File Format Specification is organized in three parts: + * <ul><li>Level 0: File signature and super block</li> + * <li>Level 1: File infrastructure</li> + * <ul><li>Level 1A: B-link trees and B-tree nodes</li> + * <li>Level 1B: Group</li> + * <li>Level 1C: Group entry</li> + * <li>Level 1D: Local heaps</li> + * <li>Level 1E: Global heap</li> + * <li>Level 1F: Free-space index</li></ul> + * <li>Level 2: Data object</li> + * <ul><li>Level 2A: Data object headers</li> + * <li>Level 2B: Shared data object headers</li> + * <li>Level 2C: Data object data storage</li></ul></ul> + * + * The Level 0 specification defines the header block for the file. Header block elements include a + * signature, version information, key parameters of the file layout (such as which VFL file drivers + * are needed), and pointers to the rest of the file. Level 1 defines the data structures used + * throughout the file: the B-trees, heaps, and groups. Level 2 defines the data structure for storing + * the data objects and data. In all cases, the data structures are completely specified so that every + * bit in the file can be faithfully interpreted. + * + * It is important to realize that the structures defined in the HDF5 file format are not the same as + * the abstract data model: the object headers, heaps, and B-trees of the file specification are not + * represented in the abstract data model. The format defines a number of objects for managing the + * storage including header blocks, B-trees, and heaps. 
The HDF5 File Format Specification defines + * how the abstract objects (for example, groups and datasets) are represented as headers, B-tree + * blocks, and other elements. + * + * The HDF5 library implements operations to write HDF5 objects to the linear format and to read + * from the linear format to create HDF5 objects. It is important to realize that a single HDF5 + * abstract object is usually stored as several objects. A dataset, for example, might be stored in a + * header and in one or more data blocks, and these objects might not be contiguous on the hard + * disk. + * + * \subsubsection subsubsec_data_model_storage_imple Concrete Storage Model + * The HDF5 file format defines an abstract linear address space. This can be implemented in + * different storage media such as a single file or multiple files on disk or in memory. The HDF5 + * Library defines an open interface called the Virtual File Layer (VFL). The VFL allows different + * concrete storage models to be selected. + * + * The VFL defines an abstract model, an API for random access storage, and an API to plug in + * alternative VFL driver modules. The model defines the operations that the VFL driver must and + * may support, and the plug-in API enables the HDF5 library to recognize the driver and pass it + * control and data. + * + * A number of VFL drivers have been defined in the HDF5 library. Some work with a single file, + * and some work with multiple files split in various ways. Some work in serial computing + * environments, and some work in parallel computing environments. Most work with disk copies + * of HDF5 files, but one works with a memory copy. These drivers are listed in the + * \ref table_file_drivers "Supported file drivers" table. + * + * @see @ref subsec_file_alternate_drivers. + * + * Each driver isolates the details of reading and writing storage so that the rest of the HDF5 library + * and user program can be almost the same for different storage methods. 
The exception to this + * rule is that some VFL drivers need information from the calling application. This information is + * passed using property lists. For example, the Parallel driver requires certain control information + * that must be provided by the application. + * + * \subsection subsec_data_model_structure The Structure of an HDF5 File + * \subsubsection subsubsec_data_model_structure_file Overall File Structure + * An HDF5 file is organized as a rooted, directed graph. Named data objects are the nodes of the + * graph, and links are the directed arcs. Each arc of the graph has a name, and the root group has + * the name “/”. Objects are created and then inserted into the graph with the link operation which + * creates a named link from a group to the object. For example, the figure below illustrates the + * structure of an HDF5 file when one dataset is created. An object can be the target of more than + * one link. The names on the links must be unique within each group, but there may be many links + * with the same name in different groups. Link names are unambiguous: some ancestor will have a + * different name, or they are the same object. The graph is navigated with path names similar to + * Unix file systems. An object can be opened with a full path starting at the root group or with a + * relative path and a starting node (group). Note that all paths are relative to a single HDF5 file. In + * this sense, an HDF5 file is analogous to a single Unix file system. + * + * <table> + * <caption>An HDF5 file with one dataset</caption> + * <tr> + * <td> + * \image html Dmodel_fig12_a.gif + * </td> + * <td> + * \image html Dmodel_fig12_b.gif + * </td> + * </tr> + * </table> + * + * Note: In the figure above are two figures. The left figure represents a newly created file with one + * group, /. In the right figure, a dataset called /dset1 has been created. + * + * It is important to note that, just like the Unix file system, HDF5 objects do not have names.
The + * names are associated with paths. An object has a unique (within the file) object identifier, but a + * single object may have many names because there may be many paths to the same object. An + * object can be renamed (moved to another group) by adding and deleting links. In this case, the + * object itself never moves. For that matter, membership in a group has no implication for the + * physical location of the stored object. + * + * Deleting a link to an object does not necessarily delete the object. The object remains available + * as long as there is at least one link to it. After all the links to an object are deleted, it can no + * longer be opened although the storage may or may not be reclaimed. + * + * It is important to realize that the linking mechanism can be used to construct very complex + * graphs of objects. For example, it is possible for an object to be shared between several groups + * and even to have more than one name in the same group. It is also possible for a group to be a + * member of itself or to be in a “cycle” in the graph. An example of a cycle is where a child is the + * parent of one of its own ancestors. + * + * \subsubsection subsubsec_data_model_structure_path HDF5 Path Names and Navigation + * The structure of the file constitutes the name space for the objects in the file. A path name is a + * string of components separated by ‘/’. Each component is the name of a link or the special + * character “.” for the current group. Link names (components) can be any string of ASCII + * characters not containing ‘/’ (except the string “.” which is reserved). However, users are advised + * to avoid the use of punctuation and non-printing characters because they may create problems for + * other software. The figure below gives a BNF grammar for HDF5 path names. 
+ * + * <em>A BNF grammar for path names</em> + * \code + * PathName ::= AbsolutePathName | RelativePathName + * Separator ::= "/" ["/"]* + * AbsolutePathName ::= Separator [ RelativePathName ] + * RelativePathName ::= Component [ Separator RelativePathName ]* + * Component ::= "." | Name + * Name ::= Character+ - {"."} + * Character ::= {c: c in {{ legal ASCII characters } - {'/'}} + * \endcode + * + * An object can always be addressed by a full or absolute path which would start at the root group. + * As already noted, a given object can have more than one full path name. An object can also be + * addressed by a relative path which would start at a group and include the path to the object. + * + * The structure of an HDF5 file is “self-describing.” This means that it is possible to navigate the + * file to discover all the objects in the file. Basically, the structure is traversed as a graph starting at + * one node and recursively visiting the nodes of the graph. + * + * \subsubsection subsubsec_data_model_structure_example Examples of HDF5 File Structures + * The figures below show some possible HDF5 file structures with groups and datasets. The first + * figure shows the structure of a file with three groups. The second shows a dataset created in + * “/group1”. The third figure shows the structure after a dataset called dset2 has been added to the + * root group. The fourth figure shows the structure after another group and dataset have been + * added. + * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig14_a.gif "An HDF5 file structure with groups" + * </td> + * </tr> + * </table> + * + * Note: The figure above shows three groups; /group1 and /group2 are members of the root group. + * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig14_b.gif "An HDF5 file structure with groups and a dataset" + * </td> + * </tr> + * </table> + * + * Note: The figure above shows that a dataset has been created in /group1: /group1/dset1. 
+ * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig14_c.gif " An HDF5 file structure with groups and datasets" + * </td> + * </tr> + * </table> + * + * Note: In the figure above, another dataset has been added as a member of the root group: /dset2. + * + * <table> + * <tr> + * <td> + * \image html Dmodel_fig14_d.gif " Another HDF5 file structure with groups and datasets" + * </td> + * </tr> + * </table> + * + * Note: In the figure above, another group and dataset have been added reusing object names: + * <em>/group2/group2/dset2</em>. + * <ol><li>HDF5 requires random access to the linear address space. For this reason it is not + * well suited for some data media such as streams.</li> + * <li>It could be said that HDF5 extends the organizing concepts of a file system to the internal + * structure of a single file.</li> + * <li>As of HDF5-1.4, the storage used for an object is reclaimed, even if all links are + * deleted.</li></ol> + * + * Next Chapter \ref sec_program + * + */ + +/** \page H5_UG The HDF5 Library and Programming Model + * + * \section sec_program The HDF5 Library and Programming Model + * \subsection subsec_program_intro Introduction + * The HDF5 library implements the HDF5 abstract data model and storage model. These models + * were described in the preceding chapter. + * + * Two major objectives of the HDF5 products are to provide tools that can be used on as many + * computational platforms as possible (portability), and to provide a reasonably object-oriented + * data model and programming interface. + * + * To be as portable as possible, the HDF5 library is implemented in portable C. C is not an + * object-oriented language, but the library uses several mechanisms and conventions to implement an + * object model. + * + * One mechanism the HDF5 library uses is to implement the objects as data structures. To refer to + * an object, the HDF5 library implements its own pointers. These pointers are called identifiers.
+ * An identifier is then used to invoke operations on a specific instance of an object. For example, + * when a group is opened, the API returns a group identifier. This identifier is a reference to that + * specific group and will be used to invoke future operations on that group. The identifier is valid + * only within the context it is created and remains valid until it is closed or the file is closed. This + * mechanism is essentially the same as the mechanism that C++ or other object-oriented languages + * use to refer to objects except that the syntax is C. + * + * Similarly, object-oriented languages collect all the methods for an object in a single name space. + * An example is the methods of a C++ class. The C language does not have any such mechanism, + * but the HDF5 library simulates this through its API naming convention. API function names + * begin with a common prefix that is related to the class of objects that the function operates on. + * The table below lists the HDF5 objects and the standard prefixes used by the corresponding + * HDF5 APIs. For example, functions that operate on datatype objects all have names beginning + * with H5T. 
+ * + * <table> + * <caption>The HDF5 API naming scheme</caption> + * <tr> + * <th>Prefix</th> + * <th>Operates on</th> + * </tr> + * <tr> + * <td>@ref H5A</td> + * <td>Attributes</td> + * </tr> + * <tr> + * <td>@ref H5D</td> + * <td>Datasets</td> + * </tr> + * <tr> + * <td>@ref H5E</td> + * <td>Error reports</td> + * </tr> + * <tr> + * <td>@ref H5F</td> + * <td>Files</td> + * </tr> + * <tr> + * <td>@ref H5G</td> + * <td>Groups</td> + * </tr> + * <tr> + * <td>@ref H5I</td> + * <td>Identifiers</td> + * </tr> + * <tr> + * <td>@ref H5L</td> + * <td>Links</td> + * </tr> + * <tr> + * <td>@ref H5O</td> + * <td>Objects</td> + * </tr> + * <tr> + * <td>@ref H5P</td> + * <td>Property lists</td> + * </tr> + * <tr> + * <td>@ref H5R</td> + * <td>References</td> + * </tr> + * <tr> + * <td>@ref H5S</td> + * <td>Dataspaces</td> + * </tr> + * <tr> + * <td>@ref H5T</td> + * <td>Datatypes</td> + * </tr> + * <tr> + * <td>@ref H5Z</td> + * <td>Filters</td> + * </tr> + * </table> + * + * \subsection subsec_program_model The HDF5 Programming Model + * In this section we introduce the HDF5 programming model by means of a series of short code + * samples. These samples illustrate a broad selection of common HDF5 tasks. More details are + * provided in the following chapters and in the HDF5 Reference Manual. + * + * \subsubsection subsubsec_program_model_create Creating an HDF5 File + * Before an HDF5 file can be used or referred to in any manner, it must be explicitly created or + * opened. When the need for access to a file ends, the file must be closed. The example below + * provides a C code fragment illustrating these steps. In this example, the values for the file + * creation property list and the file access property list are set to the defaults #H5P_DEFAULT.
+ * + * <em>Creating and closing an HDF5 file</em> + * \code + * hid_t file; // declare file identifier + * + * // Create a new file using H5F_ACC_TRUNC to truncate and overwrite + * // any file of the same name, default file creation properties, and + * // default file access properties. Then close the file. + * file = H5Fcreate(FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + * status = H5Fclose(file); + * \endcode + * + * Note: If there is a possibility that a file of the declared name already exists and you wish to open + * a new file regardless of that possibility, the flag #H5F_ACC_TRUNC will cause the operation to + * overwrite the previous file. If the operation should fail in such a circumstance, use the flag + * #H5F_ACC_EXCL instead. + * + * \subsubsection subsubsec_program_model_dset Creating and Initializing a Dataset + * The essential objects within a dataset are datatype and dataspace. These are independent objects + * and are created separately from any dataset to which they may be attached. Hence, creating a + * dataset requires, at a minimum, the following steps: + * <ol><li>Create and initialize a dataspace for the dataset</li> + * <li>Define a datatype for the dataset</li> + * <li>Create and initialize the dataset</li></ol> + * + * The code in the example below illustrates the execution of these steps. + * + * <em>Create a dataset</em> + * \code + * hid_t dataset, datatype, dataspace; // declare identifiers + * + * // Create a dataspace: Describe the size of the array and + * // create the dataspace for a fixed-size dataset. + * dimsf[0] = NX; + * dimsf[1] = NY; + * dataspace = H5Screate_simple(RANK, dimsf, NULL); + * + * // Define a datatype for the data in the dataset. + * // We will store little endian integers. 
+ * datatype = H5Tcopy(H5T_NATIVE_INT); + * status = H5Tset_order(datatype, H5T_ORDER_LE); + * + * // Create a new dataset within the file using the defined + * // dataspace and datatype and default dataset creation + * // properties. + * // NOTE: H5T_NATIVE_INT can be used as the datatype if + * // conversion to little endian is not needed. + * dataset = H5Dcreate(file, DATASETNAME, datatype, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * \endcode + * + * \subsubsection subsubsec_program_model_close Closing an Object + * An application should close an object such as a datatype, dataspace, or dataset once the object is + * no longer needed. Since each is an independent object, each must be released (or closed) + * separately. This action is frequently referred to as releasing the object’s identifier. The code in + * the example below closes the datatype, dataspace, and dataset that were created in the preceding + * section. + * + * <em>Close an object</em> + * \code + * H5Tclose(datatype); + * H5Dclose(dataset); + * H5Sclose(dataspace); + * \endcode + * + * There is a long list of HDF5 library items that return a unique identifier when the item is created + * or opened. Each time that one of these items is opened, a unique identifier is returned. Closing a + * file does not mean that the groups, datasets, or other open items are also closed. Each opened + * item must be closed separately. + * + * For more information, + * @see <a href="http://www.hdfgroup.org/HDF5/doc/Advanced/UsingIdentifiers/index.html">Using Identifiers</a> + * in the HDF5 Application Developer’s Guide under General Topics in HDF5. + * + * <h4>How Closing a File Affects Other Open Structural Elements</h4> + * Every structural element in an HDF5 file can be opened, and these elements can be opened more + * than once. Elements range in size from the entire file down to attributes. When an element is + * opened, the HDF5 library returns a unique identifier to the application.
Every element that is + * opened must be closed. If an element was opened more than once, each identifier that was + * returned to the application must be closed. For example, if a dataset was opened twice, both + * dataset identifiers must be released (closed) before the dataset can be considered closed. Suppose + * an application has opened a file, a group in the file, and two datasets in the group. In order for + * the file to be totally closed, the file, group, and datasets must each be closed. Closing the file + * before the group or the datasets will not affect the state of the group or datasets: the group and + * datasets will still be open. + * + * There are several exceptions to the above general rule. One is when the #H5close function is used. + * #H5close causes a general shutdown of the library: all data is written to disk, all identifiers are + * closed, and all memory used by the library is cleaned up. Another exception occurs on parallel + * processing systems. Suppose on a parallel system an application has opened a file, a group in the + * file, and two datasets in the group. If the application uses the #H5Fclose function to close the file, + * the call will fail with an error. The open group and datasets must be closed before the file can be + * closed. A third exception is when the file access property list includes the property + * #H5F_CLOSE_STRONG. This property closes any open elements when the file is closed with + * #H5Fclose. For more information, see the #H5Pset_fclose_degree function in the HDF5 Reference + * Manual. + * + * \subsubsection subsubsec_program_model_data Writing or Reading a Dataset to or from a File + * Having created the dataset, the actual data can be written with a call to #H5Dwrite. See the + * example below. + * + * <em>Writing a dataset</em> + * \code + * // Write the data to the dataset using default transfer properties. 
+ * status = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + * \endcode + * + * Note that the third and fourth #H5Dwrite parameters in the above example describe the + * dataspaces in memory and in the file, respectively. For now, these are both set to + * #H5S_ALL which indicates that the entire dataset is to be written. The selection of partial datasets + * and the use of differing dataspaces in memory and in storage will be discussed later in this + * chapter and in more detail elsewhere in this guide. + * + * Reading the dataset from storage is similar to writing the dataset to storage. To read an entire + * dataset, substitute #H5Dread for #H5Dwrite in the above example. + * + * \subsubsection subsubsec_program_model_partial Reading and Writing a Portion of a Dataset + * The previous section described writing or reading an entire dataset. HDF5 also supports access to + * portions of a dataset. These parts of datasets are known as selections. + * + * The simplest type of selection is a simple hyperslab. This is an n-dimensional rectangular sub-set + * of a dataset where n is equal to the dataset’s rank. Other available selections include a more + * complex hyperslab with user-defined stride and block size, a list of independent points, or the + * union of any of these. + * + * The figure below shows several sample selections. + * + * <table> + * <caption align=top>Dataset selections</caption> + * <tr> + * <td> + * \image html Pmodel_fig5_a.gif + * </td> + * </tr> + * <tr> + * <td> + * \image html Pmodel_fig5_b.gif + * </td> + * </tr> + * <tr> + * <td> + * \image html Pmodel_fig5_c.gif + * </td> + * </tr> + * <tr> + * <td> + * \image html Pmodel_fig5_d.gif<br /> + * \image html Pmodel_fig5_e.gif + * </td> + * </tr> + * </table> + * + * Note: In the figure above, selections can take the form of a simple hyperslab, a hyperslab with + * user-defined stride and block, a selection of points, or a union of any of these forms. 
+ * + * Selections and hyperslabs are portions of a dataset. As described above, a simple hyperslab is a + * rectangular array of data elements with the same rank as the dataset’s dataspace. Thus, a simple + * hyperslab is a logically contiguous collection of points within the dataset. + * + * The more general case of a hyperslab can also be a regular pattern of points or blocks within the + * dataspace. Four parameters are required to describe a general hyperslab: the starting coordinates, + * the block size, the stride or space between blocks, and the number of blocks. These parameters + * are each expressed as a one-dimensional array with length equal to the rank of the dataspace and + * are described in the table below. + * + * <table> + * <caption></caption> + * <tr> + * <th>Parameter</th> + * <th>Definition</th> + * </tr> + * <tr> + * <td>start</td> + * <td>The coordinates of the starting location of the hyperslab in the dataset’s dataspace.</td> + * </tr> + * <tr> + * <td>block</td> + * <td>The size of each block to be selected from the dataspace. If the block parameter + * is set to NULL, the block size defaults to a single element in each dimension, as + * if the block array was set to all 1s (all ones). This will result in the selection of a + * uniformly spaced set of count points starting at start and on the interval defined + * by stride.</td> + * </tr> + * <tr> + * <td>stride</td> + * <td>The number of elements separating the starting point of each element or block to + * be selected. 
If the stride parameter is set to NULL, the stride size defaults to 1 + * (one) in each dimension and no elements are skipped.</td> + * </tr> + * <tr> + * <td>count</td> + * <td>The number of elements or blocks to select along each dimension.</td> + * </tr> + * </table> + * + * <h4>Reading Data into a Differently Shaped Memory Block</h4> + * For maximum flexibility in user applications, a selection in storage can be mapped into a + * differently-shaped selection in memory. All that is required is that the two selections contain the + * same number of data elements. In this example, we will first define the selection to be read from + * the dataset in storage, and then we will define the selection as it will appear in application + * memory. + * + * Suppose we want to read a 3 x 4 hyperslab from a two-dimensional dataset in a file beginning at + * the dataset element <1,2>. The first task is to create the dataspace that describes the overall rank + * and dimensions of the dataset in the file and to specify the position and size of the in-file + * hyperslab that we are extracting from that dataset. See the code below. + * + * <em>Define the selection to be read from storage</em> + * \code + * // Define dataset dataspace in file. + * dataspace = H5Dget_space(dataset); // dataspace identifier + * rank = H5Sget_simple_extent_ndims(dataspace); + * + * status_n = H5Sget_simple_extent_dims(dataspace, dims_out, NULL); + * + * // Define hyperslab in the dataset. + * offset[0] = 1; + * offset[1] = 2; + * count[0] = 3; + * count[1] = 4; + * status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL); + * \endcode + * + * The next task is to define a dataspace in memory. 
Suppose that we have in memory a + * three-dimensional 7 x 7 x 3 array into which we wish to read the two-dimensional 3 x 4 hyperslab + * described above and that we want the memory selection to begin at the element <3,0,0> and + * reside in the plane of the first two dimensions of the array. Since the in-memory dataspace is + * three-dimensional, we have to describe the in-memory selection as three-dimensional. Since we + * are keeping the selection in the plane of the first two dimensions of the in-memory dataset, the + * in-memory selection will be a 3 x 4 x 1 array defined as <3,4,1>. + * + * Notice that we must describe two things: the dimensions of the in-memory array, and the size + * and position of the hyperslab that we wish to read in. The code below illustrates how this would + * be done. + * + * <em>Define the memory dataspace and selection</em> + * \code + * // Define memory dataspace. + * dimsm[0] = 7; + * dimsm[1] = 7; + * dimsm[2] = 3; + * memspace = H5Screate_simple(RANK_OUT,dimsm,NULL); + * + * // Define memory hyperslab. + * offset_out[0] = 3; + * offset_out[1] = 0; + * offset_out[2] = 0; + * count_out[0] = 3; + * count_out[1] = 4; + * count_out[2] = 1; + * status = H5Sselect_hyperslab(memspace, H5S_SELECT_SET, offset_out, NULL, count_out, NULL); + * \endcode + * + * The hyperslab defined in the code above has the following parameters: start=(3,0,0), + * count=(3,4,1), stride and block size are NULL. + * + * <h4>Writing Data into a Differently Shaped Disk Storage Block</h4> + * Now let’s consider the opposite process of writing a selection from memory to a selection in a + * dataset in a file. Suppose that the source dataspace in memory is a 50-element, one-dimensional + * array called vector and that the source selection is a 48-element simple hyperslab that starts at the + * second element of vector. See the figure below. 
+ * + * <table> + * <tr> + * <td> + * \image html Pmodel_fig2.gif "A one-dimensional array" + * </td> + * </tr> + * </table> + * + * Further suppose that we wish to write this data to the file as a series of 3 x 2-element blocks in a + * two-dimensional dataset, skipping one row and one column between blocks. Since the source + * selection contains 48 data elements and each block in the destination selection contains 6 data + * elements, we must define the destination selection with 8 blocks. We will write 2 blocks in the + * first dimension and 4 in the second. The code below shows how to achieve this objective. + * + * <em>The destination selection</em> + * \code + * // Select the hyperslab for the dataset in the file, using + * // 3 x 2 blocks, a (4,3) stride, a (2,4) count, and starting + * // at the position (0,1). + * start[0] = 0; start[1] = 1; + * stride[0] = 4; stride[1] = 3; + * count[0] = 2; count[1] = 4; + * block[0] = 3; block[1] = 2; + * ret = H5Sselect_hyperslab(fid, H5S_SELECT_SET, start, stride, count, block); + * + * // Create dataspace for the first dataset. + * mid1 = H5Screate_simple(MSPACE1_RANK, dim1, NULL); + * + * // Select hyperslab. + * // We will use 48 elements of the vector buffer starting at the + * // second element. Selected elements are 1 2 3 . . . 48 + * start[0] = 1; + * stride[0] = 1; + * count[0] = 48; + * block[0] = 1; + * ret = H5Sselect_hyperslab(mid1, H5S_SELECT_SET, start, stride, count, block); + * + * // Write selection from the vector buffer to the dataset in the file. + * ret = H5Dwrite(dataset, H5T_NATIVE_INT, mid1, fid, H5P_DEFAULT, vector); + * \endcode + * + * \subsubsection subsubsec_program_model_info Getting Information about a Dataset + * Although reading is analogous to writing, it is often first necessary to query a file to obtain + * information about the dataset to be read. 
For instance, we often need to determine the datatype + * associated with a dataset, or its dataspace (in other words, rank and dimensions). As illustrated in + * the code example below, there are several get routines for obtaining this information. + * + * <em>Routines to get dataset parameters</em> + * \code + * // Get datatype and dataspace identifiers, + * // then query datatype class, order, and size, and + * // then query dataspace rank and dimensions. + * datatype = H5Dget_type (dataset); // datatype identifier + * class = H5Tget_class (datatype); + * if (class == H5T_INTEGER) + * printf("Dataset has INTEGER type \n"); + * + * order = H5Tget_order (datatype); + * if (order == H5T_ORDER_LE) + * printf("Little endian order \n"); + * + * size = H5Tget_size (datatype); + * printf ("Size is %zu \n", size); + * + * dataspace = H5Dget_space (dataset); // dataspace identifier + * + * // Find rank and retrieve current and maximum dimension sizes. + * rank = H5Sget_simple_extent_dims (dataspace, dims, max_dims); + * \endcode + * + * \subsubsection subsubsec_program_model_compound Creating and Defining Compound Datatypes + * A compound datatype is a collection of one or more data elements. Each element might be an + * atomic type, a small array, or another compound datatype. + * + * The provision for nested compound datatypes allows these structures to become quite complex. + * An HDF5 compound datatype has some similarities to a C struct or a Fortran common block. + * Though not originally designed with databases in mind, HDF5 compound datatypes are + * sometimes used in a way that is similar to a database record. Compound datatypes can become + * either a powerful tool or a complex and difficult-to-debug construct. Reasonable caution is + * advised. + * + * To create and use a compound datatype, you need to create a datatype with class compound + * (#H5T_COMPOUND) and specify the total size of the data element in bytes.
A compound + * datatype consists of zero or more uniquely named members. Members can be defined in any + * order but must occupy non-overlapping regions within the datum. The table below lists the + * properties of compound datatype members. + * + * <table> + * <caption></caption> + * <tr> + * <th>Parameter</th> + * <th>Definition</th> + * </tr> + * <tr> + * <td>Index</td> + * <td>An index number between zero and N-1, where N is the number of + * members in the compound. The elements are indexed in the order of their + * location in the array of bytes.</td> + * </tr> + * <tr> + * <td>Name</td> + * <td>A string that must be unique within the members of the same datatype.</td> + * </tr> + * <tr> + * <td>Datatype</td> + * <td>An HDF5 datatype.</td> + * </tr> + * <tr> + * <td>Offset</td> + * <td>A fixed byte offset which defines the location of the first byte of that + * member in the compound datatype.</td> + * </tr> + * </table> + * + * Properties of the members of a compound datatype are defined when the member is added to the + * compound type. These properties cannot be modified later. + * + * <h4>Defining Compound Datatypes</h4> + * Compound datatypes must be built out of other datatypes. To do this, you first create an empty + * compound datatype and specify its total size. Members are then added to the compound datatype + * in any order. + * + * Each member must have a descriptive name. This is the key used to uniquely identify the + * member within the compound datatype. A member name in an HDF5 datatype does not + * necessarily have to be the same as the name of the corresponding member in the C struct in + * memory although this is often the case. You also do not need to define all the members of the C + * struct in the HDF5 compound datatype (or vice versa). 
+ * + * Usually a C struct will be defined to hold a data point in memory, and the offsets of the members + * in memory will be the offsets of the struct members from the beginning of an instance of the + * struct. The library defines the macro that computes the offset of member m within a struct + * variable s: + * \code + * HOFFSET(s,m) + * \endcode + * + * The code below shows an example in which a compound datatype is created to describe complex + * numbers whose type is defined by the complex_t struct. + * + * <em>A compound datatype for complex numbers</em> + * \code + * typedef struct { + * double re; //real part + * double im; //imaginary part + * } complex_t; + * + * complex_t tmp; //used only to compute offsets + * hid_t complex_id = H5Tcreate (H5T_COMPOUND, sizeof tmp); + * H5Tinsert (complex_id, "real", HOFFSET(tmp,re), H5T_NATIVE_DOUBLE); + * H5Tinsert (complex_id, "imaginary", HOFFSET(tmp,im), H5T_NATIVE_DOUBLE); + * \endcode + * + * \subsubsection subsubsec_program_model_extend Creating and Writing Extendable Datasets + * An extendable dataset is one whose dimensions can grow. One can define an HDF5 dataset to + * have certain initial dimensions with the capacity to later increase the size of any of the initial + * dimensions. For example, the figure below shows a 3 x 3 dataset (a) which is later extended to + * be a 10 x 3 dataset by adding 7 rows (b), and further extended to be a 10 x 5 dataset by adding + * two columns (c). + * + * <table> + * <tr> + * <td> + * \image html Pmodel_fig3.gif "Extending a dataset" + * </td> + * </tr> + * </table> + * + * HDF5 requires the use of chunking when defining extendable datasets. Chunking makes it + * possible to extend datasets efficiently without having to reorganize contiguous storage + * excessively. 
+ * + * To summarize, an extendable dataset requires two conditions: + * <ol><li>Define the dataspace of the dataset as unlimited in all dimensions that might eventually be + * extended</li> + * <li>Enable chunking in the dataset creation properties</li></ol> + * + * For example, suppose we wish to create a dataset similar to the one shown in the figure above. + * We want to start with a 3 x 3 dataset, and then later we will extend it. To do this, go through the + * steps below. + * + * First, declare the dataspace to have unlimited dimensions. See the code shown below. Note the + * use of the predefined constant #H5S_UNLIMITED to specify that a dimension is unlimited. + * + * <em>Declaring a dataspace with unlimited dimensions</em> + * \code + * // dataset dimensions at creation time + * hsize_t dims[2] = {3, 3}; + * hsize_t maxdims[2] = {H5S_UNLIMITED, H5S_UNLIMITED}; + * + * // Create the data space with unlimited dimensions. + * dataspace = H5Screate_simple(RANK, dims, maxdims); + * \endcode + * + * Next, set the dataset creation property list to enable chunking. See the code below. + * + * <em>Enable chunking</em> + * \code + * hid_t cparms; + * hsize_t chunk_dims[2] ={2, 5}; + * + * // Modify dataset creation properties to enable chunking. + * cparms = H5Pcreate (H5P_DATASET_CREATE); + * status = H5Pset_chunk(cparms, RANK, chunk_dims); + * \endcode + * + * The next step is to create the dataset. See the code below. + * + * <em>Create a dataset</em> + * \code + * // Create a new dataset within the file using cparms creation properties. + * dataset = H5Dcreate(file, DATASETNAME, H5T_NATIVE_INT, dataspace, H5P_DEFAULT, cparms, H5P_DEFAULT); + * \endcode + * + * Finally, when the time comes to extend the size of the dataset, invoke #H5Dextend. Extending the + * dataset along the first dimension by seven rows leaves the dataset with new dimensions of + * <10,3>. See the code below. 
+ * + * <em>Extend the dataset by seven rows</em> + * \code + * // Extend the dataset. Dataset becomes 10 x 3. + * dims[0] = dims[0] + 7; + * size[0] = dims[0]; + * size[1] = dims[1]; + * + * status = H5Dextend (dataset, size); + * \endcode + * + * \subsubsection subsubsec_program_model_group Creating and Working with Groups + * Groups provide a mechanism for organizing meaningful and extendable sets of datasets within + * an HDF5 file. The @ref H5G API provides several routines for working with groups. + * + * <h4>Creating a Group</h4> + * With no datatype, dataspace, or storage layout to define, creating a group is considerably simpler + * than creating a dataset. For example, the following code creates a group called Data in the root + * group of file. + * + * <em>Create a group</em> + * \code + * // Create a group in the file. + * grp = H5Gcreate(file, "/Data", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * \endcode + * + * A group may be created within another group by providing the absolute name of the group to the + * #H5Gcreate function or by specifying its location. For example, to create the group Data_new in + * the group Data, you might use the sequence of calls shown below. + * + * <em>Create a group within a group</em> + * \code + * // Create group "Data_new" in the group "Data" by specifying + * // absolute name of the group. + * grp_new = H5Gcreate(file, "/Data/Data_new", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * + * // or + * + * // Create group "Data_new" in the "Data" group. + * grp_new = H5Gcreate(grp, "Data_new", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + * \endcode + * + * The first parameter of #H5Gcreate is a location identifier. \em file in the first example specifies only + * the file. \em grp in the second example specifies a particular group in a particular file. Note that in + * this instance, the group identifier \em grp is used as the first parameter in the #H5Gcreate call so that + * the relative name of Data_new can be used. 
+ * + * The third parameter of #H5Gcreate optionally specifies how much file space to reserve to store + * the names of objects that will be created in this group. If a non-positive value is supplied, the + * library provides a default size. + * + * Use #H5Gclose to close the group and release the group identifier. + * + * <h4>Creating a Dataset within a Group</h4> + * As with groups, a dataset can be created in a particular group by specifying either its absolute + * name in the file or its relative name with respect to that group. The next code excerpt uses the + * absolute name. + * + * <em>Create a dataset within a group using an absolute name</em> + * \code + * // Create the dataset "Compressed_Data" in the group Data using + * // the absolute name. The dataset creation property list is + * // modified to use GZIP compression with the compression + * // effort set to 6. Note that compression can be used only when + * // the dataset is chunked. + * dims[0] = 1000; + * dims[1] = 20; + * cdims[0] = 20; + * cdims[1] = 20; + * dataspace = H5Screate_simple(RANK, dims, NULL); + * + * plist = H5Pcreate(H5P_DATASET_CREATE); + * H5Pset_chunk(plist, 2, cdims); + * H5Pset_deflate(plist, 6); + * + * dataset = H5Dcreate(file, "/Data/Compressed_Data", H5T_NATIVE_INT, dataspace, H5P_DEFAULT, + * plist, H5P_DEFAULT); + * \endcode + * + * Alternatively, you can first obtain an identifier for the group in which the dataset is to be + * created, and then create the dataset with a relative name. + * + * <em>Create a dataset within a group using a relative name</em> + * \code + * // Open the group. + * grp = H5Gopen(file, "Data", H5P_DEFAULT); + * + * // Create the dataset "Compressed_Data" in the "Data" group + * // by providing a group identifier and a relative dataset + * // name as parameters to the H5Dcreate function. 
+ * dataset = H5Dcreate(grp, "Compressed_Data", H5T_NATIVE_INT, dataspace, H5P_DEFAULT, plist, H5P_DEFAULT); + * \endcode + * + * <h4>Accessing an Object in a Group</h4> + * Any object in a group can be accessed by its absolute or relative name. The first code snippet + * below illustrates the use of the absolute name to access the dataset <em>Compressed_Data</em> in the + * group <em>Data</em> created in the examples above. The second code snippet illustrates the use of the + * relative name. + * + * <em>Accessing a dataset using its absolute or relative name</em> + * \code + * // Open the dataset "Compressed_Data" in the "Data" group. + * dataset = H5Dopen(file, "/Data/Compressed_Data", H5P_DEFAULT); + * + * // Open the group "Data" in the file. + * grp = H5Gopen(file, "Data", H5P_DEFAULT); + * + * // Access the "Compressed_Data" dataset in the group. + * dataset = H5Dopen(grp, "Compressed_Data", H5P_DEFAULT); + * \endcode + * + * \subsubsection subsubsec_program_model_attr Working with Attributes + * An attribute is a small dataset that is attached to a normal dataset or group. Attributes share many + * of the characteristics of datasets, so the programming model for working with attributes is + * similar in many ways to the model for working with datasets. The primary differences are that an + * attribute must be attached to a dataset or a group and sub-setting operations cannot be performed + * on attributes. + * + * To create an attribute belonging to a particular dataset or group, first create a dataspace for the + * attribute with the call to #H5Screate, and then create the attribute using #H5Acreate. For example, + * the code shown below creates an attribute called “Integer attribute” that is a member of a dataset + * whose identifier is dataset. The attribute identifier is attr2. #H5Awrite then sets the value of the + * attribute to that of the integer variable point. #H5Aclose then releases the attribute identifier. 
+ * + * <em>Create an attribute</em> + * \code + * int point = 1; // Value of the scalar attribute + * + * // Create scalar attribute. + * aid2 = H5Screate(H5S_SCALAR); + * attr2 = H5Acreate(dataset, "Integer attribute", H5T_NATIVE_INT, aid2, H5P_DEFAULT, H5P_DEFAULT); + * + * // Write scalar attribute. + * ret = H5Awrite(attr2, H5T_NATIVE_INT, &point); + * + * // Close attribute dataspace. + * ret = H5Sclose(aid2); + * + * // Close attribute. + * ret = H5Aclose(attr2); + * \endcode + * + * <em>Read a known attribute</em> + * \code + * // Attach to the scalar attribute using attribute name, then + * // read and display its value. + * attr = H5Aopen_by_name(file_id, dataset_name, "Integer attribute", H5P_DEFAULT, H5P_DEFAULT); + * ret = H5Aread(attr, H5T_NATIVE_INT, &point_out); + * printf("The value of the attribute \"Integer attribute\" is %d \n", point_out); + * ret = H5Aclose(attr); + * \endcode + * + * To read a scalar attribute whose name and datatype are known, first open the attribute using + * #H5Aopen_by_name, and then use #H5Aread to get its value. For example, the code shown above + * reads a scalar attribute called “Integer attribute” whose datatype is a native integer and whose + * parent dataset has the identifier dataset. + * + * To read an attribute whose characteristics are not known, go through these steps. First, query the + * file to obtain information about the attribute such as its name, datatype, rank, and dimensions, + * and then read the attribute. The following code opens an attribute by its index value using + * #H5Aopen_by_idx, and then it reads the attribute value with #H5Aread. + * + * <em>Read an unknown attribute</em> + * \code + * // Attach to the string attribute using its index, then read and + * // display the value. 
+ * attr = H5Aopen_by_idx(file_id, dataset_name, index_type, iter_order, 2, H5P_DEFAULT, H5P_DEFAULT); + * + * atype = H5Tcopy(H5T_C_S1); + * H5Tset_size(atype, 4); + * + * ret = H5Aread(attr, atype, string_out); + * printf("The value of the attribute with the index 2 is %s \n", string_out); + * \endcode + * + * In practice, if the characteristics of attributes are not known, the code involved in accessing and + * processing the attribute can be quite complex. For this reason, HDF5 includes a function called + * #H5Aiterate. This function applies a user-supplied function to each of a set of attributes. The + * user-supplied function can contain the code that interprets, accesses, and processes each attribute. + * + * \subsection subsec_program_transfer_pipeline The Data Transfer Pipeline + * The HDF5 library implements data transfers between different storage locations. At the lowest + * levels, the HDF5 Library reads and writes blocks of bytes to and from storage using calls to the + * virtual file layer (VFL) drivers. In addition to this, the HDF5 library manages caches of metadata + * and a data I/O pipeline. The data I/O pipeline applies compression to data blocks, transforms + * data elements, and implements selections. + * + * A substantial portion of the HDF5 library’s work is in transferring data from one environment or + * media to another. This most often involves a transfer between system memory and a storage + * medium. Data transfers are affected by compression, encryption, machine-dependent differences + * in numerical representation, and other features. So, the bit-by-bit arrangement of a given dataset + * is often substantially different in the two environments. + * + * Consider the representation on disk of a compressed and encrypted little-endian array as + * compared to the same array after it has been read from disk, decrypted, decompressed, and + * loaded into memory on a big-endian system. 
HDF5 performs all of the operations necessary to + * make that transition during the I/O process with many of the operations being handled by the + * VFL and the data transfer pipeline. + * + * The figure below provides a simplified view of a sample data transfer with four stages. Note that + * the modules are used only when needed. For example, if the data is not compressed, the + * compression stage is omitted. + * + * <table> + * <tr> + * <td> + * \image html Pmodel_fig6.gif "A data transfer from storage to memory" + * </td> + * </tr> + * </table> + * + * For a given I/O request, different combinations of actions may be performed by the pipeline. The + * library automatically sets up the pipeline and passes data through the processing steps. For + * example, for a read request (from disk to memory), the library must determine which logical + * blocks contain the requested data elements and fetch each block into the library’s cache. If the + * data needs to be decompressed, then the decompression algorithm is applied to the block after it is + * read from disk. If the data is a selection, the selected elements are extracted from the data block + * after it is decompressed. If the data needs to be transformed (for example, byte swapped), then + * the data elements are transformed after decompression and selection. + * + * While an application must sometimes set up some elements of the pipeline, use of the pipeline is + * normally transparent to the user program. The library determines what must be done based on the + * metadata for the file, the object, and the specific request. An example of when an application + * might be required to set up some elements in the pipeline is if the application used a custom + * error-checking algorithm. + * + * In some cases, it is necessary to pass parameters to and from modules in the pipeline or among + * other parts of the library that are not directly called through the programming API. 
This is + * accomplished through the use of dataset transfer and data access property lists. + * + * The VFL provides an interface whereby user applications can add custom modules to the data + * transfer pipeline. For example, a custom compression algorithm can be used with the HDF5 + * Library by linking an appropriate module into the pipeline through the VFL. This requires + * creating an appropriate wrapper for the compression module and registering it with the library + * with #H5Zregister. The algorithm can then be applied to a dataset with an #H5Pset_filter call which + * will add the algorithm to the selected dataset’s creation property list. + * + * Previous Chapter \ref sec_data_model - Next Chapter \ref sec_file + * + */ + +/** + * \defgroup H5 Library General (H5) * * Use the functions in this module to manage the life cycle of HDF5 library * instances. |