From bd1e676c521d881b3143829f493a28b5ced1294b Mon Sep 17 00:00:00 2001 From: Quincey Koziol Date: Wed, 8 Jul 1998 09:54:54 -0500 Subject: [svn-r467] Restructuring documentation. --- doc/html/Attributes.html | 177 ++ doc/html/Big.html | 111 + doc/html/Caching.html | 82 + doc/html/CodeReview.html | 300 +++ doc/html/Coding.html | 300 +++ doc/html/Compression.html | 409 ++++ doc/html/Datasets.html | 839 ++++++++ doc/html/Dataspaces.html | 568 +++++ doc/html/Datatypes.html | 1370 ++++++++++++ doc/html/Errors.html | 281 +++ doc/html/ExternalFiles.html | 278 +++ doc/html/Files.html | 529 +++++ doc/html/Groups.html | 288 +++ doc/html/H5.api.html | 4611 ++++++++++++++++++++++++++++++++++++++++ doc/html/H5.api_map.html | 849 ++++++++ doc/html/H5.format.html | 3183 +++++++++++++++++++++++++++ doc/html/H5.intro.html | 997 +++++++++ doc/html/H5.sample_code.html | 123 ++ doc/html/H5.user.html | 71 + doc/html/IOPipe.html | 114 + doc/html/MemoryManagement.html | 510 +++++ doc/html/ObjectHeader.txt | 60 + doc/html/Properties.html | 81 + doc/html/Version.html | 139 ++ doc/html/chunk1.gif | Bin 0 -> 5111 bytes doc/html/chunk1.obj | 52 + doc/html/compat.html | 271 +++ doc/html/dataset_p1.gif | Bin 0 -> 3359 bytes doc/html/dataset_p1.obj | 32 + doc/html/extern1.gif | Bin 0 -> 1989 bytes doc/html/extern1.obj | 40 + doc/html/extern2.gif | Bin 0 -> 4054 bytes doc/html/extern2.obj | 108 + doc/html/group_p1.gif | Bin 0 -> 3696 bytes doc/html/group_p1.obj | 85 + doc/html/group_p2.gif | Bin 0 -> 3524 bytes doc/html/group_p2.obj | 57 + doc/html/group_p3.gif | Bin 0 -> 3354 bytes doc/html/group_p3.obj | 59 + doc/html/h5s.examples | 347 +++ doc/html/heap.txt | 72 + doc/html/index.html | 40 + doc/html/move.html | 66 + doc/html/ph5design.html | 77 + doc/html/ph5example.c | 1003 +++++++++ doc/html/ph5implement.txt | 27 + doc/html/pipe1.gif | Bin 0 -> 10110 bytes doc/html/pipe1.obj | 136 ++ doc/html/pipe2.gif | Bin 0 -> 11715 bytes doc/html/pipe2.obj | 168 ++ doc/html/pipe3.gif | Bin 0 -> 6961 bytes doc/html/pipe3.obj | 70 + doc/html/pipe4.gif | Bin 0 -> 8355 bytes doc/html/pipe4.obj | 92 + doc/html/pipe5.gif | Bin 0 -> 6217 bytes doc/html/pipe5.obj | 52 + doc/html/review1.html | 283 +++ doc/html/review1a.html | 252 +++ doc/html/storage.html | 274 +++ doc/html/study.html | 172 ++ doc/html/study_1000x1000.gif | Bin 0 -> 6594 bytes doc/html/study_250x250.gif | Bin 0 -> 6914 bytes doc/html/study_499x499.gif | Bin 0 -> 10429 bytes doc/html/study_5000x1000.gif | Bin 0 -> 10653 bytes doc/html/study_500x500.gif | Bin 0 -> 6842 bytes doc/html/study_p1.gif | Bin 0 -> 6550 bytes doc/html/study_p1.obj | 113 + doc/html/symtab | 313 +++ doc/html/tracing.html | 192 ++ doc/html/version.gif | Bin 0 -> 4772 bytes doc/html/version.obj | 96 + 71 files changed, 20819 insertions(+) create mode 100644 doc/html/Attributes.html create mode 100644 doc/html/Big.html create mode 100644 doc/html/Caching.html create mode 100644 doc/html/CodeReview.html create mode 100644 doc/html/Coding.html create mode 100644 doc/html/Compression.html create mode 100644 doc/html/Datasets.html create mode 100644 doc/html/Dataspaces.html create mode 100644 doc/html/Datatypes.html create mode 100644 doc/html/Errors.html create mode 100644 doc/html/ExternalFiles.html create mode 100644 doc/html/Files.html create mode 100644 doc/html/Groups.html create mode 100644 doc/html/H5.api.html create mode 100644 doc/html/H5.api_map.html create mode 100644 doc/html/H5.format.html create mode 100644 doc/html/H5.intro.html create mode 100644 doc/html/H5.sample_code.html create mode 
100644 doc/html/H5.user.html create mode 100644 doc/html/IOPipe.html create mode 100644 doc/html/MemoryManagement.html create mode 100644 doc/html/ObjectHeader.txt create mode 100644 doc/html/Properties.html create mode 100644 doc/html/Version.html create mode 100644 doc/html/chunk1.gif create mode 100644 doc/html/chunk1.obj create mode 100644 doc/html/compat.html create mode 100644 doc/html/dataset_p1.gif create mode 100644 doc/html/dataset_p1.obj create mode 100644 doc/html/extern1.gif create mode 100644 doc/html/extern1.obj create mode 100644 doc/html/extern2.gif create mode 100644 doc/html/extern2.obj create mode 100644 doc/html/group_p1.gif create mode 100644 doc/html/group_p1.obj create mode 100644 doc/html/group_p2.gif create mode 100644 doc/html/group_p2.obj create mode 100644 doc/html/group_p3.gif create mode 100644 doc/html/group_p3.obj create mode 100644 doc/html/h5s.examples create mode 100644 doc/html/heap.txt create mode 100644 doc/html/index.html create mode 100644 doc/html/move.html create mode 100644 doc/html/ph5design.html create mode 100644 doc/html/ph5example.c create mode 100644 doc/html/ph5implement.txt create mode 100644 doc/html/pipe1.gif create mode 100644 doc/html/pipe1.obj create mode 100644 doc/html/pipe2.gif create mode 100644 doc/html/pipe2.obj create mode 100644 doc/html/pipe3.gif create mode 100644 doc/html/pipe3.obj create mode 100644 doc/html/pipe4.gif create mode 100644 doc/html/pipe4.obj create mode 100644 doc/html/pipe5.gif create mode 100644 doc/html/pipe5.obj create mode 100644 doc/html/review1.html create mode 100644 doc/html/review1a.html create mode 100644 doc/html/storage.html create mode 100644 doc/html/study.html create mode 100644 doc/html/study_1000x1000.gif create mode 100644 doc/html/study_250x250.gif create mode 100644 doc/html/study_499x499.gif create mode 100644 doc/html/study_5000x1000.gif create mode 100644 doc/html/study_500x500.gif create mode 100644 doc/html/study_p1.gif create mode 100644 doc/html/study_p1.obj create mode 100644 doc/html/symtab create mode 100644 doc/html/tracing.html create mode 100644 doc/html/version.gif create mode 100644 doc/html/version.obj diff --git a/doc/html/Attributes.html b/doc/html/Attributes.html new file mode 100644 index 0000000..85fe70f --- /dev/null +++ b/doc/html/Attributes.html @@ -0,0 +1,177 @@ + + + + Attributes + + + +

Attributes

+ +

1. Introduction

+ +

The attribute API (H5A) is primarily designed to allow small
  datasets to be easily attached to primary datasets as metadata.
  Additional goals for the H5A interface include keeping the storage
  requirement for each attribute to a minimum and easily sharing
  attributes among datasets.

Because attributes are intended to be small objects, large datasets
  intended as additional information for a primary dataset should be
  stored as supplemental datasets in a group with the primary dataset.
  An attribute can then be attached to the group to indicate that a
  particular type of dataset, with supplemental datasets, is located in
  the group. How small is "small" is not defined by the
  library and is up to the user's interpretation.

Attributes are not separate objects in the file; they are always
  contained in the object header of the object they are attached to. The
  I/O functions defined below, not the H5D I/O routines, are required to
  read or write attribute information.

2. Creating, Opening, Closing and Deleting Attributes

+ +

Attributes are created with the H5Acreate() function,
  and existing attributes can be accessed with either the
  H5Aopen_name() or H5Aopen_idx() functions. All
  three functions return an object ID which should eventually be released
  by calling H5Aclose(). A minimal usage sketch follows the
  list below.

+
hid_t H5Acreate (hid_t loc_id, const char + *name, hid_t type_id, hid_t space_id, + hid_t create_plist_id) +
This function creates an attribute which is attached to the object
  specified with loc_id. The name specified with name
  must be unique among the attributes of that object. The type_id
  and space_id are created with the H5T and H5S interfaces
  respectively. Currently only simple dataspaces are allowed for attribute
  dataspaces. The create_plist_id property list is currently
  unused, but will be used in the future for optional properties of
  attributes. The attribute ID returned from this function must be released
  with H5Aclose or resource leaks will develop. Attempting to create an
  attribute with the same name as an already existing attribute will fail,
  leaving the pre-existing attribute in place.
  This function returns an attribute ID for success or negative for failure.

+
hid_t H5Aopen_name (hid_t loc_id, const char + *name) +
This function opens an attribute which is attached to the object
  specified with loc_id. The name specified with name
  indicates the attribute to access. The attribute ID returned from this
  function must be released with H5Aclose or resource leaks will develop.
  This function returns an attribute ID for success or negative for failure.

+
hid_t H5Aopen_idx (hid_t loc_id, unsigned + idx) +
This function opens an attribute which is attached to the object
  specified with loc_id. The attribute specified with idx
  indicates the idxth attribute to access, starting with '0'. The
  attribute ID returned from this function must be released with H5Aclose or
  resource leaks will develop.
  This function returns an attribute ID for success or negative for failure.

+
herr_t H5Aclose (hid_t attr_id) +
This function releases an attribute from use. Further use of the + attribute ID will result in undefined behavior. + This function returns non-negative on success, negative on failure. + +

+
herr_t H5Adelete (hid_t loc_id, + const char *name) +
This function removes the named attribute from a dataset or group. + This function should not be used when attribute IDs are open on loc_id + as it may cause the internal indexes of the attributes to change and future + writes to the open attributes to produce incorrect results. + Returns non-negative on success, negative on failure. +
+ +
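The following is a minimal sketch of creating and releasing an
  attribute, assuming an already-open object loc_id; the
  attribute name "count" is illustrative and error checks are
  omitted:

+hsize_t dims[1] = {1};
+hid_t space_id, attr_id;
+
+space_id = H5Screate_simple (1, dims, NULL);   /* simple data space */
+attr_id = H5Acreate (loc_id, "count", H5T_NATIVE_INT, space_id,
+                     H5P_DEFAULT);             /* create_plist unused */
+H5Aclose (attr_id);                            /* release the attribute */
+H5Sclose (space_id);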

3. Attribute I/O Functions

+ +

Attributes may only be written as an entire object; no partial I/O
  is currently supported. A short read/write sketch follows the list
  below.

+
herr_t H5Awrite (hid_t attr_id, + hid_t mem_type_id, void *buf) +
This function writes an attribute, specified with attr_id, + with mem_type_id specifying the datatype in memory. The entire + attribute is written from buf to the file. + This function returns non-negative on success, negative on failure. + +

+
herr_t H5Aread (hid_t attr_id, + hid_t mem_type_id, void *buf) +
This function reads an attribute, specified with attr_id, with
  mem_type_id specifying the datatype in memory. The entire
  attribute is read into buf from the file.
  This function returns non-negative on success, negative on failure.
+ +
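For example, a sketch that writes and then reads back a single integer
  attribute (attr_id is an open attribute whose dataspace holds
  one int; error checks omitted):

+int value = 42, readback;
+
+H5Awrite (attr_id, H5T_NATIVE_INT, &value);    /* whole attribute at once */
+H5Aread (attr_id, H5T_NATIVE_INT, &readback);  /* whole attribute at once */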

4. Attribute Inquiry Functions

+ +
+
int H5Aiterate (hid_t loc_id,
        unsigned *attr_number,
        H5A_operator_t operator,
        void *operator_data)
This function iterates over the attributes of the dataset or group
  specified with loc_id. For each attribute of the object, the
  operator_data and some additional information (specified below)
  are passed to the operator function. The iteration begins with
  the attribute indexed by *attr_number and the next attribute to be
  processed by the operator is returned in *attr_number.

The iterator returns a negative value if something is wrong, the return + value of the last operator if it was non-zero, or zero if all attributes + were processed. +

The prototype for H5A_operator_t is:
+ typedef herr_t (*H5A_operator_t)(hid_t loc_id, + const char *attr_name, void *operator_data); +

The operator receives the ID for the group or dataset being iterated over
  (loc_id), the name of the current attribute (attr_name),
  and the pointer to the operator data passed in to H5Aiterate
  (operator_data); a sketch of an operator follows the list below.
  The return values from an operator are:

    +
  • Zero causes the iterator to continue, returning zero when all + attributes have been processed. +
  • Positive causes the iterator to immediately return that positive + value, indicating short-circuit success. The iterator can be + restarted at the next attribute. +
  • Negative causes the iterator to immediately return that value, + indicating failure. The iterator can be restarted at the next + attribute. +
+
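A sketch of an operator and its use; the function name
  print_name is hypothetical and error checks are omitted:

+herr_t
+print_name (hid_t loc_id, const char *attr_name, void *operator_data)
+{
+    printf ("attribute: %s\n", attr_name);
+    return 0;                     /* zero continues the iteration */
+}
+
+unsigned idx = 0;                 /* begin with the first attribute */
+H5Aiterate (loc_id, &idx, print_name, NULL);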

+
hid_t H5Aget_space (hid_t attr_id) +
This function retrieves a copy of the dataspace for an attribute. + The dataspace ID returned from this function must be released with H5Sclose + or resource leaks will develop. + This function returns a dataspace ID for success or negative for failure. +

+
hid_t H5Aget_type (hid_t attr_id) +
This function retrieves a copy of the datatype for an attribute. + The datatype ID returned from this function must be released with H5Tclose + or resource leaks will develop. + This function returns a datatype ID for success or negative for failure. +

+
size_t H5Aget_name (hid_t attr_id, + char *buf, size_t buf_size) +
This function retrieves the name of an attribute for an attribute ID. + Up to buf_size characters are stored in buf followed by a + '\0' string terminator. If the name of the attribute is longer than + buf_size-1, the string terminator is stored in the last position + of the buffer to properly terminate the string. + This function returns the length of the attribute's name (which may be + longer than buf_size) on success or negative for failure. +

+
int H5Anum_attrs (hid_t loc_id) +
This function returns the number of attributes attached to a dataset or
  group, loc_id.
  This function returns non-negative for success or negative for failure.
  A short inquiry sketch follows this list.
+ +
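For example, a sketch that counts an object's attributes and retrieves
  the name of the first one into a fixed-size buffer (a longer name would
  be truncated, as described above):

+char name[64];
+int n = H5Anum_attrs (loc_id);               /* how many attributes? */
+hid_t attr_id = H5Aopen_idx (loc_id, 0);     /* open the first one   */
+
+H5Aget_name (attr_id, name, sizeof name);    /* always '\0'-terminated */
+H5Aclose (attr_id);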
+
HDF Support
+ + diff --git a/doc/html/Big.html b/doc/html/Big.html new file mode 100644 index 0000000..080f786 --- /dev/null +++ b/doc/html/Big.html @@ -0,0 +1,111 @@ + + + + Big Datasets on Small Machines + + + +

Big Datasets on Small Machines

+ +

1. Introduction

+ +

The HDF5 library is able to handle files larger than the
      maximum file size of the underlying operating system, and datasets
      larger than the maximum memory
      size. For instance, a machine where sizeof(off_t)
      and sizeof(size_t) are both four bytes can handle
      datasets and files as large as 18x10^18 bytes. However, most
      Unix systems limit the number of concurrently open files, so a
      practical file size limit is closer to 512GB or 1TB.

Two "tricks" must be imployed on these small systems in order + to store large datasets. The first trick circumvents the + off_t file size limit and the second circumvents + the size_t main memory limit. + +

2. File Size Limits

+ +

Some 32-bit operating systems have special file systems that + can support large (>2GB) files and HDF5 will detect these and + use them automatically. If this is the case, the output from + configure will show: + +

+checking for lseek64... yes
+checking for fseek64... yes
+    
+ +

Otherwise one must use an HDF5 file family. Such a family is + created by setting file family properties in a file access + property list and then supplying a file name that includes a + printf-style integer format. For instance: + +

+hid_t plist, file;
+plist = H5Pcreate (H5P_FILE_ACCESS);
+H5Pset_family (plist, 1<<30, H5P_DEFAULT);
+file = H5Fcreate ("big%03d.h5", H5F_ACC_TRUNC, H5P_DEFAULT, plist);
+    
+ +

The second argument (1<<30) to
      H5Pset_family() indicates that the family members
      are to be 2^30 bytes (1GB) each. In general, family members
      cannot be 2GB because writes to byte number 2,147,483,647 will
      fail, so the largest safe value for a family member is
      2,147,483,647. HDF5 will create family members on demand as the
      HDF5 address space increases, but since most Unix systems limit
      the number of concurrently open files the effective maximum size
      of the HDF5 address space will be limited.

If the effective HDF5 address space is limited then one may be
      able to store datasets as external datasets, each spanning
      multiple files of any length, since HDF5 opens external dataset
      files one at a time. To arrange storage for a 5TB dataset one
      could say:

+hid_t plist = H5Pcreate (H5P_DATASET_CREATE);
+char name[64];                     /* buffer for generated segment names */
+int i;
+for (i=0; i<5*1024; i++) {
+   sprintf (name, "velocity-%04d.raw", i);
+   H5Pset_external (plist, name, 0, (size_t)1<<30);
+}
+    
+ +

3. Dataset Size Limits

+ +

The second limit which must be overcome is that of + sizeof(size_t). HDF5 defines a new data type + called hsize_t which is used for sizes of datasets + and is, by default, defined as unsigned long long. + +

To create a dataset with 8*2^30 4-byte integers for a total of + 32GB one first creates the dataspace. We give two examples + here: a 4-dimensional dataset whose dimension sizes are smaller + than the maximum value of a size_t, and a + 1-dimensional dataset whose dimension size is too large to fit + in a size_t. + +

+hsize_t size1[4] = {8, 1024, 1024, 1024};
+hid_t space1 = H5Screate_simple (4, size1, size1);
+
+hsize_t size2[1] = {8589934592LL};
+hid_t space2 = H5Screate_simple (1, size2, size2);
+    
+ +

However, the LL suffix is not portable, so it may + be better to replace the number with + (hsize_t)8*1024*1024*1024. + +

For compilers that don't support long long, large
      datasets will not be possible. The library performs too much
      arithmetic on hsize_t types to make the use of a
      struct feasible.


+
Robb Matzke
+ + +Last modified: Wed May 13 12:36:47 EDT 1998 + + + diff --git a/doc/html/Caching.html b/doc/html/Caching.html new file mode 100644 index 0000000..4e5a6ac --- /dev/null +++ b/doc/html/Caching.html @@ -0,0 +1,82 @@ + + + + Data Caching + + + +

Meta Data Caching

+ +

The HDF5 library caches two types of data: meta data and raw + data. The meta data cache holds file objects like the file + header, symbol table nodes, global heap collections, object + headers and their messages, etc. in a partially decoded + state. The cache has a fixed number of entries which is set with + the file access property list (defaults to 10k) and each entry + can hold a single meta data object. Collisions between objects + are handled by preempting the older object in favor of the new + one. + +

Raw Data Chunk Caching

+ +

Raw data chunks are cached because I/O requests at the
      application level typically don't map well to chunks at the
      storage level. The chunk cache has a maximum size in bytes
      set with the file access property list (defaults to 1MB) and
      when the limit is reached chunks are preempted according to
      heuristics weighted by the parameter w0, described below.

+ +

One should choose large values for w0 if I/O requests
      typically do not overlap but smaller values for w0 if
      the requests do overlap. For instance, reading an entire 2d
      array by reading from non-overlapping "windows" in a row-major
      order would benefit from a high w0 value while reading
      a diagonal across the dataset where each request overlaps the
      previous request would benefit from a small w0.

The API

+ +

The cache parameters for both caches are part of a file access + property list and are set and queried with this pair of + functions: + +

+
herr_t H5Pset_cache(hid_t plist, unsigned int + mdc_nelmts, size_t rdcc_nbytes, double + w0) +
herr_t H5Pget_cache(hid_t plist, unsigned int
	*mdc_nelmts, size_t *rdcc_nbytes, double
	*w0)
Sets or queries the meta data cache and raw data chunk cache
      parameters. The plist is a file access property
      list. The number of elements (objects) in the meta data cache
      is mdc_nelmts. The total size of the raw data chunk
      cache is rdcc_nbytes and the preemption policy weight is
      w0. For H5Pget_cache() any (or all) of
      the pointer arguments may be null pointers. A usage sketch
      follows this list.
+ +
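For example, the following sketch enlarges both caches before opening
      a file; the values and the file name are illustrative only:

+hid_t plist, file;
+
+plist = H5Pcreate (H5P_FILE_ACCESS);
+H5Pset_cache (plist, 20000,          /* meta data cache entries  */
+              4*1024*1024,           /* 4MB raw data chunk cache */
+              0.75);                 /* preemption weight w0     */
+file = H5Fopen ("example.h5", H5F_ACC_RDWR, plist);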
+
Robb Matzke
+ + +Last modified: Tue May 26 15:38:27 EDT 1998 + + + diff --git a/doc/html/CodeReview.html b/doc/html/CodeReview.html new file mode 100644 index 0000000..213cbbe --- /dev/null +++ b/doc/html/CodeReview.html @@ -0,0 +1,300 @@ + + + + Code Review + + +

Code Review 1

+ +

Some background...

+

This is one of the functions exported from the
      H5B.c file that implements a B-link-tree class
      without worrying about concurrency yet (thus the `Note:' in the
      function prologue). The H5B.c file provides the
      basic machinery for operating on generic B-trees, but it isn't
      much use by itself. Various subclasses of the B-tree (like
      symbol tables or indirect storage) provide their own interface
      and back end to this function. For instance,
      H5G_stab_find() takes a symbol table OID and a name
      and calls H5B_find() with an appropriate
      udata argument that eventually gets passed to the
      symbol table subclass's found callback (the
      type->found call seen below).

+ 1 /*-------------------------------------------------------------------------
+ 2  * Function:    H5B_find
+ 3  *
+ 4  * Purpose:     Locate the specified information in a B-tree and return
+ 5  *              that information by filling in fields of the caller-supplied
+ 6  *              UDATA pointer depending on the type of leaf node
+ 7  *              requested.  The UDATA can point to additional data passed
+ 8  *              to the key comparison function.
+ 9  *
+10  * Note:        This function does not follow the left/right sibling
+11  *              pointers since it assumes that all nodes can be reached
+12  *              from the parent node.
+13  *
+14  * Return:      Success:        SUCCEED if found, values returned through the
+15  *                              UDATA argument.
+16  *
+17  *              Failure:        FAIL if not found, UDATA is undefined.
+18  *
+19  * Programmer:  Robb Matzke
+20  *              matzke@llnl.gov
+21  *              Jun 23 1997
+22  *
+23  * Modifications:
+24  *
+25  *-------------------------------------------------------------------------
+26  */
+27 herr_t
+28 H5B_find (H5F_t *f, const H5B_class_t *type, const haddr_t *addr, void *udata)
+29 {
+30    H5B_t        *bt=NULL;
+31    intn         idx=-1, lt=0, rt, cmp=1;
+32    int          ret_value = FAIL;
+    
+ +

All pointer arguments are initialized when defined. I don't + worry much about non-pointers because it's usually obvious when + the value isn't initialized. + +

+33 
+34    FUNC_ENTER (H5B_find, NULL, FAIL);
+35 
+36    /*
+37     * Check arguments.
+38     */
+39    assert (f);
+40    assert (type);
+41    assert (type->decode);
+42    assert (type->cmp3);
+43    assert (type->found);
+44    assert (addr && H5F_addr_defined (addr));
+    
+ +

I use assert to check invariant conditions. At + this level of the library, none of these assertions should fail + unless something is majorly wrong. The arguments should have + already been checked by higher layers. It also provides + documentation about what arguments might be optional. + +

+45    
+46    /*
+47     * Perform a binary search to locate the child which contains
+48     * the thing for which we're searching.
+49     */
+50    if (NULL==(bt=H5AC_protect (f, H5AC_BT, addr, type, udata))) {
+51       HGOTO_ERROR (H5E_BTREE, H5E_CANTLOAD, FAIL);
+52    }
+    
+ +

You'll see this quite often in the low-level stuff and it's
      documented in the H5AC.c file. The
      H5AC_protect ensures that the B-tree node (which
      inherits from the H5AC package) whose OID is addr
      is locked into memory for the duration of this function (see the
      H5AC_unprotect on line 90). Most likely, if this
      node has been accessed in the not-too-distant past, it will still
      be in memory and the H5AC_protect is almost a
      no-op. If cache debugging is compiled in, then the protect also
      prevents other parts of the library from accessing the node
      while this function is protecting it, so this function can allow
      the node to be in an inconsistent state while calling other
      parts of the library.

The alternative is to call the slightly cheaper
      H5AC_find and assume that the pointer it returns is
      valid only until some other library function is called, but
      since we're accessing the pointer throughout this function, I
      chose to use the simpler protect scheme. All protected objects
      must be unprotected before the file is closed, thus the
      use of HGOTO_ERROR instead of
      HRETURN_ERROR.

+53    rt = bt->nchildren;
+54 
+55    while (lt<rt && cmp) {
+56       idx = (lt + rt) / 2;
+57       if (H5B_decode_keys (f, bt, idx)<0) {
+58          HGOTO_ERROR (H5E_BTREE, H5E_CANTDECODE, FAIL);
+59       }
+60 
+61       /* compare */
+62       if ((cmp=(type->cmp3)(f, bt->key[idx].nkey, udata,
+63                             bt->key[idx+1].nkey))<0) {
+64          rt = idx;
+65       } else {
+66          lt = idx+1;
+67       }
+68    }
+69    if (cmp) {
+70       HGOTO_ERROR (H5E_BTREE, H5E_NOTFOUND, FAIL);
+71    }
+    
+ +

Code is arranged in paragraphs with a comment starting each + paragraph. The previous paragraph is a standard binary search + algorithm. The (type->cmp3)() is an indirect + function call into the subclass of the B-tree. All indirect + function calls have the function part in parentheses to document + that it's indirect (quite obvious here, but not so obvious when + the function is a variable). + +

It's also my standard practice to have side effects in + conditional expressions because I can write code faster and it's + more apparent to me what the condition is testing. But if I + have an assignment in a conditional expr, then I use an extra + set of parens even if they're not required (usually they are, as + in this case) so it's clear that I meant = instead + of ==. + +

+72 
+73    /*
+74     * Follow the link to the subtree or to the data node.
+75     */
+76    assert (idx>=0 && idx<bt->nchildren);
+77    if (bt->level > 0) {
+78       if ((ret_value = H5B_find (f, type, bt->child+idx, udata))<0) {
+79          HGOTO_ERROR (H5E_BTREE, H5E_NOTFOUND, FAIL);
+80       }
+81    } else {
+82       ret_value = (type->found)(f, bt->child+idx, bt->key[idx].nkey,
+83                                 udata, bt->key[idx+1].nkey);
+84       if (ret_value<0) {
+85          HGOTO_ERROR (H5E_BTREE, H5E_NOTFOUND, FAIL);
+86       }
+87    }
+    
+ +

Here I broke the "side effect in conditional" rule, which I
      sometimes do if the expression is so long that the
      <0 gets lost at the end. Another thing to note is
      that success/failure is always determined by comparing with zero
      instead of SUCCEED or FAIL. I do this
      because occasionally one might want to return other meaningful
      values (always non-negative) or distinguish between various types of
      failure (always negative).

+88 
+89 done:
+90    if (bt && H5AC_unprotect (f, H5AC_BT, addr, bt)<0) {
+91       HRETURN_ERROR (H5E_BTREE, H5E_PROTECT, FAIL);
+92    }
+93    FUNC_LEAVE (ret_value);
+94 }
+    
+ +

For lack of a better way to handle errors during error cleanup,
      I just call the HRETURN_ERROR macro even though it
      will make the error stack not quite right. I also use
      short-circuiting Boolean operators instead of nested if
      statements since that's standard C practice.

Code Review 2

+ + +

The following code is an API function from the H5F package... + +

+ 1 /*--------------------------------------------------------------------------
+ 2  NAME
+ 3     H5Fflush
+ 4 
+ 5  PURPOSE
+ 6     Flush all cached data to disk and optionally invalidates all cached
+ 7     data.
+ 8 
+ 9  USAGE
+10     herr_t H5Fflush(fid, invalidate)
+11         hid_t fid;              IN: File ID of file to close.
+12         hbool_t invalidate;     IN: Invalidate all of the cache?
+13 
+14  ERRORS
+15     ARGS      BADTYPE       Not a file atom. 
+16     ATOM      BADATOM       Can't get file struct. 
+17     CACHE     CANTFLUSH     Flush failed. 
+18 
+19  RETURNS
+20     SUCCEED/FAIL
+21 
+22  DESCRIPTION
+23         This function flushes all cached data to disk and, if INVALIDATE
+24     is non-zero, removes cached objects from the cache so they must be
+25     re-read from the file on the next access to the object.
+26 
+27  MODIFICATIONS:
+28 --------------------------------------------------------------------------*/
+    
+ +

An API prologue is used for each API function instead of my + normal function prologue. I use the prologue from Code Review 1 + for non-API functions because it's more suited to C programmers, + it requires less work to keep it synchronized with the code, and + I have better editing tools for it. + +

+29 herr_t
+30 H5Fflush (hid_t fid, hbool_t invalidate)
+31 {
+32    H5F_t        *file = NULL;
+33 
+34    FUNC_ENTER (H5Fflush, H5F_init_interface, FAIL);
+35    H5ECLEAR;
+    
+ +

API functions are never called internally; therefore, I always
      clear the error stack before doing anything.

+36 
+37    /* check arguments */
+38    if (H5_FILE!=H5Aatom_group (fid)) {
+39       HRETURN_ERROR (H5E_ARGS, H5E_BADTYPE, FAIL); /*not a file atom*/
+40    }
+41    if (NULL==(file=H5Aatom_object (fid))) {
+42       HRETURN_ERROR (H5E_ATOM, H5E_BADATOM, FAIL); /*can't get file struct*/
+43    }
+    
+ +

If something is wrong with the arguments then we raise an + error. We never assert arguments at this level. + We also convert atoms to pointers since atoms are really just a + pointer-hiding mechanism. Functions that can be called + internally always have pointer arguments instead of atoms + because (1) then they don't have to always convert atoms to + pointers, and (2) the various pointer data types provide more + documentation and type checking than just an hid_t + type. + +

+44 
+45    /* do work */
+46    if (H5F_flush (file, invalidate)<0) {
+47       HRETURN_ERROR (H5E_CACHE, H5E_CANTFLUSH, FAIL); /*flush failed*/
+48    }
+    
+ +

An internal version of the function does the real work. That
      internal version calls assert to check/document
      its arguments and can be called from other library functions.

+49 
+50    FUNC_LEAVE (SUCCEED);
+51 }
+    
+ +
+
Robb Matzke
+ + +Last modified: Mon Nov 10 15:33:33 EST 1997 + + + diff --git a/doc/html/Coding.html b/doc/html/Coding.html new file mode 100644 index 0000000..dbf55bf --- /dev/null +++ b/doc/html/Coding.html @@ -0,0 +1,300 @@ + + + HDF5 Naming Scheme + + + + + +

+
HDF5 Naming Scheme

+ +

+

+

+

+ Authors: + Quincey Koziol and + + Robb Matzke + +
+ +

+

+
+ This file /hdf3/web/hdf/internal/HDF_standard/HDF5.coding_standard.html is + maintained by Elena Pourmal + epourmal@ncsa.uiuc.edu . +
+

+

+ Last modified August 5, 1997 +
+ +
+ + + diff --git a/doc/html/Compression.html b/doc/html/Compression.html new file mode 100644 index 0000000..c3a2a45 --- /dev/null +++ b/doc/html/Compression.html @@ -0,0 +1,409 @@ + + + + Compression + + + +

Compression

+ +

1. Introduction

+ +

HDF5 supports compression of raw data by compression methods
      built into the library or defined by an application. A
      compression method is associated with a dataset when the dataset
      is created and is applied independently to each storage chunk of
      the dataset.

      The dataset must use the H5D_CHUNKED storage
      layout. The library doesn't support compression for contiguous
      datasets because of the difficulty of implementing random access
      for partial I/O, and compact dataset compression is not
      supported because it wouldn't produce significant space savings.

2. Supported Compression Methods

+ +

The library identifies compression methods with small + integers, with values less than 16 reserved for use by NCSA and + values between 16 and 255 (inclusive) available for general + use. This range may be extended in the future if it proves to + be too small. + +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Method Name / Description
H5Z_NONE: The default is not to use compression. Specifying
	    H5Z_NONE as the compression method results
	    in better performance than writing a function that just
	    copies data because the library's I/O pipeline
	    recognizes this method and is able to short-circuit
	    parts of the pipeline.
H5Z_DEFLATE: The deflate method is the algorithm used by
	    the GNU gzip program. It's a combination of
	    a 1977 Lempel-Ziv (LZ77) dictionary encoding followed by a
	    Huffman encoding. The aggressiveness of the
	    compression can be controlled by passing an integer value
	    to the compressor with H5Pset_deflate()
	    (see below). In order for this compression method to be
	    used, the HDF5 library must be configured and compiled
	    in the presence of the GNU zlib version 1.1.2 or
	    later.
H5Z_RES_N: These compression methods (where N is in the
	    range two through 15, inclusive) are reserved by NCSA
	    for future use.
Values of N between 16 and 255, inclusive: These values can be used to represent application-defined
	    compression methods. We recommend that methods under
	    testing should be in the high range and when a method is
	    about to be published it should be given a number near
	    the low end of the range (or even below 16). Publishing
	    the compression method and its numeric ID will make a
	    file sharable.
+
+ +

Setting the compression for a dataset to a method which was + not compiled into the library and/or not registered by the + application is allowed, but writing to such a dataset will + silently not compress the data. Reading a compressed + dataset for a method which is not available will result in + errors (specifically, H5Dread() will return a + negative value). The errors will be displayed in the + compression statistics if the library was compiled with + debugging turned on for the "z" package. See the + section on diagnostics below for more details. + +

3. Application-Defined Methods

+ +

Compression methods 16 through 255 can be defined by an + application. As mentioned above, methods that have not been + released should use high numbers in that range while methods + that have been published will be assigned an official number in + the low region of the range (possibly less than 16). Users + should be aware that using unpublished compression methods + results in unsharable files. + +

A compression method has two halves: one half handles
      compression and the other half handles uncompression. The
      halves are implemented as functions
      method_c and
      method_u respectively. One should not use
      the names compress or uncompress since
      they are likely to conflict with other compression libraries
      (like the GNU zlib).

Both the method_c and + method_u functions take the same arguments + and return the same values. They are defined with the type: + +

+
typedef size_t (*H5Z_func_t)(unsigned int + flags, size_t cd_size, const void + *client_data, size_t src_nbytes, const + void *src, size_t dst_nbytes, void + *dst/*out*/) +
The flags argument is an 8-bit vector which is stored in
      the file and which is defined when the compression method is
      defined. The client_data is a pointer to
      cd_size bytes of configuration data which is also
      stored in the file. The function compresses or uncompresses
      src_nbytes from the source buffer src into
      at most dst_nbytes of the result buffer dst.
      The function returns the number of bytes written to the result
      buffer or zero if an error occurs. But if a result buffer
      overrun occurs the function should return a value at least as
      large as dst_nbytes (the uncompressor will see an
      overrun only for corrupt data).
+ +

The application associates the pair of functions with a name + and a method number by calling H5Zregister(). This + function can also be used to remove a compression method from + the library by supplying null pointers for the functions. + +

+
herr_t H5Zregister (H5Z_method_t method, + const char *name, H5Z_func_t method_c, + H5Z_func_t method_u) +
The pair of functions to be used for compression + (method_c) and uncompression (method_u) are + associated with a short name used for debugging and a + method number in the range 16 through 255. This + function can be called as often as desired for a particular + compression method with each call replacing the information + stored by the previous call. Sometimes it's convenient to + supply only one half of the compression, for instance in an + application that opens files for read-only. Compression + statistics for the method are accumulated across calls to this + function. +
+ +

+

+ + + + + +

Example: Registering an + Application-Defined Compression Method

+

Here's a simple-minded "compression" method + that just copies the input value to the output. It's + similar to the H5Z_NONE method but + slower. Compression and uncompression are performed + by the same function. + +

+size_t
+bogus (unsigned int flags,
+       size_t cd_size, const void *client_data,
+       size_t src_nbytes, const void *src,
+       size_t dst_nbytes, void *dst/*out*/)
+{
+    memcpy (dst, src, src_nbytes);
+    return src_nbytes;
+}
+	      
+ +

The function could be registered as method 250 as + follows: + +

+#define H5Z_BOGUS 250
+H5Zregister (H5Z_BOGUS, "bogus", bogus, bogus);
+	      
+ +

The function can be unregistered by saying: + +

+H5Zregister (H5Z_BOGUS, "bogus", NULL, NULL);
+	      
+ +

Notice that we kept the name "bogus" even + though we unregistered the functions that perform the + compression and uncompression. This makes compression + statistics more understandable when they're printed. +

+
+ +

4. Enabling Compression for a Dataset

+ +

If a dataset is to be compressed then the compression + information must be specified when the dataset is created since + once a dataset is created compression parameters cannot be + adjusted. The compression is specified through the dataset + creation property list (see H5Pcreate()). + +

+
herr_t H5Pset_deflate (hid_t plist, int + level) +
The compression method for dataset creation property list
      plist is set to H5Z_DEFLATE and the
      aggression level is set to level. The level
      must be a value between one and nine, inclusive, where one
      indicates minimal (but fast) compression and nine is aggressive
      compression.

+
int H5Pget_deflate (hid_t plist) +
If dataset creation property list plist is set to + use H5Z_DEFLATE compression then this function + will return the aggression level, an integer between one and + nine inclusive. If plist isn't a valid dataset + creation property list or it isn't set to use the deflate + method then a negative value is returned. + +

+
herr_t H5Pset_compression (hid_t plist, + H5Z_method_t method, unsigned int flags, + size_t cd_size, const void *client_data) +
This is a catch-all function for defining compression methods
      and is intended to be called from a wrapper such as
      H5Pset_deflate(). The dataset creation property
      list plist is adjusted to use the specified
      compression method. The flags is an 8-bit vector
      which is stored in the file as part of the compression message
      and passed to the compress and uncompress functions. The
      client_data is a byte array of length
      cd_size which is copied to the file and passed to the
      compress and uncompress methods. A usage sketch follows this
      list.

+
H5Z_method_t H5Pget_compression (hid_t plist, + unsigned int *flags, size_t *cd_size, void + *client_data) +
This is a catch-all function for querying the compression + method associated with dataset creation property list + plist and is intended to be called from a wrapper + function such as H5Pget_deflate(). The + compression method (or a negative value on error) is returned + by value, and compression flags and client data is returned by + argument. The application should allocate the + client_data and pass its size as the + cd_size. On return, cd_size will contain + the actual size of the client data. If client_data + is not large enough to hold the entire client data then + cd_size bytes are copied into client_data + and cd_size is set to the total size of the client + data, a value larger than the original. +
+ +
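For instance, the "bogus" method registered in the example
      above could be selected for a new chunked dataset as follows (a
      sketch with no flags and no client data; error checks omitted):

+hsize_t chunk[2] = {100, 100};
+hid_t plist = H5Pcreate (H5P_DATASET_CREATE);
+
+H5Pset_chunk (plist, 2, chunk);     /* compression applies per chunk */
+H5Pset_compression (plist, H5Z_BOGUS, 0, 0, NULL);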

It is possible to set the compression to a method which hasn't + been defined with H5Zregister() and which isn't + supported as a predefined method (for instance, setting the + method to H5Z_DEFLATE when the GNU zlib isn't + available). If that happens then data will be written to the + file in its uncompressed form and the compression statistics + will show failures for the compression. + +

+

+ + + + + +

Example: Statistics for an + Unsupported Compression Method

+

If an application attempts to use an unsupported + method then the compression statistics will show large + numbers of compression errors and no data + uncompressed. + +

+H5Z: compression statistics accumulated over life of library:
+   Method      Total  Overrun  Errors  User  System  Elapsed Bandwidth
+   ------      -----  -------  ------  ----  ------  ------- ---------
+   deflate-c  160000        0  160000  0.00    0.01     0.01 1.884e+07
+   deflate-u       0        0       0  0.00    0.00     0.00       NaN
+	      
+ +

This example is from a program that tried to use + H5Z_DEFLATE on a system that didn't have + the GNU zlib to write to a dataset and then read the + result. The read and write both succeeded but the + data was not compressed. +

+
+ +

5. Compression Diagnostics

+ +

If the library is compiled with debugging turned on for the H5Z + layer (usually as a result of configure --enable-debug=z) + then statistics about data compression are printed when the + application exits normally or the library is closed. The + statistics are written to the standard error stream and include + two lines for each compression method that was used: the first + line shows compression statistics while the second shows + uncompression statistics. The following fields are displayed: + +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field Name / Description
Method: This is the name of the method as defined with
	    H5Zregister() with the letters
	    "-c" or "-u" appended to indicate
	    compression or uncompression.
Total: The total number of bytes compressed or decompressed
	    including buffer overruns and errors. Bytes of
	    non-compressed data are counted.
Overrun: During compression, if the algorithm causes the result
	    to be at least as large as the input then a buffer
	    overrun error occurs. This field shows the total number
	    of bytes from the Total column which can be attributed to
	    overruns. Overruns for decompression can only happen if
	    the data has been corrupted in some way and will result
	    in failure of H5Dread().
Errors: If an error occurs during compression the data is
	    stored in its uncompressed form, and an error during
	    uncompression causes H5Dread() to return
	    failure. This field shows the number of bytes of the
	    Total column which can be attributed to errors.
User, System, Elapsed: These are the amount of user time, system time, and
	    elapsed time in seconds spent by the library to perform
	    compression. Elapsed time is sensitive to system
	    load. These times may be zero on operating systems that
	    don't support the required operations.
Bandwidth: This is the compression bandwidth which is the total
	    number of bytes divided by elapsed time. Since elapsed
	    time is subject to system load the bandwidth numbers
	    cannot always be trusted. Furthermore, the bandwidth
	    includes overrun and error bytes which may significantly
	    taint the value.
+
+ +

+

+ + + + + +

Example: Compression + Statistics

+

+H5Z: compression statistics accumulated over life of library:
+   Method      Total  Overrun  Errors  User  System  Elapsed Bandwidth
+   ------      -----  -------  ------  ----  ------  ------- ---------
+   deflate-c  160000      200       0  0.62    0.74     1.33 1.204e+05
+   deflate-u  120000        0       0  0.11    0.00     0.12 9.885e+05
+	      
+
+
+ +
+
Robb Matzke
+ + +Last modified: Fri Apr 17 16:15:21 EDT 1998 + + + diff --git a/doc/html/Datasets.html b/doc/html/Datasets.html new file mode 100644 index 0000000..e0f9680 --- /dev/null +++ b/doc/html/Datasets.html @@ -0,0 +1,839 @@ + + + + The Dataset Interface (H5D) + + + +

The Dataset Interface (H5D)

+ +

1. Introduction

+ +

The purpose of the dataset interface is to provide a mechanism + to describe properties of datasets and to transfer data between + memory and disk. A dataset is composed of a collection of raw + data points and four classes of meta data to describe the data + points. The interface is hopefully designed in such a way as to + allow new features to be added without disrupting current + applications that use the dataset interface. + +

The four classes of meta data are: + +

+
Constant Meta Data +
Meta data that is created when the dataset is created and + exists unchanged for the life of the dataset. For instance, + the data type of stored array elements is defined when the + dataset is created and cannot be subsequently changed. + +
Persistent Meta Data +
Meta data that is an integral and permanent part of a + dataset but can change over time. For instance, the size in + any dimension can increase over time if such an increase is + allowed when the dataset was created. + +
Memory Meta Data +
Meta data that exists to describe how raw data is organized + in the application's memory space. For instance, the data + type of elements in an application array might not be the same + as the data type of those elements as stored in the HDF5 file. + +
Transport Meta Data +
Meta data that is used only during the transfer of raw data + from one location to another. For instance, the number of + processes participating in a collective I/O request or hints + to the library to control caching of raw data. +
+ +

Each of these classes of meta data is handled differently by + the library although the same API might be used to create them. + For instance, the data type exists as constant meta data and as + memory meta data; the same API (the H5T API) is + used to manipulate both pieces of meta data but they're handled + by the dataset API (the H5D API) in different + manners. + + + +

2. Storage Layout Properties

+ +

The dataset API partitions these terms on three orthogonal axes + (layout, compression, and external storage) and uses a + dataset creation property list to hold the various + settings and pass them through the dataset interface. This is + similar to the way HDF5 files are created with a file creation + property list. A dataset creation property list is always + derived from the default dataset creation property list (use + H5Pcreate() to get a copy of the default property + list) by modifying properties with various + H5Pset_property() functions. + +

+
herr_t H5Pset_layout (hid_t plist_id, + H5D_layout_t layout) +
The storage layout is a piece of constant meta data that + describes what method the library uses to organize the raw + data on disk. The default layout is contiguous storage. + +

+
+
H5D_COMPACT +
The raw data is presumably small and can be stored + directly in the object header. Such data is + non-extendible, non-compressible, non-sparse, and cannot + be stored externally. Most of these restrictions are + arbitrary but are enforced because of the small size of + the raw data. Storing data in this format eliminates the + disk seek/read request normally necessary to read raw + data. This layout is not implemented yet. + +

+
H5D_CONTIGUOUS +
The raw data is large, non-extendible, non-compressible, + non-sparse, and can be stored externally. This is the + default value for the layout property. The term + large means that it may not be possible to hold + the entire dataset in memory. The non-compressibility is + a side effect of the data being large, contiguous, and + fixed-size at the physical level, which could cause + partial I/O requests to be extremely expensive if + compression were allowed. + +

+
H5D_CHUNKED +
The raw data is large and can be extended in any + dimension at any time (provided the data space also allows + the extension). It may be sparse at the chunk level (each + chunk is non-sparse, but there might only be a few chunks) + and each chunk can be compressed and/or stored externally. + A dataset is partitioned into chunks so each chunk is the + same logical size. The chunks are indexed by a B-tree and + are allocated on demand (although it might be useful to be + able to preallocate storage for parts of a chunked array + to reduce contention for the B-tree in a parallel + environment). The chunk size must be defined with + H5Pset_chunk(). + +

+
others... +
Other layout types may be defined later without breaking + existing code. However, to be able to correctly read or + modify data stored with one of these new layouts, the + application will need to be linked with a new version of + the library. This happens automatically on systems with + dynamic linking. +
+
+ +

Once the general layout is defined, the user can define + properties of that layout. Currently, the only layout that has + user-settable properties is the H5D_CHUNKED layout, + which needs to know the dimensionality and chunk size. + +

+
herr_t H5Pset_chunk (hid_t plist_id, int + ndims, hsize_t dim[]) +
This function defines the logical size of a chunk for
      chunked layout. The layout property is set to
      H5D_CHUNKED and the chunk size is set to
      dim. The number of elements in the dim array
      is the dimensionality, ndims. One need not call
      H5Pset_layout() when using this function since
      the chunked layout is implied.
+ +

+

+ + + + + +

Example: Chunked Storage

+

This example shows how a two-dimensional dataset + is partitioned into chunks. The library can manage file + memory by moving the chunks around, and each chunk could be + compressed. The chunks are allocated in the file on demand + when data is written to the chunk. +

+ Chunked Storage +
+ +

+hsize_t size[2] = {1000, 1000};
+plist = H5Pcreate (H5P_DATASET_CREATE);
+H5Pset_chunk (plist, 2, size);
+	      
+
+
+ + +

Although it is most efficient if I/O requests are aligned on chunk + boundaries, this is not a constraint. The application can perform I/O + on any set of data points as long as the set can be described by the + data space. The set on which I/O is performed is called the + selection. + +

3. Compression Properties

+ +

Some types of storage layout allow data compression which is + defined by the functions described here. Compression is not + implemented yet. + +

+
herr_t H5Pset_compression (hid_t plist_id, + H5Z_method_t method) +
H5Z_method_t H5Pget_compression (hid_t + plist_id) +
These functions set and query the compression method that + is used to compress the raw data of a dataset. The + plist_id is a dataset creation property list. The + possible values for the compression method are: + +

+
+
H5Z_NONE +
This is the default and specifies that no compression is + to be performed. + +

+
H5Z_DEFLATE +
This specifies that a variation of the Lempel-Ziv 1977 + (LZ77) encoding is used, the same encoding used by the + free GNU gzip program. +
+ +

+
herr_t H5Pset_deflate (hid_t plist_id, + int level) +
int H5Pget_deflate (hid_t plist_id) +
These functions set or query the deflate level of
      dataset creation property list plist_id. The
      H5Pset_deflate() sets the compression method to
      H5Z_DEFLATE and sets the compression level to
      some integer between one and nine (inclusive). One results in
      the fastest compression while nine results in the best
      compression ratio. The default value is six if
      H5Pset_deflate() isn't called. The
      H5Pget_deflate() returns the compression level
      for the deflate method, or negative if the method is not the
      deflate method. A short sketch follows this list.
+ +
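For example (a sketch; error checks omitted and the chunk size is
      illustrative):

+hsize_t chunk[2] = {100, 100};
+hid_t plist = H5Pcreate (H5P_DATASET_CREATE);
+int level;
+
+H5Pset_chunk (plist, 2, chunk);     /* compression requires chunked layout */
+H5Pset_deflate (plist, 6);          /* method H5Z_DEFLATE, level 6 */
+level = H5Pget_deflate (plist);     /* level is now 6 */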

4. External Storage Properties

+ +

Some storage formats may allow storage of data across a set of
      non-HDF5 files. Currently, only the H5D_CONTIGUOUS storage
      format allows external storage. A set of segments (offsets and sizes) in
      one or more files is defined as an external file list, or EFL,
      and the contiguous logical addresses of the data storage are mapped onto
      these segments.

+
herr_t H5Pset_external (hid_t plist, const + char *name, off_t offset, hsize_t + size) +
This function adds a new segment to the end of the external
      file list of the specified dataset creation property list. The
      segment begins at byte offset offset of file name and
      continues for size bytes. The space represented by this
      segment is adjacent to the space already represented by the external
      file list. The last segment in a file list may have the size
      H5F_UNLIMITED.

+
int H5Pget_external_count (hid_t plist) +
Calling this function returns the number of segments in an + external file list. If the dataset creation property list has no + external data then zero is returned. + +

+
herr_t H5Pget_external (hid_t plist, int + idx, size_t name_size, char *name, off_t + *offset, hsize_t *size) +
This is the counterpart for the H5Pset_external() + function. Given a dataset creation property list and a zero-based + index into that list, the file name, byte offset, and segment size are + returned through non-null arguments. At most name_size + characters are copied into the name argument which is not + null terminated if the file name is longer than the supplied name + buffer (this is similar to strncpy()). +
+ +

+

+ + + + + +

Example: Multiple Segments

+

This example shows how a contiguous, one-dimensional dataset + is partitioned into three parts and each of those parts is + stored in a segment of an external file. The top rectangle + represents the logical address space of the dataset + while the bottom rectangle represents an external file. +

+ Multiple Segments +
+ +

+plist = H5Pcreate (H5P_DATASET_CREATE);
+H5Pset_external (plist, "velocity.data", 3000, 1000);
+H5Pset_external (plist, "velocity.data", 0, 2500);
+H5Pset_external (plist, "velocity.data", 4500, 1500);
+	      
+ +

One should note that the segments are defined in order of the + logical addresses they represent, not their order within the + external file. It would also have been possible to put the + segments in separate files. Care should be taken when setting + up segments in a single file since the library doesn't + automatically check for segments that overlap. +

+
+ +

+

+ + + + + +

Example: Multi-Dimensional

+

This example shows how a contiguous, two-dimensional dataset + is partitioned into three parts and each of those parts is + stored in a separate external file. The top rectangle + represents the logical address space of the dataset + while the bottom rectangles represent external files. +

+ Multiple Dimensions +
+ +

+plist = H5Pcreate (H5P_DATASET_CREATE);
+H5Pset_external (plist, "scan1.data", 0, 24);
+H5Pset_external (plist, "scan2.data", 0, 24);
+H5Pset_external (plist, "scan3.data", 0, 16);
+	      
+ +

The library maps the multi-dimensional array onto a linear + address space like normal, and then maps that address space + into the segments defined in the external file list. +

+
+ +

The segments of an external file can exist beyond the end of the
      file. The library reads that part of a segment as zeros. When writing
      to a segment that exists beyond the end of a file, the file is
      automatically extended. Using this feature, one can create a segment
      (or set of segments) which is larger than the current size of the
      dataset, which allows the dataset to be extended at a future time
      (provided the data space also allows the extension).

All referenced external data files must exist before performing raw + data I/O on the dataset. This is normally not a problem since those + files are being managed directly by the application, or indirectly + through some other library. + + +

5. Data Type

+ +

Raw data has a constant data type which describes the data type + of the raw data stored in the file, and a memory data type that + describes the data type stored in application memory. Both data + types are manipulated with the H5T API. + +

The constant file data type is associated with the dataset when + the dataset is created in a manner described below. Once + assigned, the constant datatype can never be changed. + +

The memory data type is specified when data is transferred + to/from application memory. In the name of data sharability, + the memory data type must be specified, but can be the same + type identifier as the constant data type. + +

During dataset I/O operations, the library translates the raw
      data from the constant data type to the memory data type or vice
      versa. Structured data types include member offsets to allow
      reordering of struct members and/or selection of a subset of
      members, and array data types include index permutation
      information to allow things like transpose operations (the
      prototype does not support array reordering). Permutations
      are relative to some extrinsic description of the dataset.
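A sketch of the idea, assuming a dataset dset already
      created with a 32-bit big-endian file type and the usual
      H5Dwrite() argument order (memory type, memory space,
      file space, transfer properties, buffer); the names are
      illustrative:

+int buf[100];                     /* native ints in memory */
+/* ...fill buf... */
+H5Dwrite (dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buf);
+/* the library converts native ints to the file's big-endian type */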

6. Data Space

+ +

The dataspace of a dataset defines the number of dimensions
      and the size of each dimension and is manipulated with the
      H5S API. The simple dataspace consists of
      maximum dimension sizes and actual dimension sizes, which are
      usually the same. However, maximum dimension sizes can be the
      constant H5D_UNLIMITED in which case the actual
      dimension size can be incremented with calls to
      H5Dextend(). The maximum dimension sizes are
      constant meta data while the actual dimension sizes are
      persistent meta data. Initial actual dimension sizes are
      supplied at the same time as the maximum dimension sizes when
      the dataset is created.
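For example, a one-dimensional dataspace with 100 initial elements
      and an unlimited maximum might be created as follows (a sketch
      using the constant named above):

+hsize_t cur[1] = {100};
+hsize_t max[1] = {H5D_UNLIMITED};   /* maximum dimension is unlimited */
+hid_t space = H5Screate_simple (1, cur, max);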

The dataspace can also be used to define partial I/O + operations. Since I/O operations have two end-points, the raw + data transfer functions take two data space arguments: one which + describes the application memory data space or subset thereof + and another which describes the file data space or subset + thereof. + + +

7. Setting Constant or Persistent Properties

+ +

Each dataset has a set of constant and persistent properties + which describe the layout method, pre-compression + transformation, compression method, data type, external storage, + and data space. The constant properties are set as described + above in a dataset creation property list whose identifier is + passed to H5Dcreate(). + +

+
hid_t H5Dcreate (hid_t file_id, const char + *name, hid_t type_id, hid_t + space_id, hid_t create_plist_id) +
A dataset is created by calling H5Dcreate with + a file identifier, a dataset name, a data type, a data space, + and constant properties. The data type and data space are the + type and space of the dataset as it will exist in the file, + which may be different than in application memory. The + create_plist_id is a H5P_DATASET_CREATE + property list created with H5Pcreate() and + initialized with the various functions described above. + H5Dcreate() returns a dataset handle for success + or negative for failure. The handle should eventually be + closed by calling H5Dclose() to release resources + it uses. + +

+
hid_t H5Dopen (hid_t file_id, const char + *name) +
An existing dataset can be opened for access by calling this + function. A dataset handle is returned for success or a + negative value is returned for failure. The handle should + eventually be closed by calling H5Dclose() to + release resources it uses. + +

+
herr_t H5Dclose (hid_t dataset_id) +
This function closes a dataset handle and releases all + resources it might have been using. The handle should not be + used in subsequent calls to the library. + +

+
herr_t H5Dextend (hid_t dataset_id, + hsize_t dim[]) +
This function extends a dataset by increasing the size in + one or more dimensions. Not all datasets can be extended. +
+ + + +

8. Querying Constant or Persistent Properties

+ +

Constant or persistent properties can be queried with a set of
+ three functions. Each function returns an identifier for a copy
+ of the requested properties. The identifier can be passed to
+ various functions which modify the underlying object to derive a
+ new object; the original dataset is completely unchanged. The
+ objects returned by these functions should be properly released
+ when no longer needed.
+

+
hid_t H5Dget_type (hid_t dataset_id) +
Returns an identifier for a copy of the dataset permanent + data type or negative for failure. + +
hid_t H5Dget_space (hid_t dataset_id) +
Returns an identifier for a copy of the dataset permanent + data space, which also contains information about the current + size of the dataset if the data set is extendable with + H5Dextend(). + +
hid_t H5Dget_create_plist (hid_t + dataset_id) +
Returns an identifier for a copy of the dataset creation + property list. The new property list is created by examining + various permanent properties of the dataset. This is mostly a + catch-all for everything but type and space. +
+ + + +
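For example, a brief sketch (assuming dataset is an open
 dataset handle) that queries each copy and then releases it:
+hid_t type, space, plist;
+
+type  = H5Dget_type (dataset);          /* copy of the file data type  */
+space = H5Dget_space (dataset);         /* copy of the data space      */
+plist = H5Dget_create_plist (dataset);  /* copy of creation properties */
+
+/* ...examine the copies or derive new objects from them... */
+
+H5Tclose (type);
+H5Sclose (space);
+H5Pclose (plist);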

9. Setting Memory and Transfer Properties

+ +

A dataset also has memory properties which describe memory + within the application, and transfer properties that control + various aspects of the I/O operations. The memory can have a + data type different than the permanent file data type (different + number types, different struct member offsets, different array + element orderings) and can also be a different size (memory is a + subset of the permanent dataset elements, or vice versa). The + transfer properties might provide caching hints or collective + I/O information. Therefore, each I/O operation must specify + memory and transfer properties. + +

The memory properties are specified with type_id and + space_id arguments while the transfer properties are + specified with the transfer_id property list for the + H5Dread() and H5Dwrite() functions + (these functions are described below). + +

+
herr_t H5Pset_buffer (hid_t xfer_plist, + size_t max_buf_size, void *tconv_buf, void + *bkg_buf) +
size_t H5Pget_buffer (hid_t xfer_plist, void + **tconv_buf, void **bkg_buf) +
Sets or retrieves the maximum size in bytes of the temporary + buffer used for data type conversion in the I/O pipeline. An + application-defined buffer can also be supplied as the + tconv_buf argument, otherwise a buffer will be + allocated and freed on demand by the library. A second + temporary buffer bkg_buf can also be supplied and + should be the same size as the tconv_buf. The + default values are 1MB for the maximum buffer size, and null + pointers for each buffer indicating that they should be + allocated on demand and freed when no longer needed. The + H5Pget_buffer() function returns the maximum + buffer size or zero on error. +
+ +

If the maximum size of the temporary I/O pipeline buffers is + too small to hold the entire I/O request, then the I/O request + will be fragmented and the transfer operation will be strip + mined. However, certain restrictions apply to the strip + mining. For instance, when performing I/O on a hyperslab of a + simple data space the strip mining is in terms of the slowest + varying dimension. So if a 100x200x300 hyperslab is requested, + the temporary buffer must be large enough to hold a 1x200x300 + sub-hyperslab. + +

To prevent strip mining from happening, the application should + use H5Pset_buffer() to set the size of the + temporary buffer so it's large enough to hold the entire + request. + +

+

+ + + + + +

Example

+

This example shows how to define a function that sets
+ a dataset transfer property list so that strip mining
+ does not occur. It takes an (optional) dataset transfer
+ property list, a dataset, a data space that describes
+ what data points are being transferred, and a data type
+ for the data points in memory. It returns a (new)
+ dataset transfer property list with the temporary
+ buffer size set to an appropriate value. The return
+ value should be passed as the fifth argument to
+ H5Dread() or H5Dwrite().

+hid_t
+disable_strip_mining (hid_t xfer_plist, hid_t dataset,
+                      hid_t space, hid_t mem_type)
+{
+    hid_t file_type;          /* File data type */
+    size_t type_size;         /* Size of larger type */
+    size_t size;              /* Temp buffer size */
+
+    file_type = H5Dget_type (dataset);
+    type_size = MAX(H5Tget_size(file_type), H5Tget_size(mem_type));
+    H5Tclose (file_type);
+    size = H5Sget_npoints(space) * type_size;
+    if (xfer_plist<0) xfer_plist = H5Pcreate (H5P_DATASET_XFER);
+    H5Pset_buffer (xfer_plist, size, NULL, NULL);
+    return xfer_plist;
+}
+	      
+
+
+ + + +

10. Querying Memory or Transfer Properties

+ +

Unlike constant and persistent properties, a dataset cannot be
+ queried for its memory or transfer properties. Memory
+ properties cannot be queried because the application already
+ stores those properties separate from the buffer that holds the
+ raw data, and the buffer may hold multiple segments from various
+ datasets and thus have more than one set of memory properties.
+ The transfer properties cannot be queried from the dataset
+ because they're associated with the transfer itself and not with
+ the dataset (but one can call
+ H5Pget_property() to query transfer
+ properties from a template).
+

11. Raw Data I/O

+ +

All raw data I/O is accomplished through these functions which + take a dataset handle, a memory data type, a memory data space, + a file data space, transfer properties, and an application + memory buffer. They translate data between the memory data type + and space and the file data type and space. The data spaces can + be used to describe partial I/O operations. + +

+
herr_t H5Dread (hid_t dataset_id, hid_t + mem_type_id, hid_t mem_space_id, hid_t + file_space_id, hid_t xfer_plist_id, + void *buf/*out*/) +
Reads raw data from the specified dataset into buf + converting from file data type and space to memory data type + and space. + +

+
herr_t H5Dwrite (hid_t dataset_id, hid_t + mem_type_id, hid_t mem_space_id, hid_t + file_space_id, hid_t xfer_plist_id, + const void *buf) +
Writes raw data from an application buffer buf to + the specified dataset converting from memory data type and + space to file data type and space. +
+ + +

In the name of sharability, the memory datatype must be + supplied. However, it can be the same identifier as was used to + create the dataset or as was returned by + H5Dget_type(); the library will not implicitly + derive memory data types from constant data types. + +

For complete reads of the dataset one may supply + H5S_ALL as the argument for the file data space. + If H5S_ALL is also supplied as the memory data + space then no data space conversion is performed. This is a + somewhat dangerous situation since the file data space might be + different than what the application expects. + + + +

12. Examples

+ +

The examples in this section illustrate some common dataset + practices. + + +

This example shows how to create a dataset which is stored in + memory as a two-dimensional array of native double + values but is stored in the file in Cray float + format using LZ77 compression. The dataset is written to the + HDF5 file and then read back as a two-dimensional array of + float values. + +

+

+ + + + + +

Example 1

+

+hid_t file, data_space, dataset, properties;
+double dd[500][600];
+float ff[500][600];
+hsize_t dims[2], chunk_size[2];
+
+/* Describe the size of the array */
+dims[0] = 500;
+dims[1] = 600;
+data_space = H5Screate_simple (2, dims, NULL);
+
+/*
+ * Create a new file with read/write access,
+ * default file creation properties, and default file
+ * access properties.
+ */
+file = H5Fcreate ("test.h5", H5F_ACC_RDWR, H5P_DEFAULT,
+                  H5P_DEFAULT);
+
+/* 
+ * Set the dataset creation plist to specify that
+ * the raw data is to be partitioned into 100x100 element
+ * chunks and that each chunk is to be compressed with
+ * LZ77.
+ */
+chunk_size[0] = chunk_size[1] = 100;
+properties = H5Pcreate (H5P_DATASET_CREATE);
+H5Pset_chunk (properties, 2, chunk_size);
+H5Pset_compression (properties, H5D_COMPRESS_LZ77);
+
+/*
+ * Create a new dataset within the file.  The data type
+ * and data space describe the data on disk, which may
+ * be different than the format used in the application's
+ * memory.
+ */
+dataset = H5Dcreate (file, "dataset", H5T_CRAY_FLOAT,
+                     data_space, properties);
+
+/*
+ * Write the array to the file.  The data type and data
+ * space describe the format of the data in the `dd'
+ * buffer.  The raw data is translated to the format
+ * required on disk defined above.  We use default raw
+ * data transfer properties.
+ */
+H5Dwrite (dataset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL,
+          H5P_DEFAULT, dd);
+
+/*
+ * Read the array as floats.  This is similar to writing
+ * data except the data flows in the opposite direction.
+ */
+H5Dread (dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL,
+         H5P_DEFAULT, ff);
+
+/* Close/release resources. */
+H5Dclose (dataset);
+H5Sclose (data_space);
+H5Pclose (properties);
+H5Fclose (file);
+	      
+
+
+ +

This example uses the file created in Example 1 and reads a
+ hyperslab of the 500x600 file dataset. The hyperslab size is
+ 100x200 and it is located beginning at element
+ <200,200>. We read the hyperslab into a 200x400 array in
+ memory beginning at element <0,0> in memory. Visually,
+ the transfer looks something like this:
+

+ Raw Data Transfer +
+ +

+

+ + + + + +

Example 2

+

+hid_t file, mem_space, file_space, dataset;
+double dd[200][400];
+hssize_t offset[2];
+hsize_t size[2];
+
+/*
+ * Open an existing file and its dataset.
+ */
+file = H5Fopen ("test.h5", H5F_ACC_RDONLY, H5P_DEFAULT);
+dataset = H5Dopen (file, "dataset");
+
+/*
+ * Describe the file data space.
+ */
+offset[0] = 200; /*offset of hyperslab in file*/
+offset[1] = 200;
+size[0] = 100;   /*size of hyperslab*/
+size[1] = 200;
+file_space = H5Dget_space (dataset);
+H5Sset_hyperslab (file_space, 2, offset, size);
+
+/*
+ * Describe the memory data space.
+ */
+size[0] = 200;  /*size of memory array*/
+size[1] = 400;
+mem_space = H5Screate_simple (2, size, NULL);
+
+offset[0] = 0;  /*offset of hyperslab in memory*/
+offset[1] = 0;
+size[0] = 100;  /*size of hyperslab*/
+size[1] = 200;
+H5Sset_hyperslab (mem_space, 2, offset, size);
+
+/*
+ * Read the dataset.
+ */
+H5Dread (dataset, H5T_NATIVE_DOUBLE, mem_space,
+         file_space, H5P_DEFAULT, dd);
+
+/*
+ * Close/release resources.
+ */
+H5Dclose (dataset);
+H5Sclose (mem_space);
+H5Sclose (file_space);
+H5Fclose (file);
+	      
+
+
+ +

If the file contains a compound data structure one of whose
+ members is a floating point value (call it "delta") but the
+ application is interested in reading an array of floating point
+ values which are just the "delta" values, then the application
+ should describe its memory as a struct with a single
+ "delta" member.
+

+

+ + + + + +

Example 3

+

+hid_t file, dataset, type;
+double delta[200];
+
+/*
+ * Open an existing file and its dataset.
+ */
+file = H5Fopen ("test.h5", H5F_ACC_RDONLY, H5P_DEFAULT);
+dataset = H5Dopen (file, "dataset");
+
+/*
+ * Describe the memory data type, a struct with a single
+ * "delta" member.
+ */
+type = H5Tcreate (H5T_COMPOUND, sizeof(double));
+H5Tinsert (type, "delta", 0, H5T_NATIVE_DOUBLE);
+
+/*
+ * Read the dataset into the `delta' array.
+ */
+H5Dread (dataset, type, H5S_ALL, H5S_ALL,
+         H5P_DEFAULT, delta);
+
+/*
+ * Close/release resources.
+ */
+H5Dclose (dataset);
+H5Tclose (type);
+H5Fclose (file);
+	      
+
+
+ +
+
Robb Matzke
+ + +Last modified: Wed May 13 18:57:47 EDT 1998 + + + diff --git a/doc/html/Dataspaces.html b/doc/html/Dataspaces.html new file mode 100644 index 0000000..d2579b6 --- /dev/null +++ b/doc/html/Dataspaces.html @@ -0,0 +1,568 @@ + + + + The Data Space Interface (H5S) + + + +

+The Dataspace Interface (H5S)

+ +

+1. Introduction

+The dataspace interface (H5S) provides a mechanism to describe the positions
+of the elements of a dataset and is designed in such a way as to allow
+new features to be easily added without disrupting applications that use
+the dataspace interface. A dataset (defined with the dataset interface) is
+composed of a collection of raw data points of homogeneous type, defined in the
+datatype (H5T) interface, organized according to a dataspace defined with this
+interface.
+

A dataspace describes where the elements of a dataset are located.
+A dataspace is either a regular N-dimensional array of data points,
+called a simple dataspace, or a more general collection of data
+points organized in another manner, called a complex dataspace.
+A scalar dataspace is a special case of the simple data
+space and is defined to be a single data point of zero dimensions. Currently
+only scalar and simple dataspaces are supported with this version
+of the H5S interface.
+Complex dataspaces will be defined and implemented in a future
+version. Complex dataspaces are intended to be used for structures
+which are awkward to express in simple dataspaces, such as irregularly
+gridded data or adaptive mesh refinement data. This interface provides
+functions to set and query properties of a dataspace.
+

Operations on a dataspace include defining or extending the extent of +the dataspace, selecting portions of the dataspace for I/O and storing the +dataspaces in the file. The extent of a dataspace is the range of coordinates +over which dataset elements are defined and stored. Dataspace selections are +subsets of the extent (up to the entire extent) which are selected for some +operation. + +

For example, a 2-dimensional dataspace with an extent of 10 by 10 may have +the following very simple selection: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
0 1 2 3 4 5 6 7 8 9
0----------
1-XXX------
2-XXX------
3-XXX------
4-XXX------
5-XXX------
6----------
7----------
8----------
9----------
+
Example 1: Contiguous rectangular selection +
+ + +
Or, a more complex selection may be defined: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
0 1 2 3 4 5 6 7 8 9
0----------
1-XXX--X---
2-X-X------
3-X-X--X---
4-X-X------
5-XXX--X---
6----------
7--XXXX----
8----------
9----------
+
Example 2: Non-contiguous selection +
+ +

Selections within dataspaces have an offset within the extent which is used +to locate the selection within the extent of the dataspace. Selection offsets +default to 0 in each dimension, but may be changed to move the selection within +a dataspace. In example 2 above, if the offset was changed to 1,1, the selection +would look like this: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
0 1 2 3 4 5 6 7 8 9
0----------
1----------
2--XXX--X--
3--X-X-----
4--X-X--X--
5--X-X-----
6--XXX--X--
7----------
8---XXXX---
9----------
+
Example 3: Non-contiguous selection with 1,1 offset +
+ +

Selections also have a linearization ordering of the points selected
+(defaulting to "C" order, i.e. last dimension changing fastest). The
+linearization order may be specified for each point or it may be chosen by
+the axis of the dataspace. For example, with the default "C" ordering,
+example 1's selected points are iterated through in this order: (1,1), (2,1),
+(3,1), (1,2), (2,2), etc. With "FORTRAN" ordering, example 1's selected points
+would be iterated through in this order: (1,1), (1,2), (1,3), (1,4), (1,5),
+(2,1), (2,2), etc.
+

A dataspace may be stored in the file as a permanent object, to allow many
+datasets to use a commonly defined dataspace. Dataspaces with extendable
+extents (i.e. unlimited dimensions) cannot be stored as permanent
+dataspaces.
+

Dataspaces may be created using an existing permanent dataspace as a
+container to locate the new dataspace within. These dataspaces are complete
+dataspaces and may be used to define datasets. A dataspace with a "parent"
+can be queried to determine the parent dataspace and the location within the
+parent. These dataspaces must currently have the same number of dimensions as
+the parent dataspace.
+

2. General Dataspace Operations

+The functions defined in this section operate on dataspaces as a whole. +New dataspaces can be created from scratch or copied from existing data +spaces. When a dataspace is no longer needed its resources should be released +by calling H5Sclose(). +
+ +
+hid_t H5Screate(H5S_class_t type)
+ +
+ This function creates a new dataspace of a particular type. The +types currently supported are H5S_SCALAR, H5S_SIMPLE, or H5S_NONE, although +others are planned to be added later. The H5S_NONE dataspace can only hold a +selection, not an extent. +
+ +
+hid_t H5Sopen(hid_t location, const char *name)
+ +
+ This function opens a permanent dataspace for use in an application. +The location argument is a file or group ID and name is +an absolute or relative path to the permanent dataspace. The dataspace ID which +is returned is a handle to a permanent dataspace which can't be modified. +
+ +
+hid_t H5Scopy (hid_t space)
+ +
+ This function creates a new dataspace which is an exact copy of the +dataspace space. +
+ +
+hid_t H5Ssubspace (hid_t space)
+ +
+ This function uses the currently defined selection and offset in space
+to create a dataspace which is located within space. The space
+dataspace must be a sharable dataspace located in the file, not a dataspace for
+a dataset. The relationship of the new dataspace within the existing dataspace
+is preserved when the new dataspace is used to create datasets. Currently,
+only subspaces which are equivalent to simple dataspaces (i.e. rectangular
+contiguous areas) are allowed. A subspace is not "simplified" or reduced in
+the number of dimensions used if the selection is "flat" in one dimension;
+subspaces always have the same number of dimensions as their parent dataspace.
+ +
+herr_t H5Scommit (hid_t location, const char *name, hid_t space)
+ +
+ The dataspace specified with space is stored in the file specified
+by location. The location may be either a file or group handle
+and name is an absolute or relative path to the location to store the
+dataspace. After this call, the dataspace is permanent and can't be modified.
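For example, a minimal sketch (assuming file is an open file
 handle; the name and sizes are illustrative) of committing a dataspace:
+hsize_t size[2] = {10, 10};
+hid_t space;
+
+space = H5Screate_simple (2, size, NULL);
+
+/* Store the dataspace permanently at the root of the file;
+ * after this call it can no longer be modified. */
+H5Scommit (file, "/common_space", space);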
+ +
+herr_t H5Sclose (hid_t space)
+ +
+Releases resources associated with a dataspace. Subsequent use of the +dataspace identifier after this call is undefined. +
+ +
+H5S_class_t H5Sget_class (hid_t space)
+ +
+Queries a dataspace to determine its current class. The value
+which is returned is one of: H5S_SCALAR, H5S_SIMPLE, or H5S_NONE on success or
+FAIL on failure.
+
+ + +

3. Dataspace Extent Operations

+These functions operate on the extent portion of a dataspace. + +
+
+herr_t H5Sset_extent_simple (hid_t space, int rank, const hsize_t +*current_size, const hsize_t *maximum_size)
+ +
+Sets or resets the size of an existing dataspace, where rank is +the dimensionality, or number of dimensions, of the dataspace. +current_size is an array of size rank which contains the new size +of each dimension in the dataspace. maximum_size is an array of size +rank which contains the maximum size of each dimension in the dataspace. +Any previous extent is removed from the dataspace, the dataspace type is set to +H5S_SIMPLE and the extent is set as specified. +
+ +
+herr_t H5Sset_extent_none (hid_t space)
+ +
+Removes the extent from a dataspace and sets the type to H5S_NONE. +
+ +
+herr_t H5Sextent_copy (hid_t dest_space, + hid_t source_space)
+ +
+Copies the extent from source_space to dest_space, which may +change the type of the dataspace. Returns non-negative on success, negative on +failure. +
+ +
+hsize_t H5Sextent_npoints (hid_t space)
+ +
+This function determines the number of elements in a dataspace. For example, a +simple 3-dimensional dataspace with dimensions 2, 3 and 4 would have 24 +elements. +Returns the number of elements in the dataspace, negative on failure. +
+ +
+int H5Sextent_ndims (hid_t space)
+ +
+This function determines the dimensionality (or rank) of a dataspace. +Returns the number of dimensions in the dataspace, negative on failure. +
+ +
+herr_t H5Sextent_dims (hid_t space, hsize_t *dims, + hsize_t *max)
+ +
+The function retrieves the size of the extent of the dataspace space by +placing the size of each dimension in the array dims. Also retrieves +the size of the maximum extent of the dataspace, placing the results in +max. +Returns non-negative on success, negative on failure. +
+ +
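For example, a brief sketch (assuming space is a handle to the
 2x3x4 dataspace mentioned above) of querying an extent:
+hsize_t dims[3], max[3];
+int     ndims;
+hsize_t npoints;
+
+ndims   = H5Sextent_ndims (space);      /* 3                         */
+npoints = H5Sextent_npoints (space);    /* 2*3*4 = 24 elements       */
+H5Sextent_dims (space, dims, max);      /* current and maximum sizes */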
+ +

4. Dataspace Selection Operations

+Selections are maintained separately from extents in dataspaces and operations +on the selection of a dataspace do not affect the extent of the dataspace. +Selections are independent of extent type and the boundaries of selections are +reconciled with the extent at the time of the data transfer. Selection offsets +apply a selection to a location within an extent, allowing the same selection +to be moved within the extent without requiring a new selection to be specified. +Offsets default to 0 when the dataspace is created. Offsets are applied when +an I/O transfer is performed (and checked during calls to H5Sselect_valid). +Selections have an iteration order for the points selected, which can be any +permutation of the dimensions involved (defaulting to 'C' array order) or a +specific order for the selected points, for selections composed of single array +elements with H5Sselect_elements. Selections can also be copied or combined +together in various ways with H5Sselect_op. Further methods of selecting +portions of a dataspace may be added in the future. + +
+
+herr_t H5Sselect_hyperslab (hid_t space, h5s_selopt_t op, + const hssize_t * start, const hsize_t * stride, + const hsize_t * count, const hsize_t * block)
+ +
+This function selects a hyperslab region to add to the current selected region +for the space dataspace. The start, stride, count +and block arrays must be the same size as the rank of the dataspace. +The selection operator op determines how the new selection is to be +combined with the already existing selection for the dataspace. Currently, +only the H5S_SELECT_SET operator is supported, which replaces the existing +selection with the parameters from this call. Overlapping blocks are not +supported with the H5S_SELECT_SET operator. +

The start array determines the starting coordinates of the hyperslab +to select. The stride array chooses array locations from the dataspace +with each value in the stride array determining how many elements to move +in each dimension. Setting a value in the stride array to 1 moves to +each element in that dimension of the dataspace, setting a value of 2 in a +location in the stride array moves to every other element in that +dimension of the dataspace. In other words, the stride determines the +number of elements to move from the start location in each dimension. +Stride values of 0 are not allowed. If the stride parameter is NULL, +a contiguous hyperslab is selected (as if each value in the stride array +was set to all 1's). The count array determines how many blocks to +select from the dataspace, in each dimension. The block array determines +the size of the element block selected from the dataspace. If the block +parameter is set to NULL, the block size defaults to a single element +in each dimension (as if the block array was set to all 1's). +

For example, in a 2-dimensional dataspace, setting start to [1,1], +stride to [4,4], count to [3,7] and block to [2,2] selects +21 2x2 blocks of array elements starting with location (1,1) and selecting +blocks at locations (1,1), (5,1), (9,1), (1,5), (5,5), etc. +

Regions selected with this function call default to 'C' order iteration when +I/O is performed. +
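The example just described translates into the following sketch
 (assuming space is a handle to the 2-dimensional dataspace):
+hssize_t start[2]  = {1, 1};
+hsize_t  stride[2] = {4, 4};
+hsize_t  count[2]  = {3, 7};
+hsize_t  block[2]  = {2, 2};
+
+/* Replace any existing selection with 21 2x2 blocks, the first
+ * beginning at element (1,1). */
+H5Sselect_hyperslab (space, H5S_SELECT_SET, start, stride,
+                     count, block);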

+ +
+herr_t H5Sselect_elements (hid_t space, h5s_selopt_t op, + const size_t num_elements, const hssize_t *coord[])
+ +
+This function selects array elements to be included in the selection for the
+space dataspace. The number of elements selected must be set with the
+num_elements argument. The coord array is a two-dimensional array of size
+<dataspace rank> by <num_elements> (i.e. a list of
+coordinates in the array). The order of the element coordinates in the
+coord array also specifies the order that the array elements are
+iterated through when I/O is performed. Duplicate coordinate locations are not
+checked for.

The selection operator op determines how the new selection is to be +combined with the already existing selection for the dataspace. Currently, +only the H5S_SELECT_SET operator is supported, which replaces the existing +selection with the parameters from this call. When operators other than +H5S_SELECT_SET are used to combine a new selection with an existing selection, +the selection ordering is reset to 'C' array ordering. +
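For example, a minimal sketch (the coordinates are illustrative)
 selecting three elements of a 2-dimensional dataspace:
+hssize_t p0[2] = {1, 1}, p1[2] = {2, 3}, p2[2] = {5, 4};
+const hssize_t *coord[3] = {p0, p1, p2};
+
+/* Replace any existing selection with the three points above;
+ * I/O will visit them in exactly this order. */
+H5Sselect_elements (space, H5S_SELECT_SET, 3, coord);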

+ +
+herr_t H5Sselect_all (hid_t space)
+ +
+This function selects the special H5S_SELECT_ALL region for the space
+dataspace. H5S_SELECT_ALL selects the entire dataspace for any dataspace
+it is applied to.
+ +
+herr_t H5Sselect_none (hid_t space)
+ +
+This function resets the selection region for the space +dataspace not to include any elements. +
+ +
+herr_t H5Sselect_op (hid_t space1, h5s_selopt_t op, + hid_t space2)
+ +
+Uses space2 to perform an operation on space1. The valid +operations for op are: +
+
H5S_SELECT_COPY +
Copies the selection from space2 into space1, removing any + previously defined selection for space1. The selection order + and offset are also copied to space1 +
H5S_SELECT_UNION +
Performs a set union of the selection of the dataspace space2 + with the selection from the dataspace space1, with the result + being stored in space1. The selection order for space1 is + reset to 'C' order. +
H5S_SELECT_INTERSECT +
Performs a set intersection of the selection from space2 with
+ space1, with the result being stored in space1. The
+ selection order for space1 is reset to 'C' order.
H5S_SELECT_DIFFERENCE +
Performs a set difference of the selection from space2 with + space1, with the result being stored in space1. The + selection order for space1 is reset to 'C' order. +
+ +
+ +
+herr_t H5Sselect_order (hid_t space, + hsize_t perm_vector[])
+ +
+This function selects the order to iterate through the dimensions of a dataspace +when performing I/O on a selection. If a specific order has already been +selected for the selection with H5Sselect_elements, this function will remove +it and use a dimension oriented ordering on the selected elements. The elements +of the perm_vector array must be unique and between 0 and the rank of the +dataspace, minus 1. The order of the elements in perm_vector specify +the order to iterate through the selection for each dimension of the dataspace. +To iterate through a 3-dimensional dataspace selection in 'C' order, specify +the elements of the perm_vector as [0, 1, 2], for FORTRAN order they +would be [2, 1, 0]. Other orderings, such as [1, 2, 0] are also possible, but +may execute slower. +
+ +
+hbool_t H5Sselect_valid (hid_t space)
+ +
+This function verifies that the selection for a dataspace is within the extent +of the dataspace, if the currently set offset for the dataspace is used. +Returns TRUE if the selection is contained within the extent, FALSE if it +is not contained within the extent and FAIL on error conditions (such as if +the selection or extent is not defined). +
+ +
+hsize_t H5Sselect_npoints (hid_t space)
+ +
+This function determines the number of elements in the current selection +of a dataspace. +
+ +
+herr_t H5Soffset_simple (hid_t space, const hssize_t * + offset)
+ +
+Sets the offset of a simple dataspace space. The offset array +must be the same number of elements as the number of dimensions for the +dataspace. If the offset array is set to NULL, the offset +for the dataspace is reset to 0. +
+ +
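For example, a brief sketch that moves an existing selection by one
 element in each dimension, as in Example 3 above:
+hssize_t offset[2] = {1, 1};
+
+/* The selection itself is unchanged; only its position within
+ * the extent moves. */
+H5Soffset_simple (space, offset);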
+ +

5. Misc. Dataspace Operations

+ +
+ +
+herr_t H5Slock (hid_t space)
+ +
+Locks the dataspace so that it cannot be modified or closed. When the library +exits, the dataspace will be unlocked and closed. +
+ +
+hid_t H5Screate_simple(int rank, const hsize_t *current_size, + const hsize_t *maximum_size)
+ +
+ This function is a "convenience" wrapper to create a simple dataspace
+and set its extent in one call. It is equivalent to calling H5Screate
+and H5Sset_extent_simple() in two steps.
+ +
+int H5Sis_subspace(hid_t space)
+ +
+ This function returns positive if space is located within another +dataspace, zero if it is not, and negative on a failure. +
+ +
+char *H5Ssubspace_name(hid_t space)
+ +
+ This function returns the name of the named dataspace that space +is located within. If space is not located within another dataspace, +or an error occurs, NULL is returned. The application is responsible for +freeing the string returned. +
+ +
+herr_t H5Ssubspace_location(hid_t space, hsize_t *loc)
+ +
+ If space is located within another dataspace, this function puts +the location of the origin of space in the loc array. The loc +array must be at least as large as the number of dimensions of space. +If space is not located within another dataspace +or an error occurs, a negative value is returned, otherwise a non-negative value +is returned. +
+ +
+ +
+
+Robb Matzke
+ +
+Quincey Koziol
+ +
Last +modified: Thu May 28 15:12:04 EST 1998  + + diff --git a/doc/html/Datatypes.html b/doc/html/Datatypes.html new file mode 100644 index 0000000..75bc57e --- /dev/null +++ b/doc/html/Datatypes.html @@ -0,0 +1,1370 @@ + + + + The Data Type Interface (H5T) + + + +

The Data Type Interface (H5T)

+ +

1. Introduction

+ +

The data type interface provides a mechanism to describe the
+ storage format of individual data points of a data set and is
+ hopefully designed in such a way as to allow new features to be
+ easily added without disrupting applications that use the data
+ type interface. A dataset (the H5D interface) is composed of a
+ collection of raw data points of homogeneous type organized
+ according to the data space (the H5S interface).
+

A data type is a collection of data type properties, all of + which can be stored on disk, and which when taken as a whole, + provide complete information for data conversion to or from that + data type. The interface provides functions to set and query + properties of a data type. + +

A data point is an instance of a data type, + which is an instance of a type class. We have defined + a set of type classes and properties which can be extended at a + later time. The atomic type classes are those which describe + types which cannot be decomposed at the data type interface + level; all other classes are compound. + +

2. General Data Type Operations

+ +

The functions defined in this section operate on data types as + a whole. New data types can be created from scratch or copied + from existing data types. When a data type is no longer needed + its resources should be released by calling H5Tclose(). + +

Data types come in two flavors: named data types and transient + data types. A named data type is stored in a file while the + transient flavor is independent of any file. Named data types + are always read-only, but transient types come in three + varieties: modifiable, read-only, and immutable. The difference + between read-only and immutable types is that immutable types + cannot be closed except when the entire library is closed (the + predefined types like H5T_NATIVE_INT are immutable + transient types). + +

+
hid_t H5Tcreate (H5T_class_t class, size_t + size) +
Data types can be created by calling this + function, where class is a data type class + identifier. However, the only class currently allowed is + H5T_COMPOUND to create a new empty compound data + type where size is the total size in bytes of an + instance of this data type. Other data types are created with + H5Tcopy(). All functions that return data type + identifiers return a negative value for failure. + +

+
hid_t H5Topen (hid_t location, const char + *name) +
A named data type can be opened by calling this function, + which returns a handle to the data type. The handle should + eventually be closed by calling H5Tclose() to + release resources. The named data type returned by this + function is read-only or a negative value is returned for + failure. The location is either a file or group + handle. + +

+
herr_t H5Tcommit (hid_t location, const char + *name, hid_t type) +
A transient data type (not immutable) can be committed to a + file and turned into a named data type by calling this + function. The location is either a file or group + handle and when combined with name refers to a new + named data type. + +

+
hbool_t H5Tcommitted (hid_t type) +
A type can be queried to determine if it is a named type or + a transient type. If this function returns a positive value + then the type is named (that is, it has been committed perhaps + by some other application). Datasets which return committed + data types with H5Dget_type() are able to share + the data type with other datasets in the same file. + +

+
hid_t H5Tcopy (hid_t type) +
This function returns a modifiable transient data type + which is a copy of type or a negative value for + failure. If type is a dataset handle then the type + returned is a modifiable transient copy of the data type of + the specified dataset. + +

+
herr_t H5Tclose (hid_t type) +
Releases resources associated with a data type. The data + type identifier should not be subsequently used since the + results would be unpredictable. It is illegal to close an + immutable transient data type. + +

+
hbool_t H5Tequal (hid_t type1, hid_t + type2) +
Determines if two types are equal. If type1 and + type2 are the same then this function returns + TRUE, otherwise it returns FALSE (an + error results in a negative return value). + +

+
herr_t H5Tlock (hid_t type) +
A transient data type can be locked, making it immutable + (read-only and not closable). The library does this to all + predefined types to prevent the application from inadvertently + modifying or deleting (closing) them, but the application is + also allowed to do this for its own data types. Immutable + data types are closed when the library closes (either by + H5close() or by normal program termination). +
+ +

3. Properties of Atomic Types

+ +

An atomic type is a type which cannot be decomposed into + smaller units at the API level. All atomic types have a common + set of properties which are augmented by properties specific to + a particular type class. Some of these properties also apply to + compound data types, but we discuss them only as they apply to + atomic data types here. The properties and the functions that + query and set their values are: + +

+
H5T_class_t H5Tget_class (hid_t type) +
This property holds one of the class names: + H5T_INTEGER, H5T_FLOAT, H5T_TIME, H5T_STRING, + H5T_BITFIELD, or H5T_OPAQUE. This + property is read-only and is set when the datatype is + created or copied (see H5Tcreate(), + H5Tcopy()). If this function fails it returns + H5T_NO_CLASS which has a negative value (all + other class constants are non-negative). + +

+
size_t H5Tget_size (hid_t type) +
herr_t H5Tset_size (hid_t type, size_t + size) +
This property is the total size of the datum in bytes, including
+ padding which may appear on either side of the actual value.
+ If this property is reset to a smaller value which would cause
+ the significant part of the data to extend beyond the edge of
+ the data type then the offset property is
+ decremented a bit at a time. If the offset reaches zero and
+ the significant part of the data still extends beyond the edge
+ of the data type then the precision property is
+ decremented a bit at a time. Decreasing the size of a data
+ type may fail if the precision must be decremented and the
+ data type is of the H5T_OPAQUE class or the
+ H5T_FLOAT bit fields would extend beyond the
+ significant part of the type. Increasing the size of an
+ H5T_STRING automatically increases the precision
+ as well. On error, H5Tget_size() returns zero
+ which is never a valid size.
+

+
H5T_order_t H5Tget_order (hid_t type) +
herr_t H5Tset_order (hid_t type, H5T_order_t + order) +
All atomic data types have a byte order which describes how
+ the bytes of the data type are laid out in memory. If the
+ lowest memory address contains the least significant byte of
+ the datum then it is said to be little-endian or
+ H5T_ORDER_LE. If the bytes are in the opposite
+ order then they are said to be big-endian or
+ H5T_ORDER_BE. Some data types have the same byte
+ order on all machines and are H5T_ORDER_NONE
+ (like character strings). If H5Tget_order()
+ fails then it returns H5T_ORDER_ERROR which is a
+ negative value (all successful return values are
+ non-negative).
+

+
size_t H5Tget_precision (hid_t type) +
herr_t H5Tset_precision (hid_t type, size_t + precision) +
Some data types occupy more bytes than what is needed to + store the value. For instance, a short on a Cray + is 32 significant bits in an eight-byte field. The + precision property identifies the number of + significant bits of a datatype and the offset + property (defined below) identifies its location. The + size property defined above represents the entire + size (in bytes) of the data type. If the precision is + decreased then padding bits are inserted on the MSB side of + the significant bits (this will fail for + H5T_FLOAT types if it results in the sign, + mantissa, or exponent bit field extending beyond the edge of + the significant bit field). On the other hand, if the + precision is increased so that it "hangs over" the edge of the + total size then the offset property is + decremented a bit at a time. If the offset + reaches zero and the significant bits still hang over the + edge, then the total size is increased a byte at a time. The + precision of an H5T_STRING is read-only and is + always eight times the value returned by + H5Tget_size(). H5Tget_precision() + returns zero on failure since zero is never a valid precision. + +

+
size_t H5Tget_offset (hid_t type) +
herr_t H5Tset_offset (hid_t type, size_t + offset) +
While the precision property defines the number
+ of significant bits, the offset property defines
+ the location of those bits within the entire datum. The bits
+ of the entire data are numbered beginning at zero at the least
+ significant bit of the least significant byte (the byte at the
+ lowest memory address for a little-endian type or the byte at
+ the highest address for a big-endian type). The
+ offset property defines the bit location of the
+ least significant bit of a bit field whose length is
+ precision. If the offset is increased so the
+ significant bits "hang over" the edge of the datum, then the
+ size property is automatically incremented. The
+ offset is a read-only property of an H5T_STRING
+ and is always zero. H5Tget_offset() returns zero
+ on failure which is also a valid offset, but is guaranteed to
+ succeed if a call to H5Tget_precision() succeeds
+ with the same arguments.
+

+
herr_t H5Tget_pad (hid_t type, H5T_pad_t + *lsb, H5T_pad_t *msb) +
herr_t H5Tset_pad (hid_t type, H5T_pad_t + lsb, H5T_pad_t msb) +
The bits of a datum which are not significant as defined by + the precision and offset properties + are called padding. Padding falls into two + categories: padding in the low-numbered bits is lsb + padding and padding in the high-numbered bits is msb + padding (bits are numbered according to the description for + the offset property). Padding bits can always be + set to zero (H5T_PAD_ZERO) or always set to one + (H5T_PAD_ONE). The current pad types are returned + through arguments of H5Tget_pad() either of which + may be null pointers. +
+ +

3.1. Properties of Integer Atomic Types

+ +

Integer atomic types (class=H5T_INTEGER) + describe integer number formats. Such types include the + following information which describes the type completely and + allows conversion between various integer atomic types. + +

+
H5T_sign_t H5Tget_sign (hid_t type) +
herr_t H5Tset_sign (hid_t type, H5T_sign_t + sign) +
Integer data can be signed two's complement + (H5T_SGN_2) or unsigned + (H5T_SGN_NONE). Whether data is signed or not + becomes important when converting between two integer data + types of differing sizes as it determines how values are + truncated and sign extended. +
+ +

3.2. Properties of Floating-point Atomic Types

+ +

The library supports floating-point atomic types + (class=H5T_FLOAT) as long as the bits of the + exponent are contiguous and stored as a biased positive number, + the bits of the mantissa are contiguous and stored as a positive + magnitude, and a sign bit exists which is set for negative + values. Properties specific to floating-point types are: + +

+
herr_t H5Tget_fields (hid_t type, size_t + *spos, size_t *epos, size_t + *esize, size_t *mpos, size_t + *msize) +
herr_t H5Tset_fields (hid_t type, size_t + spos, size_t epos, size_t esize, + size_t mpos, size_t msize) +
A floating-point datum has bit fields which are the exponent + and mantissa as well as a mantissa sign bit. These properties + define the location (bit position of least significant bit of + the field) and size (in bits) of each field. The bit + positions are numbered beginning at zero at the beginning of + the significant part of the datum (see the descriptions of the + precision and offset + properties). The sign bit is always of length one and none of + the fields are allowed to overlap. When expanding a + floating-point type one should set the precision first; when + decreasing the size one should set the field positions and + sizes first. + +

+
size_t H5Tget_ebias (hid_t type) +
herr_t H5Tset_ebias (hid_t type, size_t + ebias) +
The exponent is stored as a non-negative value which is + ebias larger than the true exponent. + H5Tget_ebias() returns zero on failure which is + also a valid exponent bias, but the function is guaranteed to + succeed if H5Tget_precision() succeeds when + called with the same arguments. + +

+
H5T_norm_t H5Tget_norm (hid_t type) +
herr_t H5Tset_norm (hid_t type, H5T_norm_t + norm) +
This property determines the normalization method of the + mantissa. +
    +
  • If the value is H5T_NORM_MSBSET then the + mantissa is shifted left (if non-zero) until the first bit + after the radix point is set and the exponent is adjusted + accordingly. All bits of the mantissa after the radix + point are stored. + +
  • If its value is H5T_NORM_IMPLIED then the + mantissa is shifted left (if non-zero) until the first bit + after the radix point is set and the exponent is adjusted + accordingly. The first bit after the radix point is not stored + since it's always set. + +
  • If its value is H5T_NORM_NONE then the fractional + part of the mantissa is stored without normalizing it. +
+ +

+
H5T_pad_t H5Tget_inpad (hid_t type) +
herr_t H5Tset_inpad (hid_t type, H5T_pad_t + inpad) +
If any internal bits (that is, bits between the sign bit, + the mantissa field, and the exponent field but within the + precision field) are unused, then they will be filled + according to the value of this property. The inpad + argument can be H5T_PAD_ZERO if the internal + padding should always be set to zero, or H5T_PAD_ONE + if it should always be set to one. + H5Tget_inpad() returns H5T_PAD_ERROR + on failure which is a negative value (successful return is + always non-negative). +
+ +

3.3. Properties of Date and Time Atomic Types

+ +

Dates and times (class=H5T_TIME) are stored as + character strings in one of the ISO-8601 formats like + "1997-12-05 16:25:30"; as character strings using the + Unix asctime(3) format like "Thu Dec 05 16:25:30 1997"; + as an integer value by juxtaposition of the year, month, and + day-of-month, hour, minute and second in decimal like + 19971205162530; as an integer value in Unix time(2) + format; or other variations. + +

I'm deferring definition until later since they're probably not + as important as the other data types. + +

3.4. Properties of Character String Atomic Types

+ +

Fixed-length character string types are used to store textual + information. The offset property of a string is + always zero and the precision property is eight + times as large as the value returned by + H5Tget_size() (since precision is measured in bits + while size is measured in bytes). Both properties are + read-only. + +

+
H5T_cset_t H5Tget_cset (hid_t type) +
herr_t H5Tset_cset (hid_t type, H5T_cset_t + cset) +
HDF5 is able to distinguish between character sets of + different nationalities and to convert between them to the + extent possible. The only character set currently supported + is H5T_CSET_ASCII. + +

+
H5T_str_t H5Tget_strpad (hid_t type) +
herr_t H5Tset_strpad (hid_t type, H5T_str_t + strpad) +
The method used to store character strings differs with the + programming language: C usually null terminates strings while + Fortran left-justifies and space-pads strings. This property + defines the storage mechanism and can be + H5T_STR_NULL for C-style strings or + H5T_STR_SPACE for Fortran-style + strings. H5Tget_strpad() returns + H5T_STR_ERROR on failure, a negative value (all + successful return values are non-negative). +
+ +
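For example, a 16-character, space-padded (Fortran-style) string
 type might be created like this (the length is illustrative):
+hid_t fstr16 = H5Tcopy (H5T_C_S1);
+H5Tset_size (fstr16, 16);                /* 16-character elements */
+H5Tset_strpad (fstr16, H5T_STR_SPACE);   /* space padded          */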

3.5. Properties of Bit Field Atomic Types

+ +

Converting a bit field (class=H5T_BITFIELD) from + one type to another simply copies the significant bits. If the + destination is smaller than the source then bits are truncated. + Otherwise new bits are filled according to the msb + padding type. + +

3.6. Properties of Opaque Atomic Types

+ +

Opaque atomic types (class=H5T_OPAQUE) act like + bit fields except conversions which change the precision are not + allowed. However, padding can be added or removed from either + end and the bytes can be reordered. Opaque types can be used to + create novel data types not directly supported by the library, + but the application is responsible for data conversion of these + types. + +

4. Properties of Compound Types

+ +

A compound data type is similar to a struct in C + or a common block in Fortran: it is a collection of one or more + atomic types or small arrays of such types. Each + member of a compound type has a name which is unique + within that type, and a byte offset that determines the first + byte (smallest byte address) of that member in a compound datum. + A compound data type has the following properties: + +

+
H5T_class_t H5Tget_class (hid_t type) +
All compound data types belong to the type class + H5T_COMPOUND. This property is read-only and is + defined when a data type is created or copied (see + H5Tcreate() or H5Tcopy()). + +

+
size_t H5Tget_size (hid_t type) +
Compound data types have a total size in bytes which is + returned by this function. All members of a compound data + type must exist within this size. A value of zero is returned + for failure; all successful return values are positive. + +

+
int H5Tget_nmembers (hid_t type) +
A compound data type consists of zero or more members + (defined in any order) with unique names and which occupy + non-overlapping regions within the datum. In the functions + that follow, individual members are referenced by an index + number between zero and N-1, inclusive, where + N is the value returned by this function. + H5Tget_nmembers() returns -1 on failure. + +

+
char *H5Tget_member_name (hid_t type, int + membno) +
Each member has a name which is unique among its siblings in + a compound data type. This function returns a pointer to a + null-terminated copy of the name allocated with + malloc() or the null pointer on failure. The + caller is responsible for freeing the memory returned by this + function. + +

+
size_t H5Tget_member_offset (hid_t type, int + membno) +
The byte offset of member number membno with + respect to the beginning of the containing compound datum is + returned by this function. A zero is returned on failure + which is also a valid offset, but this function is guaranteed + to succeed if a call to H5Tget_member_dims() + succeeds when called with the same type and + membno arguments. + +

+
int H5Tget_member_dims (hid_t type, int + membno, int dims[4], int + perm[4]) +
Each member can be a small array of up to four dimensions, + making it convenient to describe things like transposition + matrices. The dimensionality of the member is returned (or + negative for failure) and the size in each dimension is + returned through the dims argument. The + perm argument describes how the array's elements are + mapped to the linear address space of memory with respect to + some reference order (the reference order is specified in + natural language documentation which describes the compound + data type). The application which "invented" the type will + often use the identity permutation and other applications will + use a permutation that causes the elements to be rearranged to + the desired order. Only the first few elements of + dims and perm are initialized according to + the dimensionality of the member. Scalar members have + dimensionality zero. + + The only permutations supported at this + time are the identity permutation and the transpose + permutation (in the 4d case, {0,1,2,3} and {3,2,1,0}). + +

+
hid_t H5Tget_member_type (hid_t type, int + membno) +
Each member has its own data type, a copy of which is + returned by this function. The returned data type identifier + should be released by eventually calling + H5Tclose() on that type. +
+ +
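As a brief sketch, the member query functions above can be combined
 to walk a compound data type (assuming type is a compound
 data type handle):
+int i, nmembs = H5Tget_nmembers (type);
+
+for (i = 0; i < nmembs; i++) {
+    char *name   = H5Tget_member_name (type, i);
+    hid_t member = H5Tget_member_type (type, i);
+    printf ("member %d: %s at byte offset %lu\n", i, name,
+            (unsigned long) H5Tget_member_offset (type, i));
+    free (name);          /* the name was allocated with malloc() */
+    H5Tclose (member);    /* release the copied member type       */
+}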

Properties of members of a compound data type are
+ defined when the member is added to the compound type (see
+ H5Tinsert()) and cannot be subsequently modified.
+ This makes it impossible to define recursive data structures.
+

5. Predefined Atomic Data Types

+ +

The library predefines a modest number of data types having
+ names like H5T_arch_base where
+ arch is an architecture name and base is a
+ programming type name. New types can be derived from the
+ predefined types by copying the predefined type (see
+ H5Tcopy()) and then modifying the result.
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Architecture NameDescription
IEEEThis architecture defines standard floating point + types in various byte orders.
STDThis is an architecture that contains semi-standard + datatypes like signed two's complement integers, + unsigned integers, and bitfields in various byte + orders.
UNIXTypes which are specific to Unix operating systems are + defined in this architecture. The only type currently + defined is the Unix date and time types + (time_t).
C
FORTRAN
Types which are specific to the C or Fortran + programming languages are defined in these + architectures. For instance, H5T_C_STRING + defines a base string type with null termination which + can be used to derive string types of other + lengths.
NATIVEThis architecture contains C-like data types for the + machine on which the library was compiled. The types + were actually defined by running the + H5detect program when the library was + compiled. In order to be portable, applications should + almost always use this architecture to describe things + in memory.
CRAYCray architectures. These are word-addressable, + big-endian systems with non-IEEE floating point.
INTELAll Intel and compatible CPU's including 80286, 80386, + 80486, Pentium, Pentium-Pro, and Pentium-II. These are + little-endian systems with IEEE floating-point.
MIPSAll MIPS CPU's commonly used in SGI systems. These + are big-endian systems with IEEE floating-point.
ALPHAAll DEC Alpha CPU's, little-endian systems with IEEE + floating-point.
+
+ +

The base name of most types consists of a letter, a precision + in bits, and an indication of the byte order. The letters are: + +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
BBitfield
DDate and time
FFloating point
ISigned integer
SCharacter string
UUnsigned integer
+
+ +

The byte order is a two-letter sequence: + +

+

+ + + + + + + + + + + + + +
BEBig endian
LELittle endian
VXVax order
+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +


Example


Description
H5T_IEEE_F64LEEight-byte, little-endian, IEEE floating-point
H5T_IEEE_F32BEFour-byte, big-endian, IEEE floating point
H5T_STD_I32LEFour-byte, little-endian, signed two's complement integer
H5T_STD_U16BETwo-byte, big-endian, unsigned integer
H5T_UNIX_D32LEFour-byte, little-endian, time_t
H5T_C_S1One-byte, null-terminated string of eight-bit characters
H5T_INTEL_B64Eight-byte bit field on an Intel CPU
H5T_CRAY_F64Eight-byte Cray floating point
+
+ +

The NATIVE architecture has base names which don't + follow the same rules as the others. Instead, native type names + are similar to the C type names. Here are some examples: + +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +


Example


Corresponding C Type
H5T_NATIVE_CHARsigned char
H5T_NATIVE_UCHARunsigned char
H5T_NATIVE_SHORTshort
H5T_NATIVE_USHORTunsigned short
H5T_NATIVE_INTint
H5T_NATIVE_UINTunsigned
H5T_NATIVE_LONGlong
H5T_NATIVE_ULONGunsigned long
H5T_NATIVE_LLONGlong long
H5T_NATIVE_ULLONGunsigned long long
H5T_NATIVE_FLOATfloat
H5T_NATIVE_DOUBLEdouble
H5T_NATIVE_LDOUBLElong double
+
+ +

+

+ + + + + +

Example: A 128-bit + integer

+

To create a 128-bit, little-endian signed integer + type one could use the following (increasing the + precision of a type automatically increases the total + size): + +

+hid_t new_type = H5Tcopy (H5T_NATIVE_INT);
+H5Tset_precision (new_type, 128);
+H5Tset_order (new_type, H5T_ORDER_LE);
+	      
+
+
+ +

+

+ + + + + +

Example: An 80-character + string

+

To create an 80-byte null terminated string type one + might do this (the offset of a character string is + always zero and the precision is adjusted + automatically to match the size): + +

+hid_t str80 = H5Tcopy (H5T_C_S1);
+H5Tset_size (str80, 80);
+	      
+
+
+ +

6. Defining Compound Data Types

+ +

Unlike atomic data types which are derived from other atomic
+ data types, compound data types are created from scratch. First,
+ one creates an empty compound data type and specifies its total
+ size. Then members are added to the compound data type in any
+ order.
+

Usually a C struct will be defined to hold a data point in + memory, and the offsets of the members in memory will be the + offsets of the struct members from the beginning of an instance + of the struct. + +

+
HOFFSET(s,m) +
This macro computes the offset of member m within + a struct s. +
offsetof(s,m) +
This macro defined in stddef.h does + exactly the same thing as the HOFFSET() macro. +
+ +

Each member must have a descriptive name which is the + key used to uniquely identify the member within the compound + data type. A member name in an HDF5 data type does not + necessarily have to be the same as the name of the member in the + C struct, although this is often the case. Nor does one need to + define all members of the C struct in the HDF5 compound data + type (or vice versa). + +

+

+ + + + + +

Example: A simple struct

+

An HDF5 data type is created to describe complex + numbers whose type is defined by the + complex_t struct. + +

+typedef struct {
+   double re;   /*real part*/
+   double im;   /*imaginary part*/
+} complex_t;
+
+hid_t complex_id = H5Tcreate (H5T_COMPOUND, sizeof(complex_t));
+H5Tinsert (complex_id, "real", HOFFSET(complex_t,re),
+           H5T_NATIVE_DOUBLE);
+H5Tinsert (complex_id, "imaginary", HOFFSET(complex_t,im),
+           H5T_NATIVE_DOUBLE);
+	      
+
+
+ +

Member alignment is handled by the HOFFSET + macro. However, data stored on disk does not require alignment, + so unaligned versions of compound data structures can be created + to improve space efficiency on disk. These unaligned compound + data types can be created by computing offsets by hand to + eliminate inter-member padding, or the members can be packed by + calling H5Tpack() (which modifies a data type + directly, so it is usually preceded by a call to + H5Tcopy()): + +

+

+ + + + + +

Example: A packed struct

+

This example shows how to create a disk version of a + compound data type in order to store data on disk in + as compact a form as possible. Packed compound data + types should generally not be used to describe memory + as they may violate alignment constraints for the + architecture being used. Note also that using a + packed data type for disk storage may involve a higher + data conversion cost. +

+hid_t complex_disk_id = H5Tcopy (complex_id);
+H5Tpack (complex_disk_id);
+	      
+
+
+ + +

+

+ + + + + +

Example: A flattened struct

+

Compound data types that have a compound data type + member can be handled two ways. This example shows + that the compound data type can be flattened, + resulting in a compound type with only atomic + members. + +

+typedef struct {
+   complex_t x;
+   complex_t y;
+} surf_t;
+
+hid_t surf_id = H5Tcreate (H5T_COMPOUND, sizeof(surf_t));
+H5Tinsert (surf_id, "x-re", HOFFSET(surf_t,x.re),
+           H5T_NATIVE_DOUBLE);
+H5Tinsert (surf_id, "x-im", HOFFSET(surf_t,x.im),
+           H5T_NATIVE_DOUBLE);
+H5Tinsert (surf_id, "y-re", HOFFSET(surf_t,y.re),
+           H5T_NATIVE_DOUBLE);
+H5Tinsert (surf_id, "y-im", HOFFSET(surf_t,y.im),
+           H5T_NATIVE_DOUBLE);
+	      
+
+
+ +

+

+ + + + + +

Example: A nested struct

+

However, when the complex_t is used + often it becomes inconvenient to list its members over + and over again. So the alternative approach to + flattening is to define a compound data type and then + use it as the type of the compound members, as is done + here (the typedefs are defined in the previous + examples). + +

+hid_t complex_id, surf_id; /*hdf5 data types*/
+
+complex_id = H5Tcreate (H5T_COMPOUND, sizeof(complex_t));
+H5Tinsert (complex_id, "re", HOFFSET(complex_t,re),
+           H5T_NATIVE_DOUBLE);
+H5Tinsert (complex_id, "im", HOFFSET(complex_t,im),
+           H5T_NATIVE_DOUBLE);
+
+surf_id = H5Tcreate (H5T_COMPOUND, sizeof(surf_t));
+H5Tinsert (surf_id, "x", HOFFSET(surf_t,x), complex_id);
+H5Tinsert (surf_id, "y", HOFFSET(surf_t,y), complex_id);
+	      
+
+
+ +

7. Sharing Data Types among Datasets

+ +

If a file has lots of datasets which have a common data type then the file could be made smaller by having all the datasets share a single data type. Instead of storing a copy of the data type in each dataset object header, a single data type is stored and the object headers point to it. The space savings are probably only significant for datasets with a compound data type since the simple data types can be described with just a few bytes anyway.

To create a bunch of datasets that share a single data type + just create the datasets with a committed (named) data type. + +

+

+ + + + + +

Example: Shared Types

+

To create two datasets that share a common data type + one just commits the data type, giving it a name, and + then uses that data type to create the datasets. + +

+hid_t t1 = ...some transient type...;
+H5Tcommit (file, "shared_type", t1);
+hid_t dset1 = H5Dcreate (file, "dset1", t1, space, H5P_DEFAULT);
+hid_t dset2 = H5Dcreate (file, "dset2", t1, space, H5P_DEFAULT);
+	      
+ +

And to create two additional datasets later which + share the same type as the first two datasets: + +

+hid_t dset1 = H5Dopen (file, "dset1");
+hid_t t2 = H5Dget_type (dset1);
+hid_t dset3 = H5Dcreate (file, "dset3", t2, space, H5P_DEFAULT);
+hid_t dset4 = H5Dcreate (file, "dset4", t2, space, H5P_DEFAULT);
+	      
+
+
+ +

8. Data Conversion

+ +

The library is capable of converting data from one type to + another and does so automatically when reading or writing the + raw data of a dataset. The data type interface does not provide + functions to the application for changing data types directly, + but the user is allowed a certain amount of control over the + conversion process. + +

To ensure that data conversion rates exceed disk I/O rates, common data conversion paths can be hand-tuned and optimized for performance. If a hand-tuned conversion function is not available, then the library falls back to a slower but more general conversion function. Although conversion paths include data space conversion, only data type conversions are described here. Most applications will not be concerned with data type conversions since the library will contain hand-tuned conversion functions for many common conversion paths. In fact, if an application does define a conversion function which would be of general interest, we request that the function be submitted to the HDF5 development team for inclusion in the library (there might be less overhead involved with calling an internal conversion function than calling an application-defined conversion function).

Note: The alpha version of the library does not contain + a full set of conversions. It can convert from one integer + format to another and one struct to another. It can also + perform byte swapping when the source and destination types are + otherwise the same. + +

A conversion path contains a source and destination data type + and each path contains a hard conversion function + and/or a soft conversion function. The only difference + between hard and soft functions is the way in which the library + chooses which function applies: A hard function applies to a + specific conversion path while a soft function may apply to + multiple paths. When both hard and soft functions apply to a + conversion path, then the hard function is favored and when + multiple soft functions apply, the one defined last is favored. + +

A data conversion function is of type H5T_conv_t + which is defined as: + +

+

+typedef herr_t (*H5T_conv_t)(hid_t src_type,
+                             hid_t dest_type,
+			     H5T_cdata_t *cdata,
+			     size_t nelmts,
+			     void *buffer,
+                             void *background);
+    
+ +

The conversion function is called with the source and + destination data types (src_type and + dst_type), path-constant data (cdata), the + number of instances of the data type to convert + (nelmts), a buffer which initially contains an array of + data having the source type and on return will contain an array + of data having the destination type (buffer), and a + temporary or background buffer (background). Functions + return a negative value on failure and some other value on + success. + +

The command field of the cdata argument determines what happens within the conversion function. Its values can be:

+
H5T_CONV_INIT +
This command is sent to hard conversion functions when they're registered, or to soft conversion functions when the library is determining if a conversion can be used for a particular path. The src_type and dst_type are the end-points of the path being queried and cdata is all zero. The function should examine the source and destination types and return zero if the conversion is possible and negative otherwise (hard conversions need not do this since they've presumably been registered only on paths they support). If the conversion is possible the function may allocate and initialize private data and assign the pointer to the priv field of cdata (or private data can be initialized later). It should also initialize the need_bkg field described below. The buf and background pointers will be null pointers.

+
H5T_CONV_CONV +
This is the usual command, which indicates that data points should be converted. The conversion function should initialize the priv field of cdata if it wasn't initialized during the H5T_CONV_INIT command and then convert nelmts instances of the src_type to the dst_type. The buffer serves as both input and output. The background buffer is supplied according to the value of the need_bkg field of cdata (the values are described below).

+
H5T_CONV_FREE +
The conversion function is about to be removed from some + path and the private data (the + cdata->priv pointer) should be freed and + set to null. All other pointer arguments are null and the + nelmts argument is zero. + +

+
Others... +
Other commands might be implemented later and conversion + functions that don't support those commands should return a + negative value. +
+ + +

Whether a background buffer is supplied to a conversion + function, and whether the background buffer is initialized + depends on the value of cdata->need_bkg + which the conversion function should have initialized during the + H5T_CONV_INIT command. It can have one of these values: + +

+
H5T_BKG_NONE +
No background buffer will be supplied to the conversion + function. This is the default. + +

+
H5T_BKG_TEMP +
A background buffer will be supplied but it will not be + initialized. This is useful for those functions requiring some + extra buffer space as the buffer can probably be allocated + more efficiently by the library (the application can supply + the buffer as part of the dataset transfer template). + +

+
H5T_BKG_YES +
An initialized background buffer is passed to the conversion + function. The buffer is initialized with the current values + of the destination for the data which is passed in through the + buffer argument. It can be used to "fill in between + the cracks". For instance, if the destination type is a + compound data type and we are initializing only part of the + compound data type from the source type then the background + buffer can be used to initialize the other part of the + destination. +
+ +

Other fields of cdata can be read or written by + the conversion functions. Many of these contain + performance-measuring fields which can be printed by the + conversion function during the H5T_CONV_FREE + command which is issued whenever the function is removed from a + conversion path. + +

+
hbool_t recalc +
This field is set by the library when any other data type + conversion function is registered or unregistered. It allows + conversion functions to cache pointers to other conversion + functions and be notified when the cache should be + recalculated. + +

+
unsigned long ncalls +
This field contains the number of times the conversion + function was called with the command + H5T_CONV_CONV. It is updated automatically by + the library. + +

+
unsigned long nelmts +
This is the total number of data points converted by this + function and is updated automatically by the library. +
+ + + +
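For instance, a conversion function could report these statistics just before its private data is freed. A minimal sketch of such an H5T_CONV_FREE case, assuming the cdata fields listed above (the cray_ushort2be name refers to the example function shown later in this section):

+case H5T_CONV_FREE:
+    /* Report statistics gathered by the library for this function */
+    fprintf (stderr, "cray_ushort2be: %lu calls, %lu elements converted\n",
+             cdata->ncalls, cdata->nelmts);
+    free (cdata->priv);
+    cdata->priv = NULL;
+    break;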

Once a conversion function is written it can be registered and + unregistered with these functions: + +

+
herr_t H5Tregister_hard (const char *name, + hid_t src_type, hid_t dest_type, + H5T_conv_t func) +
Once a conversion function is written, the library must be + notified so it can be used. The function can be registered as a + hard conversion for one or more conversion paths by calling + H5Tregister_hard(), displacing any previous hard + conversion for those paths. The name is used only + for debugging but must be supplied. + +

+
herr_t H5Tregister_soft (const char *name, + H5T_class_t src_class, H5T_class_t dest_class, + H5T_conv_t func) +
The function can be registered as a generic function which + will be automatically added to any conversion path for which + it returns an indication that it applies. The name is used + only for debugging but must be supplied. + +

+
herr_t H5Tunregister (H5T_conv_t func) +
A function can be removed from the set of known conversion + functions by calling H5Tunregister(). The + function is removed from all conversion paths. +
+ +

+

+ + + + + +

Example: A conversion + function

+

Here's an example application-level function that converts a Cray unsigned short to any other big-endian unsigned integer without padding. A Cray short is a big-endian value which has 32 bits of precision in the high-order bits of a 64-bit word.

+typedef struct {
+    size_t dst_size;
+    int direction;
+} cray_ushort2be_t;
+
+herr_t
+cray_ushort2be (hid_t src_type, hid_t dst_type,
+                H5T_cdata_t *cdata,
+                size_t nelmts, void *buf,
+                void *background)
+{
+    unsigned char *src = (unsigned char *)buf;
+    unsigned char *dst = src;
+    cray_ushort2be_t *priv = NULL;
+    size_t dst_size, i, j;
+    int direction;
+
+    switch (cdata->command) {
+    case H5T_CONV_INIT:
+        /*
+         * We are being queried to see if we handle this
+         * conversion.  We can handle conversion from
+         * Cray unsigned short to any other big-endian
+         * unsigned integer that doesn't have padding.
+         */
+        if (!H5Tequal (src_type, H5T_CRAY_USHORT) ||
+            H5T_ORDER_BE != H5Tget_order (dst_type) ||
+            H5T_SGN_NONE != H5Tget_sign (dst_type) ||
+            8*H5Tget_size (dst_type) != H5Tget_precision (dst_type)) {
+            return -1;
+        }
+
+        /*
+         * Initialize private data.  If the destination size
+         * is larger than the source size, then we must
+         * process the elements from right to left.  This
+         * conversion never needs a background buffer.
+         */
+        cdata->need_bkg = H5T_BKG_NONE;
+        cdata->priv = priv = malloc (sizeof(cray_ushort2be_t));
+        priv->dst_size = H5Tget_size (dst_type);
+        if (priv->dst_size > 8) {
+            priv->direction = -1;
+        } else {
+            priv->direction = 1;
+        }
+        break;
+
+    case H5T_CONV_FREE:
+        /*
+         * Free private data.
+         */
+        free (cdata->priv);
+        cdata->priv = NULL;
+        break;
+
+    case H5T_CONV_CONV:
+        /*
+         * Convert each element; watch out for overlap of src
+         * with dst on the left-most element of the buffer.
+         */
+        priv = (cray_ushort2be_t *)(cdata->priv);
+        dst_size = priv->dst_size;
+        direction = priv->direction;
+        if (direction < 0) {
+            src += (nelmts - 1) * 8;
+            dst += (nelmts - 1) * dst_size;
+        }
+        for (i=0; i<nelmts; i++) {
+            if (src==dst && dst_size<4) {
+                /* Overlapping copy: shift the significant bytes left */
+                for (j=0; j<dst_size; j++) {
+                    dst[j] = src[j+4-dst_size];
+                }
+            } else {
+                /* Copy the low-order source bytes, zero-fill the rest */
+                for (j=0; j<4 && j<dst_size; j++) {
+                    dst[dst_size-(j+1)] = src[3-j];
+                }
+                for (j=4; j<dst_size; j++) {
+                    dst[dst_size-(j+1)] = 0;
+                }
+            }
+            src += 8 * direction;
+            dst += dst_size * direction;
+        }
+        break;
+
+    default:
+        /*
+         * Unknown command.
+         */
+        return -1;
+    }
+    return 0;
+}
+	      
+ +

The background argument is ignored since + it's generally not applicable to atomic data types. +

+
+ +

+

+ + + + + +

Example: Soft + Registration

+

The conversion function described in the previous example applies to more than one conversion path. Instead of enumerating all possible paths, we register it as a soft function and allow it to decide which paths it can handle.

+H5Tregister_soft ("cus2be", H5T_INTEGER, H5T_INTEGER, cray_ushort2be);
+	      
+ +

This causes it to be consulted for any conversion + from an integer type to another integer type. The + first argument is just a short identifier which will + be printed with the data type conversion statistics. +

+
+ + +
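For comparison, the same function could be registered as a hard conversion on one explicit path using the H5Tregister_hard() signature shown above (the choice of H5T_STD_U16BE as the destination is purely illustrative):

+H5Tregister_hard ("cus2be", H5T_CRAY_USHORT, H5T_STD_U16BE, cray_ushort2be);

A hard registration takes precedence over any soft function for that particular path.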

NOTE: The idea of a master soft list and being able to query conversion functions for their abilities tries to overcome problems we saw with AIO. Namely, that there was a dichotomy between generic conversions and specific conversions that made it very difficult to write a conversion function that operated on, say, integers of any size and order as long as they don't have zero padding. The AIO mechanism required such a function to be explicitly registered (like H5Tregister_hard()) for each and every possible conversion path whether that conversion path was actually used or not.


+
Robb Matzke
+
Quincey Koziol
+ + +Last modified: Thu Jun 18 13:59:12 EDT 1998 + + + diff --git a/doc/html/Errors.html b/doc/html/Errors.html new file mode 100644 index 0000000..4c3637d --- /dev/null +++ b/doc/html/Errors.html @@ -0,0 +1,281 @@ + + + + The Error Handling Interface (H5E) + + + +

The Error Handling Interface (H5E)

+ +

1. Introduction

+ +

When an error occurs deep within the HDF5 library a record is + pushed onto an error stack and that function returns a failure + indication. Its caller detects the failure, pushes another + record onto the stack, and returns a failure indication. This + continues until the application-called API function returns a + failure indication (a negative integer or null pointer). The + next API function which is called (with a few exceptions) resets + the stack. + +

In normal circumstances, an error causes the stack to be printed on the standard error stream. The first item, number "#000", is produced by the API function itself and is usually sufficient to indicate to the application programmer what went wrong.

+

+ + + + + +

Example: An Error Message

+

If an application calls H5Tclose on a + predefined data type then the following message is + printed on the standard error stream. This is a + simple error that has only one component, the API + function; other errors may have many components. + +

+HDF5-DIAG: Error detected in thread 0.  Back trace follows.
+  #000: H5T.c line 462 in H5Tclose(): predefined data type
+    major(01): Function argument
+    minor(05): Bad value
+	      
+
+
+ +

The error stack can also be printed and manipulated by these functions, but if an application wishes to make explicit calls to H5Eprint() then the automatic printing should be turned off to prevent error messages from being displayed twice (see H5Eset_auto() below).

+
herr_t H5Eprint (FILE *stream) +
The error stack is printed on the specified stream. Even if + the error stack is empty a one-line message will be printed: + HDF5-DIAG: Error detected in thread 0. + +

+
herr_t H5Eclear (void) +
The error stack can be explicitly cleared by calling this + function. The stack is also cleared whenever an API function + is called, with certain exceptions (for instance, + H5Eprint()). +
+ +

Sometimes an application will call a function for the sake of + its return value, fully expecting the function to fail. Under + these conditions, it would be misleading if an error message + were automatically printed. Automatic printing of messages is + controlled by the H5Eset_auto() function: + +

+
herr_t H5Eset_auto (herr_t(*func)(void*), + void *client_data) +
If func is not a null pointer, then the function to + which it points will be called automatically when an API + function is about to return an indication of failure. The + function is called with a single argument, the + client_data pointer. When the library is first + initialized the auto printing function is set to + H5Eprint() (cast appropriately) and + client_data is the standard error stream pointer, + stderr. + +

+
herr_t H5Eget_auto (herr_t(**func)(void*), + void **client_data) +
This function returns the current automatic error traversal + settings through the func and client_data + arguments. Either (or both) arguments may be null pointers in + which case the corresponding information is not returned. +
+ +

+

+ + + + + +

Example: Error Control

+

An application can temporarily turn off error + messages while "probing" a function. + +

+/* Save old error handler */
+herr_t (*old_func)(void*);
+void *old_client_data;
+H5Eget_auto(&old_func, &old_client_data);
+
+/* Turn off error handling */
+H5Eset_auto(NULL, NULL);
+
+/* Probe. Likely to fail, but that's okay */
+status = H5Fopen (......);
+
+/* Restore previous error handler */
+H5Eset_auto(old_func, old_client_data);
+	      
+ +

Or automatic printing can be disabled altogether and + error messages can be explicitly printed. + +

+/* Turn off error handling permanently */
+H5Eset_auto (NULL, NULL);
+
+/* If failure, print error message */
+if (H5Fopen (....)<0) {
+    H5Eprint (stderr);
+    exit (1);
+}
+	      
+
+
+ +

The application is allowed to define an automatic error + traversal function other than the default + H5Eprint(). For instance, one could define a + function that prints a simple, one-line error message to the + standard error stream and then exits. + +

+

+ + + + + +

Example: Simple Messages

+

The application defines a function to print a simple + error message to the standard error stream. + +

+herr_t
+my_hdf5_error_handler (void *unused)
+{
+   fprintf (stderr, "An HDF5 error was detected. Bye.\n");
+   exit (1);
+}
+	      
+ +

The function is installed as the error handler by + saying + +

+H5Eset_auto (my_hdf5_error_handler, NULL);
+	      
+
+
+ +

The H5Eprint() function is actually just a wrapper + around the more complex H5Ewalk() function which + traverses an error stack and calls a user-defined function for + each member of the stack. + +

+
herr_t H5Ewalk (H5E_direction_t direction, + H5E_walk_t func, void *client_data) +
The error stack is traversed and func is called for + each member of the stack. Its arguments are an integer + sequence number beginning at zero (regardless of + direction), a pointer to an error description record, + and the client_data pointer. If direction + is H5E_WALK_UPWARD then traversal begins at the + inner-most function that detected the error and concludes with + the API function. The opposite order is + H5E_WALK_DOWNWARD. + +

+
typedef herr_t (*H5E_walk_t)(int n, + H5E_error_t *eptr, void + *client_data) +
An error stack traversal callback function takes three + arguments: n is a sequence number beginning at zero + for each traversal, eptr is a pointer to an error + stack member, and client_data is the same pointer + passed to H5Ewalk(). + +

+
typedef struct {
+    H5E_major_t maj_num;
+    H5E_minor_t min_num;
+    const char  *func_name;
+    const char  *file_name;
+    unsigned    line;
+    const char  *desc;
+} H5E_error_t;
+
The maj_num and min_num are major + and minor error numbers, func_name is the name of + the function where the error was detected, + file_name and line locate the error + within the HDF5 library source code, and desc + points to a description of the error. + +

+
const char *H5Eget_major (H5E_major_t num) +
const char *H5Eget_minor (H5E_minor_t num) +
These functions take a major or minor error number and return a constant string which describes the error. If num is out of range then a string like "Invalid major error number" is returned.
+ +

+

+ + + + + +

Example: H5Ewalk_cb

+

This is the implementation of the default error stack + traversal callback. + +

+herr_t
+H5Ewalk_cb(int n, H5E_error_t *err_desc, void *client_data)
+{
+    FILE		*stream = (FILE *)client_data;
+    const char		*maj_str = NULL;
+    const char		*min_str = NULL;
+    const int		indent = 2;
+
+    /* Check arguments */
+    assert (err_desc);
+    if (!stream) stream = stderr;
+
+    /* Get descriptions for the major and minor error numbers */
+    maj_str = H5Eget_major (err_desc->maj_num);
+    min_str = H5Eget_minor (err_desc->min_num);
+
+    /* Print error message */
+    fprintf (stream, "%*s#%03d: %s line %u in %s(): %s\n",
+	     indent, "", n, err_desc->file_name, err_desc->line,
+	     err_desc->func_name, err_desc->desc);
+    fprintf (stream, "%*smajor(%02d): %s\n",
+	     indent*2, "", err_desc->maj_num, maj_str);
+    fprintf (stream, "%*sminor(%02d): %s\n",
+	     indent*2, "", err_desc->min_num, min_str);
+
+    return 0;
+}
+	      
+
+
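An application can also traverse the stack explicitly with this callback. For example, to print the stack beginning at the API function and descending to the innermost detection point:

+H5Ewalk (H5E_WALK_DOWNWARD, H5Ewalk_cb, stderr);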
+ + + +
+
Robb Matzke
+ + +Last modified: Wed Mar 4 10:06:17 EST 1998 + + + diff --git a/doc/html/ExternalFiles.html b/doc/html/ExternalFiles.html new file mode 100644 index 0000000..39ebd2b --- /dev/null +++ b/doc/html/ExternalFiles.html @@ -0,0 +1,278 @@ + + + + External Files in HDF5 + + + +

External Files in HDF5

+ +

Overview of Layers

+ +

This table shows some of the layers of HDF5. Each layer calls + functions at the same or lower layers and never functions at + higher layers. An object identifier (OID) takes various forms + at the various layers: at layer 0 an OID is an absolute physical + file address; at layers 1 and 2 it's an absolute virtual file + address. At layers 3 through 6 it's a relative address, and at + layers 7 and above it's an object handle. + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Layer-7: Groups; Datasets
Layer-6: Indirect Storage; Symbol Tables
Layer-5: B-trees; Object Hdrs; Heaps
Layer-4: Caching
Layer-3: H5F chunk I/O
Layer-2: H5F low
Layer-1: File Family; Split Meta/Raw
Layer-0: Section-2 I/O; Standard I/O; Malloc/Free
+
+ +

Single Address Space

+ +

The simplest form of hdf5 file is a single file containing only + hdf5 data. The file begins with the boot block, which is + followed until the end of the file by hdf5 data. The next most + complicated file allows non-hdf5 data (user defined data or + internal wrappers) to appear before the boot block and after the + end of the hdf5 data. The hdf5 data is treated as a single + linear address space in both cases. + +

The next level of complexity comes when non-hdf5 data is + interspersed with the hdf5 data. We handle that by including + the non-hdf5 interspersed data in the hdf5 address space and + simply not referencing it (eventually we might add those + addresses to a "do-not-disturb" list using the same mechanism as + the hdf5 free list, but it's not absolutely necessary). This is + implemented except for the "do-not-disturb" list. + +

The most complicated single address space hdf5 file is when we allow the address space to be split among multiple physical files. For instance, a >2GB file can be split into smaller chunks and transferred to a 32-bit machine, then accessed as a single logical hdf5 file. The library already supports >32 bit addresses, so at layer 1 we split a 64-bit address into a 32-bit file number and a 32-bit offset (the 64 and 32 are arbitrary). The rest of the library still operates with a linear address space.

Another variation might be a family of two files where all the + meta data is stored in one file and all the raw data is stored + in another file to allow the HDF5 wrapper to be easily replaced + with some other wrapper. + +

The H5Fcreate and H5Fopen functions + would need to be modified to pass file-type info down to layer 2 + so the correct drivers can be called and parameters passed to + the drivers to initialize them. + +

Implementation

+ +

I've implemented fixed-size family members. The entire hdf5 file is partitioned into members where each member is the same size. The family scheme is used if one passes a name to H5F_open (which is called by H5Fopen() and H5Fcreate()) that contains a printf(3c)-style integer format specifier. Currently, the default low-level file driver is used for all family members (H5F_LOW_DFLT, usually set to be Section 2 I/O or Section 3 stdio), but we'll probably eventually want to pass that as a parameter of the file access template, which hasn't been implemented yet. When creating a family, a default family member size is used (defined at the top of H5Ffamily.c, currently 64MB) but that also should be settable in the file access template. When opening an existing family, the size of the first member is used to determine the member size (flushing/closing a family ensures that the first member is the correct size) but the other family members don't have to be that large (the local address space, however, is logically the same size for all members).

I haven't implemented a split meta/raw family yet but am rather curious to see how it would perform. I was planning to use the `.h5' extension for the meta data file and `.raw' for the raw data file. The high-order bit in the address would determine whether the address refers to meta data or raw data. If the user passes a name that ends with `.raw' to H5F_open then we'll choose the split family and use the default low level driver for each of the two family members. Eventually we'll want to pass these kinds of things through the file access template instead of relying on naming convention.

External Raw Data

+ +

We also need the ability to point to raw data that isn't in the + HDF5 linear address space. For instance, a dataset might be + striped across several raw data files. + +

Fortunately, the only two packages that need to be aware of + this are the packages for reading/writing contiguous raw data + and discontiguous raw data. Since contiguous raw data is a + special case, I'll discuss how to implement external raw data in + the discontiguous case. + +

Discontiguous data is stored as a B-tree whose keys are the + chunk indices and whose leaf nodes point to the raw data by + storing a file address. So what we need is some way to name the + external files, and a way to efficiently store the external file + name for each chunk. + +

I propose adding to the object header an External File + List message that is a 1-origin array of file names. + Then, in the B-tree, each key has an index into the External + File List (or zero for the HDF5 file) for the file where the + chunk can be found. The external file index is only used at + the leaf nodes to get to the raw data (the entire B-tree is in + the HDF5 file) but because of the way keys are copied among + the B-tree nodes, it's much easier to store the index with + every key. + +

Multiple HDF5 Files

+ +

One might also want to combine two or more HDF5 files in a + manner similar to mounting file systems in Unix. That is, the + group structure and meta data from one file appear as though + they exist in the first file. One opens File-A, and then + mounts File-B at some point in File-A, the mount + point, so that traversing into the mount point actually + causes one to enter the root object of File-B. File-A and + File-B are each complete HDF5 files and can be accessed + individually without mounting them. + +

We need a couple additional pieces of machinery to make this + work. First, an haddr_t type (a file address) doesn't contain + any info about which HDF5 file's address space the address + belongs to. But since haddr_t is an opaque type except at + layers 2 and below, it should be quite easy to add a pointer to + the HDF5 file. This would also remove the H5F_t argument from + most of the low-level functions since it would be part of the + OID. + +

The other thing we need is a table of mount points and some + functions that understand them. We would add the following + table to each H5F_t struct: + +

+struct H5F_mount_t {
+   H5F_t *parent;         /* Parent HDF5 file if any */
+   struct {
+      H5F_t *f;           /* File which is mounted */
+      haddr_t where;      /* Address of mount point */
+   } *mount;              /* Array sorted by mount point */
+   intn nmounts;          /* Number of mounted files */
+   intn alloc;            /* Size of mount table */
+};
+    
+ +

The H5Fmount function takes the ID of an open + file, the name of a to-be-mounted file, the name of the mount + point, and a file access template (like H5Fopen). + It opens the new file and adds a record to the parent's mount + table. The H5Funmount function takes the parent + file ID and the name of the mount point and closes the file + that's mounted at that point. The H5Fclose + function closes/unmounts files recursively. + +
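As a sketch of the intended usage (hypothetical, since these functions are only proposed here and the exact signatures may change):

+hid_t file_a = H5Fopen ("file_a.h5", H5F_ACC_RDWR, H5P_DEFAULT);
+
+/* Mount file_b.h5 at the group /mnt of File-A (proposed call) */
+H5Fmount (file_a, "file_b.h5", "/mnt", H5P_DEFAULT);
+
+/* Names such as /mnt/dataset now resolve into File-B */
+
+H5Funmount (file_a, "/mnt");  /* close the file mounted at /mnt */
+H5Fclose (file_a);            /* closes/unmounts recursively */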

The H5G_iname function which translates a name to + a file address (haddr_t) looks at the mount table + at each step in the translation and switches files where + appropriate. All name-to-address translations occur through + this function. + +

How Long?

+ +

I'm expecting to be able to implement the two new flavors of + single linear address space in about two days. It took two hours + to implement the malloc/free file driver at level zero and I + don't expect this to be much more work. + +

I'm expecting three days to implement the external raw data for discontiguous arrays. Adding the file index to the B-tree is quite trivial; adding the external file list message shouldn't be too hard since the object header message class from which this message derives is fully implemented; and changing H5F_istore_read should be trivial. Most of the time will be spent designing a way to cache Unix file descriptors efficiently since the total number of open files allowed per process could be much smaller than the total number of HDF5 files and external raw data files.

I'm expecting four days to implement being able to mount one + HDF5 file on another. I was originally planning a lot more, but + making haddr_t opaque turned out to be much easier + than I planned (I did it last Fri). Most of the work will + probably be removing the redundant H5F_t arguments for lots of + functions. + +

Conclusion

+ +

The external raw data could be implemented as a single linear address space, but doing so would require one to allocate large enough file addresses throughout the file (>32 bits) before the file was created. It would make mixing an HDF5 file family with external raw data, or an external HDF5 wrapper around an HDF4 file, a more difficult process. So I consider the implementation of external raw data files as a single HDF5 linear address space a kludge.

The ability to mount one HDF5 file on another might not be a + very important feature especially since each HDF5 file must be a + complete file by itself. It's not possible to stripe an array + over multiple HDF5 files because the B-tree wouldn't be complete + in any one file, so the only choice is to stripe the array + across multiple raw data files and store the B-tree in the HDF5 + file. On the other hand, it might be useful if one file + contains some public data which can be mounted by other files + (e.g., a mesh topology shared among collaborators and mounted by + files that contain other fields defined on the mesh). Of course + the applications can open the two files separately, but it might + be more portable if we support it in the library. + +

So we're looking at about two weeks to implement all three + versions. I didn't get a chance to do any of them in AIO + although we had long-term plans for the first two with a + possibility of the third. They'll be much easier to implement in + HDF5 than AIO since I've been keeping these in mind from the + start. + +


+
Robb Matzke
+ + +Last modified: Wed Nov 12 15:01:14 EST 1997 + + + diff --git a/doc/html/Files.html b/doc/html/Files.html new file mode 100644 index 0000000..791cc1f --- /dev/null +++ b/doc/html/Files.html @@ -0,0 +1,529 @@ + + + + HDF5 Files + + + +

Files

+ +

1. Introduction

+ +

HDF5 files are composed of a "boot block" describing information + required to portably access files on multiple platforms, followed + by information about the groups in a file and the datasets in the + file. The boot block contains information about the size of offsets + and lengths of objects, the number of entries in symbol tables + (used to store groups) and additional version information for the + file. + +

2. File access modes

+ +

The HDF5 library assumes that all files are implicitly opened for read access at all times. Passing the H5F_ACC_RDWR flag to H5Fopen() allows write access to a file as well. H5Fcreate() assumes write access as well as read access; passing H5F_ACC_TRUNC forces the truncation of an existing file, without which H5Fcreate() will fail rather than overwrite an existing file.

3. Creating, Opening, and Closing Files

+ +

Files are created with the H5Fcreate() function, + and existing files can be accessed with H5Fopen(). Both + functions return an object ID which should be eventually released by + calling H5Fclose(). + +

+
hid_t H5Fcreate (const char *name, uintn + flags, hid_t create_properties, hid_t + access_properties) +
This function creates a new file with the specified name in + the current directory. The file is opened with read and write + permission, and if the H5F_ACC_TRUNC flag is set, + any current file is truncated when the new file is created. + If a file of the same name exists and the + H5F_ACC_TRUNC flag is not set (or the + H5F_ACC_EXCL bit is set), this function will + fail. Passing H5P_DEFAULT for the creation + and/or access property lists uses the library's default + values for those properties. Creating and changing the + values of a property list is documented further below. The + return value is an ID for the open file and it should be + closed by calling H5Fclose() when it's no longer + needed. A negative value is returned for failure. + +

+
hid_t H5Fopen (const char *name, uintn + flags, hid_t access_properties) +
This function opens an existing file with read permission + and write permission if the H5F_ACC_RDWR flag is + set. The access_properties is a file access property + list ID or H5P_DEFAULT for the default I/O access + parameters. Creating and changing the parameters for access + templates is documented further below. Files which are opened + more than once return a unique identifier for each + H5Fopen() call and can be accessed through all + file IDs. The return value is an ID for the open file and it + should be closed by calling H5Fclose() when it's + no longer needed. A negative value is returned for failure. + +

+
herr_t H5Fclose (hid_t file_id) +
This function releases resources used by a file which was + opened by H5Fcreate() or H5Fopen(). After + closing a file the file_id should not be used again. This + function returns zero for success or a negative value for failure. +
+ +
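A minimal create/close sequence using the default property lists looks like this (error checking is abbreviated; property lists are described in the next section):

+hid_t file = H5Fcreate ("example.h5", H5F_ACC_TRUNC,
+                        H5P_DEFAULT, H5P_DEFAULT);
+if (file < 0) {
+    /* handle the error */
+}
+/* ... access the file ... */
+H5Fclose (file);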

4. File Property Lists

+ +

Additional parameters to H5Fcreate() or + H5Fopen() are passed through property list + objects, which are created with the H5Pcreate() + function. These objects allow many parameters of a file's + creation or access to be changed from the default values. + Property lists are used as a portable and extensible method of + modifying multiple parameter values with simple API functions. + There are two kinds of file-related property lists, + namely file creation properties and file access properties. + +

4.1. File Creation Properties

+ +

File creation property lists apply to H5Fcreate() only + and are used to control the file meta-data which is maintained + in the boot block of the file. The parameters which can be + modified are: + +

+
User-Block Size
The "user-block" is a fixed length block of + data located at the beginning of the file which is ignored by the + HDF5 library and may be used to store any data information found + to be useful to applications. This value may be set to any power + of two equal to 512 or greater (i.e. 512, 1024, 2048, etc). This + parameter is set and queried with the + H5Pset_userblock() and + H5Pget_userblock() calls. + +

+
Offset and Length Sizes +
The number of bytes used to store the offset and length of + objects in the HDF5 file can be controlled with this + parameter. Values of 2, 4 and 8 bytes are currently + supported to allow 16-bit, 32-bit and 64-bit files to + be addressed. These parameters are set and queried + with the H5Pset_sizes() and + H5Pget_sizes() calls. + +

+
Symbol Table Parameters +
The size of symbol table B-trees can be controlled by setting + the 1/2 rank and 1/2 node size parameters of the B-tree. These + parameters are set and queried with the + H5Pset_sym_k() and H5Pget_sym_k() calls. + +

+
Indexed Storage Parameters +
The size of indexed storage B-trees can be controlled by + setting the 1/2 rank and 1/2 node size parameters of the B-tree. + These parameters are set and queried with the + H5Pset_istore_k() and H5Pget_istore_k() + calls. +
+ +
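For example, a creation property list that combines a 1 kB user block with explicit symbol table B-tree parameters might be built as follows (a sketch; the numeric values are illustrative, and the two H5Pset_sym_k() arguments are assumed to be the 1/2 rank and 1/2 node size in that order):

+hid_t create_plist = H5Pcreate (H5P_FILE_CREATE);
+H5Pset_userblock (create_plist, 1024);  /* a power of two, at least 512 */
+H5Pset_sym_k (create_plist, 16, 4);     /* 1/2 rank and 1/2 node size */
+
+hid_t file_id = H5Fcreate ("test.h5", H5F_ACC_TRUNC,
+                           create_plist, H5P_DEFAULT);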

4.2. File Access Property Lists

+ +

File access property lists apply to H5Fcreate() or + H5Fopen() and are used to control different methods of + performing I/O on files. + +

+
Unbuffered I/O +
Local permanent files can be accessed with the functions described + in Section 2 of the Posix manual, namely open(), + lseek(), read(), write(), and + close(). The lseek64() function is used + on operating systems that support it. This driver is enabled and + configured with H5Pset_sec2(), and queried with + H5Pget_sec2(). + +

+
Buffered I/O +
Local permanent files can be accessed with the functions declared + in the stdio.h header file, namely + fopen(), fseek(), fread(), + fwrite(), and fclose(). The + fseek64() function is used on operating systems that + support it. This driver is enabled and configured with + H5Pset_stdio(), and queried with + H5Pget_stdio(). + +

+
Memory I/O +
Local temporary files can be created and accessed directly from + memory without ever creating permanent storage. The library uses + malloc() and free() to create storage + space for the file. The total size of the file must be small enough + to fit in virtual memory. The name supplied to + H5Fcreate() is irrelevant, and H5Fopen() + will always fail. + +

+
Parallel Files using MPI I/O +
This driver allows parallel access to a file through the MPI I/O + library. The parameters which can be modified are the MPI + communicator, the info object, and the access mode. + The communicator and info object are saved and then + passed to MPI_File_open() during file creation or open. + The access_mode controls the kind of parallel access the application + intends. (Note that it is likely that the next API revision will + remove the access_mode parameter and have access control specified + via the raw data transfer property list of H5Dread() + and H5Dwrite().) These parameters are set and queried + with the H5Pset_mpi() and H5Pget_mpi() + calls. + +

+
Data Alignment +
Sometimes file access is faster if certain things are + aligned on file blocks. This can be controlled by setting + alignment properties of a file access property list with the + H5Pset_alignment() function. Any allocation + request at least as large as some threshold will be aligned on + an address which is a multiple of some number. +
+ +
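A sketch of setting an alignment property, assuming the threshold argument comes before the alignment value (both in bytes):

+hid_t access_plist = H5Pcreate (H5P_FILE_ACCESS);
+
+/* Align every allocation of 64 kB or more on a 256 kB boundary */
+H5Pset_alignment (access_plist, 65536, 262144);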

5. Examples of using file templates

+ +

5.1. Example of using file creation templates

+ +

The following example shows how to create a file with 64-bit object offsets and lengths:
+

+        hid_t create_template;
+        hid_t file_id;
+
+        create_template = H5Pcreate(H5P_FILE_CREATE);
+        H5Pset_sizes(create_template, 8, 8);
+
+        file_id = H5Fcreate("test.h5", H5F_ACC_TRUNC,
+                             create_template, H5P_DEFAULT);
+        .
+        .
+        .
+        H5Fclose(file_id);
+    
+ +

5.2. Example of using file access templates

+ +

The following example shows how to open an existing file for independent dataset access via MPI parallel I/O:
+

+        hid_t access_template;
+        hid_t file_id;
+
+        access_template = H5Pcreate(H5P_FILE_ACCESS);
+        H5Pset_mpi(access_template, MPI_COMM_WORLD, MPI_INFO_NULL);
+
+	/* H5Fopen must be called collectively */
+        file_id = H5Fopen("test.h5", H5F_ACC_RDWR, access_template);
+        .
+        .
+        .
+	/* H5Fclose must be called collectively */
+        H5Fclose(file_id);
+        
+ + +

6. Low-level File Drivers

+ +

HDF5 is able to access its address space through various types of + low-level file drivers. For instance, an address space might + correspond to a single file on a Unix file system, multiple files on a + Unix file system, multiple files on a parallel file system, or a block + of memory within the application. Generally, an HDF5 address space is + referred to as an "HDF5 file" regardless of how the space is organized + at the storage level. + +

6.1 Unbuffered Permanent Files

+ +

The sec2 driver uses functions from section 2 of the + Posix manual to access files stored on a local file system. These are + the open(), close(), read(), + write(), and lseek() functions. If the + operating system supports lseek64() then it is used instead + of lseek(). The library buffers meta data regardless of + the low-level driver, but using this driver prevents data from being + buffered again by the lowest layers of the HDF5 library. + +

+
H5F_driver_t H5Pget_driver (hid_t + access_properties) +
This function returns the constant H5F_LOW_SEC2 if the + sec2 driver is defined as the low-level driver for the + specified access property list. + +

+
herr_t H5Pset_sec2 (hid_t access_properties) +
The file access properties are set to use the sec2 + driver. Any previously defined driver properties are erased from the + property list. Additional parameters may be added to this function in + the future. + +

+
herr_t H5Pget_sec2 (hid_t access_properties) +
If the file access property list is set to the sec2 driver + then this function returns zero; otherwise it returns a negative + value. In the future, additional arguments may be added to this + function to match those added to H5Pset_sec2(). +
+ +

6.2 Buffered Permanent Files

+ +

The stdio driver uses the functions declared in the + stdio.h header file to access permanent files in a local + file system. These are the fopen(), fclose(), + fread(), fwrite(), and fseek() + functions. If the operating system supports fseek64() then + it is used instead of fseek(). Use of this driver + introduces an additional layer of buffering beneath the HDF5 library. + +

+
H5F_driver_t H5Pget_driver(hid_t + access_properties) +
This function returns the constant H5F_LOW_STDIO if the + stdio driver is defined as the low-level driver for the + specified access property list. + +

+
herr_t H5Pset_stdio (hid_t access_properties) +
The file access properties are set to use the stdio + driver. Any previously defined driver properties are erased from the + property list. Additional parameters may be added to this function in + the future. + +

+
herr_t H5Pget_stdio (hid_t access_properties) +
If the file access property list is set to the stdio driver + then this function returns zero; otherwise it returns a negative + value. In the future, additional arguments may be added to this + function to match those added to H5Pset_stdio(). +
+ +

6.3 Buffered Temporary Files

+ +

The core driver uses malloc() and free() to allocate space for a file in the heap. Reading and writing to a file of this type results in memory-to-memory copies instead of disk I/O, and is therefore somewhat faster. However, the total file size must not exceed the amount of available virtual memory, and only one HDF5 file handle can access the file (because the name of such a file is insignificant and H5Fopen() always fails).

+
H5F_driver_t H5Pget_driver (hid_t + access_properties) +
This function returns the constant H5F_LOW_CORE if the + core driver is defined as the low-level driver for the + specified access property list. + +

+
herr_t H5Pset_core (hid_t access_properties, size_t + block_size) +
The file access properties are set to use the core + driver and any previously defined driver properties are erased from + the property list. Memory for the file will always be allocated in + units of the specified block_size. Additional parameters may + be added to this function in the future. + +

+
herr_t H5Pget_core (hid_t access_properties, size_t + *block_size) +
If the file access property list is set to the core driver + then this function returns zero and block_size is set to the + block size used for the file; otherwise it returns a negative + value. In the future, additional arguments may be added to this + function to match those added to H5Pset_core(). +
+ +
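A heap-backed scratch file might therefore be created like this (the 64 kB block size is an arbitrary choice):

+hid_t access_plist = H5Pcreate (H5P_FILE_ACCESS);
+H5Pset_core (access_plist, 65536);  /* allocate memory in 64 kB blocks */
+
+/* The name is insignificant; all I/O happens in memory */
+hid_t file_id = H5Fcreate ("scratch.h5", H5F_ACC_TRUNC,
+                           H5P_DEFAULT, access_plist);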

6.4 Parallel Files

+ +

This driver uses MPI I/O to provide parallel access to a file. + +

+
H5F_driver_t H5Pget_driver (hid_t + access_properties) +
This function returns the constant H5F_LOW_MPI if the + mpi driver is defined as the low-level driver for the + specified access property list. + +

+
herr_t H5Pset_mpi (hid_t access_properties, MPI_Comm + comm, MPI_info info) +
The file access properties are set to use the mpi + driver and any previously defined driver properties are erased from + the property list. Additional parameters may be added to this + function in the future. + +

+
herr_t H5Pget_mpi (hid_t access_properties, MPI_Comm + *comm, MPI_info *info) +
If the file access property list is set to the mpi driver + then this function returns zero and comm, and info + are set to the values stored in the property + list; otherwise the function returns a negative value. In the future, + additional arguments may be added to this function to match those + added to H5Pset_mpi(). +
+ +

6.5 File Families

+ +

A single HDF5 address space may be split into multiple files which, + together, form a file family. Each member of the family must be the + same logical size although the size and disk storage reported by + ls(1) may be substantially smaller. The name passed to + H5Fcreate() or H5Fopen() should include a + printf(3c) style integer format specifier which will be + replaced with the family member number (the first family member is + zero). + +

Any HDF5 file can be split into a family of files by running + the file through split(1) and numbering the output + files. However, because HDF5 is lazy about extending the size + of family members, a valid file cannot generally be created by + concatenation of the family members. Additionally, + split and cat don't attempt to + generate files with holes. The h5repart program + can be used to repartition an HDF5 file or family into another + file or family and preserves holes in the files. + +

+
h5repart [-v] [-b + block_size[suffix]] [-m + member_size[suffix]] source + destination +
This program repartitions an HDF5 file by copying the source + file or family to the destination file or family preserving + holes in the underlying Unix files. Families are used for the + source and/or destination if the name includes a + printf-style integer format such as "%d". The + -v switch prints input and output file names on + the standard error stream for progress monitoring, + -b sets the I/O block size (the default is 1kB), + and -m sets the output member size if the + destination is a family name (the default is 1GB). The block + and member sizes may be suffixed with the letters + g, m, or k for GB, MB, + or kB respectively. + +

+
H5F_driver_t H5Pget_driver (hid_t + access_properties) +
This function returns the constant H5F_LOW_FAMILY if + the family driver is defined as the low-level driver for the + specified access property list. + +

+
herr_t H5Pset_family (hid_t access_properties, + hsize_t memb_size, hid_t member_properties) +
The file access properties are set to use the family driver and any previously defined driver properties are erased from the property list. Each member of the file family will use member_properties as its file access property list. The memb_size argument gives the logical size in bytes of each family member but the actual size could be smaller depending on whether the file contains holes. The member size is only used when creating a new file or truncating an existing file; otherwise the member size comes from the size of the first member of the family being opened. Note: if the size of the off_t type is four bytes then the maximum family member size is usually 2^31-1 because the byte at offset 2,147,483,647 is generally inaccessible. Additional parameters may be added to this function in the future.

+
herr_t H5Pget_family (hid_t access_properties, + hsize_t *memb_size, hid_t + *member_properties) +
If the file access property list is set to the family driver then this function returns zero; otherwise the function returns a negative value. On successful return, member_properties will point to a copy of the member access property list which should be closed by calling H5Pclose() when the application is finished with it. If memb_size is non-null then it will contain the logical size in bytes of each family member. In the future, additional arguments may be added to this function to match those added to H5Pset_family().
+ +
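For example, a family with one-gigabyte members could be created as follows (the member size and name pattern are arbitrary choices):

+hid_t access_plist = H5Pcreate (H5P_FILE_ACCESS);
+H5Pset_family (access_plist, (hsize_t)1<<30, H5P_DEFAULT); /* 1 GB members */
+
+/* "%05d" is replaced with the member number: big00000.h5, big00001.h5, ... */
+hid_t file_id = H5Fcreate ("big%05d.h5", H5F_ACC_TRUNC,
+                           H5P_DEFAULT, access_plist);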

6.6 Split Meta/Raw Files

+ +

On occasion, it might be useful to separate meta data from raw + data. The split driver does this by creating two files: one for + meta data and another for raw data. The application provides a base + file name to H5Fcreate() or H5Fopen() and this + driver appends a file extension which defaults to ".meta" for the meta + data file and ".raw" for the raw data file. Each file can have its own + file access property list which allows, for instance, a split file with + meta data stored with the core driver and raw data stored with + the sec2 driver. + +

+
H5F_driver_t H5Pget_driver (hid_t + access_properties) +
This function returns the constant H5F_LOW_SPLIT if + the split driver is defined as the low-level driver for the + specified access property list. + +

+
herr_t H5Pset_split (hid_t access_properties, + const char *meta_extension, hid_t + meta_properties, const char *raw_extension, hid_t + raw_properties) +
The file access properties are set to use the split + driver and any previously defined driver properties are erased from + the property list. The meta file will have a name which is formed by + adding meta_extension (or ".meta") to the end of the base + name and will be accessed according to the + meta_properties. The raw file will have a name which is + formed by appending raw_extension (or ".raw") to the base + name and will be accessed according to the raw_properties. + Additional parameters may be added to this function in the future. + +

+
herr_t H5Pget_split (hid_t access_properties, + size_t meta_ext_size, const char *meta_extension, + hid_t meta_properties, size_t raw_ext_size, const + char *raw_extension, hid_t *raw_properties) +
If the file access property list is set to the split + driver then this function returns zero; otherwise the function + returns a negative value. On successful return, + meta_properties and raw_properties will + point to copies of the meta and raw access property lists + which should be closed by calling H5Pclose() when + the application is finished with them, but if the meta and/or + raw file has no property list then a negative value is + returned for that property list handle. Also, if + meta_extension and/or raw_extension are + non-null pointers, at most meta_ext_size or + raw_ext_size characters of the meta or raw file name + extension will be copied to the specified buffer. If the + actual name is longer than what was requested then the result + will not be null terminated (similar to + strncpy()). In the future, additional arguments + may be added to this function to match those added to + H5Pset_split(). +
+ + +
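For instance, the meta data could be held in memory with the core driver while the raw data goes through the sec2 driver, as suggested above (a sketch using the default extensions):

+hid_t meta_plist = H5Pcreate (H5P_FILE_ACCESS);
+H5Pset_core (meta_plist, 65536);    /* meta data kept in memory */
+
+hid_t raw_plist = H5Pcreate (H5P_FILE_ACCESS);
+H5Pset_sec2 (raw_plist);            /* raw data through section-2 I/O */
+
+hid_t access_plist = H5Pcreate (H5P_FILE_ACCESS);
+H5Pset_split (access_plist, ".meta", meta_plist, ".raw", raw_plist);
+
+/* Creates base.meta and base.raw */
+hid_t file_id = H5Fcreate ("base", H5F_ACC_TRUNC, H5P_DEFAULT, access_plist);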
+
Quincey Koziol
+
Robb Matzke
+ + +Last modified: Tue Jun 9 15:03:44 EDT 1998 + + + diff --git a/doc/html/Groups.html b/doc/html/Groups.html new file mode 100644 index 0000000..b1be2f1 --- /dev/null +++ b/doc/html/Groups.html @@ -0,0 +1,288 @@ + + + + Groups + + + +

Groups

+ +

1. Introduction

+ +

An object in HDF5 consists of an object header at a fixed file + address that contains messages describing various properties of + the object such as its storage location, layout, compression, + etc. and some of these messages point to other data such as the + raw data of a dataset. The address of the object header is also + known as an OID and HDF5 has facilities for translating + names to OIDs. + +

Every HDF5 object has at least one name and a set of names can + be stored together in a group. Each group implements a name + space where the names are any length and unique with respect to + other names in the group. + +

Since a group is a type of HDF5 object it has an object header + and a name which exists as a member of some other group. In this + way, groups can be linked together to form a directed graph. + One particular group is called the Root Group and is + the group to which the HDF5 file boot block points. Its name is + "/" by convention. The full name of an object is + created by joining component names with slashes much like Unix. + +

+

+ Group Graph Example +
+ +

However, unlike Unix which arranges directories hierarchically, + HDF5 arranges groups in a directed graph. Therefore, there is + no ".." entry in a group since a group can have more than one + parent. There is no "." entry either but the library understands + it internally. + +

2. Names

+ +

HDF5 places few restrictions on names: component names may be + any length except zero and may contain any character except + slash ("/") and the null terminator. A full name may be + composed of any number of component names separated by slashes, + with any of the component names being the special name ".". A + name which begins with a slash is an absolute name + which is looked up beginning at the root group of the file while + all other relative names are looked up beginning at the + current working group (described below) or a specified group. + Multiple consecutive slashes in a full name are treated as + single slashes and trailing slashes are not significant. A + special case is the name "/" (or equivalent) which refers to the + root group. + +

Functions which operate on names generally take a location + identifier which is either a file ID or a group ID and perform + the lookup with respect to that location. Some possibilities + are: + +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Location TypeObject NameDescription
File ID/foo/barThe object bar in group foo + in the root group of the specified file.
Group ID/foo/barThe object bar in group foo + in the root group of the file containing the specified + group. In other words, the group ID's only purpose is + to supply a file.
File ID/The root group of the specified file.
Group ID/The root group of the file containing the specified + group.
File IDfoo/barThe object bar in group foo + in the current working group of the specified file. The + initial current working group is the root group of the + file as described below.
Group IDfoo/barThe object bar in group foo + in the specified group.
File ID.The current working group of the specified file.
Group ID.The specified group.
Other ID.The specified object.
+
+ + +

3. Creating, Opening, and Closing Groups

+ +

Groups are created with the H5Gcreate() function,
      and existing groups can be accessed with
      H5Gopen(). Both functions return an object ID which
      should eventually be released by calling
      H5Gclose().

+
hid_t H5Gcreate (hid_t location_id, const char + *name, size_t size_hint) +
This function creates a new group with the specified + name at the specified location which is either a file ID or a + group ID. The name must not already be taken by some other + object and all parent groups must already exist. The + size_hint is a hint for the number of bytes to + reserve to store the names which will be eventually added to + the new group. Passing a value of zero for size_hint + is usually adequate since the library is able to dynamically + resize the name heap, but a correct hint may result in better + performance. The return value is a handle for the open group + and it should be closed by calling H5Gclose() + when it's no longer needed. A negative value is returned for + failure. + +

+
hid_t H5Gopen (hid_t location_id, const char + *name) +
This function opens an existing group with the specified + name at the specified location which is either a file ID or a + group ID and returns an object ID. The object ID should be + released by calling H5Gclose() when it is no + longer needed. A negative value is returned for failure. + +

+
herr_t H5Gclose (hid_t group_id) +
This function releases resources used by a group which was
	opened by H5Gcreate() or
	H5Gopen().  After closing a group the
	group_id should not be used again.  This function
	returns zero for success or a negative value for failure.
+ +
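<p>For example, a minimal sketch that creates a group, closes it, and
reopens it later (the group name "/Data" is hypothetical):
<pre>
/* Create a group; a size_hint of zero lets the library size the
 * name heap dynamically. */
hid_t grp = H5Gcreate (file_id, "/Data", 0);
if (grp &lt; 0) {
    /* ... handle the error ... */
}
H5Gclose (grp);

/* Reopen the same group later. */
grp = H5Gopen (file_id, "/Data");
/* ... use the group ... */
H5Gclose (grp);
</pre>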

4. Current Working Group

+ +

Each file handle (hid_t file_id) has a + current working group, initially the root group of the file. + Names which do not begin with a slash are relative to the + specified group or to the current working group as described + above. For instance, the name "/Foo/Bar/Baz" is resolved by + first looking up "Foo" in the root group. But the name + "Foo/Bar/Baz" is resolved by first looking up "Foo" in the + current working group. + +

+
herr_t H5Gset (hid_t location_id, const char + *name) +
The group with the specified name is made the current
	working group for the file which contains it. The
	location_id can be a file handle or a group handle
	and the name is resolved as described above. Each file handle
	has its own current working group and if the
	location_id is a group handle then the file handle is
	derived from the group handle.  This function returns zero for
	success or negative for failure.

+
herr_t H5Gpush (hid_t location_id, const char + *name) +
Each file handle has a stack of groups and the top group on + that stack is the current working group. The stack initially + contains only the root group. This function pushes a new + group onto the stack and returns zero for success or negative + for failure. + +

+
herr_t H5Gpop (hid_t location_id) +
This function pops one group off the group stack for the + specified file (if the location_id is a group then + the file is derived from that group), changing the current + working group to the new top-of-stack group. The function + returns zero for success or negative for failure (failure + includes attempting to pop from an empty stack). If the last + item is popped from the stack then the current working group + is set to the root group. +
+ +
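<p>A short sketch of how the group stack behaves (group names are
hypothetical):
<pre>
H5Gset (file_id, "/Data");   /* current working group is now /Data      */
H5Gpush (file_id, "Sub");    /* push: current working group /Data/Sub   */
H5Gpop (file_id);            /* pop: current working group /Data again  */
</pre>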

5. Objects with Multiple Names

+ +

An object (including a group) can have more than one + name. Creating the object gives it the first name, and then + functions described here can be used to give it additional + names. The association between a name and the object is called + a link and HDF5 supports two types of links: a hard + link is a direct association between the name and the + object where both exist in a single HDF5 address space, and a + soft link is an indirect association. + +

+

+ Hard Link Example +
+ +

+

+ Soft Link Example +
+ +
+
Object Creation
+
The creation of an object creates a hard link which is + indistinguishable from other hard links that might be added + later. + +

+
herr_t H5Glink (hid_t file_id, H5G_link_t + link_type, const char *current_name, + const char *new_name) +
Creates a new name for an object that has some current name
	(possibly one of many names it currently has). If the
	link_type is H5G_LINK_HARD then a new
	hard link is created.  Otherwise if link_type is
	H5G_LINK_SOFT a soft link is created which is an
	alias for the current_name. When creating a soft
	link the object need not exist.  This function returns zero
	for success or negative for failure.  This function is not
	part of the prototype API.

+
herr_t H5Gunlink (hid_t file_id, const char + *name) +
This function removes an association between a name and an + object. Object headers keep track of how many hard links refer + to the object and when the hard link count reaches zero the + object can be removed from the file (but objects which are + open are not removed until all handles to the object are + closed). This function is not part of the prototype + API. +
+ +
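<p>As a sketch (object names hypothetical), an object can be given an
additional hard name and a soft alias, and a name can be removed
again:
<pre>
/* Add a second hard link to an existing object. */
H5Glink (file_id, H5G_LINK_HARD, "/Data/Temp", "/Backup/Temp");

/* Create a soft link; the target need not exist yet. */
H5Glink (file_id, H5G_LINK_SOFT, "/Data/Temp", "/Alias");

/* Remove one name; the object remains as long as hard links and
 * open handles remain. */
H5Gunlink (file_id, "/Alias");
</pre>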
+
Robb Matzke
+ + +Last modified: Tue Mar 24 15:52:14 EST 1998 + + + diff --git a/doc/html/H5.api.html b/doc/html/H5.api.html new file mode 100644 index 0000000..b2402a5 --- /dev/null +++ b/doc/html/H5.api.html @@ -0,0 +1,4611 @@ + +HDF5 Draft API Specification + + +
+

HDF5: API Specification

+
+ +
    +
  1. Library - H5<name> - API for global library HDF information/modification +
      +
    1. H5dont_atexit +
    2. H5close +
    3. H5version +
    + +
  2. File - H5F<name> - API for accessing HDF files +
      +
    1. H5Fopen +
    2. H5Fcreate +
    3. H5Fis_hdf5 +
    4. H5Fget_create_template +
    5. H5Fclose +
    + +
  3. Template - H5P<name> - API for manipulating object templates +
      +
    1. H5Pcreate +
    2. H5Pget_class +
    3. H5Pcopy +
    4. H5Pclose +
    5. H5Pget_version +
    6. H5Pset_userblock +
    7. H5Pget_userblock +
    8. H5Pset_sizes +
    9. H5Pget_sizes +
    10. H5Pset_mpi +
    11. H5Pget_mpi +
    12. H5Pset_xfer +
    13. H5Pget_xfer +
    14. H5Pset_sym_k +
    15. H5Pget_sym_k +
    16. H5Pset_istore_k +
    17. H5Pget_istore_k +
    18. H5Pset_layout +
    19. H5Pget_layout +
    20. H5Pset_chunk +
    21. H5Pget_chunk +
    + + + + + +
  4. Dataset - H5D<name> - API for manipulating scientific datasets. See datasets. +
      +
    1. H5Dcreate +
    2. H5Dopen +
    3. H5Dget_space +
    4. H5Dget_type +
    5. H5Dget_create_parms +
    6. H5Dread +
    7. H5Dwrite +
    8. H5Dextend +
    9. H5Dclose +
    + +
  5. Datatype - H5T<name> - API for defining dataset element information. See data types. +
      +
    1. H5Tcreate +
    2. H5Tcopy +
    3. H5Tequal +
    4. H5Tlock +
    5. H5Tget_class +
    6. H5Tget_size +
    7. H5Tset_size +
    8. H5Tget_order +
    9. H5Tset_order +
    10. H5Tget_precision +
    11. H5Tset_precision +
    12. H5Tget_offset +
    13. H5Tset_offset +
    14. H5Tget_pad +
    15. H5Tset_pad +
    16. H5Tget_sign +
    17. H5Tset_sign +
    18. H5Tget_fields +
    19. H5Tset_fields +
    20. H5Tget_ebias +
    21. H5Tset_ebias +
    22. H5Tget_norm +
    23. H5Tset_norm +
    24. H5Tget_inpad +
    25. H5Tset_inpad +
    26. H5Tget_cset +
    27. H5Tset_cset +
    28. H5Tget_strpad +
    29. H5Tset_strpad +
    30. H5Tget_nmembers +
    31. H5Tget_member_name +
    32. H5Tget_member_offset +
    33. H5Tget_member_dims +
    34. H5Tget_member_type +
    35. H5Tinsert +
    36. H5Tpack +
    37. H5Tregister_hard +
    38. H5Tregister_soft +
    39. H5Tunregister +
    40. H5Tclose +
    + +
  6. Dataspace - H5S<name> - API for defining dataset dataspace +
      +
    1. H5Screate_simple +
    2. H5Scopy +
    3. H5Sget_npoints +
    4. H5Sget_ndims +
    5. H5Sget_dims +
    6. H5Sis_simple +
    7. H5Sset_space +
    8. H5Sset_hyperslab +
    9. H5Sget_hyperslab +
    10. H5Sclose +
    + +
  7. Group - H5G<name> - API for creating physical groups of objects on disk. +
      +
    1. H5Gcreate +
    2. H5Gopen +
    3. H5Gset +
    4. H5Gpush +
    5. H5Gpop +
    6. H5Gclose + +
    + +
  8. Glossary - A glossary of data-types used in the APIs +
      +
    1. Basic Types +
    2. Complex Types +
    3. Disk I/O Types +
    + +
+ +
+

Library API Functions

+

These functions are designed to provide access to HDF5 application/library +behavior. They are used to get information about or change global library +parameters. +
+
+ +


+
+
Name: H5dont_atexit +
Signature: +
herr_t H5dont_atexit(void) +
Description: +
This routine indicates to the library that an 'atexit()' cleanup routine
    should not be installed.  The major (only?) purpose for this is in
    situations where the library is dynamically linked into an application and
    is un-linked from the application before 'exit()' gets called.  In those
    situations, a routine installed with 'atexit()' would jump to a routine
    which was no longer in memory, causing errors.
    In order to be effective, this routine must be called before any other
    HDF function calls, and must be called each time the library is loaded/
    linked into the application (the first time, and again after it has been
    un-loaded and re-loaded).
Parameters: none +
Returns: +
zero/negative +
+ +
+
+
Name: H5close +
Signature: +
herr_t H5close(void) +
Description: +
This routine flushes all data to disk, closes all file handles and
    cleans up all memory used by the library.  Generally it is installed
    to be called when the application calls exit, but may be
    called earlier in the event of an emergency shutdown or out of a desire
    to free all resources used by the HDF5 library.
Parameters: none +
Returns: +
zero/negative +
+ +
+
+
Name: H5version +
Signature: +
herr_t H5version(uintn *majversion, + uintn *minversion, + uintn *relversion, + uintn *patversion + ) +
Description: +
This routine retrieves the major, minor, release and patch versions + of the library which is linked to the application. +
Parameters: +
+
uintn *majversion +
The major version of the library. +
uintn *minversion +
The minor version of the library. +
uintn *relversion +
The release number of the library. +
uintn *patversion +
The patch number of the library. +
+
Returns: +
zero/negative +
+ +
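<p>For example, a small sketch that prints the library version
(assumes &lt;stdio.h&gt;):
<pre>
uintn maj, min, rel, pat;

if (H5version (&amp;maj, &amp;min, &amp;rel, &amp;pat) >= 0)
    printf ("HDF5 library %u.%u.%u (patch %u)\n", maj, min, rel, pat);
</pre>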
+

File API Functions

+

These functions are designed to provide file-level access to HDF5 files.
Further manipulation of objects inside a file is performed through one of the
APIs documented below.
+
+ +


+
+
Name: H5Fopen +
Signature: +
hid_t H5Fopen(const char *name, + uintn flags, + hid_t access_template + ) +
Description: +
This is the primary function for opening existing HDF5 files.
    The flags parameter determines the file access mode.
    There is no read flag; all opened files are implicitly opened for
    read access.
    All flags may be combined with the '|' (bitwise OR) operator to
    change the behavior of the file open call.
    The access_template parameter is a template containing
    additional information required for specific methods of access,
    parallel I/O for example.  The parameters for access templates are
    described in the H5P API documentation.
Parameters: +
+
const char *name +
Name of the file to access. +
uintn flags +
File access flags: +
    +
    H5F_ACC_RDWR +
    Allow read and write access to file. +
+
hid_t access_template
Template indicating the file access properties. + If parallel file access is desired, this is a collective + call according to the communicator stored in the + access_template. Use 0 for default access template. +
+
Returns: +
An ID (of type hid_t) for the file upon success, + otherwise negative +
+ +
+
+
Name: H5Fcreate +
Signature: +
hid_t H5Fcreate(const char *name, + uintn flags, + hid_t create_template, + hid_t access_template + ) +
Description: +
This is the primary function for creating HDF5 files.
    The flags parameter determines whether an existing
    file will be overwritten or not.  All newly created files are opened
    for both reading and writing.
    All flags may be combined with the '|' (bitwise OR) operator to
    change the behavior of the file creation call.
    The create_template and access_template
    parameters are templates containing additional information required
    for specific methods of access or particular aspects of the file
    to set when creating a file.
    The parameters for creation and access templates are
    described in the H5P API documentation.
Parameters: +
+
const char *name +
Name of the file to access. +
uintn flags +
File access flags: +
    +
    H5F_ACC_TRUNC +
    Truncate file, if it already exists. The file will + be truncated, erasing all data previously stored in + the file. +
+
hid_t create_template
File creation template ID, used when modifying default file meta-data +
hid_t access_template
Template indicating the file access properties. + If parallel file access is desired, this is a collective + call according to the communicator stored in the + access_template. Use 0 for default access template. +
+
Returns: +
An ID (of type hid_t) for the file upon success, + otherwise negative +
+ +
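<p>A minimal create/close/reopen sequence might look like the
following sketch; the file name is hypothetical and H5P_DEFAULT
stands for the default templates (passing 0 has the same meaning):
<pre>
/* Create the file, truncating any previous contents. */
hid_t file = H5Fcreate ("example.h5", H5F_ACC_TRUNC,
                        H5P_DEFAULT, H5P_DEFAULT);
/* ... write objects into the file ... */
H5Fclose (file);

/* Reopen the same file for reading and writing. */
file = H5Fopen ("example.h5", H5F_ACC_RDWR, H5P_DEFAULT);
/* ... */
H5Fclose (file);
</pre>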
+
+
Name: H5Fis_hdf5 +
Signature: +
hbool_t H5Fis_hdf5(const char *name + ) +
Description: +
This function determines whether a file is in the HDF5 format. +
Parameters: +
+
const char *name +
File name to check format. +
+
Returns: +
TRUE/FALSE/negative +
+ +
+
+
Name: H5Fget_create_template +
Signature: +
hid_t H5Fget_create_template(hid_t file_id + ) +
Description: +
This function returns a template ID with a copy of the parameters
    used to create this file.  It is useful for duplicating those parameters
    when creating another file.
Parameters: +
+
hid_t file_id +
File ID to get creation template of +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Fclose +
Signature: +
herr_t H5Fclose(hid_t file_id + ) +
Description: +
This function terminates access to an HDF5 file. If this is the + last file ID open for a file and if access IDs are still in use, + this function will fail. +
Parameters: +
+
hid_t file_id +
File ID to terminate access to. +
+
Returns: +
zero/negative +
+ +
+

Template API Functions

+

These functions manipulate template objects, allowing objects which require
many different parameters to be created and manipulated easily.
+
+ +


+
+
Name: H5Pcreate +
Signature: +
hid_t H5Pcreate(H5P_class_t type + ) +
Description: +
This function returns a template ID for a copy of the default + template of a given type. +
+
+
Template Types and Uses: +
    +
    H5P_FILE_CREATE +
    Used to set the metadata information about a file during + file creation. +
    H5P_FILE_ACCESS +
    Used to set I/O access information about a file. +
    H5P_DATASET_CREATE +
    Used to set information about a dataset when it is + created. +
    H5P_DATASET_XFER +
    Used to set access information about a memory to dataset + transfer. +
+
+
Parameters: +
+
H5P_class_t type +
The type of template to create. +
+
Returns: +
Valid ID on success, negative on failure +
+ +
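<p>For instance, a sketch that copies the default file creation
template, adjusts it with H5Pset_userblock() (described below), and
uses it to create a file (the file name is hypothetical):
<pre>
hid_t create_plist = H5Pcreate (H5P_FILE_CREATE);
H5Pset_userblock (create_plist, 512);      /* 512-byte user block */

hid_t file = H5Fcreate ("example.h5", H5F_ACC_TRUNC,
                        create_plist, H5P_DEFAULT);
H5Pclose (create_plist);   /* the file retains its own copy */
</pre>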
+
+
Name: H5Pclose +
Signature: +
herr_t H5Pclose(hid_t template_id + ) +
Description: +
This function terminates access to a template. +
Parameters: +
+
hid_t template_id +
Template ID to terminate access to. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pget_class +
Signature: +
H5P_class_t H5Pget_class(hid_t template_id + ) +
Description: +
This function queries the class of a template ID. +
Parameters: +
+
hid_t template_id +
Template ID to query. +
+
Returns: +
Template class code on success, negative on failure +
+ +
+
+
Name: H5Pcopy +
Signature: +
hid_t H5Pcopy(hid_t template_id + ) +
Description: +
This function makes a copy of a template ID. +
Parameters: +
+
hid_t template_id +
Template ID to duplicate. +
+
Returns: +
Template ID on success, negative on failure +
+ +
+
+
Name: H5Pget_version +
Signature: +
herr_t H5Pget_version(hid_t template_id, + int * boot, + int * freelist, + int * stab, + int * shhdr + ) +
Description: +
This function queries the version information of various objects + for a file creation template. Any pointer parameters which are + passed as NULL are not queried. +
Parameters: +
+
hid_t template_id +
Template ID to query. +
int * boot +
Pointer to location to return boot block version number. +
int * freelist +
Pointer to location to return global freelist version number. +
int * stab +
Pointer to location to return symbol table version number. +
int * shhdr +
Pointer to location to return shared object header version number. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pset_userblock +
Signature: +
herr_t H5Pset_userblock(hid_t template_id, + hsize_t size + ) +
Description: +
This function sets the size of the user-block located at the
    beginning of an HDF5 file.  This function is only valid for
    file creation templates.  The default user-block size is 0.
    Only values which are powers of 2 equal to or larger than 512
    may be used as a valid user-block size.
Parameters: +
+
hid_t template_id +
Template to modify. +
hsize_t size +
Size of the user-block in bytes. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pget_userblock +
Signature: +
herr_t H5Pget_userblock(hid_t template_id, + hsize_t * size + ) +
Description: +
This function retrieves the size of the user-block located at the + beginning of an HDF5 file. This function is only valid for + file creation templates. +
Parameters: +
+
hid_t template_id +
Template ID to query. +
hsize_t * size +
Pointer to location to return user-block size. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pset_sizes +
Signature: +
herr_t H5Pset_sizes(hid_t template_id, + size_t sizeof_addr, + size_t sizeof_size + ) +
Description: +
This function sets the byte size of the offsets and lengths used to
    address objects in an HDF5 file.  This function is only valid for
    file creation templates.  Passing in a value of 0 for one of the
    sizeof parameters retains the current value.  The default value
    for both is 4 bytes.  Valid values currently are 2, 4, 8 and
    16.
Parameters: +
+
hid_t template_id +
Template to modify. +
size_t sizeof_addr +
Size of an object offset in bytes. +
size_t sizeof_size +
Size of an object length in bytes. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pget_sizes +
Signature: +
herr_t H5Pget_sizes(hid_t template_id, + size_t * sizeof_addr, + size_t * sizeof_size + ) +
Description: +
This function retrieves the size of the offsets and lengths used + in an HDF5 file. This function is only valid for file creation + templates. +
Parameters: +
+
hid_t template_id +
Template ID to query. +
size_t * sizeof_addr
Pointer to location to return offset size in bytes.
size_t * sizeof_size
Pointer to location to return length size in bytes.
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pset_mpi +
Signature: +
herr_t H5Pset_mpi(hid_t tid, + MPI_Comm comm, + MPI_Info info + ) +
Description: +
Stores the access mode for parallel I/O calls and the user-supplied
    communicator and info object in the access template, which can then
    be used to open a file.  This function is available only in the
    parallel HDF5 library.
Parameters: +
+
hid_t tid +
ID of template to modify +
MPI_Comm comm +
MPI communicator to be used for file open as defined in
    MPI_FILE_OPEN of MPI-2.  This function does not duplicate the
    communicator.  Any modification to comm after
    this function returns may have an undetermined effect
    on the access template; users should call this function
    again to set up the template.
MPI_Info info +
MPI info object to be used for file open as defined in
    MPI_FILE_OPEN of MPI-2.  This function does not duplicate the
    info object.  Any modification to info after
    this function returns may have an undetermined effect
    on the access template; users should call this function
    again to set up the template.
+
Returns: +
zero/negative +
+ +
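<p>A sketch for the parallel library (MPI must already be
initialized; MPI_COMM_WORLD and MPI_INFO_NULL are example choices,
and the file name is hypothetical):
<pre>
/* Parallel HDF5 only: record the communicator in an access template. */
hid_t fapl = H5Pcreate (H5P_FILE_ACCESS);
H5Pset_mpi (fapl, MPI_COMM_WORLD, MPI_INFO_NULL);

/* Collective call across MPI_COMM_WORLD. */
hid_t file = H5Fcreate ("parallel.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl);
H5Pclose (fapl);
</pre>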
+
+
Name: H5Pget_mpi +
Signature: +
herr_t H5Pget_mpi(hid_t tid, + MPI_Comm *comm, + MPI_Info *info + ) +
Description: +
Retrieves the communicator and info object + that have been set by H5Pset_mpi. + This function is available only in the parallel HDF5 library. +
Parameters: +
+
hid_t tid +
ID of a file access property list that has been set + successfully by H5Pset_mpi. +
MPI_Comm * comm +
Pointer to location to return the communicator. +
MPI_Info * info +
Pointer to location to return the info object. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pset_xfer +
Signature: +
herr_t H5Pset_xfer(hid_t tid, + H5D_transfer_t data_xfer_mode + ) +
Description: +
Set the transfer mode of the dataset transfer property list. + The list can then be used to control the I/O transfer mode + during dataset accesses. This function is available only + in the parallel HDF5 library and is not a collective function. +
Parameters: +
+
hid_t tid +
ID of a dataset transfer property list +
H5D_transfer_t data_xfer_mode +
Data transfer modes: +
    +
    H5D_XFER_INDEPENDENT +
    Use independent I/O access. +
    H5D_XFER_COLLECTIVE +
    Use MPI collective I/O access. +
+
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pget_xfer +
Signature: +
herr_t H5Pget_xfer(hid_t tid, + H5D_transfer_t * data_xfer_mode + ) +
Description: +
Retrieves the transfer mode from the dataset + transfer property list. + This function is available only in the parallel HDF5 library. +
Parameters: +
+
hid_t tid +
ID of a dataset transfer property list. +
H5D_transfer_t * data_xfer_mode +
Pointer to location to return the data_xfer_mode. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pset_sym_k +
Signature: +
herr_t H5Pset_sym_k(hid_t template_id, + size_t ik, + size_t lk + ) +
Description: +
This function sets the size of parameters used to control the + symbol table nodes. This function is only valid for + file creation templates. Passing in a value of 0 for one of the + parameters retains the current value. + ik is one half the rank of a tree that stores a symbol + table for a group. Internal nodes of the symbol table are on + average 75% full. That is, the average rank of the tree is + 1.5 times the value of ik. + lk is one half of the number of symbols that can be stored in + a symbol table node. A symbol table node is the leaf of a + symbol table tree which is used to store a group. When + symbols are inserted randomly into a group, the group's + symbol table nodes are 75% full on average. That is, they + contain 1.5 times the number of symbols specified by lk. +
Parameters: +
+
hid_t template_id +
Template ID to query. +
size_t ik +
Symbol table tree rank. +
size_t lk +
Symbol table node size. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pget_sym_k +
Signature: +
herr_t H5Pget_sym_k(hid_t template_id, + size_t * ik, + size_t * lk + ) +
Description: +
This function retrieves the size of the symbol table's B-tree
    1/2 rank and the symbol table's leaf node 1/2 size.  See
    H5Pset_sym_k for more information.  This function is only valid
    for file creation templates.  If a parameter is set to NULL,
    that value is not retrieved.
Parameters: +
+
hid_t template_id +
Template ID to query. +
size_t * ik +
Pointer to location to return the symbol table's B-tree 1/2 rank. +
size_t * lk
Pointer to location to return the symbol table's leaf node 1/2 size. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pset_istore_k +
Signature: +
herr_t H5Pset_istore_k(hid_t template_id, + size_t ik + ) +
Description: +
This function sets the size of the parameter used to control the
    B-trees for indexing chunked datasets.  This function is only valid for
    file creation templates.  Passing in a value of 0 retains the
    current value.
    ik is one half the rank of a tree that stores chunked raw
    data.  On average, such a tree will be 75% full, or have an
    average rank of 1.5 times the value of ik.
Parameters: +
+
hid_t template_id +
Template ID to query. +
size_t ik +
1/2 rank of chunked storage B-tree. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pget_istore_k +
Signature: +
herr_t H5Pget_istore_k(hid_t template_id, + size_t * ik + ) +
Description: +
Queries the 1/2 rank of an indexed storage B-tree. See + H5Pset_istore_k for details. + The argument ik may be the null pointer. This + function is only valid for file creation templates. +
Parameters: +
+
hid_t template_id +
Template ID to query. +
size_t * ik +
Pointer to location to return the chunked storage B-tree 1/2 rank. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pset_layout +
Signature: +
herr_t H5Pset_layout(hid_t template_id, + H5D_layout_t layout + ) +
Description: +
This function sets the type of storage used to store the raw data for
    a dataset.  This function is only valid for dataset creation templates.
    Valid values for layout are:
    +
    H5D_COMPACT +
    Store raw data and object header contiguously in file. + This should only be used for very small amounts of raw + data (suggested less than 1KB). +
    H5D_CONTIGUOUS +
Store raw data separately from the object header in one
        large chunk in the file.
    H5D_CHUNKED +
Store raw data separately from the object header, in
        chunks stored at separate locations in the file.
+
Parameters: +
+
hid_t template_id +
Template ID to query. +
H5D_layout_t layout +
Type of storage layout for raw data. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pget_layout +
Signature: +
herr_t H5Pget_layout(hid_t template_id, + H5D_layout_t * layout + ) +
Description: +
Queries the layout of the raw data for a dataset. + This function is only valid for dataset creation templates. + Valid types for layout are: +
    +
    H5D_COMPACT +
    Raw data and object header stored contiguously in file. +
    H5D_CONTIGUOUS +
Raw data stored separately from the object header in one
        large chunk in the file.
    H5D_CHUNKED +
Raw data stored separately from the object header, in
        chunks at separate locations in the file.
+
Parameters: +
+
hid_t template_id +
Template ID to query. +
H5D_layout_t * layout +
Pointer to location to return the storage layout. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Pset_chunk +
Signature: +
herr_t H5Pset_chunk(hid_t template_id, + int ndims, + const hsize_t * dim + ) +
Description: +
This function sets the size of the chunks used to store a chunked + layout dataset. This function is only valid for dataset creation + templates. The ndims parameter currently must be the + same size as the rank of the dataset. The values of the + dim array define the size of the chunks to store the + dataset's raw data. As a side-effect, the layout of the dataset is + changed to H5D_CHUNKED, if it isn't already. +
Parameters: +
+
hid_t template_id +
Template ID to query. +
int ndims +
The number of dimensions of each chunk. +
const hsize_t * dim +
An array containing the size of each chunk. +
+
Returns: +
zero/negative +
+ +
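<p>For example, a sketch that requests 100x100 chunks for a
two-dimensional dataset:
<pre>
hid_t create_plist = H5Pcreate (H5P_DATASET_CREATE);
hsize_t chunk_dims[2] = {100, 100};

/* Side effect: the layout becomes H5D_CHUNKED. */
H5Pset_chunk (create_plist, 2, chunk_dims);
</pre>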
+
+
Name: H5Pget_chunk +
Signature: +
herr_t H5Pget_chunk(hid_t template_id,
        int max_ndims,
        hsize_t * dims
    )
Description: +
Queries the size of chunks for the raw data of a chunked layout + dataset. This function is only valid for dataset creation + templates. +
Parameters: +
+
hid_t template_id +
Template ID to query. +
int max_ndims +
Size of the dims array. +
hsize_t * dims +
Array to store the chunk dimensions. +
+
Returns: +
zero/negative +
+ + + + + +
+

Dataset Object API Functions

+

These functions create and manipulate dataset objects. Each dataset must +be constructed from a datatype and a dataspace. +
+
+ +


+
+
Name: H5Dcreate +
Signature: +
hid_t H5Dcreate(hid_t file_id,
        const char *name,
        hid_t type_id,
        hid_t space_id,
        hid_t template_id
    )
Description: +
This function creates a new dataset in the file specified with the + file_id. The type_id and space_id + are the IDs of the datatype and dataspace used to construct the + framework of the dataset. The datatype and dataspace parameters + describe the dataset as it will exist in the file, which is not + necessarily the same as it exists in memory. The template_id + contains either the default template (H5P_DEFAULT) or a template_id + with particular constant properties used to create the dataset. The + name is used to identify the dataset in a group and must + be unique within that group. +
Parameters: +
+
hid_t file_id +
ID of the file to create the dataset within. +
const char * name +
The name of the dataset to create. +
hid_t type_id +
ID of the datatype to use when creating the dataset. +
hid_t space_id +
ID of the dataspace to use when creating the dataset. +
hid_t template_id +
ID of the dataset creation template. +
+
Returns: +
Dataset ID on success, negative on failure. +
+ +
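<p>A sketch of creating a 20x30 dataset of native integers, assuming
the H5Screate_simple() interface from the dataspace API (the dataset
name is hypothetical):
<pre>
hsize_t dims[2] = {20, 30};
hid_t space = H5Screate_simple (2, dims, NULL);  /* fixed-size space */

hid_t dset = H5Dcreate (file_id, "/dset", H5T_NATIVE_INT,
                        space, H5P_DEFAULT);
H5Sclose (space);   /* the dataset keeps its own copy of the space */
/* ... */
H5Dclose (dset);
</pre>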
+
+
Name: H5Dopen +
Signature: +
hid_t H5Dopen(hid_t file_id, + const char *name + ) +
Description: +
This function opens an existing dataset for access in the file + specified with the file_id. The name is + used to identify the dataset in the file. +
Parameters: +
+
hid_t file_id +
ID of the file to access the dataset within. +
const char * name +
The name of the dataset to access. +
+
Returns: +
Dataset ID on success, negative on failure. +
+ +
+
+
Name: H5Dget_space +
Signature: +
hid_t H5Dget_space(hid_t dataset_id + ) +
Description: +
This function returns a copy of the dataspace for a dataset. The + dataspace should be released with the H5Sclose() function. +
Parameters: +
+
hid_t dataset_id +
ID of the dataset to query. +
+
Returns: +
Dataspace ID on success, negative on failure. +
+ +
+
+
Name: H5Dget_type +
Signature: +
hid_t H5Dget_type(hid_t dataset_id + ) +
Description: +
This function returns a copy of the datatype for a dataset.  The
    datatype should be released with the H5Tclose() function.
Parameters: +
+
hid_t dataset_id +
ID of the dataset to query. +
+
Returns: +
Datatype ID on success, negative on failure. +
+ +
+
+
Name: H5Dget_create_plist +
Signature: +
hid_t H5Dget_create_plist(hid_t dataset_id + ) +
Description: +
This function returns a copy of the dataset creation template for a + dataset. The template should be released with the H5Pclose() function. +
Parameters: +
+
hid_t dataset_id +
ID of the dataset to query. +
+
Returns: +
Dataset creation template ID on success, negative on failure. +
+ +
+
+
Name: H5Dread +
Signature: +
herr_t H5Dread(hid_t dataset_id, + hid_t mem_type_id, + hid_t mem_space_id, + hid_t file_space_id, + hid_t transfer_template_id, + void * buf + ) +
Description: +
This function reads raw data from the specified dataset into buf,
    converting from the file datatype of the dataset into the memory
    datatype specified in mem_type_id.  The portion of the
    dataset to read from disk is specified with the file_space_id
    which can contain a dataspace with a hyperslab selected or the constant
    H5S_ALL, which indicates the entire dataset is to be read.  The portion
    of the dataset read into the memory buffer is specified with the
    mem_space_id which can also be a hyperslab of the same
    size or the H5S_ALL parameter to store the entire dataset.  The
    transfer_template_id is a dataset transfer template ID which
    is used to provide additional parameters for the I/O operation or can
    be H5P_DEFAULT for the default library behavior.
Parameters: +
+
hid_t dataset_id +
ID of the dataset to read from.
hid_t mem_type_id +
ID of the memory datatype. +
hid_t mem_space_id +
ID of the memory dataspace. +
hid_t file_space_id +
ID of the dataset's dataspace in the file. +
hid_t transfer_template_id +
ID of a transfer template for this I/O operation. +
void * buf +
Buffer to store information read from the file. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Dwrite +
Signature: +
herr_t H5Dwrite(hid_t dataset_id, + hid_t mem_type_id, + hid_t mem_space_id, + hid_t file_space_id, + hid_t transfer_template_id, + const void * buf + ) +
Description: +
This function writes raw data from memory into the specified dataset,
    converting from the memory datatype specified in
    mem_type_id into the file datatype of the dataset.
    The portion of the
    dataset to be written to disk is specified with the file_space_id
    which can contain a dataspace with a hyperslab selected or the constant
    H5S_ALL, which indicates the entire dataset is to be written.  The portion
    of the dataset written from the memory buffer is specified with the
    mem_space_id which can also be a hyperslab of the same
    size or the H5S_ALL parameter to write the entire dataset.  The
    transfer_template_id is a dataset transfer template ID which
    is used to provide additional parameters for the I/O operation or can
    be H5P_DEFAULT for the default library behavior.
Parameters: +
+
hid_t dataset_id +
ID of the dataset to write to.
hid_t mem_type_id +
ID of the memory datatype. +
hid_t mem_space_id +
ID of the memory dataspace. +
hid_t file_space_id +
ID of the dataset's dataspace in the file. +
hid_t transfer_template_id +
ID of a transfer template for this I/O operation. +
const void * buf +
Buffer containing the information to be written to the file.
+
Returns: +
zero/negative +
+ +
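<p>For example, a sketch that writes a whole 20x30 integer array to
an open dataset dset; H5S_ALL selects the entire dataspace on both
the memory and file sides:
<pre>
int buf[20][30];
/* ... fill buf ... */

/* Whole-dataset transfer with the default transfer properties. */
H5Dwrite (dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buf);
</pre>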
+
+
Name: H5Dextend +
Signature: +
herr_t H5Dextend(hid_t dataset_id, + const hsize_t * size + ) +
Description: +
This function increases the size of the dataspace of a dataset with + unlimited dimensions. It cannot be used to extend the size of a + dataspace's fixed dimensions. The size array must have + the same number of entries as the rank of the dataset's dataspace. +
Parameters: +
+
hid_t dataset_id +
ID of the dataset to extend.
const hsize_t * size +
Array containing the new magnitude of each dimension. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Dclose +
Signature: +
herr_t H5Dclose(hid_t dataset_id
    )
Description: +
This function ends access to a dataset and releases resources used by + it. Further use of the dataset ID is illegal in calls to the dataset + API. +
Parameters: +
+
hid_t dataset_id +
ID of the dataset to finish access to. +
+
Returns: +
zero/negative +
+ +
+

Datatype Object API Functions

+

These functions create and manipulate the datatype which describes elements +of a dataset. +
+
+ +


+
+
Name: H5Tcreate +
Signature: +
hid_t H5Tcreate(H5T_class_t class,
        size_t size
    )
Description: +
This function creates a new datatype of the specified class with the
    specified number of bytes.  Currently, only the H5T_COMPOUND
    datatype class is supported with this function; use H5Tcopy
    to create integer or floating-point datatypes.  The datatype ID
    returned from this function should be released with H5Tclose or resource
    leaks will result.
Parameters: +
+
H5T_class_t class +
Class of datatype to create. +
size_t size +
The number of bytes in the datatype to create. +
+
Returns: +
Datatype ID on success, negative on failure. +
+ +
+
+
Name: H5Tcopy +
Signature: +
hid_t H5Tcopy(hid_t type_id + ) +
Description: +
This function copies an existing datatype. The datatype ID returned + should be released with H5Tclose or resource leaks will occur. Native + datatypes supported by the library are: +
    +
    H5T_NATIVE_CHAR +
    Native character type, declare dataset array as 'char' +
    H5T_NATIVE_UCHAR +
    Native unsigned character type, declare dataset array as 'unsigned char' +
    H5T_NATIVE_SHORT +
    Native short type, declare dataset array as 'short' +
    H5T_NATIVE_USHORT +
    Native unsigned short type, declare dataset array as 'unsigned short' +
    H5T_NATIVE_INT +
    Native int type, declare dataset array as 'int' +
    H5T_NATIVE_UINT +
    Native unsigned int type, declare dataset array as 'unsigned int' +
    H5T_NATIVE_LONG +
Native long type, declare dataset array as 'long'
    H5T_NATIVE_ULONG +
    Native unsigned long type, declare dataset array as 'unsigned long' +
    H5T_NATIVE_LLONG +
Native long long type, declare dataset array as 'long long'
    H5T_NATIVE_ULLONG +
    Native unsigned long long type, declare dataset array as 'unsigned long long' +
    H5T_NATIVE_INT8 +
    Native signed 8-bit type, declare dataset array as 'int8' +
    H5T_NATIVE_UINT8 +
    Native unsigned 8-bit type, declare dataset array as 'uint8' +
    H5T_NATIVE_INT16 +
    Native signed 16-bit type, declare dataset array as 'int16' +
    H5T_NATIVE_UINT16 +
    Native unsigned 16-bit type, declare dataset array as 'uint16' +
    H5T_NATIVE_INT32 +
    Native signed 32-bit type, declare dataset array as 'int32' +
    H5T_NATIVE_UINT32 +
    Native unsigned 32-bit type, declare dataset array as 'uint32' +
    H5T_NATIVE_INT64 +
Native signed 64-bit type, declare dataset array as 'int64'
    H5T_NATIVE_UINT64 +
    Native unsigned 64-bit type, declare dataset array as 'uint64' +
    H5T_NATIVE_FLOAT +
    Native single-precision float type, declare dataset array as 'float' +
    H5T_NATIVE_DOUBLE +
    Native double-precision float type, declare dataset array as 'double' +
+
Parameters: +
+
hid_t type_id +
ID of datatype to copy. +
+
Returns: +
Datatype ID on success, negative on failure. +
+ +
+
+
Name: H5Tequal +
Signature: +
hbool_t H5Tequal(hid_t type_id1,
        hid_t type_id2
    )
Description: +
This function determines if two datatype IDs refer to the same + datatype. +
Parameters: +
+
hid_t type_id1 +
ID of datatype to compare. +
hid_t type_id2 +
ID of datatype to compare. +
+
Returns: +
TRUE/FALSE/negative +
+ +
+
+
Name: H5Tlock +
Signature: +
herr_t H5Tlock(hid_t type_id + ) +
Description: +
This function locks a type, making it read-only and non-destructible.
    This is normally done by the library for predefined data types so the
    application doesn't inadvertently change or delete a predefined type.
    Once a data type is locked it can never be unlocked.
Parameters: +
+
hid_t type_id +
ID of datatype to lock. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tget_class +
Signature: +
H5T_class_t H5Tget_class(hid_t type_id + ) +
Description: +
This function returns the base class of a datatype. +
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
+
Returns: +
Non-negative type class on success, negative on failure. +
+ +
+
+
Name: H5Tget_size +
Signature: +
size_t H5Tget_size(hid_t type_id + ) +
Description: +
This function returns the size of a datatype in bytes. +
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
+
Returns: +
Positive size in bytes on success, 0 on failure.
+ +
+
+
Name: H5Tset_size +
Signature: +
herr_t H5Tset_size(hid_t type_id,
        size_t size
    )
Description: +
This function sets the total size in bytes for an atomic data type (this + operation is not permitted on compound data types). If the size is + decreased so that the significant bits of the data type extend beyond + the edge of the new size, then the `offset' property is decreased + toward zero. If the `offset' becomes zero and the significant + bits of the data type still hang over the edge of the new size, then + the number of significant bits is decreased. + Adjusting the size of an H5T_STRING automatically sets the precision + to 8*size. All data types have a positive size. +
Parameters: +
+
hid_t type_id +
ID of datatype to change size. +
size_t size +
Size in bytes to modify datatype. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tget_order +
Signature: +
H5T_order_t H5Tget_order(hid_t type_id + ) +
Description: +
This function returns the byte order of an atomic datatype. +
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
+
Returns: +
Byte order constant on success, negative on failure +
+ +
+
+
Name: H5Tset_order +
Signature: +
herr_t H5Tset_order(hid_t type_id,
        H5T_order_t order
    )
Description: +
This function sets the byte ordering of an atomic datatype. + Byte orderings currently supported are: +
    +
    H5T_ORDER_LE +
    Little-endian byte ordering (default) +
    H5T_ORDER_BE +
    Big-endian byte ordering +
H5T_ORDER_VAX
    VAX-endianness byte ordering (not currently supported) +
+
Parameters: +
+
hid_t type_id +
ID of datatype to set. +
H5T_order_t order +
Byte ordering constant. +
+
Returns: +
zero/negative +
+ +
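<p>For instance, a sketch that derives a big-endian integer type from
the native one:
<pre>
hid_t be_int = H5Tcopy (H5T_NATIVE_INT);   /* modifiable copy */
H5Tset_order (be_int, H5T_ORDER_BE);       /* force big-endian layout */
/* ... use be_int, e.g. as a file datatype ... */
H5Tclose (be_int);
</pre>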
+
+
Name: H5Tget_precision +
Signature: +
size_t H5Tget_precision(hid_t type_id + ) +
Description: +
This function returns the precision of an atomic data type. The + precision is the number of significant bits which, unless padding is + present, is 8 times larger than the value returned by H5Tget_size(). +
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
+
Returns: +
Number of significant bits on success, 0 on failure +
+ +
+
+
Name: H5Tset_precision +
Signature: +
herr_t H5Tset_precision(hid_t type_id,
        size_t precision
    )
Description: +
This function sets the precision of an atomic data type. The precision + is the number of significant bits which, unless padding is present, is 8 + times larger than the value returned by H5Tget_size(). +

If the precision is increased then the offset is decreased and then
    the size is increased to ensure that significant bits do not "hang
    over" the edge of the data type.

Changing the precision of an H5T_STRING automatically changes the + size as well. The precision must be a multiple of 8. +

When decreasing the precision of a floating point type, set the + locations and sizes of the sign, mantissa, and exponent fields + first. +

Parameters: +
+
hid_t type_id +
ID of datatype to set. +
size_t precision +
Number of bits of precision for datatype. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tget_offset +
Signature: +
size_t H5Tget_offset(hid_t type_id + ) +
Description: +
This function retrieves the bit offset of the first significant bit.
    The significant bits of an atomic datum can be offset from the beginning
    of the memory for that datum by an amount of padding.  The `offset'
    property specifies the number of bits of padding that appear to the
    "right of" the value.  That is, if we have a 32-bit datum with 16-bits
    of precision having the value 0x1122 then it will be laid out in
    memory as (from small byte address toward larger byte addresses):
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Byte PositionBig-Endian Offset=0Big-Endian Offset=16Little-Endian Offset=0Little-Endian Offset=16
0:[ pad][0x11][0x22][ pad]
1:[ pad][0x22][0x11][ pad]
2:[0x11][ pad][ pad][0x22]
3:[0x22][ pad][ pad][0x11]
+
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
+
Returns: +
Positive offset value on success, 0 on failure. +
+ +
+
+
Name: H5Tset_offset +
Signature: +
herr_t H5Tset_offset(hid_t type_id, + size_t offset + ) +
Description: +
This function sets the bit offset of the first significant bit.  The
    significant bits of an atomic datum can be offset from the beginning of
    the memory for that datum by an amount of padding.  The `offset'
    property specifies the number of bits of padding that appear to the
    "right of" the value.  That is, if we have a 32-bit datum with 16-bits
    of precision having the value 0x1122 then it will be laid out in
    memory as (from small byte address toward larger byte addresses):
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Byte PositionBig-Endian Offset=0Big-Endian Offset=16Little-Endian Offset=0Little-Endian Offset=16
0:[ pad][0x11][0x22][ pad]
1:[ pad][0x22][0x11][ pad]
2:[0x11][ pad][ pad][0x22]
3:[0x22][ pad][ pad][0x11]
+ +

If the offset is incremented then the total size is +incremented also if necessary to prevent significant bits of +the value from hanging over the edge of the data type. + +

The offset of an H5T_STRING cannot be set to anything but +zero. +

Parameters: +
+
hid_t type_id +
ID of datatype to set. +
size_t offset +
Offset of first significant bit. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tget_pad +
Signature: +
herr_t H5Tget_pad(hid_t type_id, + H5T_pad_t * lsb, + H5T_pad_t * msb + ) +
Description: +
This function retrieves the padding type of the least and most-significant + bit padding. Valid types are: +
    +
    H5T_PAD_ZERO +
    Set background to zeros. +
    H5T_PAD_ONE +
    Set background to ones. +
    H5T_PAD_BACKGROUND +
    Leave background alone. +
+
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
H5T_pad_t * lsb +
Pointer to location to return least-significant bit padding type. +
H5T_pad_t * msb +
Pointer to location to return most-significant bit padding type. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tset_pad +
Signature: +
herr_t H5Tset_pad(hid_t type_id, + H5T_pad_t lsb, + H5T_pad_t msb + ) +
Description: +
This function sets the least and most-significant bits padding types. +
    +
    H5T_PAD_ZERO +
    Set background to zeros. +
    H5T_PAD_ONE +
    Set background to ones. +
    H5T_PAD_BACKGROUND +
    Leave background alone. +
+
Parameters: +
+
hid_t type_id +
ID of datatype to set. +
H5T_pad_t lsb +
Padding type for least-significant bits. +
H5T_pad_t msb +
Padding type for most-significant bits. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tget_sign +
Signature: +
H5T_sign_t H5Tget_sign(hid_t type_id + ) +
Description: +
This function retrieves the sign type for an integer type. + Valid types are: +
    +
    H5T_SGN_NONE +
    Unsigned integer type. +
    H5T_SGN_2 +
    Two's complement signed integer type. +
+
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
+
Returns: +
Non-negative sign type on success, negative on failure +
+ +
+
+
Name: H5Tset_sign +
Signature: +
herr_t H5Tset_sign(hid_t type_id, + H5T_sign_t sign + ) +
Description: +
This function sets the sign property for an integer type.
    +
    H5T_SGN_NONE +
    Unsigned integer type. +
    H5T_SGN_2 +
    Two's complement signed integer type. +
+
Parameters: +
+
hid_t type_id +
ID of datatype to set. +
H5T_sign_t sign +
Sign type. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tget_fields +
Signature: +
herr_t H5Tget_fields(hid_t type_id, + size_t * epos, + size_t * esize, + size_t * mpos, + size_t * msize + ) +
Description: +
This function retrieves information about the locations of the various + bit fields of a floating point data type. The field positions are bit + positions in the significant region of the data type. Bits are + numbered with the least significant bit number zero. + Any (or even all) of the arguments can be null pointers. +
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
size_t * epos +
Pointer to location to return exponent bit-position. +
size_t * esize +
Pointer to location to return size of exponent in bits. +
size_t * mpos +
Pointer to location to return mantissa bit-position. +
size_t * msize +
Pointer to location to return size of mantissa in bits. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tset_fields +
Signature: +
herr_t H5Tset_fields(hid_t type_id, + size_t epos, + size_t esize, + size_t mpos, + size_t msize + ) +
Description: +
This function sets the locations and sizes of the various floating + point bit fields. The field positions are bit positions in the + significant region of the data type. Bits are numbered with the least + significant bit number zero. + +

Fields are not allowed to extend beyond the number of bits of + precision, nor are they allowed to overlap with one another. +

Parameters: +
+
hid_t type_id +
ID of datatype to set. +
size_t epos +
Exponent bit position. +
size_t esize +
Size of exponent in bits. +
size_t mpos +
Mantissa bit position. +
size_t msize +
Size of mantissa in bits. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tget_ebias +
Signature: +
size_t H5Tget_ebias(hid_t type_id + ) +
Description: +
This function retrieves the exponent bias of a floating-point type. +
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
+
Returns: +
Positive value on success, 0 on failure. +
+ +
+
+
Name: H5Tset_ebias +
Signature: +
herr_t H5Tset_ebias(hid_t type_id, + size_t ebias + ) +
Description: +
This function sets the exponent bias of a floating-point type. +
Parameters: +
+
hid_t type_id +
ID of datatype to set. +
size_t ebias +
Exponent bias value. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tget_norm +
Signature: +
H5T_norm_t H5Tget_norm(hid_t type_id + ) +
Description: +
This function retrieves the mantissa normalization of a floating-point + datatype. Valid normalization values are: +
    +
    H5T_NORM_IMPLIED +
    MSB of mantissa isn't stored, always 1 +
    H5T_NORM_MSBSET +
    MSB of mantissa is always 1 +
    H5T_NORM_NONE +
    Mantissa is not normalized +
+
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
+
Returns: +
Non-negative normalization type on success, negative on failure +
+ +
+
+
Name: H5Tset_norm +
Signature: +
herr_t H5Tset_norm(hid_t type_id, + H5T_norm_t norm + ) +
Description: +
This function sets the mantissa normalization of a floating-point + datatype. Valid normalization values are: +
    +
    H5T_NORM_IMPLIED +
    MSB of mantissa isn't stored, always 1 +
    H5T_NORM_MSBSET +
    MSB of mantissa is always 1 +
    H5T_NORM_NONE +
    Mantissa is not normalized +
+
Parameters: +
+
hid_t type_id +
ID of datatype to set. +
H5T_norm_t norm +
Mantissa normalization type. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tget_inpad +
Signature: +
H5T_pad_t H5Tget_inpad(hid_t type_id + ) +
Description: +
This function retrieves the internal padding type for unused bits in + floating-point datatypes. + Valid padding values are: +
    +
    H5T_PAD_ZERO +
    Set background to zeros. +
    H5T_PAD_ONE +
    Set background to ones. +
    H5T_PAD_BACKGROUND +
    Leave background alone. +
+
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
+
Returns: +
Non-negative padding type on success, negative on failure +
+ +
+
+
Name: H5Tset_inpad +
Signature: +
herr_t H5Tset_inpad(hid_t type_id, + H5T_pad_t inpad + ) +
Description: +
If any internal bits of a floating point type are unused + (that is, those significant bits which are not part of the + sign, exponent, or mantissa) then they will be filled + according to the value of this property. + Valid padding values are: +
    +
    H5T_PAD_ZERO +
    Set background to zeros. +
    H5T_PAD_ONE +
    Set background to ones. +
    H5T_PAD_BACKGROUND +
    Leave background alone. +
+
Parameters: +
+
hid_t type_id +
ID of datatype to modify. +
H5T_pad_t inpad
Padding type. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tget_cset +
Signature: +
H5T_cset_t H5Tget_cset(hid_t type_id + ) +
Description: +
This function retrieves the character set type of a string datatype. + Valid character set values are: +
    +
    H5T_CSET_ASCII +
    Character set is US ASCII +
+
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
+
Returns: +
Non-negative character set type on success, negative on failure +
+ +
+
+
Name: H5Tset_cset +
Signature: +
herr_t H5Tset_cset(hid_t type_id, + H5T_cset_t cset + ) +
Description: +
HDF5 is able to distinguish between character sets of different + nationalities and to convert between them to the extent possible. + Valid character set values are: +
    +
    H5T_CSET_ASCII +
    Character set is US ASCII +
+
Parameters: +
+
hid_t type_id +
ID of datatype to modify. +
H5T_cset_t cset +
Character set type. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tget_strpad +
Signature: +
H5T_str_t H5Tget_strpad(hid_t type_id + ) +
Description: +
This function retrieves the string padding method for a string datatype. + Valid string padding values are: +
    +
    H5T_STR_NULL +
    Pad with zeros (as C does) +
    H5T_STR_SPACE +
    Pad with spaces (as FORTRAN does) +
+
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
+
Returns: +
Non-negative string padding type on success, negative on failure +
+ +
+
+
Name: H5Tset_strpad +
Signature: +
herr_t H5Tset_strpad(hid_t type_id, + H5T_str_t strpad + ) +
Description: +
The method used to store character strings differs with the programming + language: C usually null terminates strings while Fortran + left-justifies and space-pads strings. This property defines the + storage mechanism for the string. + Valid string padding values are: +
    +
    H5T_STR_NULL +
    Pad with zeros (as C does) +
    H5T_STR_SPACE +
    Pad with spaces (as FORTRAN does) +
+
Parameters: +
+
hid_t type_id +
ID of datatype to modify. +
H5T_str_t strpad +
String padding type. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tget_nmembers +
Signature: +
intn H5Tget_nmembers(hid_t type_id + ) +
Description: +
This function retrieves the number of fields a compound datatype has. +
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
+
Returns: +
Number of members datatype has on success, negative on failure +
+ +
+
+
Name: H5Tget_member_name +
Signature: +
char * H5Tget_member_name(hid_t type_id, + intn fieldno + ) +
Description: +
This function retrieves the name of a field of a compound data type. + Fields are stored in no particular order with numbers 0 through N-1 + where N is the value returned by H5Tget_nmembers(). The name of the + field is allocated with malloc() and the caller is responsible for + freeing the memory used by the name. +
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
intn fieldno +
Field number (indexed from 0) of the field name to retrieve. +
+
Returns: +
Valid pointer on success, NULL on failure +
+ +
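<p>A sketch that lists the member names of a compound type (assumes
&lt;stdio.h&gt; and &lt;stdlib.h&gt;; remember that the caller frees
each name):
<pre>
intn i, n = H5Tget_nmembers (type_id);

for (i = 0; i &lt; n; i++) {
    char *name = H5Tget_member_name (type_id, i);
    printf ("member %d: %s\n", (int)i, name);
    free (name);   /* the library allocated it with malloc() */
}
</pre>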
+
+
Name: H5Tget_member_dims +
Signature: +
int H5Tget_member_dims(hid_t type_id, + intn fieldno, + size_t * dims, + int * perm + ) +
Description: +
This function returns the dimensionality of the field.  The dimensions
    and permutation vector are returned through arguments dims
    and perm, both arrays of at least four elements.  Either
    (or even both) may be null pointers.
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
intn fieldno +
Field number (indexed from 0) of the field dims to retrieve. +
size_t * dims +
Pointer to buffer to store the dimensions of the field. +
int * perm +
Pointer to buffer to store the permutation vector of the field. +
+
Returns: +
Number of dimensions on success, negative on failure. +
+ +
+
+
Name: H5Tget_member_type +
Signature: +
hid_t H5Tget_member_type(hid_t type_id, + intn fieldno + ) +
Description: +
This function returns the data type of the specified member. The caller + should invoke H5Tclose() to release resources associated with the type. +
Parameters: +
+
hid_t type_id +
ID of datatype to query. +
intn fieldno +
Field number (indexed from 0) of the field type to retrieve. +
+
Returns: +
The ID of a copy of the datatype of the field, negative on failure. +
+ +
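Taken together, H5Tget_nmembers(), H5Tget_member_name(), and
H5Tget_member_type() allow the fields of a compound datatype to be walked.
A minimal sketch, assuming ct is a valid compound datatype ID and that
<stdlib.h> is included for free():

    intn  i, n = H5Tget_nmembers(ct);
    for (i = 0; i < n; i++) {
        char *name = H5Tget_member_name(ct, i);  /* malloc'd; caller frees */
        hid_t mt   = H5Tget_member_type(ct, i);  /* a copy; caller closes */
        /* ... inspect the field's name and datatype here ... */
        H5Tclose(mt);
        free(name);
    }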
+
+
Name: H5Tinsert +
Signature: +
herr_t H5Tinsert(hid_t type_id, + const char * name, + off_t offset, + hid_t field_id + ) +
Description: +
This function adds another member to the compound data type + type_id. The new member has a name which + must be unique within the compound data type. The offset + argument defines the start of the member in an instance of the compound + data type, and field_id is the type of the new member. + +

Note: All members of a compound data type must be atomic; a + compound data type cannot have a member which is a compound data + type. +

Parameters: +
+
hid_t type_id +
ID of compound datatype to modify. +
const char * name +
Name of the field to insert. +
off_t offset +
Offset in memory structure of the field to insert. +
hid_t field_id +
Datatype ID of the field to insert. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tpack +
Signature: +
herr_t H5Tpack(hid_t type_id + ) +
Description: +
This function recursively removes padding from within a compound + datatype to make it more efficient (space-wise) to store that data. +
Parameters: +
+
hid_t type_id +
ID of datatype to modify. +
+
Returns: +
zero/negative +
+ +
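A sketch of building and packing a compound datatype; the pair_t struct and
its field names are illustrative, and H5Tcreate(), the HOFFSET offset-of
macro, and the H5T_NATIVE_* type names are assumptions here rather than
definitions from this section:

    typedef struct { int a; double b; } pair_t;

    hid_t ct = H5Tcreate(H5T_COMPOUND, sizeof(pair_t));  /* assumed creator */
    H5Tinsert(ct, "a", HOFFSET(pair_t, a), H5T_NATIVE_INT);
    H5Tinsert(ct, "b", HOFFSET(pair_t, b), H5T_NATIVE_DOUBLE);
    H5Tpack(ct);    /* squeeze out internal padding for compact storage */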
+
+
Name: H5Tregister_hard +
Signature: +
herr_t H5Tregister_hard(const char + * name, hid_t src_id, + hid_t dst_id, + H5T_conv_t func + ) +
Description: +
This function registers a hard conversion function for a data type + conversion path. The path is specified by the source and destination + datatypes src_id and dst_id. A conversion + path can only have one hard function, so func replaces any + previous hard function. +

If func is the null pointer then any hard function + registered for this path is removed from this path. The soft functions + are then used when determining which conversion function is appropriate + for this path. The name argument is used only + for debugging and should be a short identifier for the function. +

The type of the conversion function pointer is declared as: + typedef herr_t (*H5T_conv_t) (hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, + size_t nelmts, void *buf, void *bkg); +

Parameters: +
+
const char * name +
Name displayed in diagnostic output. +
hid_t src_id +
ID of source datatype. +
hid_t dst_id +
ID of destination datatype. +
H5T_conv_t func +
Function to convert between source and destination datatypes. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tregister_soft +
Signature: +
herr_t H5Tregister_soft(const char + * name, hid_t src_id, + hid_t dst_id, + H5T_conv_t func + ) +
Description: +
This function registers a soft conversion function by adding it to the + end of the master soft list and replacing the soft function in all + applicable existing conversion paths. The name + is used only for debugging and should be a short identifier + for the function. +

The type of the conversion function pointer is declared as: + typedef herr_t (*H5T_conv_t) (hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, + size_t nelmts, void *buf, void *bkg); +

Parameters: +
+
const char * name +
Name displayed in diagnostic output. +
hid_t src_id +
ID of source datatype. +
hid_t dst_id +
ID of destination datatype. +
H5T_conv_t func +
Function to convert between source and destination datatypes. +
+
Returns: +
zero/negative +
+ +
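The sketch below shows the shape of a function matching the H5T_conv_t
typedef above, registered as a soft conversion; the conversion body and any
handling of setup/teardown commands carried through cdata are omitted:

    static herr_t
    my_conv(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
            size_t nelmts, void *buf, void *bkg)
    {
        /* Convert nelmts values in place in buf.  A real function must
         * also honor any initialization/cleanup requests in cdata. */
        return 0;   /* success */
    }

    /* src_id and dst_id are valid datatype IDs for the path */
    H5Tregister_soft("my_conv", src_id, dst_id, my_conv);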
+
+
Name: H5Tunregister +
Signature: +
herr_t H5Tunregister(H5T_conv_t func + ) +
Description: +
This function removes a conversion function from all conversion paths. +

The type of the conversion function pointer is declared as: + typedef herr_t (*H5T_conv_t) (hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, + size_t nelmts, void *buf, void *bkg); +

Parameters: +
+
H5T_conv_t func +
Function to remove from conversion paths. +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Tclose +
Signature: +
herr_t H5Tclose(hid_t type_id + ) +
Description: +
This function releases a datatype. Further access through the datatype + ID is illegal. Failure to release a datatype with this call will + result in resource leaks. +
Parameters: +
+
hid_t type_id +
ID of datatype to release. +
+
Returns: +
zero/negative +
+ + +
+

Dataspace Object API Functions

+

These functions create and manipulate the dataspace in which to store the +elements of a dataset. +
+
+ +


+
+
Name: H5Screate_simple +
Signature: +
hid_t H5Screate_simple(int rank, + const hsize_t * dims, + const hsize_t * maxdims + ) +
Description: +
This function creates a new simple data space object and opens it for + access. The rank is the number of dimensions used in the + dataspace. The dims argument is the size of the simple + dataset and the maxdims argument is the upper limit on the + size of the dataset. maxdims may be the null pointer in + which case the upper limit is the same as dims. If an + element of maxdims is zero then the corresponding dimension + is unlimited; otherwise, no element of maxdims should be + smaller than the corresponding element of dims. The + dataspace ID returned from this function should be released with + H5Sclose or resource leaks will occur. +
Parameters: +
+
int rank +
Number of dimensions of dataspace. +
const hsize_t * dims +
An array of the size of each dimension. +
const hsize_t * maxdims +
An array of the maximum size of each dimension. +
+
Returns: +
A dataspace ID on success, negative on failure. +
+ +
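For example, a sketch of a 2-dimensional dataspace whose first dimension
starts at 100 elements and is unlimited, using the zero-in-maxdims
convention described above:

    hsize_t dims[2]    = {100, 200};
    hsize_t maxdims[2] = {0, 200};    /* 0 => first dimension unlimited */
    hid_t   space      = H5Screate_simple(2, dims, maxdims);
    /* ... use the dataspace ... */
    H5Sclose(space);   /* release it to avoid resource leaks */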
+
+
Name: H5Scopy +
Signature: +
hid_t H5Scopy(hid_t space_id + ) +
Description: +
This function copies a dataspace. The dataspace ID returned from this + function should be released with H5Sclose or resource leaks will occur. +
Parameters: +
+
hid_t space_id +
ID of dataspace to copy. +
+
Returns: +
A dataspace ID on success, negative on failure. +
+ +
+
+
Name: H5Sget_npoints +
Signature: +
hsize_t H5Sget_npoints(hid_t space_id) +
Description: +
This function determines the number of elements in a dataspace. For + example, a simple 3-dimensional dataspace with dimensions 2, 3 and 4 + would have 24 elements. +
Parameters: +
+
hid_t space_id +
ID of the dataspace object to query +
+
Returns: +
Number of elements in the dataspace, 0 on failure +
+ +
+
+
Name: H5Sget_ndims +
Signature: +
int H5Sget_ndims(hid_t space_id) +
Description: +
This function determines the dimensionality (or rank) of a dataspace. +
Parameters: +
+
hid_t space_id +
ID of the dataspace object to query +
+
Returns: +
Number of dimensions in the dataspace, negative on failure +
+ +
+
+
Name: H5Sget_dims +
Signature: +
int H5Sget_dims(hid_t space_id, + hsize_t *dims, + hsize_t *maxdims + ) +
Description: +
This function returns the size of each dimension in a dataspace through + the dims parameter. +
Parameters: +
+
hid_t space_id +
ID of the dataspace object to query +
hsize_t *dims +
Pointer to array to store the size of each dimension. +
hsize_t *maxdims +
Pointer to array to store the maximum size of each dimension. +
+
Returns: +
Number of dimensions in the dataspace, negative on failure +
+ +
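A sketch combining the three query functions above, assuming space is a
valid dataspace ID of rank no greater than 4:

    int     rank;
    hsize_t dims[4], maxdims[4], npoints;

    rank    = H5Sget_ndims(space);
    H5Sget_dims(space, dims, maxdims);    /* fills dims[0..rank-1] */
    npoints = H5Sget_npoints(space);      /* product of the dims[] values */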
+
+
Name: H5Sis_simple +
Signature: +
hbool_t H5Sis_simple(hid_t space_id) +
Description: +
This function determines whether a dataspace object is a simple + dataspace or not. [Currently, all dataspace objects are simple + dataspaces; complex dataspace support will be added in the future] +
Parameters: +
+
hid_t space_id +
ID of the dataspace object to query +
+
Returns: +
TRUE or FALSE on success, negative on failure +
+ +
+
+
Name: H5Sset_space +
Signature: +
herr_t H5Sset_space(hid_t space_id, + uint32 rank, + uint32 *dims + ) +
Description: +
This function sets the number of dimensions and the size of each + dimension for the space in which a dataset is stored. This function + only creates simple dataspace objects. Setting the rank to a + value of zero allows scalar objects to be created. Dimensions are + specified from slowest to fastest changing in the dims + array (i.e. 'C' order). Setting the size of a dimension to zero + indicates that the dimension is of unlimited size and should be allowed + to expand. Currently, only the first dimension in the array (the + slowest) may be unlimited in size. + [Currently, all dataspace objects are simple + dataspaces; complex dataspace support will be added in the future] +
Parameters: +
+
hid_t space_id +
ID of the dataspace object. +
uint32 rank +
The number of dimensions the object is composed of. +
uint32 * dims +
An array of the size of each dimension. (NULL for scalar objects) +
+
Returns: +
zero/negative +
+ +
+
+
Name: H5Sset_hyperslab +
Signature: +
herr_t H5Sset_hyperslab(hid_t space_id, + const hssize_t *start, + const hsize_t *count, + const hsize_t *stride + ) +
Description: +
This function selects a hyperslab from a simple dataspace. The stride + array may be used to sub-sample the hyperslab chosen: a value of 1 in each + position of the stride array selects contiguous elements in the array, + a value of 2 selects every other element, and so on. If the stride parameter is + set to NULL, a contiguous hyperslab is chosen. The values in the start and + count arrays may be negative, to allow for selecting hyperslabs in chunked + datasets which extend in arbitrary directions. +
Parameters: +
+
hid_t space_id +
ID of the dataspace object to set hyperslab in. +
const hssize_t *start +
Pointer to array of starting location for hyperslab. +
const hsize_t *count +
Pointer to array of magnitude of hyperslab. +
const hsize_t *stride +
Pointer to array of stride of hyperslab. +
+
Returns: +
zero/negative +
+ +
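A sketch of a sub-sampled selection, assuming space is a valid
2-dimensional dataspace ID; a stride of 2 selects every other element in
each dimension:

    hssize_t start[2]  = {0, 0};     /* begin at the origin */
    hsize_t  count[2]  = {50, 50};   /* magnitude of the hyperslab */
    hsize_t  stride[2] = {2, 2};     /* take every other element */
    H5Sset_hyperslab(space, start, count, stride);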
+
+
Name: H5Sget_hyperslab +
Signature: +
int H5Sget_hyperslab(hid_t space_id, + hssize_t *start, + hsize_t *count, + hsize_t *stride + ) +
Description: +
This function retrieves information about the hyperslab from a simple + dataspace. If no hyperslab has been defined then the hyperslab is the + same as the entire array. +
Parameters: +
+
hid_t space_id +
ID of the dataspace object to set hyperslab in. +
hssize_t *start +
Pointer to array to store starting location of hyperslab. +
hsize_t *count +
Pointer to array to store magnitude of hyperslab. +
hsize_t *stride +
Pointer to array to store stride of hyperslab. +
+
Returns: +
Hyperslab dimensionality on success, negative on failure. +
+ +
+
+
Name: H5Sclose +
Signature: +
herr_t H5Sclose(hid_t space_id + ) +
Description: +
This function releases a dataspace. Further access through the dataspace + ID is illegal. Failure to release a dataspace with this call will + result in resource leaks. +
Parameters: +
+
hid_t space_id +
ID of dataspace to release. +
+
Returns: +
zero/negative +
+ +
+

Group Object API Functions

+ +

A group associates names with objects and provides a mechanism +which can map a name to an object. Since all objects +appear in at least one group (with the possible exception of the root +object) and since objects can have names in more than one group, the +set of all objects in an HDF5 file is a directed graph. The internal +nodes (nodes with out-degree greater than zero) must be groups while +the leaf nodes (nodes with out-degree zero) are either empty groups or +objects of some other type. Exactly one object in every non-empty +file is the root object. The root object always has a positive +in-degree because it is pointed to by the file boot block. + +

Every file handle returned by H5Fcreate or +H5Fopen maintains an independent current working group +stack, the top item of which is the current working group (the root +object is the current working group if the stack is empty). The stack +can be manipulated with H5Gset, H5Gpush, and +H5Gpop. + +

An object name consists of one or more components separated from +one another by slashes. If the name begins with a slash then the +object is located by looking for the first component in the root +object, then looking for the second component in that object, etc., +until the entire name is traversed. If the name doesn't begin with a +slash then the traversal begins with the current working group. + +

The library does not maintain the full absolute name of its current +working group because (1) cycles in the graph can make the name length +unbounded and (2) a group doesn't necessarily have a unique name. A +more Unix-like hierarchical naming scheme can be implemented on top of +the directed graph scheme by creating a ".." entry in each group that +points to its single predecessor and then a getcwd +function would be trivial. + +
+
+ +


+
+
Name: H5Gcreate +
Signature: +
herr_t H5Gcreate (hid_t + file, const char *name, + size_t size_hint) +
Description: +
This function creates a new empty group and gives it a name. +
Parameters: +
+
+
hid_t file +
The file handle returned by H5Fcreate or + H5Fopen. +
const char *name +
The absolute or relative name of the new group. +
size_t size_hint +
The size hint is an optional parameter that indicates + the number of bytes to reserve for the names that will + appear in the group. A conservative estimate could result + in multiple system-level I/O requests to read the group + name heap while a liberal estimate could result in a + single large I/O request even when the group has just a + few names. HDF5 stores each name with a null terminator. +
+
Returns: +
Returns a negative value on failure, non-negative otherwise. +
+ +
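A sketch, assuming file was returned by H5Fcreate or H5Fopen and that a
size hint of zero asks the library to choose a default heap size (an
assumption, not stated above):

    H5Gcreate(file, "/Data", 0);        /* absolute name */
    H5Gcreate(file, "/Data/Raw", 0);    /* nested group */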
+
+
Name: H5Gopen +
Signature: +
hid_t H5Gopen(hid_t file_id, + const char *name + ) +
Description: +
This function opens an existing group for modification. When finished, + call H5Gclose() to close it and release resources. +
Parameters: +
+
hid_t file_id +
ID of file to open group within. +
const char * name +
Name of group to open. +
+
Returns: +
Valid group ID on success, negative on failure. +
+ +
+
+
Name: H5Gset +
Signature: +
herr_t H5Gset (hid_t + file, const char *name) +
Description: +
This function sets the current working group by modifying the + top element of the current working group stack or, if the + stack is empty, by pushing a new element onto the stack. +
Parameters: +
+
+
hid_t file +
The file handle returned by H5Fcreate or + H5Fopen. +
const char *name +
The name of the new current working group. If the name + doesn't begin with a slash then it is looked up relative to + the previous current working group. +
+
Returns: +
Returns a negative value on failure, non-negative otherwise. +
+ +
+
+
Name: H5Gpush +
Signature: +
herr_t H5Gpush (hid_t + file, const char *name) +
Description: +
This function sets the current working group by pushing a + new element onto the current working group stack. +
Parameters: +
+
+
hid_t file +
The file handle returned by H5Fcreate or + H5Fopen. +
const char *name +
The name of the new current working group. If the name + doesn't begin with a slash then it is looked up relative to + the previous current working group. +
+
Returns: +
Returns a negative value on failure, non-negative otherwise. +
+ +
+
+
Name: H5Gpop +
Signature: +
herr_t H5Gpop (hid_t + file) +
Description: +
This function restores the previous current working group by + popping an element from the current working group stack. An + empty stack implies that the current working group is the root + object. Attempting to pop an empty stack results in failure. +
Parameters: +
+
+
hid_t file +
The file handle returned by H5Fcreate or + H5Fopen. +
+
Returns: +
Returns a negative value on failure, non-negative otherwise. +
+ +
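A sketch of the current-working-group stack in use (the group names are
illustrative):

    H5Gset (file, "/Data");   /* replace top of stack: cwg is /Data    */
    H5Gpush(file, "Raw");     /* relative name: cwg is now /Data/Raw   */
    /* ... access objects relative to /Data/Raw ... */
    H5Gpop (file);            /* restore: cwg is /Data again           */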
+
+
Name: H5Gclose +
Signature: +
herr_t H5Gclose(hid_t group_id + ) +
Description: +
This function releases a group. Further access through the group + ID is illegal. Failure to release a group with this call will + result in resource leaks. +
Parameters: +
+
hid_t group_id +
ID of group to release. +
+
Returns: +
zero/negative +
+ + + + + +
+

Glossary of data-types used

+

Since many of the typedefs in the HDF5 API are not well-defined yet, +the types below may change radically en route to a final API... +
+
+ +Basic Types: +

    +
  • char - 8-bit character (only for ASCII information) +
  • int8 - 8-bit signed integer +
  • uint8 - 8-bit unsigned integer +
  • int16 - 16-bit signed integer +
  • uint16 - 16-bit unsigned integer +
  • int32 - 32-bit signed integer +
  • uint32 - 32-bit unsigned integer +
  • intn - "native" signed integer +
  • uintn - "native" unsigned integer +
  • int64 - 64-bit signed integer (new) +
  • uint64 - 64-bit unsigned integer (new) +
  • float32 - 32-bit IEEE float +
  • float64 - 64-bit IEEE float +
+ +Complex Types: +
    +
  • hid_t - 32-bit unsigned integer used as ID for memory objects +
  • hoid_t - 32-bit unsigned integer (currently) used as ID for disk-based + objects +
  • hbool_t - boolean to indicate true/false/error codes from functions +
  • herr_t - 32-bit integer to indicate succeed/fail codes from functions +
+ +Disk I/O Types: +
    +
  • hoff_t - (64-bit?) offset on disk in bytes +
  • hlen_t - (64-bit?) length on disk in bytes +
+ diff --git a/doc/html/H5.api_map.html b/doc/html/H5.api_map.html new file mode 100644 index 0000000..c35102a --- /dev/null +++ b/doc/html/H5.api_map.html @@ -0,0 +1,849 @@ + +HDF5 Legacy API Equivalence + + +
+

HDF5: API Mapping to legacy APIs

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FunctionalitynetCDFSDAIOHDF5Comments
Open existing file for read/writencopenSDstartAIO_openH5Fopen
Creates new file for read/write.nccreate

H5FcreateSD API handles this with SDopen
Close filenccloseSDendAIO_closeH5Fclose
Redefine parametersncredef


Unnecessary under SD & HDF5 data-models
End "define" modencendef


Unnecessary under SD & HDF5 data-models
Query the number of datasets, dimensions and attributes in a filencinquireSDfileinfo
H5Dget_info
H5Rget_num_relations
H5Gget_num_contents
HDF5 interface is more granular and flexible
Update a writeable file with current changesncsync
AIO_flushH5MflushHDF5 interface is more flexible because it can be applied to parts of the +file hierarchy instead of the whole file at once. The SD interface does not +have this feature, although most of the lower HDF library supports it.
Close file access without applying recent changesncabort


How useful is this feature?
Create new dimensionncdimdefSDsetdimname
H5McreateSD interface actually creates dimensions with datasets, this just allows +naming them
Get ID of existing dimensionncdimidSDgetdimid
H5MaccessSD interface looks up dimensions by index and the netCDF interface uses +names, but they are close enough. The HDF5 interface does not currently allow +access to particular dimensions, only the dataspace as a whole.
Get size & name of dimensionncdiminqSDdiminfo
H5Mget_name
H5Sget_lrank
Only a rough match
Rename dimensionncdimrenameSDsetdimname
H5Mset_name
Create a new datasetncvardefSDcreateAIO_mkarrayH5Mcreate
Attach to an existing datasetncvaridSDselectAIO_arr_loadH5Maccess
Get basic information about a datasetncvarinqSDgetinfoAIO_arr_get_btype
AIO_arr_get_nelmts
AIO_arr_get_nbdims
AIO_arr_get_bdims
AIO_arr_get_slab
H5Dget_infoAll interfaces have different levels of information that they return; some +use of auxiliary functions is required to get an equivalent amount of information
Write a single value to a datasetncvarput1SDwritedataAIO_writeH5DwriteWhat is this useful for?
Read a single value from a datasetncvarget1SDreaddataAIO_readH5DreadWhat is this useful for?
Write a solid hyperslab of data (i.e. subset) to a datasetncvarputSDwritedataAIO_writeH5Dwrite
Read a solid hyperslab of data (i.e. subset) from a datasetncvargetSDreaddataAIO_readH5Dread
Write a general hyperslab of data (i.e. possibly subsampled) to a datasetncvarputgSDwritedataAIO_writeH5Dwrite
Read a general hyperslab of data (i.e. possibly subsampled) from a datasetncvargetgSDreaddataAIO_readH5Dread
Rename a dataset variablencvarrename

H5Mset_name
Add an attribute to a datasetncattputSDsetattr
H5Rattach_oidHDF5 requires creating a separate object to attach to a dataset, but it also +allows objects to be attributes of any other object, even nested.
Get attribute informationncattinqSDattrinfo
H5Dget_infoHDF5 has no specific function for attributes, they are treated as all other +objects in the file.
Retrieve attribute for a datasetncattgetSDreadattr
H5DreadHDF5 uses general dataset I/O for attributes.
Copy attribute from one dataset to anotherncattcopy


What is this used for?
Get name of attributencattnameSDattrinfo
H5Mget_name
Rename attributencattrename

H5Mset_name
Delete attributencattdel

H5MdeleteThis can be faked in current HDF interface with lower-level calls
Compute # of bytes to store a number-typenctypelenDFKNTsize

Hmm, the HDF5 Datatype interface needs this functionality.
Indicate that fill-values are to be written to datasetncsetfillSDsetfillmode

HDF5 Datatype interface should work on this functionality
Get information about "record" variables (Those datasets which share the +same unlimited dimensionncrecinq


This should probably be wrapped in a higher layer interface, if it's +needed for HDF5.
Get a record from each dataset sharing the unlimited dimensionncrecget


This is somewhat equivalent to reading a vdata with non-interlaced +fields, only in a dataset oriented way. This should also be wrapped in a +higher layer interface if it's necessary for HDF5.
Put a record from each dataset sharing the unlimited dimensionncrecput


This is somewhat equivalent to writing a vdata with non-interlaced +fields, only in a dataset oriented way. This should also be wrapped in a +higher layer interface if it's necessary for HDF5.
Map a dataset's name to an index to reference it with
SDnametoindex
H5Mfind_nameEquivalent functionality except HDF5 call returns an OID instead of an +index.
Get the valid range of values for data in a dataset
SDgetrange

Easily implemented with attributes at a higher level for HDF5.
Release access to a dataset
SDendaccessAIO_arr_destroyH5MreleaseOdd that the netCDF API doesn't have this...
Set the valid range of data in a dataset
SDsetrange

Easily implemented with attributes at a higher level for HDF5.
Set the label, units, format, etc. of the data values in a dataset
SDsetdatastrs

Easily implemented with attributes at a higher level for HDF5.
Get the label, units, format, etc. of the data values in a dataset
SDgetdatastrs

Easily implemented with attributes at a higher level for HDF5.
Set the label, units, format, etc. of the dimensions in a dataset
SDsetdimstrs

Easily implemented with attributes at a higher level for HDF5.
Get the label, units, format, etc. of the dimensions in a dataset
SDgetdimstrs

Easily implemented with attributes at a higher level for HDF5.
Set the scale of the dimensions in a dataset
SDsetdimscale

Easily implemented with attributes at a higher level for HDF5.
Get the scale of the dimensions in a dataset
SDgetdimscale

Easily implemented with attributes at a higher level for HDF5.
Set the calibration parameters of the data values in a dataset
SDsetcal

Easily implemented with attributes at a higher level for HDF5.
Get the calibration parameters of the data values in a dataset
SDgetcal

Easily implemented with attributes at a higher level for HDF5.
Set the fill value for the data values in a dataset
SDsetfillvalue

HDF5 needs something like this, I'm not certain where to put it.
Get the fill value for the data values in a dataset
SDgetfillvalue

HDF5 needs something like this, I'm not certain where to put it.
Move/Set the dataset to be in an 'external' file
SDsetexternalfile
H5Dset_storageHDF5 has simple functions for this, but needs an API for setting up the +storage flow.
Move/Set the dataset to be stored using only certain bits from the dataset
SDsetnbitdataset
H5Dset_storageHDF5 has simple functions for this, but needs an API for setting up the +storage flow.
Move/Set the dataset to be stored in compressed form
SDsetcompress
H5Dset_storageHDF5 has simple functions for this, but needs an API for setting up the +storage flow.
Search for an dataset attribute with particular name
SDfindattr
H5Mfind_name
H5Mwild_search
HDF5 can handle wildcard searches for this feature.
Map a run-time dataset handle to a persistent disk reference
SDidtoref

I'm not certain this is needed for HDF5.
Map a persistent disk reference for a dataset to an index in a group
SDreftoindex

I'm not certain this is needed for HDF5.
Determine if a dataset is a 'record' variable (i.e. it has an unlimited dimension)
SDisrecord

Easily implemented by querying the dimensionality at a higher level for HDF5.
Determine if a dataset is a 'coordinate' variable (i.e. it is used as a dimension)
SDiscoord

I'm not certain this is needed for HDF5.
Set the access type (i.e. parallel or serial) for dataset I/O
SDsetaccesstype

HDF5 has functions for reading the information about this, but needs a better +API for setting up the storage flow.
Set the size of blocks used to store a dataset with unlimited dimensions
SDsetblocksize

HDF5 has functions for reading the information about this, but needs a better +API for setting up the storage flow.
Sets backward compatibility of dimensions created.
SDsetdimval_comp

Unnecessary in HDF5.
Checks backward compatibility of dimensions created.
SDisdimval_comp

Unnecessary in HDF5.
Move/Set the dataset to be stored in chunked form
SDsetchunk
H5Dset_storageHDF5 has simple functions for this, but needs an API for setting up the +storage flow.
Get the chunking information for a dataset stored in chunked form
SDgetchunkinfo
H5Dstorage_detail
Read/Write chunks of a dataset using a chunk index
SDreadchunk
SDwritechunk


I'm not certain that HDF5 needs something like this.
Tune chunk caching parameters for chunked datasets
SDsetchunkcache

HDF5 needs something like this.
Change some default behavior of the library

AIO_defaults
Something like this would be useful in HDF5, to tune I/O pipelines, etc.
Flush and close all open files

AIO_exit
Something like this might be useful in HDF5, although it could be + encapsulated with a higher-level function.
Target an architecture for data-type storage

AIO_target
There are some rough parallels with using the data-type in HDF5 to create + data-type objects which can be used to write out future datasets.
Map a filename to a file ID

AIO_filenameH5Mget_name
Get the active directory (where new datasets are created)

AIO_getcwd
HDF5 allows attaching to multiple directories (groups), any of which + can have new datasets created within it.
Change active directory

AIO_chdir
Since HDF5 has a slightly different access method for directories (groups), + this functionality can be wrapped around calls to H5Gget_oid_by_name.
Create directory

AIO_mkdirH5Mcreate
Return detailed information about an object

AIO_statH5Dget_info
H5Dstorage_detail
Perhaps more information should be provided through another function in + HDF5?
Get "flag" information

AIO_getflags
Not required in HDF5.
Set "flag" information

AIO_setflags
Not required in HDF5.
Get detailed information about all objects in a directory

AIO_lsH5Gget_content_info_mult
H5Dget_info
H5Dstorage_detail
Only roughly equivalent functionality in HDF5, perhaps more should be + added?
Get base type of object

AIO_BASICH5Gget_content_info
Set base type of dataset

AIO_arr_set_btypeH5Mcreate(DATATYPE)
Set dimensionality of dataset

AIO_arr_set_bdimsH5Mcreate(DATASPACE)
Set slab of dataset to write

AIO_arr_set_slab
This is similar to the process of creating a dataspace for use when + performing I/O on an HDF5 dataset
Describe chunking of dataset to write

AIO_arr_set_chunkH5Dset_storage
Describe array index permutation of dataset to write

AIO_arr_set_permH5Dset_storage
Create a new dataset with dataspace and datatype information from an + existing dataset.

AIO_arr_copy
This can be mimicked in HDF5 by attaching to the datatype and dataspace of +an existing dataset and using the IDs to create new datasets.
Create a new directory to group objects within

AIO_mkgroupH5Mcreate(GROUP)
Read name of objects in directory

AIO_read_groupH5Gget_content_info_mult
Add objects to directory

AIO_write_groupH5Ginsert_item_mult
Combine an architecture and numeric type to derive the format's datatype

AIO_COMBINE
This is a nice feature to add to HDF5.
Derive an architecture from the format's datatype

AIO_ARCH
This is a nice feature to add to HDF5.
Derive a numeric type from the format's datatype

AIO_PNT
This is a nice feature to add to HDF5.
Register error handling function for library to call when errors occur

AIO_error_handler
This should be added to HDF5.
+ diff --git a/doc/html/H5.format.html b/doc/html/H5.format.html new file mode 100644 index 0000000..a3c9a7c --- /dev/null +++ b/doc/html/H5.format.html @@ -0,0 +1,3183 @@ + + + + HDF5 Draft Disk-Format Specification + + + +

HDF5: Disk Format Implementation

+ +
    +
  1. + Disk Format Level 0 - File Signature and Boot Block +
  2. + Disk Format Level 1 - File Infrastructure +
      +
    1. + Disk Format Level 1A - B-link Trees +
    2. + Disk Format Level 1B - Symbol Table +
    3. + Disk Format Level 1C - Symbol Table Entry +
    4. + Disk Format Level 1D - Local Heaps +
    5. + Disk Format Level 1E - Global Heap +
    6. + Disk Format Level 1F - Free-Space Index +
    +
  3. + Disk Format Level 2 - Data Objects +
      +
    1. + Disk Format Level 2a - Data Object Headers +
        +
      1. + Name: NIL +
      2. + Name: Simple Data Space +
      3. + Name: Data-Space +
      4. + Name: Data-Type +
      5. + Name: Reserved - not assigned yet +
      6. + Name: Reserved - not assigned yet +
      7. + Name: Data Storage - Compact +
      8. + Name: Data Storage - External Data Files +
      9. + Name: Data Storage - Layout +
      10. + Name: Reserved - not assigned yet +
      11. + Name: Reserved - not assigned yet +
      12. + Name: Data Storage - Compressed +
      13. + Name: Attribute List +
      14. + Name: Object Name +
      15. + Name: Object Modification Date & Time +
      16. + Name: Shared Object Message +
      17. + Name: Object Header Continuation +
      18. + Name: Symbol Table Message +
      +
    2. + Disk Format: Level 2b - Shared Data Object Headers +
    3. + Disk Format: Level 2c - Data Object Data Storage +
    +
+ + +

Disk Format Implementation

+ +

The format of a HDF5 file on disk encompasses several + key ideas of the current HDF4 & AIO file formats as well as + addressing some short-comings therein. The new format will be + more self-describing than the HDF4 format and will be more + uniformly applied to data objects in the file. + + +

Three levels of information compose the file format. The level + 0 contains basic information for identifying and + "boot-strapping" the file. Level 1 information is composed of + the object directory (stored as a B-tree) and is used as the + index for all the objects in the file. The rest of the file is + composed of data-objects at level 2, with each object + partitioned into header (or "meta") information and data + information. + +

The sizes of various fields in the following layout tables are + determined by looking at the number of columns the field spans + in the table. There are three exceptions: (1) The size may be + overridden by specifying a size in parentheses, (2) the size of + addresses is determined by the Size of Addresses field + in the boot block, and (3) the size of size fields is determined + by the Size of Sizes field in the boot block. + +

+ Disk Format: Level 0 - File Signature and Boot Block

+ +

The boot block may begin at certain predefined offsets within + the HDF5 file, allowing a block of unspecified content for + users to place additional information at the beginning (and + end) of the HDF5 file without limiting the HDF5 library's + ability to manage the objects within the file itself. This + feature was designed to accommodate wrapping an HDF5 file in + another file format or adding descriptive information to the + file without requiring the modification of the actual file's + information. The boot-block is located by searching for the + HDF5 file signature at byte offset 0, byte offset 512 and at + successive locations in the file, each a multiple of two of + the previous location, i.e. 0, 512, 1024, 2048, etc. + +

The boot-block is composed of a file signature, followed by + boot block and object directory version numbers, information + about the sizes of offset and length values used to describe + items within the file, the size of each object directory page, + and a symbol table entry for the root object in the file. + +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ HDF5 Boot Block Layout +
bytebytebytebyte

HDF5 File Signature (8 bytes)

Version # of Boot BlockVersion # of Global Free-Space StorageVersion # of Object DirectoryReserved
Version # of Shared Header Message FormatSize of AddressesSize of SizesReserved (zero)
Symbol Table Leaf Node KSymbol Table Internal Node K
File Consistency Flags
Base Address
Address of Global Free-Space Heap
End of File Address

+ Symbol-Table Entry of the "Root Object" +

+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field NameDescription
File SignatureThis field contains a constant value and can be used to + quickly identify a file as being an HDF5 file. The + constant value is designed to allow easy identification of + an HDF5 file and to allow certain types of data corruption + to be detected. The file signature of an HDF5 file always + contains the following values: + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
decimal13772687013102610
hexadecimal894844460d0a1a0a
ASCII C Notation\211HDF\r\n\032\n
+
+
+ + This signature both identifies the file as an HDF5 file + and provides for immediate detection of common + file-transfer problems. The first two bytes distinguish + HDF5 files on systems that expect the first two bytes to + identify the file type uniquely. The first byte is + chosen as a non-ASCII value to reduce the probability + that a text file may be misrecognized as an HDF5 file; + also, it catches bad file transfers that clear bit + 7. Bytes two through four name the format. The CR-LF + sequence catches bad file transfers that alter newline + sequences. The control-Z character stops file display + under MS-DOS. The final line feed checks for the inverse + of the CR-LF translation problem. (This is a direct + descendant of the PNG file signature.)
Version # of the Boot BlockThis value is used to determine the format of the + information in the boot block. When the format of the + information in the boot block is changed, the version # + is incremented to the next integer and can be used to + determine how the information in the boot block is + formatted.
Version # of the Global Free-Space StorageThis value is used to determine the format of the + information in the Global Free-Space Heap. Currently, + this is implemented as a B-tree of length/offset pairs + to locate free space in the file, but future advances in + the file-format could change the method of finding + global free-space. When the format of the information + is changed, the version # is incremented to the next + integer and can be used to determine how the information + is formatted.
Version # of the Object DirectoryThis value is used to determine the format of the + information in the Object Directory. When the format of + the information in the Object Directory is changed, the + version # is incremented to the next integer and can be + used to determine how the information in the Object + Directory is formatted.
Version # of the Shared Header Message FormatThis value is used to determine the format of the + information in a shared object header message, which is + stored in the global small-data heap. Since the format + of the shared header messages differ from the private + header messages, a version # is used to identify changes + in the format.
Size of AddressesThis value contains the number of bytes used for + addresses in the file. The values for the addresses of + objects in the file are relative to a base address, + usually the address of the boot block signature. This + allows a wrapper to be added after the file is created + without invalidating the internal offset locations.
Size of SizesThis value contains the number of bytes used to store + the size of an object.
Symbol Table Leaf Node KEach leaf node of a symbol table B-tree will have at + least this many entries but not more than twice this + many. If a symbol table has a single leaf node then it + may have fewer entries.
Symbol Table Internal Node KEach internal node of a symbol table B-tree will have + at least K pointers to other nodes but not more than 2K + pointers. If the symbol table has only one internal + node then it might have fewer than K pointers.
Bytes per B-Tree PageThis value contains the # of bytes used for symbol + pairs per page of the B-Trees used in the file. All + B-Tree pages will have the same size per page.
(For + 32-bit file offsets, 340 objects is the maximum per 4KB + page, and for 64-bit file offsets, 254 objects will fit + per 4KB page. In general, the equation is:
<# + of objects> = FLOOR((<page size>-<offset + size>)/(<Symbol size>+<offset size>))-1 )
File Consistency FlagsThis value contains flags to indicate information + about the consistency of the information contained + within the file. Currently, the following bit flags are + defined: bit 0 set indicates that the file is opened for + write-access and bit 1 set indicates that the file has + been verified for consistency and is guaranteed to be + consistent with the format defined in this document. + Bits 2-31 are reserved for future use. Bit 0 should be + set as the first action when a file is opened for write + access and should be cleared only as the final action + when closing a file. Bit 1 should be cleared during + normal access to a file and only set after the file's + consistency is guaranteed by the library or a + consistency utility.
Base AddressThis is the absolute file address of the first byte of + the hdf5 data within the file. Unless otherwise noted, + all other file addresses are relative to this base + address.
Address of Global Free-Space HeapThis value contains the relative address of the B-Tree + used to manage the blocks of data which are unused in the + file currently. The free-space heap is used to manage the + blocks of bytes at the file-level which become unused when + objects are moved within the file.
End of File AddressThis is the relative file address of the first byte past + the end of all HDF5 data. It is used to determine if a + file has been accidentally truncated and as an address where + file memory allocation can occur if the free list is not + used.
Symbol-Table Entry of the Root ObjectThis symbol-table entry (described later in this + document) refers to the entry point into the group + graph. If the file contains a single object, then that + object can be the root object and no groups are used.
+
+ +

Disk Format: Level 1A - B-link Trees

+ +

B-link trees allow flexible storage for objects which tend to grow + in ways that cause the object to be stored discontiguously. B-trees + are described in various algorithms books including "Introduction to + Algorithms" by Thomas H. Cormen, Charles E. Leiserson, and Ronald + L. Rivest. The B-link tree, in which the sibling nodes at a + particular level in the tree are stored in a doubly-linked list, + is described in the "Efficient Locking for Concurrent Operations + on B-trees" paper by Phillip Lehman and S. Bing Yao as published + in the ACM Transactions on Database Systems, Vol. 6, + No. 4, December 1981. + +

The B-link trees implemented by the file format contain one more + key than the number of children. In other words, each child + pointer out of a B-tree node has a left key and a right key. + The pointers out of internal nodes point to sub-trees while + the pointers out of leaf nodes point to other file data types. + Notwithstanding that difference, internal nodes and leaf nodes + are identical. + +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ B-tree Nodes +
bytebytebytebyte
Node Signature
Node TypeNode LevelEntries Used
Address of Left Sibling
Address of Right Sibling
Key 0 (variable size)
Address of Child 0
Key 1 (variable size)
Address of Child 1
...
Key 2K (variable size)
Address of Child 2K
Key 2K+1 (variable size)
+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field NameDescription
Node SignatureThe value ASCII 'TREE' is used to indicate the + beginning of a B-link tree node. This gives file + consistency checking utilities a better chance of + reconstructing a damaged file.
Node TypeEach B-link tree points to a particular type of data. + This field indicates the type of data as well as + implying the maximum degree K of the tree and + the size of each Key field. +
+
+
0 +
This tree points to symbol table nodes. +
1 +
This tree points to a (partial) linear address space. +
+
Node LevelThe node level indicates the level at which this node + appears in the tree (leaf nodes are at level zero). Not + only does the level indicate whether child pointers + point to sub-trees or to data, but it can also be used + to help file consistency checking utilities reconstruct + damaged trees.
Entries UsedThis determines the number of children to which this + node points. All nodes of a particular type of tree + have the same maximum degree, but most nodes will point + to fewer than that number of children. The valid child + pointers and keys appear at the beginning of the node + and the unused pointers and keys appear at the end of + the node. The unused pointers and keys have undefined + values.
Address of Left SiblingThis is the file address of the left sibling of the + current node relative to the boot block. If the current + node is the left-most node at this level then this field + is the undefined address (all bits set).
Address of Right SiblingThis is the file address of the right sibling of the + current node relative to the boot block. If the current + node is the right-most node at this level then this + field is the undefined address (all bits set).
Keys and Child PointersEach tree has 2K+1 keys with 2K + child pointers interleaved between the keys. The number + of keys and child pointers actually containing valid + values is determined by the `Entries Used' field. If + that field is N then the B-link tree contains + N child pointers and N+1 keys.
KeyThe format and size of the key values is determined by + the type of data to which this tree points. The keys are + ordered and are boundaries for the contents of the child + pointer. That is, the key values represented by child + N fall between Key N and Key + N+1. Whether the interval is open or closed on + each end is determined by the type of data to which the + tree points.
Address of ChildrenThe tree node contains file addresses of subtrees or + data depending on the node level (0 implies data + addresses).
+
+ +

Disk Format: Level 1B - Symbol Table

+ +

A symbol table is a group internal to the file that allows + arbitrary nesting of objects (including other symbol + tables). A symbol table maps a set of names to a set of file + address relative to the file boot block. Certain meta data + for an object to which the symbol table points can be cached + in the symbol table in addition to (or in place of?) the + object header. + +

An HDF5 object name space can be stored hierarchically by + partitioning the name into components and storing each + component in a symbol table. The symbol table entry for a + non-ultimate component points to the symbol table containing + the next component. The symbol table entry for the last + component points to the object being named. + +

A symbol table is a collection of symbol table nodes pointed + to by a B-link tree. Each symbol table node contains entries + for one or more symbols. If an attempt is made to add a + symbol to an already full symbol table node containing + 2K entries, then the node is split and one node + contains K symbols and the other contains + K+1 symbols. + +

+

+ + + + + + + + + + + + + + + + + + + +
+ Symbol Table Node +
bytebytebytebyte
Node Signature
Version NumberReserved for Future UseNumber of Symbols


Symbol Table Entries


+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Field NameDescription
Node SignatureThe value ASCII 'SNOD' is used to indicate the + beginning of a symbol table node. This gives file + consistency checking utilities a better chance of + reconstructing a damaged file.
Version NumberThe version number for the symbol table node. This + document describes version 1.
Number of SymbolsAlthough all symbol table nodes have the same length, + most contain fewer than the maximum possible number of + symbol entries. This field indicates how many entries + contain valid data. The valid entries are packed at the + beginning of the symbol table node while the remaining + entries contain undefined values.
Symbol Table EntriesEach symbol has an entry in the symbol table node. + The format of the entry is described below.
+
+ +

+ Disk Format: Level 1C - Symbol-Table Entry

+ +

Each symbol table entry in a symbol table node is designed to allow + for very fast browsing of commonly stored scientific objects. + Toward that design goal, the format of the symbol-table entries + includes space for caching certain constant meta data from the + object header. + +

+

+ + + + + + + + + + + + + + + + + + + + +
+ Symbol Table Entry +
bytebytebytebyte
Name Offset (<size> bytes)
Object Header Address
Symbol-Type


Scratch-pad Space (24 bytes)


+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Field NameDescription
Name OffsetThis is the byte offset into the symbol table local + heap for the name of the symbol. The name is null + terminated.
Object Header AddressEvery object has an object header which serves as a + permanent home for the object's meta data. In addition + to appearing in the object header, the meta data can be + cached in the scratch-pad space.
Symbol-TypeThe symbol type is determined from the object header. + It also determines the format for the scratch-pad space. + The value zero indicates that no object header meta data + is cached in the symbol table entry. +
+
+
0 +
No data is cached by the symbol table entry. This + is guaranteed to be the case when an object header + has a link count greater than one. + +
1 +
Symbol table meta data is cached in the symbol + table entry. This implies that the symbol table + entry refers to another symbol table. + +
2 +
The entry is a symbolic link. The first four bytes + of the scratch pad space are the offset into the local + heap for the link value. The object header address + will be undefined. + +
N +
Other cache values can be defined later and + libraries that don't understand the new values will + still work properly. +
+
Scratch-Pad SpaceThis space is used for different purposes, depending + on the value of the Symbol Type field. Any meta-data + about a dataset object represented in the scratch-pad + space is duplicated in the object header for that + dataset. Furthermore, no data is cached in the symbol + table entry scratch-pad space if the object header for + the symbol table entry has a link count greater than + one.
+
+ +

The symbol table entry scratch-pad space is formatted + according to the value of the Symbol Type field. If the + Symbol Type field has the value zero then no information is + stored in the scratch pad space. + +

If the Symbol Type field is one, then the scratch pad space + contains cached meta data for another symbol table with the format: + +

+

+ + + + + + + + + + + + + + +
+ Symbol Table Scratch-Pad Format +
bytebytebytebyte
Address of B-tree
Address of Name Heap
+
+ +

+

+ + + + + + + + + + + + + + + +
Field NameDescription
Address of B-treeThis is the file address for the symbol table's + B-tree.
Address of Name HeapThis is the file address for the symbol table's local + heap that stores the symbol names.
+
+ +

+

+ + + + + + + + + + + + + +
+ Symbolic Link Scratch-Pad Format +
bytebytebytebyte
Offset to Link Value
+
+ +

+

+ + + + + + + + + + +
Field NameDescription
Offset to Link ValueThe value of a symbolic link (that is, the name of the + thing to which it points) is stored in the local heap. + This field is the 4-byte offset into the local heap for + the start of the link value, which is null terminated.
+
+ +

Disk Format: Level 1D - Local Heaps

+ +

A heap is a collection of small heap objects. Objects can be + inserted and removed from the heap at any time and the address + of a heap doesn't change once the heap is created. Note: this + is the "local" version of the heap mostly intended for the + storage of names in a symbol table. The storage of small + objects in a global heap is described below. + +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Local Heaps +
bytebytebytebyte
Heap Signature
Reserved (zero)
Data Segment Size
Offset to Head of Free-list (<size> bytes)
Address of Data Segment
+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Field NameDescription
Heap SignatureThe valid ASCII 'HEAP' is used to indicate the + beginning of a heap. This gives file consistency + checking utilities a better chance of reconstructing a + damaged file.
Data Segment SizeThe total amount of disk memory allocated for the heap + data. This may be larger than the amount of space + required by the object stored in the heap. The extra + unused space holds a linked list of free blocks.
Offset to Head of Free-listThis is the offset within the heap data segment of the + first free block (or all 0xff bytes if there is no free + block). The free block contains <size> bytes that + are the offset of the next free chunk (or all 0xff bytes + if this is the last free chunk) followed by <size> + bytes that store the size of this free chunk.
Address of Data SegmentThe data segment originally starts immediately after + the heap header, but if the data segment must grow as a + result of adding more objects, then the data segment may + be relocated to another part of the file.
+
+ +

Objects within the heap should be aligned on an 8-byte boundary. + +

Disk Format: Level 1E - Global Heap

+ +

Each HDF5 file has a global heap which stores various types of + information which is typically shared between datasets. The + global heap was designed to satisfy these goals: + +

    +
  1. Repeated access to a heap object must be efficient without + resulting in repeated file I/O requests. Since global heap + objects will typically be shared among several datasets it's + probable that the object will be accessed repeatedly. + +

    +
  2. Collections of related global heap objects should result in + fewer and larger I/O requests. For instance, a dataset of + void pointers will have a global heap object for each + pointer. Reading the entire set of void pointer objects + should result in a few large I/O requests instead of one small + I/O request for each object. + +

    +
  3. It should be possible to remove objects from the global heap + and the resulting file hole should be eligible to be reclaimed + for other uses. +

    +
+ +

The implementation of the heap makes use of the memory + management already available at the file level and combines that + with a new top-level object called a collection to + achieve Goal B. The global heap is the set of all collections. + Each global heap object belongs to exactly one collection and + each collection contains one or more global heap objects. For + the purposes of disk I/O and caching, a collection is treated as + an atomic object. + +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Global Heap Collection +
bytebytebytebyte
Magic Number
VersionReserved
Collection Size

Object 1


Object 2


...


Object N


Object 0 (free space)

+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
Field NameDescription
Magic NumberThe magic number for global heap collections are the + four bytes `G', `C', `O', `L'.
VersionEach collection has its own version number so that new + collections can be added to old files. This document + describes version zero of the collections. +
Collection SizeThis is the size in bytes of the entire collection + including this field. The default (and minimum) + collection size is 4096 bytes which is a typical file + system block size and which allows for 170 16-byte heap + objects plus their overhead.
Object i for positive i The + objects are stored in any order with no intervening unused + space.
Object 0Object zero, when present, represents the free space in + the collection. Free space always appears at the end of + the collection. If the free space is too small to store + the header for object zero (described below) then the + header is implied. +
+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + +
+ Global Heap Object +
bytebytebytebyte
Object IDReference Count
Object Total Size

Object Data

+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Field NameDescription
Object IDEach object has a unique identification number within a + collection. The identification numbers are chosen so that + new objects have the smallest value possible with the + exception that the identifier `0' always refers to the + object which represents all free space within the + collection.
Reference CountAll heap objects have a reference count field. An + object which is referenced from some other part of the + file will have a positive reference count. The reference + count for Object zero is always zero.
Object Total SizeThis is the total size in bytes of the object. It + includes all fields listed in this table.
Object DataThe object data is treated as a one-dimensional array + of bytes to be interpreted by the caller.
+
+ +

Disk Format: Level 1F - Free-Space + Index (NOT FULLY DEFINED)

+ +

The Free-Space Index is a collection of blocks of data, + dispersed throughout the file, which are currently not used by + any file objects. The blocks of data are indexed by a B-tree of + their length within the file. + +

Each B-Tree page is composed of the following entries and + B-tree management information, organized as follows: + +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ HDF5 Free-Space Heap Page +
bytebytebytebyte
Free-Space Heap Signature
B-Tree Left-Link Offset

Length of Free-Block #1


Offset of Free-Block #1

.
.
.

Length of Free-Block #n


Offset of Free-Block #n

"High" Offset
Right-Link Offset
+
+ +

+

+
The elements of the free-space heap page are described below: +
+
+
Free-Space Heap Signature: (4 bytes) +
The value ASCII: 'FREE' is used to indicate the + beginning of a free-space heap B-Tree page. This gives + file consistency checking utilities a better chance of + reconstructing a damaged file. + +
B-Tree Left-Link Offset: (<offset> bytes) +
This value is used to indicate the offset of all offsets + in the B-link-tree which are smaller than the value of the + offset in entry #1. This value is also used to indicate a + leaf node in the B-link-tree by being set to all ones. + +
Length of Free-Block #n: (<length> bytes) +
This value indicates the length of an un-used block in + the file. + +
Offset of Free-Block #n: (<offset> bytes) +
This value indicates the offset in the file of an + un-used block in the file. + +
"High" Offset: (4-bytes) +
This offset is used as the upper bound on offsets + contained within a page when the page has been split. + +
Right-link Offset: (<offset> bytes) +
This value is used to indicate the offset of the next + child to the right of the parent of this object directory + page. When there is no node to the right, this value is + all zeros. +
+
+ +

The algorithms for searching and inserting objects in the + B-tree pages are described fully in the Lehman & Yao paper, + which should be read to provide a full description of the + B-Tree's usage. + +

Disk Format: Level 2 - Data Objects

+ +

Data objects contain the real information in the file. These + objects compose the scientific data and other information which + are generally thought of as "data" by the end-user. All the + other information in the file is provided as a framework for + these data objects. + +

A data object is composed of header information and data + information. The header information contains the information + needed to interpret the data information for the data object as + well as additional "meta-data" or pointers to additional + "meta-data" used to describe or annotate each data object. + +

+ Disk Format: Level 2a - Data Object Headers

+ +

The header information of an object is designed to encompass all the information about an object that one would want to know, except for the data itself. This information includes the dimensionality, number-type, information about how the data is stored on disk (in external files, compressed, broken up in blocks, etc.), as well as other information used by the library to speed up access to the data objects or maintain a file's integrity. The header of each object is not necessarily located immediately prior to the object's data in the file and in fact may be located in any position in the file.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Object Headers +
bytebytebytebyte
Version # of Object HeaderAlignment of Object Header MessagesNumber of Header Messages
Object Reference Count

Total Object-Header Size

Header Message Type #1Size of Header Message Data #1
FlagsReserved
Header Message Data #1 (variable size)
.
.
.
Header Message Type #nSize of Header Message Data #n
FlagsReserved
Header Message Data #n (variable)
+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field NameDescription
Version # of the object headerThis value is used to determine the format of the + information in the object header. When the format of the + information in the object header is changed, the version # + is incremented and can be used to determine how the + information in the object header is formatted.
Alignment of object header messagesThis value is used to determine the byte-alignment of messages in the object header. Typically set to 4, which aligns new messages on a 4-byte boundary in the object header.
Number of header messagesThis value determines the number of messages listed in + this object header. This provides a fast way for software + to prepare storage for the messages in the header.
Object Reference CountThis value specifies the number of references to this + object within the current file. References to the + data-object from external files are not tracked.
Total Object-Header SizeThis value specifies the total number of bytes of header + message data following this length field for the current + message as well as any continuation data located elsewhere + in the file.
Header Message TypeThe header message type specifies the type of + information included in the header message data following + the type along with a small amount of other information. + Bit 15 of the message type is set if the message is + constant (constant messages cannot be changed since they + may be cached in symbol table entries throughout the + file). The header message types for the pre-defined + header messages will be included in further discussion + below.
Size of Header Message DataThis value specifies the number of bytes of header + message data following the header message type and length + information for the current message.
FlagsThis is a bit field with the following definition: +
+
0 +
If set, the message data is constant. This is used + for messages like the data type message of a dataset. +
1 +
If set, the message is stored in the global heap; the Header Message Data field contains a Shared Object message, and the Size of Header Message Data field contains the size of that Shared Object message.
2-7 +
Reserved +
+
Header Message DataThe format and length of this field is determined by the + header message type and size respectively. Some header + message types do not require any data and this information + can be eliminated by setting the length of the message to + zero.
+
+ +
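As an illustration of how a reader might step through the message list, here is a hedged C sketch. The two-byte type and size, one-byte flags, and three reserved bytes are assumptions consistent with the rows above, and the names are invented; the 4-byte alignment comes from the Alignment field description.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

typedef struct {
    uint16_t type;          /* bit 15 set => constant message */
    uint16_t size;          /* bytes of message data following this prefix */
    uint8_t  flags;         /* bit 0: constant, bit 1: shared (see Flags) */
    uint8_t  reserved[3];
} msg_prefix_t;             /* field widths are an assumption, not normative */

/* Step over 'nmsgs' packed messages starting at 'p', honoring the message
 * alignment (typically 4).  Returns a pointer just past the last message. */
static const uint8_t *
walk_messages(const uint8_t *p, unsigned nmsgs, unsigned align)
{
    while (nmsgs-- > 0) {
        msg_prefix_t m;
        memcpy(&m, p, sizeof m);       /* assumes file and host byte order agree */
        size_t step = sizeof m + m.size;
        step = (step + align - 1) / align * align;   /* round up to alignment */
        p += step;
    }
    return p;
}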

The header message types and the message data associated with them compose the critical "meta-data" about each object. Some header messages are required for each object while others are optional. Some optional header messages may also be repeated several times in the header; the requirements and the number of repetitions allowed are noted in each header message description below.

The following is a list of currently defined header messages: + +


+

Name: NIL

+ Type: 0x0000
+ Length: varies
+ Status: Optional, may be repeated.
+ Purpose and Description: The NIL message is used to + indicate a message + which is to be ignored when reading the header messages for a data object. + [Probably one which has been deleted for some reason.]
+ Format of Data: Unspecified.
+ Examples: None. + + +
+

Name: Simple Data Space

+ + Type: 0x0001
+ Length: varies
+ Status: One of the Simple Data Space or + Data-Space messages is required (but not both) and may + not be repeated.
+ +

The Simple Data Space message describes the number of dimensions and the size of each dimension that the data object has. This message is only used for datasets which have a simple, rectilinear grid layout; datasets requiring a more complex layout (irregular or unstructured grids, etc.) must use the Data-Space message for expressing the space the dataset inhabits.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Simple Data Space Message +
bytebytebytebyte
Dimensionality
Dimension Flags
Dimension Size #1 (<size> bytes)
.
.
.
Dimension Size #n (<size> bytes)
Dimension Maximum #1 (<size> bytes)
.
.
.
Dimension Maximum #n (<size> bytes)
Permutation Index #1
.
.
.
Permutation Index #n
+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field NameDescription
DimensionalityThis value is the number of dimensions that the data + object has.
Dimension FlagsThis field is used to store flags to indicate the + presence of parts of this message. Bit 0 (counting from + the right) is used to indicate that maximum dimensions are + present. Bit 1 is used to indicate that permutation + indices are present for each dimension.
Dimension Size #n (<size> bytes)This value is the current size of the dimension of the data as stored in the file. The first dimension stored in the list of dimensions is the slowest changing dimension and the last dimension stored is the fastest changing dimension.
Dimension Maximum #n (<size> bytes)This value is the maximum size of the dimension of the data as stored in the file. This value may be the special value <UNLIMITED> (0xffffffff) which indicates that the data may expand along this dimension indefinitely. If these values are not stored, the maximum value of each dimension is assumed to be the same as the current size value.
Permutation Index #n (4 bytes)This value is the index permutation used to map + each dimension from the canonical representation to an + alternate axis for each dimension. If these values are + not stored, the first dimension stored in the list of + dimensions is the slowest changing dimension and the last + dimension stored is the fastest changing dimension.
+
+ +
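The Dimension Flags field determines which of the optional arrays are actually present in the message. A small hedged sketch (names invented) of the resulting layout computation:

#include <stdint.h>
#include <stddef.h>

#define DIM_FLAG_HAS_MAX   0x01u  /* bit 0: maximum dimensions are present */
#define DIM_FLAG_HAS_PERM  0x02u  /* bit 1: permutation indices are present */

/* Given the Dimensionality and Dimension Flags fields, compute how many
 * <size>-byte values and 4-byte permutation indices follow the flags. */
static void dataspace_layout(uint32_t ndims, uint32_t flags,
                             size_t *n_size_values, size_t *n_perm_indices)
{
    *n_size_values = ndims;                                 /* current sizes */
    if (flags & DIM_FLAG_HAS_MAX)
        *n_size_values += ndims;                            /* maxima follow */
    *n_perm_indices = (flags & DIM_FLAG_HAS_PERM) ? ndims : 0;
}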

Examples

+
+
Example #1 +
A sample dimension header for a raster image 640 pixels wide by 480 pixels high. The number of dimensions would be set to 2 and the first dimension's size and maximum would both be set to 480. The second dimension's size and maximum would both be set to 640.
Example #2 +
A sample 4-dimensional scientific dataset which is composed of 30x24x3 slabs of data being written out in an unlimited series every several minutes as timestep data (currently there are five slabs). The number of dimensions is 4. The first dimension size is 5 and its maximum is <UNLIMITED>. The second through fourth dimensions' sizes and maximum values are set to 3, 24, and 30 respectively.
Example #3 +
A sample unlimited length text string, currently of length + 83. The number of dimensions is 1, the size of the first + dimension is 83 and the maximum of the first dimension is set + to <UNLIMITED>, allowing further text data to be + appended to the string or possibly the string to be replaced + with another string of a different size. (This could also be + stored as a scalar dataset with number-type set to "string") +
+ +
+

Name: Data-Space (Fiber Bundle?)

+ Type: 0x0002
+ Length: varies
+ Status: One of the Simple Data Space or Data-Space messages is required (but not both) and may not be repeated.
Purpose and Description: The Data-Space message describes the space that the dataset is mapped onto in a more comprehensive way than the Simple Data Space message is capable of handling. The data-space of a dataset encompasses the type of coordinate system used to locate the dataset's elements as well as the structure and regularity of the coordinate system. The data-space also describes the number of dimensions which the dataset inhabits as well as a possible higher-dimensional space within which the dataset is located.
+ Format of Data: + +
+ + + + + + + + + + + + + +
+ HDF5 Data-Space Message Layout +
bytebytebytebyte
Mesh Type
Logical Dimensionality
+
+ +

+

+
The elements of the dimensionality message are described below: +
+
+
Mesh Type: (unsigned 32-bit integer) +
This value indicates whether the grid is polar/spherical/cartesian, structured/unstructured, and regular/irregular.
+ The mesh type value is broken up as follows:
+ +

+

+ + + + + + + + + + + + + + +
+ HDF5 Mesh-Type Layout +
bytebytebytebyte
Mesh EmbeddingCoordinate SystemStructureRegularity
+
+ The following are the definitions of mesh-type bytes: +
+
Mesh Embedding +
This value indicates whether the dataset data-space + is located within + another dataspace or not: +
+
<STANDALONE> +
The dataset mesh is self-contained and is not + embedded in another mesh. +
<EMBEDDED> +
The dataset's data-space is located within + another data-space, as + described in information below. +
+
Coordinate System +
This value defines the type of coordinate system + used for the mesh: +
+
<POLAR> +
The last two dimensions are in polar + coordinates, higher dimensions are + cartesian. +
<SPHERICAL> +
The last three dimensions are in spherical + coordinates, higher dimensions + are cartesian. +
<CARTESIAN> +
All dimensions are in cartesian coordinates. +
+
Structure +
This value defines the locations of the grid-points + on the axes: +
+
<STRUCTURED> +
All grid-points are on integral, sequential + locations, starting from 0. +
<UNSTRUCTURED> +
Grid-points locations in each dimension are + explicitly defined and + may be of any numeric data-type. +
+
Regularity +
This value defines the locations of the dataset + points on the grid: +
+
<REGULAR> +
All dataset elements are located at the + grid-points defined. +
<IRREGULAR> +
Each dataset element has a particular + grid-location defined. +
+
+

The following grid combinations are currently allowed: +

+
<POLAR-STRUCTURED-REGULAR> +
<SPHERICAL-STRUCTURED-REGULAR> +
<CARTESIAN-STRUCTURED-REGULAR> +
<POLAR-UNSTRUCTURED-REGULAR> +
<SPHERICAL-UNSTRUCTURED-REGULAR> +
<CARTESIAN-UNSTRUCTURED-REGULAR> +
<CARTESIAN-UNSTRUCTURED-IRREGULAR> +
+ All of the above grid types can be embedded within another + data-space. +

+
Logical Dimensionality: (unsigned 32-bit integer) +
This value is the number of dimensions that the dataset occupies. + +

+

+ + + + + + + + + + + + + + + + + + + + + + + +
+ HDF5 Data-Space Embedded Dimensionality Information +
bytebytebytebyte
Embedded Dimensionality
Embedded Dimension Size #1
.
.
.
Embedded Dimension Size #n
Embedded Origin Location #1
.
.
.
Embedded Origin Location #n
+
+ +
Embedded Dimensionality: (unsigned 32-bit integer) +
This value is the number of dimensions of the space the dataset is located within, e.g. a planar dataset located within a 3-D space, or a 3-D dataset which is a subset of another 3-D space, etc.
Embedded Dimension Size: (unsigned 32-bit integer) +
These values are the sizes of the dimensions of the + embedded data-space + that the dataset is located within. +
Embedded Origin Location: (unsigned 32-bit integer) +
These values comprise the location of the dataset's + origin within the embedded data-space. +
+
+ [Comment: need some way to handle different orientations of the + dataset data-space + within the embedded data-space]
+ +

+

+ + + + + + + + + + + + + + + + + + + +
+ HDF5 Data-Space Structured/Regular Grid Information +
bytebytebytebyte
Logical Dimension Size #1
Logical Dimension Maximum #1
.
.
.
Logical Dimension Size #n
Logical Dimension Maximum #n
+
+ +

+

+
The elements of the dimensionality message are described below: +
+
+
Logical Dimension Size #n: (unsigned 32-bit integer) +
This value is the current size of the dimension of the + data as stored in + the file. The first dimension stored in the list of + dimensions is the slowest + changing dimension and the last dimension stored is the + fastest changing + dimension. +
Logical Dimension Maximum #n: (unsigned 32-bit integer) +
This value is the maximum size of the dimension of the + data as stored in + the file. This value may be the special value + <UNLIMITED> which + indicates that the data may expand along this dimension + indefinitely. +
+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + +
+ HDF5 Data-Space Structured/Irregular Grid Information +
bytebytebytebyte
# of Grid Points in Dimension #1
.
.
.
# of Grid Points in Dimension #n
Data-Type of Grid Point Locations
Location of Grid Points in Dimension #1
.
.
.
Location of Grid Points in Dimension #n
+
+ +

+

+ + + + + + + + + + + + + + + +
+ HDF5 Data-Space Unstructured Grid Information +
bytebytebytebyte
# of Grid Points
Data-Type of Grid Point Locations
Grid Point Locations
.
.
+
+ +

Examples:

+ Need some good examples, this is complex! + + +
+

Name: Data Type

+ + Type: 0x0003
+ Length: variable
+ Status: One required per dataset
+ +

The data type message defines the data type for each data point + of a dataset. A data type can describe an atomic type like a + fixed- or floating-point type or a compound type like a C + struct. A data type does not, however, describe how data points + are combined to produce a dataset. Data types are stored on disk + as a data type message, which is a list of data type classes and + their associated properties. + +

+

+ + + + + + + + + + + + + + + + + + + + + + +
+ Data Type Message +
bytebytebytebyte
Type ClassClass Bit Field
Size in Bytes (4 bytes)


Properties


+
+ +

The Class Bit Field and Properties fields vary depending + on the Type Class. The type class is one of: 0 (fixed-point + number), 1 (floating-point number), 2 (date and time), 3 (text + string), 4 (bit field), 5 (opaque), 6 (compound). The Class Bit + Field is zero and the size of the Properties field is zero + except for the cases noted here. + +
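Read together with the table above, the fixed part of the message can be sketched in C as below. The one-byte class and 24-bit class bit field are assumptions consistent with the bit numbering (bits 0-23) used in the tables that follow, and the struct name is invented.

#include <stdint.h>

/* Illustrative sketch of the fixed prefix of a data type message. */
typedef struct {
    uint8_t  type_class;    /* 0 fixed, 1 float, 2 time, 3 string,
                               4 bit field, 5 opaque, 6 compound */
    uint8_t  bit_field[3];  /* 24 class-dependent bits, bit 0 first */
    uint32_t size;          /* size in bytes of one instance of the type */
    /* class-dependent Properties follow */
} dtype_msg_t;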

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Bit Field for Fixed-Point Numbers (Class 0) +
BitsMeaning
0Byte Order. If zero, byte order is little-endian; + otherwise, byte order is big endian.
1, 2Padding type. Bit 1 is the lo_pad type and bit 2 + is the hi_pad type. If a datum has unused bits at either + end, then the lo_pad or hi_pad bit is copied to those + locations.
3Signed. If this bit is set then the fixed-point + number is in 2's complement form.
4-23Reserved (zero).
+
+ +

+

+ + + + + + + + + + + + + + +
+ Properties for Fixed-Point Numbers (Class 0) +
ByteByteByteByte
Bit OffsetBit Precision
+
+ +
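For example, the class bit field of a fixed-point type could be picked apart as follows; this is a sketch (the function name is invented), and `bits` is assumed to hold the 24-bit field right-aligned in a word.

#include <stdio.h>
#include <stdint.h>

static void print_fixed_point_bits(uint32_t bits)
{
    printf("byte order: %s\n", (bits & 0x1) ? "big-endian" : "little-endian");
    printf("lo pad bit: %u\n", (unsigned)((bits >> 1) & 0x1));
    printf("hi pad bit: %u\n", (unsigned)((bits >> 2) & 0x1));
    printf("signed:     %s\n", (bits & 0x8) ? "2's complement" : "unsigned");
    /* bits 4-23 are reserved and should be zero */
}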

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Bit Field for Floating-Point Numbers (Class 1) +
BitsMeaning
0Byte Order. If zero, byte order is little-endian; + otherwise, byte order is big endian.
1, 2, 3Padding type. Bit 1 is the low bits pad type, bit 2 is the high bits pad type, and bit 3 is the internal bits pad type. If a datum has unused bits at either end of, or between, the sign bit, exponent, or mantissa, then the value of bit 1, 2, or 3 is copied to those locations.
4-5Normalization. The value can be 0 if there is no normalization, 1 if the most significant bit of the mantissa is always set (except for 0.0), and 2 if the most significant bit of the mantissa is not stored but is implied to be set. The value 3 is reserved and will not appear in this field.
6-7Reserved (zero).
8-15Sign. This is the bit position of the sign + bit.
16-23Reserved (zero).
+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ Properties for Floating-Point Numbers (Class 1) +
ByteByteByteByte
Bit OffsetBit Precision
Exponent LocationExponent Size in BitsMantissa LocationMantissa Size in Bits
Exponent Bias
+
+ +

+

+ + + + + + + + + + + + + + + + +
+ Bit Field for Compound Types (Class 6) +
BitsMeaning
0-15Number of Members. This field contains the number + of members defined for the compound data type. The member + definitions are listed in the Properties field of the data + type message. +
16-23Reserved (zero).
+
+ +

The Properties field of a compound data type is a list of the + member definitions of the compound data type. The member + definitions appear one after another with no intervening bytes. + The member types are described with a recursive data type + message. + +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Properties for Compound Types (Class 6) +
ByteByteByteByte


Name (null terminated, multiple of + four bytes)


Byte Offset of Member in Compound Instance
Dimensionalityreserved
Size of Dimension 0 (optional)
Size of Dimension 1 (optional)
Size of Dimension 2 (optional)
Size of Dimension 3 (optional)
Dimension Permutation


Member Type Message


+
+ +

Data type examples are here. + + +


+

Name: Reserved - Not Assigned + Yet

+ Type: 0x0004
+ Length: N/A
+ Status: N/A
+ + +
+

Name: Reserved - Not Assigned + Yet

+ Type: 0x0005
+ Length: N/A
+ Status: N/A
+ + + +
+

Name: Data Storage - Compact

+ + Type: 0x0006
+ Length: varies
+ Status: Optional, may not be repeated.
+ +

This message indicates that the data for the data object is + stored within the current HDF file by including the actual + data within the header data for this message. The data is + stored internally in + the "normal" format, i.e. in one chunk, un-compressed, etc. + +

Note that one and only one of the "Data Storage" headers can be + stored for each data object. + +

Format of Data: The message data is actually composed + of dataset data, so the format will be determined by the dataset + format. + +

Examples:

+ [very straightforward] + +
+

Name: Data Storage - + External Data Files

+ Type: 0x0007
+ Length: varies
+ Status: Optional, may not be repeated.
+ +

Purpose and Description: The external object message indicates that the data for an object is stored outside the HDF5 file. The filename of the object is stored as a Uniform Resource Locator (URL) of the actual file containing the data. An external file list record also contains the byte offset of the start of the data within the file and the amount of space reserved in the file for that data.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+ External File List Message +
bytebytebytebyte

Heap Address

Allocated SlotsUsed Slots
Reserved

Slot Definitions...

+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field NameDescription
Heap AddressThis is the address of a local name heap which contains + the names for the external files. The name at offset zero + in the heap is always the empty string.
Allocated SlotsThe total number of slots allocated in the message. Its + value must be at least as large as the value contained in + the Used Slots field.
Used SlotsThe number of initial slots which contain valid + information. The remaining slots are zero filled.
ReservedThis field is reserved for future use.
Slot DefinitionsThe slot definitions are stored in order according to + the array addresses they represent. If more slots have + been allocated than what has been used then the defined + slots are all at the beginning of the list.
+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + +
+ External File List Slot +
bytebytebytebyte

Name Offset (<size> bytes)


File Offset (<size> bytes)


Size

+
+ +

+

+ + + + + + + + + + + + + + + + + + + + +
Field NameDescription
Name Offset (<size> bytes)The byte offset within the local name heap for the name + of the file. File names are stored as a URL which has a + protocol name, a host name, a port number, and a file + name: + protocol:port//host/file. + If the protocol is omitted then "file:" is assumed. If + the port number is omitted then a default port for that + protocol is used. If both the protocol and the port + number are omitted then the colon can also be omitted. If + the double slash and host name are omitted then + "localhost" is assumed. The file name is the only + mandatory part, and if the leading slash is missing then + it is relative to the application's current working + directory (the use of relative names is not + recommended).
File Offset (<size> bytes)This is the byte offset to the start of the data in the + specified file. For files that contain data for a single + dataset this will usually be zero.
SizeThis is the total number of bytes reserved in the + specified file for raw data storage. For a file that + contains exactly one complete dataset which is not + extendable, the size will usually be the exact size of the + dataset. However, by making the size larger one allows + HDF5 to extend the dataset. The size can be set to a value + larger than the entire file since HDF5 will read zeros + past the end of the file without failing.
+
+ + +
+

Name: Data Storage - Layout

+ + Type: 0x0008
+ Length: varies
+ Status: Required for datasets, may not be repeated. + +

Purpose and Description: Data layout describes how the + elements of a multi-dimensional array are arranged in the linear + address space of the file. Two types of data layout are + supported: + +

    +
  1. The array can be stored in one contiguous area of the file. The layout requires that the size of the array be constant and does not permit chunking or compression. The message stores the total size of the array; the offset of an element from the beginning of the storage area is computed as in C (see the sketch after this list).
  2. The array domain can be regularly decomposed into chunks and + each chunk is allocated separately. This layout supports + arbitrary element traversals and compression and the chunks + can be distributed across external raw data files (these + features are described in other messages). The message stores + the size of a chunk instead of the size of the entire array; + the size of the entire array can be calculated by traversing + the B-tree that stores the chunk addresses. +
+ +
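The sketch below illustrates the row-major ("as in C") offset computation referred to in case 1 above. It is generic C, not library code, and the function name is invented.

#include <stddef.h>

/* Row-major element offset for contiguous storage:
 * offset(i0,...,ik) = (((i0*d1 + i1)*d2 + i2)...)*element_size */
static size_t element_offset(const size_t *index, const size_t *dims,
                             unsigned rank, size_t elem_size)
{
    size_t off = 0;
    for (unsigned d = 0; d < rank; d++)
        off = off * dims[d] + index[d];   /* last dimension varies fastest */
    return off * elem_size;
}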

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Data Layout Message +
bytebytebytebyte

Address

DimensionalityLayout ClassReserved
Reserved (4-bytes)
Dimension 0 (4-bytes)
Dimension 1 (4-bytes)
...
+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Field NameDescription
AddressFor contiguous storage, this is the address of the first + byte of storage. For chunked storage this is the address + of the B-tree that is used to look up the addresses of the + chunks.
DimensionalityAn array has a fixed dimensionality. This field + specifies the number of dimension size fields later in the + message.
Layout ClassThe layout class specifies how the other fields of the + layout message are to be interpreted. A value of one + indicates contiguous storage while a value of two + indicates chunked storage. Other values will be defined + in the future.
DimensionsFor contiguous storage the dimensions define the entire + size of the array while for chunked storage they define + the size of a single chunk.
+
+ + +
+

Name: Reserved - Not Assigned Yet

+ Type: 0x0009
+ Length: N/A
+ Status: N/A
+ Purpose and Description: N/A
+ Format of Data: N/A + +
+

Name: Reserved - Not Assigned Yet

+ Type: 0x000A
+ Length: N/A
+ Status: N/A
+ Purpose and Description: N/A
+ Format of Data: N/A + +
+

Name: Data Storage - Compressed

+ Type: 0x000B
+ Length: varies
+ Status: Optional, may not be repeated. + +

Purpose and Description: Compressed objects are + datasets which are stored in an HDF file after they have been + compressed. The encoding algorithm and its parameters are + stored in a Compression Message in the object header of the + dataset. + +

+

+ + + + + + + + + + + + + + + + + + + +
+ Compression Message +
bytebytebytebyte
MethodFlagsClient Data Size

Client Data

+
+ +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Field NameDescription
MethodThe compression method is a value between zero and 255, inclusive, that is used as an index into a compression method lookup table. The value zero indicates no compression. The values one through 15, inclusive, are reserved for methods defined by NCSA. All other values are user-defined compression methods.
FlagsEight bits of flags which are passed to the compression algorithm. Their meaning depends on the compression method.
Client Data SizeThe size in bytes of the optional Client Data + field.
Client DataAdditional information needed by the compression method + can be stored in this field. The data will be passed to + the compression algorithm as a void pointer.
+
+ +

Sometimes additional redundancy can be added to the data before + it's compressed to result in a better compression ratio. The + library doesn't specifically support modeling methods to add + redundancy, but the effect can be achieved through the use of + user-defined data types. + +

The library uses the following compression methods. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
0No compression: The blocks of data are stored in + their raw format.
1Deflation: This is the same algorithm used by GNU gzip, a combined Huffman and LZ77 dictionary encoder. The libz library version 1.1.2 or later must be available.
2Run length encoding: Not implemented yet.
3Adaptive Huffman: Not implemented yet.
4Adaptive Arithmetic: Not implemented yet.
5LZ78 Dictionary Encoding: Not implemented yet.
6Adaptive Lempel-Ziv: Similar to Unix + compress. Not implemented yet.
7-15Reserved for future use.
16-255User-defined.
+
+ +

The compression is applied independently to each chunk of + storage (after data space and data type conversions). If the + compression is unable to make the chunk smaller than it would + normally be, the chunk is stored without compression. At the + library's discretion, chunks which fail the compression can also + be stored in their raw format. + + + +
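A hedged sketch of that per-chunk policy, using zlib deflation (method 1) as the example codec; the function name is invented and the buffer management is simplified.

#include <zlib.h>

/* Try to deflate one chunk into 'out' (at least 'len' bytes).  Returns 1
 * if the compressed form is strictly smaller and should be stored, or 0
 * if the chunk should be stored in its raw format instead. */
static int compress_chunk(const unsigned char *chunk, unsigned long len,
                          unsigned char *out)
{
    unsigned long out_len = len;     /* anything >= len is not worth keeping */
    if (compress2(out, &out_len, chunk, len, Z_DEFAULT_COMPRESSION) != Z_OK)
        return 0;                    /* e.g. Z_BUF_ERROR: output grew; store raw */
    return out_len < len;
}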


+

Name: Attribute List

+ Type: 0x000C
+ Length: varies
+ Status: Optional, may be repeated.
+ +

Purpose and Description: The Attribute List message is used to list objects in the HDF file which are used as attributes, or "meta-data", about the current object. Other objects can be used as attributes for either the entire object or portions of the current object. The attribute list is composed of two lists of objects, the first being simple attributes about the entire dataset, and the second being pointers to attribute objects about the entire dataset. Partial dataset pointers are currently unspecified and unimplemented.

Format of Data: + +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ HDF5 Attribute-List Message Layout +
bytebytebytebyte
Attribute List Flags
# of Simple Attributes
Simple Attribute #1 Name Offset
Simple Attribute #1 Data-Type
Simple Attribute #1 Rank
Simple Attribute #1 Dim #1 Size
Simple Attribute #1 Dim #2 Size
Simple Attribute #1 Dim #3 Size
Simple Attribute #1 Dim #4 Size
Simple Attribute #1 Data Offset
.
.
.
Simple Attribute #n Name Offset
Simple Attribute #n Data-Type
Simple Attribute #n Rank
Simple Attribute #n Dim #1 Size
Simple Attribute #n Dim #2 Size
Simple Attribute #n Dim #3 Size
Simple Attribute #n Dim #4 Size
Simple Attribute #n Data Offset
# of Complex Attributes
Pointer to Complex Attribute #1
.
.
.
Pointer to Complex Attribute #n
+
+ +

+

+
The elements of the attribute list message are described below: +
+
+
Attribute List Flags: (unsigned 32-bit integer) +
These flags indicate the presence of simple and complex + lists of attributes for this dataset. Bit 0 indicates the + presence of a list of simple attributes and Bit 1 + indicates the presence of a list of complex attributes. + +
# of Simple Attributes: (unsigned 32-bit integer) +
This indicates the number of simple attributes for this + dataset. + +
Simple Attribute #n Name Offset: (unsigned 32-bit integer) +
This is the offset of the simple attribute's name in the + global small-data heap. + +
Simple Attribute #n Data-type: (unsigned 32-bit integer) +
This is a simple data-type, which indicates the type of + data used for the attribute. + +
Simple Attribute #n Rank: (unsigned 32-bit integer) +
This is the number of dimensions of the attribute, + limited to four or less. + +
Simple Attribute #n Dim #n Size: (unsigned 32-bit integer) +
This is the size of the attribute's n'th dimension, + which is stored in the canonical order for dimensions + (i.e. no permutations of the indices are allowed). + +
Simple Attribute #n Data Offset: (unsigned 32-bit integer) +
This is the offset of the simple attribute's data in the global small-data heap.
# of Complex Attributes: (unsigned 32-bit integer) +
This indicates the number of complex attributes for this + dataset. + +
Pointer to Complex Attribute #n: (unsigned 32-bit integer) +
This is the small-data heap offset of the name of the + attribute object in the file. +
+
+ +

[Note: It has been suggested that each attribute have an + additional "units" field, so this is being considered.] + +

Examples:

+ [Comment: need examples.] + +
+

Name: Object Name

+ Type: 0x000D
+ Length: varies
+ Status: Optional [required?], may not be repeated.
+ Purpose and Description: The object name is designed to be a short description of the instance of the data object (the class may be a short description of the "type" of the object). An object name is a sequence of non-null (i.e. not '\0') ASCII characters with no other formatting included by the library.
+ Format of Data:The data for the object name is just a sequence of ASCII + characters with no special formatting. + +
+

Name: Object Modification Date & Time

+ Type: 0x000E
+ Length: fixed
+ Status: Required?, may not be repeated.
+ Purpose and Description: The object modification date and time is a + timestamp which indicates (using ISO8601 date and time format) the last + modification of a data object.
+ Format of Data: + The date is represented as a fixed length ASCII string according to the + "complete calendar date representation, without hyphens" listed in the ISO8601 + standard.
+ The time of day is represented as a fixed length ASCII string according + to the "complete local time of day representation, without hyphens" + listed in the ISO8601 standard. + +

Examples:

+ "February 14, 1993, 1:10pm and 30 seconds" is represented as "19930214131030" in + the ISO standard format. + +
+

Name: Shared Object Message

+ Type: 0x000F
+ Length: 4 Bytes
+ Status: Optional, may be repeated. + +

A constant message can be shared among several object headers + by writing that message in the global heap and having the object + headers all point to it. The pointing is accomplished with a + Shared Object message which is understood directly by the object + header layer of the library and never actually appears as a + message in the file. It is also possible to have a message of + one object header point to a message in some other object + header, but care must be exercised to prevent cycles. + +

If a message is shared, then the message appears in the global heap and its message ID appears in the Header Message Type field of the object header. Also, the Flags field in the object header for that message will have the H5O_FLAG_SHARED bit (bit 1, described above) set. The message body in the object header will be that of a Shared Object message defined here and not that of the pointed-to message.

+

+ + + + + + + + + + + + + +
+ Shared Object Message +
byte + byte + byte + byte +
Flags

Pointer

+
+ +

+

+ + + + + + + + + + + + + + + +
Field NameDescription
FlagsThe Shared Object Message is a pointer to a shared message. The actual shared message can appear in either the global heap or in some other object header and this field specifies which form is used. If the value is zero then the actual message is the first such message in some other object header; otherwise the actual message is stored in the global heap.
PointerThis field points to the actual message. The format of + the pointer depends on the value of the Flags field. If + the actual message is in the global heap then the pointer + is the file address of the global heap collection that + holds the message, and a four-byte index into that + collection. Otherwise the pointer is a symbol table entry + that points to some other object header.
+
+ + +
+

Name: Object Header Continuation

+Type: 0x0010
+Length: fixed
+Status: Optional, may be repeated.
+Purpose and Description: The object header continuation is the location +in the file of more header messages for the current data object. This can be +used when header blocks are large, or likely to change over time.
+Format of Data:

+ The object header continuation is formatted as follows (assuming a 4-byte +length & offset are being used in the current file): + +

+

+ + + + + + + + + + + + + +
+HDF5 Object Header Continuation Message Layout +
bytebytebytebyte
Header Continuation Offset
Header Continuation Length
+
+ +

+

+
The elements of the Header Continuation Message are described below: +
+
+
Header Continuation Offset: (<offset> bytes) +
This value is the offset in bytes from the beginning of the file where the +header continuation information is located. +
Header Continuation Length: (<length> bytes) +
This value is the length in bytes of the header continuation information in +the file. +
+
+ +

Examples:

+ [straightforward] + +
+

Name: Symbol Table Message

+Type: 0x0011
+Length: fixed
+Status: Required for symbol tables, may not be repeated.
+Purpose and Description: Each symbol table has a B-tree and a +name heap which are pointed to by this message.
+Format of data: +

The symbol table message is formatted as follows: + +

+

+ + + + + + + + + + + + + + +
+HDF5 Object Header Symbol Table Message Layout +
bytebytebytebyte
B-Tree Address
Heap Address
+
+ +

+

+
The elements of the Symbol Table Message are described below: +
+
+
B-tree Address (<offset> bytes) +
This value is the offset in bytes from the beginning of the file +where the B-tree is located. +
Heap Address (<offset> bytes) +
This value is the offset in bytes from the beginning of the file +where the symbol table name heap is located. +
+
+ +

Disk Format: Level 2b - Shared Data Object Headers

+

In order to share header messages between several dataset objects, object +header messages may be placed into the global small-data heap. Since these +messages require additional information beyond the basic object header message +information, the format of the shared message is detailed below. + +

+

+ + + + + + + + + + + + + +
+HDF5 Shared Object Header Message +
bytebytebytebyte
Reference Count of Shared Header Message

Shared Object Header Message

+
+ +

+

+
The elements of the shared object header message are described below: +
+
+
Reference Count of Shared Header Message: (32-bit unsigned integer) +
This value is used to keep a count of the number of dataset objects which +refer to this message from their dataset headers. When this count reaches zero, +the shared message header may be removed from the global small-data heap. +
Shared Object Header Message: (various lengths) +
The data stored for the shared object header message is formatted in the +same way as the private object header messages described in the object header +description earlier in this document and begins with the header message Type. +
+
+ + +

Disk Format: Level 2c - Data Object Data Storage

+

The data information for an object is stored separately from the header +information in the file and may not actually be located in the HDF5 file +itself if the header indicates that the data is stored externally. The +information for each record in the object is stored according to the +dimensionality of the object (indicated in the dimensionality header message). +Multi-dimensional data is stored in C order [same as current scheme], i.e. the +"last" dimension changes fastest. +

Data whose elements are composed of simple number-types are stored in +native-endian IEEE format, unless they are specifically defined as being stored +in a different machine format with the architecture-type information from the +number-type header message. This means that each architecture will need to +[potentially] byte-swap data values into the internal representation for that +particular machine. +

Data with a "variable" sized number-type is stored in an data heap +internal to the HDF file [which should not be user-modifiable]. +

Data whose elements are composed of pointer number-types are stored in several different ways depending on the particular pointer type involved. Simple pointers are just stored as the dataset offset of the object being pointed to, with the size of the pointer being the same number of bytes as offsets in the file. Partial-object pointers are stored as a heap-ID which points to the following information within the file-heap: an offset of the object pointed to, number-type information (same format as header message), dimensionality information (same format as header message), sub-set start and end information (i.e. a coordinate location for each), and field start and end names (i.e. a [pointer to the] string indicating the first field included and a [pointer to the] string name for the last field). Browse pointers are stored as a heap-ID (for the name in the file-heap) followed by an offset of the data object being referenced.

Data of a compound data-type is stored as a contiguous stream of the items in the structure, with each item formatted according to its data-type.


+
Quincey Koziol
+
Robb Matzke
+ +Last modified: Mon Jun 1 21:44:38 EDT 1998 + + + diff --git a/doc/html/H5.intro.html b/doc/html/H5.intro.html new file mode 100644 index 0000000..e7d5a50 --- /dev/null +++ b/doc/html/H5.intro.html @@ -0,0 +1,997 @@ + + + + +H5introH + + + + +

Introduction to HDF5 1.0 Alpha

+

This is a brief introduction to the HDF5 data model and programming model. It is not a full user's guide, but should provide enough information for you to understand how HDF5 is meant to work. Knowledge of the current version of HDF should make it easier to follow the text, but it is not required. For further information on the topics covered here, see the HDF5 documentation at http://hdf.ncsa.uiuc.edu/nra/BigHDF/.

+

What is the HDF5 prototype?

+

HDF5 is a new, experimental version of HDF that is designed to address some of the limitations of the current version of HDF (HDF4.1) and to address current and anticipated requirements of modern systems and applications.

+

This HDF5 prototype is not complete, but it should be sufficient to show the basic features of HDF5. We urge you to look at it and give us feedback on what you like or don't like about it, and what features you would like to see added to it.

+

Why HDF5? The development of HDF5 is motivated by a number of limitations in the current HDF format, as well as limitations in the library. Some of these limitations are:

+ +
    +
  • A single file cannot store more than 20,000 complex objects, and a single file cannot be larger than 2 gigabytes
  • +
  • The data models are less consistent than they should be, there are more object types than necessary, and datatypes are too restricted.
  • +
  • The library source is old and overly complex, does not support parallel I/O effectively, and is difficult to use in threaded applications.
+ +

When complete HDF5 will include the following improvements.

+ +
    +
  • A new file format designed to address some of the deficiencies of HDF4.1, particularly the need to store larger files and more objects per file.
  • +
  • A simpler, more comprehensive data model that includes only two basic structures: a multidimensional array of record structures, and a grouping structure.
  • +
  • A simpler, better-engineered library and API, with improved support for parallel I/O, threads, and other requirements imposed by modern systems and applications.
+ +

Limitations of the current prototype

+

The prototype release includes most of the basic functionality that is planned for the HDF5 library. However, the library does not implement all of the features detailed in the format and API specifications. Here is a listing of some of the limitations of the current release:

+ +
    +
  • Attributes for data objects are not supported
  • +
  • Data compression is not supported
  • +
  • External storage of objects is not supported
  • +
  • Some functions for manipulating datasets, dataspaces, and groups have not been implemented
  • +
  • Some number types, including user-defined number types, are not supported, and number-type conversion is limited.
+ +

See the API Specification at http://hdf.ncsa.uiuc.edu/nra/BigHDF/ for a complete listing of all routines that have been implemented.

+

HDF5 file organization and data model.

+

HDF5 files are organized in a hierarchical structure, with two primary structures: "groups" and "datasets."

+ +
    +
  • HDF5 group: a grouping structure containing instances of zero or more groups or datasets, together with supporting metadata
  • +
  • HDF5 dataset: a multidimensional array of data elements, together with supporting metadata.
+ +

Working with groups and group members is similar in many ways to working with directories and files in UNIX. As with UNIX directories and files, objects in an HDF5 file are often described by giving their full path names. "/" signifies the root group. "/foo" signifies a member of the root group called "foo." "/foo/zoo" signifies a member of the group "foo," which in turn is a member of the root group.

+

Any HDF5 group or dataset may have an associated attribute list. An HDF5 attribute is a user-defined HDF5 structure that provides extra information about an HDF5 object. Attributes are described in more detail below. (Note: attributes are not supported in the current prototype.)

+

HDF5 Groups

+

An HDF5 group is a structure containing zero or more HDF5 objects. A group has two parts:

+ +
    +
  • A group header, which contains a group name and a list of group attributes. (Attributes are not yet implemented.)
  • +
  • A group symbol table, which is a list of the HDF5 objects that belong to the group.
+ +

 

+

HDF5 Datasets

+

A dataset is stored in a file in two parts: a header and a data array.

+

The header contains information that is needed to interpret the array portion of the dataset, as well as metadata, or pointers to metadata, that describes or annotates the dataset. Header information includes the name of the object, its dimensionality, its number-type, information about how the data itself is stored on disk, and other information used by the library to speed up access to the dataset or maintain the file's integrity.

+

There are four essential classes of information in any header: name, datatype, dataspace, and storage layout:

+

Name. A dataset name is a sequence of alphanumeric ASCII characters.

+

Datatype. HDF5 allows one to define many different kinds of datatypes. There are two basic categories of data types: "atomic" types and "compound" types. Atomic types are those that are not decomposed at the data type interface level, such as integers and floats. Compound types are made up of atomic types.

+

Atomic datatypes include integers and floating-point numbers. Each atomic type belongs to a particular class and has several properties: size, order, precision, and offset. In this introduction, we consider only a few of these properties.

+

Atomic datatypes include integer, float, date and time, string, bit field, and opaque. (Note: Only integer and float classes are available in the current implementation.)

+

Properties of integer types include size, order (endian-ness), and signed-ness (signed/unsigned).

+

Properties of float types include the size and location of the exponent and mantissa, and the location of the sign bit.

+

The datatypes that are supported in the current implementation are:

+ +
    +
  • Integer datatypes: 8-bit, 16-bit, 32-bit, and 64-bit integers in both little and big-endian format.
  • +
  • Floating-point numbers: IEEE 32-bit and 64-bit floating-point numbers in both little and big-endian format.
+ +

A compound datatype is one in which a collection of simple datatypes are represented as a single unit, similar to a "struct" in C. The parts of a compound datatype are called members. The members of a compound datatype may be of any datatype, including another compound datatype. It is possible to read members from a compound type without reading the whole type.

+

Dataspace. A dataset dataspace describes the dimensionality of the dataset. The dimensions of a dataset can be fixed (unchanging), or they may be unlimited, which means that they are extendible (i.e. they can grow larger).

+

Properties of a dataspace consist of the rank (number of dimensions) of the data array, the actual sizes of the dimensions of the array, and the maximum sizes of the dimensions of the array. For a fixed-dimension dataset, the actual size is the same as the maximum size of a dimension. When a dimension is unlimited, the maximum size is set to the value H5S_UNLIMITED. (An example below shows how to create extendible datasets.)

+

A dataspace can also describe portions of a dataset, making it possible to do partial I/O (hyperslab) operations.

+

Since I/O operations have two end-points, the raw data transfer functions require two dataspace arguments: one describes the application memory dataspace or subset thereof, and the other describes the file dataspace or subset thereof.

+

Storage layout. The HDF5 format makes it possible to store data in a variety of ways. The default storage layout format is contiguous, meaning that data is stored in the same linear way that it is organized in memory. Two other storage layout formats are currently defined for HDF5: compact, and chunked. In the future, other storage layouts may be added.

+

Compact storage is used when the amount of data is small and can be stored directly in the object header. (Note: Compact storage is not supported in this prototype.)

+

Chunked storage involves dividing the dataset into equal-sized "chunks" that are stored separately. Chunking has three important benefits.

+
    + +
  1. It makes it possible to achieve good performance when accessing subsets of the datasets, even when the subset to be chosen is orthogonal to the normal storage order of the dataset.
  2. +
  3. It makes it possible to compress large datasets and still achieve good performance when accessing subsets of the dataset.
  4. +
  5. It makes it possible to extend the dimensions of a dataset efficiently in any direction.
+ +

HDF5 attribute lists

+

An attribute list for a dataset or group is a listing of objects in the HDF file that are used as attributes, or metadata for the object. The attribute list is composed of two lists of objects, the first being simple attributes about the object, and the second being pointers to attribute objects. (Note: Attributes are not supported in this prototype.)

+

 

+

The HDF5 Applications Programming Interface (API)

+

The current HDF5 API is implemented only in C. The API provides routines for creating HDF5 files, creating and writing groups, datasets, and their attributes to HDF5 files, and reading groups, datasets and their attributes from HDF5 files.

+

Naming conventions

+

All C routines in the HDF5 library begin with a prefix of the form "H5*", where "*" is a single letter indicating the object on which the operation is to be performed:

+ +
    +
  • H5F: File-level access routines.
    +Example: H5Fopen, which opens an HDF5 file.
  • +
  • H5G: Group functions, for creating and operating on physical groups of objects.
+Example: H5Gset, which sets the working group to the specified group.
  • +
  • H5T: DataType functions, for creating and operating on simple and compound datatypes to be used as the elements in data arrays.
    +
Example: H5Tcopy, which creates a copy of an existing data type.
  • +
  • H5S: DataSPace functions, which create and manipulate the dataspace in which the elements of a data array are stored.
    +Example: H5Sget_ndims, which retrieves the number of dimensions of a data array.
  • +
  • H5D: Dataset functions, which manipulate the data within datasets and determine how the data is to be stored in the file.
    +Example: H5Dread, which reads all or part of a dataset into a buffer in memory.
  • +
  • H5P: Template functions, for manipulating object templates.
    +Example: H5Pset_chunk, which sets the number of dimensions and the size of a chunk.
+ +

Include files

+

There are a number of definitions and declarations that should be included with any HDF5 program. These definitions and declarations are contained in several "include" files. The main include file is hdf5.h. This file includes all of the other files that your program is likely to need. Be sure to include hdf5.h in any program that accesses HDF5.
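For instance, the smallest HDF5 program skeleton looks something like this (a sketch; nothing here beyond the include line is HDF5-specific):

#include <hdf5.h>    /* pulls in the other HDF5 include files */

int main(void)
{
    /* calls to H5F*, H5D*, and the other HDF5 routines go here */
    return 0;
}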

+

Predefined simple numeric scalar datatypes

+

The HDF5 prototype currently supports simple signed and unsigned 8-bit, 16-bit, 32-bit, and 64-bit integers, and floating point numbers. The naming scheme for type definitions uses the following conventions:

+ +
    +
  • "int" stands for "integer"
  • +
  • the prefix "u" stands for "unsigned"
  • +
  • the integer suffix indicates the number of bits in the number
+ +

For example, "uint16" indicates an unsigned 16-bit integer. Datatypes that are supported in this prototype are:

+
          char
+          int8
+          uint8
+          int16
+          uint16
+          int32
+          uint32
+          int64
+          uint64
+          float32
+          float64
+

These datatypes are defined in the file H5public.h together with keywords used to refer to them. H5public.h is included by the file hdf5.h described earlier. These datatypes should be used whenever you declare a variable to be used with an HDF5 routine. For instance, a 32-bit floating point variable should always be declared using a declaration such as

+
float32 x;
+

Programming models

+

In this section we describe how to program some basic operations on files, including how to

+ +
    +
  • create a file
  • +
  • create and initialize a dataset
  • +
  • discard objects when they are no longer needed
  • +
  • write a dataset to a new file
  • +
  • obtain information about a dataset
  • +
  • read a portion of a dataset
  • +
  • create and write compound datatypes
  • +
  • create and write extendible datasets
  • +
  • create and populate groups
+ +

How to create an HDF5 file

+

This programming model shows how to create a file and also how to close the file.

+
    + +
  1. Create the file using H5Fcreate. Obtain a file ID (e.g. file_id).
  2. +
  3. Close the file with H5Fclose(file_id).
  4. +

    The following code fragment implements the specified model. If there is a possibility that the file already exists, the user must add the flag H5F_ACC_TRUNC to the access mode to overwrite the previous file's information.

    +
    hid_t       file;                          /* handle */
    +/*
    + * Create a new file using H5F_ACC_TRUNC access,
    + * default file creation properties, and default file
    + * access properties.
    + * Then close the file.
    + */
    +file = H5Fcreate(FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
    +status = H5Fclose(file); 
    +

    How to create and initialize the essential components of a dataset for writing to a file.

    +

    Recall that datatypes and dimensionality (dataspace) are independent objects, which are created separately from any dataset that they might be attached to. Because of this the creation of a dataset requires, at a minimum, separate definitions of datatype, dimensionality, and dataset. Hence, to create a dataset the following steps need to be taken:

    +
  5. Create and initialize a dataspace for the dataset to be written.
  6. +
  7. Define the datatype for the dataset to be written.
  8. +
  9. Create and initialize the dataset itself.
+ +

 

+

The following code illustrates the creation of these three components of a dataset object.

+
hid_t    dataset, datatype, dataspace;   /* declare handles */
+
+/* 
+ * 1. Create dataspace: Describe the size of the array and 
+ * create the data space for fixed size dataset. 
+ */
+dimsf[0] = NX;
+dimsf[1] = NY;
+dataspace = H5Screate_simple(RANK, dimsf, NULL); 
+/* 
+ * 2. Define datatype for the data in the file.
+ * We will store little endian INT32 numbers.
+ */
+datatype = H5Tcopy(H5T_NATIVE_INT32);
+status = H5Tset_order(datatype, H5T_ORDER_LE);
+/*
+ * 3. Create a new dataset within the file using defined 
+ * dataspace and datatype and default dataset creation
+ * properties.
+ * NOTE: H5T_NATIVE_INT32 can be used as datatype if conversion
+ * to little endian is not needed.
+ */
+dataset = H5Dcreate(file, DATASETNAME, datatype, dataspace, H5P_DEFAULT);
+ + +

How to discard objects when they are no longer needed

+
+ +

The type, dataspace and dataset objects should be released once they are no longer needed by a program. Since each is an independent object, they must be released ("closed") separately. The following lines of code close the datatype, dataset, and dataspace that were created in the preceding section.

+

H5Tclose(datatype);

+

H5Dclose(dataset);

+

H5Sclose(dataspace);

+ + +

How to write a dataset to a new file

+
+ +

Having defined the datatype, dataset, and dataspace parameters, you write out the data with a call to H5Dwrite.

+
/*
+ * Write the data to the dataset using default transfer
+ * properties.
+ */
+status = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL,
+                  H5P_DEFAULT, data);
+

The third and fourth parameters of H5Dwrite in the example describe the dataspaces in memory and in the file, respectively. They are set to the value H5S_ALL to indicate that an entire dataset is to be written. In a later section we look at how we would access a portion of a dataset.

+

Example 1 contains a program that creates a file and a dataset, and writes the dataset to the file.

+

Reading is analogous to writing. If, in the previous example, we wish to read an entire dataset, we would use the same basic calls with the same parameters. Of course, the routine H5Dread would replace H5Dwrite.

+ + +

Getting information about a dataset

+
+ +

Although reading is analogous to writing, it is often necessary to query a file to obtain information about a dataset. For instance, we often need to know about the datatype associated with a dataset, as well as dataspace information (e.g. rank and dimensions). There are several "get" routines for obtaining this information. The following code segment illustrates how we would get this kind of information:

+
/*
+ * Get datatype and dataspace handles and then query
+ * dataset class, order, size, rank and dimensions.
+ */
+
+datatype  = H5Dget_type(dataset);     /* datatype handle */ 
+class     = H5Tget_class(datatype);
+if (class == H5T_INTEGER) printf("Data set has INTEGER type \n");
+order     = H5Tget_order(datatype);
+if (order == H5T_ORDER_LE) printf("Little endian order \n");
+
+size  = H5Tget_size(datatype);
+printf(" Data size is %d \n", size);
+
+dataspace = H5Dget_space(dataset);    /* dataspace handle */
+rank      = H5Sget_ndims(dataspace);
+status_n  = H5Sget_dims(dataspace, dims_out, NULL);
+printf("rank %d, dimensions %d x %d \n", rank, dims_out[0], dims_out[1]);
+

 

+ + +

Reading a portion of a dataset: defining dataspaces

+
+ +

In the previous discussion, we describe how to access an entire dataset with one write (or read) operation. To read or write a portion of a dataset, we need to provide more contextual information.

+

Consider the following example. Suppose there is a 500x600 dataset in a file, and we wish to read from the dataset a 100x200 hyperslab located beginning at element <200,200>. In addition, suppose we wish to read the hyperslab into a 200x400 array in memory beginning at element <0,0> in memory. Visually, the transfer looks something like this:

+

 

+

As the example illustrates, whenever we read part of a dataset from a file we must provide two dataspaces: the dataspace of the object in the file as well as the dataspace of the object in memory into which we read. There are dataspace routines (H5S...) for doing this.

+

For example, suppose we want to read a 3x4 hyperslab from a dataset in a file beginning at the element <1,2> in the dataset. In order to do this, we must create a dataspace that describes the overall rank and dimensions of the dataset in the file, as well as the position and size of the hyperslab that we are extracting from that dataset. The following code illustrates how this would be done.

+
/* 
+ * Get overall rank and dimensions of dataspace.
+ */
+dataspace = H5Dget_space(dataset);    /* get dataspace handle */
+rank      = H5Sget_ndims(dataspace);
+status_n  = H5Sget_dims(dataspace, dims_out, NULL);
+
+/* 
+ * Define hyperslab in the dataset. 
+ */
+offset[0] = 1;
+offset[1] = 2;
+count[0]  = 3;
+count[1]  = 4;
+status = H5Sset_hyperslab(dataspace, offset, count, NULL);
+

This describes the dataspace from which we wish to read. We need to define the dataspace in memory analogously. Suppose, for instance, that we have in memory a three-dimensional 7x7x3 array into which we wish to read the 3x4 hyperslab described above beginning at the element <3,0,0>. Since the in-memory dataspace has three dimensions, we have to describe the hyperslab as an array with three dimensions, with the last dimension being 1: <3,4,1>.

+

Notice that now we must describe two things: the dimensions of the in-memory array, and the size and position of the hyperslab that we wish to read in. The following code illustrates how this would be done.

+
/*
+ * Define the memory dataspace.
+ */
+dimsm[0] = 7;
+dimsm[1] = 7;
+dimsm[2] = 3;
+memspace = H5Screate_simple(RANK_OUT,dimsm,NULL);   
+
+/* 
+ * Define memory hyperslab. 
+ */
+offset_out[0] = 3;
+offset_out[1] = 0;
+offset_out[2] = 0;
+count_out[0]  = 3;
+count_out[1]  = 4;
+count_out[2]  = 1;
+status = H5Sset_hyperslab(memspace, offset_out, count_out, NULL);
+
+

Example 2 contains a complete program that performs these operations.

+

Creating compound datatypes

+

Properties of compound datatypes. A compound datatype is similar to a struct in C or a common block in Fortran. It is a collection of one or more atomic types or small arrays of such types. To create and use a compound datatype, you need to refer to various properties of the compound datatype:

+ +
  • It is of class compound.

  • It has a fixed total size, in bytes.

  • It consists of zero or more members (defined in any order) with unique names and which occupy non-overlapping regions within the datum.

  • Each member has its own datatype.

  • Each member is referenced by an index number between zero and N-1, where N is the number of members in the compound datatype.

  • Each member has a name which is unique among its siblings in a compound data type.

  • Each member has a fixed byte offset, which is the first byte (smallest byte address) of that member in a compound datatype.

  • Each member can be a small array of up to four dimensions.
+ +

Properties of members of a compound data type are defined when the member is added to the compound type and cannot be subsequently modified.

+

Defining compound datatypes.

+

Compound datatypes must be built out of other datatypes. First, one creates an empty compound data type and specifies its total size. Then members are added to the compound data type in any order.

+

Member names. Each member must have a descriptive name, which is the key used to uniquely identify the member within the compound data type. A member name in an HDF5 data type does not necessarily have to be the same as the name of the corresponding member in the C struct in memory, although this is often the case. Nor does one need to define all members of the C struct in the HDF5 compound data type (or vice versa).

+

Offsets. Usually a C struct will be defined to hold a data point in memory, and the offsets of the members in memory will be the offsets of the struct members from the beginning of an instance of the struct. The library defines two macros to compute the offset of a member within a struct (The only difference between the two is that one uses s.m as the struct member while the other uses p->m):

+

HOFFSET(s,m). This macro computes the offset of member m within a struct variable s.

+

HPOFFSET(p,m). This macro computes the offset of member m from a pointer to a struct p.
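
For instance, the two macros give the same offset for the same member; a minimal sketch using a hypothetical struct (not part of the original text):

struct point_t { double x, y; } pt;   /* hypothetical example struct */
struct point_t *p = &pt;
size_t off1 = HOFFSET(pt, y);         /* offset of member y via a variable */
size_t off2 = HPOFFSET(p, y);         /* the same offset via a pointer */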

+

Here is an example in which a compound data type is created to describe complex numbers whose type is defined by the complex_t struct.

+
typedef struct {
+   double re;   /*real part */
+   double im;   /*imaginary part */
+} complex_t;
+
+complex_t tmp;  /*used only to compute offsets */
+hid_t complex_id = H5Tcreate (H5T_COMPOUND, sizeof tmp);
+H5Tinsert (complex_id, "real", HOFFSET(tmp,re),
+           H5T_NATIVE_DOUBLE);
+H5Tinsert (complex_id, "imaginary", HOFFSET(tmp,im),
+           H5T_NATIVE_DOUBLE);
+
+

Example 3 shows how to create a compound data type, + write an array that has the compound data type to the file, and read back subsets of the members.

+

 

+

Creating and writing extendible datasets

+

An extendible dataset is one whose dimensions can grow. In HDF5, it is possible to define a dataset to have certain initial dimensions, then later to increase the size of any of the initial dimensions.

+

For example, you can create and store the following 3x3 HDF5 dataset:

+

1 1 1

+

1 1 1

+

1 1 1

+
+
+

then later to extend this into a 10x3 dataset by adding 7 rows, such as this:

+

1 1 1

+

1 1 1

+

1 1 1

+

2 2 2

+

2 2 2

+

2 2 2

+

2 2 2

+

2 2 2

+

2 2 2

+

2 2 2

+
+
+

then further extend it to a 10x5 dataset by adding two columns, such as this:

+

1 1 1 3 3

+

1 1 1 3 3

+

1 1 1 3 3

+

2 2 2 3 3

+

2 2 2 3 3

+

2 2 2 3 3

+

2 2 2 3 3

+

2 2 2 3 3

+

2 2 2 3 3

+

2 2 2 3 3

+
+
+

The current version of HDF5 requires you to use chunking in order to define extendible datasets. Chunking makes it possible to extend datasets efficiently, without having to reorganize storage excessively.

+

Three operations are required in order to write an extendible dataset:

+
    + +
  1. Declare the dataspace of the dataset to have unlimited dimensions for all dimensions that might eventually be extended.

  2. When creating the dataset, set the storage layout for the dataset to chunked.

  3. Extend the size of the dataset.
+ +

For example, suppose we wish to create a dataset similar to the one shown above. We want to start with a 3x3 dataset, then later extend it in both directions.

+

Declaring unlimited dimensions. We could declare the dataspace to have unlimited dimensions with the following code, which uses the predefined constant H5S_UNLIMITED to specify unlimited dimensions.

+

hsize_t dims[2] = { 3, 3}; /* dataset dimensions at the creation time */

+

hsize_t maxdims[2] = {H5S_UNLIMITED, H5S_UNLIMITED}; +

+

/*

+

* 1. Create the data space with unlimited dimensions.

+

*/

+

dataspace = H5Screate_simple(RANK, dims, maxdims);

+
+
+

Enabling chunking. We can then modify the dataset storage layout properties to + enable chunking. We do this using the routine H5Pset_chunk: +

hid_t cparms;

+

hsize_t chunk_dims[2] ={2, 5};

+

/*

+

* 2. Modify dataset creation properties to enable chunking.

+

*/

+

cparms = H5Pcreate (H5P_DATASET_CREATE);

+

status = H5Pset_chunk( cparms, RANK, chunk_dims);

+Extending dataset size. Finally, when we want to extend the size of the dataset, + we invoke H5Dextend. In the following example, we extend the dataset along the first dimension by seven rows, so that the new dimensions are <10,3>: +

/*

+

* Extend the dataset. Dataset becomes 10 x 3.

+

*/

+

dims[0] = dims[0] + 7;

+

size[0] = dims[0];

+

size[1] = dims[1];

+

status = H5Dextend (dataset, size);

+

 

+
+Example 4 shows how to create a 3x3 extendible dataset, extend it to 10x3, then extend it again to 10x5.

+

Working with groups in a file

+

Groups provide a mechanism for organizing datasets in an HDF5 file in extendable, meaningful ways. The H5G API contains routines for working with groups.

+To create a group, use H5Gcreate. For example, the following code creates two groups that are members of the root group. They are called "/IntData" and "/FloatData." The return value ("dir") is the group ID. +
/*
+ * Create two groups in a file.
+ */
+dir = H5Gcreate(file, "/IntData", 0);
+status = H5Gclose(dir);
+dir = H5Gcreate(file, "/FloatData", 0);
+status = H5Gclose(dir);

+
The third parameter in H5Gcreate optionally specifies how much file space to reserve to store the names that will appear in this group. If a non-positive value is supplied then a default size is chosen.
+H5Gclose closes the group and releases the group ID.

+

Creating an object in a particular group. Except for single-object HDF5 files, every object in an HDF5 file must belong to a group, and hence has a path name. Hence, we put an object in a particular group by giving its path name when we create it. For example, the following code creates a dataset "IntArray" in the group "/IntData":

+
/*
+ * Create dataset in the /IntData group by specifying full path.
+ */
+dims[0] = 2;
+dims[1] = 3;
+dataspace = H5Screate_simple(2, dims, NULL);
+dataset = H5Dcreate(file, "/IntData/IntArray", H5T_NATIVE_INT, dataspace, H5P_DEFAULT); 
+

Changing the current group. The HDF5 Group API supports the idea of a "current" group. This is analogous to the "current working directory" idea in UNIX. You can set the current group in HDF5 with the routine H5Gset. The following code shows how to set a current group, then create a certain dataset ("FloatArray") in that group.

+
/*
+ * Set current group to /FloatData.
+ */
+status = H5Gset (file, "/FloatData");
+
+/* 
+ * Create two datasets
+ */
+
+dims[0] = 5;
+dims[1] = 10;
+dataspace = H5Screate_simple(2, dims, NULL);
+dataset = H5Dcreate(file, "FloatArray", H5T_NATIVE_FLOAT, dataspace, H5P_DEFAULT); 
+
+Example 5 shows how to create an HDF5 file with two groups, and to place some datasets within those groups.

+

Example code

+

Example 1: How to create a homogeneous multi-dimensional dataset and write it to a file.

+

This example creates a 2-dimensional HDF5 dataset of little endian 32-bit integers.

+

/*

+

* This example writes data to HDF5 file.

+

* Data conversion is performed during write operation.

+

*/

+

#include "hdf5.h"

+

#define FILE "SDS.h5"

+

#define DATASETNAME "IntArray"

+

#define NX 5 /* dataset dimensions */

+

#define NY 6

+

#define RANK 2

+

main ()

+

{

+

hid_t file, dataset; /* file and dataset handles */

+

hid_t datatype, dataspace; /* handles */

+

hsize_t dimsf[2]; /* dataset dimensions */

+

herr_t status;

+

int32 data[NX][NY]; /* data to write */

+

int i, j;

+

/*

+

* Data and output buffer initialization.

+

*/

+

for (j = 0; j < NX; j++) {

+

for (i = 0; i < NY; i++)

+

data[j][i] = i + j;

+

}

+

/* 0 1 2 3 4 5

+

1 2 3 4 5 6

+

2 3 4 5 6 7

+

3 4 5 6 7 8

+

4 5 6 7 8 9 */

+

/*

+

* Create a new file using H5F_ACC_TRUNC access,

+

* default file creation properties, and default file

+

* access properties.

+

*/

+

file = H5Fcreate(FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

+

/*

+

* Describe the size of the array and create the data space for fixed

+

* size dataset.

+

*/

+

dimsf[0] = NX;

+

dimsf[1] = NY;

+

dataspace = H5Screate_simple(RANK, dimsf, NULL);

+

/*

+

* Define datatype for the data in the file.

+

* We will store little endian INT32 numbers.

+

*/

+

datatype = H5Tcopy(H5T_NATIVE_INT32);

+

status = H5Tset_order(datatype, H5T_ORDER_LE);

+

/*

+

* Create a new dataset within the file using defined dataspace and

+

* datatype and default dataset creation properties.

+

*/

+

dataset = H5Dcreate(file, DATASETNAME, datatype, dataspace,

+

H5P_DEFAULT);

+

/*

+

* Write the data to the dataset using default transfer properties.

+

*/

+

status = H5Dwrite(dataset, H5T_NATIVE_INT32, H5S_ALL, H5S_ALL,

+

H5P_DEFAULT, data);

+

/*

+

* Close/release resources.

+

*/

+

H5Sclose(dataspace);

+

H5Tclose(datatype);

+

H5Dclose(dataset);

+

H5Fclose(file);

+

}

+

 

+
+

Example 2. How to read a hyperslab from file into memory.

+

This example reads a hyperslab from a 2-d HDF5 dataset into a 3-d dataset in memory.

+

/*

+

* This example reads hyperslab from the SDS.h5 file

+

* created by h5_write.c program into two-dimensional

+

* plane of the three-dimensional array.

+

* Information about dataset in the SDS.h5 file is obtained.

+

*/

+

#include "hdf5.h"

+

#define FILE "SDS.h5"

+

#define DATASETNAME "IntArray"

+

#define NX_SUB 3 /* hyperslab dimensions */

+

#define NY_SUB 4

+

#define NX 7 /* output buffer dimensions */

+

#define NY 7

+

#define NZ 3

+

#define RANK 2

+

#define RANK_OUT 3

+

main ()

+

{

+

hid_t file, dataset; /* handles */

+

hid_t datatype, dataspace;

+

hid_t memspace;

+

H5T_class_t class; /* data type class */

+

H5T_order_t order; /* data order */

+

size_t size; /* size of the data element

+

stored in file */

+

hsize_t dimsm[3]; /* memory space dimensions */

+

hsize_t dims_out[2]; /* dataset dimensions */

+

herr_t status;

+

int data_out[NX][NY][NZ ]; /* output buffer */

+

hsize_t count[2]; /* size of the hyperslab in the file */

+

hssize_t offset[2]; /* hyperslab offset in the file */

+

hsize_t count_out[3]; /* size of the hyperslab in memory */

+

hssize_t offset_out[3]; /* hyperslab offset in memory */

+

int i, j, k, status_n, rank;

+

for (j = 0; j < NX; j++) {

+

for (i = 0; i < NY; i++) {

+

for (k = 0; k < NZ ; k++)

+

data_out[j][i][k] = 0;

+

}

+

}

+

/*

+

* Open the file and the dataset.

+

*/

+

file = H5Fopen(FILE, H5F_ACC_RDONLY, H5P_DEFAULT);

+

dataset = H5Dopen(file, DATASETNAME);

+

/*

+

* Get datatype and dataspace handles and then query

+

* dataset class, order, size, rank and dimensions.

+

*/

+

datatype = H5Dget_type(dataset); /* datatype handle */

+

class = H5Tget_class(datatype);

+

if (class == H5T_INTEGER) printf("Data set has INTEGER type \n");

+

order = H5Tget_order(datatype);

+

if (order == H5T_ORDER_LE) printf("Little endian order \n");

+

size = H5Tget_size(datatype);

+

printf(" Data size is %d \n", size);

+

dataspace = H5Dget_space(dataset); /* dataspace handle */

+

rank = H5Sget_ndims(dataspace);

+

status_n = H5Sget_dims(dataspace, dims_out, NULL);

+

printf("rank %d, dimensions %d x %d \n", rank, dims_out[0], dims_out[1]);

+

/*

+

* Define hyperslab in the dataset.

+

*/

+

offset[0] = 1;

+

offset[1] = 2;

+

count[0] = NX_SUB;

+

count[1] = NY_SUB;

+

status = H5Sset_hyperslab(dataspace, offset, count, NULL);

+

/*

+

* Define the memory dataspace.

+

*/

+

dimsm[0] = NX;

+

dimsm[1] = NY;

+

dimsm[2] = NZ ;

+

memspace = H5Screate_simple(RANK_OUT,dimsm,NULL);

+

/*

+

* Define memory hyperslab.

+

*/

+

offset_out[0] = 3;

+

offset_out[1] = 0;

+

offset_out[2] = 0;

+

count_out[0] = NX_SUB;

+

count_out[1] = NY_SUB;

+

count_out[2] = 1;

+

status = H5Sset_hyperslab(memspace, offset_out, count_out, NULL);

+

/*

+

* Read data from hyperslab in the file into the hyperslab in

+

* memory and display.

+

*/

+

status = H5Dread(dataset, H5T_NATIVE_INT, memspace, dataspace,

+

H5P_DEFAULT, data_out);

+

for (j = 0; j < NX; j++) {

+

for (i = 0; i < NY; i++) printf("%d ", data_out[j][i][0]);

+

printf("\n");

+

}

+

/* 0 0 0 0 0 0 0

+

0 0 0 0 0 0 0

+

0 0 0 0 0 0 0

+

3 4 5 6 0 0 0

+

4 5 6 7 0 0 0

+

5 6 7 8 0 0 0

+

0 0 0 0 0 0 0 */

+

/*

+

* Close/release resources.

+

*/

+

H5Tclose(datatype);

+

H5Dclose(dataset);

+

H5Sclose(dataspace);

+

H5Sclose(memspace);

+

H5Fclose(file);

+

}

+
+

 

+

Example 3. Working with compound datatypes.

+

This example shows how to create a compound data type, write an array which has the compound data type to the file, and read back subsets of fields.

+

/*

+

* This example shows how to create a compound data type,

+

* write an array which has the compound data type to the file,

+

* and read back fields' subsets.

+

*/

+

#include "hdf5.h"

+

#define FILE "SDScompound.h5"

+

#define DATASETNAME "ArrayOfStructures"

+

#define LENGTH 10

+

#define RANK 1

+

main()

+

{

+

/* First structure and dataset*/

+

typedef struct s1_t {

+

int a;

+

float b;

+

double c;

+

} s1_t;

+

s1_t s1[LENGTH];

+

hid_t s1_tid; /* File datatype handle */

+

/* Second structure (subset of s1_t) and dataset*/

+

typedef struct s2_t {

+

double c;

+

int a;

+

} s2_t;

+

s2_t s2[LENGTH];

+

hid_t s2_tid; /* Memory datatype handle */

+

/* Third "structure" ( will be used to read float field of s1) */

+

hid_t s3_tid; /* Memory datatype handle */

+

float s3[LENGTH];

+

int i;

+

hid_t file, datatype, dataset, space; /* Handles */

+

herr_t status;

+

hsize_t dim[] = {LENGTH}; /* Dataspace dimensions */

+

H5T_class_t class;

+

size_t size;

+

/*

+

* Initialize the data

+

*/

+

for (i = 0; i< LENGTH; i++) {

+

s1[i].a = i;

+

s1[i].b = i*i;

+

s1[i].c = 1./(i+1);

+

}

+

/*

+

* Create the data space.

+

*/

+

space = H5Screate_simple(RANK, dim, NULL);

+

/*

+

* Create the file.

+

*/

+

file = H5Fcreate(FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

+

/*

+

* Create the memory data type.

+

*/

+

s1_tid = H5Tcreate (H5T_COMPOUND, sizeof(s1_t));

+

status = H5Tinsert(s1_tid, "a_name", HPOFFSET(s1, a), H5T_NATIVE_INT);

+

status = H5Tinsert(s1_tid, "c_name", HPOFFSET(s1, c), H5T_NATIVE_DOUBLE);

+

status = H5Tinsert(s1_tid, "b_name", HPOFFSET(s1, b), H5T_NATIVE_FLOAT);

+

/*

+

* Create the dataset.

+

*/

+

dataset = H5Dcreate(file, DATASETNAME, s1_tid, space, H5P_DEFAULT);

+

/*

+

* Write data to the dataset;

+

*/

+

status = H5Dwrite(dataset, s1_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, s1);

+

/*

+

* Release resources

+

*/

+

H5Tclose(s1_tid);

+

H5Sclose(space);

+

H5Dclose(dataset);

+

H5Fclose(file);

+

/*

+

* Open the file and the dataset.

+

*/

+

file = H5Fopen(FILE, H5F_ACC_RDONLY, H5P_DEFAULT);

+

dataset = H5Dopen(file, DATASETNAME);

+

/*

+

* Create a data type for s2

+

*/

+

s2_tid = H5Tcreate(H5T_COMPOUND, sizeof(s2_t));

+

status = H5Tinsert(s2_tid, "c_name", HPOFFSET(s2, c), H5T_NATIVE_DOUBLE);

+

status = H5Tinsert(s2_tid, "a_name", HPOFFSET(s2, a), H5T_NATIVE_INT);

+

/*

+

* Read two fields c and a from s1 dataset. Fields in the file

+

* are found by their names "c_name" and "a_name".

+

*/

+

status = H5Dread(dataset, s2_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, s2);

+

/*

+

* Display the fields

+

*/

+

printf("\n");

+

printf("Field c : \n");

+

for( i = 0; i < LENGTH; i++) printf("%.4f ", s2[i].c);

+

printf("\n");

+

printf("\n");

+

printf("Field a : \n");

+

for( i = 0; i < LENGTH; i++) printf("%d ", s2[i].a);

+

printf("\n");

+

/*

+

* Create a data type for s3.

+

*/

+

s3_tid = H5Tcreate(H5T_COMPOUND, sizeof(float));

+

status = H5Tinsert(s3_tid, "b_name", 0, H5T_NATIVE_FLOAT);

+

/*

+

* Read field b from s1 dataset. Field in the file is found by its name.

+

*/

+

status = H5Dread(dataset, s3_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, s3);

+

/*

+

* Display the field

+

*/

+

printf("\n");

+

printf("Field b : \n");

+

for( i = 0; i < LENGTH; i++) printf("%.4f ", s3[i]);

+

printf("\n");

+

/*

+

* Release resources

+

*/

+

H5Tclose(s2_tid);

+

H5Tclose(s3_tid);

+

H5Dclose(dataset);


+

H5Fclose(file);

+

}

+
+

 

+

Example 4. Creating and writing an extendible dataset.

+

This example shows how to create a 3x3 extendible dataset, to extend the dataset to 10x3, then to extend it again to 10x5.

+

/*

+

* This example shows how to work with extendible dataset.

+

* In the current version of the library dataset MUST be

+

* chunked.

+

*

+

*/

+

#include "hdf5.h"

+

#define FILE "SDSextendible.h5"

+

#define DATASETNAME "ExtendibleArray"

+

#define RANK 2

+

#define NX 10

+

#define NY 5

+

main ()

+

{

+

hid_t file; /* handles */

+

hid_t datatype, dataspace, dataset;

+

hid_t filespace;

+

hid_t cparms;

+

hsize_t dims[2] = { 3, 3}; /* dataset dimensions

+

at the creation time */

+

hsize_t dims1[2] = { 3, 3}; /* data1 dimensions */

+

hsize_t dims2[2] = { 7, 1}; /* data2 dimensions */

+

hsize_t dims3[2] = { 2, 2}; /* data3 dimensions */

+

hsize_t maxdims[2] = {H5S_UNLIMITED, H5S_UNLIMITED};

+

hsize_t chunk_dims[2] ={2, 5};

+

hsize_t size[2];

+

hssize_t offset[2];

+

herr_t status;

+

int data1[3][3] = { 1, 1, 1, /* data to write */

+

1, 1, 1,

+

1, 1, 1 };

+

int data2[7] = { 2, 2, 2, 2, 2, 2, 2};

+

int data3[2][2] = { 3, 3,

+

3, 3};

+

/*

+

* Create the data space with unlimited dimensions.

+

*/

+

dataspace = H5Screate_simple(RANK, dims, maxdims);

+

/*

+

* Create a new file. If file exists its contents will be overwritten.

+

*/

+

file = H5Fcreate(FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

+

/*

+

* Modify dataset creation properties, i.e. enable chunking.

+

*/

+

cparms = H5Pcreate (H5P_DATASET_CREATE);

+

status = H5Pset_chunk( cparms, RANK, chunk_dims);

+

/*

+

* Create a new dataset within the file using cparms

+

* creation properties.

+

*/

+

dataset = H5Dcreate(file, DATASETNAME, H5T_NATIVE_INT, dataspace,

+

cparms);

+

/*

+

* Extend the dataset. This call assures that dataset is at least 3 x 3.

+

*/

+

size[0] = 3;

+

size[1] = 3;

+

status = H5Dextend (dataset, size);

+

/*

+

* Select a hyperslab.

+

*/

+

filespace = H5Dget_space (dataset);

+

offset[0] = 0;

+

offset[1] = 0;

+

status = H5Sset_hyperslab(filespace, offset, dims1, NULL);

+

/*

+

* Write the data to the hyperslab.

+

*/

+

status = H5Dwrite(dataset, H5T_NATIVE_INT, dataspace, filespace,

+

H5P_DEFAULT, data1);

+

/*

+

* Extend the dataset. Dataset becomes 10 x 3.

+

*/

+

dims[0] = dims1[0] + dims2[0];

+

size[0] = dims[0];

+

size[1] = dims[1];

+

status = H5Dextend (dataset, size);

+

/*

+

* Select a hyperslab.

+

*/

+

filespace = H5Dget_space (dataset);

+

offset[0] = 3;

+

offset[1] = 0;

+

status = H5Sset_hyperslab(filespace, offset, dims2, NULL);

+

/*

+

* Define memory space

+

*/

+

dataspace = H5Screate_simple(RANK, dims2, NULL);

+

/*

+

* Write the data to the hyperslab.

+

*/

+

status = H5Dwrite(dataset, H5T_NATIVE_INT, dataspace, filespace,

+

H5P_DEFAULT, data2);

+

/*

+

* Extend the dataset. Dataset becomes 10 x 5.

+

*/

+

dims[1] = dims1[1] + dims3[1];

+

size[0] = dims[0];

+

size[1] = dims[1];

+

status = H5Dextend (dataset, size);

+

/*

+

* Select a hyperslab

+

*/

+

filespace = H5Dget_space (dataset);

+

offset[0] = 0;

+

offset[1] = 3;

+

status = H5Sset_hyperslab(filespace, offset, dims3, NULL);

+

/*

+

* Define memory space.

+

*/

+

dataspace = H5Screate_simple(RANK, dims3, NULL);

+

/*

+

* Write the data to the hyperslab.

+

*/

+

status = H5Dwrite(dataset, H5T_NATIVE_INT, dataspace, filespace,

+

H5P_DEFAULT, data3);

+

/*

+

* Resulting dataset

+

*

+

1 1 1 3 3

+

1 1 1 3 3

+

1 1 1 0 0

+

2 0 0 0 0

+

2 0 0 0 0

+

2 0 0 0 0

+

2 0 0 0 0

+

2 0 0 0 0

+

2 0 0 0 0

+

2 0 0 0 0

+

*/

+

/*

+

* Close/release resources.

+

*/

+

H5Dclose(dataset);

+

H5Sclose(dataspace);

+

H5Sclose(filespace);

+

H5Fclose(file);

+

}

+
+

 

+

Example 5. Creating groups.

+

This example shows how to create an HDF5 file with two groups, and to place some datasets within those groups.

+

/*

+

* This example shows how to create groups within the file and

+

* datasets within the file and groups.

+

*/

+

 

+

#include "hdf5.h"

+

 

+

#define FILE "DIR.h5"

+

#define RANK 2

+

main()

+

{

+

hid_t file, dir;

+

hid_t dataset, dataspace;

+

herr_t status;

+

hsize_t dims[2];

+

hsize_t size[1];

+

/*

+

* Create a file.

+

*/

+

file = H5Fcreate(FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

+

/*

+

* Create two groups in a file.

+

*/

+

dir = H5Gcreate(file, "/IntData", 0);

+

status = H5Gclose(dir);

+

dir = H5Gcreate(file,"/FloatData", 0);

+

status = H5Gclose(dir);

+

/*

+

* Create dataspace for the character string

+

*/

+

size[0] = 80;

+

dataspace = H5Screate_simple(1, size, NULL);

+

/*

+

* Create dataset "String" in the root group.

+

*/

+

dataset = H5Dcreate(file, "String", H5T_NATIVE_CHAR, dataspace, H5P_DEFAULT);

+

H5Dclose(dataset);

+

/*

+

* Create dataset "String" in the /IntData group.

+

*/

+

dataset = H5Dcreate(file, "/IntData/String", H5T_NATIVE_CHAR, dataspace,

+

H5P_DEFAULT);

+

H5Dclose(dataset);

+

/*

+

* Create dataset "String" in the /FloatData group.

+

*/

+

dataset = H5Dcreate(file, "/FloatData/String", H5T_NATIVE_CHAR, dataspace,

+

H5P_DEFAULT);

+

H5Sclose(dataspace);

+

H5Dclose(dataset);

+

/*

+

* Create IntArray dataset in the /IntData group by specifying full path.

+

*/

+

dims[0] = 2;

+

dims[1] = 3;

+

dataspace = H5Screate_simple(RANK, dims, NULL);

+

dataset = H5Dcreate(file, "/IntData/IntArray", H5T_NATIVE_INT, dataspace,

+

H5P_DEFAULT);

+

H5Sclose(dataspace);

+

H5Dclose(dataset);

+

/*

+

* Set current group to /IntData and attach to the dataset String.

+

*/

+

status = H5Gset (file, "/IntData");

+

dataset = H5Dopen(file, "String");

+

if (dataset > 0) printf("String dataset in /IntData group is found\n");

+

H5Dclose(dataset);

+

/*

+

* Set current group to /FloatData.

+

*/

+

status = H5Gset (file, "/FloatData");

+

/*

+

* Create two datasets FloatArray and DoubleArray.

+

*/

+

dims[0] = 5;

+

dims[1] = 10;

+

dataspace = H5Screate_simple(RANK, dims, NULL);

+

dataset = H5Dcreate(file, "FloatArray", H5T_NATIVE_FLOAT, dataspace, H5P_DEFAULT);

+

H5Sclose(dataspace);

+

H5Dclose(dataset);

+

dims[0] = 4;

+

dims[1] = 6;

+

dataspace = H5Screate_simple(RANK, dims, NULL);

+

dataset = H5Dcreate(file, "DoubleArray", H5T_NATIVE_DOUBLE, dataspace,

+

H5P_DEFAULT);

+

H5Sclose(dataspace);

+

H5Dclose(dataset);

+

/*

+

* Attach to /FloatData/String dataset.

+

*/

+

dataset = H5Dopen(file, "/FloatData/String");

+

if (dataset > 0) printf("/FloatData/String dataset is found\n");

+

H5Dclose(dataset);

+

H5Fclose(file);

+

}

+ + diff --git a/doc/html/H5.sample_code.html b/doc/html/H5.sample_code.html new file mode 100644 index 0000000..b3e5336 --- /dev/null +++ b/doc/html/H5.sample_code.html @@ -0,0 +1,123 @@ + +HDF5 Draft API Example Code + + +
+

HDF5: API Example Code

+
+ +

Example programs/sections of code below: +

+
#1 +
A simple example showing how to create a file. +
#2 +
An example showing how to create a homogeneous multi-dimensional dataset. +
#3 +
An example showing how to read a generic dataset. +
+ +
+

Simple Example showing how to create a file.

+ +

Notes:
+This example creates a new HDF5 file and allows write access. +If the file already exists, the H5F_ACC_TRUNC flag would be needed instead, to +overwrite the previous file's information. +

Code: + +

+    hid_t file_id;
+
+    file_id=H5Fcreate("example1.h5",H5F_ACC_EXCL,H5P_DEFAULT_TEMPLATE,H5P_DEFAULT_TEMPLATE);
+
+    H5Fclose(file_id);
+
+
+ +
+

Example showing how to create a homogeneous multi-dimensional dataset.

+ +

Notes:
+This example creates a 4-dimensional dataset of 32-bit floating-point +numbers, corresponding to the current Scientific Dataset functionality. + +

Code: + +

+hid_t file_id;              /* new file's ID */
+hid_t dim_id;               /* new dimensionality's ID */
+int rank=4;                 /* the number of dimensions */
+hsize_t dims[4]={6,5,4,3};  /* the size of each dimension */
+hid_t dataset_id;           /* new dataset's ID */
+float buf[6][5][4][3];      /* storage for the dataset's data */
+herr_t status;              /* function return status */
+
+file_id = H5Fcreate ("example3.h5", H5F_ACC_TRUNC, H5P_DEFAULT,
+                     H5P_DEFAULT);
+assert (file_id >= 0);
+
+/* Create & initialize a dimensionality object */
+dim_id = H5Screate_simple (rank, dims, NULL);
+assert (dim_id >= 0);
+
+/* Create & initialize the dataset object */
+dataset_id = H5Dcreate (file_id, "Simple Object", H5T_NATIVE_FLOAT,
+                        dim_id, H5P_DEFAULT);
+assert (dataset_id >= 0);
+
+<initialize data array>
+
+/* Write the entire dataset out */
+status = H5Dwrite (dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL,
+                   H5P_DEFAULT, buf);
+assert (status >= 0);
+
+/* Release the IDs we've created */
+H5Sclose (dim_id);
+H5Dclose (dataset_id);
+H5Fclose (file_id);
+
+ +
+

Example showing how to read a generic dataset.

+ +

Notes:
+This example shows how to get the information for and display a generic +dataset. + +

Code: + +

+hid_t file_id;          /* file's ID */
+hid_t dataset_id;       /* dataset's ID in memory */
+hid_t space_id;         /* dataspace's ID in memory */
+uintn nelems;           /* number of elements in array */
+double *buf;            /* pointer to the dataset's data */
+herr_t status;          /* function return value */
+
+file_id = H5Fopen ("example6.h5", H5F_ACC_RDONLY, H5P_DEFAULT);
+assert (file_id >= 0);
+
+/* Attach to a dataset object */
+dataset_id = H5Dopen (file_id, "dataset1");
+assert (dataset_id >= 0);
+
+/* Get the OID for the dataspace */
+space_id = H5Dget_space (dataset_id);
+assert (space_id >= 0);
+
+/* Allocate space for the data */
+nelems = H5Sget_npoints (space_id);
+buf = malloc (nelems * sizeof(double));
+
+/* Read in the dataset */
+status = H5Dread (dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL,
+                  H5P_DEFAULT, buf);
+assert (status >= 0);
+
+/* Release the IDs we've accessed */
+H5Sclose (space_id);
+H5Dclose (dataset_id);
+H5Fclose (file_id);
+
diff --git a/doc/html/H5.user.html b/doc/html/H5.user.html new file mode 100644 index 0000000..3c16553 --- /dev/null +++ b/doc/html/H5.user.html @@ -0,0 +1,71 @@ + + + + HDF5 User's Guide + + + + +

A User Guide for HDF5

+ +

The following documents form a loosely organized user's guide + to the HDF5 library. + +

+ +

The following documents form a loosely organized developer's guide to + aspects of the HDF5 library. (Some of the following documents + may be rather out of date as they were working papers for design + goals.) + +

+ + + +
+
Quincey Koziol
+
Robb Matzke
+ +Last modified: Tue May 26 15:39:47 EDT 1998 + + + + diff --git a/doc/html/IOPipe.html b/doc/html/IOPipe.html new file mode 100644 index 0000000..7c24e2c --- /dev/null +++ b/doc/html/IOPipe.html @@ -0,0 +1,114 @@ + + + + The Raw Data I/O Pipeline + + + +

The Raw Data I/O Pipeline

+ +

The HDF5 raw data pipeline is a complicated beast that handles + all aspects of raw data storage and transfer of that data + between the file and the application. Data can be stored + contiguously (internal or external), in variable size external + segments, or regularly chunked; it can be sparse, extendible, + and/or compressible. Data transfers must be able to convert from + one data space to another, convert from one number type to + another, and perform partial I/O operations. Furthermore, + applications will expect their common usage of the pipeline to + perform well. + +

To accomplish these goals, the pipeline has been designed in a + modular way so no single subroutine is overly complicated and so + functionality can be inserted easily at the appropriate + locations in the pipeline. A general pipeline was developed and + then certain paths through the pipeline were optimized for + performance. + +

We describe only the file-to-memory side of the pipeline since + the memory-to-file side is a mirror image. We also assume that a + proper hyperslab of a simple data space is being read from the + file into a proper hyperslab of a simple data space in memory, + and that the data type is a compound type which may require + various number conversions on its members. + + Figure 1 + +

The diagrams should be read from the top down. The Line A + in the figure above shows that H5Dread() copies + data from a hyperslab of a file dataset to a hyperslab of an + application buffer by calling H5D_read(). And + H5D_read() calls, in a loop, + H5S_simp_fgath(), H5T_conv_struct(), + and H5S_simp_mscat(). A temporary buffer, TCONV, is + loaded with data points from the file, then data type conversion + is performed on the temporary buffer, and finally data points + are scattered out to application memory. Thus, data type + conversion is an in-place operation and data space conversion + consists of two steps. An additional temporary buffer, BKG, is + large enough to hold N instances of the destination + data type where N is the same number of data points + that can be held by the TCONV buffer (which is large enough to + hold either source or destination data points). + +
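
In pseudo-code, the strip-mined loop described above might be sketched as follows. The helper names are hypothetical stand-ins for the internal routines named in the figure (H5S_simp_fgath(), H5T_conv_struct(), H5S_simp_mscat()); the real internal signatures are not shown here:

#include <stddef.h>

/* Hypothetical helpers standing in for the internal routines: */
void gather_from_file(void *tconv, size_t n);                   /* like H5S_simp_fgath() */
void convert_in_place(void *tconv, void *bkg, size_t n);        /* like H5T_conv_struct() */
void scatter_to_memory(const void *tconv, void *app, size_t n); /* like H5S_simp_mscat() */

/*
 * Sketch of the file-to-memory loop: gather a strip of data
 * points into the TCONV buffer, convert the data type in place
 * (using BKG when needed), then scatter the converted points
 * into the application buffer.
 */
void read_pipeline(void *tconv_buf, void *bkg_buf, void *app_buf,
                   size_t total_points, size_t strip_size)
{
    size_t done, n;

    for (done = 0; done < total_points; done += n) {
        n = (total_points - done < strip_size)
            ? total_points - done : strip_size;
        gather_from_file(tconv_buf, n);           /* Line B */
        convert_in_place(tconv_buf, bkg_buf, n);  /* in-place conversion */
        scatter_to_memory(tconv_buf, app_buf, n); /* Line C */
    }
}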

The application sets an upper limit for the size of the TCONV + buffer and optionally supplies a buffer. If no buffer is + supplied then one will be created by calling + malloc() when the pipeline is executed (when + necessary) and freed when the pipeline exits. The size of the + BKG buffer depends on the size of the TCONV buffer and if the + application supplies a BKG buffer it should be at least as large + as the TCONV buffer. The default size for these buffers is one + megabyte but the buffer might not be used to full capacity if + the buffer size is not an integer multiple of the source or + destination data point size (whichever is larger, but only + destination for the BKG buffer). + + + +

Occasionally the destination data points will be partially + initialized and the H5Dread() operation should not + clobber those values. For instance, the destination type might + be a struct with members a and b where + a is already initialized and we're reading + b from the file. An extra line, G, is added to the + pipeline to provide the type conversion functions with the + existing data. + + Figure 2 + 

It will most likely be quite common that no data type + conversion is necessary. In such cases a temporary buffer for + data type conversion is not needed and data space conversion + can happen in a single step. In fact, when the source and + destination data are both contiguous (they aren't in the + picture) the loop degenerates to a single iteration. + + + Figure 3 + +

So far we've looked only at internal contiguous storage, but by + replacing Line B in Figures 1 and 2 and Line A in Figure 3 with + Figure 4 the pipeline is able to handle regularly chunked + objects. Line B of Figure 4 is executed once for each chunk + which contains data to be read and the chunk address is found by + looking at a multi-dimensional key in a chunk B-tree which has + one entry per chunk. + + Figure 4 + +

If a single chunk is requested and the destination buffer is + the same size/shape as the chunk, then the CHUNK buffer is + bypassed and the destination buffer is used instead as shown in + Figure 5. + + Figure 5 + +


+
Robb Matzke
+ + +Last modified: Wed Mar 18 10:38:30 EST 1998 + + + diff --git a/doc/html/MemoryManagement.html b/doc/html/MemoryManagement.html new file mode 100644 index 0000000..93782b5 --- /dev/null +++ b/doc/html/MemoryManagement.html @@ -0,0 +1,510 @@ + + + + Memory Management in HDF5 + + + +

Memory Management in HDF5

+ + +

Is a Memory Manager Necessary?

+ +

Some form of memory management may be necessary in HDF5 when + the various deletion operators are implemented so that the + file memory is not permanently orphaned. However, since an + HDF5 file was designed with persistent data in mind, the + importance of a memory manager is questionable. + +

On the other hand, when certain meta data containers (file glue) + grow, they may need to be relocated in order to keep the + container contiguous. + +

+ Example: An object header consists of up to two + chunks of contiguous memory. The first chunk is a fixed + size at a fixed location when the header link count is + greater than one. Thus, inserting additional items into an + object header may require the second chunk to expand. When + this occurs, the second chunk may need to move to another + location in the file, freeing the file memory which that + chunk originally occupied. +
+ +

The relocation of meta data containers could potentially + orphan a significant amount of file memory if the application + has made poor estimates for preallocation sizes. + + +

Levels of Memory Management

+ +

Memory management by the library can be independent of memory + management support by the file format. The file format can + support no memory management, some memory management, or full + memory management. Similarly with the library. + +

Support in the Library

+ +
+
No Support: I +
When memory is deallocated it simply becomes unreferenced + (orphaned) in the file. Memory allocation requests are + satisfied by extending the file. + +
A separate off-line utility can be used to detect the + unreferenced bytes of a file and "bubble" them up to the end + of the file and then truncate the file. + +
Some Support: II +
The library could support partial memory management all + the time, or full memory management some of the time. + Orphaning free blocks instead of adding them to a free list + should not affect the file integrity, nor should fulfilling + new requests by extending the file instead of using the free + list. + +
Full Support: III +
The library supports space-efficient memory management by + always fulfilling allocation requests from the free list when + possible, and by coalescing adjacent free blocks into a + single larger free block. +
+ +

Support in the File Format

+ +
+
No Support: A +
The file format does not support memory management; any + unreferenced block in the file is assumed to be free. If + the library supports full memory management then it will + have to traverse the entire file to determine which blocks + are unreferenced. + +
Some Support: B +
Assuming that unreferenced blocks are free can be + dangerous in a situation where the file is not consistent. + For instance, if a directory tree becomes detached from the + main directory hierarchy, then the detached directory and + everything that is referenced only through the detached + directory become unreferenced. File repair utilities will + be unable to determine which unreferenced blocks need to be + linked back into the file hierarchy. + +
Therefore, it might be useful to keep an unsorted, + doubly-linked list of free blocks in the file. The library + can add and remove blocks from the list in constant time, + and can generate its own internal free-block data structure + in time proportional to the number of free blocks instead of + the size of the file. Additionally, a library can use a + subset of the free blocks, an alternative which is not + feasible if the file format doesn't support any form of + memory management. + +
Full Support: C +
The file format can mirror library data structures for + space-efficient memory management. The free blocks are + linked in unsorted, doubly-linked lists with one list per + free block size. The heads of the lists are pointed to by a + B-tree whose nodes are sorted by free block size. At the + same time, all free blocks are the leaf nodes of another + B-tree sorted by starting and ending address. When the + trees are used in combination we can deallocate and allocate + memory in O(log N) time where N is the + number of free blocks. +
+ +

Combinations of Library and File Format Support

+ +

We now evaluate each combination of library support with file + support: + +

+
I-A +
If neither the library nor the file support memory + management, then each allocation request will come from the + end of the file and each deallocation request is a no-op + that simply leaves the free block unreferenced. + +
    +
  • Advantages +
      +
    • No file overhead for allocation or deallocation. +
    • No library overhead for allocation or + deallocation. +
    • No file traversal required at time of open. +
    • No data needs to be written back to the file when + it's closed. +
    • Trivial to implement (already implemented). +
    + +
  • Disadvantages +
      +
    • Inefficient use of file space. +
    • A file repair utility must reclaim lost file space. +
    • Difficulties for file repair utilities. (Is an + unreferenced block a free block or orphaned data?) +
    +
+ +
II-A +
In order for the library to support memory management, it + will be required to build the internal free block + representation by traversing the entire file looking for + unreferenced blocks. + +
    +
  • Advantages +
      +
    • No file overhead for allocation or deallocation. +
    • Variable amount of library overhead for allocation + and deallocation depending on how much work the + library wants to do. +
    • No data needs to be written back to the file when + it's closed. +
    • Might use file space efficiently. +
    +
  • Disadvantages +
      +
    • Might use file space inefficiently. +
    • File traversal required at time of open. +
    • A file repair utility must reclaim lost file space. +
    • Difficulties for file repair utilities. +
    • Sharing of the free list between processes falls + outside the HDF5 file format documentation. +
    +
+ +
III-A +
In order for the library to support full memory + management, it will be required to build the internal free + block representation by traversing the entire file looking + for unreferenced blocks. + +
    +
  • Advantages +
      +
    • No file overhead for allocation or deallocation. +
    • Efficient use of file space. +
    • No data needs to be written back to the file when + it's closed. +
    +
  • Disadvantages +
      +
    • Moderate amount of library overhead for allocation + and deallocation. +
    • File traversal required at time of open. +
    • A file repair utility must reclaim lost file space. +
    • Difficulties for file repair utilities. +
    • Sharing of the free list between processes falls + outside the HDF5 file format documentation. +
    +
+ +
I-B +
If the library doesn't support memory management but the + file format supports some level of management, then a file + repair utility will have to be run occasionally to reclaim + unreferenced blocks. + +
    +
  • Advantages +
      +
    • No file overhead for allocation or deallocation. +
    • No library overhead for allocation or + deallocation. +
    • No file traversal required at time of open. +
    • No data needs to be written back to the file when + it's closed. +
    +
  • Disadvantages +
      +
    • A file repair utility must reclaim lost file space. +
    • Difficulties for file repair utilities. +
    +
+ +
II-B +
Both the library and the file format support some level + of memory management. + +
    +
  • Advantages +
      +
    • Constant file overhead per allocation or + deallocation. +
    • Variable library overhead per allocation or + deallocation depending on how much work the library + wants to do. +
    • Traversal at file open time is on the order of the + free list size instead of the file size. +
    • The library has the option of reading only part of + the free list. +
    • No data needs to be written at file close time if + it has been amortized into the cost of allocation + and deallocation. +
    • File repair utilties don't have to be run to + reclaim memory. +
    • File repair utilities can detect whether an + unreferenced block is a free block or orphaned data. +
    • Sharing of the free list between processes might + be easier. +
    • Possible efficient use of file space. +
    +
  • Disadvantages +
      +
    • Possible inefficient use of file space. +
    +
+ +
III-B +
The library provides space-efficient memory management but + the file format only supports an unsorted list of free + blocks. + +
    +
  • Advantages +
      +
    • Constant time file overhead per allocation or + deallocation. +
    • No data needs to be written at file close time if + it has been amortized into the cost of allocation + and deallocation. +
    • File repair utilities don't have to be run to + reclaim memory. +
    • File repair utilities can detect whether an + unreferenced block is a free block or orphaned data. +
    • Sharing of the free list between processes might + be easier. +
    • Efficient use of file space. +
    +
  • Disadvantages +
      +
    • O(log N) library overhead per allocation or + deallocation where N is the total number of + free blocks. +
    • O(N) time to open a file since the entire + free list must be read to construct the in-core + trees used by the library. +
    • Library is more complicated. +
    +
+ +
I-C +
This has the same advantages and disadvantages as I-B with + the added disadvantage that the file format is much more + complicated. + 
II-C +
If the library only provides partial memory management but + the file requires full memory management, then this method + degenerates to the same as II-A with the added disadvantage + that the file format is much more complicated. + +
III-C +
The library and file format both provide complete data + structures for space-efficient memory management. + +
    +
  • Advantages +
      +
    • Files can be opened in constant time since the + free list is read on demand and amortized into the + allocation and deallocation requests. +
    • No data needs to be written back to the file when + it's closed. +
    • File repair utilities don't have to be run to + reclaim memory. +
    • File repair utilities can detect whether an + unreferenced block is a free block or orphaned data. +
    • Sharing the free list between processes is easy. +
    • Efficient use of file space. +
    +
  • Disadvantages +
      +
    • O(log N) file allocation and deallocation + cost where N is the total number of free + blocks. +
    • O(log N) library allocation and + deallocation cost. +
    • Much more complicated file format. +
    • More complicated library. +
    +
+ +
+ + +

The Algorithm for II-B

+ +

The file contains an unsorted, doubly-linked list of free + blocks. The address of the head of the list appears in the + boot block. Each free block contains the following fields: + +

+ + + + + + + + + + + + + + + + + + + + + +
byte    byte    byte    byte
Free Block Signature
Total Free Block Size
Address of Left Sibling
Address of Right Sibling


Remainder of Free Block


+
+ +

The library reads as much of the free list as convenient when + convenient and pushes those entries onto stacks. This can + occur when a file is opened or any time during the life of the + file. There is one stack for each free block size and the + stacks are sorted by size in a balanced tree in memory. + +
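
One possible in-core representation is sketched below; the type and field names are hypothetical, not the library's actual data structures. Each stack holds the addresses of all free blocks of one size, and the stacks are the nodes of a balanced tree ordered by block size:

#include <stddef.h>

typedef unsigned long sketch_addr_t;   /* stands in for a file address */

/*
 * Sketch of the in-memory free-block bookkeeping described
 * above: one stack per free-block size, with the stacks kept
 * in a balanced tree sorted by size.
 */
typedef struct free_stack_t {
    size_t         size;       /* free-block size served by this stack */
    sketch_addr_t *addrs;      /* stack of free-block addresses        */
    int            nused;      /* number of addresses on the stack     */
    int            nalloc;     /* allocated length of addrs[]          */
    struct free_stack_t *left, *right; /* balanced tree, keyed by size */
} free_stack_t;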

Deallocation involves finding the correct stack or creating + a new one (an O(log K) operation where K is + the number of stacks), pushing the free block info onto the + stack (a constant-time operation), and inserting the free + block into the file free block list (a constant-time operation + which doesn't necessarily involve any I/O since the free blocks + can be cached like other objects). No attempt is made to + coalesce adjacent free blocks into larger blocks. + +

Allocation involves finding the correct stack (an O(log + K) operation), removing the top item from the stack + (a constant-time operation), and removing the block from the + file free block list (a constant-time operation). If there is + no free block of the requested size or larger, then the file + is extended. + +

To provide shareability of the free list between processes, + the last step of an allocation will check for the free block + signature and if it doesn't find one will repeat the process. + Alternatively, a process can temporarily remove free blocks + from the file and hold them in its own private pool. + 

To summarize... +

+
File opening +
O(N) amortized over the time the file is open, + where N is the number of free blocks. The library + can still function without reading any of the file free + block list. + +
Deallocation +
O(log K) where K is the number of unique + sizes of free blocks. File access is constant. + +
Allocation +
O(log K). File access is constant. + +
File closing +
O(1) even if the library temporarily removes free + blocks from the file to hold them in a private pool since + the pool can still be a linked list on disk. +
+ + +

The Algorithm for III-C

+ +

The HDF5 file format supports a general B-tree mechanism + for storing data with keys. If we use a B-tree to represent + all parts of the file that are free and the B-tree is indexed + so that a free file chunk can be found if we know the starting + or ending address, then we can efficiently determine whether a + free chunk begins or ends at the specified address. Call this + the Address B-Tree. + +

If a second B-tree points to a set of stacks where the + members of a particular stack are all free chunks of the same + size, and the tree is indexed by chunk size, then we can + efficiently find the best-fit chunk size for a memory request. + Call this the Size B-Tree. + +

All free blocks of a particular size can be linked together + with an unsorted, doubly-linked, circular list and the left + and right sibling addresses can be stored within the free + chunk, allowing us to remove or insert items from the list in + constant time. + +

Deallocation of a block of file memory consists of: + 

    +
  1. Add the new free block whose address is ADDR to the + address B-tree. + +
      +
    1. If the address B-tree contains an entry for a free + block that ends at ADDR-1 then remove that + block from the B-tree and from the linked list (if the + block was the first on the list then the size B-tree + must be updated). Adjust the size and address of the + block being freed to include the block just removed from + the free list. The time required to search for and + possibly remove the left block is O(log N) + where N is the number of free blocks. + +
    2. If the address B-tree contains an entry for the free + block that begins at ADDR+LENGTH then + remove that block from the B-tree and from the linked + list (if the block was the first on the list then the + size B-tree must be updated). Adjust the size of the + block being freed to include the block just removed from + the free list. The time required to search for and + possibly remove the right block is O(log N). + +
    3. Add the new (adjusted) block to the address B-tree. + The time for this operation is O(log N). +
    + +
  2. Add the new block to the size B-tree and linked list. + +
      +
    1. If the size B-tree has an entry for this particular + size, then add the chunk to the tail of the list. This + is an O(log K) operation where K is + the number of unique free block sizes. + +
    2. Otherwise make a new entry in the B-tree for chunks of + this size. This is also O(log K). +
    +
+ +
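
The deallocation steps above might be sketched in pseudo-code as follows; all helper names are hypothetical, and each B-tree operation is assumed to cost O(log N) or O(log K) as stated above:

/*
 * Pseudo-code sketch of III-C deallocation: coalesce with the
 * free blocks immediately to the left and right, then re-insert
 * the merged block into the address and size B-trees.
 */
void deallocate(sketch_addr_t addr, size_t len)
{
    free_chunk_t *left, *right;

    left = addr_btree_remove_ending_at(addr - 1);      /* step 1a */
    if (left) {
        addr -= left->len;                             /* grow leftward */
        len  += left->len;
    }
    right = addr_btree_remove_starting_at(addr + len); /* step 1b */
    if (right)
        len += right->len;                             /* grow rightward */

    addr_btree_insert(addr, len);                      /* step 1c */
    size_btree_append(len, addr);                      /* step 2 */
}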

Allocation is similar to deallocation. + +

To summarize... + +

+
File opening +
O(1) + +
Deallocation +
O(log N) where N is the total number of + free blocks. File access time is O(log N). + +
Allocation +
O(log N). File access time is O(log N). + +
File closing +
O(1). +
+ + +
+
Robb Matzke
+ + +Last modified: Thu Jul 31 14:41:01 EST + + + diff --git a/doc/html/ObjectHeader.txt b/doc/html/ObjectHeader.txt new file mode 100644 index 0000000..d769377 --- /dev/null +++ b/doc/html/ObjectHeader.txt @@ -0,0 +1,60 @@ +OBJECT HEADERS +-------------- + +haddr_t +H5O_new (hdf5_file_t *f, intn nrefs, size_t size_hint) + + Creates a new empty object header and returns its address. + The SIZE_HINT is the initial size of the data portion of the + object header and NREFS is the number of symbol table entries + that reference this object header (normally one). + + If SIZE_HINT is too small, then at least some default amount + of space is allocated for the object header. + +intn /*num remaining links */ +H5O_link (hdf5_file_t *f, /*file containing header */ + haddr_t addr, /*header file address */ + intn adjust) /*link adjustment amount */ + + +size_t +H5O_sizeof (hdf5_file_t *f, /*file containing header */ + haddr_t addr, /*header file address */ + H5O_class_t *type, /*message type or H5O_ANY */ + intn sequence) /*sequence number, usually zero */ + + Returns the size of a particular instance of a message in an + object header. When an object header has more than one + instance of a particular message type, then SEQUENCE indicates + which instance to return. + +void * +H5O_read (hdf5_file_t *f, /*file containing header */ + haddr_t addr, /*header file address */ + H5G_entry_t *ent, /*optional symbol table entry */ + H5O_class_t *type, /*message type or H5O_ANY */ + intn sequence, /*sequence number, usually zero */ + size_t size, /*size of output message */ + void *mesg) /*output buffer */ + + Reads a message from the object header into memory. + +const void * +H5O_peek (hdf5_file_t *f, /*file containing header */ + haddr_t addr, /*header file address */ + H5G_entry_t *ent, /*optional symbol table entry */ + H5O_class_t *type, /*type of message or H5O_ANY */ + intn sequence) /*sequence number, usually zero */ + +haddr_t /*new heap address */ +H5O_modify (hdf5_file_t *f, /*file containing header */ + haddr_t addr, /*header file address */ + H5G_entry_t *ent, /*optional symbol table entry */ + hbool_t *ent_modified, /*entry modification flag */ + H5O_class_t *type, /*message type */ + intn overwrite, /*sequence number or -1 */ + void *mesg) /*the message */ + + + diff --git a/doc/html/Properties.html b/doc/html/Properties.html new file mode 100644 index 0000000..5b6c03f --- /dev/null +++ b/doc/html/Properties.html @@ -0,0 +1,81 @@ + + + + Property List Interface (H5P) + + + +

Property List Interface (H5P)

+ +

1. Introduction

+ +

The property list (a.k.a., template) interface provides a + mechanism for default named arguments for a C function + interface. A property list is a collection of name/value pairs + which can be passed to various other HDF5 functions to control + features that are typically unimportant or whose default values + are usually used. + +

For instance, file creation needs to know various things such + as the size of the user-block at the beginning of the file, or + the size of various file data structures. Wrapping this + information in a property list simplifies the API by reducing + the number of arguments to H5Fcreate(). + +
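
As a sketch of the usage pattern (the H5P routines are described in the next section; H5Pset_userblock() is assumed here as one representative property setter):

/*
 * Create a file-creation property list, set one property, and
 * pass the list to H5Fcreate() in place of the default.
 */
hid_t plist, file;

plist = H5Pcreate(H5P_FILE_CREATE);
H5Pset_userblock(plist, 512);     /* reserve a 512-byte user block */
file = H5Fcreate("example.h5", H5F_ACC_TRUNC, plist, H5P_DEFAULT);
H5Pclose(plist);                  /* the file keeps its own copy */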

2. General Property List Operations

+ +

Property lists follow the same create/open/close paradigm as + the rest of the library. + +

+
hid_t H5Pcreate (H5P_class_t class) +
A new property list can be created as an instance of some + property list class. The new property list is initialized + with default values for the specified class. The classes are: + +

+
+
H5P_FILE_CREATE +
Properties for file creation. See H5F + for details about the file creation properties. +
H5P_FILE_ACCESS +
Properties for file access. See H5F + for details about the file access properties. 
H5P_DATASET_CREATE +
Properties for dataset creation. See + H5D for details about dataset + creation properties. +
H5P_DATASET_XFER +
Properties for raw data transfer. See + H5D for details about raw data + transfer properties. +
+ +

+
hid_t H5Pcopy (hid_t plist) +
A property list can be copied to create a new property + list. The new property list has the same properties and values + as the original property list. + +

+
herr_t H5Pclose (hid_t plist) +
All property lists should be closed when the application is + finished accessing them. This frees resources used by the + property list. + +

+
H5P_class_t H5Pget_class (hid_t plist) +
The class of which the property list is a member can be + obtained by calling this function. The property list classes + are defined above for H5Pcreate(). +
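      For illustration, the calls above can be combined as in the
      following minimal sketch. It assumes only the H5P interface
      described in this section; error checking is omitted for brevity.

          #include <hdf5.h>

          void plist_example (void)
          {
              /* Create a dataset creation property list with default values */
              hid_t plist = H5Pcreate (H5P_DATASET_CREATE);

              /* Copy it; the copy has the same properties and values */
              hid_t copy = H5Pcopy (plist);

              /* Query the class of the copy */
              if (H5Pget_class (copy) == H5P_DATASET_CREATE) {
                  /* ... set dataset creation properties here ... */
              }

              /* Close both lists when finished with them */
              H5Pclose (copy);
              H5Pclose (plist);
          }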
+ +
+
Robb Matzke
+
Quincey Koziol
+ + +Last modified: Tue Feb 10 17:26:41 PST 1998 + + + diff --git a/doc/html/Version.html b/doc/html/Version.html new file mode 100644 index 0000000..6e4af9f --- /dev/null +++ b/doc/html/Version.html @@ -0,0 +1,139 @@ + + + + Version Numbers + + + +

Version Numbers

+ +

1. Introduction

+ +

The HDF5 version number is a set of three integer values and + one lower-case letter written as, for example, + hdf5-1.2.0a. + +

The 5 is part of the library name and will only
      change if the entire file format and library are redesigned,
      a change similar in scope to that between HDF4 and HDF5.

The 1 is the major version number and + changes when there is an extensive change to the file format or + library. Such a change will likely require files to be + translated and applications to be modified. This number is not + expected to change frequently. + +

The 2 is the minor version number and is
      incremented by each public release that presents new features.
      Even numbers are reserved for stable public versions of the
      library while odd numbers are reserved for development
      versions. See the diagram below for examples.

The 0 is the release number. For public
      versions of the library, the release number is incremented each
      time one or more bugs are fixed and the fix is made available to
      the public. For development versions, the release number is
      incremented automatically each time a CVS commit occurs anywhere
      in the source tree.

The a is the patch level and is used only
      for public versions. It's incremented only for very minor
      changes that don't affect the usability of the library, such as
      fixing spelling errors, changing warning messages, or updating
      documentation.

2. Abbreviated Versions

+ +

It's often convenient to drop the patch level and release + number when referring to a version of the library, like saying + version 1.2 of HDF5. The release number and patch level can be + any value in this case. + +

3. Special Versions

+ +

Version 1.0.0 was released for alpha testing the first week of
      March, 1998. The development version number was incremented to
      1.0.1 and remained constant until the last week of April,
      when the release number started to increase and development
      versions were made available to people outside the core HDF5
      development team.

Version 1.1.0 will be the first official beta release but the + 1.1 branch will also serve as a development branch since we're + not concerned about providing bug fixes separate from normal + development for the beta version. + +

Version 1.2 will be the first official HDF5 version. The + version tree will fork at this point with public bug fixes + provided on the 1.2 branch and development will continue on the + 1.3 branch. + +

4. Public versus Development

+ +

The motivation for separate public and development versions is + that the public version will receive only bug fixes while the + development version will receive new features. + +

Eventually, the development version will near completion and a + new development branch will fork while the original one enters a + feature freeze state. When the original development branch is + ready for release the minor version number will be incremented + to an even value. + +

+

+ Version Example +
Fig 1: Version Example +
+ +

5. Version Support from the Library

+ +

The library provides a set of macros and functions to query and + check version numbers. + +

+
H5_VERS_MAJOR +
H5_VERS_MINOR +
H5_VERS_RELEASE +
H5_VERS_PATCH +
These preprocessor constants are defined in the public + include file and determine the version of the include files. + +

+
herr_t H5version (unsigned *majnum, unsigned + *minnum, unsigned *relnum, unsigned + *patnum) +
This function returns through its arguments the version + numbers for the library to which the application is linked. + +

+
void H5check(void) +
This is a macro that verifies that the version number of the + HDF5 include file used to compile the application matches the + version number of the library to which the application is + linked. This check occurs automatically when the first HDF5 + file is created or opened and is important because a mismatch + between the include files and the library is likely to result + in corrupted data and/or segmentation faults. If a mismatch + is detected the library issues an error message on the + standard error stream and aborts with a core dump. + +

+
herr_t H5vers_check (unsigned majnum, + unsigned minnum, unsigned relnum, unsigned + patnum) +
This function is called by the H5check() macro + with the include file version constants. The function + compares its arguments to the result returned by + H5version() and if a mismatch is detected prints + an error message on the standard error stream and aborts. +
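      Taken together, a version-conscious application might use these
      interfaces as in the sketch below. The compile-time guard uses the
      H5_VERS_MAJOR constant from above; the rest relies only on the
      H5version() and H5check() interfaces as documented here.

          #include <stdio.h>
          #include <hdf5.h>

          #if H5_VERS_MAJOR != 1
          #error "This application requires an hdf5-1.x library"
          #endif

          int main (void)
          {
              unsigned majnum, minnum, relnum, patnum;

              /* Abort with a diagnostic if the include files and the
               * linked library disagree about the version. */
              H5check ();

              /* Report the version of the library actually linked. */
              H5version (&majnum, &minnum, &relnum, &patnum);
              printf ("Linked with hdf5-%u.%u.%u, patch level %u\n",
                      majnum, minnum, relnum, patnum);
              return 0;
          }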
+ +
+
Robb Matzke
+ + +Last modified: Wed Apr 22 12:19:53 EDT 1998 + + + diff --git a/doc/html/chunk1.gif b/doc/html/chunk1.gif new file mode 100644 index 0000000..0260818 Binary files /dev/null and b/doc/html/chunk1.gif differ diff --git a/doc/html/chunk1.obj b/doc/html/chunk1.obj new file mode 100644 index 0000000..5936b0c --- /dev/null +++ b/doc/html/chunk1.obj @@ -0,0 +1,52 @@ +%TGIF 3.0-p5 +state(0,33,100,0,0,0,16,1,9,1,1,0,0,3,0,1,1,'Courier',0,17,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1088,1408,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +box('black',64,64,384,384,5,2,1,29,0,0,0,0,0,'2',[ +]). +poly('black',2,[ + 128,64,128,384],0,2,1,30,0,4,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',2,[ + 192,64,192,384],0,2,1,31,0,4,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',2,[ + 256,64,256,384],0,2,1,32,0,4,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',2,[ + 320,64,320,384],0,2,1,33,0,4,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',2,[ + 64,128,384,128],0,2,1,34,0,4,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',2,[ + 64,192,384,192],0,2,1,35,0,4,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',2,[ + 64,256,384,256],0,2,1,36,0,4,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',2,[ + 64,320,384,320],0,2,1,37,0,4,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +box('black',128,448,192,512,5,2,1,56,0,0,0,0,0,'2',[ +]). +text('black',448,208,'Courier',0,17,2,1,0,1,84,28,61,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Entire array", + "5000 x 5000"]). +text('black',256,464,'Courier',0,17,2,1,0,1,84,28,63,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Single Chunk", + "1000 x 1000"]). +box('black',48,48,512,528,0,1,1,71,0,0,0,0,0,'1',[ +]). diff --git a/doc/html/compat.html b/doc/html/compat.html new file mode 100644 index 0000000..2992476 --- /dev/null +++ b/doc/html/compat.html @@ -0,0 +1,271 @@ + + + + Backward/Forward Compatability + + + +

Backward/Forward Compatibility

+ +

The HDF5 development must proceed in such a manner as to + satisfy the following conditions: + +

    +
  A. HDF5 applications can produce data that HDF5
     applications can read and write, and HDF4 applications can produce
     data that HDF4 applications can read and write. The situation
     that demands this condition is obvious.

  B. HDF5 applications are able to produce data that HDF4 applications
     can read, and HDF4 applications can subsequently modify the
     file subject to certain constraints depending on the
     implementation. This condition is for the temporary
     situation where a consumer has neither been relinked with a new
     HDF4 API built on top of the HDF5 API nor recompiled with the
     HDF5 API.

  C. HDF5 applications can read existing HDF4 files and subsequently
     modify the file subject to certain constraints depending on
     the implementation. This condition is for the temporary
     situation in which the producer has neither been relinked with a
     new HDF4 API built on top of the HDF5 API nor recompiled with
     the HDF5 API, or the permanent situation of HDF5 consumers
     reading archived HDF4 files.

There's at least one invariant: new object features introduced
    in the HDF5 file format (like 2-d arrays of structs) might be
    impossible to "translate" to a format that an old HDF4
    application can understand, either because the HDF4 file format
    or the HDF4 API has no mechanism to describe the object.

    What follows is one possible implementation based on how + Condition B was solved in the AIO/PDB world. It also attempts + to satisfy these goals: + +

      +
    1. The main HDF5 library contains as little extra baggage as
       possible, either by relying on external programs to take care
       of compatibility issues or by incorporating the logic of such
       programs as optional modules in the HDF5 library. Conditions B
       and C are separate programs/modules.

    2. No extra baggage not only means the library proper is small,
       but also means it can be implemented (rather than migrated
       from HDF4 source) from the ground up with minimal regard for
       HDF4, thus keeping the logic straightforward.

    3. Compatibility issues are handled behind the scenes when
       necessary (and possible) but can be carried out explicitly
       during things like data migration.
    + +
    +

    Wrappers

    + +

The proposed implementation uses wrappers to handle
    compatibility issues. A Format-X file is wrapped in a
    Format-Y file by creating a Format-Y skeleton that replicates
    the Format-X meta data. The Format-Y skeleton points to the raw
    data stored in Format-X without moving the raw data. The
    restriction is that the raw data storage methods in Format-Y are
    a superset of the raw data storage methods in Format-X (otherwise
    the raw data must be copied to Format-Y). We're assuming that
    meta data is small with respect to the entire file.

    The wrapper can be a separate file that has pointers into the + first file or it can be contained within the first file. If + contained in a single file, the file can appear as a Format-Y + file or simultaneously a Format-Y and Format-X file. + +

The Format-X meta-data can be thought of as the original
    wrapper around the raw data, and Format-Y is a second wrapper
    around the same data. The wrappers are independent of one another;
    modifying the meta-data in one wrapper causes the other to
    become out of date. Modification of raw data doesn't invalidate
    either view as long as the meta data that describes its storage
    isn't modified. For instance, an array element can change values
    if storage is already allocated for the element, but if storage
    isn't allocated then the meta data describing the storage must
    change, invalidating all wrappers but one.

    It's perfectly legal to modify the meta data of one wrapper + without modifying the meta data in the other wrapper(s). The + illegal part is accessing the raw data through a wrapper which + is out of date. + +

If raw data is wrapped by more than one internal wrapper
    (internal means that the wrapper is in the same file as
    the raw data) then access to that file must assume that
    unreferenced parts of that file contain meta data for another
    wrapper and cannot be reclaimed as free space.


    +

    Implementation of Condition B

    + +

    Since this is a temporary situation which can't be + automatically detected by the HDF5 library, we must rely + on the application to notify the HDF5 library whether or not it + must satisfy Condition B. (Even if we don't rely on the + application, at some point someone is going to remove the + Condition B constraint from the library.) So the module that + handles Condition B is conditionally compiled and then enabled + on a per-file basis. + +

    If the application desires to produce an HDF4 file (determined + by arguments to H5Fopen), and the Condition B + module is compiled into the library, then H5Fclose + calls the module to traverse the HDF5 wrapper and generate an + additional internal or external HDF4 wrapper (wrapper specifics + are described below). If Condition B is implemented as a module + then it can benefit from the metadata already cached by the main + library. + +

    An internal HDF4 wrapper would be used if the HDF5 file is + writable and the user doesn't mind that the HDF5 file is + modified. An external wrapper would be used if the file isn't + writable or if the user wants the data file to be primarily HDF5 + but a few applications need an HDF4 view of the data. + +

Modifying through the HDF5 library an HDF5 file that has an
    internal HDF4 wrapper should invalidate the HDF4 wrapper (and
    optionally regenerate it when H5Fclose is
    called). The HDF5 library must understand how wrappers work, but
    not necessarily anything about the HDF4 file format.

Modifying through the HDF5 library an HDF5 file that has an
    external HDF4 wrapper will cause the HDF4 wrapper to become out
    of date (but possibly regenerated during H5Fclose).
    Note: Perhaps the next release of the HDF4 library should
    ensure that the HDF4 wrapper file has a more recent modification
    time than the raw data file (the HDF5 file) to which it
    points(?)

Modifying through the HDF4 library an HDF5 file that has an
    internal or external HDF4 wrapper will cause the HDF5 wrapper to
    become out of date. However, there is no way for the old HDF4
    library to notify the HDF5 wrapper that it's out of date.
    Therefore the HDF5 library must be able to detect when the HDF5
    wrapper is out of date and be able to fix it. If the HDF4
    wrapper is complete then the easy way is to ignore the original
    HDF5 wrapper and generate a new one from the HDF4 wrapper. The
    other approach is to compare the HDF4 and HDF5 wrappers and
    assume that if they differ, HDF4 is the right one; if HDF4 omits
    data, it was because HDF4 is a partial wrapper (rather than
    assuming HDF4 deleted the data); and if HDF4 has new data, then
    copy the new meta data to the HDF5 wrapper. On the other hand,
    perhaps we don't need to allow these situations (modifying an
    HDF5 file with the old HDF4 library and then accessing it with
    the HDF5 library is either disallowed or causes HDF5 objects
    that can't be described by HDF4 to be lost).

To convert an HDF5 file to an HDF4 file on demand, one simply
    opens the file with the HDF4 flag and closes it. This is also
    how AIO implemented backward compatibility with PDB in its file
    format.


    +

    Implementation of Condition C

    + +

This condition must be satisfied for all time because there
    will always be archived HDF4 files. If a pure HDF4 file (that
    is, one without HDF5 meta data) is opened with an HDF5 library,
    then H5Fopen builds an internal or external HDF5
    wrapper and then accesses the raw data through that wrapper. If
    the HDF5 library modifies the file then the HDF4 wrapper becomes
    out of date. However, since the HDF5 library hasn't been
    released, we can at least implement it to disable and/or reclaim
    the HDF4 wrapper.

    If an external and temporary HDF5 wrapper is desired, the + wrapper is created through the cache like all other HDF5 files. + The data appears on disk only if a particular cached datum is + preempted. Instead of calling H5Fclose on the HDF5 + wrapper file we call H5Fabort which immediately + releases all file resources without updating the file, and then + we unlink the file from Unix. + +


    +

    What do wrappers look like?

    + +

    External wrappers are quite obvious: they contain only things + from the format specs for the wrapper and nothing from the + format specs of the format which they wrap. + +

    An internal HDF4 wrapper is added to an HDF5 file in such a way + that the file appears to be both an HDF4 file and an HDF5 + file. HDF4 requires an HDF4 file header at file offset zero. If + a user block is present then we just move the user block down a + bit (and truncate it) and insert the minimum HDF4 signature. + The HDF4 dd list and any other data it needs are + appended to the end of the file and the HDF5 signature uses the + logical file length field to determine the beginning of the + trailing part of the wrapper. + +

    +

    + + + + + + + + + + + + + +
    HDF4 minimal file header. Its main job is to point to + the dd list at the end of the file.
    User-defined block which is truncated by the size of the + HDF4 file header so that the HDF5 boot block file address + doesn't change.
    The HDF5 boot block and data, unmodified by adding the + HDF4 wrapper.
    The main part of the HDF4 wrapper. The dd + list will have entries for all parts of the file so + hdpack(?) doesn't (re)move anything.
    +
    + +

When such a file is opened by the HDF5 library for
    modification, the library shifts the user block back down to
    address zero and fills the vacated space with zeros, then
    truncates the file at the end of the HDF5 data or adds the
    trailing HDF4 wrapper to the free list. This prevents HDF4
    applications from reading the file with an out-of-date wrapper.

    If there is no user block then we have a problem. The HDF5 + boot block must be moved to make room for the HDF4 file header. + But moving just the boot block causes problems because all file + addresses stored in the file are relative to the boot block + address. The only option is to shift the entire file contents + by 512 bytes to open up a user block (too bad we don't have + hooks into the Unix i-node stuff so we could shift the entire + file contents by the size of a file system page without ever + performing I/O on the file :-) + +

    Is it possible to place an HDF5 wrapper in an HDF4 file? I + don't know enough about the HDF4 format, but I would suspect it + might be possible to open a hole at file address 512 (and + possibly before) by moving some things to the end of the file + to make room for the HDF5 signature. The remainder of the HDF5 + wrapper goes at the end of the file and entries are added to the + HDF4 dd list to mark the location(s) of the HDF5 + wrapper. + +


    +

    Other Thoughts

    + +

    Conversion programs that copy an entire HDF4 file to a separate, + self-contained HDF5 file and vice versa might be useful. + + + + +


    +
    Robb Matzke
    + + +Last modified: Wed Oct 8 12:34:42 EST 1997 + + + diff --git a/doc/html/dataset_p1.gif b/doc/html/dataset_p1.gif new file mode 100644 index 0000000..1e7cea0 Binary files /dev/null and b/doc/html/dataset_p1.gif differ diff --git a/doc/html/dataset_p1.obj b/doc/html/dataset_p1.obj new file mode 100644 index 0000000..42d66fc --- /dev/null +++ b/doc/html/dataset_p1.obj @@ -0,0 +1,32 @@ +%TGIF 3.0-p5 +state(0,33,100,0,0,0,16,1,9,1,1,0,0,1,0,1,1,'Helvetica',0,24,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1088,1408,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +box('black',128,240,288,432,4,1,1,26,0,0,0,0,0,'1',[ +]). +box('black',400,272,464,400,4,1,1,27,0,0,0,0,0,'1',[ +]). +box('black',192,304,224,368,6,1,1,28,0,0,0,0,0,'1',[ +]). +box('black',400,272,432,336,6,1,1,29,0,0,0,0,0,'1',[ +]). +poly('black',2,[ + 224,304,400,272],1,1,1,32,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 224,368,400,336],1,1,1,33,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +text('black',208,208,'Helvetica',0,20,1,1,0,1,77,17,40,0,14,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "File Dataset"]). +text('black',432,208,'Helvetica',0,20,1,1,0,1,106,17,42,0,14,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Memory Dataset"]). +text('black',320,144,'Helvetica',0,24,1,1,0,1,206,29,68,0,24,5,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Raw Data Transfer"]). +box('black',96,128,512,464,0,1,1,70,0,0,0,0,0,'1',[ +]). diff --git a/doc/html/extern1.gif b/doc/html/extern1.gif new file mode 100644 index 0000000..dcac681 Binary files /dev/null and b/doc/html/extern1.gif differ diff --git a/doc/html/extern1.obj b/doc/html/extern1.obj new file mode 100644 index 0000000..9c56a50 --- /dev/null +++ b/doc/html/extern1.obj @@ -0,0 +1,40 @@ +%TGIF 3.0-p5 +state(0,33,100,0,0,0,16,1,9,1,1,0,0,1,0,1,0,'Courier',0,17,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1088,1408,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +box('black',128,96,192,128,4,1,1,49,0,0,0,0,0,'1',[ +]). +box('black',192,96,352,128,12,1,1,50,0,0,0,0,0,'1',[ +]). +box('black',352,96,416,128,18,1,1,51,0,0,0,0,0,'1',[ +]). +box('black',64,176,224,208,12,1,1,53,0,0,0,0,0,'1',[ +]). +box('black',256,176,320,208,4,1,1,54,0,0,0,0,0,'1',[ +]). +box('black',352,176,448,208,18,1,1,55,0,0,0,0,0,'1',[ +]). +box('black',224,176,256,208,0,1,1,56,0,0,0,0,0,'1',[ +]). +box('black',320,176,352,208,0,1,1,57,0,0,0,0,0,'1',[ +]). +box('black',448,176,512,208,0,1,1,58,0,0,0,0,0,'1',[ +]). +poly('black',2,[ + 176,128,272,176],1,1,1,59,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 240,128,208,176],1,1,1,60,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 384,128,384,176],1,1,1,61,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +box('black',48,80,528,224,0,1,1,64,0,0,0,0,0,'1',[ +]). diff --git a/doc/html/extern2.gif b/doc/html/extern2.gif new file mode 100644 index 0000000..5f0e942 Binary files /dev/null and b/doc/html/extern2.gif differ diff --git a/doc/html/extern2.obj b/doc/html/extern2.obj new file mode 100644 index 0000000..3e83452 --- /dev/null +++ b/doc/html/extern2.obj @@ -0,0 +1,108 @@ +%TGIF 3.0-p5 +state(0,33,100,0,0,0,16,1,9,1,1,1,1,0,0,1,1,'Courier',0,17,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1088,1408,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +box('black',48,48,464,432,0,1,1,144,0,0,0,0,0,'1',[ +]). +text('black',80,240,'Courier',0,17,1,0,0,1,70,14,146,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "scan1.data"]). 
+text('black',80,304,'Courier',0,17,1,0,0,1,70,14,148,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "scan2.data"]). +text('black',80,368,'Courier',0,17,1,0,0,1,70,14,150,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "scan3.data"]). +polygon('black',7,[ + 64,64,64,128,192,128,192,96,320,96,320,64,64,64],20,1,1,0,181,0,0,0,0,0,'1', + "00",[ +]). +polygon('black',7,[ + 64,128,64,160,320,160,320,96,192,96,192,128,64,128],4,1,1,0,182,0,0,0,0,0,'1', + "00",[ +]). +box('black',64,160,320,192,26,1,1,183,0,0,0,0,0,'1',[ +]). +poly('black',2,[ + 80,80,304,80],1,1,1,184,0,26,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 80,112,176,112],1,1,1,185,0,26,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 208,112,304,112],1,1,1,186,0,26,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 80,144,304,144],1,1,1,187,0,26,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 80,176,304,176],1,1,1,188,0,26,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +box('black',64,256,448,288,20,1,1,203,0,0,0,0,0,'1',[ +]). +box('black',64,320,448,352,4,1,1,216,0,0,0,0,0,'1',[ +]). +box('black',64,384,320,416,26,1,1,225,0,0,0,0,0,'1',[ +]). +poly('black',2,[ + 80,272,304,272],1,1,1,226,0,26,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 336,272,432,272],1,1,1,227,0,26,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 80,336,176,336],1,1,1,228,0,26,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 208,336,432,336],1,1,1,229,0,26,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 80,400,304,400],1,1,1,230,0,26,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 192,96,64,96],0,1,1,232,0,26,5,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 192,128,320,128],0,1,1,233,0,26,5,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 256,64,256,192],0,1,1,234,0,26,5,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 192,64,192,192],0,1,1,235,0,26,5,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 128,64,128,192],0,1,1,236,0,26,5,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 320,160,64,160],0,2,1,238,0,26,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',4,[ + 320,96,192,96,192,128,64,128],0,2,1,240,0,0,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',6,[ + 336,64,384,64,384,128,384,128,384,192,336,192],3,1,1,241,1,0,0,0,8,3,0,0,0,'1','8','3', + "78",[ +]). +text('black',429,124,'Courier',0,17,2,1,0,1,28,49,250,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,1,0,[ + 429,124,405,124,454,152,0,1000,-1000,0,-15,2,404,123,455,153],[ + "2-d", + "Dataset"]). diff --git a/doc/html/group_p1.gif b/doc/html/group_p1.gif new file mode 100644 index 0000000..5900446 Binary files /dev/null and b/doc/html/group_p1.gif differ diff --git a/doc/html/group_p1.obj b/doc/html/group_p1.obj new file mode 100644 index 0000000..5f41959 --- /dev/null +++ b/doc/html/group_p1.obj @@ -0,0 +1,85 @@ +%TGIF 3.0-p5 +state(0,33,100,0,0,0,8,1,9,1,1,0,2,1,0,1,1,'Times-Roman',0,24,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1088,1408,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +text('black',80,168,'Courier',0,17,1,0,0,1,7,14,30,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',80,184,'Courier',0,17,1,0,0,1,7,14,34,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',80,200,'Courier',0,17,1,0,0,1,7,14,36,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',80,216,'Courier',0,17,1,0,0,1,21,14,38,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Foo"]). 
+text('black',80,232,'Courier',0,17,1,0,0,1,7,14,43,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',80,248,'Courier',0,17,1,0,0,1,7,14,47,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +rcbox('black',64,152,128,280,0,1,1,0,16,49,0,0,0,0,'1',[ +]). +text('black',208,152,'Courier',0,17,1,0,0,1,7,14,52,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',80,152,'Courier',0,17,1,0,0,1,7,14,56,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',208,168,'Courier',0,17,1,0,0,1,7,14,58,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',208,184,'Courier',0,17,1,0,0,1,21,14,60,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Bar"]). +text('black',208,200,'Courier',0,17,1,0,0,1,7,14,62,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',208,216,'Courier',0,17,1,0,0,1,7,14,64,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',208,232,'Courier',0,17,1,0,0,1,7,14,68,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',208,248,'Courier',0,17,1,0,0,1,7,14,72,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +rcbox('black',192,152,256,280,0,1,1,0,16,74,0,0,0,0,'1',[ +]). +text('black',336,152,'Courier',0,17,1,0,0,1,7,14,75,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',336,168,'Courier',0,17,1,0,0,1,7,14,77,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',336,184,'Courier',0,17,1,0,0,1,7,14,81,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',336,200,'Courier',0,17,1,0,0,1,7,14,88,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',336,216,'Courier',0,17,1,0,0,1,7,14,92,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',336,232,'Courier',0,17,1,0,0,1,7,14,94,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',336,248,'Courier',0,17,1,0,0,1,21,14,96,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Baz"]). +rcbox('black',320,152,384,280,0,1,1,0,16,98,0,0,0,0,'1',[ +]). +text('black',224,360,'NewCenturySchlbk-Roman',0,17,2,1,0,1,42,30,99,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Object", + "Header"]). +rcbox('black',192,344,256,408,0,1,1,0,16,101,0,0,0,0,'1',[ +]). +poly('black',4,[ + 112,224,136,216,152,184,192,168],1,1,1,102,2,0,0,0,8,3,0,0,0,'1','8','3', + "",[ +]). +poly('black',4,[ + 232,192,272,184,288,168,320,160],1,1,1,107,2,0,0,0,8,3,0,0,0,'1','8','3', + "",[ +]). +poly('black',4,[ + 368,256,416,272,392,336,256,352],1,1,1,110,2,0,0,0,8,3,0,0,0,'1','8','3', + "",[ +]). +text('black',96,128,'Times-Roman',0,17,1,1,0,1,40,15,120,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Group 1"]). +text('black',224,128,'Times-Roman',0,17,1,1,0,1,40,15,126,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Group 2"]). +text('black',352,128,'Times-Roman',0,17,1,1,0,1,40,15,130,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Group 3"]). +text('black',224,320,'Times-Roman',0,17,1,1,0,1,64,15,134,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Some Object"]). +text('black',224,80,'Times-Roman',0,24,1,1,0,1,258,28,138,0,22,6,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "The name \"/Foo/Bar/Baz\""]). +box('black',40,64,448,432,0,1,1,140,0,0,0,0,0,'1',[ +]). diff --git a/doc/html/group_p2.gif b/doc/html/group_p2.gif new file mode 100644 index 0000000..a2d12a0 Binary files /dev/null and b/doc/html/group_p2.gif differ diff --git a/doc/html/group_p2.obj b/doc/html/group_p2.obj new file mode 100644 index 0000000..cb91258 --- /dev/null +++ b/doc/html/group_p2.obj @@ -0,0 +1,57 @@ +%TGIF 3.0-p5 +state(0,33,100,0,0,0,8,1,9,1,1,0,2,1,0,1,0,'Courier',0,17,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1088,1408,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). 
+page(1,"",1). +text('black',144,128,'Courier',0,17,1,0,0,1,7,14,26,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',144,144,'Courier',0,17,1,0,0,1,7,14,30,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',144,160,'Courier',0,17,1,0,0,1,21,14,34,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Foo"]). +text('black',144,176,'Courier',0,17,1,0,0,1,7,14,36,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',144,192,'Courier',0,17,1,0,0,1,7,14,38,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +rcbox('black',128,128,192,256,0,1,1,0,16,40,0,0,0,0,'1',[ +]). +text('black',144,320,'Courier',0,17,1,0,0,1,7,14,43,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',144,336,'Courier',0,17,1,0,0,1,7,14,45,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',144,352,'Courier',0,17,1,0,0,1,21,14,47,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Bar"]). +text('black',144,368,'Courier',0,17,1,0,0,1,7,14,49,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',144,384,'Courier',0,17,1,0,0,1,7,14,51,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +rcbox('black',128,320,192,448,0,1,1,0,16,53,0,0,0,0,'1',[ +]). +text('black',160,96,'NewCenturySchlbk-Roman',0,17,1,1,0,1,46,15,64,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Group 1"]). +text('black',160,288,'NewCenturySchlbk-Roman',0,17,1,1,0,1,46,15,68,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Group 2"]). +text('black',352,224,'NewCenturySchlbk-Roman',0,17,2,1,0,1,35,30,70,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Some", + "Object"]). +rcbox('black',320,256,384,384,0,1,1,0,16,72,0,0,0,0,'1',[ +]). +poly('black',4,[ + 176,168,224,192,264,240,320,264],1,1,1,73,2,0,0,0,8,3,0,0,0,'1','8','3', + "",[ +]). +poly('black',4,[ + 176,360,232,344,272,288,320,272],1,1,1,74,2,0,0,0,8,3,0,0,0,'1','8','3', + "",[ +]). +text('black',264,40,'Helvetica',0,24,1,1,0,1,206,29,93,0,24,5,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Hard Link Example"]). +box('black',88,24,424,496,0,1,1,95,0,0,0,0,0,'1',[ +]). +text('black',240,192,'Courier',0,17,1,0,0,1,63,14,129,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "hard link"]). +text('black',248,336,'Courier',0,17,1,0,0,1,63,14,131,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "hard link"]). diff --git a/doc/html/group_p3.gif b/doc/html/group_p3.gif new file mode 100644 index 0000000..85346de Binary files /dev/null and b/doc/html/group_p3.gif differ diff --git a/doc/html/group_p3.obj b/doc/html/group_p3.obj new file mode 100644 index 0000000..ad93444 --- /dev/null +++ b/doc/html/group_p3.obj @@ -0,0 +1,59 @@ +%TGIF 3.0-p5 +state(0,33,100,0,0,0,8,1,9,1,1,0,2,1,0,1,0,'Courier',0,17,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1088,1408,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +text('black',144,128,'Courier',0,17,1,0,0,1,7,14,26,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',144,144,'Courier',0,17,1,0,0,1,7,14,30,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',144,160,'Courier',0,17,1,0,0,1,21,14,34,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Foo"]). +text('black',144,176,'Courier',0,17,1,0,0,1,7,14,36,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',144,192,'Courier',0,17,1,0,0,1,7,14,38,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +rcbox('black',128,128,192,256,0,1,1,0,16,40,0,0,0,0,'1',[ +]). +text('black',144,320,'Courier',0,17,1,0,0,1,7,14,43,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',144,336,'Courier',0,17,1,0,0,1,7,14,45,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). 
+text('black',144,352,'Courier',0,17,1,0,0,1,21,14,47,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Bar"]). +text('black',144,368,'Courier',0,17,1,0,0,1,7,14,49,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +text('black',144,384,'Courier',0,17,1,0,0,1,7,14,51,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "."]). +rcbox('black',128,320,192,448,0,1,1,0,16,53,0,0,0,0,'1',[ +]). +text('black',160,96,'NewCenturySchlbk-Roman',0,17,1,1,0,1,46,15,64,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Group 1"]). +text('black',160,288,'NewCenturySchlbk-Roman',0,17,1,1,0,1,46,15,68,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Group 2"]). +text('black',352,96,'NewCenturySchlbk-Roman',0,17,2,1,0,1,35,30,70,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Some", + "Object"]). +rcbox('black',320,128,384,256,0,1,1,0,16,72,0,0,0,0,'1',[ +]). +text('black',264,40,'Helvetica',0,24,1,1,0,1,197,29,93,0,24,5,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Soft Link Example"]). +box('black',88,24,424,496,0,1,1,95,0,0,0,0,0,'1',[ +]). +text('black',320,352,'Courier',0,17,1,0,0,1,35,14,105,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "\"Foo\""]). +poly('black',4,[ + 176,168,232,160,264,144,320,136],1,1,1,111,2,0,0,0,8,3,0,0,0,'1','8','3', + "",[ +]). +poly('black',2,[ + 176,360,312,360],1,1,1,116,2,0,0,0,8,3,0,0,0,'1','8','3', + "",[ +]). +text('black',240,160,'Courier',0,17,1,0,0,1,63,14,119,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "hard link"]). +text('black',216,368,'Courier',0,17,1,0,0,1,63,14,121,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "soft link"]). diff --git a/doc/html/h5s.examples b/doc/html/h5s.examples new file mode 100644 index 0000000..e7a479f --- /dev/null +++ b/doc/html/h5s.examples @@ -0,0 +1,347 @@ +Example 1: Create a simple fixed size 3-D dataspace in memory and on disk and + copy the entire dataset to disk. + +{ + hid_t file; /* File ID */ + hid_t dataset; /* Dataset ID */ + hid_t mem_space, file_space; /* Dataspaces for memory and the file */ + uint8 *buf; /* Buffer for data */ + hsize_t curr_dims[3]={3,4,5}; /* Dimensions of the dataset */ + + /* Create file */ + file = H5Fcreate("example1.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + /* Create dataspace for dataset in the file */ + /* Selection for dataspace defaults to entire space */ + file_space=H5Screate(H5S_SIMPLE); + + /* Set the extent & type of the dataset's dataspace */ + H5Sset_extent_simple(file_space,3,curr_dims,curr_dims); + + /* Create the dataspace for the dataset in memory */ + /* Selection for dataspace defaults to entire space */ + mem_space=H5Screate(H5S_SIMPLE); + + /* Set the extent & type of the memory dataspace */ + H5Sset_extent_simple(mem_space,3,curr_dims,curr_dims); + + /* Create the dataset on disk */ + dataset=H5Dcreate(file,"Dataset",H5T_NATIVE_UINT8,file_space,H5P_DEFAULT); + + /* Write the dataset to the file */ + H5Dwrite(dataset,H5T_NATIVE_UINT8,mem_space,file_space,H5P_DEFAULT,buf); + + /* Close dataspaces */ + H5Sclose(mem_space); + H5Sclose(file_space); + + /* Close dataset & file */ + H5Dclose(dataset); + H5Fclose(file); +} + + +Example 2: Create a simple fixed size 3-D dataspace in memory and on disk and + copy a hyperslab to disk. The hyperslab blocks are packed and + contiguous in memory, but are scattered when written to the dataset + on disk. 
+ +{ + hid_t file; /* File ID */ + hid_t dataset; /* Dataset ID */ + hid_t mem_space, file_space; /* Dataspaces for memory and the file */ + uint8 *buf; /* Buffer for data */ + hssize_t start[3]={3,4,5}; /* Start of hyperslab */ + hsize_t stride[3]={1,2,2}; /* Stride for hyperslab */ + hsize_t count[3]={3,3,3}; /* Hyperslab block count in each dimension */ + hsize_t block[3]={2,2,2}; /* Hyperslab block size in each dimension */ + hsize_t curr_dims[3]={13,14,15}; /* Dimensions of the dataset */ + + /* Create file */ + file = H5Fcreate("example2.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + /* Create dataspace for dataset in the file */ + /* Selection for dataspace defaults to entire space */ + file_space=H5Screate(H5S_SIMPLE); + + /* Set the extent & type of the dataset's dataspace */ + H5Sset_extent_simple(file_space,3,curr_dims,curr_dims); + + /* Set the hyperslab selection for a file dataspace */ + H5Sselect_hyperslab(file_space,H5S_SELECT_SET,start,stride,count,block); + + /* Create the dataspace for the dataset in memory */ + /* Selection for dataspace defaults to entire space */ + mem_space=H5Screate(H5S_SIMPLE); + + /* Set the extent & type of the memory dataspace */ + /* Compute the memory dimensions based on the hyperslab blocks to write */ + for(i=0; i<3; i++) + curr_dims[i]=count[i]*block[i]; + H5Sset_extent_simple(mem_space,3,curr_dims,curr_dims); + + /* Create the dataset on disk */ + dataset=H5Dcreate(file,"Dataset",H5T_NATIVE_UINT8,file_space,H5P_DEFAULT); + + /* Write the hyperslab to the file */ + H5Dwrite(dataset,H5T_NATIVE_UINT8,mem_space,file_space,H5P_DEFAULT,buf); + + /* Close dataspaces */ + H5Sclose(mem_space); + H5Sclose(file_space); + + /* Close dataset & file */ + H5Dclose(dataset); + H5Fclose(file); +} + + +Example 3: Create a simple fixed size 3-D dataspace in memory and on disk and + copy a specific selection of points (with a particular order) to + disk. The memory and file dataspaces are different sizes, but the number + of points selected are the same. + +{ + hid_t file; /* File ID */ + hid_t dataset; /* Dataset ID */ + hid_t mem_space, file_space; /* Dataspaces for memory and the file */ + uint8 *buf; /* Buffer for data */ + hsize_t elements[5][3]; /* Dataspace elements selected */ + hsize_t curr_dims[3]={13,14,15}; /* Dimensions of the dataset */ + + /* Create file */ + file = H5Fcreate("example3.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + /* Create dataspace for dataset in the file */ + /* Selection for dataspace defaults to entire space */ + file_space=H5Screate(H5S_SIMPLE); + + /* Set the extent & type of the dataset's dataspace */ + H5Sset_extent_simple(file_space,3,curr_dims,curr_dims); + + /* Set the elements for the selection in the file dataspace */ + elements[0]={0,2,4}; /* Yes, I know this won't compile.. 
:-) */ + elements[1]={3,4,1}; + elements[2]={9,8,3}; + elements[3]={7,2,0}; + elements[4]={6,5,8}; + H5Sselect_elements(file_space,H5S_SELECT_SET,5,elements); + + /* Create the dataspace for the dataset in memory */ + /* Selection for dataspace defaults to entire space */ + mem_space=H5Screate(H5S_SIMPLE); + + /* Set the extent & type of the memory dataspace */ + curr_dims={23,15,18}; /* This won't compile either :-) */ + H5Sset_extent_simple(mem_space,3,curr_dims,curr_dims); + + /* Set the elements for the selection in the file dataspace */ + elements[0]={9,2,1}; + elements[1]={13,1,12}; + elements[2]={4,1,7}; + elements[3]={0,12,0}; + elements[4]={20,10,17}; + H5Sselect_elements(mem_space,H5S_SELECT_SET,5,elements); + + /* Create the dataset on disk */ + dataset=H5Dcreate(file,"Dataset",H5T_NATIVE_UINT8,file_space,H5P_DEFAULT); + + /* Write the hyperslab to the file */ + H5Dwrite(dataset,H5T_NATIVE_UINT8,mem_space,file_space,H5P_DEFAULT,buf); + + /* Close dataspaces */ + H5Sclose(mem_space); + H5Sclose(file_space); + + /* Close dataset & file */ + H5Dclose(dataset); + H5Fclose(file); +} + + +Example 4: Create a simple fixed size 3-D dataspace in memory and on disk and + build up selection hyperslab selections to copy from memory to disk. The + selection is the same for both dataspaces, but a different offset is used, + to illustrate the selection offsets. + +{ + hid_t file; /* File ID */ + hid_t dataset; /* Dataset ID */ + hid_t mem_space, file_space; /* Dataspaces for memory and the file */ + uint8 *buf; /* Buffer for data */ + hssize_t start[3]; /* Start of hyperslab */ + hsize_t stride[3]; /* Stride for hyperslab */ + hsize_t count[3]; /* Hyperslab block count in each dimension */ + hsize_t block[3]; /* Hyperslab block size in each dimension */ + hssize_t offset[3]; /* Selection offset */ + hsize_t curr_dims[3]={13,14,15}; /* Dimensions of the dataset */ + + /* Create file */ + file = H5Fcreate("example4.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + /* Create dataspace for dataset in the file */ + /* Selection for dataspace defaults to entire space */ + file_space=H5Screate(H5S_SIMPLE); + + /* Set the extent & type of the dataset's dataspace */ + H5Sset_extent_simple(file_space,3,curr_dims,curr_dims); + + /* Build up the selection with a series of hyperslab selections */ + start={0,2,4}; /* Again, this won't compile.. :-) */ + stride={1,1,1}; + count={6,5,8}; + block={1,1,1}; + + /* Set the first selection, union the rest in */ + H5Sselect_hyperslab(file_space,H5S_SELECT_SET,start,stride,count,block); + + /* initialize the second hyperslab */ + start={10,9,1}; /* Again, this won't compile.. :-) */ + stride={1,1,1}; + count={2,3,10}; + block={1,1,1}; + + /* Union the second hyperslab into the file dataspace's selection */ + H5Sselect_hyperslab(file_space,H5S_SELECT_UNION,start,stride,count,block); + + /* initialize the third hyperslab */ + start={3,10,5}; /* Again, this won't compile.. 
:-) */ + stride={1,1,1}; + count={8,2,6}; + block={1,1,1}; + + /* Union the final hyperslab into the file dataspace's selection */ + H5Sselect_hyperslab(file_space,H5S_SELECT_UNION,start,stride,count,block); + + /* Create the dataspace for the dataset in memory */ + /* Selection for dataspace defaults to entire space */ + mem_space=H5Screate(H5S_SIMPLE); + + /* Set the extent & type of the memory dataspace */ + curr_dims={23,15,18}; /* This won't compile either :-) */ + H5Sset_extent_simple(mem_space,3,curr_dims,curr_dims); + + /* Copy the selection from the file dataspace */ + H5Sselect_op(mem_space,H5S_SELECT_COPY,file_space); + + /* Adjust the offset of the selection in the memory dataspace */ + offset={1,1,1}; + H5Soffset_simple(mem_space,offset); + + /* Create the dataset on disk */ + dataset=H5Dcreate(file,"Dataset",H5T_NATIVE_UINT8,file_space,H5P_DEFAULT); + + /* Write the hyperslab to the file */ + H5Dwrite(dataset,H5T_NATIVE_UINT8,mem_space,file_space,H5P_DEFAULT,buf); + + /* Close dataspaces */ + H5Sclose(mem_space); + H5Sclose(file_space); + + /* Close dataset & file */ + H5Dclose(dataset); + H5Fclose(file); +} + + +Example 5: Same as example 1 (create a simple fixed size 3-D dataspace in memory and on disk and + copy the entire dataset to disk), except that the selection order is changed + for the memory dataspace, to change between FORTRAN and C array ordering. + +{ + hid_t file; /* File ID */ + hid_t dataset; /* Dataset ID */ + hid_t mem_space, file_space; /* Dataspaces for memory and the file */ + uint8 *buf; /* Buffer for data */ + hsize_t order[3]; /* Dimension ordering for selection */ + hsize_t curr_dims[3]={3,4,5}; /* Dimensions of the dataset */ + + /* Create file */ + file = H5Fcreate("example5.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + /* Create dataspace for dataset in the file */ + /* Selection for dataspace defaults to entire space and C array order */ + file_space=H5Screate(H5S_SIMPLE); + + /* Set the extent & type of the dataset's dataspace */ + H5Sset_extent_simple(file_space,3,curr_dims,curr_dims); + + /* Create the dataspace for the dataset in memory */ + /* Selection for dataspace defaults to entire space and C array order */ + mem_space=H5Screate(H5S_SIMPLE); + + /* Set the extent & type of the memory dataspace */ + H5Sset_extent_simple(mem_space,3,curr_dims,curr_dims); + + /* Change selection ordering to FORTRAN order for memory dataspace */ + order={0,1,2}; + H5Sselect_order(mem_space,order); + + /* Create the dataset on disk */ + dataset=H5Dcreate(file,"Dataset",H5T_NATIVE_UINT8,file_space,H5P_DEFAULT); + + /* Write the dataset to the file */ + H5Dwrite(dataset,H5T_NATIVE_UINT8,mem_space,file_space,H5P_DEFAULT,buf); + + /* Close dataspaces */ + H5Sclose(mem_space); + H5Sclose(file_space); + + /* Close dataset & file */ + H5Dclose(dataset); + H5Fclose(file); +} + + +Example 6: Create a stored dataspace on disk and use the H5Ssubspace function + create a dataspace located within that space. 
+ +{ + hid_t file; /* File ID */ + hid_t space1, space2; /* Dataspace IDs */ + hssize_t start[3]; /* Start of hyperslab */ + hsize_t count[3]; /* Hyperslab block count in each dimension */ + hsize_t curr_dims[3]={13,14,15};/* Dimensions of the dataset */ + + /* Create file */ + file = H5Fcreate("example6.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + /* Create dataspace #1 */ + space1=H5Screate(H5S_SIMPLE); + + /* Set the extent & type of dataspace #1 */ + H5Sset_extent_simple(space1,3,curr_dims,curr_dims); + + /* Store dataspace #1 on disk */ + H5Scommit(file,"/Dataspaces/Dataspace #1",space1); + + /* Select a contiguous hyperslab in dataspace #1 to create dataspace #2 with */ + start={0,2,4}; + count={6,5,8}; + + /* + * Use stride and block set to NULL to get contiguous, single element sized + * hyperslab. The stride and block parameters could also be set to all + * 1's, but this is simpler and easier. + */ + H5Sselect_hyperslab(space1,H5S_SELECT_SET,start,NULL,count,NULL); + + /* Create dataspace #2 as a dataspace located within dataspace #1 */ + space2=H5Ssubspace(space1); + + /* Store dataspace #2 on disk also */ + H5Scommit(file,"/Dataspaces/Dataspace #2",space2); + + /* + * space1 & space2 can be used to create datasets, etc. Any datasets + * created with space2 can have their dataspace queried to find the parent + * dataspace and the location within the parent dataspace + */ + + /* Close dataspaces */ + H5Sclose(space1); + H5Sclose(space2); + + /* Close file */ + H5Fclose(file); +} diff --git a/doc/html/heap.txt b/doc/html/heap.txt new file mode 100644 index 0000000..6b4c058 --- /dev/null +++ b/doc/html/heap.txt @@ -0,0 +1,72 @@ + HEAP MANAGEMENT IN HDF5 + ------------------------ + +Heap functions are in the H5H package. + + +off_t +H5H_new (hdf5_file_t *f, size_t size_hint, size_t realloc_hint); + + Creates a new heap in the specified file which can efficiently + store at least SIZE_HINT bytes. The heap can store more than + that, but doing so may cause the heap to become less efficient + (for instance, a heap implemented as a B-tree might become + discontigous). The REALLOC_HINT is the minimum number of bytes + by which the heap will grow when it must be resized. The hints + may be zero in which case reasonable (but probably not + optimal) values will be chosen. + + The return value is the address of the new heap relative to + the beginning of the file boot block. + +off_t +H5H_insert (hdf5_file_t *f, off_t addr, size_t size, const void *buf); + + Copies SIZE bytes of data from BUF into the heap whose address + is ADDR in file F. BUF must be the _entire_ heap object. The + return value is the byte offset of the new data in the heap. + +void * +H5H_read (hdf5_file_t *f, off_t addr, off_t offset, size_t size, void *buf); + + Copies SIZE bytes of data from the heap whose address is ADDR + in file F into BUF and then returns the address of BUF. If + BUF is the null pointer then a new buffer will be malloc'd by + this function and its address is returned. + + Returns buffer address or null. + +const void * +H5H_peek (hdf5_file_t *f, off_t addr, off_t offset) + + A more efficient version of H5H_read that returns a pointer + directly into the cache; the data is not copied from the cache + to a buffer. The pointer is valid until the next call to an + H5AC function directly or indirectly. + + Returns a pointer or null. Do not free the pointer. 
void *
H5H_write (hdf5_file_t *f, off_t addr, off_t offset, size_t size,
           const void *buf);

    Modifies (part of) an object in the heap at address ADDR of
    file F by copying SIZE bytes from the beginning of BUF to the
    file.  OFFSET is the address within the heap where the output
    is to occur.

    This function can fail if the combination of OFFSET and SIZE
    would write over a boundary between two heap objects.

herr_t
H5H_remove (hdf5_file_t *f, off_t addr, off_t offset, size_t size);

    Removes an object or part of an object which begins at byte
    OFFSET within a heap whose address is ADDR in file F.  SIZE
    bytes are returned to the free list.  Removing the middle of
    an object has the side effect that one object is now split
    into two objects.

    Returns success or failure.

diff --git a/doc/html/index.html b/doc/html/index.html
new file mode 100644
index 0000000..edd1ab0
--- /dev/null
+++ b/doc/html/index.html
@@ -0,0 +1,40 @@

  HDF5 - The Next Generation of the HDF library & tools


    HDF5 - A New Generation of HDF

    + + + +
    +
    Quincey Koziol
    +
    Robb Matzke
    + +Last modified: Feb 16, 1998 + + + + diff --git a/doc/html/move.html b/doc/html/move.html new file mode 100644 index 0000000..ec87d11 --- /dev/null +++ b/doc/html/move.html @@ -0,0 +1,66 @@ + + + + How to Relocate a File Data Structure + + + +

    How to Relocate a File Data Structure

    + +

    Since file data structures can be cached in memory by the H5AC + package it becomes problematic to move such a data structure in + the file. One cannot just copy a portion of the file from one + location to another because: + +

      +
  1. the file might not contain the latest information, and
  2. the H5AC package might not realize that the object's
     address has changed and attempt to write the object to disk
     at the old address.
    + +

    Here's a correct method to move data from one location to + another. The example code assumes that one is moving a B-link + tree node from old_addr to new_addr. + +

      +
    1. Make sure the disk is up-to-date with respect to the + cache. There is no need to remove the item from the cache, + hence the final argument to H5AC_flush is + FALSE. +

      + + H5AC_flush (f, H5AC_BT, old_addr, FALSE);
      +
      +
      +
    2. Read the data from the old address and write it to the new
       address.

      + + H5F_block_read (f, old_addr, size, buf);
      + H5F_block_write (f, new_addr, size, buf);
      +
      +
      +
    3. Notify the cache that the address of the object changed.

      + + H5AC_rename (f, H5AC_BT, old_addr, new_addr);
      +
      +
      +
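    Put together, the three steps might look like the sketch below.
    The hdf5_file_t and haddr_t types and the exact signatures are
    assumed from the calls shown above; size is the on-disk size of
    the node and buf is a caller-supplied buffer of at least that
    size.

        herr_t
        move_btree_node (hdf5_file_t *f, haddr_t old_addr,
                         haddr_t new_addr, size_t size, void *buf)
        {
            /* 1. Flush so the disk is up to date; FALSE keeps the
             *    object in the cache. */
            if (H5AC_flush (f, H5AC_BT, old_addr, FALSE) < 0) return -1;

            /* 2. Copy the raw bytes from the old to the new address. */
            if (H5F_block_read (f, old_addr, size, buf) < 0) return -1;
            if (H5F_block_write (f, new_addr, size, buf) < 0) return -1;

            /* 3. Tell the cache that the object moved. */
            return H5AC_rename (f, H5AC_BT, old_addr, new_addr);
        }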
    + + + +
    +
    Robb Matzke
    + + +Last modified: Mon Jul 14 15:38:29 EST + + + diff --git a/doc/html/ph5design.html b/doc/html/ph5design.html new file mode 100644 index 0000000..1280052 --- /dev/null +++ b/doc/html/ph5design.html @@ -0,0 +1,77 @@ + + + + +new + + + + +

    Parallel HDF5 Design

    +

     

    +

    1. Design Overview

    +

    In this section, I first describe the function requirements of the Parallel HDF5 (PHDF5) software and the assumed system requirements. Section 2 describes the programming model of the PHDF5 interface. Section 3 shows an example PHDF5 program.

    +

    1.1. Function requirements

    + +
      +
    • An API to support parallel file access for HDF5 files in a message-passing environment.
    • Fast parallel I/O to large datasets through a standard parallel I/O interface.
    • Processes are required to do collective API calls only when structural changes are needed for the HDF5 file.
    • Each process may do independent I/O requests to different datasets in the same or different HDF5 files.
    • Support for collective I/O requests for datasets (to be included in the next version).
    • Minimize deviation from the HDF5 interface.
    + +

    1.2. System requirements

    + +
      +
    • A C language interface is the initial requirement; a Fortran77 interface will be added later.
    • Use the Message Passing Interface (MPI) for interprocess communication.
    • Use MPI-IO calls for parallel file accesses.
    • Initial platforms: IBM SP2, Intel TFLOPS, and SGI Origin 2000.
    + +

    2. Programming Model

    +

HDF5 uses an optional access template object to control the file
access mechanism. The general model for accessing an HDF5 file in
parallel contains the following steps:

    + +
      +
    • Setup access template
    • File open
    • Dataset open
    • Dataset data access (zero or more)
    • Dataset close
    • File close
    + +

    2.1. Setup access template

    +

Each process of the MPI communicator creates an access template and
sets it up with MPI parallel access information (communicator, info
object, access mode).

    +

2.2. File open

    +

All processes of the MPI communicator open an HDF5 file by a collective call
(H5Fcreate or H5Fopen) with the access template.
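In code, the access-template setup and the collective file open might
look like the following sketch. The file name and variable names are
illustrative; H5Pset_mpi is the call used by the example program
linked below.

    /* Every process executes this (steps 2.1 and 2.2). */
    hid_t acc_tpl = H5Pcreate (H5P_FILE_ACCESS);          /* access template */
    H5Pset_mpi (acc_tpl, MPI_COMM_WORLD, MPI_INFO_NULL);  /* MPI access info */

    /* Collective file creation using the template. */
    hid_t fid = H5Fcreate ("example.h5", H5F_ACC_TRUNC,
                           H5P_DEFAULT, acc_tpl);
    H5Pclose (acc_tpl);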

    +

2.3. Dataset open

    +

All processes of the MPI communicator open a dataset by a collective call (H5Dcreate or H5Dopen).  This version supports only collective dataset opens.  A future version may support dataset opens by a subset of the processes that have opened the file.

    +

2.4. Dataset access

    +

2.4.1. Independent dataset access

    +

Each process may do an arbitrary number of independent data I/O accesses by independent calls (H5Dread or H5Dwrite) to the dataset, with the transfer template set for independent access.  (The default transfer mode is independent.)  If the dataset has an unlimited dimension and an H5Dwrite is writing data beyond the current dimension size of the dataset, all processes that have opened the dataset must make a collective call (H5Dallocate) to allocate more space for the dataset BEFORE the independent H5Dwrite call.
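A single independent write then reduces to an ordinary H5Dwrite call,
as in this fragment. The names are illustrative; mem_space and
file_space select this process's portion of the dataset, as in the
example program linked below.

    /* Independent write: no coordination with other processes.
     * H5P_DEFAULT selects the default (independent) transfer mode. */
    ret = H5Dwrite (dataset, H5T_NATIVE_INT, mem_space, file_space,
                    H5P_DEFAULT, data);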

    +

2.4.2. Collective dataset access

    +

    All processes that have opened the dataset may do collective data I/O access by collective calls (H5Dread or H5Dwrite) to the dataset with the transfer template set for collective access.  Pre-allocation (H5Dallocate) is not needed for unlimited dimension datasets since the H5Dallocate call, if needed, is done internally by the collective data access call.

    +

2.4.3. Dataset attribute access

    +

    Changes to attributes can only occur at the "main process" (process 0).  Read only access to attributes can occur independent in each process that has opened the dataset.  (API to be defined later.)

    +

2.5. Dataset close

    +

    All processes that have opened the dataset must close the dataset by a collective call (H5Dclose).

    +

2.6. File close

    +

    All processes that have opened the file must close the file by a collective call (H5Fclose).

    +

    3. Parallel HDF5 Example

    +
    +
    +Example code
    +
    +


    +

    Send comments to
    +hdfparallel@ncsa.uiuc.edu

    +
    Last Modified: Feb 16, 1998
    + diff --git a/doc/html/ph5example.c b/doc/html/ph5example.c new file mode 100644 index 0000000..8699bc9 --- /dev/null +++ b/doc/html/ph5example.c @@ -0,0 +1,1003 @@ + +/* + * Example of using the parallel HDF5 library to access datasets. + * + * This program contains two parts. In the first part, the mpi processes + * collectively create a new parallel HDF5 file and create two fixed + * dimension datasets in it. Then each process writes a hyperslab into + * each dataset in an independent mode. All processes collectively + * close the datasets and the file. + * In the second part, the processes collectively open the created file + * and the two datasets in it. Then each process reads a hyperslab from + * each dataset in an independent mode and prints them out. + * All processes collectively close the datasets and the file. + */ + +#include +#include +#include +#include + +/* Temporary source code */ +#define FAIL -1 +/* temporary code end */ + +/* Define some handy debugging shorthands, routines, ... */ +/* debugging tools */ +#define MESG(x)\ + if (verbose) printf("%s\n", x);\ + +#define MPI_BANNER(mesg)\ + {printf("--------------------------------\n");\ + printf("Proc %d: ", mpi_rank); \ + printf("*** %s\n", mesg);\ + printf("--------------------------------\n");} + +#define SYNC(comm)\ + {MPI_BANNER("doing a SYNC"); MPI_Barrier(comm); MPI_BANNER("SYNC DONE");} +/* End of Define some handy debugging shorthands, routines, ... */ + +/* Constants definitions */ +/* 24 is a multiple of 2, 3, 4, 6, 8, 12. Neat for parallel tests. */ +#define SPACE1_DIM1 24 +#define SPACE1_DIM2 24 +#define SPACE1_RANK 2 +#define DATASETNAME1 "Data1" +#define DATASETNAME2 "Data2" +#define DATASETNAME3 "Data3" +/* hyperslab layout styles */ +#define BYROW 1 /* divide into slabs of rows */ +#define BYCOL 2 /* divide into blocks of columns */ + + +/* dataset data type. Int's can be easily octo dumped. */ +typedef int DATATYPE; + +/* global variables */ +int nerrors = 0; /* errors count */ + +int mpi_size, mpi_rank; /* mpi variables */ + +/* option flags */ +int verbose = 0; /* verbose, default as no. */ +int doread=1; /* read test */ +int dowrite=1; /* write test */ + + + +/* + * Setup the dimensions of the hyperslab. + * Two modes--by rows or by columns. + * Assume dimension rank is 2. + */ +void +slab_set(hssize_t start[], hsize_t count[], hsize_t stride[], int mode) +{ + switch (mode){ + case BYROW: + /* Each process takes a slabs of rows. */ + stride[0] = 1; + stride[1] = 1; + count[0] = SPACE1_DIM1/mpi_size; + count[1] = SPACE1_DIM2; + start[0] = mpi_rank*count[0]; + start[1] = 0; + break; + case BYCOL: + /* Each process takes a block of columns. */ + stride[0] = 1; + stride[1] = 1; + count[0] = SPACE1_DIM1; + count[1] = SPACE1_DIM2/mpi_size; + start[0] = 0; + start[1] = mpi_rank*count[1]; + break; + default: + /* Unknown mode. Set it to cover the whole dataset. */ + printf("unknown slab_set mode (%d)\n", mode); + stride[0] = 1; + stride[1] = 1; + count[0] = SPACE1_DIM1; + count[1] = SPACE1_DIM2; + start[0] = 0; + start[1] = 0; + break; + } +} + + +/* + * Fill the dataset with trivial data for testing. + * Assume dimension rank is 2 and data is stored contiguous. 
+ */ +void +dataset_fill(hssize_t start[], hsize_t count[], hsize_t stride[], DATATYPE * dataset) +{ + DATATYPE *dataptr = dataset; + int i, j; + + /* put some trivial data in the data_array */ + for (i=0; i < count[0]; i++){ + for (j=0; j < count[1]; j++){ + *dataptr++ = (i*stride[0]+start[0])*100 + (j*stride[1]+start[1]+1); + } + } +} + + +/* + * Print the content of the dataset. + */ +void dataset_print(hssize_t start[], hsize_t count[], hsize_t stride[], DATATYPE * dataset) +{ + DATATYPE *dataptr = dataset; + int i, j; + + /* print the slab read */ + for (i=0; i < count[0]; i++){ + printf("Row %d: ", (int)(i*stride[0]+start[0])); + for (j=0; j < count[1]; j++){ + printf("%03d ", *dataptr++); + } + printf("\n"); + } +} + + +/* + * Print the content of the dataset. + */ +int dataset_vrfy(hssize_t start[], hsize_t count[], hsize_t stride[], DATATYPE *dataset, DATATYPE *original) +{ +#define MAX_ERR_REPORT 10 /* Maximum number of errors reported */ + DATATYPE *dataptr = dataset; + DATATYPE *originptr = original; + + int i, j, nerrors; + + /* print it if verbose */ + if (verbose) + dataset_print(start, count, stride, dataset); + + nerrors = 0; + for (i=0; i < count[0]; i++){ + for (j=0; j < count[1]; j++){ + if (*dataset++ != *original++){ + nerrors++; + if (nerrors <= MAX_ERR_REPORT){ + printf("Dataset Verify failed at [%d][%d](row %d, col %d): expect %d, got %d\n", + i, j, + (int)(i*stride[0]+start[0]), (int)(j*stride[1]+start[1]), + *(dataset-1), *(original-1)); + } + } + } + } + if (nerrors > MAX_ERR_REPORT) + printf("[more errors ...]\n"); + if (nerrors) + printf("%d errors found in dataset_vrfy\n", nerrors); + return(nerrors); +} + + +/* + * Example of using the parallel HDF5 library to create two datasets + * in one HDF5 files with parallel MPIO access support. + * The Datasets are of sizes (number-of-mpi-processes x DIM1) x DIM2. + * Each process controls only a slab of size DIM1 x DIM2 within each + * dataset. + */ + +void +phdf5writeInd(char *filename) +{ + hid_t fid1, fid2; /* HDF5 file IDs */ + hid_t acc_tpl1; /* File access templates */ + hid_t sid1,sid2; /* Dataspace ID */ + hid_t file_dataspace; /* File dataspace ID */ + hid_t mem_dataspace; /* memory dataspace ID */ + hid_t dataset1, dataset2; /* Dataset ID */ + int rank = SPACE1_RANK; /* Logical rank of dataspace */ + hsize_t dims1[SPACE1_RANK] = + {SPACE1_DIM1,SPACE1_DIM2}; /* dataspace dim sizes */ + hsize_t dimslocal1[SPACE1_RANK] = + {SPACE1_DIM1,SPACE1_DIM2}; /* local dataspace dim sizes */ + DATATYPE data_array1[SPACE1_DIM1][SPACE1_DIM2]; /* data buffer */ + + hssize_t start[SPACE1_RANK]; /* for hyperslab setting */ + hsize_t count[SPACE1_RANK], stride[SPACE1_RANK]; /* for hyperslab setting */ + + herr_t ret; /* Generic return value */ + int i, j; + int mpi_size, mpi_rank; + char *fname; + int mrc; /* mpi return code */ + + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Info info = MPI_INFO_NULL; + + if (verbose) + printf("Independent write test on file %s\n", filename); + + /* set up MPI parameters */ + MPI_Comm_size(MPI_COMM_WORLD,&mpi_size); + MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); + + /* ------------------- + * START AN HDF5 FILE + * -------------------*/ + /* setup file access template with parallel IO access. 
*/ + acc_tpl1 = H5Pcreate (H5P_FILE_ACCESS); + assert(acc_tpl1 != FAIL); + MESG("H5Pcreate access succeed"); + /* set Parallel access with communicator */ + ret = H5Pset_mpi(acc_tpl1, comm, info); + assert(ret != FAIL); + MESG("H5Pset_mpi succeed"); + + /* create the file collectively */ + fid1=H5Fcreate(filename,H5F_ACC_TRUNC,H5P_DEFAULT,acc_tpl1); + assert(fid1 != FAIL); + MESG("H5Fcreate succeed"); + + /* Release file-access template */ + ret=H5Pclose(acc_tpl1); + assert(ret != FAIL); + + + /* -------------------------- + * Define the dimensions of the overall datasets + * and the slabs local to the MPI process. + * ------------------------- */ + /* setup dimensionality object */ + sid1 = H5Screate_simple (SPACE1_RANK, dims1, NULL); + assert (sid1 != FAIL); + MESG("H5Screate_simple succeed"); + + + /* create a dataset collectively */ + dataset1 = H5Dcreate(fid1, DATASETNAME1, H5T_NATIVE_INT, sid1, + H5P_DEFAULT); + assert(dataset1 != FAIL); + MESG("H5Dcreate succeed"); + + /* create another dataset collectively */ + dataset2 = H5Dcreate(fid1, DATASETNAME2, H5T_NATIVE_INT, sid1, + H5P_DEFAULT); + assert(dataset2 != FAIL); + MESG("H5Dcreate succeed"); + + + + /* set up dimensions of the slab this process accesses */ + start[0] = mpi_rank*SPACE1_DIM1/mpi_size; + start[1] = 0; + count[0] = SPACE1_DIM1/mpi_size; + count[1] = SPACE1_DIM2; + stride[0] = 1; + stride[1] =1; +if (verbose) + printf("start[]=(%d,%d), count[]=(%d,%d), total datapoints=%d\n", + start[0], start[1], count[0], count[1], count[0]*count[1]); + + /* put some trivial data in the data_array */ + dataset_fill(start, count, stride, &data_array1[0][0]); + MESG("data_array initialized"); + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset1); + assert(file_dataspace != FAIL); + MESG("H5Dget_space succeed"); + ret=H5Sset_hyperslab(file_dataspace, start, count, stride); + assert(ret != FAIL); + MESG("H5Sset_hyperslab succeed"); + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (SPACE1_RANK, count, NULL); + assert (mem_dataspace != FAIL); + + /* write data independently */ + ret = H5Dwrite(dataset1, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + H5P_DEFAULT, data_array1); + assert(ret != FAIL); + MESG("H5Dwrite succeed"); + + /* write data independently */ + ret = H5Dwrite(dataset2, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + H5P_DEFAULT, data_array1); + assert(ret != FAIL); + MESG("H5Dwrite succeed"); + + /* release dataspace ID */ + H5Sclose(file_dataspace); + + /* close dataset collectively */ + ret=H5Dclose(dataset1); + assert(ret != FAIL); + MESG("H5Dclose1 succeed"); + ret=H5Dclose(dataset2); + assert(ret != FAIL); + MESG("H5Dclose2 succeed"); + + /* release all IDs created */ + H5Sclose(sid1); + + /* close the file collectively */ + H5Fclose(fid1); +} + +/* Example of using the parallel HDF5 library to read a dataset */ +void +phdf5readInd(char *filename) +{ + hid_t fid1, fid2; /* HDF5 file IDs */ + hid_t acc_tpl1; /* File access templates */ + hid_t sid1,sid2; /* Dataspace ID */ + hid_t file_dataspace; /* File dataspace ID */ + hid_t mem_dataspace; /* memory dataspace ID */ + hid_t dataset1, dataset2; /* Dataset ID */ + int rank = SPACE1_RANK; /* Logical rank of dataspace */ + hsize_t dims1[] = {SPACE1_DIM1,SPACE1_DIM2}; /* dataspace dim sizes */ + DATATYPE data_array1[SPACE1_DIM1][SPACE1_DIM2]; /* data buffer */ + DATATYPE data_origin1[SPACE1_DIM1][SPACE1_DIM2]; /* expected data buffer */ + + hssize_t start[SPACE1_RANK]; /* for hyperslab setting 
*/ + hsize_t count[SPACE1_RANK], stride[SPACE1_RANK]; /* for hyperslab setting */ + + herr_t ret; /* Generic return value */ + int i, j; + int mpi_size, mpi_rank; + + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Info info = MPI_INFO_NULL; + + if (verbose) + printf("Independent read test on file %s\n", filename); + + /* set up MPI parameters */ + MPI_Comm_size(MPI_COMM_WORLD,&mpi_size); + MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); + + + /* setup file access template */ + acc_tpl1 = H5Pcreate (H5P_FILE_ACCESS); + assert(acc_tpl1 != FAIL); + /* set Parallel access with communicator */ + ret = H5Pset_mpi(acc_tpl1, comm, info); + assert(ret != FAIL); + + + /* open the file collectively */ + fid1=H5Fopen(filename,H5F_ACC_RDWR,acc_tpl1); + assert(fid1 != FAIL); + + /* Release file-access template */ + ret=H5Pclose(acc_tpl1); + assert(ret != FAIL); + + /* open the dataset1 collectively */ + dataset1 = H5Dopen(fid1, DATASETNAME1); + assert(dataset1 != FAIL); + + /* open another dataset collectively */ + dataset2 = H5Dopen(fid1, DATASETNAME1); + assert(dataset2 != FAIL); + + + /* set up dimensions of the slab this process accesses */ + start[0] = mpi_rank*SPACE1_DIM1/mpi_size; + start[1] = 0; + count[0] = SPACE1_DIM1/mpi_size; + count[1] = SPACE1_DIM2; + stride[0] = 1; + stride[1] =1; +if (verbose) + printf("start[]=(%d,%d), count[]=(%d,%d), total datapoints=%d\n", + start[0], start[1], count[0], count[1], count[0]*count[1]); + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset1); + assert(file_dataspace != FAIL); + ret=H5Sset_hyperslab(file_dataspace, start, count, stride); + assert(ret != FAIL); + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (SPACE1_RANK, count, NULL); + assert (mem_dataspace != FAIL); + + /* fill dataset with test data */ + dataset_fill(start, count, stride, &data_origin1[0][0]); + + /* read data independently */ + ret = H5Dread(dataset1, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + H5P_DEFAULT, data_array1); + assert(ret != FAIL); + + /* verify the read data with original expected data */ + ret = dataset_vrfy(start, count, stride, &data_array1[0][0], &data_origin1[0][0]); + assert(ret != FAIL); + + /* read data independently */ + ret = H5Dread(dataset2, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + H5P_DEFAULT, data_array1); + assert(ret != FAIL); + + /* verify the read data with original expected data */ + ret = dataset_vrfy(start, count, stride, &data_array1[0][0], &data_origin1[0][0]); + assert(ret == 0); + + /* close dataset collectively */ + ret=H5Dclose(dataset1); + assert(ret != FAIL); + ret=H5Dclose(dataset2); + assert(ret != FAIL); + + /* release all IDs created */ + H5Sclose(file_dataspace); + + /* close the file collectively */ + H5Fclose(fid1); +} + + +/* + * Example of using the parallel HDF5 library to create two datasets + * in one HDF5 file with collective parallel access support. + * The Datasets are of sizes (number-of-mpi-processes x DIM1) x DIM2. + * Each process controls only a slab of size DIM1 x DIM2 within each + * dataset. [Note: not so yet. Datasets are of sizes DIM1xDIM2 and + * each process controls a hyperslab within.] 
+ */ + +void +phdf5writeAll(char *filename) +{ + hid_t fid1, fid2; /* HDF5 file IDs */ + hid_t acc_tpl1; /* File access templates */ + hid_t xfer_plist; /* Dataset transfer properties list */ + hid_t sid1,sid2; /* Dataspace ID */ + hid_t file_dataspace; /* File dataspace ID */ + hid_t mem_dataspace; /* memory dataspace ID */ + hid_t dataset1, dataset2; /* Dataset ID */ + int rank = SPACE1_RANK; /* Logical rank of dataspace */ + hsize_t dims1[SPACE1_RANK] = + {SPACE1_DIM1,SPACE1_DIM2}; /* dataspace dim sizes */ + DATATYPE data_array1[SPACE1_DIM1][SPACE1_DIM2]; /* data buffer */ + + hssize_t start[SPACE1_RANK]; /* for hyperslab setting */ + hsize_t count[SPACE1_RANK], stride[SPACE1_RANK]; /* for hyperslab setting */ + + herr_t ret; /* Generic return value */ + int mpi_size, mpi_rank; + + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Info info = MPI_INFO_NULL; + + if (verbose) + printf("Collective write test on file %s\n", filename); + + /* set up MPI parameters */ + MPI_Comm_size(MPI_COMM_WORLD,&mpi_size); + MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); + + /* ------------------- + * START AN HDF5 FILE + * -------------------*/ + /* setup file access template with parallel IO access. */ + acc_tpl1 = H5Pcreate (H5P_FILE_ACCESS); + assert(acc_tpl1 != FAIL); + MESG("H5Pcreate access succeed"); + /* set Parallel access with communicator */ + ret = H5Pset_mpi(acc_tpl1, comm, info); + assert(ret != FAIL); + MESG("H5Pset_mpi succeed"); + + /* create the file collectively */ + fid1=H5Fcreate(filename,H5F_ACC_TRUNC,H5P_DEFAULT,acc_tpl1); + assert(fid1 != FAIL); + MESG("H5Fcreate succeed"); + + /* Release file-access template */ + ret=H5Pclose(acc_tpl1); + assert(ret != FAIL); + + + /* -------------------------- + * Define the dimensions of the overall datasets + * and create the dataset + * ------------------------- */ + /* setup dimensionality object */ + sid1 = H5Screate_simple (SPACE1_RANK, dims1, NULL); + assert (sid1 != FAIL); + MESG("H5Screate_simple succeed"); + + + /* create a dataset collectively */ + dataset1 = H5Dcreate(fid1, DATASETNAME1, H5T_NATIVE_INT, sid1, H5P_DEFAULT); + assert(dataset1 != FAIL); + MESG("H5Dcreate succeed"); + + /* create another dataset collectively */ + dataset2 = H5Dcreate(fid1, DATASETNAME2, H5T_NATIVE_INT, sid1, H5P_DEFAULT); + assert(dataset2 != FAIL); + MESG("H5Dcreate 2 succeed"); + + /* + * Set up dimensions of the slab this process accesses. + */ + + /* Dataset1: each process takes a block of rows. 
*/ + slab_set(start, count, stride, BYROW); +if (verbose) + printf("start[]=(%d,%d), count[]=(%d,%d), total datapoints=%d\n", + start[0], start[1], count[0], count[1], count[0]*count[1]); + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset1); + assert(file_dataspace != FAIL); + MESG("H5Dget_space succeed"); + ret=H5Sset_hyperslab(file_dataspace, start, count, stride); + assert(ret != FAIL); + MESG("H5Sset_hyperslab succeed"); + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (SPACE1_RANK, count, NULL); + assert (mem_dataspace != FAIL); + + /* fill the local slab with some trivial data */ + dataset_fill(start, count, stride, &data_array1[0][0]); + MESG("data_array initialized"); + if (verbose){ + MESG("data_array created"); + dataset_print(start, count, stride, &data_array1[0][0]); + } + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + assert(xfer_plist != FAIL); + ret=H5Pset_xfer(xfer_plist, H5D_XFER_COLLECTIVE); + assert(ret != FAIL); + MESG("H5Pcreate xfer succeed"); + + /* write data collectively */ + ret = H5Dwrite(dataset1, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + xfer_plist, data_array1); + assert(ret != FAIL); + MESG("H5Dwrite succeed"); + + /* release all temporary handles. */ + /* Could have used them for dataset2 but it is cleaner */ + /* to create them again.*/ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + + /* Dataset2: each process takes a block of columns. */ + slab_set(start, count, stride, BYCOL); +if (verbose) + printf("start[]=(%d,%d), count[]=(%d,%d), total datapoints=%d\n", + start[0], start[1], count[0], count[1], count[0]*count[1]); + + /* put some trivial data in the data_array */ + dataset_fill(start, count, stride, &data_array1[0][0]); + MESG("data_array initialized"); + if (verbose){ + MESG("data_array created"); + dataset_print(start, count, stride, &data_array1[0][0]); + } + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset1); + assert(file_dataspace != FAIL); + MESG("H5Dget_space succeed"); + ret=H5Sset_hyperslab(file_dataspace, start, count, stride); + assert(ret != FAIL); + MESG("H5Sset_hyperslab succeed"); + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (SPACE1_RANK, count, NULL); + assert (mem_dataspace != FAIL); + + /* fill the local slab with some trivial data */ + dataset_fill(start, count, stride, &data_array1[0][0]); + MESG("data_array initialized"); + if (verbose){ + MESG("data_array created"); + dataset_print(start, count, stride, &data_array1[0][0]); + } + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + assert(xfer_plist != FAIL); + ret=H5Pset_xfer(xfer_plist, H5D_XFER_COLLECTIVE); + assert(ret != FAIL); + MESG("H5Pcreate xfer succeed"); + + /* write data independently */ + ret = H5Dwrite(dataset2, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + xfer_plist, data_array1); + assert(ret != FAIL); + MESG("H5Dwrite succeed"); + + /* release all temporary handles. */ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + + + /* + * All writes completed. 
Close datasets collectively + */ + ret=H5Dclose(dataset1); + assert(ret != FAIL); + MESG("H5Dclose1 succeed"); + ret=H5Dclose(dataset2); + assert(ret != FAIL); + MESG("H5Dclose2 succeed"); + + /* release all IDs created */ + H5Sclose(sid1); + + /* close the file collectively */ + H5Fclose(fid1); +} + +/* + * Example of using the parallel HDF5 library to read two datasets + * in one HDF5 file with collective parallel access support. + * The Datasets are of sizes (number-of-mpi-processes x DIM1) x DIM2. + * Each process controls only a slab of size DIM1 x DIM2 within each + * dataset. [Note: not so yet. Datasets are of sizes DIM1xDIM2 and + * each process controls a hyperslab within.] + */ + +void +phdf5readAll(char *filename) +{ + hid_t fid1, fid2; /* HDF5 file IDs */ + hid_t acc_tpl1; /* File access templates */ + hid_t xfer_plist; /* Dataset transfer properties list */ + hid_t sid1,sid2; /* Dataspace ID */ + hid_t file_dataspace; /* File dataspace ID */ + hid_t mem_dataspace; /* memory dataspace ID */ + hid_t dataset1, dataset2; /* Dataset ID */ + int rank = SPACE1_RANK; /* Logical rank of dataspace */ + hsize_t dims1[] = {SPACE1_DIM1,SPACE1_DIM2}; /* dataspace dim sizes */ + DATATYPE data_array1[SPACE1_DIM1][SPACE1_DIM2]; /* data buffer */ + DATATYPE data_origin1[SPACE1_DIM1][SPACE1_DIM2]; /* expected data buffer */ + + hssize_t start[SPACE1_RANK]; /* for hyperslab setting */ + hsize_t count[SPACE1_RANK], stride[SPACE1_RANK]; /* for hyperslab setting */ + + herr_t ret; /* Generic return value */ + int mpi_size, mpi_rank; + + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Info info = MPI_INFO_NULL; + + if (verbose) + printf("Collective read test on file %s\n", filename); + + /* set up MPI parameters */ + MPI_Comm_size(MPI_COMM_WORLD,&mpi_size); + MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); + + /* ------------------- + * OPEN AN HDF5 FILE + * -------------------*/ + /* setup file access template with parallel IO access. */ + acc_tpl1 = H5Pcreate (H5P_FILE_ACCESS); + assert(acc_tpl1 != FAIL); + MESG("H5Pcreate access succeed"); + /* set Parallel access with communicator */ + ret = H5Pset_mpi(acc_tpl1, comm, info); + assert(ret != FAIL); + MESG("H5Pset_mpi succeed"); + + /* open the file collectively */ + fid1=H5Fopen(filename,H5F_ACC_RDWR,acc_tpl1); + assert(fid1 != FAIL); + MESG("H5Fopen succeed"); + + /* Release file-access template */ + ret=H5Pclose(acc_tpl1); + assert(ret != FAIL); + + + /* -------------------------- + * Open the datasets in it + * ------------------------- */ + /* open the dataset1 collectively */ + dataset1 = H5Dopen(fid1, DATASETNAME1); + assert(dataset1 != FAIL); + MESG("H5Dopen succeed"); + + /* open another dataset collectively */ + dataset2 = H5Dopen(fid1, DATASETNAME1); + assert(dataset2 != FAIL); + MESG("H5Dopen 2 succeed"); + + /* + * Set up dimensions of the slab this process accesses. + */ + + /* Dataset1: each process takes a block of columns. 
*/ + slab_set(start, count, stride, BYCOL); +if (verbose) + printf("start[]=(%d,%d), count[]=(%d,%d), total datapoints=%d\n", + start[0], start[1], count[0], count[1], count[0]*count[1]); + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset1); + assert(file_dataspace != FAIL); + MESG("H5Dget_space succeed"); + ret=H5Sset_hyperslab(file_dataspace, start, count, stride); + assert(ret != FAIL); + MESG("H5Sset_hyperslab succeed"); + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (SPACE1_RANK, count, NULL); + assert (mem_dataspace != FAIL); + + /* fill dataset with test data */ + dataset_fill(start, count, stride, &data_origin1[0][0]); + MESG("data_array initialized"); + if (verbose){ + MESG("data_array created"); + dataset_print(start, count, stride, &data_array1[0][0]); + } + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + assert(xfer_plist != FAIL); + ret=H5Pset_xfer(xfer_plist, H5D_XFER_COLLECTIVE); + assert(ret != FAIL); + MESG("H5Pcreate xfer succeed"); + + /* read data collectively */ + ret = H5Dread(dataset1, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + xfer_plist, data_array1); + assert(ret != FAIL); + MESG("H5Dread succeed"); + + /* verify the read data with original expected data */ + ret = dataset_vrfy(start, count, stride, &data_array1[0][0], &data_origin1[0][0]); + assert(ret != FAIL); + + /* release all temporary handles. */ + /* Could have used them for dataset2 but it is cleaner */ + /* to create them again.*/ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + + /* Dataset2: each process takes a block of rows. */ + slab_set(start, count, stride, BYROW); +if (verbose) + printf("start[]=(%d,%d), count[]=(%d,%d), total datapoints=%d\n", + start[0], start[1], count[0], count[1], count[0]*count[1]); + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset1); + assert(file_dataspace != FAIL); + MESG("H5Dget_space succeed"); + ret=H5Sset_hyperslab(file_dataspace, start, count, stride); + assert(ret != FAIL); + MESG("H5Sset_hyperslab succeed"); + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (SPACE1_RANK, count, NULL); + assert (mem_dataspace != FAIL); + + /* fill dataset with test data */ + dataset_fill(start, count, stride, &data_origin1[0][0]); + MESG("data_array initialized"); + if (verbose){ + MESG("data_array created"); + dataset_print(start, count, stride, &data_array1[0][0]); + } + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + assert(xfer_plist != FAIL); + ret=H5Pset_xfer(xfer_plist, H5D_XFER_COLLECTIVE); + assert(ret != FAIL); + MESG("H5Pcreate xfer succeed"); + + /* read data independently */ + ret = H5Dread(dataset2, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + xfer_plist, data_array1); + assert(ret != FAIL); + MESG("H5Dread succeed"); + + /* verify the read data with original expected data */ + ret = dataset_vrfy(start, count, stride, &data_array1[0][0], &data_origin1[0][0]); + assert(ret != FAIL); + + /* release all temporary handles. */ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + + + /* + * All reads completed. 
Close datasets collectively + */ + ret=H5Dclose(dataset1); + assert(ret != FAIL); + MESG("H5Dclose1 succeed"); + ret=H5Dclose(dataset2); + assert(ret != FAIL); + MESG("H5Dclose2 succeed"); + + /* close the file collectively */ + H5Fclose(fid1); +} + +/* + * test file access by communicator besides COMM_WORLD. + * Split COMM_WORLD into two, one (even_comm) contains the original + * processes of even ranks. The other (odd_comm) contains the original + * processes of odd ranks. Processes in even_comm creates a file, then + * cloose it, using even_comm. Processes in old_comm just do a barrier + * using odd_comm. Then they all do a barrier using COMM_WORLD. + * If the file creation and cloose does not do correct collective action + * according to the communicator argument, the processes will freeze up + * sooner or later due to barrier mixed up. + */ +void +test_split_comm_access(char *filenames[]) +{ + int mpi_size, myrank; + MPI_Comm comm; + MPI_Info info = MPI_INFO_NULL; + int color, mrc; + int newrank, newprocs; + hid_t fid; /* file IDs */ + hid_t acc_tpl; /* File access properties */ + herr_t ret; /* generic return value */ + + if (verbose) + printf("Independent write test on file %s %s\n", + filenames[0], filenames[1]); + + /* set up MPI parameters */ + MPI_Comm_size(MPI_COMM_WORLD,&mpi_size); + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + color = myrank%2; + mrc = MPI_Comm_split (MPI_COMM_WORLD, color, myrank, &comm); + assert(mrc==MPI_SUCCESS); + MPI_Comm_size(comm,&newprocs); + MPI_Comm_rank(comm,&newrank); + + if (color){ + /* odd-rank processes */ + mrc = MPI_Barrier(comm); + assert(mrc==MPI_SUCCESS); + }else{ + /* even-rank processes */ + /* setup file access template */ + acc_tpl = H5Pcreate (H5P_FILE_ACCESS); + assert(acc_tpl != FAIL); + + /* set Parallel access with communicator */ + ret = H5Pset_mpi(acc_tpl, comm, info); + assert(ret != FAIL); + + /* create the file collectively */ + fid=H5Fcreate(filenames[color],H5F_ACC_TRUNC,H5P_DEFAULT,acc_tpl); + assert(fid != FAIL); + MESG("H5Fcreate succeed"); + + /* Release file-access template */ + ret=H5Pclose(acc_tpl); + assert(ret != FAIL); + + ret=H5Fclose(fid); + assert(ret != FAIL); + } + if (myrank == 0){ + mrc = MPI_File_delete(filenames[color], info); + assert(mrc==MPI_SUCCESS); + } +} + +/* + * Show command usage + */ +void +usage() +{ + printf("Usage: testphdf5 [-r] [-w] [-v]\n"); + printf("\t-r\tno read\n"); + printf("\t-w\tno write\n"); + printf("\t-v\tverbose on\n"); + printf("\tdefault do write then read\n"); + printf("\n"); +} + + +/* + * parse the command line options + */ +int +parse_options(int argc, char **argv){ + while (--argc){ + if (**(++argv) != '-'){ + break; + }else{ + switch(*(*argv+1)){ + case 'r': doread = 0; + break; + case 'w': dowrite = 0; + break; + case 'v': verbose = 1; + break; + default: usage(); + nerrors++; + return(1); + } + } + } + return(0); +} + + +main(int argc, char **argv) +{ + char *filenames[]={ "ParaEg1.h5f", "ParaEg2.h5f" }; + + int mpi_namelen; + char mpi_name[MPI_MAX_PROCESSOR_NAME]; + + MPI_Init(&argc,&argv); + MPI_Comm_size(MPI_COMM_WORLD,&mpi_size); + MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); + MPI_Get_processor_name(mpi_name,&mpi_namelen); + /* Make sure datasets can be divided into equal chunks by the processes */ + if ((SPACE1_DIM1 % mpi_size) || (SPACE1_DIM2 % mpi_size)){ + printf("DIM1(%d) and DIM2(%d) must be multiples of processes (%d)\n", + SPACE1_DIM1, SPACE1_DIM2, mpi_size); + nerrors++; + goto finish; + } + + if (parse_options(argc, argv) != 0) + goto finish; + + if 
(dowrite){ + MPI_BANNER("testing PHDF5 dataset using split communicators..."); + test_split_comm_access(filenames); + MPI_BANNER("testing PHDF5 dataset independent write..."); + phdf5writeInd(filenames[0]); + MPI_BANNER("testing PHDF5 dataset collective write..."); + phdf5writeAll(filenames[1]); + } + if (doread){ + MPI_BANNER("testing PHDF5 dataset independent read..."); + phdf5readInd(filenames[0]); + MPI_BANNER("testing PHDF5 dataset collective read..."); + phdf5readAll(filenames[1]); + } + + if (!(dowrite || doread)){ + usage(); + nerrors++; + } + +finish: + if (mpi_rank == 0){ /* only process 0 reports */ + if (nerrors) + printf("***PHDF5 tests detected %d errors***\n", nerrors); + else{ + printf("===================================\n"); + printf("PHDF5 tests finished with no errors\n"); + printf("===================================\n"); + } + } + MPI_Finalize(); + + return(nerrors); +} + diff --git a/doc/html/ph5implement.txt b/doc/html/ph5implement.txt new file mode 100644 index 0000000..2fcbb3d --- /dev/null +++ b/doc/html/ph5implement.txt @@ -0,0 +1,27 @@ +Release information for parallel HDF5 +------------------------------------- + ++) Current release supports independent access to fixed dimension datasets + only. + ++) The comm and info arguments of H5Pset_mpi are not used. All parallel + I/O are done via MPI_COMM_WORLD. Access_mode for H5Pset_mpi can be + H5ACC_INDEPENDENT only. + ++) This release of parallel HDF5 has been tested on IBM SP2 and SGI + Origin 2000 systems. It uses the ROMIO version of MPIO interface + for parallel I/O supports. + ++) Useful URL's. + Parallel HDF webpage: "http://hdf.ncsa.uiuc.edu/Parallel_HDF/" + ROMIO webpage: "http://www.mcs.anl.gov/home/thakur/romio/" + ++) Some to-do items for future releases + support for Intel Teraflop platform. + support for unlimited dimension datasets. + support for file access via a communicator besides MPI_COMM_WORLD. + support for collective access to datasets. + support for independent create/open of datasets. + +---- +Last updated: Feb 16, 1998. diff --git a/doc/html/pipe1.gif b/doc/html/pipe1.gif new file mode 100644 index 0000000..3b489a6 Binary files /dev/null and b/doc/html/pipe1.gif differ diff --git a/doc/html/pipe1.obj b/doc/html/pipe1.obj new file mode 100644 index 0000000..41f3461 --- /dev/null +++ b/doc/html/pipe1.obj @@ -0,0 +1,136 @@ +%TGIF 3.0-p5 +state(1,33,100,0,0,0,8,1,9,1,1,0,0,0,0,1,1,'Helvetica',0,17,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1408,1088,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +box('black',64,64,128,256,0,1,1,22,0,0,0,0,0,'1',[ +]). +box('black',80,96,112,224,26,1,1,23,0,0,0,0,0,'1',[ +]). +poly('black',2,[ + 128,160,912,160],1,2,1,24,0,0,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',5,[ + 160,160,144,224,160,272,176,224,160,160],1,2,1,25,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',5,[ + 848,160,832,224,848,272,864,224,848,160],1,2,1,34,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +box('black',464,192,496,256,26,1,1,39,0,0,0,0,0,'1',[ +]). +poly('black',2,[ + 160,224,464,224],1,2,1,40,0,26,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',2,[ + 496,224,848,224],1,2,1,41,0,26,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',5,[ + 192,224,176,288,192,336,208,288,192,224],1,2,1,42,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',5,[ + 432,224,416,288,432,336,448,288,432,224],1,2,1,43,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). 
+poly('black',2,[ + 192,288,432,288],1,2,1,44,0,26,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +box('black',464,352,496,416,26,1,1,45,0,0,0,0,0,'1',[ +]). +poly('black',5,[ + 528,224,512,288,528,336,544,288,528,224],1,2,1,46,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',5,[ + 816,224,800,288,816,336,832,288,816,224],1,2,1,47,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',2,[ + 528,288,816,288],1,2,1,48,0,26,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',5,[ + 464,256,456,304,464,328,488,304,488,256],1,2,1,62,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',2,[ + 480,352,488,304],2,2,1,85,0,0,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +box('black',912,64,976,256,0,1,1,87,0,0,0,0,0,'1',[ +]). +box('black',928,96,960,224,26,1,1,88,0,0,0,0,0,'1',[ +]). +text('black',96,48,'Helvetica',0,17,1,1,0,1,21,15,89,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "File"]). +text('black',944,48,'Helvetica',0,17,1,1,0,1,64,15,93,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Application"]). +text('black',480,144,'Helvetica',0,17,1,1,0,1,65,15,99,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5D_read()"]). +text('black',480,128,'Helvetica',0,17,1,1,0,1,58,15,108,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5Dread()"]). +text('black',304,208,'Helvetica',0,17,1,1,0,1,86,15,115,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_arr_read()"]). +text('black',304,192,'Helvetica',0,17,1,1,0,1,99,15,119,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5S_simp_fgath()"]). +text('black',296,288,'Helvetica',0,17,1,1,0,1,101,15,125,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_block_read()"]). +text('black',296,304,'Helvetica',0,17,1,1,0,1,90,15,132,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_low_read()"]). +text('black',296,320,'Helvetica',0,17,1,1,0,1,98,15,136,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_sec2_read()"]). +text('black',296,336,'Helvetica',0,17,1,1,0,1,33,15,140,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "read()"]). +text('black',664,208,'Helvetica',0,17,1,1,0,1,106,15,146,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5V_stride_copy()"]). +text('black',664,176,'Helvetica',0,17,1,1,0,1,104,15,150,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5S_simp_mscat()"]). +text('black',664,272,'Helvetica',0,17,1,1,0,1,54,15,154,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "memcpy()"]). +text('black',384,392,'Helvetica',0,17,1,1,0,1,105,15,170,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5T_conv_struct()"]). +poly('black',4,[ + 392,384,400,352,440,368,456,336],1,1,1,172,1,0,0,0,8,3,0,0,0,'1','8','3', + "6",[ +]). +text('black',480,176,'Helvetica',0,17,1,1,0,1,44,15,176,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "TCONV"]). +text('black',480,416,'Helvetica',0,17,1,1,0,1,25,15,182,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "BKG"]). +box('black',48,32,992,512,0,1,1,186,0,0,0,0,0,'1',[ +]). +poly('black',5,[ + 72,392,56,456,72,504,88,456,72,392],1,2,1,188,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +text('black',96,448,'Helvetica',0,17,1,0,0,1,46,15,189,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "== Loop"]). +poly('black',3,[ + 48,384,152,384,152,512],0,1,1,191,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +text('black',480,40,'Helvetica',0,24,1,1,0,1,380,29,197,0,24,5,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Fig 1: Internal Contiguous Storage"]). +text('black',136,144,'Helvetica',0,17,1,1,0,1,9,15,201,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "A"]). +text('black',160,208,'Helvetica',0,17,1,1,0,1,8,15,207,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "B"]). 
+text('black',192,272,'Helvetica',0,17,1,1,0,1,9,15,211,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "C"]). +text('black',504,208,'Helvetica',0,17,1,1,0,1,8,15,215,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "E"]). +text('black',528,272,'Helvetica',0,17,1,1,0,1,8,15,223,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "F"]). +text('black',464,304,'Helvetica',0,17,1,1,0,1,9,15,231,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "D"]). +text('black',664,192,'Helvetica',0,17,1,1,0,1,107,15,324,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5V_hyper_copy()"]). diff --git a/doc/html/pipe2.gif b/doc/html/pipe2.gif new file mode 100644 index 0000000..3a0c947 Binary files /dev/null and b/doc/html/pipe2.gif differ diff --git a/doc/html/pipe2.obj b/doc/html/pipe2.obj new file mode 100644 index 0000000..70d9c18 --- /dev/null +++ b/doc/html/pipe2.obj @@ -0,0 +1,168 @@ +%TGIF 3.0-p5 +state(1,33,100,0,0,0,8,1,9,1,1,1,1,0,0,1,1,'Helvetica',0,17,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1408,1088,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +box('black',64,64,128,256,0,1,1,22,0,0,0,0,0,'1',[ +]). +box('black',80,96,112,224,26,1,1,23,0,0,0,0,0,'1',[ +]). +poly('black',2,[ + 128,160,912,160],1,2,1,24,0,0,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',5,[ + 160,160,144,224,160,272,176,224,160,160],1,2,1,25,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',5,[ + 848,160,832,224,848,272,864,224,848,160],1,2,1,34,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +box('black',464,192,496,256,26,1,1,39,0,0,0,0,0,'1',[ +]). +poly('black',2,[ + 160,224,464,224],1,2,1,40,0,26,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',2,[ + 496,224,848,224],1,2,1,41,0,26,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',5,[ + 192,224,176,288,192,336,208,288,192,224],1,2,1,42,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',5,[ + 432,224,416,288,432,336,448,288,432,224],1,2,1,43,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',2,[ + 192,288,432,288],1,2,1,44,0,26,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +box('black',464,352,496,416,26,1,1,45,0,0,0,0,0,'1',[ +]). +poly('black',5,[ + 528,224,512,288,528,336,544,288,528,224],1,2,1,46,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',5,[ + 816,224,800,288,816,336,832,288,816,224],1,2,1,47,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',2,[ + 528,288,816,288],1,2,1,48,0,26,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',5,[ + 848,240,848,352,832,384,800,384,496,384],1,2,1,55,1,0,0,0,10,4,0,0,0,'2','10','4', + "70",[ +]). +poly('black',5,[ + 528,384,512,448,528,496,544,448,528,384],1,2,1,57,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',5,[ + 800,384,784,448,800,496,816,448,800,384],1,2,1,58,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',2,[ + 800,448,528,448],1,2,1,61,0,0,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',5,[ + 464,256,456,304,464,328,488,304,488,256],1,2,1,62,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',2,[ + 480,352,488,304],0,2,1,85,0,0,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +box('black',912,64,976,256,0,1,1,87,0,0,0,0,0,'1',[ +]). +box('black',928,96,960,224,26,1,1,88,0,0,0,0,0,'1',[ +]). +text('black',96,48,'Helvetica',0,17,1,1,0,1,21,15,89,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "File"]). +text('black',944,48,'Helvetica',0,17,1,1,0,1,64,15,93,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Application"]). +text('black',480,144,'Helvetica',0,17,1,1,0,1,65,15,99,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5D_read()"]). 
+text('black',480,128,'Helvetica',0,17,1,1,0,1,58,15,108,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5Dread()"]). +text('black',304,208,'Helvetica',0,17,1,1,0,1,86,15,115,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_arr_read()"]). +text('black',304,192,'Helvetica',0,17,1,1,0,1,99,15,119,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5S_simp_fgath()"]). +text('black',296,288,'Helvetica',0,17,1,1,0,1,101,15,125,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_block_read()"]). +text('black',296,304,'Helvetica',0,17,1,1,0,1,90,15,132,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_low_read()"]). +text('black',296,320,'Helvetica',0,17,1,1,0,1,98,15,136,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_sec2_read()"]). +text('black',296,336,'Helvetica',0,17,1,1,0,1,33,15,140,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "read()"]). +text('black',664,208,'Helvetica',0,17,1,1,0,1,106,15,146,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5V_stride_copy()"]). +text('black',664,176,'Helvetica',0,17,1,1,0,1,104,15,150,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5S_simp_mscat()"]). +text('black',664,272,'Helvetica',0,17,1,1,0,1,54,15,154,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "memcpy()"]). +text('black',672,368,'Helvetica',0,17,1,1,0,1,106,15,158,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5V_stride_copy()"]). +text('black',672,336,'Helvetica',0,17,1,1,0,1,105,15,162,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5S_simp_mgath()"]). +text('black',672,432,'Helvetica',0,17,1,1,0,1,54,15,166,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "memcpy()"]). +text('black',384,392,'Helvetica',0,17,1,1,0,1,105,15,170,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5T_conv_struct()"]). +poly('black',4,[ + 392,384,400,352,440,368,456,336],1,1,1,172,1,0,0,0,8,3,0,0,0,'1','8','3', + "6",[ +]). +text('black',480,176,'Helvetica',0,17,1,1,0,1,44,15,176,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "TCONV"]). +text('black',480,416,'Helvetica',0,17,1,1,0,1,25,15,182,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "BKG"]). +box('black',48,32,992,512,0,1,1,186,0,0,0,0,0,'1',[ +]). +poly('black',5,[ + 72,392,56,456,72,504,88,456,72,392],1,2,1,188,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +text('black',96,448,'Helvetica',0,17,1,0,0,1,46,15,189,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "== Loop"]). +poly('black',3,[ + 48,384,152,384,152,512],0,1,1,191,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +text('black',480,40,'Helvetica',0,24,1,1,0,1,404,29,197,0,24,5,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Fig 2: Partially Initialized Destination"]). +text('black',136,144,'Helvetica',0,17,1,1,0,1,9,15,201,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "A"]). +text('black',160,208,'Helvetica',0,17,1,1,0,1,8,15,207,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "B"]). +text('black',192,272,'Helvetica',0,17,1,1,0,1,9,15,211,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "C"]). +text('black',504,208,'Helvetica',0,17,1,1,0,1,8,15,215,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "E"]). +text('black',528,272,'Helvetica',0,17,1,1,0,1,8,15,223,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "F"]). +text('black',856,288,'Helvetica',0,17,1,1,0,1,9,15,225,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "G"]). +text('black',800,432,'Helvetica',0,17,1,1,0,1,9,15,229,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H"]). +text('black',464,304,'Helvetica',0,17,1,1,0,1,9,15,231,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "D"]). +poly('black',4,[ + 848,240,848,224,864,224,904,224],0,2,1,318,1,0,0,0,10,4,0,0,0,'2','10','4', + "6",[ +]). +text('black',664,192,'Helvetica',0,17,1,1,0,1,107,15,326,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5V_hyper_copy()"]). 
+text('black',672,352,'Helvetica',0,17,1,1,0,1,107,15,334,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5V_hyper_copy()"]). diff --git a/doc/html/pipe3.gif b/doc/html/pipe3.gif new file mode 100644 index 0000000..26d82ad Binary files /dev/null and b/doc/html/pipe3.gif differ diff --git a/doc/html/pipe3.obj b/doc/html/pipe3.obj new file mode 100644 index 0000000..cdfef7c --- /dev/null +++ b/doc/html/pipe3.obj @@ -0,0 +1,70 @@ +%TGIF 3.0-p5 +state(1,33,100,0,0,0,8,1,9,1,1,0,0,0,0,1,1,'Helvetica',0,17,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1408,1088,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +box('black',64,64,128,256,0,1,1,22,0,0,0,0,0,'1',[ +]). +box('black',80,96,112,224,26,1,1,23,0,0,0,0,0,'1',[ +]). +poly('black',2,[ + 128,160,912,160],1,2,1,24,0,0,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +box('black',912,64,976,256,0,1,1,87,0,0,0,0,0,'1',[ +]). +box('black',928,96,960,224,26,1,1,88,0,0,0,0,0,'1',[ +]). +text('black',96,48,'Helvetica',0,17,1,1,0,1,21,15,89,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "File"]). +text('black',944,48,'Helvetica',0,17,1,1,0,1,64,15,93,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Application"]). +text('black',480,104,'Helvetica',0,17,1,1,0,1,65,15,99,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5D_read()"]). +text('black',480,88,'Helvetica',0,17,1,1,0,1,58,15,108,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5Dread()"]). +box('black',48,32,992,512,0,1,1,186,0,0,0,0,0,'1',[ +]). +poly('black',5,[ + 72,392,56,456,72,504,88,456,72,392],1,2,1,188,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +text('black',96,448,'Helvetica',0,17,1,0,0,1,46,15,189,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "== Loop"]). +poly('black',3,[ + 48,384,152,384,152,512],0,1,1,191,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +text('black',480,40,'Helvetica',0,24,1,1,0,1,295,29,197,0,24,5,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Fig 3: No Type Conversion"]). +text('black',136,144,'Helvetica',0,17,1,1,0,1,9,15,201,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "A"]). +poly('black',5,[ + 152,160,136,224,152,272,168,224,152,160],1,2,1,273,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +text('black',480,120,'Helvetica',0,17,1,1,0,1,96,15,277,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5S_simp_read()"]). +text('black',480,136,'Helvetica',0,17,1,1,0,1,86,15,281,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_arr_read()"]). +poly('black',5,[ + 880,160,864,224,880,272,896,224,880,160],1,2,1,283,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',2,[ + 152,224,880,224],1,2,1,286,0,0,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +text('black',480,232,'Helvetica',0,17,1,1,0,1,101,15,291,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_block_read()"]). +text('black',480,248,'Helvetica',0,17,1,1,0,1,90,15,293,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_low_read()"]). +text('black',480,264,'Helvetica',0,17,1,1,0,1,98,15,309,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_sec2_read()"]). +text('black',480,280,'Helvetica',0,17,1,1,0,1,33,15,311,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "read()"]). +text('black',176,208,'Helvetica',0,17,1,1,0,1,8,15,418,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "B"]). diff --git a/doc/html/pipe4.gif b/doc/html/pipe4.gif new file mode 100644 index 0000000..a3a857b Binary files /dev/null and b/doc/html/pipe4.gif differ diff --git a/doc/html/pipe4.obj b/doc/html/pipe4.obj new file mode 100644 index 0000000..6f50123 --- /dev/null +++ b/doc/html/pipe4.obj @@ -0,0 +1,92 @@ +%TGIF 3.0-p5 +state(1,33,100,0,0,0,8,1,9,1,1,1,2,1,0,1,0,'Helvetica',0,17,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1408,1088,0,0,2880). 
+% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +box('black',64,64,128,256,0,1,1,22,0,0,0,0,0,'1',[ +]). +box('black',80,96,112,224,26,1,1,23,0,0,0,0,0,'1',[ +]). +poly('black',2,[ + 128,160,912,160],1,2,1,24,0,0,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +box('black',912,96,944,224,26,1,1,88,0,0,0,0,0,'1',[ +]). +text('black',96,48,'Helvetica',0,17,1,1,0,1,21,15,89,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "File"]). +text('black',928,72,'Helvetica',0,17,1,1,0,1,32,15,93,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Buffer"]). +box('black',48,32,992,512,0,1,1,186,0,0,0,0,0,'1',[ +]). +poly('black',5,[ + 72,392,56,456,72,504,88,456,72,392],1,2,1,188,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +text('black',96,448,'Helvetica',0,17,1,0,0,1,46,15,189,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "== Loop"]). +poly('black',3,[ + 48,384,152,384,152,512],0,1,1,191,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +text('black',480,40,'Helvetica',0,24,1,1,0,1,372,29,197,0,24,5,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Fig 4: Regularly Chunked Storage"]). +text('black',136,144,'Helvetica',0,17,1,1,0,1,9,15,201,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "A"]). +text('black',480,104,'Helvetica',0,17,1,1,0,1,86,15,281,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_arr_read()"]). +text('black',480,120,'Helvetica',0,17,1,1,0,1,102,15,349,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_istore_read()"]). +text('black',480,136,'Helvetica',0,17,1,1,0,1,167,15,351,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_istore_copy_hyperslab()"]). +poly('black',5,[ + 160,160,144,224,160,272,176,224,160,160],1,2,1,362,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +poly('black',5,[ + 880,160,864,224,880,272,896,224,880,160],1,2,1,363,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +box('black',448,192,512,256,26,1,1,364,0,0,0,0,0,'1',[ +]). +text('black',480,176,'Helvetica',0,17,1,1,0,1,43,15,367,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "CHUNK"]). +poly('black',2,[ + 160,224,448,224],1,2,1,372,0,0,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +poly('black',2,[ + 512,224,880,224],1,2,1,373,0,0,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +text('black',288,224,'Helvetica',0,17,1,1,0,1,101,15,385,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_block_read()"]). +text('black',288,240,'Helvetica',0,17,1,1,0,1,90,15,387,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_low_read()"]). +text('black',288,256,'Helvetica',0,17,1,1,0,1,98,15,391,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_sec2_read()"]). +text('black',288,272,'Helvetica',0,17,1,1,0,1,33,15,395,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "read()"]). +poly('black',5,[ + 456,256,448,296,480,320,512,296,504,256],1,2,1,401,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +text('black',184,208,'Helvetica',0,17,1,1,0,1,8,15,422,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "B"]). +text('black',520,208,'Helvetica',0,17,1,1,0,1,9,15,434,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "D"]). +text('black',440,272,'Helvetica',0,17,1,1,0,1,9,15,440,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "C"]). +text('black',480,320,'Helvetica',0,17,1,1,0,1,107,15,444,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5Z_uncompress()"]). +text('black',672,224,'Helvetica',0,17,1,1,0,1,107,15,454,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5V_hyper_copy()"]). +text('black',672,240,'Helvetica',0,17,1,1,0,1,106,15,464,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5V_stride_copy()"]). +text('black',672,256,'Helvetica',0,17,1,1,0,1,54,15,466,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "memcpy()"]). 
+text('black',168,488,'Helvetica',0,17,1,0,0,1,282,15,471,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "NOTE: H5Z_uncompress() is not implemented yet."]). diff --git a/doc/html/pipe5.gif b/doc/html/pipe5.gif new file mode 100644 index 0000000..6ae0098 Binary files /dev/null and b/doc/html/pipe5.gif differ diff --git a/doc/html/pipe5.obj b/doc/html/pipe5.obj new file mode 100644 index 0000000..4738bbd --- /dev/null +++ b/doc/html/pipe5.obj @@ -0,0 +1,52 @@ +%TGIF 3.0-p5 +state(1,33,100,0,0,0,8,1,9,1,1,1,2,1,0,1,0,'Helvetica',0,17,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1408,1088,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +box('black',64,64,128,256,0,1,1,22,0,0,0,0,0,'1',[ +]). +box('black',80,96,112,224,26,1,1,23,0,0,0,0,0,'1',[ +]). +poly('black',2,[ + 128,160,912,160],1,2,1,24,0,0,0,0,10,4,0,0,0,'2','10','4', + "0",[ +]). +box('black',912,96,944,224,26,1,1,88,0,0,0,0,0,'1',[ +]). +text('black',96,48,'Helvetica',0,17,1,1,0,1,21,15,89,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "File"]). +text('black',928,72,'Helvetica',0,17,1,1,0,1,32,15,93,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Buffer"]). +box('black',48,32,992,512,0,1,1,186,0,0,0,0,0,'1',[ +]). +text('black',480,40,'Helvetica',0,24,1,1,0,1,333,29,197,0,24,5,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Fig 5: Reading a Single Chunk"]). +text('black',136,144,'Helvetica',0,17,1,1,0,1,9,15,201,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "A"]). +text('black',480,112,'Helvetica',0,17,1,1,0,1,86,15,281,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_arr_read()"]). +text('black',480,128,'Helvetica',0,17,1,1,0,1,102,15,349,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_istore_read()"]). +text('black',480,144,'Helvetica',0,17,1,1,0,1,167,15,351,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_istore_copy_hyperslab()"]). +text('black',480,160,'Helvetica',0,17,1,1,0,1,101,15,385,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_block_read()"]). +text('black',480,176,'Helvetica',0,17,1,1,0,1,90,15,387,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_low_read()"]). +text('black',480,192,'Helvetica',0,17,1,1,0,1,98,15,391,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5F_sec2_read()"]). +text('black',480,208,'Helvetica',0,17,1,1,0,1,33,15,395,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "read()"]). +text('black',864,240,'Helvetica',0,17,1,1,0,1,107,15,444,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "H5Z_uncompress()"]). +text('black',56,488,'Helvetica',0,17,1,0,0,1,282,15,471,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "NOTE: H5Z_uncompress() is not implemented yet."]). +poly('black',5,[ + 912,176,864,176,840,208,872,232,912,216],1,2,1,490,2,0,0,0,10,4,0,0,0,'2','10','4', + "",[ +]). +text('black',896,184,'Helvetica',0,17,1,0,0,1,8,15,491,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "B"]). diff --git a/doc/html/review1.html b/doc/html/review1.html new file mode 100644 index 0000000..50d55cd --- /dev/null +++ b/doc/html/review1.html @@ -0,0 +1,283 @@ + +HDF5 Draft Revised API Example Code + + +
    +

    HDF5: Revised API Example Code

    +
    + +

    Example programs/sections of code below: +

    +
    #1 +
    A simple example showing how to create a file. +
    #2 +
    An example showing how to check whether a file is an HDF5 file and list its contents. +
    #3 +
    An example showing how to create a homogeneous multi-dimensional dataset. +
    #4 +
    An example showing how to create a compound 1-D dataset. +
    #5 +
    An example showing how to create a compound multi-dimensional dataset. +
    #6 +
    An example showing how to read a generic dataset. +
    + +
    +

    Simple Example showing how to create a file.

    + +

    Notes:
    +This example creates a new HDF5 file with write access.
    +If the file already exists, the H5F_ACC_TRUNC flag would also be necessary to
    +overwrite the previous file's contents.
    +

    Code: + +

    +    hid_t file_id;      /* File ID */
    +
    +    /* Create the new file */
    +    file_id=H5Fcreate("example1.h5",0);
    +
    +    /* Close the file */
    +    H5Fclose(file_id);
    +
    +
    + +
    +

    Example showing how to check whether a file is an HDF5 file and list its contents.

    + +

    Notes:
    +This example checks whether a file is an HDF5 file and lists the contents of the
    +top-level (file-level) group.
    +

    Code: + +

    +    hid_t file_id;      /* File ID */
    +    uint32 num_items;   /* number of items in top-level group */
    +    intn i;             /* counter */
    +    char *obj_name;     /* object's name as string atom */
    +    uintn name_len;     /* object name's length in chars */
    +    uintn buf_len=0;    /* buffer length for names */
    +    char *buf=NULL;     /* buffer for names */
    +
    +    if(H5Fis_hdf5("example2.h5")==TRUE)
    +      {
    +        file_id=H5Fopen("example2.h5",H5F_ACC_RDWR|H5ACC_CREATE);
    +        num_items=H5GgetNumContents(file_id);
    +        for(i=0; i<num_items; i++)
    +          {
    +            obj_name=H5GgetNameByIndex(file_id,i,NULL,0);
    +            printf("object #%d is: %s\n",i,buf);
    +            HDfree(obj_name);
    +          } 
    +        H5Fclose(file_id);
    +      }
    +
    +
    + +
    +

    Example showing how to create a homogeneous multi-dimensional dataset.

    + +

    Notes:
    +This example creates a 4-dimensional dataset of 32-bit floating-point +numbers, corresponding to the current Scientific Dataset functionality. +This example assumes that the datatype and dataspace of the dataset will not +be re-used. + +

    Code: + +

    +    hid_t file_id;                  /* File's ID */
    +    uint32 dims[4]={6,5,4,3};       /* the size of each dimension */
    +    hid_t dataset_id;               /* new object's ID */
    +    float32 obj_data[6][5][4][3];   /* storage for the dataset's data */
    +
    +    if((file_id=H5Fcreate("example3.h5",H5F_ACC_TRUNC))>=0)
    +      {
    +        /* Create & initialize the dataset object */
    +        dataset_id=H5Mcreate(file_id,H5OBJ_DATASET,"Simple Object");
    +
    +        /* Create & initialize a datatype object */
    +        H5TsetType(dataset_id,H5TYPE_FLOAT,4,H5T_BIGENDIAN);
    +
    +        /* Initialize dimensionality of dataset */
    +        H5SsetSpace(dataset_id,4,dims);
    +
    +        <initialize data array>
    +
    +        /* Write the entire dataset out */
    +        H5Dwrite(dataset_id,H5S_SCALAR,obj_data);
    +        <or>
    +        H5Dwrite(dataset_id,dataset_id,obj_data);
    +
    +        /* Release the atoms we've created */
    +        H5Mrelease(dataset_id);
    +
    +        /* close the file */
    +        H5Fclose(file_id);
    +      }
    +
    + +
    +

    Example showing how to create a compound 1-D dataset.

    + +

    Notes:
    +This example creates a 1-dimensional dataset of compound datatype records, +corresponding to the current Vdata functionality. This example also assumes +that the datatype and dataspace will not be re-used. + +

    Code: + +

    +    hid_t file_id;              /* File's ID */
    +    uint32 dims[1]={45};        /* the size of the dimension */
    +    hid_t dataset_id;           /* object's ID */
    +    void *obj_data;             /* pointer to the dataset's data */
    +
    +    if((file_id=H5Fcreate("example4.h5",H5F_ACC_TRUNC))>=0)
    +      {
    +        /* Create & initialize the dataset object */
    +        dataset_id=H5Mcreate(file_id,H5OBJ_DATASET,"Compound Object");
    +
    +        /* Initialize datatype */
    +        H5TsetType(dataset_id,H5TYPE_STRUCT);
    +        H5TaddField(dataset_id,H5TYPE_FLOAT32,"Float32 Scalar Field",H5SPACE_SCALAR);
    +        H5TaddField(dataset_id,H5TYPE_CHAR,"Char Field",H5SPACE_SCALAR);
    +        H5TaddField(dataset_id,H5TYPE_UINT16,"UInt16 Field",H5SPACE_SCALAR);
    +        H5TendDefine(dataset_id);
    +
    +        /* Initialize dimensionality */
    +        H5SsetSpace(dataset_id,1,dims);
    +
    +        <initialize data array>
    +
    +        /* Write the entire dataset out */
    +        H5Dwrite(dataset_id,H5S_SCALAR,obj_data);
    +
    +        /* Release the atoms we've created */
    +        H5Mrelease(dataset_id);
    +
    +        /* close the file */
    +        H5Fclose(file_id);
    +      }
    +
    + +
    +

    Example showing how to create a compound multi-dimensional dataset.

    + +

    Notes:
    +This example creates a 3-dimensional dataset of compound datatype records, +roughly corresponding to a multi-dimensional Vdata functionality. This +example also shows the use of multi-dimensional fields in the compound datatype. +This example uses "stand-alone" datatypes and dataspaces. + +

    Code: + +

    +    hid_t file_id;              /* File's ID */
    +    hid_t type_id;              /* datatype's ID */
    +    hid_t dim_id;               /* dimensionality's ID */
    +    uint32 dims[3]={95,67,5};   /* the size of the dimensions */
    +    hid_t field_dim_id;         /* dimensionality ID for fields in the structure */
    +    uint32 field_dims[4];       /* array for field dimensions */
    +    hid_t dataset_id;           /* object's ID */
    +    void *obj_data;             /* pointer to the dataset's data */
    +
    +    if((file_id=H5Fcreate("example5.h5",H5F_ACC_TRUNC))>=0)
    +      {
    +        /* Create & initialize a datatype object */
    +        type_id=H5Mcreate(file_id,H5OBJ_DATATYPE,"Compound Type #1");
    +        H5TsetType(type_id,H5TYPE_STRUCT);
    +
    +        /* Create each multi-dimensional field in structure */
    +        field_dim_id=H5Mcreate(file_id,H5OBJ_DATASPACE,"Lat/Long Dims");
    +        field_dims[0]=360;
    +        field_dims[1]=720;
    +        H5SsetSpace(field_dim_id,2,field_dims);
    +        H5TaddField(type_id,H5TYPE_FLOAT32,"Lat/Long Locations",field_dim_id);
    +        H5Mrelease(field_dim_id);
    +
    +        field_dim_id=H5Mcreate(file_id,H5OBJ_DATASPACE,"Browse Dims");
    +        field_dims[0]=40;
    +        field_dims[1]=40;
    +        H5SsetSpace(field_dim_id,2,field_dims);
    +        H5TaddField(type_id,H5TYPE_CHAR,"Browse Image",field_dim_id);
    +        H5Mrelease(field_dim_id);
    +
    +        field_dim_id=H5Mcreate(file_id,H5OBJ_DATASPACE,"Multispectral Dims");
    +        field_dims[0]=80;
    +        field_dims[1]=60;
    +        field_dims[2]=40;
    +        H5SsetSpace(field_dim_id,3,field_dims);
    +        H5TaddField(type_id,H5TYPE_UINT16,"Multispectral Scans",field_dim_id);
    +        H5Mrelease(field_dim_id);
    +        H5TendDefine(type_id);
    +
    +        /* Create & initialize a dimensionality object */
    +        dim_id=H5Mcreate(file_id,H5OBJ_DATASPACE,"3-D Dim");
    +        H5SsetSpace(dim_id,3,dims);
    +
    +        /* Create & initialize the dataset object */
    +        dataset_id=H5Mcreate(file_id,H5OBJ_DATASET,"Compound Multi-Dim Object");
    +        H5DsetInfo(dataset_id,type_id,dim_id);
    +
    +        <initialize data array>
    +
    +        /* Write the entire dataset out */
+        H5Dwrite(dataset_id,H5S_SCALAR,obj_data);
    +
    +        /* Release the atoms we've created */
    +        H5Mrelease(type_id);
    +        H5Mrelease(dim_id);
    +        H5Mrelease(dataset_id);
    +
    +        /* close the file */
    +        H5Fclose(file_id);
    +      }
    +
    + +
    +

Example showing how to read a generic dataset.

    + +

    Notes:
    +This example shows how to get the information for and display a generic +dataset. + +

    Code: + +

    +    hid_t file_id;      /* File's ID */
    +    hid_t dataset_id;   /* dataset's ID in memory */
    +    uintn elem_size;    /* size of each element */
    +    uintn nelems;       /* number of elements in array */
    +    void *obj_data;     /* pointer to the dataset's data */
    +
    +    if((file_id=H5Fopen("example6.h5",0))>=0)
    +      {
+        /* Attach to the dataset object */
+        dataset_id=H5MaccessByIndex(file_id,0);
    +
    +        if(H5TbaseType(dataset_id)==H5T_COMPOUND)
    +          {
    +            <set up for compound object>
    +          } 
    +        else
    +          {
+            <set up for homogeneous object>
    +          } 
    +
    +        elem_size=H5Tsize(dataset_id);
    +        nelems=H5Snelem(dataset_id);
    +        <allocate space based on element size and number of elements >
    +
    +        /* Read in the dataset */
+        H5Dread(dataset_id,H5S_SCALAR,obj_data);
+            <or>
+        H5Dread(dataset_id,dataset_id,obj_data);
    +
    +        /* Release the atoms we've accessed */
    +        H5Mrelease(dataset_id);
    +
    +        /* close the file */
    +        H5Fclose(file_id);
    +      }
    +
    diff --git a/doc/html/review1a.html b/doc/html/review1a.html new file mode 100644 index 0000000..78a5a84 --- /dev/null +++ b/doc/html/review1a.html @@ -0,0 +1,252 @@ + + + + Group Examples + + +

    Group Examples

    + +
    +

    Background

    + +

Directories (or now Groups) are currently implemented as + a directed graph with a single entry point into the graph which + is the Root Object. The root object is usually a + group. All objects have at least one predecessor (the Root + Object always has the HDF5 file boot block as a + predecessor). The number of predecessors of a group is also + known as the hard link count or just link count. + Unlike Unix directories, HDF5 groups have no ".." entry since + any group can have multiple predecessors. Given the handle or + id of some object, producing a full name for that object + would require an expensive graph traversal. + +

A special optimization is that a file may contain a single + non-group object and no group(s). The object has one + predecessor which is the file boot block. However, once a root + group is created it never disappears (although I suppose it + could if we wanted). + +

    A special object called a Symbolic Link is simply a + name. Usually the name refers to some (other) object, but that + object need not exist. Symbolic links in HDF5 will have the + same semantics as symbolic links in Unix. + +

    The symbol table graph contains "entries" for each name. An + entry contains the file address for the object header and + possibly certain messages cached from the object header. + +

The H5G package understands the notion of opening an object, + which means that given the name of the object, a handle to the + object is returned (this isn't an API function). Objects can be + opened multiple times simultaneously through the same name or, + if the object has hard links, through other names. The name of + an object cannot be removed from a group if the object is opened + through that group (although the name can change within the + group). + +

    Below the API, object attributes can be read without opening + the object; object attributes cannot change without first + opening that object. The one exception is that the contents of a + group can change without opening the group. + +


    +

    Building a hierarchy from a flat namespace

    + +

    Assuming we have a flat name space (that is, the root object is + a group which contains names for all other objects in the file + and none of those objects are groups), then we can build a + hierarchy of groups that also refer to the objects. + +

    The file initially contains `foo' `bar' `baz' in the root + group. We wish to add groups `grp1' and `grp2' so that `grp1' + contains objects `foo' and `baz' and `grp2' contains objects + `bar' and `baz' (so `baz' appears in both groups). + +

    In either case below, one might want to move the flat objects + into some other group (like `flat') so their names don't + interfere with the rest of the hierarchy (or move the hierarchy + into a directory called `/hierarchy'). + +

    with symbolic links

    + +

    Create group `grp1' and add symbolic links called `foo' whose + value is `/foo' and `baz' whose value is `/baz'. Similarly for + `grp2'. + +

Accessing `grp1/foo' involves searching the root group for + the name `grp1', then searching that group for `foo', then + searching the root group for `foo'. Alternatively, one + could change the current working group to `grp1' and then ask for + `foo', which searches `grp1' for the name `foo', then searches + the root group for the name `foo'. + +

    Deleting `/grp1/foo' deletes the symbolic link without + affecting the `/foo' object. Deleting `/foo' leaves the + `/grp1/foo' link dangling. + +

    with hard links

    + +

    Creating the hierarchy is the same as with symbolic links. + +

    Accessing `/grp1/foo' searches the root group for the name + `grp1', then searches that group for the name `foo'. If the + current working group is `/grp1' then we just search for the + name `foo'. + +

    Deleting `/grp1/foo' leaves `/foo' and vice versa. + +

    the code

    + +

    Depending on the eventual API... + +

    +H5Gcreate (file_id, "/grp1");
    +H5Glink (file_id, H5G_HARD, "/foo", "/grp1/foo");
    +    
    + + or + +
    +group_id = H5Gcreate (root_id, "grp1");
    +H5Glink (file_id, H5G_HARD, root_id, "foo", group_id, "foo");
    +H5Gclose (group_id);
    +    
    + + +
    +

    Building a flat namespace from a hierarchy

    + +

Similar to above, but in this case we have to watch out that + we don't get two names which are the same: what happens to + `/grp1/baz' and `/grp2/baz'? If they really refer to the same + object then we just have `/baz', but what happens if they point to two + different objects? + +

The other thing to watch out for is cycles in the graph when we + traverse it to build the flat namespace. + +


    +

    Listing the Group Contents

    + +

    Two things to watch out for are that the group contents don't + appear to change in a manner which would confuse the + application, and that listing everything in a group is as + efficient as possible. + +

    Method A

    + +

    Query the number of things in a group and then query each item + by index. A trivial implementation would be O(n*n) and wouldn't + protect the caller from changes to the directory which move + entries around and therefore change their indices. + +

    +n = H5GgetNumContents (group_id);
    +for (i=0; i<n; i++) {
    +   H5GgetNameByIndex (group_id, i, ...); /*don't worry about args yet*/
    +}
    +    
    + +

    Method B

    + +

The API contains a single function that reads all information + from the specified group and returns that info through an array. + The caller is responsible for freeing the array allocated by the + query and the things to which it points. This also makes it + clear that the returned value is a snapshot of the group, which + doesn't change if the group is modified. + +

    +n = H5Glist (file_id, "/grp1", info, ...);
    +for (i=0; i<n; i++) {
    +   printf ("name = %s\n", info[i].name);
    +   free (info[i].name); /*and maybe other fields too?*/
    +}
    +free (info);
    +    
    + + Notice that it would be difficult to expand the info struct since + its definition is part of the API. + +

    Method C

    + +

The caller asks for a snapshot of the group and then accesses + items in the snapshot through various query-by-index API + functions. When finished, the caller notifies the library that + it's done with the snapshot. The word "snapshot" makes it clear + that subsequent changes to the group will not be reflected in + the snapshot_id. + +

    +snapshot_id = H5Gsnapshot (group_id); /*or perhaps group_name */
    +n = H5GgetNumContents (snapshot_id);
    +for (i=0; i<n; i++) {
+   H5GgetNameByIndex (snapshot_id, i, ...);
    +}
+H5Grelease (snapshot_id); 
    +    
    + + In fact, we could allow the user to leave off the H5Gsnapshot and + H5Grelease and use group_id in the H5GgetNumContents and + H5GgetNameByIndex so they can choose between Method A and Method + C. + +
    +

    An implementation of Method C

    + +
    +
hid_t H5Gsnapshot (hid_t group_id) +
Opens every object in the specified group and stores the + handles in an array managed by the library (linear-time + operation). Open object handles are essentially symbol table + entries with a little extra info (symbol table entries cache + certain things about the object which are also found in the + object header). Because the objects are open (A) they cannot be + removed from the group, (B) querying the object returns the + latest info even if something else has that object open, and (C) + if the object is renamed within the group then the name returned + by H5GgetNameByIndex changes accordingly. Adding new entries + to a group doesn't affect the snapshot. A sketch of one possible + implementation appears after this list. + +
char *H5GgetNameByIndex (hid_t snapshot_id, int + index) +
    Uses the open object handle from entry index of + the snapshot array to get the object name. This is a + constant-time operation. The name is updated automatically if + the object is renamed within the group. + +
    H5Gget<whatever>ByIndex...() +
    Uses the open object handle from entry index, + which is just a symbol table entry, and reads the appropriate + object header message(s) which might be cached in the symbol + table entry. This is a constant-time operation if cached, + linear in the number of messages if not cached. + +
    H5Grelease (hid_t snapshot_id) +
Closes each object referred to by the snapshot and then frees + the snapshot array. This is a linear-time operation. +
    + +
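Here is a minimal sketch of what H5Gsnapshot could do, given the description above. The helpers H5GopenByIndex and snapshot_register are hypothetical names invented for this illustration; they are not part of the proposed API.
+hid_t
+H5Gsnapshot (hid_t group_id)
+{
+    intn   i, n = H5GgetNumContents (group_id);
+    hid_t *handles = malloc (n * sizeof (hid_t));
+
+    /* Open every object in the group so that none can be removed
+     * while the snapshot exists (a linear-time operation). */
+    for (i = 0; i < n; i++)
+        handles[i] = H5GopenByIndex (group_id, i);  /* hypothetical helper */
+
+    /* Wrap the handle array in a new id for the caller (hypothetical). */
+    return snapshot_register (handles, n);
+}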
    +

    To return char* or some HDF5 string type.

    + +

    In either case, the caller has to release resources associated + with the return value, calling free() or some HDF5 function. + +

    Names in the current implementation of the H5G package don't + contain embedded null characters and are always null terminated. + +

Eventually the caller probably wants a char* so it + can pass it to some non-HDF5 function; does that require + strdup'ing the string again? Then the caller has to free() the + char* and release the HDF5 string. + +


    +
    Robb Matzke
    + + +Last modified: Fri Oct 3 09:32:10 EST 1997 + + + diff --git a/doc/html/storage.html b/doc/html/storage.html new file mode 100644 index 0000000..87ea54d --- /dev/null +++ b/doc/html/storage.html @@ -0,0 +1,274 @@ + + + + Raw Data Storage in HDF5 + + + +

    Raw Data Storage in HDF5

    + +

    This document describes the various ways that raw data is + stored in an HDF5 file and the object header messages which + contain the parameters for the storage. + +

    Raw data storage has three components: the mapping from some + logical multi-dimensional element space to the linear address + space of a file, compression of the raw data on disk, and + striping of raw data across multiple files. These components + are orthogonal. + +

Some goals of the storage mechanism are to be able to + efficiently store data which is: + +

    +
    Small +
    Small pieces of raw data can be treated as meta data and + stored in the object header. This will be achieved by storing + the raw data in the object header with message 0x0006. + Compression and striping are not supported in this case. + +
    Complete Large +
    The library should be able to store large arrays + contiguously in the file provided the user knows the final + array size a priori. The array can then be read/written in a + single I/O request. This is accomplished by describing the + storage with object header message 0x0005. Compression and + striping are not supported in this case. + +
    Sparse Large +
    A large sparse raw data array should be stored in a manner + that is space-efficient but one in which any element can still + be accessed in a reasonable amount of time. Implementation + details are below. + +
    Dynamic Size +
    One often doesn't have prior knowledge of the size of an + array. It would be nice to allow arrays to grow dynamically in + any dimension. It might also be nice to allow the array to + grow in the negative dimension directions if convenient to + implement. Implementation details are below. + +
    Subslab Access +
Some multi-dimensional arrays are almost always accessed by + subslabs. For instance, a 2-d array of pixels might always be + accessed as smaller 1k-by-1k 2-d arrays always aligned on 1k + index values. We should be able to store the array in such a + way that striding through the entire array is not necessary. + Subslab access might also be useful with compression + algorithms where each storage slab can be compressed + independently of the others. Implementation details are below. + +
    Compressed +
    Various compression algorithms can be applied to the entire + array. We're not planning to support separate algorithms (or a + single algorithm with separate parameters) for each chunk + although it would be possible to implement that in a manner + similar to the way striping across files is + implemented. + +
    Striped Across Files +
    The array access functions should support arrays stored + discontiguously across a set of files. +
    + +

    Implementation of Indexed Storage

    + +

    The Sparse Large, Dynamic Size, and Subslab Access methods + share so much code that they can be described with a single + message. The new Indexed Storage Message (0x0008) + will replace the old Chunked Object (0x0009) and + Sparse Object (0x000A) Messages. + +

    +

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + The Format of the Indexed Storage Message +
byte | byte | byte | byte

    Address of B-tree

Number of Dimensions | Reserved | Reserved | Reserved
    Reserved (4 bytes)
    Alignment for Dimension 0 (4 bytes)
    Alignment for Dimension 1 (4 bytes)
    ...
    Alignment for Dimension N (4 bytes)
    +
    + +

    The alignment fields indicate the alignment in logical space to + use when allocating new storage areas on disk. For instance, + writing every other element of a 100-element one-dimensional + array (using one HDF5 I/O partial write operation per element) + that has unit storage alignment would result in 50 + single-element, discontiguous storage segments. However, using + an alignment of 25 would result in only four discontiguous + segments. The size of the message varies with the number of + dimensions. + +
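As a reading aid, here is one way the message layout above might be mirrored in memory. This is only a sketch: the struct name, the 64-bit address width, and the fixed dimension bound are assumptions for illustration, not part of the proposed format.
+#include <stdint.h>
+
+#define ISTORE_MAX_NDIMS 32  /* assumed bound; the on-disk format is open-ended */
+
+typedef struct istore_msg_t {
+    uint64_t btree_addr;                  /* Address of B-tree */
+    uint8_t  ndims;                       /* Number of Dimensions */
+    /* the three reserved bytes and the reserved 4-byte field need
+     * no in-memory representation */
+    uint32_t alignment[ISTORE_MAX_NDIMS]; /* Alignment for Dimensions 0..N */
+} istore_msg_t;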

    A B-tree is used to point to the discontiguous portions of + storage which has been allocated for the object. All keys of a + particular B-tree are the same size and are a function of the + number of dimensions. It is therefore not possible to change the + dimensionality of an indexed storage array after its B-tree is + created. + +

    +

    + + + + + + + + + + + + + + + + + + + + + + + + +
    + The Format of a B-Tree Key +
byte | byte | byte | byte
    External File Number or Zero (4 bytes)
    Chunk Offset in Dimension 0 (4 bytes)
    Chunk Offset in Dimension 1 (4 bytes)
    ...
    Chunk Offset in Dimension N (4 bytes)
    +
    + +

    The keys within a B-tree obey an ordering based on the chunk + offsets. If the offsets in dimension-0 are equal, then + dimension-1 is used, etc. The External File Number field + contains a 1-origin offset into the External File List message + which contains the name of the external file in which that chunk + is stored. + +
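The ordering rule lends itself to a dimension-by-dimension comparison. The sketch below uses hypothetical names and mirrors the 4-byte key fields in the table above; it is an illustration, not library code.
+#include <stdint.h>
+
+#define ISTORE_MAX_NDIMS 32            /* assumed bound, as in the sketch above */
+
+typedef struct istore_key_t {
+    uint32_t file_number;              /* External File Number or zero */
+    uint32_t offset[ISTORE_MAX_NDIMS]; /* Chunk Offset in each dimension */
+} istore_key_t;
+
+/* Compare two keys: dimension 0 first, then dimension 1, etc.  The
+ * external file number does not participate in the ordering. */
+static int
+istore_key_cmp (int ndims, const istore_key_t *a, const istore_key_t *b)
+{
+    int i;
+    for (i = 0; i < ndims; i++) {
+        if (a->offset[i] < b->offset[i]) return -1;
+        if (a->offset[i] > b->offset[i]) return  1;
+    }
+    return 0;  /* both keys address the same chunk */
+}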

    Implementation of Striping

    + +

    The indexed storage will support arbitrary striping at the + chunk level; each chunk can be stored in any file. This is + accomplished by using the External File Number field of an + indexed storage B-tree key as a 1-origin offset into an External + File List Message (0x0009) which takes the form: + +

    +

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + The Format of the External File List Message +
byte | byte | byte | byte

    Name Heap Address

    Number of Slots Allocated (4 bytes)
    Number of File Names (4 bytes)
    Byte Offset of Name 1 in Heap (4 bytes)
    Byte Offset of Name 2 in Heap (4 bytes)
    ...

    Unused Slot(s)

    +
    + +

Each indexed storage array that has all or part of its data + stored in external files will contain a single external file + list message. The size of the message is determined when it + is created, but it may be possible to enlarge the + message on demand by moving it. At this time, it's not possible + for multiple arrays to share a single external file list + message. + +

    +
    + H5O_efl_t *H5O_efl_new (H5G_entry_t *object, intn + nslots_hint, intn heap_size_hint) + +
    Adds a new, empty external file list message to an object + header and returns a pointer to that message. The message + acts as a cache for file descriptors of external files that + are open. + +

    + intn H5O_efl_index (H5O_efl_t *efl, const char *filename) + +
    Gets the external file index number for a particular file name. + If the name isn't in the external file list then it's added to + the H5O_efl_t struct and immediately written to the object + header to which the external file list message belongs. Name + comparison is textual. Each name should be relative to the + directory which contains the HDF5 file. + +

    + H5F_low_t *H5O_efl_open (H5O_efl_t *efl, intn index, uintn mode) + +
    Gets a low-level file descriptor for an external file. The + external file list caches file descriptors because we might + have many more external files than there are file descriptors + available to this process. The caller should not close this file. + +

    + herr_t H5O_efl_release (H5O_efl_t *efl) + +
Releases an external file list, closes all files + associated with that list, and, if the list has been modified + since the call to H5O_efl_new, flushes the message + to disk. +
    + +
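Tying the four functions together, a caller might use them as in the sketch below. This is hypothetical: the file name, the slot and heap-size hints, the zero access mode, and the assumption that object is the H5G_entry_t of the array's object header are all invented for illustration.
+    H5O_efl_t *efl;
+    H5F_low_t *lf;
+    intn       idx;
+
+    /* Create an empty list with room for 8 names and a 512-byte heap */
+    efl = H5O_efl_new (object, 8, 512);
+
+    /* Add (or look up) a file name, relative to the HDF5 file's directory */
+    idx = H5O_efl_index (efl, "chunks1.raw");
+
+    /* Borrow a low-level descriptor from the cache; the caller must not
+     * close it because the external file list owns it */
+    lf = H5O_efl_open (efl, idx, 0 /*assumed read-only mode*/);
+
+    /* ...low-level I/O through lf... */
+
+    /* Close all files and flush the message to disk if it was modified */
+    H5O_efl_release (efl);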
    +
    Robb Matzke
    + + +Last modified: Tue Nov 25 12:36:50 EST 1997 + + + diff --git a/doc/html/study.html b/doc/html/study.html new file mode 100644 index 0000000..f9e192d --- /dev/null +++ b/doc/html/study.html @@ -0,0 +1,172 @@ + + + + Testing the chunked layout of HDF5 + + + +

    Testing the chunked layout of HDF5

    + +

This document presents the results of studying the chunked layout + policy in HDF5. A 1000 by 1000 array of integers was written to a file + dataset, extending the dataset with each write, to create, in the + end, a 5000 by 5000 array of 4-byte integers for a total data + storage size of 100 million bytes. + +

    +

    + Order that data was written +
    Fig 1: Write-order of Output Blocks +
    + +

After the array was written, it was read back in blocks that + were 500 by 500 elements in row-major order (that is, the top-left + quadrant of output block one, then the top-right quadrant of + output block one, then the top-left quadrant of output block 2, + etc.). + +

    I tried to answer two questions: +

      +
    • How does the storage overhead change as the chunk size + changes? +
    • What does the disk seek pattern look like as the chunk size + changes? +
    + +

    I started with chunk sizes that were multiples of the read + block size or k*(500, 500). + +

    +

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + Table 1: Total File Overhead +
Chunk Size (elements) | Meta Data Overhead (ppm) | Raw Data Overhead (ppm)
500 by 500 | 85.84 | 0.00
1000 by 1000 | 23.08 | 0.00
5000 by 1000 | 23.08 | 0.00
250 by 250 | 253.30 | 0.00
499 by 499 | 85.84 | 205164.84
    +
    + +
    +

    +

    + 500x500 +
    Fig 2: Chunk size is 500x500 +
    + +

    The first half of Figure 2 shows output to the file while the + second half shows input. Each dot represents a file-level I/O + request and the lines that connect the dots are for visual + clarity. The size of the request is not indicated in the + graph. The output block size is four times the chunk size which + results in four file-level write requests per block for a total + of 100 requests. Since file space for the chunks was allocated + in output order, and the input block size is 1/4 the output + block size, the input shows a staircase effect. Each input + request results in one file-level read request. The downward + spike at about the 60-millionth byte is probably the result of a + cache miss for the B-tree and the downward spike at the end is + probably a cache flush or file boot block update. + +


    +

    +

    + 1000x1000 +
Fig 3: Chunk size is 1000x1000 +
    + +

In this test I increased the chunk size to match the output + block size, and one can see from the first half of the graph that + 25 file-level write requests were issued, one for each output + block. The read half of the test shows that four times the + amount of data was read as written. This results from the fact + that HDF5 must read the entire chunk for any request that falls + within that chunk, which is done because (1) if the data is + compressed the entire chunk must be decompressed, and (2) the + library assumes that a chunk size was chosen to optimize disk + performance. + +


    +

    +

    + 5000x1000 +
Fig 4: Chunk size is 5000x1000 +
    + +

Increasing the chunk size further results in even worse + performance since both the read and write halves of the test are + re-reading and re-writing vast amounts of data. This shows + that one should be careful that chunk sizes are not much larger + than the typical partial I/O request. + +


    +

    +

    + 250x250 +
Fig 5: Chunk size is 250x250 +
    + +

If the chunk size is decreased then the amount of data + transferred between the disk and library is optimal for no + caching, but the amount of meta data required to describe the + chunk locations increases to roughly 250 parts per million. One can + also see that the final downward spike contains more file-level + write requests as the meta data is flushed to disk just before + the file is closed. + +


    +

    +

    + 499x499 +
Fig 6: Chunk size is 499x499 +
    + +

This test shows the result of choosing a chunk size which is + close to the I/O block size. Because the total size of the + array isn't a multiple of the chunk size, the library allocates + an extra zone of chunks around the top and right edges of the + array which are only partially filled. This results in + 20,516,484 extra bytes of storage, a 20% increase in the total + raw data storage size. But the amount of meta data overhead is + the same as for the 500 by 500 test. In addition, the mismatch + causes entire chunks to be read in order to update a few + elements along the edge of the chunk, which results in a 3.6-fold + increase in the amount of data transferred. + +
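As a quick check of that figure: covering each 5,000-element dimension with 499-element chunks takes ceil(5000/499) = 11 chunks spanning 5,489 elements, so storage is allocated for a 5,489 by 5,489 array. The excess is (5489*5489 - 5000*5000) elements * 4 bytes = 5,129,121 * 4 = 20,516,484 bytes, which also matches the 205,164.84 ppm raw data overhead reported in Table 1 relative to the 100 million bytes of raw data. +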


    +
    Robb Matzke
    + + +Last modified: Fri Jan 30 23:51:31 EST 1998 + + + diff --git a/doc/html/study_1000x1000.gif b/doc/html/study_1000x1000.gif new file mode 100644 index 0000000..b7d5a83 Binary files /dev/null and b/doc/html/study_1000x1000.gif differ diff --git a/doc/html/study_250x250.gif b/doc/html/study_250x250.gif new file mode 100644 index 0000000..fe35f39 Binary files /dev/null and b/doc/html/study_250x250.gif differ diff --git a/doc/html/study_499x499.gif b/doc/html/study_499x499.gif new file mode 100644 index 0000000..0d2038b Binary files /dev/null and b/doc/html/study_499x499.gif differ diff --git a/doc/html/study_5000x1000.gif b/doc/html/study_5000x1000.gif new file mode 100644 index 0000000..0f3c290 Binary files /dev/null and b/doc/html/study_5000x1000.gif differ diff --git a/doc/html/study_500x500.gif b/doc/html/study_500x500.gif new file mode 100644 index 0000000..38dd7d6 Binary files /dev/null and b/doc/html/study_500x500.gif differ diff --git a/doc/html/study_p1.gif b/doc/html/study_p1.gif new file mode 100644 index 0000000..938d133 Binary files /dev/null and b/doc/html/study_p1.gif differ diff --git a/doc/html/study_p1.obj b/doc/html/study_p1.obj new file mode 100644 index 0000000..6fbf583 --- /dev/null +++ b/doc/html/study_p1.obj @@ -0,0 +1,113 @@ +%TGIF 3.0-p5 +state(0,33,100,0,0,0,16,1,9,1,1,0,0,3,7,1,1,'Helvetica',0,24,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1088,1408,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +box('black',64,64,384,384,0,1,1,22,0,0,0,0,0,'1',[ +]). +poly('black',2,[ + 128,64,128,384],0,1,1,23,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 192,64,192,384],0,1,1,24,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 256,64,256,384],0,1,1,25,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 320,64,320,384],0,1,1,26,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 64,128,384,128],0,1,1,27,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 64,192,384,192],0,1,1,28,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 64,256,384,256],0,1,1,29,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 64,320,384,320],0,1,1,30,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +text('black',96,80,'Courier',0,17,1,1,0,1,7,14,37,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1"]). +text('black',160,80,'Courier',0,17,1,1,0,1,7,14,39,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "2"]). +text('black',224,80,'Courier',0,17,1,1,0,1,7,14,41,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "3"]). +text('black',288,80,'Courier',0,17,1,1,0,1,7,14,43,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "4"]). +text('black',352,80,'Courier',0,17,1,1,0,1,7,14,47,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "5"]). +text('black',96,144,'Courier',0,17,1,1,0,1,7,14,51,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "6"]). +text('black',160,144,'Courier',0,17,1,1,0,1,7,14,53,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "7"]). +text('black',224,144,'Courier',0,17,1,1,0,1,7,14,55,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "8"]). +text('black',288,144,'Courier',0,17,1,1,0,1,7,14,57,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "9"]). +text('black',352,144,'Courier',0,17,1,1,0,1,14,14,59,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "10"]). +text('black',96,208,'Courier',0,17,1,1,0,1,14,14,61,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "11"]). +text('black',160,208,'Courier',0,17,1,1,0,1,14,14,63,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "12"]). +text('black',224,208,'Courier',0,17,1,1,0,1,14,14,65,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "13"]). 
+text('black',288,208,'Courier',0,17,1,1,0,1,14,14,67,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "14"]). +text('black',352,208,'Courier',0,17,1,1,0,1,14,14,71,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "15"]). +text('black',96,272,'Courier',0,17,1,1,0,1,14,14,75,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "16"]). +text('black',160,272,'Courier',0,17,1,1,0,1,14,14,77,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "17"]). +text('black',224,272,'Courier',0,17,1,1,0,1,14,14,79,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "18"]). +text('black',288,272,'Courier',0,17,1,1,0,1,14,14,81,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "19"]). +text('black',352,272,'Courier',0,17,1,1,0,1,14,14,83,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "20"]). +text('black',96,336,'Courier',0,17,1,1,0,1,14,14,87,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "21"]). +text('black',160,336,'Courier',0,17,1,1,0,1,14,14,89,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "22"]). +text('black',224,336,'Courier',0,17,1,1,0,1,14,14,91,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "23"]). +text('black',288,336,'Courier',0,17,1,1,0,1,14,14,93,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "24"]). +text('black',352,336,'Courier',0,17,1,1,0,1,14,14,95,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "25"]). +poly('black',2,[ + 416,64,416,384],3,1,1,100,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 64,416,384,416],3,1,1,101,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +text('black',390,228,'Courier',0,17,1,0,0,1,14,35,102,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,1,0,[ + 390,228,390,228,425,242,0,-1000,1000,0,34,18,389,227,426,243],[ + "5,000"]). +text('black',224,432,'Courier',0,17,1,1,0,1,35,14,116,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "5,000"]). +text('black',160,512,'Courier',0,17,1,0,0,1,105,14,131,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "= 1,000 x 1,000"]). +box('black',80,480,144,544,7,1,1,134,0,0,0,0,0,'1',[ +]). +text('black',224,16,'Helvetica',0,24,1,1,0,1,296,29,144,0,24,5,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Order that data was written"]). +box('black',32,0,464,576,0,1,1,149,0,0,0,0,0,'1',[ +]). diff --git a/doc/html/symtab b/doc/html/symtab new file mode 100644 index 0000000..a657729 --- /dev/null +++ b/doc/html/symtab @@ -0,0 +1,313 @@ +A number of issues involving caching of object header messages in +symbol table entries must be resolved. + +What is the motivation for these changes? + + If we make objects completely independent of object name it allows + us to refer to one object by multiple names (a concept called hard + links in Unix file systems), which in turn provides an easy way to + share data between datasets. + + Every object in an HDF5 file has a unique, constant object header + address which serves as a handle (or OID) for the object. The + object header contains messages which describe the object. + + HDF5 allows some of the object header messages to be cached in + symbol table entries so that the object header doesn't have to be + read from disk. For instance, an entry for a directory caches the + directory disk addresses required to access that directory, so the + object header for that directory is seldom read. + + If an object has multiple names (that is, a link count greater than + one), then it has multiple symbol table entries which point to it. + All symbol table entries must agree on header messages. The + current mechanism is to turn off the caching of header messages in + symbol table entries when the header link count is more than one, + and to allow caching once the link count returns to one. 
+ + However, in the current implementation, a package is allowed to + copy a symbol table entry and use it as a private cache for the + object header. This doesn't work for a number of reasons (all but + one require a `delete symbol entry' operation). + + 1. If two packages hold copies of the same symbol table entry, + they don't notify each other of changes to the symbol table + entry. Eventually, one package reads a cached message and + gets the wrong value because the other package changed the + message in the object header. + + 2. If one package holds a copy of the symbol table entry and + some other part of HDF5 removes the object and replaces it + with some other object, then the original package will + continue to access the non-existent object using the new + object header. + + 3. If one package holds a copy of the symbol table entry and + some other part of HDF5 (re)moves the directory which + contains the object, then the package will be unable to + update the symbol table entry with the new cached + data. Packages that refer to the object by the new name will + use old cached data. + + +The basic problem is that there may be multiple copies of the object +symbol table entry floating around in the code when there should +really be at most one per hard link. + + Level 0: A copy may exist on disk as part of a symbol table node, which + is a small 1d array of symbol table entries. + + Level 1: A copy may be cached in memory as part of a symbol table node + in the H5Gnode.c file by the H5AC layer. + + Level 2a: Another package may be holding a copy so it can perform + fast lookup of any header messages that might be cached in + the symbol table entry. It can't point directly to the + cached symbol table node because that node can disappear + at any time. + + Level 2b: Packages may hold more than one copy of a symbol table + entry. For instance, if H5D_open() is called twice for + the same name, then two copies of the symbol table entry + for the dataset exist in the H5D package. + +How can level 2a and 2b be combined? + + If package data structures contained pointers to symbol table + entries instead of copies of symbol table entries and if H5G + allocated one symbol table entry per hard link, then it's trivial + for Level 2a and 2b to benefit from one another's actions since + they share the same cache. + +How does this work conceptually? + + Level 2a and 2b must notify Level 1 of their intent to use (or stop + using) a symbol table entry to access an object header. The + notification of the intent to access an object header is called + `opening' the object and releasing the access is `closing' the + object. + + Opening an object requires an object name which is used to locate + the symbol table entry to use for caching of object header + messages. The return value is a handle for the object. Figure 1 + shows the state after Dataset1 opens Object with a name that maps + through Entry1. The open request created a copy of Entry1 called + Shadow1 which exists even if SymNode1 is preempted from the H5AC + layer. + + ______ + Object / \ + SymNode1 +--------+ | + +--------+ _____\ | Header | | + | | / / +--------+ | + +--------+ +---------+ \______/ + | Entry1 | | Shadow1 | /____ + +--------+ +---------+ \ \ + : : \ + +--------+ +----------+ + | Dataset1 | + +----------+ + FIGURE 1 + + + + The SymNode1 can appear and disappear from the H5AC layer at any + time without affecting the Object Header data cached in the Shadow. 
+ The rules are: + + * If the SymNode1 is present and is about to disappear and the + Shadow1 dirty bit is set, then Shadow1 is copied over Entry1, the + Entry1 dirty bit is set, and the Shadow1 dirty bit is cleared. + + * If something requests a copy of Entry1 (for a read-only peek + request), and Shadow1 exists, then a copy (not pointer) of Shadow1 + is returned instead. + + * Entry1 cannot be deleted while Shadow1 exists. + + * Entry1 cannot change directly if Shadow1 exists since this means + that some other package has opened the object and may be modifying + it. I haven't decided if it's useful to ever change Entry1 + directly (except of course within the H5G layer itself). + + * Shadow1 is created when Dataset1 `opens' the object through + Entry1. Dataset1 is given a pointer to Shadow1 and Shadow1's + reference count is incremented. + + * When Dataset1 `closes' the Object the Shadow1 reference count is + decremented. When the reference count reaches zero, if the + Shadow1 dirty bit is set, then Shadow1's contents are copied to + Entry1, and the Entry1 dirty bit is set. Shadow1 is then deleted + if its reference count is zero. This may require reading SymNode1 + back into the H5AC layer. + +What happens when another Dataset opens the Object through Entry1? + + If the current state is represented by the top part of Figure 2, + then Dataset2 will be given a pointer to Shadow1 and the Shadow1 + reference count will be incremented to two. The Object header link + count remains at one so Object Header messages continue to be cached + by Shadow1. Dataset1 and Dataset2 benefit from one another's + actions. The resulting state is represented by Figure 2. + + _____ + SymNode1 Object / \ + +--------+ _____\ +--------+ | + | | / / | Header | | + +--------+ +---------+ +--------+ | + | Entry1 | | Shadow1 | /____ \_____/ + +--------+ +---------+ \ \ + : : _ \ + +--------+ |\ +----------+ + \ | Dataset1 | + \________ +----------+ + \ \ + +----------+ | + | Dataset2 | |- New Dataset + +----------+ | + / + FIGURE 2 + + +What happens when the link count for Object increases while Dataset +has the Object open? + + SymNode2 + +--------+ + SymNode1 Object | | + +--------+ ____\ +--------+ /______ +--------+ + | | / / | header | \ `| Entry2 | + +--------+ +---------+ +--------+ +--------+ + | Entry1 | | Shadow1 | /____ : : + +--------+ +---------+ \ \ +--------+ + : : \ + +--------+ +----------+ \________________/ + | Dataset1 | | + +----------+ New Link + + FIGURE 3 + + The current state is represented by the left part of Figure 3. To + create a new link the Object Header had to be located by traversing + through Entry1/Shadow1. On the way through, the Entry1/Shadow1 + cache is invalidated and the Object Header link count is + incremented. Entry2 is then added to SymNode2. + + Since the Object Header link count is greater than one, Object + header data will not be cached in Entry1/Shadow1. + + If the initial state had been all of Figure 3 and a third link is + being added and Object is open by Entry1 and Entry2, then creation + of the third link will invalidate the cache in Entry1 or Entry2. It + doesn't matter which since both caches are already invalidated + anyway. + +What happens if another Dataset opens the same object by another name? + + If the current state is represented by Figure 3, then a Shadow2 is + created and associated with Entry2. However, since the Object + Header link count is more than one, nothing gets cached in Shadow2 + (or Shadow1). + +What happens if the link count decreases? 
+ + If the current state is represented by all of Figure 3 then it isn't + possible to delete Entry1 because the object is currently open + through that entry. Therefore, the link count must have + decreased because Entry2 was removed. + + As Dataset1 reads/writes messages in the Object header they will + begin to be cached in Shadow1 again because the Object header link + count is one. + +What happens if the object is removed while it's open? + + That operation is not allowed. + +What happens if the directory containing the object is deleted? + + That operation is not allowed since deleting the directory requires + that the directory be empty. The directory cannot be emptied + because the open object cannot be removed from the directory. + +What happens if the object is moved? + + Moving an object is a process consisting of creating a new + hard-link with the new name and then deleting the old name. + This will fail if the object is open. + +What happens if the directory containing the entry is moved? + + The entry and the shadow still exist and are associated with one + another. + +What if a file is flushed or closed when objects are open? + + Flushing a symbol table with open objects writes correct information + to the file since Shadow is copied to Entry before the table is + flushed. + + Closing a file with open objects will create a valid file but will + return failure. + +How is the Shadow associated with the Entry? + + A symbol table is composed of one or more symbol nodes. A node is a + small 1-d array of symbol table entries. The entries can move + around within a node and from node-to-node as entries are added or + removed from the symbol table and nodes can move around within a + symbol table, being created and destroyed as necessary. + + Since a symbol table has an object header with a unique and constant + file offset, and since H5G contains code to efficiently locate a + symbol table entry given its name, we use these two values as a key + within a shadow to associate the shadow with the symbol table + entry. + + struct H5G_shadow_t { + haddr_t stab_addr; /*symbol table header address*/ + char *name; /*entry name wrt symbol table*/ + hbool_t dirty; /*out-of-date wrt stab entry?*/ + H5G_entry_t ent; /*my copy of stab entry */ + H5G_entry_t *main; /*the level 1 entry or null */ + H5G_shadow_t *next, *prev; /*other shadows for this stab*/ + }; + + The set of shadows will be organized in a hash table of linked + lists. Each linked list will contain the shadows associated with a + particular symbol table header address and the list will be sorted + lexicographically. + + Also, each Entry will have a pointer to the corresponding Shadow or + null if there is no shadow. + + When a symbol table node is loaded into the main cache, we look up + the linked list of shadows in the shadow hash table based on the + address of the symbol table object header. We then traverse that + list matching shadows with symbol table entries. + + We assume that opening/closing objects will be a relatively + infrequent event compared with loading/flushing symbol table + nodes. Therefore, if we keep the linked list of shadows sorted it + costs O(N) to open and close objects where N is the number of open + objects in that symbol table (instead of O(1)) but it costs only + O(N) to load a symbol table node (instead of O(N^2)). + +What about the root symbol entry? + + Level 1 storage for the root symbol entry is always available since + it's stored in the hdf5_file_t struct instead of a symbol table + node. 
However, the contents of that entry can move from the file + handle to a symbol table node by H5G_mkroot(). Therefore, if the + root object is opened, we keep a shadow entry for it whose + `stab_addr' field is zero and whose `name' is null. + + For this reason, the root object should always be read through the + H5G interface. + +One more key invariant: The H5O_STAB message in a symbol table header +never changes. This allows symbol table entries to cache the H5O_STAB +message for the symbol table to which it points without worrying about +whether the cache will ever be invalidated. + + diff --git a/doc/html/tracing.html b/doc/html/tracing.html new file mode 100644 index 0000000..18ef556 --- /dev/null +++ b/doc/html/tracing.html @@ -0,0 +1,192 @@ + + + + API Tracing + + + +

    API Tracing

    + +

    Introduction

    + +

The HDF5 library is now able to trace API calls by printing the + function name, the argument names and their values, and the + return value. Some people like to see lots of output during + program execution instead of using a good symbolic debugger, and + this feature is intended for their consumption. For example, + the output from h5ls foo after turning on tracing + includes: + +

    +

    + + + + + +
    Sample Output
    +
    +H5Tcopy(type=184549388) = 184549419 (type);
    +H5Tcopy(type=184549392) = 184549424 (type);
    +H5Tlock(type=184549424) = SUCCEED;
    +H5Tcopy(type=184549393) = 184549425 (type);
    +H5Tlock(type=184549425) = SUCCEED;
    +H5Fopen(filename="foo", flags=0, access=H5P_DEFAULT) = FAIL;
    +HDF5-DIAG: Error detected in thread 0.  Back trace follows.
    +  #000: H5F.c line 1245 in H5Fopen(): unable to open file
    +    major(04): File interface
    +    minor(10): Unable to open file
    +  #001: H5F.c line 846 in H5F_open(): file does not exist
    +    major(04): File interface
    +    minor(10): Unable to open file
    +	      
    +
    +
    + +

Configuration

    + +

    This all happens with some magic in the configuration script, + the makefiles, and macros. First, from the end-user point of + view, the library must be configured with the + --enable-tracing switch. This causes the library to + include the support necessary for API tracing. + +

    +

    + + + + + +
    Configuration
    +
    +$ make distclean
    +$ sh configure --enable-tracing
    +$ make
    +	      
    +
    +
    + +

    Execution

    + +

    In order to actually get tracing output one must turn tracing + on and specify a file descriptor where the tracing output should + be written. This is done by assigning a file descriptor number + to the HDF5_TRACE environment variable. + +

    +

    + + + + + + + + +
    Execution Examples
    To display the trace on the standard error stream: +
    +$ export HDF5_TRACE=2
    +$ a.out
    +	      
    +
    To send the trace to a file: +
    +$ export HDF5_TRACE=255
    +$ a.out 255>trace-output
    +	      
    +
    +
    + +

    Performance

    + +

    If the library was not configured for tracing then there is no + unnecessary overhead since all tracing code is + excluded. + +

However, if tracing is enabled but not used there is a + small penalty. First, code size is larger because of extra + statically-declared character strings used to store argument + types and names and an extra auto variable pointer in each + function. Also, execution is slower because each function sets + and tests a local variable and each API function calls the + H5_trace() function. + +

    If tracing is enabled and turned on then the penalties from the + previous paragraph apply plus the time required to format each + line of tracing information. There is also an extra call to + H5_trace() for each API function to print the return value. + +

    Safety

    + +

    The tracing mechanism is invoked for each API function before + arguments are checked for validity. If bad arguments are passed + to an API function it could result in a segmentation fault. + However, the tracing output is line-buffered so all previous + output will appear. + +

    Completeness

    + +

    There are two API functions that don't participate in + tracing. They are H5Eprint() and + H5Eprint_cb() because their participation would + mess up output during automatic error reporting. + +

    On the other hand, a number of API functions are called during + library initialization and they print tracing information. + +

    Implementation

    + +

For those interested in the implementation here is a + description. Each API function should have a call to one of the + H5TRACE() macros immediately after the + FUNC_ENTER() macro. The first argument is the + return type encoded as a string. The second argument is the + types of all the function arguments encoded as a string. The + remaining arguments are the function arguments. This macro was + designed to be as terse and unobtrusive as possible. + +
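For instance, an instrumented function might look like the sketch below. The encoding characters ("e" for a herr_t return value, "i" for a hid_t argument) are assumptions made for this illustration; the actual encodings are defined by the tracing machinery.
+herr_t
+H5Dclose (hid_t dataset_id)
+{
+    FUNC_ENTER (H5Dclose, FAIL);
+    /* return type string, argument type string, then the arguments */
+    H5TRACE1("e","i",dataset_id);
+
+    /* ...body of the function... */
+
+    FUNC_LEAVE (SUCCEED);
+}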

    In order to keep the H5TRACE() calls synchronized + with the source code we've written a perl script which gets + called automatically just before Makefile dependencies are + calculated for the file. However, this only works when one is + using GNU make. To reinstrument the tracing explicitly, invoke + the trace program from the hdf5 bin directory with + the names of the source files that need to be updated. If any + file needs to be modified then a backup is created by appending + a tilde to the file name. + +

    +

    + + + + + +
    Explicit Instrumentation
    +
    +$ ../bin/trace *.c
    +H5E.c: in function `H5Ewalk_cb':
    +H5E.c:336: warning: trace info was not inserted
    +	      
    +
    +
    + +

    Note: The warning message is the result of a comment of the + form /*NO TRACE*/ somewhere in the function + body. Tracing information will not be updated or inserted if + such a comment exists. + +

    Error messages have the same format as a compiler so that they + can be parsed from program development environments like + Emacs. Any function which generates an error will not be + modified. + + +


    +
    Robb Matzke
    + + +Last modified: Wed Jun 17 15:45:50 EDT 1998 + + + diff --git a/doc/html/version.gif b/doc/html/version.gif new file mode 100644 index 0000000..41d4401 Binary files /dev/null and b/doc/html/version.gif differ diff --git a/doc/html/version.obj b/doc/html/version.obj new file mode 100644 index 0000000..96b5b7f --- /dev/null +++ b/doc/html/version.obj @@ -0,0 +1,96 @@ +%TGIF 3.0-p5 +state(0,33,100,0,0,0,8,1,9,1,1,0,2,1,0,1,0,'Courier',0,17,0,0,0,10,0,0,1,1,0,16,0,0,1,1,1,0,1088,1408,0,0,2880). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +page(1,"",1). +poly('black',2,[ + 128,128,128,448],0,3,1,0,0,0,0,0,12,5,0,0,0,'3','12','5', + "0",[ +]). +poly('black',2,[ + 128,128,128,64],0,3,1,1,0,0,2,0,12,5,0,0,0,'3','12','5', + "0",[ +]). +poly('black',2,[ + 128,448,128,512],0,3,1,4,0,0,2,0,12,5,0,0,0,'3','12','5', + "0",[ +]). +text('black',144,112,'Courier',0,17,1,0,0,1,42,14,22,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.3.30"]). +text('black',144,144,'Courier',0,17,1,0,0,1,42,14,30,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.3.31"]). +text('black',144,176,'Courier',0,17,1,0,0,1,42,14,32,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.3.32"]). +poly('black',2,[ + 256,208,256,448],0,3,1,34,0,0,0,0,12,5,0,0,0,'3','12','5', + "0",[ +]). +poly('black',2,[ + 256,448,256,512],0,3,1,36,0,0,2,0,12,5,0,0,0,'3','12','5', + "0",[ +]). +poly('black',2,[ + 128,192,256,208],1,1,1,37,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +text('black',144,224,'Courier',0,17,1,0,0,1,42,14,41,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.3.33"]). +text('black',144,256,'Courier',0,17,1,0,0,1,42,14,43,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.3.34"]). +text('black',272,224,'Courier',0,17,1,0,0,1,35,14,45,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.5.0"]). +text('black',272,256,'Courier',0,17,1,0,0,1,35,14,47,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.5.1"]). +text('black',272,288,'Courier',0,17,1,0,0,1,35,14,49,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.5.2"]). +text('black',272,320,'Courier',0,17,1,0,0,1,35,14,51,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.5.3"]). +text('black',144,288,'Courier',0,17,1,0,0,1,42,14,53,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.3.35"]). +text('black',144,320,'Courier',0,17,1,0,0,1,35,14,57,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.4.0"]). +text('black',144,368,'Courier',0,17,1,0,0,1,35,14,59,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.4.1"]). +text('black',272,192,'Helvetica',0,17,1,0,0,1,144,15,67,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "New development branch"]). +text('black',144,64,'Helvetica',0,17,1,0,0,1,163,15,69,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Original development branch"]). +text('black',16,208,'Helvetica',0,17,2,0,0,1,87,30,71,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Feature Freeze", + "at this point."]). +text('black',16,320,'Helvetica',0,17,2,0,0,1,84,30,73,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Public Release", + "at this point."]). +poly('black',2,[ + 104,208,128,208],1,1,1,77,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 104,320,128,320],1,1,1,78,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +poly('black',2,[ + 256,336,128,352],1,1,1,79,0,0,0,0,8,3,0,0,0,'1','8','3', + "0",[ +]). +text('black',320,368,'Helvetica',0,17,3,0,0,1,137,45,82,0,12,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "Merge a bug fix from the", + "development branch to", + "the release branch."]). +box('black',312,368,464,416,0,1,1,87,0,0,0,0,0,'1',[ +]). +poly('black',4,[ + 312,392,240,384,296,344,232,344],1,1,1,90,1,0,0,0,8,3,0,0,0,'1','8','3', + "6",[ +]). 
+box('black',8,208,104,240,0,1,1,95,0,0,0,0,0,'1',[ +]). +box('black',8,320,104,352,0,1,1,98,0,0,0,0,0,'1',[ +]). +text('black',144,408,'Courier',0,17,1,0,0,1,35,14,102,0,11,3,0,0,0,0,0,2,0,0,0,0,"",0,0,0,[ + "1.4.2"]). +box('black',0,40,480,528,0,1,1,104,0,0,0,0,0,'1',[ +]). -- cgit v0.12