<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.9.1"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>HDF5: The HDF5 Data Model and File Structure</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtreedata.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&dn=gpl-2.0.txt GPL-v2 */
$(document).ready(function() { init_search(); });
/* @license-end */
</script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
<link href="hdf5doxy.css" rel="stylesheet" type="text/css"/>
<!-- <link href="hdf5doxy.css" rel="stylesheet" type="text/css"/>
-->
<script type="text/javascript" src="hdf5_navtree_hacks.js"></script>
</head>
<body>
<div style="background:#FFDDDD;font-size:120%;text-align:center;margin:0;padding:5px">Please, help us to better serve our user community by answering the following short survey: <a href="https://www.hdfgroup.org/website-survey/">https://www.hdfgroup.org/website-survey/</a></div>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectlogo"><img alt="Logo" src="HDFG-logo.png"/></td>
<td id="projectalign" style="padding-left: 0.5em;">
<div id="projectname"><a href="https://www.hdfgroup.org">HDF5</a>
 <span id="projectnumber">1.15.0.4261552</span>
</div>
<div id="projectbrief">API Reference</div>
</td>
<td> <div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.svg"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.svg" alt=""/></a>
</span>
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.9.1 -->
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&dn=gpl-2.0.txt GPL-v2 */
var searchBox = new SearchBox("searchBox", "search",false,'Search','.html');
/* @license-end */
</script>
</div><!-- top -->
<div id="side-nav" class="ui-resizable side-nav-resizable">
<div id="nav-tree">
<div id="nav-tree-contents">
<div id="nav-sync" class="sync"></div>
</div>
</div>
<div id="splitbar" style="-moz-user-select:none;"
class="ui-resizable-handle">
</div>
</div>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&dn=gpl-2.0.txt GPL-v2 */
$(document).ready(function(){initNavTree('_h5_d_m__u_g.html',''); initResizable(); });
/* @license-end */
</script>
<div id="doc-content">
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div class="PageDoc"><div class="header">
<div class="headertitle">
<div class="title">The HDF5 Data Model and File Structure </div> </div>
</div><!--header-->
<div class="contents">
<div class="textblock"><h1><a class="anchor" id="sec_data_model"></a>
The HDF5 Data Model and File Structure</h1>
<h2><a class="anchor" id="subsec_data_model_intro"></a>
Introduction</h2>
<p>The Hierarchical Data Format (HDF) implements a model for managing and storing data. The model includes an abstract data model and an abstract storage model (the data format), and libraries to implement the abstract model and to map the storage model to different storage mechanisms. The HDF5 library provides a programming interface to a concrete implementation of the abstract models. The library also implements a model of data transfer, an efficient movement of data from one stored representation to another stored representation. The figure below illustrates the relationships between the models and implementations. This chapter explains these models in detail.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig1.gif" alt=""/>
<div class="caption">
HDF5 models and implementations</div></div>
</td></tr>
</table>
<p>The <em>Abstract Data Model</em> is a conceptual model of data, data types, and data organization. The abstract data model is independent of storage medium or programming environment. The <em>Storage Model</em> is a standard representation for the objects of the abstract data model. The <a href="https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html">HDF5 File Format Specification</a> defines the storage model.</p>
<p>The <em>Programming Model</em> is a model of the computing environment and includes platforms from small single systems to large multiprocessors and clusters. The programming model manipulates (instantiates, populates, and retrieves) objects from the abstract data model.</p>
<p>The <em>Library</em> is the concrete implementation of the programming model. The library exports the HDF5 APIs as its interface. In addition to implementing the objects of the abstract data model, the library manages data transfers from one stored form to another. Data transfer examples include reading from disk to memory and writing from memory to disk.</p>
<p><em>Stored Data</em> is the concrete implementation of the storage model. The <em>Storage Model</em> is mapped to several storage mechanisms including single disk files, multiple files (family of files), and memory representations.</p>
<p>The HDF5 library is a C module that implements the programming model and abstract data model. The HDF5 library calls the operating system or other storage management software (for example, the MPI/IO Library) to store and retrieve persistent data. The HDF5 library may also link to other software such as filters for compression. The HDF5 library is linked to an application program which may be written in C, C++, Fortran, or Java. The application program implements problem specific algorithms and data structures and calls the HDF5 library to store and retrieve data. The figure below shows the dependencies of these modules.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig2.gif" alt=""/>
<div class="caption">
The library, the application program, and other modules</div></div>
</td></tr>
</table>
<p>It is important to realize that each of the software components manages data using models and data structures that are appropriate to the component. When data is passed between layers (during storage or retrieval), it is transformed from one representation to another. The figure below suggests some of the kinds of data structures used in the different layers.</p>
<p>The <em>Application Program</em> uses data structures that represent the problem and algorithms including variables, tables, arrays, and meshes among other data structures. Depending on its design and function, an application may have quite a few different kinds of data structures and different numbers and sizes of objects.</p>
<p>The <em>HDF5 Library</em> implements the objects of the HDF5 abstract data model. Some of these objects include groups, datasets, and attributes. The application program maps the application data structures to a hierarchy of HDF5 objects. Each application will create a mapping best suited to its purposes.</p>
<p>The objects of the HDF5 abstract data model are mapped to the objects of the HDF5 storage model, and stored in a storage medium. The stored objects include header blocks, free lists, data blocks, B-trees, and other objects. Each group or dataset is stored as one or more header and data blocks. </p><dl class="section see"><dt>See also</dt><dd><a href="https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html">HDF5 File Format Specification</a> for more information on how these objects are organized. The HDF5 library can also use other libraries and modules such as compression.</dd></dl>
<table class="doxtable">
<caption>Data structures in different layers</caption>
<tr>
<td><div class="image">
<img src="Dmodel_fig3_a.gif" alt=""/>
</div>
</td><td><div class="image">
<img src="Dmodel_fig2.gif" alt=""/>
</div>
</td><td><div class="image">
<img src="Dmodel_fig3_c.gif" alt=""/>
</div>
</td></tr>
</table>
<p>The important point to note is that there is not necessarily any simple correspondence between the objects of the application program, the abstract data model, and those of the Format Specification. The organization of the application program's data, and how it is mapped to the HDF5 abstract data model, is up to the application developer. The application program only needs to deal with the library and the abstract data model. Most applications need not consider any details of the <a href="https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html">HDF5 File Format Specification</a> or the details of how objects of the abstract data model are translated to and from storage.</p>
<h2><a class="anchor" id="subsec_data_model_abstract"></a>
The Abstract Data Model</h2>
<p>The abstract data model (ADM) defines concepts for defining and describing complex data stored in files. The ADM is a very general model which is designed to conceptually cover many specific models. Many different kinds of data can be mapped to objects of the ADM, and therefore stored and retrieved using HDF5. The ADM is not, however, a model of any particular problem or application domain. Users need to map their data to the concepts of the ADM.</p>
<p>The key concepts include: </p><ul>
<li>
<a class="el" href="_h5_d_m__u_g.html#subsubsec_data_model_abstract_file">File</a> - a contiguous string of bytes in a computer store (memory, disk, etc.), and the bytes represent zero or more objects of the model </li>
<li>
<a class="el" href="_h5_d_m__u_g.html#subsubsec_data_model_abstract_group">Group</a> - a collection of objects (including groups) </li>
<li>
<a class="el" href="_h5_d_m__u_g.html#subsubsec_data_model_abstract_dataset">Dataset</a> - a multidimensional array of data elements with attributes and other metadata </li>
<li>
<a class="el" href="_h5_d_m__u_g.html#subsubsec_data_model_abstract_space">Dataspace</a> - a description of the dimensions of a multidimensional array </li>
<li>
<a class="el" href="_h5_d_m__u_g.html#subsubsec_data_model_abstract_type">Datatype</a> - a description of a specific class of data element including its storage layout as a pattern of bits </li>
<li>
<a class="el" href="_h5_d_m__u_g.html#subsubsec_data_model_abstract_attr">Attribute</a> - a named data value associated with a group, dataset, or named datatype </li>
<li>
<a class="el" href="_h5_d_m__u_g.html#subsubsec_data_model_abstract_plist">Property List</a> - a collection of parameters (some permanent and some transient) controlling options in the library </li>
<li>
<a class="el" href="_h5_d_m__u_g.html#subsubsec_data_model_abstract_link">Link</a> - the way objects are connected</li>
</ul>
<p>These key concepts are described in more detail below.</p>
<h3><a class="anchor" id="subsubsec_data_model_abstract_file"></a>
File</h3>
<p>Abstractly, an HDF5 file is a container for an organized collection of objects. The objects are groups, datasets, and other objects as defined below. The objects are organized as a rooted, directed graph. Every HDF5 file has at least one object, the root group. See the figure below. All objects are members of the root group or descendants of the root group.</p>
<table class="doxtable">
<caption>The HDF5 file</caption>
<tr>
<td><div class="image">
<img src="Dmodel_fig4_b.gif" alt=""/>
</div>
</td></tr>
<tr>
<td><div class="image">
<img src="Dmodel_fig4_a.gif" alt=""/>
</div>
</td></tr>
</table>
<p>HDF5 objects have a unique identity within a single HDF5 file and can be accessed only by their names within the hierarchy of the file. HDF5 objects in different files do not necessarily have unique identities, and it is not possible to access a permanent HDF5 object except through a file. For more information, see <a class="el" href="_h5_d_m__u_g.html#subsec_data_model_structure">The Structure of an HDF5 File</a>.</p>
<p>When the file is created, the file creation properties specify settings for the file. The file creation properties include version information and parameters of global data structures. When the file is opened, the file access properties specify settings for the current access to the file. File access properties include parameters for storage drivers and parameters for caching and garbage collection. The file creation properties are set permanently for the life of the file, and the file access properties can be changed by closing and reopening the file.</p>
<p>An HDF5 file can be “mounted” as part of another HDF5 file. This is analogous to Unix file system mounts. The root of the mounted file is attached to a group in the mounting file, and all the contents can be accessed as if the mounted file were part of the mounting file.</p>
<dl class="section see"><dt>See also</dt><dd><a class="el" href="_h5_f__u_g.html#sec_file">The HDF5 File</a>.</dd></dl>
<h3><a class="anchor" id="subsubsec_data_model_abstract_group"></a>
Group</h3>
<p>An HDF5 group is analogous to a file system directory. Abstractly, a group contains zero or more objects, and every object must be a member of at least one group. The root group is a special case; it may not be a member of any group.</p>
<p>Group membership is actually implemented via link objects. See the figure below. A link object is owned by a group and points to a named object. Each link has a name, and each link points to exactly one object. Each named object has at least one and possibly many links to it.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig5.gif" alt=""/>
<div class="caption">
Group membership via link objects</div></div>
</td></tr>
</table>
<p>There are three classes of named objects: group, dataset, and committed (named) datatype. See the figure below. Each of these objects is a member of at least one group, which means there is at least one link to it.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig6.gif" alt=""/>
<div class="caption">
Classes of named objects</div></div>
</td></tr>
</table>
<dl class="section see"><dt>See also</dt><dd><a class="el" href="_h5_g__u_g.html#sec_group">HDF5 Groups</a>.</dd></dl>
<h3><a class="anchor" id="subsubsec_data_model_abstract_dataset"></a>
Dataset</h3>
<p>An HDF5 dataset is a multidimensional (rectangular) array of data elements. See the figure below. The shape of the array (number of dimensions, size of each dimension) is described by the dataspace object (described in the next section below).</p>
<p>A data element is a single unit of data which may be a number, a character, an array of numbers or characters, or a record of heterogeneous data elements. A data element is a set of bits. The layout of the bits is described by the datatype (see below).</p>
<p>The dataspace and datatype are set when the dataset is created, and they cannot be changed for the life of the dataset. The dataset creation properties are set when the dataset is created. The dataset creation properties include the fill value and storage properties such as chunking and compression. These properties cannot be changed after the dataset is created.</p>
<p>The dataset object manages the storage and access to the data. While the data is conceptually a contiguous rectangular array, it is physically stored and transferred in different ways depending on the storage properties and the storage mechanism used. The actual storage may be a set of compressed chunks, and the access may be through different storage mechanisms and caches. The dataset maps between the conceptual array of elements and the actual stored data.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig7_b.gif" alt=""/>
<div class="caption">
The dataset</div></div>
</td></tr>
</table>
<dl class="section see"><dt>See also</dt><dd><a class="el" href="_h5_d__u_g.html#sec_dataset">HDF5 Datasets</a>.</dd></dl>
<h3><a class="anchor" id="subsubsec_data_model_abstract_space"></a>
Dataspace</h3>
<p>The HDF5 dataspace describes the layout of the elements of a multidimensional array. Conceptually, the array is a hyper-rectangle with one to 32 dimensions. HDF5 dataspaces can be extendable. Therefore, each dimension has a current size and a maximum size, and the maximum may be unlimited. The dataspace describes this hyper-rectangle: it is a list of dimensions with the current and maximum (or unlimited) sizes. See the figure below.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig8.gif" alt=""/>
<div class="caption">
The dataspace</div></div>
</td></tr>
</table>
<p>Dataspace objects are also used to describe hyperslab selections from a dataset. Any subset of the elements of a dataset can be selected for read or write by specifying a set of hyperslabs. A non-rectangular region can be selected by the union of several (rectangular) dataspaces.</p>
<dl class="section see"><dt>See also</dt><dd><a class="el" href="_h5_s__u_g.html#sec_dataspace">HDF5 Dataspaces and Partial I/O</a>.</dd></dl>
<h3><a class="anchor" id="subsubsec_data_model_abstract_type"></a>
Datatype</h3>
<p>The HDF5 datatype object describes the layout of a single data element. A data element is a single element of the array; it may be a single number, a character, an array of numbers or characters, or other data. The datatype object describes the storage layout of this data.</p>
<p>Data types are categorized into 11 classes of datatype. Each class is interpreted according to a set of rules and has a specific set of properties to describe its storage. For instance, floating point numbers have exponent position and sizes which are interpreted according to appropriate standards for number representation. Thus, the datatype class tells what the element means, and the datatype describes how it is stored.</p>
<p>The figure below shows the classification of datatypes. Atomic datatypes are indivisible. Each may be a single object such as a number or a string. Composite datatypes are composed of multiple elements of atomic datatypes. In addition to the standard types, users can define additional datatypes such as a 24-bit integer or a 16-bit float. A dataset or attribute has a single datatype object associated with it. See the figure “The dataset” above. The datatype object may be used in the definition of several objects, but by default, a copy of the datatype object will be private to the dataset.</p>
<p>Optionally, a datatype object can be stored in the HDF5 file. The datatype is linked into a group, and therefore given a name. A committed datatype (formerly called a named datatype) can be opened and used in any way that a datatype object can be used.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig9.gif" alt=""/>
<div class="caption">
Datatype classifications</div></div>
</td></tr>
</table>
<dl class="section see"><dt>See also</dt><dd><a class="el" href="_h5_t__u_g.html#sec_datatype">HDF5 Datatypes</a>.</dd></dl>
<h3><a class="anchor" id="subsubsec_data_model_abstract_attr"></a>
Attribute</h3>
<p>Any HDF5 named data object (group, dataset, or named datatype) may have zero or more user defined attributes. Attributes are used to document the object. The attributes of an object are stored with the object.</p>
<p>An HDF5 attribute has a name and data. The data portion is similar in structure to a dataset: a dataspace defines the layout of an array of data elements, and a datatype defines the storage layout and interpretation of the elements. See the figure below.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig10.gif" alt=""/>
<div class="caption">
Attribute data elements</div></div>
</td></tr>
</table>
<p>In fact, an attribute is very similar to a dataset with the following limitations: </p><ul>
<li>
An attribute can only be accessed via the object </li>
<li>
Attribute names are significant only within the object </li>
<li>
An attribute should be a small object </li>
<li>
The data of an attribute must be read or written in a single access (partial reading or writing is not allowed) </li>
<li>
Attributes do not have attributes</li>
</ul>
<p>Note that the value of an attribute can be an object reference. A shared attribute or an attribute that is a large array can be implemented as a reference to a dataset.</p>
<p>The name, dataspace, and datatype of an attribute are specified when it is created and cannot be changed over the life of the attribute. An attribute can be opened by name, by index, or by iterating through all the attributes of the object.</p>
<dl class="section see"><dt>See also</dt><dd><a class="el" href="_h5_a__u_g.html#sec_attribute">HDF5 Attributes</a>.</dd></dl>
<h3><a class="anchor" id="subsubsec_data_model_abstract_plist"></a>
Property List</h3>
<p>HDF5 has a generic property list object. Each list is a collection of name-value pairs. Each class of property list has a specific set of properties. Each property has an implicit name, a datatype, and a value. See the figure below. A property list object is created and used in ways similar to the other objects of the HDF5 library.</p>
<p>Property Lists are attached to the object in the library, and they can be used by any part of the library. Some properties are permanent (for example, the chunking strategy for a dataset), others are transient (for example, buffer sizes for data transfer). A common use of a Property List is to pass parameters from the calling program to a VFL driver or a module of the pipeline.</p>
<p>Property lists are conceptually similar to attributes. Property lists are information relevant to the behavior of the library while attributes are relevant to the user's data and application.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig11_b.gif" alt=""/>
<div class="caption">
The property list</div></div>
</td></tr>
</table>
<p>Property lists are used to control optional behavior for file creation, file access, dataset creation, dataset transfer (read, write), and file mounting. Some property list classes are shown in the table below. Details of the different property lists are explained in the relevant sections of this document.</p>
<table class="doxtable">
<caption>Property list classes and their usage</caption>
<tr>
<th>Property List Class </th><th>Used </th><th>Examples </th></tr>
<tr>
<td><a class="el" href="_h5_ppublic_8h.html#a206f334f1e6c973e1215a3148b45b977">H5P_FILE_CREATE</a> </td><td>Properties for file creation. </td><td>Set size of user block. </td></tr>
<tr>
<td><a class="el" href="_h5_ppublic_8h.html#a60ec2d4334addfc0eda89614598ee38e">H5P_FILE_ACCESS</a> </td><td>Properties for file access. </td><td>Set parameters for VFL driver. An example is MPI I/O. </td></tr>
<tr>
<td><a class="el" href="_h5_ppublic_8h.html#afcd7f8186c404f3a1d768632eacba102">H5P_DATASET_CREATE</a> </td><td>Properties for dataset creation. </td><td>Set chunking, compression, or fill value. </td></tr>
<tr>
<td><a class="el" href="_h5_ppublic_8h.html#a6f9c8a5aba72c0445fff384bf418a80d">H5P_DATASET_XFER</a> </td><td>Properties for raw data transfer (read and write). </td><td>Tune buffer sizes or memory management. </td></tr>
<tr>
<td><a class="el" href="_h5_ppublic_8h.html#a3f57eb3c4081b40ff8b036f438e68e5b">H5P_FILE_MOUNT</a> </td><td>Properties for file mounting. </td><td></td></tr>
</table>
<dl class="section see"><dt>See also</dt><dd><a class="el" href="_h5_p__u_g.html#sec_plist">Properties and Property Lists in HDF5</a>.</dd></dl>
<h3><a class="anchor" id="subsubsec_data_model_abstract_link"></a>
Link</h3>
<p>This section is under construction.</p>
<h2><a class="anchor" id="subsec_data_model_storage"></a>
The HDF5 Storage Model</h2>
<h3><a class="anchor" id="subsubsec_data_model_storage_spec"></a>
The Abstract Storage Model: the HDF5 Format Specification</h3>
<p>The <a href="https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html">HDF5 File Format Specification</a> defines how HDF5 objects and data are mapped to a linear address space. The address space is assumed to be a contiguous array of bytes stored on some random access medium. The format defines the standard for how the objects of the abstract data model are mapped to linear addresses. The stored representation is self-describing in the sense that the format defines all the information necessary to read and reconstruct the original objects of the abstract data model.</p>
<p>The HDF5 File Format Specification is organized in three parts: </p><ul>
<li>
Level 0: File signature and super block </li>
<li>
Level 1: File infrastructure <ul>
<li>
Level 1A: B-link trees and B-tree nodes </li>
<li>
Level 1B: Group </li>
<li>
Level 1C: Group entry </li>
<li>
Level 1D: Local heaps </li>
<li>
Level 1E: Global heap </li>
<li>
Level 1F: Free-space index</li>
</ul>
</li>
<li>
Level 2: Data object <ul>
<li>
Level 2A: Data object headers </li>
<li>
Level 2B: Shared data object headers </li>
<li>
Level 2C: Data object data storage</li>
</ul>
</li>
</ul>
<p>The Level 0 specification defines the header block for the file. Header block elements include a signature, version information, key parameters of the file layout (such as which VFL file drivers are needed), and pointers to the rest of the file. Level 1 defines the data structures used throughout the file: the B-trees, heaps, and groups. Level 2 defines the data structure for storing the data objects and data. In all cases, the data structures are completely specified so that every bit in the file can be faithfully interpreted.</p>
<p>It is important to realize that the structures defined in the HDF5 file format are not the same as the abstract data model: the object headers, heaps, and B-trees of the file specification are not represented in the abstract data model. The format defines a number of objects for managing the storage including header blocks, B-trees, and heaps. The HDF5 File Format Specification defines how the abstract objects (for example, groups and datasets) are represented as headers, B-tree blocks, and other elements.</p>
<p>The HDF5 library implements operations to write HDF5 objects to the linear format and to read from the linear format to create HDF5 objects. It is important to realize that a single HDF5 abstract object is usually stored as several objects. A dataset, for example, might be stored in a header and in one or more data blocks, and these objects might not be contiguous on the hard disk.</p>
<h3><a class="anchor" id="subsubsec_data_model_storage_imple"></a>
Concrete Storage Model</h3>
<p>The HDF5 file format defines an abstract linear address space. This can be implemented in different storage media such as a single file or multiple files on disk or in memory. The HDF5 Library defines an open interface called the Virtual File Layer (VFL). The VFL allows different concrete storage models to be selected.</p>
<p>The VFL defines an abstract model, an API for random access storage, and an API to plug in alternative VFL driver modules. The model defines the operations that the VFL driver must and may support, and the plug-in API enables the HDF5 library to recognize the driver and pass it control and data.</p>
<p>A number of VFL drivers have been defined in the HDF5 library. Some work with a single file, and some work with multiple files split in various ways. Some work in serial computing environments, and some work in parallel computing environments. Most work with disk copies of HDF5 files, but one works with a memory copy. These drivers are listed in the <a class="el" href="_h5_f__u_g.html#subsec_file_alternate_drivers">Supported file drivers</a> table.</p>
<dl class="section see"><dt>See also</dt><dd><a class="el" href="_h5_f__u_g.html#subsec_file_alternate_drivers">Alternate File Storage Layouts and Low-level File Drivers</a>.</dd></dl>
<p>Each driver isolates the details of reading and writing storage so that the rest of the HDF5 library and user program can be almost the same for different storage methods. The exception to this rule is that some VFL drivers need information from the calling application. This information is passed using property lists. For example, the Parallel driver requires certain control information that must be provided by the application.</p>
<h2><a class="anchor" id="subsec_data_model_structure"></a>
The Structure of an HDF5 File</h2>
<h3><a class="anchor" id="subsubsec_data_model_structure_file"></a>
Overall File Structure</h3>
<p>An HDF5 file is organized as a rooted, directed graph. Named data objects are the nodes of the graph, and links are the directed arcs. Each arc of the graph has a name, and the root group has the name “/”. Objects are created and then inserted into the graph with the link operation which creates a named link from a group to the object. For example, the figure below illustrates the structure of an HDF5 file when one dataset is created. An object can be the target of more than one link. The names on the links must be unique within each group, but there may be many links with the same name in different groups. Link names are unambiguous: some ancestor will have a different name, or they are the same object. The graph is navigated with path names similar to Unix file systems. An object can be opened with a full path starting at the root group or with a relative path and a starting node (group). Note that all paths are relative to a single HDF5 file. In this sense, an HDF5 file is analogous to a single Unix file system.</p>
<table class="doxtable">
<caption>An HDF5 file with one dataset</caption>
<tr>
<td><div class="image">
<img src="Dmodel_fig12_a.gif" alt="Diagram of a newly created HDF5 file containing only the root group, /"/>
</div>
</td><td><div class="image">
<img src="Dmodel_fig12_b.gif" alt="Diagram of the same HDF5 file after a dataset called /dset1 has been created"/>
</div>
</td></tr>
</table>
<p>Note: The figure above contains two diagrams. The first represents a newly created file with one group, /. In the second, a dataset called /dset1 has been created.</p>
<p>It is important to note that, just as in the Unix file system, HDF5 objects do not have names. The names are associated with paths. An object has a unique (within the file) object identifier, but a single object may have many names because there may be many paths to the same object. An object can be renamed (moved to another group) by adding and deleting links. In this case, the object itself never moves. For that matter, membership in a group has no implication for the physical location of the stored object.</p>
<p>Deleting a link to an object does not necessarily delete the object. The object remains available as long as there is at least one link to it. After all the links to an object are deleted, it can no longer be opened although the storage may or may not be reclaimed.</p>
<p>It is important to realize that the linking mechanism can be used to construct very complex graphs of objects. For example, it is possible for an object to be shared between several groups and even to have more than one name in the same group. It is also possible for a group to be a member of itself or to be in a “cycle” in the graph. An example of a cycle is where a child is the parent of one of its own ancestors.</p>
<h3><a class="anchor" id="subsubsec_data_model_structure_path"></a>
HDF5 Path Names and Navigation</h3>
<p>The structure of the file constitutes the name space for the objects in the file. A path name is a string of components separated by ‘/’. Each component is the name of a link or the special character “.” for the current group. Link names (components) can be any string of ASCII characters not containing ‘/’ (except the string “.” which is reserved). However, users are advised to avoid the use of punctuation and non-printing characters because they may create problems for other software. The figure below gives a BNF grammar for HDF5 path names.</p>
<p><em>A BNF grammar for path names</em> </p><div class="fragment"><div class="line">PathName ::= AbsolutePathName | RelativePathName</div>
<div class="line">Separator ::= <span class="stringliteral">"/"</span> [<span class="stringliteral">"/"</span>]*</div>
<div class="line">AbsolutePathName ::= Separator [ RelativePathName ]</div>
<div class="line">RelativePathName ::= Component [ Separator RelativePathName ]*</div>
<div class="line">Component ::= <span class="stringliteral">"."</span> | Name</div>
<div class="line">Name ::= Character+ - {<span class="stringliteral">"."</span>}</div>
<div class="line">Character ::= {c: c in {{ legal ASCII characters } - {<span class="charliteral">'/'</span>}}}</div>
</div><!-- fragment --><p>An object can always be addressed by a full or absolute path which would start at the root group. As already noted, a given object can have more than one full path name. An object can also be addressed by a relative path which would start at a group and include the path to the object.</p>
<p>The structure of an HDF5 file is “self-describing.” This means that it is possible to navigate the file to discover all the objects in the file. Basically, the structure is traversed as a graph starting at one node and recursively visiting the nodes of the graph.</p>
<h3><a class="anchor" id="subsubsec_data_model_structure_example"></a>
Examples of HDF5 File Structures</h3>
<p>The figures below show some possible HDF5 file structures with groups and datasets. The first figure shows the structure of a file with three groups. The second shows a dataset created in “/group1”. The third figure shows the structure after a dataset called dset2 has been added to the root group. The fourth figure shows the structure after another group and dataset have been added.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig14_a.gif" alt=""/>
<div class="caption">
An HDF5 file structure with groups</div></div>
</td></tr>
</table>
<p>Note: The figure above shows three groups; /group1 and /group2 are members of the root group.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig14_b.gif" alt=""/>
<div class="caption">
An HDF5 file structure with groups and a dataset</div></div>
</td></tr>
</table>
<p>Note: The figure above shows that a dataset has been created in /group1: /group1/dset1.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig14_c.gif" alt=""/>
<div class="caption">
An HDF5 file structure with groups and datasets</div></div>
</td></tr>
</table>
<p>Note: In the figure above, another dataset has been added as a member of the root group: /dset2.</p>
<table class="doxtable">
<tr>
<td><div class="image">
<img src="Dmodel_fig14_d.gif" alt=""/>
<div class="caption">
Another HDF5 file structure with groups and datasets</div></div>
</td></tr>
</table>
<p>Note: In the figure above, another group and dataset have been added reusing object names: <em>/group2/group2/dset2</em>. </p><ol>
<li>
HDF5 requires random access to the linear address space. For this reason it is not well suited for some data media such as streams. </li>
<li>
It could be said that HDF5 extends the organizing concepts of a file system to the internal structure of a single file. </li>
<li>
As of HDF5-1.4, the storage used for an object is reclaimed, even if all links are deleted.</li>
</ol>
<p>Next Chapter <a class="el" href="_h5__u_g.html#sec_program">The HDF5 Library and Programming Model</a> </p>
</div></div><!-- contents -->
</div><!-- PageDoc -->
</div><!-- doc-content -->
<!-- start footer part -->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
<ul>
<li class="footer">Generated by
<a href="https://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.9.1 </li>
</ul>
</div>
</body>
</html>
|