summaryrefslogtreecommitdiffstats
path: root/develop/_l_b_dset_layout.html
blob: 0fc301cf6d570a6a51b9e19aad4431330fbb15c8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.10.0"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>HDF5: Dataset Storage Layout</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtreedata.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript" src="cookie.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
  $(function() { init_search(); });
/* @license-end */
</script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
<link href="hdf5doxy.css" rel="stylesheet" type="text/css"/>
<!-- <link href="hdf5doxy.css" rel="stylesheet" type="text/css"/>
 -->
<script type="text/javascript" src="hdf5_navtree_hacks.js"></script>
</head>
<body>
<div style="background:#FFDDDD;font-size:120%;text-align:center;margin:0;padding:5px">Please help us to better serve our user community by answering the following short survey:  <a href="https://www.hdfgroup.org/website-survey/">https://www.hdfgroup.org/website-survey/</a></div>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
 <tbody>
 <tr style="height: 56px;">
  <td id="projectlogo"><img alt="Logo" src="HDFG-logo.png"/></td>
  <td id="projectalign" style="padding-left: 0.5em;">
   <div id="projectname"><a href="https://www.hdfgroup.org">HDF5</a>
   &#160;<span id="projectnumber">1.15.0.68e8c0e</span>
   </div>
   <div id="projectbrief">API Reference</div>
  </td>
   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
        <span class="left">
          <span id="MSearchSelect"                onmouseover="return searchBox.OnSearchSelectShow()"                onmouseout="return searchBox.OnSearchSelectHide()">&#160;</span>
          <input type="text" id="MSearchField" value="" placeholder="Search" accesskey="S"
               onfocus="searchBox.OnSearchFieldFocus(true)" 
               onblur="searchBox.OnSearchFieldFocus(false)" 
               onkeyup="searchBox.OnSearchFieldChange(event)"/>
          </span><span class="right">
            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.svg" alt=""/></a>
          </span>
        </div>
</td>
 </tr>
 </tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.10.0 -->
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
var searchBox = new SearchBox("searchBox", "search/",'.html');
/* @license-end */
</script>
</div><!-- top -->
<div id="side-nav" class="ui-resizable side-nav-resizable">
  <div id="nav-tree">
    <div id="nav-tree-contents">
      <div id="nav-sync" class="sync"></div>
    </div>
  </div>
  <div id="splitbar" style="-moz-user-select:none;" 
       class="ui-resizable-handle">
  </div>
</div>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
$(function(){initNavTree('_l_b_dset_layout.html',''); initResizable(); });
/* @license-end */
</script>
<div id="doc-content">
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
     onmouseover="return searchBox.OnSearchSelectShow()"
     onmouseout="return searchBox.OnSearchSelectHide()"
     onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>

<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<div id="MSearchResults">
<div class="SRPage">
<div id="SRIndex">
<div id="SRResults"></div>
<div class="SRStatus" id="Loading">Loading...</div>
<div class="SRStatus" id="Searching">Searching...</div>
<div class="SRStatus" id="NoMatches">No Matches</div>
</div>
</div>
</div>
</div>

<div><div class="header">
  <div class="headertitle"><div class="title">Dataset Storage Layout</div></div>
</div><!--header-->
<div class="contents">
<div class="textblock"><p>Navigate back: <a class="el" href="index.html">Main</a> / <a class="el" href="_getting_started.html">Getting Started with HDF5</a> / <a class="el" href="_learn_basics.html">Learning the Basics</a> </p><hr  />
<h1><a class="anchor" id="secLBDsetLayoutDesc"></a>
Description of a Dataset</h1>
<h1><a class="anchor" id="secLBDsetLayout"></a>
Dataset Storage Layout</h1>
<p>The storage information, or storage layout, defines how the raw data values in the dataset are physically stored on disk. There are three ways that a dataset can be stored: </p><ul>
<li>contiguous </li>
<li>chunked </li>
<li>compact</li>
</ul>
<p>See the <a class="el" href="group___d_c_p_l.html#ga75d80991a8f467e0d454c53a383ae7f9" title="Sets the type of storage used to store the raw data for a dataset.">H5Pset_layout</a>/<a class="el" href="group___d_c_p_l.html#ga655530b0f40990507fedeef6b3068db3" title="Returns the layout of the raw data for a dataset.">H5Pget_layout</a> APIs for details.</p>
<h2><a class="anchor" id="subsecLBDsetLayoutCont"></a>
Contiguous</h2>
<p>If the storage layout is contiguous, then the raw data values will be stored physically adjacent to each other in the HDF5 file (in one contiguous block). This is the default layout for a dataset. In other words, if you do not explicitly change the storage layout for the dataset, then it will be stored contiguously. </p><table class="doxtable">
<tr>
<td><div class="image">
<img src="tutr-locons.png" alt=""/>
</div>
   </td></tr>
</table>
<h2><a class="anchor" id="subsecLBDsetLayoutChunk"></a>
Chunked</h2>
<p>With a chunked storage layout the data is stored in equal-sized blocks or chunks of a pre-defined size. The HDF5 library always writes and reads the entire chunk: </p><table class="doxtable">
<tr>
<td><div class="image">
<img src="tutr-lochk.png" alt=""/>
</div>
   </td></tr>
</table>
<p>Each chunk is stored as a separate contiguous block in the HDF5 file. There is a chunk index which keeps track of the chunks associated with a dataset: </p><table class="doxtable">
<tr>
<td><div class="image">
<img src="tutr-lochks.png" alt=""/>
</div>
   </td></tr>
</table>
<h3><a class="anchor" id="susubsecLBDsetLayoutChunkWhy"></a>
Why Chunking?</h3>
<p>Chunking is required for enabling compression and other filters, as well as for creating extendible or unlimited dimension datasets.</p>
<p>It is also commonly used when subsetting very large datasets. Using the chunking layout can greatly improve performance when subsetting large datasets, because only the chunks required will need to be accessed. However, it is easy to use chunking without considering the consequences of the chunk size, which can lead to strikingly poor performance.</p>
<p>Note that a chunk always has the same rank as the dataset and the chunk's dimensions do not need to be factors of the dataset dimensions.</p>
<p>Writing or reading a chunked dataset is transparent to the application. You would use the same set of operations that you would use for a contiguous dataset. For example: </p><div class="fragment"><div class="line"><a class="code hl_define" href="_h5version_8h.html#a7dba2e5b2045f31c0932123ffb54f7a3">H5Dopen</a> (...);</div>
<div class="line"><a class="code hl_function" href="group___h5_s.html#ga6adfdf1b95dc108a65bf66e97d38536d">H5Sselect_hyperslab</a> (...);</div>
<div class="line"><a class="code hl_function" href="group___h5_d.html#ga8287d5a7be7b8e55ffeff68f7d26811c">H5Dread</a> (...);</div>
<div class="ttc" id="a_h5version_8h_html_a7dba2e5b2045f31c0932123ffb54f7a3"><div class="ttname"><a href="_h5version_8h.html#a7dba2e5b2045f31c0932123ffb54f7a3">H5Dopen</a></div><div class="ttdeci">#define H5Dopen</div><div class="ttdef"><b>Definition</b> H5version.h:903</div></div>
<div class="ttc" id="agroup___h5_d_html_ga8287d5a7be7b8e55ffeff68f7d26811c"><div class="ttname"><a href="group___h5_d.html#ga8287d5a7be7b8e55ffeff68f7d26811c">H5Dread</a></div><div class="ttdeci">herr_t H5Dread(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t dxpl_id, void *buf)</div><div class="ttdoc">Reads raw data from a dataset into a provided buffer.</div></div>
<div class="ttc" id="agroup___h5_s_html_ga6adfdf1b95dc108a65bf66e97d38536d"><div class="ttname"><a href="group___h5_s.html#ga6adfdf1b95dc108a65bf66e97d38536d">H5Sselect_hyperslab</a></div><div class="ttdeci">herr_t H5Sselect_hyperslab(hid_t space_id, H5S_seloper_t op, const hsize_t start[], const hsize_t stride[], const hsize_t count[], const hsize_t block[])</div><div class="ttdoc">Selects a hyperslab region to add to the current selected region.</div></div>
</div><!-- fragment --><h3><a class="anchor" id="susubsecLBDsetLayoutChunkProb"></a>
Problems Using Chunking</h3>
<p>Issues that can cause performance problems with chunking include: </p><ul>
<li>Chunks are too small. If a very small chunk size is specified for a dataset, it can cause the dataset to be excessively large and can result in degraded performance when accessing the dataset. The smaller the chunk size, the more chunks HDF5 has to keep track of, and the more time it will take to search for a chunk. </li>
<li>Chunks are too large. An entire chunk has to be read and uncompressed before performing an operation. There can be a performance penalty for reading a small subset, if the chunk size is substantially larger than the subset. Also, a dataset may be larger than expected if there are chunks that only contain a small amount of data. </li>
<li>A chunk does not fit in the Chunk Cache. Every chunked dataset has a chunk cache associated with it that has a default size of 1 MB. The purpose of the chunk cache is to improve performance by keeping chunks that are accessed frequently in memory so that they do not have to be accessed from disk. If a chunk is too large to fit in the chunk cache, it can significantly degrade performance. However, the size of the chunk cache can be increased by calling <a class="el" href="group___d_a_p_l.html#ga104d00442c31714ee073dee518f661f1" title="Sets the raw data chunk cache parameters.">H5Pset_chunk_cache</a>.</li>
</ul>
<p>It is a good idea to: </p><ul>
<li>Avoid very small chunk sizes, and be aware of the 1 MB chunk cache size default. </li>
<li>Test the data with different chunk sizes to determine the optimal chunk size to use. </li>
<li>Consider the chunk size in terms of the most common access patterns that will be used once the dataset has been created.</li>
</ul>
<h2><a class="anchor" id="subsecLBDsetLayoutCom"></a>
Compact</h2>
<p>A compact dataset is one in which the raw data is stored in the object header of the dataset. This layout is for very small datasets that can easily fit in the object header.</p>
<p>The compact layout can improve storage and access performance for files that have many very tiny datasets. With one I/O access both the header and data values can be read. The compact layout reduces the size of a file, as the data is stored with the header, which will always be allocated for a dataset. However, the object header is limited to 64 KB in size, so this layout can only be used for very small datasets.</p>
<h1><a class="anchor" id="secLBDsetLayoutProg"></a>
Programming Model to Modify the Storage Layout</h1>
<p>To modify the storage layout, the following steps must be done: </p><ul>
<li>Create a Dataset Creation Property list. (See <a class="el" href="group___p_l_c_r.html#gaf1b11da01d4d45d788c45f8bc5f0cbfa" title="Creates a new property list as an instance of a property list class.">H5Pcreate</a>) </li>
<li>Modify the property list. To use the chunked storage layout, call <a class="el" href="group___d_c_p_l.html#ga3584d592e377da3604b7604e266dcf5b" title="Sets the size of the chunks used to store a chunked layout dataset.">H5Pset_chunk</a>. To use the compact storage layout, call <a class="el" href="group___d_c_p_l.html#ga75d80991a8f467e0d454c53a383ae7f9" title="Sets the type of storage used to store the raw data for a dataset.">H5Pset_layout</a>. </li>
<li>Create a dataset with the modified property list. (See <a class="el" href="group___h5_d.html#ga0647ba4bbd26d5230cc07f3a5685b2cf">H5Dcreate</a>) </li>
<li>Close the property list. (See <a class="el" href="group___p_l_c_r.html#ga5dce61149211d3ef319452aa598887fb" title="Terminates access to a property list.">H5Pclose</a>)</li>
</ul>
<p>For example code, see the <a class="el" href="_h_d_f5_examples.html">HDF5 Examples</a> page. Specifically look at the <a class="el" href="_ex_a_p_i.html">Examples by API</a>. There are examples for different languages.</p>
<p>The C example to create a chunked dataset is <a href="https://github.com/HDFGroup/hdf5/blob/develop/HDF5Examples/C/H5D/h5ex_d_chunk.c">h5ex_d_chunk.c</a>. The C example to create a compact dataset is <a href="https://github.com/HDFGroup/hdf5/blob/develop/HDF5Examples/C/H5D/h5ex_d_compact.c">h5ex_d_compact.c</a>.</p>
<h1><a class="anchor" id="secLBDsetLayoutChange"></a>
Changing the Layout after Dataset Creation</h1>
<p>The dataset layout is a dataset creation property, set through the Dataset Creation Property List. This means that once the dataset has been created, the dataset layout cannot be changed. However, the h5repack utility can be used to write the file to a new file with a new layout.</p>
<h1><a class="anchor" id="secLBDsetLayoutSource"></a>
Sources of Information</h1>
<p><a href="https://confluence.hdfgroup.org/display/HDF5/Chunking+in+HDF5">Chunking in HDF5</a> (See the documentation on <a href="https://confluence.hdfgroup.org/display/HDF5/Advanced+Topics+in+HDF5">Advanced Topics in HDF5</a>) </p><dl class="section see"><dt>See also</dt><dd><a class="el" href="_h5_p__u_g.html#sec_plist">Properties and Property Lists in HDF5</a> in the <a class="el" href="_u_g.html">HDF5 User Guide</a>.</dd></dl>
<hr  />
<p> Navigate back: <a class="el" href="index.html">Main</a> / <a class="el" href="_getting_started.html">Getting Started with HDF5</a> / <a class="el" href="_learn_basics.html">Learning the Basics</a> </p>
</div></div><!-- contents -->
</div><!-- PageDoc -->
</div><!-- doc-content -->
<!-- start footer part -->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
  <ul>
    <li class="footer">Generated by
    <a href="http://www.doxygen.org/index.html">
    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.10.0 </li>
  </ul>
</div>
</body>
</html>