summaryrefslogtreecommitdiffstats
path: root/src/H5FDsubfiling/H5FDsubfile_int.c
blob: a7dd86455cb8f9fcae8723b8a168b6dc4206df93 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.                                               *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the COPYING file, which can be found at the root of the source code       *
 * distribution tree, or in https://www.hdfgroup.org/licenses.               *
 * If you do not have access to either file, you may request a copy from     *
 * help@hdfgroup.org.                                                        *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*
 * Purpose:     This is part of a parallel subfiling I/O driver.
 *
 */

/***********/
/* Headers */
/***********/

#include "H5FDsubfiling_priv.h"

/*-------------------------------------------------------------------------
 * Function:    H5FD__subfiling__truncate_sub_files
 *
 *              Note: This code should be moved -- most likely to the IOC
 *                    code files.
 *
 * Purpose:     Apply a truncate operation to the subfiles.
 *
 *              In the context of the I/O concentrators, the eof must be
 *              translated into the appropriate value for each of the
 *              subfiles, and then applied to same.
 *
 *              Further, we must ensure that all prior I/O requests complete
 *              before the truncate is applied.
 *
 *              We do this as follows:
 *
 *              1) Run a barrier on entry.
 *
 *              2) Determine if this rank is a IOC.  If it is, compute
 *                 the correct EOF for this subfile, and send a truncate
 *                 request to the IOC.
 *
 *              3) On the IOC thread, allow all pending I/O requests
 *                 received prior to the truncate request to complete
 *                 before performing the truncate.
 *
 *              4) Run a barrier on exit.
 *
 *              Observe that the barrier on entry ensures that any prior
 *              I/O requests will have been queue before the truncate
 *              request is sent to the IOC.
 *
 *              Similarly, the barrier on exit ensures that no subsequent
 *              I/O request will reach the IOC before the truncate request
 *              has been queued.
 *
 * Return:      SUCCEED/FAIL
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5FD__subfiling__truncate_sub_files(hid_t context_id, int64_t logical_file_eof, MPI_Comm comm)
{
    subfiling_context_t *sf_context = NULL;
    MPI_Request         *recv_reqs  = NULL;
    int64_t              msg[3]     = {0};
    int64_t             *recv_msgs  = NULL;
    int                  mpi_size;
    int                  mpi_code;
    herr_t               ret_value = SUCCEED;

    if (MPI_SUCCESS != (mpi_code = MPI_Comm_size(comm, &mpi_size)))
        H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_size failed", mpi_code);

    /* Barrier on entry */
    if (mpi_size > 1)
        if (MPI_SUCCESS != (mpi_code = MPI_Barrier(comm)))
            H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);

    if (NULL == (sf_context = (subfiling_context_t *)H5_get_subfiling_object(context_id)))
        H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "can't get subfile context");

    if (sf_context->topology->rank_is_ioc) {
        int64_t num_full_stripes;
        int64_t num_leftover_stripes;
        int64_t partial_stripe_len;
        int     num_subfiles_owned;

        num_full_stripes     = logical_file_eof / sf_context->sf_blocksize_per_stripe;
        partial_stripe_len   = logical_file_eof % sf_context->sf_blocksize_per_stripe;
        num_leftover_stripes = partial_stripe_len / sf_context->sf_stripe_size;

        num_subfiles_owned = sf_context->sf_num_fids;

        if (NULL == (recv_reqs = malloc((size_t)num_subfiles_owned * sizeof(*recv_reqs))))
            H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
                                    "can't allocate receive requests array");
        if (NULL == (recv_msgs = malloc((size_t)num_subfiles_owned * 3 * sizeof(*recv_msgs))))
            H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate message array");

        /*
         * Post early receives for messages from the IOC main
         * thread that will signal completion of the truncate
         * operation
         */
        for (int i = 0; i < num_subfiles_owned; i++) {
            if (MPI_SUCCESS !=
                (mpi_code = MPI_Irecv(&recv_msgs[3 * i], 1, H5_subfiling_rpc_msg_type,
                                      sf_context->topology->io_concentrators[sf_context->topology->ioc_idx],
                                      TRUNC_COMPLETED, sf_context->sf_eof_comm, &recv_reqs[i])))
                H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Irecv failed", mpi_code);
        }

        /* Compute the EOF for each subfile this IOC owns */
        for (int i = 0; i < num_subfiles_owned; i++) {
            int64_t subfile_eof = num_full_stripes * sf_context->sf_stripe_size;
            int64_t global_subfile_idx;

            global_subfile_idx =
                (i * sf_context->topology->n_io_concentrators) + sf_context->topology->ioc_idx;

            if (global_subfile_idx < num_leftover_stripes) {
                subfile_eof += sf_context->sf_stripe_size;
            }
            else if (global_subfile_idx == num_leftover_stripes) {
                subfile_eof += partial_stripe_len % sf_context->sf_stripe_size;
            }

            /* Direct the IOC to truncate this subfile to the correct EOF */
            msg[0] = subfile_eof;
            msg[1] = i;
            msg[2] = -1; /* padding -- not used in this message */

            if (MPI_SUCCESS !=
                (mpi_code = MPI_Send(msg, 1, H5_subfiling_rpc_msg_type,
                                     sf_context->topology->io_concentrators[sf_context->topology->ioc_idx],
                                     TRUNC_OP, sf_context->sf_msg_comm)))
                H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Send failed", mpi_code);
        }

        /* Wait for truncate operations to complete */
        H5_GCC_DIAG_OFF("stringop-overflow")
        if (MPI_SUCCESS != (mpi_code = MPI_Waitall(num_subfiles_owned, recv_reqs, MPI_STATUSES_IGNORE)))
            H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Waitall", mpi_code);
        H5_GCC_DIAG_ON("stringop-overflow")

        /* sanity check -- compute the file eof using the same mechanism used to
         * compute the subfile eof.  Assert that the computed value and the
         * actual value match.
         *
         * Do this only for debug builds -- probably delete this before release.
         *
         *                                           JRM -- 12/15/21
         */

#ifndef NDEBUG
        {
            int64_t test_file_eof = 0;

            for (int i = 0; i < sf_context->sf_num_subfiles; i++) {
                test_file_eof += num_full_stripes * sf_context->sf_stripe_size;

                if (i < num_leftover_stripes) {
                    test_file_eof += sf_context->sf_stripe_size;
                }
                else if (i == num_leftover_stripes) {
                    test_file_eof += partial_stripe_len % sf_context->sf_stripe_size;
                }
            }

            assert(test_file_eof == logical_file_eof);
        }
#endif /* NDEBUG */
    }

    /* Barrier on exit */
    if (mpi_size > 1)
        if (MPI_SUCCESS != (mpi_code = MPI_Barrier(comm)))
            H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);

done:
    free(recv_msgs);
    free(recv_reqs);

    H5_SUBFILING_FUNC_LEAVE;
} /* H5FD__subfiling__truncate_sub_files() */

/*-------------------------------------------------------------------------
 * Function:    H5FD__subfiling__get_real_eof
 *
 *              Note: This code should be moved -- most likely to the IOC
 *                    code files.
 *
 * Purpose:     Query each subfile to get its local EOF, and then use this
 *              data to calculate the actual EOF.
 *
 *              Do this as follows:
 *
 *              1) allocate an array of int64_t of length equal to the
 *                 the number of subfiles, and initialize all fields to -1.
 *
 *              2) Send each subfile's IOC a message requesting that
 *                 subfile's EOF.
 *
 *              3) Await reply from each IOC, storing the reply in
 *                 the appropriate entry in the array allocated in 1.
 *
 *              4) After all IOCs have replied, compute the offset of
 *                 each subfile in the logical file.  Take the maximum
 *                 of these values, and report this value as the overall
 *                 EOF.
 *
 *              Note that this operation is not collective, and can return
 *              invalid data if other ranks perform writes while this
 *              operation is in progress.
 *
 *              SUBFILING NOTE:
 *              The EOF calculation for subfiling is somewhat different
 *              than for the more traditional HDF5 file implementations.
 *              This statement derives from the fact that unlike "normal"
 *              HDF5 files, subfiling introduces a multi-file representation
 *              of a single HDF5 file.  The plurality of subfiles represents
 *              a software RAID-0 based HDF5 file.  As such, each subfile
 *              contains a designated portion of the address space of the
 *              virtual HDF5 storage.  We have no notion of HDF5 datatypes,
 *              datasets, metadata, or other HDF5 structures; only BYTES.
 *
 *              The organization of the bytes within subfiles is consistent
 *              with the RAID-0 striping, i.e. there are IO Concentrators
 *              (IOCs) which correspond to a stripe-count (in Lustre) as
 *              well as a stripe_size.  The combination of these two
 *              variables determines the "address" (a combination of IOC
 *              and a file offset) of any storage operation.
 *
 *              Having a defined storage layout, the virtual file EOF
 *              calculation should be the MAXIMUM value returned by the
 *              collection of IOCs.  Every MPI rank which hosts an IOC
 *              maintains its own EOF by updating that value for each
 *              WRITE operation that completes, i.e. if a new local EOF
 *              is greater than the existing local EOF, the new EOF
 *              will replace the old.  The local EOF calculation is as
 *              follows.
 *              1. At file creation, each IOC is assigned a rank value
 *                 (0 to N-1, where N is the total number of IOCs) and
 *                 a 'sf_base_addr' = 'ioc_idx' * 'sf_stripe_size')
 *                 we also determine the 'sf_blocksize_per_stripe' which
 *                 is simply the 'sf_stripe_size' * 'n_ioc_concentrators'
 *
 *              2. For every write operation, the IOC receives a message
 *                 containing a file_offset and the data_size.
 *
 *              3. The file_offset + data_size are in turn used to
 *                 create a stripe_id:
 *                   IOC-(ioc_rank)       IOC-(ioc_rank+1)
 *                   |<- sf_base_address  |<- sf_base_address  |
 *                ID +--------------------+--------------------+
 *                 0:|<- sf_stripe_size ->|<- sf_stripe_size ->|
 *                 1:|<- sf_stripe_size ->|<- sf_stripe_size ->|
 *                   ~                    ~                    ~
 *                 N:|<- sf_stripe_size ->|<- sf_stripe_size ->|
 *                   +--------------------+--------------------+
 *
 *                The new 'stripe_id' is then used to calculate a
 *                potential new EOF:
 *                sf_eof = (stripe_id * sf_blocksize_per_stripe) + sf_base_addr
 *                         + ((file_offset + data_size) % sf_stripe_size)
 *
 *              4. If (sf_eof > current_sf_eof), then current_sf_eof = sf_eof.
 *
 * Return:      SUCCEED/FAIL
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5FD__subfiling__get_real_eof(hid_t context_id, int64_t *logical_eof_ptr)
{
    subfiling_context_t *sf_context  = NULL;
    MPI_Request         *recv_reqs   = NULL;
    int64_t             *recv_msg    = NULL;
    int64_t             *sf_eofs     = NULL; /* dynamically allocated array for subfile EOFs */
    int64_t              msg[3]      = {0, 0, 0};
    int64_t              logical_eof = 0;
    int64_t              sf_logical_eof;
    int                  n_io_concentrators = 0;
    int                  num_subfiles       = 0;
    int                  mpi_code;            /* MPI return code */
    herr_t               ret_value = SUCCEED; /* Return value */

    assert(logical_eof_ptr);

    if (NULL == (sf_context = (subfiling_context_t *)H5_get_subfiling_object(context_id)))
        H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "can't get subfile context");

    assert(sf_context->topology);

    n_io_concentrators = sf_context->topology->n_io_concentrators;
    num_subfiles       = sf_context->sf_num_subfiles;

    assert(n_io_concentrators > 0);
    assert(num_subfiles >= n_io_concentrators);

    if (NULL == (sf_eofs = malloc((size_t)num_subfiles * sizeof(int64_t))))
        H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate subfile EOFs array");
    if (NULL == (recv_reqs = malloc((size_t)num_subfiles * sizeof(*recv_reqs))))
        H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate receive requests array");
    if (NULL == (recv_msg = malloc((size_t)num_subfiles * sizeof(msg))))
        H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate message array");

    for (int i = 0; i < num_subfiles; i++) {
        sf_eofs[i]   = -1;
        recv_reqs[i] = MPI_REQUEST_NULL;
    }

    /* Post early non-blocking receives for the EOF of each subfile */
    for (int i = 0; i < num_subfiles; i++) {
        int ioc_rank = sf_context->topology->io_concentrators[i % n_io_concentrators];

        if (MPI_SUCCESS != (mpi_code = MPI_Irecv(&recv_msg[3 * i], 1, H5_subfiling_rpc_msg_type, ioc_rank,
                                                 GET_EOF_COMPLETED, sf_context->sf_eof_comm, &recv_reqs[i])))
            H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Irecv", mpi_code);
    }

    /* Send each subfile's IOC a message requesting that subfile's EOF */

    msg[1] = -1; /* padding -- not used in this message */
    msg[2] = -1; /* padding -- not used in this message */

    for (int i = 0; i < num_subfiles; i++) {
        int ioc_rank = sf_context->topology->io_concentrators[i % n_io_concentrators];

        /* Set subfile index for receiving IOC */
        msg[0] = i / n_io_concentrators;

        if (MPI_SUCCESS != (mpi_code = MPI_Send(msg, 1, H5_subfiling_rpc_msg_type, ioc_rank, GET_EOF_OP,
                                                sf_context->sf_msg_comm)))
            H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Send", mpi_code);
    }

    /* Wait for EOF communication to complete */
    H5_GCC_DIAG_OFF("stringop-overflow")
    if (MPI_SUCCESS != (mpi_code = MPI_Waitall(num_subfiles, recv_reqs, MPI_STATUSES_IGNORE)))
        H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Waitall", mpi_code);
    H5_GCC_DIAG_ON("stringop-overflow")

    for (int i = 0; i < num_subfiles; i++) {
#ifndef NDEBUG
        int ioc_rank = (int)recv_msg[3 * i];
#endif

        assert(ioc_rank >= 0);
        assert(ioc_rank < n_io_concentrators);
        assert(sf_eofs[i] == -1);

        sf_eofs[i] = recv_msg[(3 * i) + 1];
    }

    /* 4) After all IOCs have replied, compute the offset of
     *    each subfile in the logical file.  Take the maximum
     *    of these values, and report this value as the overall
     *    EOF.
     */

    for (int i = 0; i < num_subfiles; i++) {

        /* compute number of complete stripes */
        sf_logical_eof = sf_eofs[i] / sf_context->sf_stripe_size;

        /* multiply by stripe size */
        sf_logical_eof *= sf_context->sf_stripe_size * num_subfiles;

        /* if the subfile doesn't end on a stripe size boundary, must add in a partial stripe */
        if (sf_eofs[i] % sf_context->sf_stripe_size > 0) {

            /* add in the size of the partial stripe up to but not including this subfile */
            sf_logical_eof += i * sf_context->sf_stripe_size;

            /* finally, add in the number of bytes in the last partial stripe depth in the subfile */
            sf_logical_eof += sf_eofs[i] % sf_context->sf_stripe_size;
        }

        if (sf_logical_eof > logical_eof) {

            logical_eof = sf_logical_eof;
        }
    }

#ifdef H5_SUBFILING_DEBUG
    H5_subfiling_log(context_id, "%s: calculated logical EOF = %" PRId64 ".", __func__, logical_eof);
#endif

    *logical_eof_ptr = logical_eof;

done:
    if (ret_value < 0) {
        for (int i = 0; i < num_subfiles; i++) {
            if (recv_reqs && (recv_reqs[i] != MPI_REQUEST_NULL)) {
                if (MPI_SUCCESS != (mpi_code = MPI_Cancel(&recv_reqs[i])))
                    H5_SUBFILING_MPI_DONE_ERROR(FAIL, "MPI_Cancel", mpi_code);
            }
        }
    }

    free(recv_msg);
    free(recv_reqs);
    free(sf_eofs);

    H5_SUBFILING_FUNC_LEAVE;
} /* H5FD__subfiling__get_real_eof() */
href='#n1704'>1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.                                               *
 * Copyright by the Board of Trustees of the University of Illinois.         *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the files COPYING and Copyright.html.  COPYING can be found at the root   *
 * of the source code distribution tree; Copyright.html can be found at the  *
 * root level of an installed copy of the electronic HDF5 document set and   *
 * is linked from the top-level documents page.  It can also be found at     *
 * http://hdfgroup.org/HDF5/doc/Copyright.html.  If you do not have          *
 * access to either file, you may request a copy from help@hdfgroup.org.     *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/****************/
/* Module Setup */
/****************/

#define H5F_PACKAGE		/*suppress error about including H5Fpkg 	  */
#define H5O_PACKAGE		/*suppress error about including H5Opkg 	  */
#define H5SM_PACKAGE		/*suppress error about including H5SMpkg	  */

/***********/
/* Headers */
/***********/
#include "H5private.h"		/* Generic Functions			*/
#include "H5ACprivate.h"	/* Metadata cache			*/
#include "H5Eprivate.h"		/* Error handling		  	*/
#include "H5Fpkg.h"		/* File access                          */
#include "H5FLprivate.h"	/* Free Lists                           */
#include "H5MFprivate.h"        /* File memory management		*/
#include "H5MMprivate.h"	/* Memory management			*/
#include "H5Opkg.h"             /* Object Headers                       */
#include "H5SMpkg.h"            /* Shared object header messages        */


/****************/
/* Local Macros */
/****************/


/******************/
/* Local Typedefs */
/******************/
/* Udata struct for calls to H5SM_read_iter_op */
typedef struct H5SM_read_udata_t {
    H5F_t *file;                    /* File in which sharing is happening (in) */
    H5O_msg_crt_idx_t idx;          /* Creation index of this message (in) */
    size_t buf_size;                /* Size of the encoded message (out) */
    void *encoding_buf;             /* The encoded message (out) */
} H5SM_read_udata_t;


/********************/
/* Local Prototypes */
/********************/
static herr_t H5SM_create_index(H5F_t *f, H5SM_index_header_t *header,
                                hid_t dxpl_id);
static herr_t H5SM_delete_index(H5F_t *f, H5SM_index_header_t *header,
                                hid_t dxpl_id, hbool_t delete_heap);
static haddr_t H5SM_create_list(H5F_t *f, H5SM_index_header_t *header, hid_t dxpl_id);
static size_t H5SM_find_in_list(const H5SM_list_t *list, const H5SM_mesg_key_t *key, size_t *empty_pos);
static herr_t H5SM_convert_list_to_btree(H5F_t * f, H5SM_index_header_t * header,
                H5SM_list_t **_list, H5HF_t *fheap, H5O_t *open_oh, hid_t dxpl_id);
static herr_t H5SM_convert_btree_to_list(H5F_t * f, H5SM_index_header_t * header, hid_t dxpl_id);
static herr_t H5SM_incr_ref(void *record, void *_op_data, hbool_t *changed);
static herr_t H5SM_write_mesg(H5F_t *f, hid_t dxpl_id, H5O_t *open_oh,
    H5SM_index_header_t *header, unsigned type_id, void *mesg,
    unsigned *cache_flags_ptr);
static herr_t H5SM_decr_ref(void *record, void *op_data, hbool_t *changed);
static herr_t H5SM_delete_from_index(H5F_t *f, hid_t dxpl_id, H5O_t *open_oh,
    H5SM_index_header_t *header, const H5O_shared_t * mesg,
    unsigned *cache_flags, void ** /*out*/ encoded_mesg);
static herr_t H5SM_type_to_flag(unsigned type_id, unsigned *type_flag);
static herr_t H5SM_read_iter_op(H5O_t *oh, H5O_mesg_t *mesg, unsigned sequence,
    hbool_t *oh_modified, void *_udata);
static herr_t H5SM_read_mesg_fh_cb(const void *obj, size_t obj_len, void *_udata);
static herr_t H5SM_read_mesg(H5F_t *f, const H5SM_sohm_t *mesg, H5HF_t *fheap,
               H5O_t * open_oh, hid_t dxpl_id, size_t *encoding_size /*out*/,
               void ** encoded_mesg /*out*/);


/*********************/
/* Package Variables */
/*********************/

H5FL_DEFINE(H5SM_master_table_t);
H5FL_ARR_DEFINE(H5SM_index_header_t, H5O_SHMESG_MAX_NINDEXES);
H5FL_DEFINE(H5SM_list_t);
H5FL_ARR_DEFINE(H5SM_sohm_t, H5O_SHMESG_MAX_LIST_SIZE);


/*****************************/
/* Library Private Variables */
/*****************************/


/*******************/
/* Local Variables */
/*******************/



/*-------------------------------------------------------------------------
 * Function:    H5SM_init
 *
 * Purpose:     Initializes the Shared Message interface.
 *
 *              Creates a master SOHM table in the file and in the cache.
 *              This function should not be called for files that have
 *              SOHMs disabled in the FCPL.
 *
 * Return:      Non-negative on success/Negative on failure
 *
 * Programmer:  James Laird
 *              Tuesday, May 2, 2006
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5SM_init(H5F_t *f, H5P_genplist_t * fc_plist, const H5O_loc_t *ext_loc, hid_t dxpl_id)
{
    H5O_shmesg_table_t sohm_table;      /* SOHM message for superblock extension */
    H5SM_master_table_t *table = NULL;  /* SOHM master table for file */
    haddr_t table_addr = HADDR_UNDEF;   /* Address of SOHM master table in file */
    unsigned num_indexes;               /* Number of SOHM indices */
    unsigned list_max, btree_min;       /* Phase change limits for SOHM indices */
    unsigned index_type_flags[H5O_SHMESG_MAX_NINDEXES]; /* Messages types stored in each index */
    unsigned minsizes[H5O_SHMESG_MAX_NINDEXES]; /* Message size sharing threshhold for each index */
    unsigned type_flags_used;           /* Message type flags used, for sanity checking */
    hsize_t table_size;                 /* Size of SOHM master table in file */
    unsigned x;                         /* Local index variable */
    herr_t ret_value = SUCCEED;

    FUNC_ENTER_NOAPI(H5SM_init, NULL)

    HDassert(f);
    /* File should not already have a SOHM table */
    HDassert(f->shared->sohm_addr == HADDR_UNDEF);

    /* Initialize master table */
    if(NULL == (table = H5FL_MALLOC(H5SM_master_table_t)))
	HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for SOHM table")

    /* Get information from fcpl */
    if(H5P_get(fc_plist, H5F_CRT_SHMSG_NINDEXES_NAME, &num_indexes)<0)
        HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get number of indexes")
    if(H5P_get(fc_plist, H5F_CRT_SHMSG_INDEX_TYPES_NAME, &index_type_flags)<0)
        HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get SOHM type flags")
    if(H5P_get(fc_plist, H5F_CRT_SHMSG_LIST_MAX_NAME, &list_max)<0)
        HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get SOHM list maximum")
    if(H5P_get(fc_plist, H5F_CRT_SHMSG_BTREE_MIN_NAME, &btree_min)<0)
        HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get SOHM btree minimum")
    if(H5P_get(fc_plist, H5F_CRT_SHMSG_INDEX_MINSIZE_NAME, &minsizes) < 0)
        HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get SOHM message min sizes")

    /* Verify that values are valid */
    if(num_indexes > H5O_SHMESG_MAX_NINDEXES)
        HGOTO_ERROR(H5E_PLIST, H5E_BADRANGE, FAIL, "number of indexes in property list is too large")

    /* Check that type flags weren't duplicated anywhere */
    type_flags_used = 0;
    for(x = 0; x < num_indexes; ++x) {
        if(index_type_flags[x] & type_flags_used)
            HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "the same shared message type flag is assigned to more than one index")
        type_flags_used |= index_type_flags[x];
    } /* end for */

    /* Set version and number of indexes in table and in superblock.
     * Right now we just use one byte to hold the number of indexes.
     */
    HDassert(num_indexes < 256);
    table->num_indexes = num_indexes;

    /* Check that list and btree cutoffs make sense.  There can't be any
     * values greater than the list max but less than the btree min; the
     * list max has to be greater than or equal to one less than the btree
     * min.
     */
    HDassert(list_max + 1 >= btree_min);
    HDassert(table->num_indexes > 0 && table->num_indexes <= H5O_SHMESG_MAX_NINDEXES);

    /* Allocate the SOHM indexes as an array. */
    if(NULL == (table->indexes = (H5SM_index_header_t *)H5FL_ARR_MALLOC(H5SM_index_header_t, (size_t)table->num_indexes)))
	HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for SOHM indexes")

    /* Initialize all of the indexes, but don't allocate space for them to
     * hold messages until we actually need to write to them.
     */
    for(x = 0; x < table->num_indexes; x++)
    {
        table->indexes[x].btree_min = btree_min;
        table->indexes[x].list_max = list_max;
        table->indexes[x].mesg_types = index_type_flags[x];
        table->indexes[x].min_mesg_size = minsizes[x];
        table->indexes[x].index_addr = HADDR_UNDEF;
        table->indexes[x].heap_addr = HADDR_UNDEF;
        table->indexes[x].num_messages = 0;

        /* Indexes start as lists unless the list-to-btree threshold is zero */
        if(table->indexes[x].list_max > 0)
            table->indexes[x].index_type = H5SM_LIST;
        else
            table->indexes[x].index_type = H5SM_BTREE;
    } /* end for */

    /* Allocate space for the table on disk */
    table_size = (hsize_t) H5SM_TABLE_SIZE(f) + (hsize_t) (table->num_indexes * H5SM_INDEX_HEADER_SIZE(f));
    if(HADDR_UNDEF == (table_addr = H5MF_alloc(f, H5FD_MEM_SOHM_TABLE, dxpl_id, table_size)))
	HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "file allocation failed for SOHM table")

    /* Cache the new table */
    if(H5AC_set(f, dxpl_id, H5AC_SOHM_TABLE, table_addr, table, H5AC__NO_FLAGS_SET) < 0)
	HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "can't add SOHM table to cache")

    /* Record the address of the master table in the file */
    f->shared->sohm_addr = table_addr;

    /* Check for sharing attributes in this file, which means that creation
     *  indices must be tracked on object header message in the file.
     */
    if(type_flags_used & H5O_SHMESG_ATTR_FLAG)
        f->shared->store_msg_crt_idx = TRUE;

    /* Write shared message information to the superblock extension */
    sohm_table.addr = f->shared->sohm_addr;
    sohm_table.version = f->shared->sohm_vers;
    sohm_table.nindexes = f->shared->sohm_nindexes;
    if(H5O_msg_create(ext_loc, H5O_SHMESG_ID, H5O_MSG_FLAG_CONSTANT | H5O_MSG_FLAG_DONTSHARE, H5O_UPDATE_TIME, &sohm_table, dxpl_id) < 0)
        HGOTO_ERROR(H5E_SOHM, H5E_CANTINIT, FAIL, "unable to update SOHM header message")

done:
    if(ret_value < 0) {
        if(table_addr != HADDR_UNDEF)
            H5MF_xfree(f, H5FD_MEM_SOHM_TABLE, dxpl_id, table_addr, (hsize_t)H5SM_TABLE_SIZE(f));
        if(table != NULL)
            (void)H5FL_FREE(H5SM_master_table_t, table);
    } /* end if */

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_init() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_type_to_flag
 *
 * Purpose:     Get the shared message flag for a given message type.
 *
 * Return:      Non-negative on success/Negative on failure
 *
 * Programmer:  James Laird
 *              Tuesday, October 10, 2006
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_type_to_flag(unsigned type_id, unsigned *type_flag)
{
    herr_t ret_value = SUCCEED;         /* Return value */

    FUNC_ENTER_NOAPI_NOINIT(H5SM_type_to_flag)

    /* Translate the H5O type_id into an H5SM type flag */
    switch(type_id) {
        case H5O_FILL_ID:
            type_id = H5O_FILL_NEW_ID;
            /* Fall through... */

        case H5O_SDSPACE_ID:
        case H5O_DTYPE_ID:
        case H5O_FILL_NEW_ID:
        case H5O_PLINE_ID:
        case H5O_ATTR_ID:
            *type_flag = (unsigned)1 << type_id;
            break;

        default:
            HGOTO_ERROR(H5E_OHDR, H5E_BADTYPE, FAIL, "unknown message type ID")
    } /* end switch */

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_type_to_flag() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_get_index
 *
 * Purpose:     Get the index number for a given message type.
 *
 *              Returns the number of the index in the supplied table
 *              that holds messages of type type_id, or negative if
 *              there is no index for this message type.
 *
 * Return:      Non-negative on success/Negative on failure
 *
 * Programmer:  James Laird
 *              Tuesday, October 10, 2006
 *
 *-------------------------------------------------------------------------
 */
ssize_t
H5SM_get_index(const H5SM_master_table_t *table, unsigned type_id)
{
    size_t x;
    unsigned type_flag;
    ssize_t ret_value = FAIL;

    FUNC_ENTER_NOAPI_NOINIT(H5SM_get_index)

    /* Translate the H5O type_id into an H5SM type flag */
    if(H5SM_type_to_flag(type_id, &type_flag) < 0)
        HGOTO_ERROR(H5E_OHDR, H5E_CANTGET, FAIL, "can't map message type to flag")

    /* Search the indexes until we find one that matches this flag or we've
     * searched them all.
     */
    for(x = 0; x < table->num_indexes; ++x)
        if(table->indexes[x].mesg_types & type_flag)
            HGOTO_DONE((ssize_t)x)

    /* At this point, ret_value is either the location of the correct
     * index or it's still FAIL because we didn't find an index.
     */
done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_get_index() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_type_shared
 *
 * Purpose:     Check if a given message type is shared in a file.
 *
 * Return:      Non-negative on success/Negative on failure
 *
 * Programmer:  Quincey Koziol
 *              Tuesday, December 12, 2006
 *
 *-------------------------------------------------------------------------
 */
htri_t
H5SM_type_shared(H5F_t *f, unsigned type_id, hid_t dxpl_id)
{
    H5SM_master_table_t *table = NULL;  /* Shared object master table */
    unsigned type_flag;                 /* Flag corresponding to message type */
    size_t u;                           /* Local index variable */
    htri_t ret_value = FALSE;           /* Return value */

    FUNC_ENTER_NOAPI_NOINIT(H5SM_type_shared)

    /* Translate the H5O type_id into an H5SM type flag */
    if(H5SM_type_to_flag(type_id, &type_flag) < 0)
        HGOTO_ERROR(H5E_OHDR, H5E_CANTGET, FAIL, "can't map message type to flag")

    /* Look up the master SOHM table */
    if(H5F_addr_defined(f->shared->sohm_addr)) {
        if(NULL == (table = (H5SM_master_table_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, NULL, NULL, H5AC_READ)))
            HGOTO_ERROR(H5E_OHDR, H5E_CANTPROTECT, FAIL, "unable to load SOHM master table")
    } /* end if */
    else
        /* No shared messages of any type */
        HGOTO_DONE(FALSE)

    /* Search the indexes until we find one that matches this flag or we've
     * searched them all.
     */
    for(u = 0; u < table->num_indexes; u++)
        if(table->indexes[u].mesg_types & type_flag)
            HGOTO_DONE(TRUE)

done:
    /* Release the master SOHM table */
    if(table && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, table, H5AC__NO_FLAGS_SET) < 0)
	HDONE_ERROR(H5E_OHDR, H5E_CANTUNPROTECT, FAIL, "unable to close SOHM master table")

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_type_shared() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_get_fheap_addr
 *
 * Purpose:     Gets the address of the fractal heap used to store
 *              messages of type type_id.
 *
 * Return:      Non-negative on success/negative on failure
 *
 * Programmer:  James Laird
 *              Tuesday, October 3, 2006
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5SM_get_fheap_addr(H5F_t *f, hid_t dxpl_id, unsigned type_id, haddr_t *fheap_addr)
{
    H5SM_master_table_t *table = NULL;  /* Shared object master table */
    ssize_t index_num;                  /* Which index */
    herr_t ret_value = SUCCEED;         /* Return value */

    FUNC_ENTER_NOAPI(H5SM_get_fheap_addr, FAIL)

    /* Sanity checks */
    HDassert(f);
    HDassert(fheap_addr);

    /* Look up the master SOHM table */
    if(NULL == (table = (H5SM_master_table_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, NULL, NULL, H5AC_READ)))
	HGOTO_ERROR(H5E_CACHE, H5E_CANTPROTECT, FAIL, "unable to load SOHM master table")

    /* Look up index for message type */
    if((index_num = H5SM_get_index(table, type_id)) < 0)
	HGOTO_ERROR(H5E_SOHM, H5E_CANTPROTECT, FAIL, "unable to find correct SOHM index")

    /* Retrieve heap address for index */
    *fheap_addr = table->indexes[index_num].heap_addr;

done:
    /* Release the master SOHM table */
    if(table && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, table, H5AC__NO_FLAGS_SET) < 0)
	HDONE_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "unable to close SOHM master table")

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_get_fheap_addr() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_create_index
 *
 * Purpose:     Allocates storage for an index, populating the HEADER struct.
 *
 * Return:      Non-negative on success/negative on failure
 *
 * Programmer:  James Laird
 *              Tuesday, May 2, 2006
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_create_index(H5F_t *f, H5SM_index_header_t *header, hid_t dxpl_id)
{
    haddr_t list_addr=HADDR_UNDEF;   /* Address of SOHM list */
    haddr_t tree_addr=HADDR_UNDEF;   /* Address of SOHM B-tree */
    H5HF_create_t fheap_cparam;      /* Fractal heap creation parameters */
    H5HF_t *fheap = NULL;
    herr_t ret_value = SUCCEED;

    FUNC_ENTER_NOAPI_NOINIT(H5SM_create_index)

    HDassert(header);
    HDassert(header->index_addr == HADDR_UNDEF);
    HDassert(header->btree_min <= header->list_max + 1);

    /* In most cases, the index starts as a list */
    if(header->list_max > 0) {
        header->index_type = H5SM_LIST;

        if((list_addr = H5SM_create_list(f, header, dxpl_id)) == HADDR_UNDEF)
            HGOTO_ERROR(H5E_SOHM, H5E_CANTCREATE, FAIL, "list creation failed for SOHM index")

        header->index_addr = list_addr;
    } /* end if */
    /* index is a B-tree */
    else {
        header->index_type = H5SM_BTREE;

        if(H5B2_create(f, dxpl_id, H5SM_INDEX, (size_t)H5SM_B2_NODE_SIZE,
                  (size_t)H5SM_SOHM_ENTRY_SIZE(f), H5SM_B2_SPLIT_PERCENT,
                  H5SM_B2_MERGE_PERCENT, &tree_addr) < 0)
            HGOTO_ERROR(H5E_BTREE, H5E_CANTCREATE, FAIL, "B-tree creation failed for SOHM index")

        header->index_addr = tree_addr;
    } /* end else */

    /* Create a heap to hold the shared messages that the list or B-tree will index */
    HDmemset(&fheap_cparam, 0, sizeof(fheap_cparam));
    fheap_cparam.managed.width = H5O_FHEAP_MAN_WIDTH;
    fheap_cparam.managed.start_block_size = H5O_FHEAP_MAN_START_BLOCK_SIZE;
    fheap_cparam.managed.max_direct_size = H5O_FHEAP_MAN_MAX_DIRECT_SIZE;
    fheap_cparam.managed.max_index = H5O_FHEAP_MAN_MAX_INDEX;
    fheap_cparam.managed.start_root_rows = H5O_FHEAP_MAN_START_ROOT_ROWS;
    fheap_cparam.checksum_dblocks = H5O_FHEAP_CHECKSUM_DBLOCKS;
    fheap_cparam.id_len = 0;
    fheap_cparam.max_man_size = H5O_FHEAP_MAX_MAN_SIZE;
    if(NULL == (fheap = H5HF_create(f, dxpl_id, &fheap_cparam)))
        HGOTO_ERROR(H5E_HEAP, H5E_CANTINIT, FAIL, "unable to create fractal heap")

    if(H5HF_get_heap_addr(fheap, &(header->heap_addr)) < 0)
        HGOTO_ERROR(H5E_HEAP, H5E_CANTGETSIZE, FAIL, "can't get fractal heap address")

#ifndef NDEBUG
{
    size_t fheap_id_len;             /* Size of a fractal heap ID */

    /* Sanity check ID length */
    if(H5HF_get_id_len(fheap, &fheap_id_len) < 0)
        HGOTO_ERROR(H5E_HEAP, H5E_CANTGETSIZE, FAIL, "can't get fractal heap ID length")
    HDassert(fheap_id_len == H5O_FHEAP_ID_LEN);
}
#endif /* NDEBUG */

done:
    /* Close the fractal heap if one has been created */
    if(fheap && H5HF_close(fheap, dxpl_id) < 0)
        HDONE_ERROR(H5E_HEAP, H5E_CLOSEERROR, FAIL, "can't close fractal heap")

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_create_index */


/*-------------------------------------------------------------------------
 * Function:    H5SM_delete_index
 *
 * Purpose:     De-allocates storage for an index whose header is HEADER.
 *
 *              If DELETE_HEAP is TRUE, deletes the index's heap, eliminating
 *              it completely.
 *
 *              If DELETE_HEAP is FALSE, the heap is not deleted.  This is
 *              useful when deleting only the index header as the index is
 *              converted from a list to a B-tree and back again.
 *
 * Return:      Non-negative on success/negative on failure
 *
 * Programmer:  James Laird
 *              Thursday, January 4, 2006
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_delete_index(H5F_t *f, H5SM_index_header_t *header, hid_t dxpl_id, hbool_t delete_heap)
{
    hsize_t     list_size;        /* Size of list on disk */
    herr_t ret_value = SUCCEED;

    FUNC_ENTER_NOAPI_NOINIT(H5SM_delete_index)

    /* Determine whether index is a list or a B-tree. */
    if(header->index_type == H5SM_LIST) {
        /* Eject entry from cache */
        if(H5AC_expunge_entry(f, dxpl_id, H5AC_SOHM_LIST, header->index_addr) < 0)
            HGOTO_ERROR(H5E_HEAP, H5E_CANTREMOVE, FAIL, "unable to remove list index from cache")

        /* Free the file space used */
        list_size = H5SM_LIST_SIZE(f, header->list_max);
        if(H5MF_xfree(f, H5FD_MEM_SOHM_INDEX, dxpl_id, header->index_addr, list_size) < 0)
            HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to free shared message list")
    } /* end if */
    else {
        HDassert(header->index_type == H5SM_BTREE);

        /* Delete from the B-tree. */
        if(H5B2_delete(f, dxpl_id, H5SM_INDEX, header->index_addr, NULL, NULL) < 0)
            HGOTO_ERROR(H5E_BTREE, H5E_CANTDELETE, FAIL, "unable to delete B-tree")

        /* Revert to list unless B-trees can have zero records */
        if(header->btree_min > 0)
            header->index_type = H5SM_LIST;
    } /* end else */

    /* Free the index's heap if requested. */
    if(delete_heap == TRUE) {
        if(H5HF_delete(f, dxpl_id, header->heap_addr) < 0)
            HGOTO_ERROR(H5E_SOHM, H5E_CANTDELETE, FAIL, "unable to delete fractal heap")
        header->heap_addr = HADDR_UNDEF;
    } /* end if */

    /* Reset index info */
    header->index_addr = HADDR_UNDEF;
    header->num_messages = 0;

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_delete_index */


/*-------------------------------------------------------------------------
 * Function:    H5SM_create_list
 *
 * Purpose:     Creates a list of SOHM messages.
 *
 *              Called when a new index is created from scratch or when a
 *              B-tree needs to be converted back into a list.
 *
 * Return:      Non-negative on success/Negative on failure
 *
 * Programmer:  James Laird
 *              Monday, August 28, 2006
 *
 *-------------------------------------------------------------------------
 */
static haddr_t
H5SM_create_list(H5F_t *f, H5SM_index_header_t *header, hid_t dxpl_id)
{
    H5SM_list_t *list = NULL;   /* List of messages */
    hsize_t x;                  /* Counter variable */
    hsize_t size = 0;           /* Size of list on disk */
    size_t num_entries;         /* Number of messages to create in list */
    haddr_t addr = HADDR_UNDEF; /* Address of the list on disk */
    haddr_t ret_value;

    FUNC_ENTER_NOAPI_NOINIT(H5SM_create_list)

    HDassert(f);
    HDassert(header);

    num_entries = header->list_max;

    /* Allocate list in memory */
    if((list = H5FL_MALLOC(H5SM_list_t)) == NULL)
	HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, HADDR_UNDEF, "file allocation failed for SOHM list")
    if((list->messages = (H5SM_sohm_t *)H5FL_ARR_MALLOC(H5SM_sohm_t, num_entries)) == NULL)
	HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, HADDR_UNDEF, "file allocation failed for SOHM list")

    /* Initialize messages in list */
    HDmemset(list->messages, 0, sizeof(H5SM_sohm_t) * num_entries);
    for(x=0; x<num_entries; x++)
        list->messages[x].location = H5SM_NO_LOC;

    /* Point list at header passed in */
    list->header = header;

    /* Allocate space for the list on disk */
    size = H5SM_LIST_SIZE(f, num_entries);
    if(HADDR_UNDEF == (addr = H5MF_alloc(f, H5FD_MEM_SOHM_INDEX, dxpl_id, size)))
	HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, HADDR_UNDEF, "file allocation failed for SOHM list")

    /* Put the list into the cache */
    if(H5AC_set(f, dxpl_id, H5AC_SOHM_LIST, addr, list, H5AC__NO_FLAGS_SET) < 0)
	HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, HADDR_UNDEF, "can't add SOHM list to cache")

    /* Set return value */
    ret_value = addr;

done:
    if(ret_value == HADDR_UNDEF) {
        if(list != NULL) {
            if(list->messages != NULL)
                H5FL_ARR_FREE(H5SM_sohm_t, list->messages);
            (void)H5FL_FREE(H5SM_list_t, list);
        } /* end if */
        if(addr != HADDR_UNDEF)
            H5MF_xfree(f, H5FD_MEM_SOHM_INDEX, dxpl_id, addr, size);
    } /* end if */

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_create_list */


/*-------------------------------------------------------------------------
 * Function:    H5SM_convert_list_to_btree
 *
 * Purpose:     Given a list index, turns it into a B-tree index.  This is
 *              done when too many messages are added to the list.
 *
 *              Requires that *_LIST be a valid list and currently protected
 *              in the cache.  Unprotects (and expunges) *_LIST from the cache.
 *
 *              _LIST needs to be a double pointer so that the calling function
 *              knows if it is released from the cache if this function exits
 *              in error.  Trying to free it again will trigger an assert.
 *
 * Return:      Non-negative on success
 *              Negative on failure
 *
 * Programmer:  James Laird
 *              Thursday, January 4, 2006
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_convert_list_to_btree(H5F_t *f, H5SM_index_header_t *header,
    H5SM_list_t **_list, H5HF_t *fheap, H5O_t *open_oh, hid_t dxpl_id)
{
    H5SM_list_t *list;                  /* Pointer to the existing message list */
    H5SM_mesg_key_t key;                /* Key for inserting records in v2 B-tree */
    haddr_t     tree_addr;              /* New v2 B-tree's address */
    size_t      num_messages;		/* Number of messages being tracked */
    size_t      x;
    void *      encoding_buf = NULL;
    herr_t      ret_value = SUCCEED;

    FUNC_ENTER_NOAPI_NOINIT(H5SM_convert_list_to_btree)

    HDassert(_list && *_list);
    HDassert(header);

    /* Get pointer to list of messages to convert */
    list = *_list;

    /* Create the new v2 B-tree for tracking the messages */
    if(H5B2_create(f, dxpl_id, H5SM_INDEX, (size_t)H5SM_B2_NODE_SIZE,
            (size_t)H5SM_SOHM_ENTRY_SIZE(f), H5SM_B2_SPLIT_PERCENT,
            H5SM_B2_MERGE_PERCENT, &tree_addr) < 0)
        HGOTO_ERROR(H5E_BTREE, H5E_CANTCREATE, FAIL, "B-tree creation failed for SOHM index")

    /* Set up key values that all messages will use.  Since these messages
     * are in the heap, they have a heap ID and no encoding or type_id.
     */
    key.file = f;
    key.dxpl_id = dxpl_id;
    key.fheap = fheap;
    key.encoding_size = 0;
    key.encoding = NULL;

    /* Insert each record into the new B-tree */
    for(x = 0; x < header->list_max; x++) {
        if(list->messages[x].location != H5SM_NO_LOC) {
            /* Copy message into key */
            key.message = list->messages[x];

            /* Get the encoded message */
            if(H5SM_read_mesg(f, &(key.message), fheap, open_oh, dxpl_id, &key.encoding_size, &encoding_buf) < 0)
                HGOTO_ERROR(H5E_SOHM, H5E_CANTLOAD, FAIL, "Couldn't read SOHM message in list")

            key.encoding = encoding_buf;

            /* Insert the message into the B-tree */
            if(H5B2_insert(f, dxpl_id, H5SM_INDEX, tree_addr, &key) < 0)
                HGOTO_ERROR(H5E_BTREE, H5E_CANTINSERT, FAIL, "couldn't add SOHM to B-tree")

            /* Free buffer from H5SM_read_mesg */
            if(encoding_buf)
                encoding_buf = H5MM_xfree(encoding_buf);
        } /* end if */
    } /* end for */

    /* Unprotect list in cache and release heap */
    if(H5AC_unprotect(f, dxpl_id, H5AC_SOHM_LIST, header->index_addr, list, H5AC__DELETED_FLAG) < 0)
	HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "unable to release SOHM list")
    *_list = list = NULL;

    /* Delete the old list index (but not its heap, which the new index is
     * still using!)
     */
    num_messages = header->num_messages;        /* preserve this across the index deletion */
    if(H5SM_delete_index(f, header, dxpl_id, FALSE) < 0)
        HGOTO_ERROR(H5E_SOHM, H5E_CANTDELETE, FAIL, "can't free list index")

    /* Set/restore header info */
    header->index_addr = tree_addr;
    header->index_type = H5SM_BTREE;
    header->num_messages = num_messages;

done:
    /* Free the buffer, if it hasn't already been freed (because of error) */
    if(encoding_buf)
        encoding_buf = H5MM_xfree(encoding_buf);

    FUNC_LEAVE_NOAPI(ret_value)
} /* H5SM_convert_list_to_btree() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_convert_btree_to_list
 *
 * Purpose:     Given a B-tree index, turns it into a list index.  This is
 *              done when too many messages are deleted from the B-tree.
 *
 * Return:      Non-negative on success
 *              Negative on failure
 *
 * Programmer:  James Laird
 *              Thursday, January 4, 2006
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_convert_btree_to_list(H5F_t * f, H5SM_index_header_t * header, hid_t dxpl_id)
{
    H5SM_list_t     *list = NULL;
    haddr_t          btree_addr;
    herr_t           ret_value = SUCCEED;

    FUNC_ENTER_NOAPI_NOINIT(H5SM_convert_btree_to_list)

    /* Remember the address of the old B-tree, but change the header over to be
     * a list..
     */
    btree_addr = header->index_addr;

    header->num_messages = 0;
    header->index_type = H5SM_LIST;

    /* Create a new list index */
    if(HADDR_UNDEF == (header->index_addr = H5SM_create_list(f, header, dxpl_id)))
        HGOTO_ERROR(H5E_SOHM, H5E_CANTINIT, FAIL, "unable to create shared message list")

    if(NULL == (list = (H5SM_list_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_LIST, header->index_addr, NULL, header, H5AC_WRITE)))
        HGOTO_ERROR(H5E_SOHM, H5E_CANTPROTECT, FAIL, "unable to load SOHM list index")

    /* Delete the B-tree and have messages copy themselves to the
     * list as they're deleted
     */
    if(H5B2_delete(f, dxpl_id, H5SM_INDEX, btree_addr, H5SM_btree_convert_to_list_op, list) < 0)
        HGOTO_ERROR(H5E_BTREE, H5E_CANTDELETE, FAIL, "unable to delete B-tree")

done:
    /* Release the SOHM list from the cache */
    if(list && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_LIST, header->index_addr, list, H5AC__DIRTIED_FLAG) < 0)
        HDONE_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "unable to unprotect SOHM index")

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_convert_btree_to_list() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_can_share_common
 *
 * Purpose:     "trivial" checks for determining if a message can be shared.
 *
 * Note:	These checks are common to the "can share" and "try share"
 *		routines and are the "fast" checks before we need to protect
 *		the SOHM master table.
 *
 * Return:      TRUE if message could be a SOHM
 *              FALSE if this message couldn't be a SOHM
 *              Negative on failure
 *
 * Programmer:  Quincey Koziol
 *              Wednesday, February 21, 2007
 *
 *-------------------------------------------------------------------------
 */
static htri_t
H5SM_can_share_common(const H5F_t *f, unsigned type_id, const void *mesg)
{
    htri_t ret_value;           /* Return value */

    FUNC_ENTER_NOAPI_NOINIT(H5SM_can_share_common)

    /* Check whether this message ought to be shared or not */
    /* If sharing is disabled in this file, don't share the message */
    if(!H5F_addr_defined(f->shared->sohm_addr))
        HGOTO_DONE(FALSE)

    /* Type-specific check */
    if((ret_value = H5O_msg_can_share(type_id, mesg)) < 0)
        HGOTO_ERROR(H5E_SOHM, H5E_BADTYPE, FAIL, "can_share callback returned error")
    if(ret_value == FALSE)
        HGOTO_DONE(FALSE)

    /* At this point, the message passes the "trivial" checks and is worth
     *  further checks.
     */

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_can_share_common() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_can_share
 *
 * Purpose:     Checks if an object header message would be shared or is
 *		already shared.
 *
 *              If not, returns FALSE and does nothing.
 *
 * Return:      TRUE if message will be a SOHM
 *              FALSE if this message won't be a SOHM
 *              Negative on failure
 *
 * Programmer:  Quincey Koziol
 *              Wednesday, February 21, 2007
 *
 *-------------------------------------------------------------------------
 */
htri_t
H5SM_can_share(H5F_t *f, hid_t dxpl_id, H5SM_master_table_t *table,
    ssize_t *sohm_index_num, unsigned type_id, const void *mesg)
{
    size_t              mesg_size;
    H5SM_master_table_t *my_table = NULL;
    ssize_t             index_num;
    htri_t              tri_ret;
    htri_t              ret_value = TRUE;

    FUNC_ENTER_NOAPI(H5SM_can_share, FAIL)

    /* "trivial" sharing checks */
    if((tri_ret = H5SM_can_share_common(f, type_id, mesg)) < 0)
        HGOTO_ERROR(H5E_SOHM, H5E_BADTYPE, FAIL, "'trivial' sharing checks returned error")
    if(tri_ret == FALSE)
        HGOTO_DONE(FALSE)

    /* Look up the master SOHM table */
    /* (use incoming master SOHM table if possible) */
    if(table)
        my_table = table;
    else {
        if(NULL == (my_table = (H5SM_master_table_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, NULL, NULL, H5AC_READ)))
            HGOTO_ERROR(H5E_SOHM, H5E_CANTPROTECT, FAIL, "unable to load SOHM master table")
    } /* end if */

    /* Find the right index for this message type.  If there is no such index
     * then this type of message isn't shareable
     */
    if((index_num = H5SM_get_index(my_table, type_id)) < 0) {
        H5E_clear_stack(NULL); /*ignore error*/
        HGOTO_DONE(FALSE)
    } /* end if */

    /* If the message isn't big enough, don't bother sharing it */
    if(0 == (mesg_size = H5O_msg_raw_size(f, type_id, TRUE, mesg)))
        HGOTO_ERROR(H5E_OHDR, H5E_BADMESG, FAIL, "unable to get OH message size")
    if(mesg_size < my_table->indexes[index_num].min_mesg_size)
        HGOTO_DONE(FALSE)

    /* At this point, the message will be shared, set the index number if requested. */
    if(sohm_index_num)
        *sohm_index_num = index_num;

done:
    /* Release the master SOHM table, if we protected it */
    if(my_table && my_table != table && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, my_table, H5AC__NO_FLAGS_SET) < 0)
	HDONE_ERROR(H5E_SOHM, H5E_CANTRELEASE, FAIL, "unable to close SOHM master table")

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_can_share() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_try_share
 *
 * Purpose:     Attempts to share an object header message.
 *
 *              MESG_LOC is an H5O_mesg_loc_t struct that gives the message's
 *              location in an object header (address and index).  This
 *              function sets the type_id in MESG_LOC.
 *              If MESG_LOC is not NULL, this message will be "unique but
 *              shareable" and will be entered in the index but not actually
 *              shared.  If it is NULL, this message will be fully shared if
 *              it is shareable at all.
 *
 *              OPEN_OH is the object header that is currently open and
 *              protected.  If NULL, the SM module will protect any object
 *              header it needs (which can cause an error if that OH is
 *              already protected!).
 *
 *              MESG_FLAGS will have the H5O_MSG_FLAG_SHAREABLE or
 *              H5O_MSG_FLAG_SHARED flag set if one is appropriate.  This is
 *              the only way to tell the difference between a message that
 *              has just been fully shared and a message that is only
 *              "shareable" and is still in the object header, so it cannot
 *              be NULL if MESG_LOC is not NULL.  If MESG_LOC is NULL, then
 *              the message won't be "unique but shareable" and MESG_FLAGS
 *              can be NULL as well.
 *
 *              If the message should be shared (if sharing has been
 *              enabled and this message qualifies), this function turns the
 *              message into a shared message, sets the H5O_MSG_FLAG_SHARED
 *              flag in mesg_flags, and returns TRUE.
 *
 *              If the message isn't shared, returns FALSE.  If the message
 *              isn't shared but was entered in the shared message index,
 *              the H5O_MSG_FLAG_SHAREABLE flag will be set in mesg_flags
 *              and returns TRUE.
 *
 *              If this message was already shared, increments its reference
 *              count, and leaves it otherwise unchanged, returning TRUE and
 *              setting the H5O_MSG_FLAG_SHARED flag in mesg_flags.
 *
 *              If mesg_flags is NULL, the calling function should just look
 *              at the return value to determine if the message was shared
 *              or not.  Such messages should never be "unique but shareable."
 *
 * Return:      TRUE if message is now a SOHM
 *              FALSE if this message is not a SOHM
 *              Negative on failure
 *
 * Programmer:  James Laird
 *              Tuesday, May 2, 2006
 *
 *-------------------------------------------------------------------------
 */
htri_t
H5SM_try_share(H5F_t *f, hid_t dxpl_id, H5O_t *open_oh, unsigned type_id,
    void *mesg, unsigned *mesg_flags)
{
    H5SM_master_table_t *table = NULL;
    unsigned            cache_flags = H5AC__NO_FLAGS_SET;
    ssize_t             index_num;
    htri_t              tri_ret;
    htri_t              ret_value = TRUE;

    FUNC_ENTER_NOAPI(H5SM_try_share, FAIL)

    /* "trivial" sharing checks */
    if(mesg_flags && (*mesg_flags & H5O_MSG_FLAG_DONTSHARE))
        HGOTO_DONE(FALSE)
    if((tri_ret = H5SM_can_share_common(f, type_id, mesg)) < 0)
        HGOTO_ERROR(H5E_SOHM, H5E_BADTYPE, FAIL, "'trivial' sharing checks returned error")
    if(tri_ret == FALSE)
        HGOTO_DONE(FALSE)

    /* Look up the master SOHM table */
    if(NULL == (table = (H5SM_master_table_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, NULL, NULL, H5AC_WRITE)))
	HGOTO_ERROR(H5E_CACHE, H5E_CANTPROTECT, FAIL, "unable to load SOHM master table")

    /* "complex" sharing checks */
    if((tri_ret = H5SM_can_share(f, dxpl_id, table, &index_num, type_id, mesg)) < 0)
        HGOTO_ERROR(H5E_SOHM, H5E_BADTYPE, FAIL, "'complex' sharing checks returned error")
    if(tri_ret == FALSE)
        HGOTO_DONE(FALSE)

    /* At this point, the message will be shared. */

    /* If the index hasn't been allocated yet, create it */
    if(table->indexes[index_num].index_addr == HADDR_UNDEF) {
        if(H5SM_create_index(f, &(table->indexes[index_num]), dxpl_id) < 0)
            HGOTO_ERROR(H5E_SOHM, H5E_CANTINIT, FAIL, "unable to create SOHM index")
        cache_flags |= H5AC__DIRTIED_FLAG;
    } /* end if */

    /* Write the message as a shared message.  This may or may not cause the
     * message to become shared (if it is unique, it will not be shared).
     */
    if(H5SM_write_mesg(f, dxpl_id, open_oh, &(table->indexes[index_num]),
            type_id, mesg, &cache_flags) < 0)
	HGOTO_ERROR(H5E_SOHM, H5E_CANTINSERT, FAIL, "can't write shared message")

    /* Set flags if this message was "written" without error; it is now
     * either fully shared or "shareable".
     */
    if(mesg_flags) {
        if(((H5O_shared_t *)mesg)->type == H5O_SHARE_TYPE_HERE)
            *mesg_flags |= H5O_MSG_FLAG_SHAREABLE;
        else
            *mesg_flags |= H5O_MSG_FLAG_SHARED;
    } /* end if */

done:
    /* Release the master SOHM table */
    if(table && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, table, cache_flags) < 0)
	HDONE_ERROR(H5E_CACHE, H5E_CANTRELEASE, FAIL, "unable to close SOHM master table")

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_try_share() */


/*-------------------------------------------------------------------------
 * Function:	H5SM_incr_ref
 *
 * Purpose:	Increment the reference count for a SOHM message and return
 *              the message's heap ID.
 *
 *              The message pointer is actually returned via op_data, which
 *              should be a pointer to a H5SM_fheap_id_t.
 *
 * Return:	Non-negative on success
 *              Negative on failure
 *
 * Programmer:	James Laird
 *              Monday, November 6, 2006
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_incr_ref(void *record, void *_op_data, hbool_t *changed)
{
    H5SM_sohm_t *message = (H5SM_sohm_t *) record;
    H5SM_incr_ref_opdata *op_data = (H5SM_incr_ref_opdata *) _op_data;
    herr_t ret_value = SUCCEED;

    FUNC_ENTER_NOAPI_NOINIT(H5SM_incr_ref)

    HDassert(record);
    HDassert(op_data);
    HDassert(changed);

    /* If the message was previously shared in an object header, share
     * it in the heap now.
     */
    if(message->location == H5SM_IN_OH) {
        HDassert(op_data->key && op_data->key->fheap);

        /* Put the message in the heap and record its new heap ID */
        if(H5HF_insert(op_data->key->fheap, op_data->dxpl_id, op_data->key->encoding_size, op_data->key->encoding, &message->u.heap_loc.fheap_id) < 0)
            HGOTO_ERROR(H5E_HEAP, H5E_CANTINSERT, FAIL, "unable to insert message into fractal heap")

        message->location = H5SM_IN_HEAP;
        message->u.heap_loc.ref_count = 2;
    } /* end if */
    else {
        HDassert(message->location == H5SM_IN_HEAP);
        /* If it's already in the heap, just increment the ref count */
        ++message->u.heap_loc.ref_count;
    } /* end else */

    /* If we got here, the message has changed */
    *changed = TRUE;

    /* Check for retrieving the heap ID */
    if(op_data)
       op_data->fheap_id = message->u.heap_loc.fheap_id;

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_incr_ref() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_write_mesg
 *
 * Purpose:     Adds a shareable message to an index.  If this is the first
 *              such message and we have an object header location for this
 *              message, we record it in the index but don't modify the
 *              message passed in.
 *
 *              If the message is already in the index or we don't have an
 *              object header location for it, it is shared in the heap
 *              and this function sets its sharing struct to reflect this.
 *
 *              The index could be a list or a B-tree.
 *
 * Return:      TRUE if message is now shared
 *              FALSE if message is in index but is not shared
 *              Negative on failure
 *
 * Programmer:  James Laird
 *              Tuesday, May 2, 2006
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_write_mesg(H5F_t *f, hid_t dxpl_id, H5O_t *open_oh,
    H5SM_index_header_t *header, unsigned type_id, void *mesg,
    unsigned *cache_flags_ptr)
{
    H5SM_list_t           *list = NULL;     /* List index */
    H5SM_mesg_key_t       key;              /* Key used to search the index */
    H5O_shared_t          shared;           /* Shared H5O message */
    hbool_t               found = FALSE;    /* Was the message in the index? */
    H5HF_t                *fheap = NULL;    /* Fractal heap handle */
    size_t                buf_size;         /* Size of the encoded message */
    void *                encoding_buf = NULL; /* Buffer for encoded message */
    size_t                empty_pos = UFAIL; /* Empty entry in list */
    herr_t                ret_value = SUCCEED;

    FUNC_ENTER_NOAPI_NOINIT(H5SM_write_mesg)

    /* Sanity check */
    HDassert(header);
    HDassert(header->index_type != H5SM_BADTYPE);
    HDassert(cache_flags_ptr);

    /* Encode the message to be written */
    if((buf_size = H5O_msg_raw_size(f, type_id, TRUE, mesg)) == 0)
	HGOTO_ERROR(H5E_OHDR, H5E_BADSIZE, FAIL, "can't find message size")
    if(NULL == (encoding_buf = H5MM_malloc(buf_size)))
	HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "can't allocate buffer for encoding")
    if(H5O_msg_encode(f, type_id, TRUE, (unsigned char *)encoding_buf, mesg) < 0)
	HGOTO_ERROR(H5E_OHDR, H5E_CANTENCODE, FAIL, "can't encode message to be shared")

    /* Open the fractal heap for this index */
    if(NULL == (fheap = H5HF_open(f, dxpl_id, header->heap_addr)))
	HGOTO_ERROR(H5E_HEAP, H5E_CANTOPENOBJ, FAIL, "unable to open fractal heap")

    /* Set up a key for the message to be written */
    key.dxpl_id = dxpl_id;
    key.file = f;
    key.fheap = fheap;
    key.encoding = encoding_buf;
    key.encoding_size = buf_size;
    key.message.hash = H5_checksum_lookup3(encoding_buf, buf_size, type_id);
    key.message.location = H5SM_NO_LOC;

    /* Assume the message is already in the index and try to increment its
     * reference count.  If this fails, the message isn't in the index after
     * all and we'll need to add it.
     */
    if(header->index_type == H5SM_LIST) {
        size_t list_pos;        /* Position in a list index */

        /* The index is a list; get it from the cache */
        if(NULL == (list = (H5SM_list_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_LIST, header->index_addr, NULL, header, H5AC_WRITE)))
	    HGOTO_ERROR(H5E_CACHE, H5E_CANTPROTECT, FAIL, "unable to load SOHM index")

        /* See if the message is already in the index and get its location.
         * Also record the first empty list position we find in case we need it
         * later.
         */
        list_pos = H5SM_find_in_list(list, &key, &empty_pos);
        if(list_pos != UFAIL) {
            /* If the message was previously shared in an object header, share
             * it in the heap now.
             */
            if(list->messages[list_pos].location == H5SM_IN_OH) {
                /* Put the message in the heap and record its new heap ID */
                if(H5HF_insert(fheap, dxpl_id, key.encoding_size, key.encoding, &shared.u.heap_id) < 0)
                    HGOTO_ERROR(H5E_HEAP, H5E_CANTINSERT, FAIL, "unable to insert message into fractal heap")

                list->messages[list_pos].location = H5SM_IN_HEAP;
                list->messages[list_pos].u.heap_loc.fheap_id = shared.u.heap_id;
                list->messages[list_pos].u.heap_loc.ref_count = 2;
            } /* end if */
            else {
                /* If the message was already in the heap, increase its ref count */
                HDassert(list->messages[list_pos].location == H5SM_IN_HEAP);
                ++(list->messages[list_pos].u.heap_loc.ref_count);
            } /* end else */

            /* Set up the shared location to point to the shared location */
            shared.u.heap_id = list->messages[list_pos].u.heap_loc.fheap_id;
            found = TRUE;
        } /* end if */
    } /* end if */
    /* Index is a B-tree */
    else {
        H5SM_incr_ref_opdata op_data;

        HDassert(header->index_type == H5SM_BTREE);

        op_data.key = &key;
        op_data.dxpl_id = dxpl_id;

        /* If this returns failure, it means that the message wasn't found. */
        /* If it succeeds, set the heap_id in the shared struct.  It will
         * return a heap ID, since a message with a reference count greater
         * than 1 is always shared in the heap.
         */
        if(H5B2_modify(f, dxpl_id, H5SM_INDEX, header->index_addr, &key, H5SM_incr_ref, &op_data) >= 0) {
            shared.u.heap_id = op_data.fheap_id;
            found = TRUE;
        } /* end if */
    } /* end else */

    if(found)
        /* If the message was found, it's shared in the heap (now).  Set up a
         * shared message so we can mark it as shared.
         */
        shared.type = H5O_SHARE_TYPE_SOHM;
    else {
        htri_t share_in_ohdr;           /* Whether the new message can be shared in another object's header */

        /* Add the message to the index */

        /* Check if the message can be shared in another object's header */
        if((share_in_ohdr = H5O_msg_can_share_in_ohdr(type_id)) < 0)
            HGOTO_ERROR(H5E_SOHM, H5E_BADTYPE, FAIL, "'share in ohdr' check returned error")

        /* If this message can be shared in an object header location, it is
         *      "shareable" but not shared in the heap.  Insert it in the index
         *      but don't modify the original message.
         *
         * If it can't be shared in an object header location or there's no
         *      object header location available, insert it in the heap.
         */
        if(share_in_ohdr && open_oh) {
            /* Set up shared component info */
            shared.type = H5O_SHARE_TYPE_HERE;

            /* Retrieve any creation index from the native message */
            if(H5O_msg_get_crt_index(type_id, mesg, &shared.u.loc.index) < 0)
                HGOTO_ERROR(H5E_SOHM, H5E_CANTGET, FAIL, "unable to retrieve creation index")
            shared.u.loc.oh_addr = H5O_OH_GET_ADDR(open_oh);

            /* Copy shared component info into key for inserting into index */
            key.message.location = H5SM_IN_OH;
            key.message.u.mesg_loc = shared.u.loc;
        } /* end if */
        else {
            /* Put the message in the heap and record its new heap ID */
            if(H5HF_insert(fheap, dxpl_id, key.encoding_size, key.encoding, &shared.u.heap_id) < 0)
                HGOTO_ERROR(H5E_SOHM, H5E_CANTINSERT, FAIL, "unable to insert message into fractal heap")

            /* Set up shared component info */
            /* (heap ID set above) */
            shared.type = H5O_SHARE_TYPE_SOHM;

            key.message.location = H5SM_IN_HEAP;
            key.message.u.heap_loc.fheap_id = shared.u.heap_id;
            key.message.u.heap_loc.ref_count = 1;
        } /* end else */

        /* Set common information */
        key.message.msg_type_id = type_id;

        /* Check whether the list has grown enough that it needs to become a B-tree */
        if(header->index_type == H5SM_LIST && header->num_messages >= header->list_max)
            if(H5SM_convert_list_to_btree(f, header, &list, fheap, open_oh, dxpl_id) < 0)
                HGOTO_ERROR(H5E_SOHM, H5E_CANTDELETE, FAIL, "unable to convert list to B-tree")

        /* Insert the new message into the SOHM index */
        if(header->index_type == H5SM_LIST) {
            /* Index is a list.  Find an empty spot if we haven't already */
            if(empty_pos == UFAIL)
                if((H5SM_find_in_list(list, NULL, &empty_pos) == UFAIL) || empty_pos == UFAIL)
                    HGOTO_ERROR(H5E_SOHM, H5E_CANTINSERT, FAIL, "unable to find empty entry in list")

            /* Insert message into list */
            HDassert(list->messages[empty_pos].location == H5SM_NO_LOC);
            HDassert(key.message.location != H5SM_NO_LOC);
            list->messages[empty_pos] = key.message;
        } /* end if */
        /* Index is a B-tree */
        else {
            HDassert(header->index_type == H5SM_BTREE);

            if(H5B2_insert(f, dxpl_id, H5SM_INDEX, header->index_addr, &key) < 0)
                HGOTO_ERROR(H5E_BTREE, H5E_CANTINSERT, FAIL, "couldn't add SOHM to B-tree")
        } /* end else */

        ++(header->num_messages);
        (*cache_flags_ptr) |= H5AC__DIRTIED_FLAG;
    } /* end else */

    /* Set the file pointer & message type for the shared component */
    shared.file = f;
    shared.msg_type_id = type_id;

    /* Update the original message's shared component */
    if(H5O_msg_set_share(type_id, &shared, mesg) < 0)
        HGOTO_ERROR(H5E_OHDR, H5E_BADMESG, FAIL, "unable to set sharing information")

done:
    /* Release the fractal heap if we opened it */
    if(fheap && H5HF_close(fheap, dxpl_id) < 0)
        HDONE_ERROR(H5E_HEAP, H5E_CLOSEERROR, FAIL, "can't close fractal heap")

    /* If we got a list out of the cache, release it (it is always dirty after writing a message) */
    if(list && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_LIST, header->index_addr, list, H5AC__DIRTIED_FLAG) < 0)
	HDONE_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "unable to close SOHM index")

    if(encoding_buf)
        encoding_buf = H5MM_xfree(encoding_buf);

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_write_mesg() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_delete
 *
 * Purpose:     Given an object header message that is being deleted,
 *              checks if it is a SOHM.  If so, decrements its reference
 *              count.
 *
 *              If an object header is currently protected, it needs to
 *              be passed in as open_oh so the SM code doesn't try to
 *              re-protect it.
 *
 * Return:      Non-negative on success/Negative on failure
 *
 * Programmer:  James Laird
 *              Tuesday, May 2, 2006
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5SM_delete(H5F_t *f, hid_t dxpl_id, H5O_t *open_oh, H5O_shared_t *sh_mesg)
{
    H5SM_master_table_t  *table = NULL;
    unsigned              cache_flags = H5AC__NO_FLAGS_SET;
    ssize_t               index_num;
    void                 *mesg_buf = NULL;
    void                 *native_mesg = NULL;
    unsigned              type_id;              /* Message type ID to operate on */
    herr_t                ret_value = SUCCEED;

    FUNC_ENTER_NOAPI(H5SM_delete, FAIL)

    HDassert(f);
    HDassert(f->shared->sohm_addr != HADDR_UNDEF);
    HDassert(sh_mesg);

    /* Get message type */
    type_id = sh_mesg->msg_type_id;

    /* Look up the master SOHM table */
    if(NULL == (table = (H5SM_master_table_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, NULL, NULL, H5AC_WRITE)))
	HGOTO_ERROR(H5E_CACHE, H5E_CANTPROTECT, FAIL, "unable to load SOHM master table")

    /* Find the correct index and try to delete from it */
    if((index_num = H5SM_get_index(table, type_id)) < 0)
	HGOTO_ERROR(H5E_SOHM, H5E_NOTFOUND, FAIL, "unable to find correct SOHM index")

    /* If mesg_buf is not NULL, the message's reference count has reached
     * zero and any file space it uses needs to be freed.  mesg_buf holds the
     * serialized form of the message.
     */
    if(H5SM_delete_from_index(f, dxpl_id, open_oh, &(table->indexes[index_num]), sh_mesg, &cache_flags, &mesg_buf) < 0)
	HGOTO_ERROR(H5E_SOHM, H5E_CANTDELETE, FAIL, "unable to delete mesage from SOHM index")

    /* Release the master SOHM table */
    if(H5AC_unprotect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, table, cache_flags) < 0)
	HGOTO_ERROR(H5E_CACHE, H5E_CANTRELEASE, FAIL, "unable to close SOHM master table")
    table = NULL;

    /* If buf was allocated, delete the message it holds.  This message may
     * reference other shared messages that also need to be deleted, so the
     * master table needs to be unprotected when we do this.
     */
    if(mesg_buf) {
        if(NULL == (native_mesg = H5O_msg_decode(f, dxpl_id, type_id, (const unsigned char *)mesg_buf)))
            HGOTO_ERROR(H5E_OHDR, H5E_CANTDECODE, FAIL, "can't decode shared message.")

        if(H5O_msg_delete(f, dxpl_id, open_oh, type_id, native_mesg) < 0)
            HGOTO_ERROR(H5E_OHDR, H5E_CANTFREE, FAIL, "can't delete shared message.")
    } /* end if */

done:
    /* Release the master SOHM table (should only happen on error) */
    if(table && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, table, cache_flags) < 0)
	HDONE_ERROR(H5E_CACHE, H5E_CANTRELEASE, FAIL, "unable to close SOHM master table")

    /* Release any native message we decoded */
    if(native_mesg)
        H5O_msg_free(type_id, native_mesg);

    /* Free encoding buf */
    if(mesg_buf)
        mesg_buf = H5MM_xfree(mesg_buf);

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_delete() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_find_in_list
 *
 * Purpose:     Find a message's location in a list.  Also find the first
 *              empty location in the list (since if we don't find the
 *              message, we may want to insert it into an open spot).
 *
 *              If KEY is NULL, simply find the first empty location in the
 *              list.
 *
 *              If EMPTY_POS is NULL, don't store anything in it.
 *
 * Return:      Message's position in the list on success
 *              UFAIL if message couldn't be found
 *              empty_pos set to position of empty message or UFAIL.
 *
 * Programmer:  James Laird
 *              Tuesday, May 2, 2006
 *
 *-------------------------------------------------------------------------
 */
static size_t
H5SM_find_in_list(const H5SM_list_t *list, const H5SM_mesg_key_t *key, size_t *empty_pos)
{
    size_t               x;
    size_t               ret_value;

    FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5SM_find_in_list)

    HDassert(list);
    /* Both key and empty_pos can be NULL, but not both! */
    HDassert(key || empty_pos);

    /* Initialize empty_pos to an invalid value */
    if(empty_pos)
        *empty_pos = UFAIL;

    /* Find the first (only) message equal to the key passed in.
     * Also record the first empty position we find.
     */
    for(x = 0; x < list->header->list_max; x++) {
        if(key && (list->messages[x].location != H5SM_NO_LOC) &&
                (0 == H5SM_message_compare(key, &(list->messages[x]))))
            HGOTO_DONE(x)
        else if(empty_pos && list->messages[x].location == H5SM_NO_LOC) {
            /* Note position */
            *empty_pos = x;

            /* Found earlier position possible, don't check any more */
            empty_pos = NULL;
        } /* end if */
    } /* end for */

    /* If we reached this point, we didn't find the message */
    ret_value = UFAIL;

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_find_in_list */


/*-------------------------------------------------------------------------
 * Function:	H5SM_get_hash_fh_cb
 *
 * Purpose:	Callback for fractal heap operator, to make copy of link when
 *              when lookup up a link by index
 *
 * Return:	SUCCEED/FAIL
 *
 * Programmer:	Quincey Koziol
 *		koziol@hdfgroup.org
 *		Nov  7 2006
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5SM_get_hash_fh_cb(const void *obj, size_t obj_len, void *_udata)
{
    H5SM_fh_ud_gh_t *udata = (H5SM_fh_ud_gh_t *)_udata;       /* User data for fractal heap 'op' callback */

    FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5SM_get_hash_fh_cb)

    /* Compute hash value on raw message */
    udata->hash = H5_checksum_lookup3(obj, obj_len, udata->type_id);

    FUNC_LEAVE_NOAPI(SUCCEED)
} /* end H5SM_get_hash_fh_cb() */


/*-------------------------------------------------------------------------
 * Function:	H5SM_decr_ref
 *
 * Purpose:	Decrement the reference count for a SOHM message.  Doesn't
 *              remove the record from the B-tree even if the refcount
 *              reaches zero.
 *
 *              The new message is returned through op_data.  If its
 *              reference count is zero, the calling function should
 *              remove this record from the B-tree.
 *
 * Return:	Non-negative on success
 *              Negative on failure
 *
 * Programmer:	James Laird
 *              Monday, November 6, 2006
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_decr_ref(void *record, void *op_data, hbool_t *changed)
{
    H5SM_sohm_t *message = (H5SM_sohm_t *) record;

    FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5SM_decr_ref)

    HDassert(record);
    HDassert(op_data);
    HDassert(changed);

    /* Adjust the message's reference count if it's stored in the heap.
     * Messages stored in object headers always have refcounts of 1,
     * so the calling function should know to just delete such a message
     */
    if(message->location == H5SM_IN_HEAP) {
        --message->u.heap_loc.ref_count;
        *changed = TRUE;
    } /* end if */

    if(op_data)
       *(H5SM_sohm_t *)op_data = *message;

    FUNC_LEAVE_NOAPI(SUCCEED)
} /* end H5SM_decr_ref() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_delete_from_index
 *
 * Purpose:     Decrement the reference count for a particular message in this
 *              index.  If the reference count reaches zero, allocate a buffer
 *              to hold the serialized form of this message so that any
 *              resources it uses can be freed, and return this buffer in
 *              ENCODED_MESG.
 *
 * Return:      Non-negative on success
 *              Negative on failure
 *
 * Programmer:  James Laird
 *              Tuesday, May 2, 2006
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_delete_from_index(H5F_t *f, hid_t dxpl_id, H5O_t *open_oh,
    H5SM_index_header_t *header, const H5O_shared_t *mesg,
    unsigned *cache_flags, void ** /*out*/ encoded_mesg)
{
    H5SM_list_t     *list = NULL;
    H5SM_mesg_key_t key;
    H5SM_sohm_t     message;            /* Deleted message returned from index */
    H5SM_sohm_t    *message_ptr;        /* Pointer to deleted message returned from index */
    H5HF_t         *fheap = NULL;       /* Fractal heap that contains the message */
    size_t          buf_size;           /* Size of the encoded message (out) */
    void            *encoding_buf = NULL; /* The encoded message (out) */
    unsigned        type_id;            /* Message type to operate on */
    herr_t          ret_value = SUCCEED;

    FUNC_ENTER_NOAPI_NOINIT(H5SM_delete_from_index)

    /* Sanity check */
    HDassert(f);
    HDassert(header);
    HDassert(mesg);
    HDassert(cache_flags);
    HDassert(*encoded_mesg == NULL);

    /* Get the message type for later */
    type_id = mesg->msg_type_id;

    /* Open the heap for this type of message. */
    if(NULL == (fheap = H5HF_open(f, dxpl_id, header->heap_addr)))
	HGOTO_ERROR(H5E_HEAP, H5E_CANTOPENOBJ, FAIL, "unable to open fractal heap")

    /* Get the message size and encoded message for the message to be deleted,
     * either from its OH or from the heap.
     */
    if(mesg->type == H5O_SHARE_TYPE_HERE) {
        key.message.location = H5SM_IN_OH;
        key.message.msg_type_id = type_id;
        key.message.u.mesg_loc = mesg->u.loc;
    } /* end if */
    else {
        key.message.location = H5SM_IN_HEAP;
        key.message.msg_type_id = type_id;
        key.message.u.heap_loc.ref_count = 0; /* Refcount isn't relevant here */
        key.message.u.heap_loc.fheap_id = mesg->u.heap_id;
    } /* end else */

    /* Get the encoded message */
    if(H5SM_read_mesg(f, &key.message, fheap, open_oh, dxpl_id, &buf_size, &encoding_buf) < 0)
	HGOTO_ERROR(H5E_HEAP, H5E_CANTOPENOBJ, FAIL, "unable to open fractal heap")

    /* Set up key for message to be deleted. */
    key.file = f;
    key.dxpl_id = dxpl_id;
    key.fheap = fheap;
    key.encoding = encoding_buf;
    key.encoding_size = buf_size;
    key.message.hash = H5_checksum_lookup3(encoding_buf, buf_size, type_id);

    /* Try to find the message in the index */
    if(header->index_type == H5SM_LIST) {
        size_t list_pos;        /* Position of the message in the list */

        /* If the index is stored as a list, get it from the cache */
        if(NULL == (list = (H5SM_list_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_LIST, header->index_addr, NULL, header, H5AC_WRITE)))
	    HGOTO_ERROR(H5E_SOHM, H5E_CANTPROTECT, FAIL, "unable to load SOHM index")

        /* Find the message in the list */
        if((list_pos = H5SM_find_in_list(list, &key, NULL)) == UFAIL)
	    HGOTO_ERROR(H5E_SOHM, H5E_NOTFOUND, FAIL, "message not in index")

        if(list->messages[list_pos].location == H5SM_IN_HEAP)
            --(list->messages[list_pos].u.heap_loc.ref_count);

        /* Point to the message */
        message_ptr = &list->messages[list_pos];
    } /* end if */
    else {
        /* Index is a B-tree */
        HDassert(header->index_type == H5SM_BTREE);

        /* If this returns failure, it means that the message wasn't found.
         * If it succeeds, a copy of the modified message will be returned. */
        if(H5B2_modify(f, dxpl_id, H5SM_INDEX, header->index_addr, &key, H5SM_decr_ref, &message) <0)
	    HGOTO_ERROR(H5E_SOHM, H5E_NOTFOUND, FAIL, "message not in index")

        /* Point to the message */
        message_ptr = &message;
    } /* end else */

    /* If the ref count is zero or this message was in an OH (which always
     * has a ref count of 1) delete the message from the index
     */
    if(message_ptr->location == H5SM_IN_OH || message_ptr->u.heap_loc.ref_count == 0) {
        /* Save the location */
        H5SM_storage_loc_t old_loc = message_ptr->location;

        /* Updated the index header, so set its dirty flag */
        --header->num_messages;
        *cache_flags |= H5AC__DIRTIED_FLAG;

        /* Remove the message from the index */
        if(header->index_type == H5SM_LIST)
            message_ptr->location = H5SM_NO_LOC;
        else {
            if(H5B2_remove(f, dxpl_id, H5SM_INDEX, header->index_addr, &key, NULL, NULL) < 0)
                HGOTO_ERROR(H5E_BTREE, H5E_CANTREMOVE, FAIL, "unable to delete message")
        } /* end else */

        /* Remove the message from the heap if it was stored in the heap*/
        if(old_loc == H5SM_IN_HEAP)
            if(H5HF_remove(fheap, dxpl_id, &(message_ptr->u.heap_loc.fheap_id)) < 0)
	        HGOTO_ERROR(H5E_SOHM, H5E_CANTREMOVE, FAIL, "unable to remove message from heap")


        /* Return the message's encoding so anything it references can be freed */
        *encoded_mesg = encoding_buf;

        /* If there are no messages left in the index, delete it */
        if(header->num_messages == 0) {

            /* Unprotect cache and release heap */
            if(list && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_LIST, header->index_addr, list, H5AC__DELETED_FLAG) < 0)
	        HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "unable to release SOHM list")
            list = NULL;

            HDassert(fheap);
            if(H5HF_close(fheap, dxpl_id) < 0)
                HGOTO_ERROR(H5E_HEAP, H5E_CLOSEERROR, FAIL, "can't close fractal heap")
            fheap = NULL;

            /* Delete the index and its heap */
            if(H5SM_delete_index(f, header, dxpl_id, TRUE) < 0)
                HGOTO_ERROR(H5E_SOHM, H5E_CANTDELETE, FAIL, "can't delete empty index")
        }  /* end if */
        else if(header->index_type == H5SM_BTREE && header->num_messages < header->btree_min) {
            /* Otherwise, if we've just passed the btree-to-list cutoff, convert
             * this B-tree into a list
             */
           if(H5SM_convert_btree_to_list(f, header, dxpl_id) < 0)
                HGOTO_ERROR(H5E_SOHM, H5E_CANTINIT, FAIL, "unable to convert btree to list")
        } /* end if */
    } /* end if */

done:
    /* Release the SOHM list */
    if(list && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_LIST, header->index_addr, list, H5AC__DIRTIED_FLAG) < 0)
	HDONE_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "unable to close SOHM index")

    /* Release the fractal heap if we opened it */
    if(fheap && H5HF_close(fheap, dxpl_id) < 0)
        HDONE_ERROR(H5E_HEAP, H5E_CLOSEERROR, FAIL, "can't close fractal heap")

    /* Free the message encoding, if we're not returning it in encoded_mesg
     * or if there's been an error.
     */
    if(encoding_buf && (NULL == *encoded_mesg || ret_value < 0))
        encoding_buf = H5MM_xfree(encoding_buf);

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_delete_from_index() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_get_info
 *
 * Purpose:     Get the shared message info for a file, if there is any.
 *
 * Return:      Non-negative on success/Negative on failure
 *
 * Programmer:  James Laird
 *              Thursday, May 11, 2006
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5SM_get_info(const H5O_loc_t *ext_loc, H5P_genplist_t *fc_plist, hid_t dxpl_id)
{
    H5F_t *f = ext_loc->file;           /* File pointer (convenience variable) */
    H5F_file_t *shared = f->shared;     /* Shared file info (convenience variable) */
    H5O_shmesg_table_t sohm_table;      /* SOHM message from superblock extension */
    H5SM_master_table_t *table = NULL;  /* SOHM master table */
    herr_t ret_value = SUCCEED;         /* Return value */

    FUNC_ENTER_NOAPI(H5SM_get_info, FAIL)

    /* Sanity check */
    HDassert(ext_loc);
    HDassert(f && shared);
    HDassert(fc_plist);

    /* Read in shared message information, if it exists */
    if(NULL == H5O_msg_read(ext_loc, H5O_SHMESG_ID, &sohm_table, dxpl_id)) {
        /* Reset error from "failed" message read */
        H5E_clear_stack(NULL);

        /* No SOHM info in file */
        shared->sohm_addr = HADDR_UNDEF;
        shared->sohm_nindexes = 0;
        shared->sohm_vers = 0;

        /* Shared object header messages are disabled */
        if(H5P_set(fc_plist, H5F_CRT_SHMSG_NINDEXES_NAME, &shared->sohm_nindexes) < 0)
            HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set number of SOHM indexes")
    } /* end if */
    else {
        unsigned index_flags[H5O_SHMESG_MAX_NINDEXES];  /* Message flags for each index */
        unsigned minsizes[H5O_SHMESG_MAX_NINDEXES];     /* Minimum message size for each index */
        unsigned sohm_l2b;           /* SOHM list-to-btree cutoff */
        unsigned sohm_b2l;           /* SOHM btree-to-list cutoff */
        unsigned u;                  /* Local index variable */

        /* Portably initialize the arrays */
        HDmemset(index_flags, 0, sizeof(index_flags));
        HDmemset(minsizes, 0, sizeof(minsizes));

        /* Set SOHM info from file */
        shared->sohm_addr = sohm_table.addr;
        shared->sohm_vers = sohm_table.version;
        shared->sohm_nindexes = sohm_table.nindexes;
        HDassert(H5F_addr_defined(shared->sohm_addr));
        HDassert(shared->sohm_nindexes > 0 && shared->sohm_nindexes <= H5O_SHMESG_MAX_NINDEXES);

        /* Read the rest of the SOHM table information from the cache */
        if(NULL == (table = (H5SM_master_table_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_TABLE, shared->sohm_addr, NULL, NULL, H5AC_READ)))
            HGOTO_ERROR(H5E_CACHE, H5E_CANTPROTECT, FAIL, "unable to load SOHM master table")

        /* Get index conversion limits */
        sohm_l2b = table->indexes[0].list_max;
        sohm_b2l = table->indexes[0].btree_min;

        /* Iterate through all indices */
        for(u = 0; u < table->num_indexes; ++u) {
            /* Pack information about the individual SOHM index */
            index_flags[u] = table->indexes[u].mesg_types;
            minsizes[u] = table->indexes[u].min_mesg_size;

            /* Sanity check */
            HDassert(sohm_l2b == table->indexes[u].list_max);
            HDassert(sohm_b2l == table->indexes[u].btree_min);

            /* Check for sharing attributes in this file, which means that creation
             *  indices must be tracked on object header message in the file.
             */
            if(index_flags[u] & H5O_SHMESG_ATTR_FLAG)
                shared->store_msg_crt_idx = TRUE;
        } /* end for */

        /* Set values in the property list */
        if(H5P_set(fc_plist, H5F_CRT_SHMSG_NINDEXES_NAME, &shared->sohm_nindexes) < 0)
            HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set number of SOHM indexes")
        if(H5P_set(fc_plist, H5F_CRT_SHMSG_INDEX_TYPES_NAME, index_flags) < 0)
            HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set type flags for indexes")
        if(H5P_set(fc_plist, H5F_CRT_SHMSG_INDEX_MINSIZE_NAME, minsizes) < 0)
            HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set type flags for indexes")
        if(H5P_set(fc_plist, H5F_CRT_SHMSG_LIST_MAX_NAME, &sohm_l2b) < 0)
            HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't set SOHM cutoff in property list")
        if(H5P_set(fc_plist, H5F_CRT_SHMSG_BTREE_MIN_NAME, &sohm_b2l) < 0)
            HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't set SOHM cutoff in property list")
    } /* end else */

done:
    /* Release the master SOHM table if we took it out of the cache */
    if(table && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_TABLE, shared->sohm_addr, table, H5AC__NO_FLAGS_SET) < 0)
	HDONE_ERROR(H5E_CACHE, H5E_CANTRELEASE, FAIL, "unable to close SOHM master table")

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_get_info() */


/*-------------------------------------------------------------------------
 * Function:	H5SM_message_encode
 *
 * Purpose:	Serialize a H5SM_sohm_t struct into a buffer RAW.
 *
 * Return:	Non-negative on success
 *              Negative on failure
 *
 * Programmer:	James Laird
 *              Monday, November 6, 2006
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5SM_message_encode(const H5F_t *f, uint8_t *raw, const void *_nrecord)
{
    const H5SM_sohm_t *message = (const H5SM_sohm_t *)_nrecord;

    FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5SM_message_encode)

    *raw++ = message->location;
    UINT32ENCODE(raw, message->hash);

    if(message->location == H5SM_IN_HEAP) {
        UINT32ENCODE(raw, message->u.heap_loc.ref_count);
        UINT64ENCODE(raw, message->u.heap_loc.fheap_id);
    } /* end if */
    else {
        HDassert(message->location == H5SM_IN_OH);

        *raw++ = 0;     /* reserved (possible flags byte) */
        *raw++ = message->msg_type_id;
        UINT16ENCODE(raw, message->u.mesg_loc.index);
        H5F_addr_encode(f, &raw, message->u.mesg_loc.oh_addr);
    } /* end else */

    FUNC_LEAVE_NOAPI(SUCCEED)
} /* end H5SM_message_encode */


/*-------------------------------------------------------------------------
 * Function:	H5SM_message_decode
 *
 * Purpose:	Read an encoded SOHM message from RAW into an H5SM_sohm_t struct.
 *
 * Return:	Non-negative on success
 *              Negative on failure
 *
 * Programmer:	James Laird
 *              Monday, November 6, 2006
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5SM_message_decode(const H5F_t UNUSED *f, const uint8_t *raw, void *_nrecord)
{
    H5SM_sohm_t *message = (H5SM_sohm_t *)_nrecord;

    FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5SM_message_decode)

    message->location = *raw++;
    UINT32DECODE(raw, message->hash);

    if(message->location == H5SM_IN_HEAP) {
        UINT32DECODE(raw, message->u.heap_loc.ref_count);
        UINT64DECODE(raw, message->u.heap_loc.fheap_id);
    } /* end if */
    else {
        HDassert(message->location == H5SM_IN_OH);

        raw++;          /* reserved */
        message->msg_type_id = *raw++;
        UINT16DECODE(raw, message->u.mesg_loc.index);
        H5F_addr_decode(f, &raw, &message->u.mesg_loc.oh_addr);
    } /* end else */

    FUNC_LEAVE_NOAPI(SUCCEED)
} /* end H5SM_message_decode */


/*-------------------------------------------------------------------------
 * Function:    H5SM_reconstitute
 *
 * Purpose:     Reconstitute a shared object header message structure from
 *              a plain heap ID.
 *
 * Return:      Non-negative on success/Negative on failure
 *
 * Programmer:  Quincey Koziol
 *              Monday, December 18, 2006
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5SM_reconstitute(H5O_shared_t *sh_mesg, H5F_t *f, unsigned msg_type_id,
    H5O_fheap_id_t heap_id)
{
    FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5SM_reconstitute)

    /* Sanity check args */
    HDassert(sh_mesg);

    /* Set flag for shared message */
    sh_mesg->type = H5O_SHARE_TYPE_SOHM;
    sh_mesg->file = f;
    sh_mesg->msg_type_id = msg_type_id;
    sh_mesg->u.heap_id = heap_id;

    FUNC_LEAVE_NOAPI(SUCCEED)
} /* end H5SM_reconstitute() */


/*-------------------------------------------------------------------------
 * Function:	H5SM_get_refcount_bt2_cb
 *
 * Purpose:	v2 B-tree 'find' callback to retrieve the record for a message
 *
 * Return:	SUCCEED/FAIL
 *
 * Programmer:	Quincey Koziol
 *              Tuesday, December 19, 2006
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_get_refcount_bt2_cb(const void *_record, void *_op_data)
{
    const H5SM_sohm_t *record = (const H5SM_sohm_t *)_record;  /* v2 B-tree record for message */
    H5SM_sohm_t *op_data = (H5SM_sohm_t *)_op_data;       /* "op data" from v2 B-tree find */

    FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5SM_get_refcount_bt2_cb)

    /*
     * Check arguments.
     */
    HDassert(record);
    HDassert(op_data);

    /* Make a copy of the record */
    *op_data = *record;

    FUNC_LEAVE_NOAPI(SUCCEED)
} /* end H5SM_get_refcount_bt2_cb() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_get_refcount
 *
 * Purpose:     Retrieve the reference count for a message shared in the heap
 *
 * Return:      Non-negative on success/Negative on failure
 *
 * Programmer:  Quincey Koziol
 *              Tuesday, December 19, 2006
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5SM_get_refcount(H5F_t *f, hid_t dxpl_id, unsigned type_id,
    const H5O_shared_t *sh_mesg, hsize_t *ref_count)
{
    H5HF_t *fheap = NULL;               /* Fractal heap that contains shared messages */
    H5SM_master_table_t *table = NULL;  /* SOHM master table */
    H5SM_list_t *list = NULL;           /* SOHM index list for message type (if in list form) */
    H5SM_index_header_t *header=NULL;   /* Index header for message type */
    H5SM_mesg_key_t key;                /* Key for looking up message */
    H5SM_sohm_t message;                /* Shared message returned from callback */
    ssize_t index_num;                  /* Table index for message type */
    size_t buf_size;                    /* Size of the encoded message */
    void * encoding_buf = NULL;         /* Buffer for encoded message */
    herr_t ret_value = SUCCEED;         /* Return value */

    FUNC_ENTER_NOAPI_NOINIT(H5SM_get_refcount)

    /* Sanity check */
    HDassert(f);
    HDassert(sh_mesg);
    HDassert(ref_count);

    /* Look up the master SOHM table */
    if(NULL == (table = (H5SM_master_table_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, NULL, NULL, H5AC_READ)))
	HGOTO_ERROR(H5E_CACHE, H5E_CANTPROTECT, FAIL, "unable to load SOHM master table")

    /* Find the correct index and find the message in it */
    if((index_num = H5SM_get_index(table, type_id)) < 0)
	HGOTO_ERROR(H5E_SOHM, H5E_NOTFOUND, FAIL, "unable to find correct SOHM index")
    header = &(table->indexes[index_num]);

    /* Open the heap for this message type */
    if(NULL == (fheap = H5HF_open(f, dxpl_id, header->heap_addr)))
	HGOTO_ERROR(H5E_HEAP, H5E_CANTOPENOBJ, FAIL, "unable to open fractal heap")

    /* Set up a SOHM message to correspond to the shared message passed in */
    key.message.location = H5SM_IN_HEAP;
    key.message.u.heap_loc.fheap_id = sh_mesg->u.heap_id;
    key.message.u.heap_loc.ref_count = 0; /* Ref count isn't needed to find message */

    /* Get the encoded message */
    if(H5SM_read_mesg(f, &key.message, fheap, NULL, dxpl_id, &buf_size, &encoding_buf) < 0)
	HGOTO_ERROR(H5E_HEAP, H5E_CANTOPENOBJ, FAIL, "unable to open fractal heap")

    /* Set up key for message to locate */
    key.file = f;
    key.dxpl_id = dxpl_id;
    key.fheap = fheap;
    key.encoding = encoding_buf;
    key.encoding_size = buf_size;
    key.message.hash = H5_checksum_lookup3(encoding_buf, buf_size, type_id);

    /* Try to find the message in the index */
    if(header->index_type == H5SM_LIST) {
        size_t list_pos;        /* Position of the message in the list */

        /* If the index is stored as a list, get it from the cache */
        if(NULL == (list = (H5SM_list_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_LIST, header->index_addr, NULL, header, H5AC_READ)))
	    HGOTO_ERROR(H5E_SOHM, H5E_CANTPROTECT, FAIL, "unable to load SOHM index")

        /* Find the message in the list */
        if((list_pos = H5SM_find_in_list(list, &key, NULL)) == UFAIL)
	    HGOTO_ERROR(H5E_SOHM, H5E_NOTFOUND, FAIL, "message not in index")

        /* Copy the message */
        message = list->messages[list_pos];
    } /* end if */
    else {
        /* Index is a B-tree */
        HDassert(header->index_type == H5SM_BTREE);

        /* Look up the message in the v2 B-tree */
        if(H5B2_find(f, dxpl_id, H5SM_INDEX, header->index_addr, &key, H5SM_get_refcount_bt2_cb, &message) < 0)
	    HGOTO_ERROR(H5E_SOHM, H5E_NOTFOUND, FAIL, "message not in index")
    } /* end else */

    /* Set the refcount for the message */
    HDassert(message.location == H5SM_IN_HEAP);
    *ref_count = message.u.heap_loc.ref_count;

done:
    /* Release resources */
    if(list && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_LIST, header->index_addr, list, H5AC__NO_FLAGS_SET) < 0)
	HDONE_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "unable to close SOHM index")
    if(table && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, table, H5AC__NO_FLAGS_SET) < 0)
	HDONE_ERROR(H5E_CACHE, H5E_CANTRELEASE, FAIL, "unable to close SOHM master table")
    if(fheap && H5HF_close(fheap, dxpl_id) < 0)
        HDONE_ERROR(H5E_HEAP, H5E_CLOSEERROR, FAIL, "can't close fractal heap")
    if(encoding_buf)
        encoding_buf = H5MM_xfree(encoding_buf);

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_get_refcount() */


/*-------------------------------------------------------------------------
 * Function:	H5SM_read_iter_op
 *
 * Purpose:	OH iteration callback to get the encoded version of a message
 *              by index.
 *
 *              The buffer needs to be freed.
 *
 * Return:	0 if this is not the message we're searching for
 *              1 if this is the message we're searching for (with encoded
 *                      value returned in udata)
 *              negative on error
 *
 * Programmer:	James Laird
 *              Wednesday, February 21, 2006
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_read_iter_op(H5O_t *oh, H5O_mesg_t *mesg/*in,out*/, unsigned sequence,
    hbool_t UNUSED *oh_modified, void *_udata/*in,out*/)
{
    H5SM_read_udata_t *udata = (H5SM_read_udata_t *) _udata;
    herr_t ret_value = H5_ITER_CONT;

    FUNC_ENTER_NOAPI_NOINIT(H5SM_read_iter_op)

    /*
     * Check arguments.
     */
    HDassert(oh);
    HDassert(mesg);
    HDassert(udata);
    HDassert(NULL == udata->encoding_buf);

    /* Check the creation index for this message */
    if(sequence == udata->idx) {
        /* Check if the message is dirty & flush it to the object header if so */
        if(mesg->dirty)
            if(H5O_msg_flush(udata->file, oh, mesg) < 0)
                HGOTO_ERROR(H5E_OHDR, H5E_CANTENCODE, H5_ITER_ERROR, "unable to encode object header message")

        /* Get the message's encoded size */
        udata->buf_size = mesg->raw_size;
        HDassert(udata->buf_size);

        /* Allocate buffer to return the message in */
        if(NULL == (udata->encoding_buf = H5MM_malloc(udata->buf_size)))
            HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, H5_ITER_ERROR, "memory allocation failed")

        /* Copy the encoded message into the buffer to return */
        HDmemcpy(udata->encoding_buf, mesg->raw, udata->buf_size);

        /* Found the message we were looking for */
        ret_value = H5_ITER_STOP;
    } /* end if */

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_read_iter_op() */


/*-------------------------------------------------------------------------
 * Function:	H5SM_read_mesg_fh_cb
 *
 * Purpose:	Callback for H5HF_op, used in H5SM_read_mesg below.
 *              Makes a copy of the message in the heap data, returned in the
 *              UDATA struct.
 *
 * Return:	Negative on error, non-negative on success
 *
 * Programmer:	Quincey Koziol
 *              Tuesday, June 26, 2007
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_read_mesg_fh_cb(const void *obj, size_t obj_len, void *_udata)
{
    H5SM_read_udata_t *udata = (H5SM_read_udata_t *)_udata;
    herr_t ret_value = SUCCEED;

    FUNC_ENTER_NOAPI_NOINIT(H5SM_read_mesg_fh_cb)

    /* Allocate a buffer to hold the message */
    if(NULL == (udata->encoding_buf = H5MM_malloc(obj_len)))
        HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, FAIL, "memory allocation failed")

    /* Copy the message from the heap */
    HDmemcpy(udata->encoding_buf, obj, obj_len);
    udata->buf_size = obj_len;

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_read_mesg_fh_cb() */


/*-------------------------------------------------------------------------
 * Function:	H5SM_read_mesg
 *
 * Purpose:	Given an H5SM_sohm_t sohm, encodes the message into a buffer.
 *              This buffer should then be freed.
 *
 * Return:	Non-negative on success/negative on error
 *
 * Programmer:	James Laird
 *              Wednesday, February 21, 2006
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5SM_read_mesg(H5F_t *f, const H5SM_sohm_t *mesg, H5HF_t *fheap,
    H5O_t *open_oh, hid_t dxpl_id, size_t *encoding_size /*out*/,
    void ** encoded_mesg /*out*/)
{
    H5SM_read_udata_t udata;    /* User data for callbacks */
    H5O_loc_t oloc;             /* Object location for message in object header */
    H5O_t *oh = NULL;           /* Object header for message in object header */
    herr_t ret_value = SUCCEED;

    FUNC_ENTER_NOAPI_NOINIT(H5SM_read_mesg)

    HDassert(f);
    HDassert(mesg);
    HDassert(fheap);

    /* Set up user data for message iteration */
    udata.file = f;
    udata.idx = mesg->u.mesg_loc.index;
    udata.encoding_buf = NULL;
    udata.idx = 0;

    /* Get the message size and encoded message for the message to be deleted,
     * either from its OH or from the heap.
     */
    if(mesg->location == H5SM_IN_OH) {
        /* Read message from object header */
        const H5O_msg_class_t *type = NULL;    /* Actual H5O class type for the ID */
        H5O_mesg_operator_t op;         /* Wrapper for operator */

        type = H5O_msg_class_g[mesg->msg_type_id];    /* map the type ID to the actual type object */
        HDassert(type);

        /* Reset object location for operation */
        if(H5O_loc_reset(&oloc) < 0)
            HGOTO_ERROR(H5E_SYM, H5E_CANTRESET, FAIL, "unable to initialize location")

        if(NULL == open_oh || mesg->u.mesg_loc.oh_addr != H5O_OH_GET_ADDR(open_oh)) {
            /* Open the object in the file */
            oloc.file = f;
            oloc.addr = mesg->u.mesg_loc.oh_addr;
            if(H5O_open(&oloc) < 0)
	        HGOTO_ERROR(H5E_OHDR, H5E_CANTLOAD, FAIL, "unable to open object header")

            /* Load the object header from the cache */
            if(NULL == (oh = (H5O_t *)H5AC_protect(oloc.file, dxpl_id, H5AC_OHDR, oloc.addr, NULL, NULL, H5AC_READ)))
	        HGOTO_ERROR(H5E_OHDR, H5E_CANTLOAD, FAIL, "unable to load object header")
        } /* end if */
        else
            oh = open_oh;

        /* Use the "real" iterate routine so it doesn't try to protect the OH */
        op.op_type = H5O_MESG_OP_LIB;
        op.u.lib_op = H5SM_read_iter_op;
        if((ret_value = H5O_msg_iterate_real(f, oh, type, &op, &udata, dxpl_id)) < 0)
            HGOTO_ERROR(H5E_OHDR, H5E_BADITER, FAIL, "unable to iterate over object header messages")
    } /* end if */
    else {
        HDassert(mesg->location == H5SM_IN_HEAP);

        /* Copy the message from the heap */
        if(H5HF_op(fheap, dxpl_id, &(mesg->u.heap_loc.fheap_id), H5SM_read_mesg_fh_cb, &udata) < 0)
            HGOTO_ERROR(H5E_OHDR, H5E_CANTLOAD, FAIL, "can't read message from fractal heap.")
    } /* end else */
    HDassert(udata.encoding_buf);
    HDassert(udata.buf_size);

    /* Record the returned values */
    *encoded_mesg = udata.encoding_buf;
    *encoding_size = udata.buf_size;

done:
    /* Close the object header if we opened one and had an error */
    if(oh && oh != open_oh) {
        if(H5AC_unprotect(oloc.file, dxpl_id, H5AC_OHDR, oloc.addr, oh, H5AC__NO_FLAGS_SET) < 0)
            HDONE_ERROR(H5E_OHDR, H5E_PROTECT, FAIL, "unable to release object header")

        if(H5O_close(&oloc) < 0)
            HDONE_ERROR(H5E_OHDR, H5E_CANTRELEASE, FAIL, "unable to close object header")
    } /* end if */

    /* Release the encoding buffer on error */
    if(ret_value < 0 && udata.encoding_buf)
        udata.encoding_buf = H5MM_xfree(udata.encoding_buf);

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_read_mesg */


/*-------------------------------------------------------------------------
 * Function:    H5SM_table_debug
 *
 * Purpose:     Print debugging information for the master table.
 *
 *              If table_vers and num_indexes are not UFAIL, they are used
 *              instead of the values in the superblock.
 *
 * Return:      Non-negative on success/Negative on failure
 *
 * Programmer:  James Laird
 *              Thursday, January 18, 2007
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5SM_table_debug(H5F_t *f, hid_t dxpl_id, haddr_t table_addr,
                         FILE *stream, int indent, int fwidth,
                         unsigned table_vers, unsigned num_indexes)
{
    H5SM_master_table_t *table = NULL;  /* SOHM master table */
    unsigned x;                         /* Counter variable */
    herr_t ret_value = SUCCEED;         /* Return value */

    FUNC_ENTER_NOAPI(H5SM_table_debug, FAIL)

    HDassert(f);
    HDassert(table_addr != HADDR_UNDEF);
    HDassert(stream);
    HDassert(indent >= 0);
    HDassert(fwidth >= 0);

    /* If table_vers and num_indexes are UFAIL, replace them with values from
     * userblock
     */
    if(table_vers == UFAIL)
        table_vers = f->shared->sohm_vers;
    else if(table_vers != f->shared->sohm_vers)
	HDfprintf(stream, "*** SOHM TABLE VERSION DOESN'T MATCH VERSION IN SUPERBLOCK!\n");
    if(num_indexes == UFAIL)
        num_indexes = f->shared->sohm_nindexes;
    else if(num_indexes != f->shared->sohm_nindexes)
	HDfprintf(stream, "*** NUMBER OF SOHM INDEXES DOESN'T MATCH VALUE IN SUPERBLOCK!\n");

    /* Check arguments.  Version must be 0, the only version implemented so far */
    if(table_vers > HDF5_SHAREDHEADER_VERSION)
        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "unknown shared message table version")
    if(num_indexes == 0 || num_indexes > H5O_SHMESG_MAX_NINDEXES)
        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "number of indexes must be between 1 and H5O_SHMESG_MAX_NINDEXES")

    /* Look up the master SOHM table */
    if(NULL == (table = (H5SM_master_table_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_TABLE, table_addr, NULL, NULL, H5AC_READ)))
	HGOTO_ERROR(H5E_CACHE, H5E_CANTPROTECT, FAIL, "unable to load SOHM master table")

    HDfprintf(stream, "%*sShared Message Master Table...\n", indent, "");
    for(x = 0; x < num_indexes; ++x) {
        HDfprintf(stream, "%*sIndex %d...\n", indent, "", x);
        HDfprintf(stream, "%*s%-*s %s\n", indent + 3, "", fwidth,
                "SOHM Index Type:",
                (table->indexes[x].index_type == H5SM_LIST ? "List" :
                (table->indexes[x].index_type == H5SM_BTREE ? "B-Tree" : "Unknown")));

        HDfprintf(stream, "%*s%-*s %a\n", indent + 3, "", fwidth,
                "Address of index:", table->indexes[x].index_addr);
        HDfprintf(stream, "%*s%-*s %a\n", indent + 3, "", fwidth,
                "Address of index's heap:", table->indexes[x].heap_addr);
        HDfprintf(stream, "%*s%-*s 0x%08x\n", indent + 3, "", fwidth,
                "Message type flags:", table->indexes[x].mesg_types);
        HDfprintf(stream, "%*s%-*s %Zu\n", indent + 3, "", fwidth,
                "Minimum size of messages:", table->indexes[x].min_mesg_size);
        HDfprintf(stream, "%*s%-*s %Zu\n", indent + 3, "", fwidth,
                "Number of messages:", table->indexes[x].num_messages);
        HDfprintf(stream, "%*s%-*s %Zu\n", indent + 3, "", fwidth,
                "Maximum list size:", table->indexes[x].list_max);
        HDfprintf(stream, "%*s%-*s %Zu\n", indent + 3, "", fwidth,
                "Minimum B-tree size:", table->indexes[x].btree_min);
    } /* end for */

done:
    if(table && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_TABLE, table_addr, table, H5AC__NO_FLAGS_SET) < 0)
	HDONE_ERROR(H5E_CACHE, H5E_CANTRELEASE, FAIL, "unable to close SOHM master table")

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_table_debug() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_list_debug
 *
 * Purpose:     Print debugging information for a SOHM list.
 *
 *              Relies on the list version and number of messages passed in.
 *
 * Return:      Non-negative on success/Negative on failure
 *
 * Programmer:  James Laird
 *              Thursday, January 18, 2007
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5SM_list_debug(H5F_t *f, hid_t dxpl_id, haddr_t list_addr,
                         FILE *stream, int indent, int fwidth,
                         unsigned table_vers, size_t num_messages)
{

    H5SM_list_t *list = NULL;           /* SOHM index list for message type (if in list form) */
    H5SM_index_header_t header;         /* A "false" header used to read the list */
    unsigned x;                         /* Counter variable */
    herr_t ret_value = SUCCEED;         /* Return value */

    FUNC_ENTER_NOAPI(H5SM_list_debug, FAIL)

    HDassert(f);
    HDassert(list_addr != HADDR_UNDEF);
    HDassert(stream);
    HDassert(indent >= 0);
    HDassert(fwidth >= 0);

    /* Check arguments.  Version must be 0, the only version implemented so far */
    if(table_vers > H5SM_LIST_VERSION)
        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "unknown shared message list version")
    if(num_messages == 0 || num_messages > H5O_SHMESG_MAX_LIST_SIZE)
        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "number of indexes must be between 1 and H5O_SHMESG_MAX_NINDEXES")

    /* Create a temporary header using the arguments.  The cache needs this to load the list. */
    HDmemset(&header, 0, sizeof(H5SM_index_header_t));
    header.list_max = header.num_messages = num_messages;
    header.index_type = H5SM_LIST;
    header.index_addr = list_addr;

    /* Get the list from the cache */
    if(NULL == (list = (H5SM_list_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_LIST, list_addr, NULL, &header, H5AC_READ)))
	HGOTO_ERROR(H5E_CACHE, H5E_CANTPROTECT, FAIL, "unable to load SOHM index")

    HDfprintf(stream, "%*sShared Message List Index...\n", indent, "");
    for(x = 0; x < num_messages; ++x) {
        HDfprintf(stream, "%*sShared Object Header Message %d...\n", indent, "", x);
        HDfprintf(stream, "%*s%-*s %08lu\n", indent + 3, "", fwidth,
                "Hash value:", (unsigned long)list->messages[x].hash);
        if(list->messages[x].location == H5SM_IN_HEAP) {
            HDfprintf(stream, "%*s%-*s %s\n", indent + 3, "", fwidth,
                    "Location:", "in heap");
            HDfprintf(stream, "%*s%-*s %Zu\n", indent + 3, "", fwidth,
                    "Heap ID:", list->messages[x].u.heap_loc.fheap_id);
            HDfprintf(stream, "%*s%-*s %u\n", indent + 3, "", fwidth,
                    "Reference count:", list->messages[x].u.heap_loc.ref_count);
        } /* end if */
        else if(list->messages[x].location == H5SM_IN_OH) {
            HDfprintf(stream, "%*s%-*s %s\n", indent + 3, "", fwidth,
                    "Location:", "in object header");
            HDfprintf(stream, "%*s%-*s %a\n", indent + 3, "", fwidth,
                    "Object header address:", list->messages[x].u.mesg_loc.oh_addr);
            HDfprintf(stream, "%*s%-*s %u\n", indent + 3, "", fwidth,
                    "Message creation index:", list->messages[x].u.mesg_loc.oh_addr);
            HDfprintf(stream, "%*s%-*s %u\n", indent + 3, "", fwidth,
                    "Message type ID:", list->messages[x].msg_type_id);
        } /* end if */
        else
            HDfprintf(stream, "%*s%-*s %s\n", indent + 3, "", fwidth,
                    "Location:", "invalid");
    } /* end for */

done:
    if(list && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_LIST, list_addr, list, H5AC__NO_FLAGS_SET) < 0)
	HDONE_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "unable to close SOHM index")

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_list_debug() */


/*-------------------------------------------------------------------------
 * Function:    H5SM_ih_size
 *
 * Purpose:     Loop through the master SOHM table (if there is one) to:
 *			1. collect storage used for header
 *                      1. collect storage used for B-tree and List
 *			   (include btree storage used by huge objects in fractal heap)
 *                      2. collect fractal heap storage
 *
 * Return:      Non-negative on success/Negative on failure
 *
 * Programmer:  Vailin Choi
 *              June 19, 2007
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5SM_ih_size(H5F_t *f, hid_t dxpl_id, H5F_info_t *finfo)
{
    H5SM_master_table_t *table = NULL;          /* SOHM master table */
    H5HF_t              *fheap = NULL;          /* Fractal heap handle */
    unsigned    	u;                      /* Local index variable */
    herr_t              ret_value = SUCCEED;    /* Return value */

    FUNC_ENTER_NOAPI(H5SM_ih_size, FAIL)

    /* Sanity check */
    HDassert(f);
    HDassert(H5F_addr_defined(f->shared->sohm_addr));
    HDassert(finfo);

    /* Look up the master SOHM table */
    if(NULL == (table = (H5SM_master_table_t *)H5AC_protect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, NULL, NULL, H5AC_READ)))
	HGOTO_ERROR(H5E_CACHE, H5E_CANTPROTECT, FAIL, "unable to load SOHM master table")

    /* Get SOHM header size */
    finfo->sohm.hdr_size = (hsize_t) H5SM_TABLE_SIZE(f) +
                           (hsize_t)(table->num_indexes * H5SM_INDEX_HEADER_SIZE(f));

    /* Loop over all the indices for shared messages */
    for(u = 0; u < table->num_indexes; u++) {
        /* Get index storage size (for either B-tree or list) */
	if(table->indexes[u].index_type == H5SM_BTREE) {
	    if(H5F_addr_defined(table->indexes[u].index_addr))
		if(H5B2_iterate_size(f, dxpl_id, H5SM_INDEX, table->indexes[u].index_addr, &(finfo->sohm.msgs_info.index_size)) < 0)
                    HGOTO_ERROR(H5E_BTREE, H5E_CANTGET, FAIL, "can't retrieve B-tree storage info")
        } /* end if */
        else if(table->indexes[u].index_type == H5SM_LIST)
	    finfo->sohm.msgs_info.index_size += H5SM_LIST_SIZE(f, table->indexes[u].list_max);

        /* Check for heap for this index */
	if(H5F_addr_defined(table->indexes[u].heap_addr)) {
            /* Open the fractal heap for this index */
            if(NULL == (fheap = H5HF_open(f, dxpl_id, table->indexes[u].heap_addr)))
                HGOTO_ERROR(H5E_HEAP, H5E_CANTOPENOBJ, FAIL, "unable to open fractal heap")

            /* Get heap storage size */
	    if(H5HF_size(fheap, dxpl_id, &(finfo->sohm.msgs_info.heap_size)) < 0)
		HGOTO_ERROR(H5E_HEAP, H5E_CANTGET, FAIL, "can't retrieve fractal heap storage info")

            /* Release the fractal heap */
            if(H5HF_close(fheap, dxpl_id) < 0)
                HGOTO_ERROR(H5E_HEAP, H5E_CLOSEERROR, FAIL, "can't close fractal heap")
            fheap = NULL;
        } /* end if */
    } /* end for */

done:
    /* Release resources */
    if(fheap && H5HF_close(fheap, dxpl_id) < 0)
        HDONE_ERROR(H5E_HEAP, H5E_CLOSEERROR, FAIL, "can't close fractal heap")
    if(table && H5AC_unprotect(f, dxpl_id, H5AC_SOHM_TABLE, f->shared->sohm_addr, table, H5AC__NO_FLAGS_SET) < 0)
        HDONE_ERROR(H5E_CACHE, H5E_CANTRELEASE, FAIL, "unable to close SOHM master table")

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5SM_ih_size() */